On 3/7/2025 7:18 PM, Christian König wrote:
We keep the gang submission fence around in adev, make sure that it
stays alive.

v2: fix memory leak on retry

Signed-off-by: Christian König <christian.koe...@amd.com>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 +++++++++-
  1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 198d29faa754..337543ec615c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -6889,18 +6889,26 @@ struct dma_fence *amdgpu_device_switch_gang(struct 
amdgpu_device *adev,
  {
        struct dma_fence *old = NULL;
+ dma_fence_get(gang);
        do {
                dma_fence_put(old);
                old = amdgpu_device_get_gang(adev);
                if (old == gang)
                        break;
- if (!dma_fence_is_signaled(old))
+               if (!dma_fence_is_signaled(old)) {

Here, do we also need to check that old is not NULL before testing whether it has signaled?

    // Check that the old fence is valid and has not signaled yet
    if (old && !dma_fence_is_signaled(old)) {

+                       dma_fence_put(gang);
                        return old;
+               }
} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
                         old, gang) != old);
+ /*
+        * Drop it once for the exchanged reference in adev and once for the
+        * thread local reference acquired in amdgpu_device_get_gang().
+        */
+       dma_fence_put(old);

if (old)
    dma_fence_put(old); // Should we release the old reference only if it is valid?


        dma_fence_put(old);
        return NULL;
  }

Reply via email to