Am 05.11.21 um 00:05 schrieb Felix Kuehling:
If a kfd_bo was shared (e.g. via a dmabuf export), the original kfd_bo may be
freed while the amdgpu_bo still lives on. Free the kfd_bo struct in the
release_notify callback when the amdgpu_bo is freed.

Signed-off-by: Felix Kuehling <felix.kuehl...@amd.com>

Reviewed-by: Christian König <christian.koe...@amd.com>

---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h       |  4 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 12 +++++++++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c       |  2 +-
  3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 4accd584886b..5f658823a637 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -307,7 +307,7 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct 
amdgpu_device *adev);
  void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
  void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
                                struct amdgpu_vm *vm);
-void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
+void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo);
  void amdgpu_amdkfd_reserve_system_mem(uint64_t size);
  #else
  static inline
@@ -322,7 +322,7 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device 
*adev,
  }
static inline
-void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
+void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
  {
  }
  #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 5174762f0b46..94fccf0b47ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -201,7 +201,7 @@ static void unreserve_mem_limit(struct amdgpu_device *adev,
        spin_unlock(&kfd_mem_limit.mem_limit_lock);
  }
-void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
+void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
  {
        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
        u32 domain = bo->preferred_domains;
@@ -213,6 +213,8 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo 
*bo)
        }
unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
+
+       kfree(bo->kfd_bo);
  }
@@ -1599,9 +1601,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
        drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
        if (mem->dmabuf)
                dma_buf_put(mem->dmabuf);
-       drm_gem_object_put(&mem->bo->tbo.base);
        mutex_destroy(&mem->lock);
-       kfree(mem);
+
+       /* If this releases the last reference, it will end up calling
+        * amdgpu_amdkfd_release_notify and kfree the mem struct. That's why
+        * this needs to be the last call here.
+        */
+       drm_gem_object_put(&mem->bo->tbo.base);
return ret;
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 6b25982a9077..156002db24e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1279,7 +1279,7 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object 
*bo)
        abo = ttm_to_amdgpu_bo(bo);
if (abo->kfd_bo)
-               amdgpu_amdkfd_unreserve_memory_limit(abo);
+               amdgpu_amdkfd_release_notify(abo);
/* We only remove the fence if the resv has individualized. */
        WARN_ON_ONCE(bo->type == ttm_bo_type_kernel

Reply via email to