Hi Xinhui,

Two suggestions inline. Looks good to me otherwise.

On 2020-02-17 10:36 p.m., xinhui pan wrote:
No need to trigger eviction as the memory mapping will not be used
anymore.

All pt/pd BOs share the same resv, and hence the same shared eviction fence.
Every time a page table is freed, the fence will be signaled, and that causes
unexpected KFD evictions.

Signed-off-by: xinhui pan <xinhui....@amd.com>
CC: Christian König <christian.koe...@amd.com>
CC: Felix Kuehling <felix.kuehl...@amd.com>
CC: Alex Deucher <alexander.deuc...@amd.com>
---
change from v4:
based on new ttm code.

change from v3:
fix a coding error

change from v2:
based on Chris' drm/ttm: rework BO delayed delete patchset.

---
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  1 +
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 37 +++++++++++++++++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    |  4 ++
  3 files changed, 42 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 9e8db702d878..0ee8aae6c519 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -96,6 +96,7 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 
context,
                                                       struct mm_struct *mm);
  bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
  struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
+int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo);
struct amdkfd_process_info {
        /* List head of all VMs that belong to a KFD process */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index ef721cb65868..6aa20aa82bd3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -276,6 +276,41 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct 
amdgpu_bo *bo,
        return 0;
  }
+int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
+{
+       struct amdgpu_bo *root = bo;
+       struct amdgpu_vm_bo_base *vm_bo;
+       struct amdgpu_vm *vm;
+       struct amdkfd_process_info *info;
+       struct amdgpu_amdkfd_fence *ef;
+       int ret;
+
+       while (root->parent)
+               root = root->parent;

This should not be necessary. Every page table BO has a pointer to a vm_bo that has a pointer to the vm. So you don't need to find the root.

This should do the trick:

        if (!bo->vm_bo || !bo->vm_bo->vm)
                return 0;
        vm = bo->vm_bo->vm;


+
+       vm_bo = root->vm_bo;
+       if (!vm_bo)
+               return 0;
+
+       vm = vm_bo->vm;
+       if (!vm)
+               return 0;
+
+       info = vm->process_info;
+       if (!info || !info->eviction_fence)
+               return 0;
+
+       ef = container_of(dma_fence_get(&info->eviction_fence->base),
+                       struct amdgpu_amdkfd_fence, base);
+
+       dma_resv_lock(bo->tbo.base.resv, NULL);
+       ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef);
+       dma_resv_unlock(bo->tbo.base.resv);
+
+       dma_fence_put(&ef->base);
+       return ret;
+}
+
  static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
                                     bool wait)
  {
@@ -1045,6 +1080,8 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device 
*adev,
        list_del(&vm->vm_list_node);
        mutex_unlock(&process_info->lock);
+ vm->process_info = NULL;
+
        /* Release per-process resources when last compute VM is destroyed */
        if (!process_info->n_vms) {
                WARN_ON(!list_empty(&process_info->kfd_bo_list));
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 6f60a581e3ba..16586651020f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1307,6 +1307,10 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object 
*bo)
        if (abo->kfd_bo)
                amdgpu_amdkfd_unreserve_memory_limit(abo);
+ /* We only remove the fence if the resv has been individualized. */
+       if (bo->base.resv == &bo->base._resv)

Should this be a WARN_ON? We expect this condition to be always true. If it's not, there should be a noisy warning that something is wrong.

Regards,
  Felix


+               amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo);
+
        if (bo->mem.mem_type != TTM_PL_VRAM || !bo->mem.mm_node ||
            !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE))
                return;
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to