If an application unmaps memory before destroying the userptr that
refers to it, the driver needs to trigger a segfault in VM debug mode to
notify user space so that it can correct its free sequence.

v2: Send GPU access fault to user space
v3: Report gpu address to user space, remove unnecessary params

Signed-off-by: Shane Xiao <shane.x...@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 15 +++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_events.c       | 19 +++++++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  2 ++
 3 files changed, 36 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index d2ec4130a316..61a698056fb8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2496,6 +2496,7 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
        struct ttm_operation_ctx ctx = { false, false };
        uint32_t invalid;
        int ret = 0;
+       uint64_t userptr = 0;
 
        mutex_lock(&process_info->notifier_lock);
 
@@ -2559,6 +2560,20 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
                        if (ret != -EFAULT)
                                return ret;
 
+                       /* If applications unmap memory before destroying the 
userptr
+                        * from the KFD, trigger a segmentation fault in VM 
debug mode.
+                        */
+                       if (amdgpu_ttm_adev(bo->tbo.bdev)->debug_vm_userptr) {
+                               amdgpu_ttm_tt_get_userptr(&bo->tbo, &userptr);
+                               pr_err("User space unmap memory before 
destroying a userptr that refers to it\n");
+                               pr_err("The unmap userptr cpu address is 
0x%llx, gpu address is 0x%llx\n",
+                                                               userptr, 
mem->va);
+
+                               // Send GPU VM fault to user space
+                               
kfd_signal_vm_fault_event_with_userptr(kfd_lookup_process_by_pid(process_info->pid),
+                                                               mem->va);
+                       }
+
                        ret = 0;
                }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index fecdb6794075..e54e708ed82d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -1177,6 +1177,25 @@ void kfd_signal_hw_exception_event(u32 pasid)
        kfd_unref_process(p);
 }
 
+void kfd_signal_vm_fault_event_with_userptr(struct kfd_process *p, uint64_t 
gpu_va)
+{
+       struct kfd_process_device *pdd;
+       struct kfd_hsa_memory_exception_data exception_data;
+       int i;
+
+       memset(&exception_data, 0, sizeof(exception_data));
+       exception_data.va = gpu_va;
+       exception_data.failure.NotPresent = 1;
+
+       // Send VM seg fault to all kfd process device
+       for (i = 0; i < p->n_pdds; i++) {
+               pdd = p->pdds[i];
+               exception_data.gpu_id = pdd->user_gpu_id;
+               kfd_evict_process_device(pdd);
+               kfd_signal_vm_fault_event(pdd, NULL, &exception_data);
+       }
+}
+
 void kfd_signal_vm_fault_event(struct kfd_process_device *pdd,
                                struct kfd_vm_fault_info *info,
                                struct kfd_hsa_memory_exception_data *data)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index f6aedf69c644..8703be8077b0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1507,6 +1507,8 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
 int kfd_get_num_events(struct kfd_process *p);
 int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
 
+void kfd_signal_vm_fault_event_with_userptr(struct kfd_process *p, uint64_t 
gpu_va);
+
 void kfd_signal_vm_fault_event(struct kfd_process_device *pdd,
                                struct kfd_vm_fault_info *info,
                                struct kfd_hsa_memory_exception_data *data);
-- 
2.25.1

Reply via email to