From: shaoyunl <shaoyun....@amd.com>

amdgpu saves the VM fault related information for KFD usage and keeps the
copy until KFD reads it.

Signed-off-by: shaoyun liu <shaoyun....@amd.com>
Signed-off-by: Felix Kuehling <felix.kuehl...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h        |  3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 14 ++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h           |  2 ++
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c             | 33 ++++++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c             | 33 ++++++++++++++++++++++-
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h   | 20 ++++++++++++++
 8 files changed, 105 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index a8418a3..3dc76d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -183,6 +183,9 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
 int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
                                            struct dma_fence **ef);
 
+int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
+                                             struct kfd_vm_fault_info *info);
+
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
 void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 0ff36d4..5364e22 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -216,6 +216,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
        .invalidate_tlbs = invalidate_tlbs,
        .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
        .submit_ib = amdgpu_amdkfd_submit_ib,
+       .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info
 };
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 6ef9762..fb5d06d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -176,6 +176,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
        .invalidate_tlbs = invalidate_tlbs,
        .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
        .submit_ib = amdgpu_amdkfd_submit_ib,
+       .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info
 };
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index ff8fd75..051e709d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1621,6 +1621,20 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
        return ret;
 }
 
+int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
+                                             struct kfd_vm_fault_info *mem)
+{
+       struct amdgpu_device *adev;
+
+       adev = (struct amdgpu_device *)kgd; /* kgd handle is used as the amdgpu_device pointer */
+       if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) { /* a saved fault record is pending */
+               *mem = *adev->gmc.vm_fault_info; /* struct copy into the caller's buffer */
+               mb(); /* complete the copy before the flag is cleared */
+               atomic_set(&adev->gmc.vm_fault_info_updated, 0); /* GMC interrupt handler may now save a new fault */
+       }
+       return 0; /* always 0; *mem is left unmodified when no record was pending */
+}
+
 /* Evict a userptr BO by stopping the queues if necessary
  *
  * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 893c249..f8c036c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -105,6 +105,8 @@ struct amdgpu_gmc {
        /* protects concurrent invalidation */
        spinlock_t              invalidate_lock;
        bool                    translate_further;
+       struct kfd_vm_fault_info *vm_fault_info;
+       atomic_t                vm_fault_info_updated;
 
        const struct amdgpu_gmc_funcs   *gmc_funcs;
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 7147bfe..8d61610 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -28,6 +28,7 @@
 #include "cik.h"
 #include "gmc_v7_0.h"
 #include "amdgpu_ucode.h"
+#include "amdgpu_amdkfd.h"
 
 #include "bif/bif_4_1_d.h"
 #include "bif/bif_4_1_sh_mask.h"
@@ -1079,6 +1080,12 @@ static int gmc_v7_0_sw_init(void *handle)
                adev->vm_manager.vram_base_offset = 0;
        }
 
+       adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info),
+                                       GFP_KERNEL);
+       if (!adev->gmc.vm_fault_info)
+               return -ENOMEM;
+       atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+
        return 0;
 }
 
@@ -1088,6 +1095,7 @@ static int gmc_v7_0_sw_fini(void *handle)
 
        amdgpu_gem_force_release(adev);
        amdgpu_vm_manager_fini(adev);
+       kfree(adev->gmc.vm_fault_info);
        gmc_v7_0_gart_fini(adev);
        amdgpu_bo_fini(adev);
        release_firmware(adev->gmc.fw);
@@ -1277,7 +1285,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
                                      struct amdgpu_irq_src *source,
                                      struct amdgpu_iv_entry *entry)
 {
-       u32 addr, status, mc_client;
+       u32 addr, status, mc_client, vmid;
 
        addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
        status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
@@ -1302,6 +1310,29 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
                                         entry->pasid);
        }
 
+       vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+                            VMID);
+       if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
+               && !atomic_read(&adev->gmc.vm_fault_info_updated)) {
+               struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
+               u32 protections = REG_GET_FIELD(status,
+                                       VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+                                       PROTECTIONS);
+
+               info->vmid = vmid;
+               info->mc_id = REG_GET_FIELD(status,
+                                           VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+                                           MEMORY_CLIENT_ID);
+               info->status = status;
+               info->page_addr = addr;
+               info->prot_valid = protections & 0x7 ? true : false;
+               info->prot_read = protections & 0x8 ? true : false;
+               info->prot_write = protections & 0x10 ? true : false;
+               info->prot_exec = protections & 0x20 ? true : false;
+               mb();
+               atomic_set(&adev->gmc.vm_fault_info_updated, 1);
+       }
+
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 1edbe6b..8105ebf 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -26,6 +26,7 @@
 #include "amdgpu.h"
 #include "gmc_v8_0.h"
 #include "amdgpu_ucode.h"
+#include "amdgpu_amdkfd.h"
 
 #include "gmc/gmc_8_1_d.h"
 #include "gmc/gmc_8_1_sh_mask.h"
@@ -1181,6 +1182,12 @@ static int gmc_v8_0_sw_init(void *handle)
                adev->vm_manager.vram_base_offset = 0;
        }
 
+       adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info),
+                                       GFP_KERNEL);
+       if (!adev->gmc.vm_fault_info)
+               return -ENOMEM;
+       atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+
        return 0;
 }
 
@@ -1190,6 +1197,7 @@ static int gmc_v8_0_sw_fini(void *handle)
 
        amdgpu_gem_force_release(adev);
        amdgpu_vm_manager_fini(adev);
+       kfree(adev->gmc.vm_fault_info);
        gmc_v8_0_gart_fini(adev);
        amdgpu_bo_fini(adev);
        release_firmware(adev->gmc.fw);
@@ -1425,7 +1433,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
                                      struct amdgpu_irq_src *source,
                                      struct amdgpu_iv_entry *entry)
 {
-       u32 addr, status, mc_client;
+       u32 addr, status, mc_client, vmid;
 
        if (amdgpu_sriov_vf(adev)) {
                dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
@@ -1457,6 +1465,29 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
                                         entry->pasid);
        }
 
+       vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+                            VMID);
+       if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
+               && !atomic_read(&adev->gmc.vm_fault_info_updated)) {
+               struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
+               u32 protections = REG_GET_FIELD(status,
+                                       VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+                                       PROTECTIONS);
+
+               info->vmid = vmid;
+               info->mc_id = REG_GET_FIELD(status,
+                                           VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+                                           MEMORY_CLIENT_ID);
+               info->status = status;
+               info->page_addr = addr;
+               info->prot_valid = protections & 0x7 ? true : false;
+               info->prot_read = protections & 0x8 ? true : false;
+               info->prot_write = protections & 0x10 ? true : false;
+               info->prot_exec = protections & 0x20 ? true : false;
+               mb();
+               atomic_set(&adev->gmc.vm_fault_info_updated, 1);
+       }
+
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 5733fbe..28b11d1 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -47,6 +47,17 @@ enum kfd_preempt_type {
        KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
 };
 
+struct kfd_vm_fault_info { /* snapshot of one VM protection fault, filled by the GMC interrupt handler */
+       uint64_t        page_addr; /* faulting address as captured by the interrupt handler */
+       uint32_t        vmid; /* VMID field of the fault status register */
+       uint32_t        mc_id; /* MEMORY_CLIENT_ID field of the fault status register */
+       uint32_t        status; /* raw VM_CONTEXT1_PROTECTION_FAULT_STATUS value */
+       bool            prot_valid; /* true if any of PROTECTIONS bits 0-2 is set */
+       bool            prot_read; /* true if PROTECTIONS bit 3 (0x8) is set */
+       bool            prot_write; /* true if PROTECTIONS bit 4 (0x10) is set */
+       bool            prot_exec; /* true if PROTECTIONS bit 5 (0x20) is set */
+};
+
 struct kfd_cu_info {
        uint32_t num_shader_engines;
        uint32_t num_shader_arrays_per_engine;
@@ -259,6 +270,12 @@ struct tile_config {
  * IB to the corresponding ring (ring type). The IB is executed with the
  * specified VMID in a user mode context.
  *
+ * @get_vm_fault_info: Return information about a recent VM fault on
+ * GFXv7 and v8. If multiple VM faults occurred since the last call of
+ * this function, it will return information about the first of those
+ * faults. On GFXv9 VM fault information is fully contained in the IH
+ * packet and this function is not needed.
+ *
  * This structure contains function pointers to services that the kgd driver
  * provides to amdkfd driver.
  *
@@ -374,6 +391,9 @@ struct kfd2kgd_calls {
        int (*submit_ib)(struct kgd_dev *kgd, enum kgd_engine_type engine,
                        uint32_t vmid, uint64_t gpu_addr,
                        uint32_t *ib_cmd, uint32_t ib_len);
+
+       int (*get_vm_fault_info)(struct kgd_dev *kgd,
+                       struct kfd_vm_fault_info *info);
 };
 
 /**
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to