Replace mode2 reset with queue reset to improve system performance.

v2: Fall back to mode1 reset if the queue reset fails.
    Move the queue reset handling from KFD to KGD.

Signed-off-by: Tao Zhou <tao.zh...@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index 0018d04f4b33..b9429774a006 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -197,6 +197,9 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
        case SOC15_IH_CLIENTID_SDMA2:
        case SOC15_IH_CLIENTID_SDMA3:
        case SOC15_IH_CLIENTID_SDMA4:
+       case SOC15_IH_CLIENTID_SDMA5:
+       case SOC15_IH_CLIENTID_SDMA6:
+       case SOC15_IH_CLIENTID_SDMA7:
                block = AMDGPU_RAS_BLOCK__SDMA;
                if (amdgpu_ip_version(dev->adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2)) {
                        /* driver mode-2 for gfx poison is only supported by
@@ -204,7 +207,7 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
                        if (dev->adev->pm.fw_version < 0x00557300)
                                reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
                        else
-                               reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+                               reset = 0;
                } else if (amdgpu_ip_version(dev->adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5)) {
                        /* driver mode-2 for gfx poison is only supported by
                         * pmfw 0x05550C00 and onwards */
@@ -223,6 +226,14 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
                return;
        }
 
+       if (!reset) {
+               ret = amdgpu_amdkfd_ras_poison_queue_reset(dev->adev, block,
+                                client_id, vmid, node_id);
+               /* fallback to mode1 reset */
+               if (ret)
+                       reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+       }
+
        ret = amdgpu_ras_mark_ras_event(dev->adev, type);
        if (ret)
                return;
-- 
2.34.1

Reply via email to