Replace mode-2 reset with queue reset to improve system performance. v2: Fall back to mode-1 reset if the queue reset fails. Move the queue reset handling from KFD to KGD.
Signed-off-by: Tao Zhou <tao.zh...@amd.com> --- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 0018d04f4b33..b9429774a006 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -197,6 +197,9 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev, case SOC15_IH_CLIENTID_SDMA2: case SOC15_IH_CLIENTID_SDMA3: case SOC15_IH_CLIENTID_SDMA4: + case SOC15_IH_CLIENTID_SDMA5: + case SOC15_IH_CLIENTID_SDMA6: + case SOC15_IH_CLIENTID_SDMA7: block = AMDGPU_RAS_BLOCK__SDMA; if (amdgpu_ip_version(dev->adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2)) { /* driver mode-2 for gfx poison is only supported by @@ -204,7 +207,7 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev, if (dev->adev->pm.fw_version < 0x00557300) reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; else - reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; + reset = 0; } else if (amdgpu_ip_version(dev->adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5)) { /* driver mode-2 for gfx poison is only supported by * pmfw 0x05550C00 and onwards */ @@ -223,6 +226,14 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev, return; } + if (!reset) { + ret = amdgpu_amdkfd_ras_poison_queue_reset(dev->adev, block, + client_id, vmid, node_id); + /* fallback to mode1 reset */ + if (ret) + reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; + } + ret = amdgpu_ras_mark_ras_event(dev->adev, type); if (ret) return; -- 2.34.1