When an uncorrected error is injected while a background workload is running, the deferred errors among the uncorrected errors need to be distinguished by checking the deferred and poison bits of the status register.
Signed-off-by: Xiang Liu <xiang....@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 3 +++ drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 11 +++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index ffd4c64e123c..3f45a600f547 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -541,6 +541,9 @@ static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *h if (ret) return ret; + if (type == ACA_ERROR_TYPE_UE) + aca_log_aca_error(handle, ACA_ERROR_TYPE_DEFERRED, err_data); + return aca_log_aca_error(handle, type, err_data); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index c0de682b7774..b21d784a7f9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -876,7 +876,7 @@ static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle, void *data) { struct aca_bank_info info; - u64 misc0; + u64 misc0, status; u32 instlo; int ret; @@ -890,12 +890,15 @@ static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle, info.die_id = instlo == mmSMNAID_XCD0_MCA_SMU ? 0 : 1; misc0 = bank->regs[ACA_REG_IDX_MISC0]; + status = bank->regs[ACA_REG_IDX_STATUS]; switch (type) { case ACA_SMU_TYPE_UE: - bank->aca_err_type = ACA_ERROR_TYPE_UE; - ret = aca_error_cache_log_bank_error(handle, &info, - ACA_ERROR_TYPE_UE, 1ULL); + bank->aca_err_type = (ACA_REG__STATUS__POISON(status) || + ACA_REG__STATUS__DEFERRED(status)) ? + ACA_ERROR_TYPE_DEFERRED : + ACA_ERROR_TYPE_UE; + ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, 1ULL); break; case ACA_SMU_TYPE_CE: bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank); -- 2.34.1