[AMD Official Use Only - Internal Distribution Only]

Series is Reviewed-by: Stanley.Yang <stanley.y...@amd.com>

Regards,
Stanley
> -----Original Message-----
> From: Zhang, Hawking <hawking.zh...@amd.com>
> Sent: Friday, April 16, 2021 5:44 PM
> To: amd-gfx@lists.freedesktop.org; Yang, Stanley <stanley.y...@amd.com>;
> John Clements <john.clemen...@amd.com>; Li, Dennis
> <dennis...@amd.com>
> Cc: Zhang, Hawking <hawking.zh...@amd.com>
> Subject: [PATCH 2/2] drm/amdgpu: only harvest gcea/mmea error status in
> aldebaran
> 
> In aldebaran, the driver only needs to harvest SDP RdRspStatus, WrRspStatus
> and the first parity error on RdRsp data. Check the error type before
> harvesting error information.
> 
> Signed-off-by: Hawking Zhang <hawking.zh...@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c | 21 ++++++++++++---------
> drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c | 11 +++++++----
>  2 files changed, 19 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
> index 9ca76a3ac38c..91427543aabe 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
> @@ -808,7 +808,7 @@ static struct gfx_v9_4_2_utc_block
> gfx_v9_4_2_utc_blocks[] = {
>         REG_SET_FIELD(0, ATC_L2_CACHE_4K_DSM_CNTL,
> WRITE_COUNTERS, 1) },  };
> 
> -static const struct soc15_reg_entry gfx_v9_4_2_rdrsp_status_regs =
> +static const struct soc15_reg_entry gfx_v9_4_2_ea_err_status_regs =
>       { SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 };
> 
>  static int gfx_v9_4_2_get_reg_error_count(struct amdgpu_device *adev,
> @@ -1040,11 +1040,11 @@ static void
> gfx_v9_4_2_reset_ea_err_status(struct amdgpu_device *adev)
>       uint32_t i, j;
> 
>       mutex_lock(&adev->grbm_idx_mutex);
> -     for (i = 0; i < gfx_v9_4_2_rdrsp_status_regs.se_num; i++) {
> -             for (j = 0; j < gfx_v9_4_2_rdrsp_status_regs.instance;
> +     for (i = 0; i < gfx_v9_4_2_ea_err_status_regs.se_num; i++) {
> +             for (j = 0; j < gfx_v9_4_2_ea_err_status_regs.instance;
>                    j++) {
>                       gfx_v9_4_2_select_se_sh(adev, i, 0, j);
> -
>       WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_rdrsp_status_reg
> s), 0x10);
> +
>       WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_re
> gs), 0x10);
>               }
>       }
>       gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); @@
> -1089,17 +1089,20 @@ static void gfx_v9_4_2_query_ea_err_status(struct
> amdgpu_device *adev)
> 
>       mutex_lock(&adev->grbm_idx_mutex);
> 
> -     for (i = 0; i < gfx_v9_4_2_rdrsp_status_regs.se_num; i++) {
> -             for (j = 0; j < gfx_v9_4_2_rdrsp_status_regs.instance;
> +     for (i = 0; i < gfx_v9_4_2_ea_err_status_regs.se_num; i++) {
> +             for (j = 0; j < gfx_v9_4_2_ea_err_status_regs.instance;
>                    j++) {
>                       gfx_v9_4_2_select_se_sh(adev, i, 0, j);
>                       reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(
> -                             gfx_v9_4_2_rdrsp_status_regs));
> -                     if (reg_value)
> +                             gfx_v9_4_2_ea_err_status_regs));
> +                     if (REG_GET_FIELD(reg_value, GCEA_ERR_STATUS,
> SDP_RDRSP_STATUS) ||
> +                         REG_GET_FIELD(reg_value, GCEA_ERR_STATUS,
> SDP_WRRSP_STATUS) ||
> +                         REG_GET_FIELD(reg_value, GCEA_ERR_STATUS,
> +SDP_RDRSP_DATAPARITY_ERROR)) {
>                               dev_warn(adev->dev, "GCEA err detected at
> instance: %d, status: 0x%x!\n",
>                                               j, reg_value);
> +                     }
>                       /* clear after read */
> -
>       WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_rdrsp_status_reg
> s), 0x10);
> +
>       WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_re
> gs), 0x10);
>               }
>       }
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
> b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
> index d0f41346ea0c..cc69c434d0de 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
> @@ -1286,7 +1286,7 @@ static void
> mmhub_v1_7_reset_ras_error_count(struct amdgpu_device *adev)
>       }
>  }
> 
> -static const struct soc15_reg_entry mmhub_v1_7_err_status_regs[] = {
> +static const struct soc15_reg_entry mmhub_v1_7_ea_err_status_regs[] = {
>       { SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_ERR_STATUS), 0, 0, 0 },
>       { SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_ERR_STATUS), 0, 0, 0 },
>       { SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_ERR_STATUS), 0, 0, 0 },
> @@ -1303,12 +1303,15 @@ static void
> mmhub_v1_7_query_ras_error_status(struct amdgpu_device *adev)
>       if (!amdgpu_ras_is_supported(adev,
> AMDGPU_RAS_BLOCK__MMHUB))
>               return;
> 
> -     for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_err_status_regs); i++) {
> +     for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_ea_err_status_regs); i++) {
>               reg_value =
> -
>       RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_err_status_regs
> [i]));
> -             if (reg_value)
> +
>       RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_ea_err_status_r
> egs[i]));
> +             if (REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS,
> SDP_RDRSP_STATUS) ||
> +                 REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS,
> SDP_WRRSP_STATUS) ||
> +                 REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS,
> +SDP_RDRSP_DATAPARITY_ERROR)) {
>                       dev_warn(adev->dev, "MMHUB EA err detected at
> instance: %d, status: 0x%x!\n",
>                                       i, reg_value);
> +             }
>       }
>  }
> 
> --
> 2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to