[AMD Official Use Only - General]

Reviewed-by: Stanley.Yang <stanley.y...@amd.com>

Regards,
Stanley
> -----Original Message-----
> From: amd-gfx <amd-gfx-boun...@lists.freedesktop.org> On Behalf Of
> Candice Li
> Sent: Wednesday, March 1, 2023 2:10 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Li, Candice <candice...@amd.com>
> Subject: [PATCH] drm/amd/pm: Enable ecc_info table support for smu
> v13_0_10
> 
> Support EccInfoTable which includes umc ras error count and error address.
> 
> Signed-off-by: Candice Li <candice...@amd.com>
> Reviewed-by: Evan Quan <evan.q...@amd.com>
> ---
>  .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c  | 75
> +++++++++++++++++++
>  1 file changed, 75 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> index 923a9fb3c8873c..27448ffe60a439 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
> @@ -46,6 +46,7 @@
>  #include "asic_reg/mp/mp_13_0_0_sh_mask.h"
>  #include "smu_cmn.h"
>  #include "amdgpu_ras.h"
> +#include "umc_v8_10.h"
> 
>  /*
>   * DO NOT use these for err/warn/info/debug messages.
> @@ -90,6 +91,12 @@
> 
>  #define DEBUGSMC_MSG_Mode1Reset      2
> 
> +/*
> + * SMU_v13_0_10 supports ECCTABLE since version 80.34.0,
> + * use this to check ECCTABLE feature whether support
> + */
> +#define SUPPORT_ECCTABLE_SMU_13_0_10_VERSION 0x00502200
> +
>  static struct cmn2asic_msg_mapping
> smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] = {
>       MSG_MAP(TestMessage,
>       PPSMC_MSG_TestMessage,                 1),
>       MSG_MAP(GetSmuVersion,
>       PPSMC_MSG_GetSmuVersion,               1),
> @@ -229,6 +236,7 @@ static struct cmn2asic_mapping
> smu_v13_0_0_table_map[SMU_TABLE_COUNT] = {
>       TAB_MAP(ACTIVITY_MONITOR_COEFF),
>       [SMU_TABLE_COMBO_PPTABLE] = {1, TABLE_COMBO_PPTABLE},
>       TAB_MAP(I2C_COMMANDS),
> +     TAB_MAP(ECCINFO),
>  };
> 
>  static struct cmn2asic_mapping
> smu_v13_0_0_pwr_src_map[SMU_POWER_SOURCE_COUNT] = {
> @@ -462,6 +470,8 @@ static int smu_v13_0_0_tables_init(struct smu_context *smu)
>                      AMDGPU_GEM_DOMAIN_VRAM);
>       SMU_TABLE_INIT(tables, SMU_TABLE_COMBO_PPTABLE,
> MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE,
>                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
> +     SMU_TABLE_INIT(tables, SMU_TABLE_ECCINFO,
> sizeof(EccInfoTable_t),
> +                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
> 
>       smu_table->metrics_table = kzalloc(sizeof(SmuMetricsExternal_t),
> GFP_KERNEL);
>       if (!smu_table->metrics_table)
> @@ -477,8 +487,14 @@ static int smu_v13_0_0_tables_init(struct
> smu_context *smu)
>       if (!smu_table->watermarks_table)
>               goto err2_out;
> 
> +     smu_table->ecc_table = kzalloc(tables[SMU_TABLE_ECCINFO].size,
> GFP_KERNEL);
> +     if (!smu_table->ecc_table)
> +             goto err3_out;
> +
>       return 0;
> 
> +err3_out:
> +     kfree(smu_table->watermarks_table);
>  err2_out:
>       kfree(smu_table->gpu_metrics_table);
>  err1_out:
> @@ -2036,6 +2052,64 @@ static int
> smu_v13_0_0_send_bad_mem_channel_flag(struct smu_context *smu,
>       return ret;
>  }
> 
> +static int smu_v13_0_0_check_ecc_table_support(struct smu_context
> *smu)
> +{
> +     struct amdgpu_device *adev = smu->adev;
> +     uint32_t if_version = 0xff, smu_version = 0xff;
> +     int ret = 0;
> +
> +     ret = smu_cmn_get_smc_version(smu, &if_version, &smu_version);
> +     if (ret)
> +             return -EOPNOTSUPP;
> +
> +     if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 10)) &&
> +             (smu_version >=
> SUPPORT_ECCTABLE_SMU_13_0_10_VERSION))
> +             return ret;
> +     else
> +             return -EOPNOTSUPP;
> +}
> +
> +static ssize_t smu_v13_0_0_get_ecc_info(struct smu_context *smu,
> +                                                                     void
> *table)
> +{
> +     struct smu_table_context *smu_table = &smu->smu_table;
> +     struct amdgpu_device *adev = smu->adev;
> +     EccInfoTable_t *ecc_table = NULL;
> +     struct ecc_info_per_ch *ecc_info_per_channel = NULL;
> +     int i, ret = 0;
> +     struct umc_ecc_info *eccinfo = (struct umc_ecc_info *)table;
> +
> +     ret = smu_v13_0_0_check_ecc_table_support(smu);
> +     if (ret)
> +             return ret;
> +
> +     ret = smu_cmn_update_table(smu,
> +                                     SMU_TABLE_ECCINFO,
> +                                     0,
> +                                     smu_table->ecc_table,
> +                                     false);
> +     if (ret) {
> +             dev_info(adev->dev, "Failed to export SMU ecc table!\n");
> +             return ret;
> +     }
> +
> +     ecc_table = (EccInfoTable_t *)smu_table->ecc_table;
> +
> +     for (i = 0; i < UMC_V8_10_TOTAL_CHANNEL_NUM(adev); i++) {
> +             ecc_info_per_channel = &(eccinfo->ecc[i]);
> +             ecc_info_per_channel->ce_count_lo_chip =
> +                             ecc_table->EccInfo[i].ce_count_lo_chip;
> +             ecc_info_per_channel->ce_count_hi_chip =
> +                             ecc_table->EccInfo[i].ce_count_hi_chip;
> +             ecc_info_per_channel->mca_umc_status =
> +                             ecc_table->EccInfo[i].mca_umc_status;
> +             ecc_info_per_channel->mca_umc_addr =
> +                             ecc_table->EccInfo[i].mca_umc_addr;
> +     }
> +
> +     return ret;
> +}
> +
>  static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
>       .get_allowed_feature_mask =
> smu_v13_0_0_get_allowed_feature_mask,
>       .set_default_dpm_table = smu_v13_0_0_set_default_dpm_table,
> @@ -2111,6 +2185,7 @@ static const struct pptable_funcs
> smu_v13_0_0_ppt_funcs = {
>       .send_hbm_bad_pages_num =
> smu_v13_0_0_smu_send_bad_mem_page_num,
>       .send_hbm_bad_channel_flag =
> smu_v13_0_0_send_bad_mem_channel_flag,
>       .gpo_control = smu_v13_0_gpo_control,
> +     .get_ecc_info = smu_v13_0_0_get_ecc_info,
>  };
> 
>  void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu)
> --
> 2.17.1

Reply via email to