The RAS bad page retire flip bits can be set per vram type,
vram vendor and nps mode.

Signed-off-by: Tao Zhou <tao.zh...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  1 -
 drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 82 +++++++++++++++++---------
 drivers/gpu/drm/amd/amdgpu/umc_v12_0.h |  2 -
 3 files changed, 53 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 3caebe7c25a5..464015fc2012 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1504,7 +1504,6 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
                adev->umc.umc_inst_num = UMC_V12_0_UMC_INSTANCE_NUM;
                adev->umc.node_inst_num /= UMC_V12_0_UMC_INSTANCE_NUM;
                adev->umc.channel_offs = UMC_V12_0_PER_CHANNEL_OFFSET;
-               adev->umc.retire_unit = UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL;
                if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu)
                        adev->umc.ras = &umc_v12_0_ras;
                break;
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index d715cfde6aec..774d3baa62d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -174,6 +174,49 @@ static void umc_v12_0_query_ras_error_count(struct amdgpu_device *adev,
        umc_v12_0_reset_error_count(adev);
 }
 
+static void umc_v12_0_get_retire_flip_bits(struct amdgpu_device *adev)
+{
+       enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE;
+       uint32_t vram_type = adev->gmc.vram_type;
+       struct amdgpu_umc_flip_bits *flip_bits = &(adev->umc.flip_bits);
+
+       if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+               nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+
+       /* default setting (NPS1 values), kept unless overridden for HBM below */
+       flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_C2_BIT;
+       flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C3_BIT;
+       flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_C4_BIT;
+       flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R13_BIT;
+       flip_bits->bit_num = 4;
+
+       switch (vram_type) {
+       case AMDGPU_VRAM_TYPE_HBM:
+               /* only NPS2 and NPS4 override, other nps modes are taken as nps1 */
+               if (nps == AMDGPU_NPS2_PARTITION_MODE) {
+                       flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH5_BIT;
+                       flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C2_BIT;
+                       flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B1_BIT;
+                       flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R12_BIT;
+               }
+
+               if (nps == AMDGPU_NPS4_PARTITION_MODE) {
+                       flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH4_BIT;
+                       flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_CH5_BIT;
+                       flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B0_BIT;
+                       flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT;
+               }
+
+               break;
+       default:
+               dev_warn(adev->dev,
+                       "Unknown HBM type, set RAS retire flip bits to the value in NPS1 mode.\n");
+               break;
+       }
+
+       adev->umc.retire_unit = 0x1 << flip_bits->bit_num;
+}
+
 static int umc_v12_0_convert_error_address(struct amdgpu_device *adev,
                                        struct ras_err_data *err_data,
                                        struct ta_ras_query_address_input *addr_in,
@@ -182,11 +225,10 @@ static int umc_v12_0_convert_error_address(struct amdgpu_device *adev,
 {
        uint32_t col, col_lower, row, row_lower, row_high, bank;
        uint32_t channel_index = 0, umc_inst = 0;
-       uint32_t i, loop_bits[UMC_V12_0_RETIRE_LOOP_BITS];
+       uint32_t i, bit_num, retire_unit, *flip_bits;
        uint64_t soc_pa, column, err_addr;
        struct ta_ras_query_address_output addr_out_tmp;
        struct ta_ras_query_address_output *paddr_out;
-       enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE;
        int ret = 0;
 
        if (!addr_out)
@@ -211,34 +253,15 @@ static int umc_v12_0_convert_error_address(struct amdgpu_device *adev,
                umc_inst = addr_in->ma.umc_inst;
        }
 
-       loop_bits[0] = UMC_V12_0_PA_C2_BIT;
-       loop_bits[1] = UMC_V12_0_PA_C3_BIT;
-       loop_bits[2] = UMC_V12_0_PA_C4_BIT;
-       loop_bits[3] = UMC_V12_0_PA_R13_BIT;
-
-       if (adev->gmc.gmc_funcs->query_mem_partition_mode)
-               nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
-
-       /* other nps modes are taken as nps1 */
-       if (nps == AMDGPU_NPS2_PARTITION_MODE) {
-               loop_bits[0] = UMC_V12_0_PA_CH5_BIT;
-               loop_bits[1] = UMC_V12_0_PA_C2_BIT;
-               loop_bits[2] = UMC_V12_0_PA_B1_BIT;
-               loop_bits[3] = UMC_V12_0_PA_R12_BIT;
-       }
-
-       if (nps == AMDGPU_NPS4_PARTITION_MODE) {
-               loop_bits[0] = UMC_V12_0_PA_CH4_BIT;
-               loop_bits[1] = UMC_V12_0_PA_CH5_BIT;
-               loop_bits[2] = UMC_V12_0_PA_B0_BIT;
-               loop_bits[3] = UMC_V12_0_PA_R11_BIT;
-       }
+       flip_bits = adev->umc.flip_bits.flip_bits_in_pa;
+       bit_num = adev->umc.flip_bits.bit_num;
+       retire_unit = adev->umc.retire_unit;
 
        soc_pa = paddr_out->pa.pa;
        channel_index = paddr_out->pa.channel_idx;
        /* clear loop bits in soc physical address */
-       for (i = 0; i < UMC_V12_0_RETIRE_LOOP_BITS; i++)
-               soc_pa &= ~BIT_ULL(loop_bits[i]);
+       for (i = 0; i < bit_num; i++)
+               soc_pa &= ~BIT_ULL(flip_bits[i]);
 
        paddr_out->pa.pa = soc_pa;
        /* get column bit 0 and 1 in mca address */
@@ -259,10 +282,10 @@ static int umc_v12_0_convert_error_address(struct amdgpu_device *adev,
                goto out;
 
        /* loop for all possibilities of retired bits */
-       for (column = 0; column < UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL; column++) {
+       for (column = 0; column < retire_unit; column++) {
                soc_pa = paddr_out->pa.pa;
-               for (i = 0; i < UMC_V12_0_RETIRE_LOOP_BITS; i++)
-                       soc_pa |= (((column >> i) & 0x1ULL) << loop_bits[i]);
+               for (i = 0; i < bit_num; i++)
+                       soc_pa |= (((column >> i) & 0x1ULL) << flip_bits[i]);
 
                col = ((column & 0x7) << 2) | col_lower;
                /* add row bit 13 */
@@ -684,5 +707,6 @@ struct amdgpu_umc_ras umc_v12_0_ras = {
        .update_ecc_status = umc_v12_0_update_ecc_status,
        .convert_ras_err_addr = umc_v12_0_convert_error_address,
        .get_die_id_from_pa = umc_v12_0_get_die_id,
+       .get_retire_flip_bits = umc_v12_0_get_retire_flip_bits,
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
index 056bbc038312..ccdd6cd430f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
@@ -55,8 +55,6 @@
 #define UMC_V12_0_NA_MAP_PA_NUM        8
 /* R13 bit shift should be considered, double the number */
 #define UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL (UMC_V12_0_NA_MAP_PA_NUM * 2)
-/* C2, C3, C4, R13, four bits in MCA address are looped in retirement */
-#define UMC_V12_0_RETIRE_LOOP_BITS 4
 
 /* column bits in SOC physical address */
 #define UMC_V12_0_PA_C2_BIT 15
-- 
2.34.1

Reply via email to