Adding bad pages can be simplified by using the NPS info saved in the bad page records.

Signed-off-by: ganglxie <gangl...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 196 +++++++++++++-----------
 1 file changed, 105 insertions(+), 91 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 5420e2d6d244..439841a2d1c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2801,20 +2801,101 @@ static int amdgpu_ras_mca2pa(struct amdgpu_device 
*adev,
                return  -EINVAL;
 }
 
+static int __amdgpu_ras_restore_bad_pages(struct amdgpu_device *adev, /* append records bps[0..count) to con->eh_data, skipping those the bad-page check rejects */
+                                       struct eeprom_table_record *bps, int 
count)
+{
+       int j;
+       struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+       struct ras_err_handler_data *data = con->eh_data; /* not NULL-checked here; amdgpu_ras_add_bad_pages() checks con and con->eh_data before calling */
+
+       for (j = 0; j < count; j++) {
+               if (amdgpu_ras_check_bad_page_unlock(con,
+                       bps[j].retired_page << AMDGPU_GPU_PAGE_SHIFT))
+                       continue; /* non-zero check result: skip this record (presumably already tracked - TODO confirm) */
+
+               if (!data->space_left &&
+                       amdgpu_ras_realloc_eh_data_space(adev, data, 256)) { /* no room left: call the realloc helper with 256 (presumably entries to add - TODO confirm) */
+                       return -ENOMEM;
+               }
+
+               amdgpu_ras_reserve_page(adev, bps[j].retired_page); /* result, if any, is not checked */
+
+               memcpy(&data->bps[data->count], &(bps[j]),
+                               sizeof(struct eeprom_table_record)); /* store the record in the next free slot */
+               data->count++;
+               data->space_left--;
+       }
+
+       return 0; /* 0 once all records are processed; -ENOMEM above if the realloc helper fails */
+}
+
+static int __amdgpu_ras_convert_rec_array_from_rom(struct amdgpu_device *adev, /* convert a group of adev->umc.retire_unit records starting at bps[0], then add the resulting pages */
+                               struct eeprom_table_record *bps, struct 
ras_err_data *err_data,
+                               enum amdgpu_memory_partition nps) /* nps: partition mode the caller queried (NPS1 by default in amdgpu_ras_add_bad_pages) */
+{
+       int i = 0;
+       int ret = 0; /* NOTE(review): ret is never used in this function */
+       enum amdgpu_memory_partition save_nps;
+
+       save_nps = (bps[0].retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK; /* NPS value carried in the retired_page bits of the first record */
+
+       for (i = 0; i < adev->umc.retire_unit; i++)
+               bps[i].retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT); /* clear the NPS bits in place; this modifies the caller's records */
+
+       if (save_nps) {
+               if (save_nps == nps) { /* record's NPS value matches the current mode: look up the row's pages from bps[0]'s page address */
+                       if (amdgpu_umc_pages_in_a_row(adev, err_data,
+                                       bps[0].retired_page << 
AMDGPU_GPU_PAGE_SHIFT))
+                               return -EINVAL;
+               } else { /* record's NPS value differs from the current mode: convert via amdgpu_ras_mca2pa_by_idx() */
+                       if (amdgpu_ras_mca2pa_by_idx(adev, &bps[0], err_data))
+                               return -EINVAL;
+               }
+       } else { /* no NPS value in the record: try amdgpu_ras_mca2pa() */
+               if (amdgpu_ras_mca2pa(adev, &bps[0], err_data)) {
+                       if (nps == AMDGPU_NPS1_PARTITION_MODE) /* conversion failed in NPS1 mode: use the stored records unchanged */
+                               memcpy(err_data->err_addr, bps,
+                                       sizeof(struct eeprom_table_record) * 
adev->umc.retire_unit);
+                       else
+                               return -EOPNOTSUPP; /* conversion failed outside NPS1 mode: reported as unsupported */
+               }
+       }
+
+       return __amdgpu_ras_restore_bad_pages(adev, err_data->err_addr, 
adev->umc.retire_unit); /* add retire_unit records from err_data->err_addr and return that call's result */
+}
+
+static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev, /* convert the single record *bps, then add the resulting pages */
+                               struct eeprom_table_record *bps, struct 
ras_err_data *err_data,
+                               enum amdgpu_memory_partition nps) /* nps: partition mode the caller queried */
+{
+       enum amdgpu_memory_partition save_nps;
+
+       save_nps = (bps->retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK; /* NPS value carried in the record's retired_page bits */
+       bps->retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT); /* clear the NPS bits in place; this modifies the caller's record */
+
+       if (save_nps == nps) { /* NOTE(review): unlike the array variant, a zero save_nps is not special-cased here */
+               if (amdgpu_umc_pages_in_a_row(adev, err_data,
+                               bps->retired_page << AMDGPU_GPU_PAGE_SHIFT))
+                       return -EINVAL;
+       } else { /* record's NPS value differs from the current mode: convert via amdgpu_ras_mca2pa_by_idx() */
+               if (amdgpu_ras_mca2pa_by_idx(adev, bps, err_data))
+                       return -EINVAL;
+       }
+       return __amdgpu_ras_restore_bad_pages(adev, err_data->err_addr,
+                                                                       
adev->umc.retire_unit); /* add retire_unit records from err_data->err_addr and return that call's result */
+}
+
 /* it deal with vram only. */
 int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
                struct eeprom_table_record *bps, int pages, bool from_rom)
 {
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-       struct ras_err_handler_data *data;
        struct ras_err_data err_data;
-       struct eeprom_table_record *err_rec;
        struct amdgpu_ras_eeprom_control *control =
                        &adev->psp.ras_context.ras->eeprom_control;
        enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE;
        int ret = 0;
-       uint32_t i, j, loop_cnt = 1;
-       bool find_pages_per_pa = false;
+       uint32_t i;
 
        if (!con || !con->eh_data || !bps || pages <= 0)
                return 0;
@@ -2825,108 +2906,41 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device 
*adev,
                                sizeof(struct eeprom_table_record), GFP_KERNEL);
                if (!err_data.err_addr) {
                        dev_warn(adev->dev, "Failed to alloc UMC error address 
record in mca2pa conversion!\n");
-                       ret = -ENOMEM;
-                       goto out;
+                       return -ENOMEM;
                }
 
-               err_rec = err_data.err_addr;
-               loop_cnt = adev->umc.retire_unit;
                if (adev->gmc.gmc_funcs->query_mem_partition_mode)
                        nps = 
adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
        }
 
        mutex_lock(&con->recovery_lock);
-       data = con->eh_data;
-       if (!data) {
-               /* Returning 0 as the absence of eh_data is acceptable */
-               goto free;
-       }
-
-       for (i = 0; i < pages; i++) {
-               if (from_rom &&
-                   control->rec_type == AMDGPU_RAS_EEPROM_REC_MCA) {
-                       if (!find_pages_per_pa) {
-                               if (amdgpu_ras_mca2pa_by_idx(adev, &bps[i], 
&err_data)) {
-                                       if (!i && nps == 
AMDGPU_NPS1_PARTITION_MODE) {
-                                               /* may use old RAS TA, use PA 
to find pages in
-                                                * one row
-                                                */
-                                               if 
(amdgpu_umc_pages_in_a_row(adev, &err_data,
-                                                                             
bps[i].retired_page <<
-                                                                             
AMDGPU_GPU_PAGE_SHIFT)) {
-                                                       ret = -EINVAL;
-                                                       goto free;
-                                               } else {
-                                                       find_pages_per_pa = 
true;
-                                               }
-                                       } else {
-                                               /* unsupported cases */
-                                               ret = -EOPNOTSUPP;
-                                               goto free;
-                                       }
-                               }
-                       } else {
-                               if (amdgpu_umc_pages_in_a_row(adev, &err_data,
-                                               bps[i].retired_page << 
AMDGPU_GPU_PAGE_SHIFT)) {
-                                       ret = -EINVAL;
-                                       goto free;
-                               }
-                       }
-               } else {
-                       if (from_rom && !find_pages_per_pa) {
-                               if (bps[i].retired_page & UMC_CHANNEL_IDX_V2) {
-                                       /* bad page in any NPS mode in eeprom */
-                                       if (amdgpu_ras_mca2pa_by_idx(adev, 
&bps[i], &err_data)) {
-                                               ret = -EINVAL;
+
+       if (from_rom) {
+               for (i = 0; i < pages; i++) {
+                       if (control->ras_num_recs - i >= adev->umc.retire_unit) 
{
+                               if ((bps[i].address == bps[i + 1].address) &&
+                                   (bps[i].mem_channel == bps[i + 
1].mem_channel)) {
+                                       //deal with retire_unit records a time
+                                       ret = 
__amdgpu_ras_convert_rec_array_from_rom(adev,
+                                                                       
&bps[i], &err_data, nps);
+                                       if (ret)
                                                goto free;
-                                       }
+                                       i += (adev->umc.retire_unit - 1);
                                } else {
-                                       /* legacy bad page in eeprom, generated 
only in
-                                        * NPS1 mode
-                                        */
-                                       if (amdgpu_ras_mca2pa(adev, &bps[i], 
&err_data)) {
-                                               /* old RAS TA or ASICs which 
don't support to
-                                                * convert addrss via mca 
address
-                                                */
-                                               if (!i && nps == 
AMDGPU_NPS1_PARTITION_MODE) {
-                                                       find_pages_per_pa = 
true;
-                                                       err_rec = &bps[i];
-                                                       loop_cnt = 1;
-                                               } else {
-                                                       /* non-nps1 mode, old 
RAS TA
-                                                        * can't support it
-                                                        */
-                                                       ret = -EOPNOTSUPP;
-                                                       goto free;
-                                               }
-                                       }
+                                       break;
                                }
-
-                               if (!find_pages_per_pa)
-                                       i += (adev->umc.retire_unit - 1);
                        } else {
-                               err_rec = &bps[i];
+                               break;
                        }
                }
-
-               for (j = 0; j < loop_cnt; j++) {
-                       if (amdgpu_ras_check_bad_page_unlock(con,
-                               err_rec[j].retired_page << 
AMDGPU_GPU_PAGE_SHIFT))
-                               continue;
-
-                       if (!data->space_left &&
-                           amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
-                               ret = -ENOMEM;
+               for (; i < pages; i++) {
+                       ret = __amdgpu_ras_convert_rec_from_rom(adev,
+                               &bps[i], &err_data, nps);
+                       if (ret)
                                goto free;
-                       }
-
-                       amdgpu_ras_reserve_page(adev, err_rec[j].retired_page);
-
-                       memcpy(&data->bps[data->count], &(err_rec[j]),
-                                       sizeof(struct eeprom_table_record));
-                       data->count++;
-                       data->space_left--;
                }
+       } else {
+               ret = __amdgpu_ras_restore_bad_pages(adev, bps, pages);
        }
 
 free:
-- 
2.34.1

Reply via email to