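When the EEPROM RAS table version does not match the version expected for
the device, update the table version and restore the existing bad page
records to the EEPROM RAS table. If the records cannot be restored, force
a reset of the table.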

Signed-off-by: Candice Li <candice...@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c    | 88 ++++++++++++++++---
 1 file changed, 78 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 06a62a8a992e9b..42d0ef2f512474 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -1319,6 +1319,37 @@ static int __read_table_ras_info(struct amdgpu_ras_eeprom_control *control)
        return res == RAS_TABLE_V2_1_INFO_SIZE ? 0 : res;
 }
 
+/* Tell whether tbl_hdr.version is the table version expected for this UMC IP version. */
+static bool amdgpu_ras_eeprom_table_version_validate(struct amdgpu_ras_eeprom_control *control)
+{
+       struct amdgpu_device *adev = to_amdgpu_device(control);
+       u32 umc_ver = amdgpu_ip_version(adev, UMC_HWIP, 0);
+       u32 expected = RAS_TABLE_VER_V1;
+
+       /* Only UMC 8.10.0 and 12.0.0 expect RAS_TABLE_VER_V2_1. */
+       if (umc_ver == IP_VERSION(8, 10, 0) || umc_ver == IP_VERSION(12, 0, 0))
+               expected = RAS_TABLE_VER_V2_1;
+
+       return control->tbl_hdr.version == expected;
+}
+
+/* Derive the record bookkeeping of the control embedding @hdr from hdr->version. */
+static void amdgpu_ras_update_eeprom_control(struct amdgpu_ras_eeprom_table_header *hdr)
+{
+       struct amdgpu_ras_eeprom_control *control =
+               container_of(hdr, struct amdgpu_ras_eeprom_control, tbl_hdr);
+       const bool is_v2_1 = hdr->version == RAS_TABLE_VER_V2_1;
+
+       /* Pick the V2.1 or the non-V2.1 variant of each value. */
+       control->ras_num_recs = is_v2_1 ? RAS_NUM_RECS_V2_1(hdr) : RAS_NUM_RECS(hdr);
+       control->ras_record_offset = is_v2_1 ? RAS_RECORD_START_V2_1 : RAS_RECORD_START;
+       control->ras_max_record_count = is_v2_1 ? RAS_MAX_RECORD_COUNT_V2_1 :
+                                                 RAS_MAX_RECORD_COUNT;
+
+       /* Last, as RAS_OFFSET_TO_INDEX() is handed the control updated above. */
+       control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset);
+}
+
 int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
                           bool *exceed_err_limit)
 {
@@ -1326,7 +1357,9 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
        unsigned char buf[RAS_TABLE_HEADER_SIZE] = { 0 };
        struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
        struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
-       int res;
+       int res, res1;
+       struct eeprom_table_record *bps;
+       u32 num_recs;
 
        *exceed_err_limit = false;
 
@@ -1355,16 +1388,51 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
 
        __decode_table_header_from_buf(hdr, buf);
 
-       if (hdr->version == RAS_TABLE_VER_V2_1) {
-               control->ras_num_recs = RAS_NUM_RECS_V2_1(hdr);
-               control->ras_record_offset = RAS_RECORD_START_V2_1;
-               control->ras_max_record_count = RAS_MAX_RECORD_COUNT_V2_1;
-       } else {
-               control->ras_num_recs = RAS_NUM_RECS(hdr);
-               control->ras_record_offset = RAS_RECORD_START;
-               control->ras_max_record_count = RAS_MAX_RECORD_COUNT;
+       amdgpu_ras_update_eeprom_control(hdr);
+
+       if (!amdgpu_ras_eeprom_table_version_validate(control)) {
+               num_recs = control->ras_num_recs;
+               if (num_recs && amdgpu_bad_page_threshold) {
+                       /* Save bad page records existed in EEPROM */
+                       bps = kcalloc(num_recs, sizeof(*bps), GFP_KERNEL);
+                       if (!bps)
+                               return -ENOMEM;
+
+                       res1 = amdgpu_ras_eeprom_read(control, bps, num_recs);
+                       if (res1)
+                               dev_warn(adev->dev, "Fail to load EEPROM table, 
force to reset it.");
+
+                       res = amdgpu_ras_eeprom_reset_table(control);
+                       if (res) {
+                               dev_err(adev->dev, "Failed to create a new 
EEPROM table.");
+                               kfree(bps);
+                               return res < 0 ? res : 0;
+                       }
+
+                       if (!res1) {
+                               /* Update the EEPROM table with correct table 
version and
+                                * original bad page records
+                                */
+                               amdgpu_ras_update_eeprom_control(hdr);
+                               res = amdgpu_ras_eeprom_append(control, bps, 
num_recs);
+
+                               if (res) {
+                                       dev_warn(adev->dev, "Fail to update 
EEPROM table, force to reset it.");
+                                       res = 
amdgpu_ras_eeprom_reset_table(control);
+                               }
+                       }
+
+                       kfree(bps);
+               } else
+                       res = amdgpu_ras_eeprom_reset_table(control);
+
+               if (res) {
+                       dev_err(adev->dev, "Failed to reset EEPROM table.");
+                       return res < 0 ? res : 0;
+               }
+
+               amdgpu_ras_update_eeprom_control(hdr);
        }
-       control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset);
 
        if (hdr->header == RAS_TABLE_HDR_VAL) {
                DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records",
-- 
2.25.1

Reply via email to