Reset the error data info stored in vram when the user clears the eeprom table.

Signed-off-by: Stanley.Yang <stanley.y...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c       | 97 ++++++++++++++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h       |  2 +
 .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c    |  4 +
 3 files changed, 77 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 753260745554..9c1072ea5760 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2336,6 +2336,77 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
        return ret;
 }
 
+/*
+ * Put @err_data into its initial state: every field is zeroed and
+ * err_node_list becomes an empty list.
+ *
+ * Nodes that are still linked on err_node_list are not freed here; the
+ * list head is simply overwritten.
+ *
+ * Always returns 0.
+ */
+int amdgpu_ras_error_data_init(struct ras_err_data *err_data)
+{
+       memset(err_data, 0, sizeof(*err_data));
+
+       /* the memset above also wiped the list head, so set it up again */
+       INIT_LIST_HEAD(&err_data->err_node_list);
+
+       return 0;
+}
+
+/* Unlink @err_node from the list it is on and free it; NULL is ignored. */
+static void amdgpu_ras_error_node_release(struct ras_err_node *err_node)
+{
+       if (!err_node)
+               return;
+
+       list_del(&err_node->node);
+       kvfree(err_node);
+}
+
+/* Unlink and free every error node linked on @err_data->err_node_list. */
+void amdgpu_ras_error_data_fini(struct ras_err_data *err_data)
+{
+       struct ras_err_node *err_node, *tmp;
+
+       /* _safe variant: each node is unlinked and freed inside the loop */
+       list_for_each_entry_safe(err_node, tmp, &err_data->err_node_list, node)
+               amdgpu_ras_error_node_release(err_node);
+}
+
+/*
+ * Throw away all error information recorded on @obj: every error node is
+ * released and obj->err_data is returned to its initial, zeroed state.
+ * A NULL @obj is ignored.
+ */
+static void amdgpu_ras_reset_error_info(struct ras_manager *obj)
+{
+       if (obj) {
+               /*
+                * Order matters: the nodes have to be freed before
+                * amdgpu_ras_error_data_init() zeroes the structure and
+                * re-creates the list head.
+                */
+               amdgpu_ras_error_data_fini(&obj->err_data);
+               amdgpu_ras_error_data_init(&obj->err_data);
+       }
+}
+
+/*
+ * Reset the vram bad pages data and the umc ras manager error data.
+ *
+ * Under recovery_lock, every used record slot in con->eh_data is handed
+ * back to space_left, count drops to 0 and the record array is zeroed.
+ * Afterwards the first ras manager object whose block is
+ * AMDGPU_RAS_BLOCK__UMC has its error nodes released and its err_data
+ * re-initialised.
+ *
+ * Always returns 0, including when there is no RAS context or no eh_data
+ * and nothing is done.
+ */
+int amdgpu_ras_reset_vram_bad_pages(struct amdgpu_device *adev)
+{
+       struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+       struct ras_err_handler_data *data;
+       struct ras_manager *obj;
+
+       if (!con || !con->eh_data)
+               return 0;
+
+       mutex_lock(&con->recovery_lock);
+
+       data = con->eh_data;
+       data->space_left += data->count;
+       data->count = 0;
+       /*
+        * Size the clear by one record, not by the bps member itself:
+        * sizeof(data->bps) is not the element size, so the old expression
+        * did not cover space_left records.
+        */
+       memset(data->bps, 0, data->space_left * sizeof(*data->bps));
+
+       mutex_unlock(&con->recovery_lock);
+
+       /* only the first UMC object found is reset */
+       list_for_each_entry(obj, &con->head, node) {
+               if (obj->head.block == AMDGPU_RAS_BLOCK__UMC) {
+                       amdgpu_ras_reset_error_info(obj);
+                       break;
+               }
+       }
+
+       return 0;
+}
+
 /*
  * write error record array to eeprom, the function should be
  * protected by recovery_lock
@@ -3556,32 +3627,6 @@ void amdgpu_ras_inst_reset_ras_error_count(struct 
amdgpu_device *adev,
        }
 }
 
-int amdgpu_ras_error_data_init(struct ras_err_data *err_data)
-{
-       memset(err_data, 0, sizeof(*err_data));
-
-       INIT_LIST_HEAD(&err_data->err_node_list);
-
-       return 0;
-}
-
-static void amdgpu_ras_error_node_release(struct ras_err_node *err_node)
-{
-       if (!err_node)
-               return;
-
-       list_del(&err_node->node);
-       kvfree(err_node);
-}
-
-void amdgpu_ras_error_data_fini(struct ras_err_data *err_data)
-{
-       struct ras_err_node *err_node, *tmp;
-
-       list_for_each_entry_safe(err_node, tmp, &err_data->err_node_list, node)
-               amdgpu_ras_error_node_release(err_node);
-}
-
 static struct ras_err_node *amdgpu_ras_error_find_node_by_id(struct 
ras_err_data *err_data,
                                                             struct 
amdgpu_smuio_mcm_config_info *mcm_info)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 665414c22ca9..64710517b9fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -622,6 +622,8 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
 int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
                unsigned long *new_cnt);
 
+int amdgpu_ras_reset_vram_bad_pages(struct amdgpu_device *adev);
+
 static inline enum ta_ras_block
 amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) {
        switch (block) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 65aa218380be..40060f1b8ad6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -462,6 +462,10 @@ int amdgpu_ras_eeprom_reset_table(struct 
amdgpu_ras_eeprom_control *control)
 
        mutex_unlock(&control->ras_tbl_mutex);
 
+       /* reset bad pages in vram structure */
+       if (amdgpu_ras_reset_vram_bad_pages(adev))
+               dev_warn(adev->dev, "reset vram bad pages structure failed, 
need reboot system\n");
+
        return res;
 }
 
-- 
2.25.1

Reply via email to