[AMD Public Use]

Reviewed-by: Hawking Zhang <hawking.zh...@amd.com>

Regards,
Hawking
-----Original Message-----
From: Dennis Li <dennis...@amd.com> 
Sent: Friday, February 26, 2021 14:42
To: amd-gfx@lists.freedesktop.org; Chen, Guchun <guchun.c...@amd.com>; Zhang, 
Hawking <hawking.zh...@amd.com>; Koenig, Christian <christian.koe...@amd.com>
Cc: Li, Dennis <dennis...@amd.com>
Subject: [PATCH v2] drm/amdgpu: remove unnecessary reading for eeprom header

If the number of bad page records exceeds the threshold, the driver has already
updated both the EEPROM header and control->tbl_hdr.header before the GPU reset,
so the GPU recovery thread does not need to read the EEPROM header directly.

v2: merge amdgpu_ras_check_err_threshold into 
amdgpu_ras_eeprom_check_err_threshold

Signed-off-by: Dennis Li <dennis...@amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f0f7ed42ee7f..f2ff10403d93 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4397,7 +4397,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info 
*hive,
                                 * bad_page_threshold value to fix this once
                                 * probing driver again.
                                 */
-                               if (!amdgpu_ras_check_err_threshold(tmp_adev)) {
+                               if 
(!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
                                        /* must succeed. */
                                        amdgpu_ras_resume(tmp_adev);
                                } else {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 09546dec40ff..c669435ccc74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2189,19 +2189,3 @@ bool amdgpu_ras_need_emergency_restart(struct 
amdgpu_device *adev)
 
        return false;
 }
-
-bool amdgpu_ras_check_err_threshold(struct amdgpu_device *adev) -{
-       struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-       bool exc_err_limit = false;
-
-       if (con && (amdgpu_bad_page_threshold != 0))
-               amdgpu_ras_eeprom_check_err_threshold(&con->eeprom_control,
-                                               &exc_err_limit);
-
-       /*
-        * We are only interested in variable exc_err_limit,
-        * as it says if GPU is in bad state or not.
-        */
-       return exc_err_limit;
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index aed0716efa5a..42aab9adc263 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -491,8 +491,6 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev);  
unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
                bool is_ce);
 
-bool amdgpu_ras_check_err_threshold(struct amdgpu_device *adev);
-
 /* error handling functions */
 int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
                struct eeprom_table_record *bps, int pages); diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 19d9aa76cfbf..7f527f8bbdb1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -434,47 +434,21 @@ static uint32_t __correct_eeprom_dest_address(uint32_t 
curr_address)
        return curr_address;
 }
 
-int amdgpu_ras_eeprom_check_err_threshold(
-                               struct amdgpu_ras_eeprom_control *control,
-                               bool *exceed_err_limit)
+bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev)
 {
-       struct amdgpu_device *adev = to_amdgpu_device(control);
-       unsigned char buff[EEPROM_ADDRESS_SIZE +
-                       EEPROM_TABLE_HEADER_SIZE] = { 0 };
-       struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
-       struct i2c_msg msg = {
-                       .addr = control->i2c_address,
-                       .flags = I2C_M_RD,
-                       .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE,
-                       .buf = buff,
-       };
-       int ret;
-
-       *exceed_err_limit = false;
+       struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 
        if (!__is_ras_eeprom_supported(adev))
-               return 0;
-
-       /* read EEPROM table header */
-       mutex_lock(&control->tbl_mutex);
-       ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1);
-       if (ret < 1) {
-               dev_err(adev->dev, "Failed to read EEPROM table header.\n");
-               goto err;
-       }
-
-       __decode_table_header_from_buff(hdr, &buff[2]);
+               return false;
 
-       if (hdr->header == EEPROM_TABLE_HDR_BAD) {
+       if (con->eeprom_control.tbl_hdr.header == EEPROM_TABLE_HDR_BAD) {
                dev_warn(adev->dev, "This GPU is in BAD status.");
                dev_warn(adev->dev, "Please retire it or setting one bigger "
                                "threshold value when reloading driver.\n");
-               *exceed_err_limit = true;
+               return true;
        }
 
-err:
-       mutex_unlock(&control->tbl_mutex);
-       return 0;
+       return false;
 }
 
 int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control 
*control, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
index c7a5e5c7c61e..178721170974 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
@@ -80,9 +80,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control 
*control,
                        bool *exceed_err_limit);
 int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control);
 
-int amdgpu_ras_eeprom_check_err_threshold(
-                               struct amdgpu_ras_eeprom_control *control,
-                               bool *exceed_err_limit);
+bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev);
 
 int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
                                            struct eeprom_table_record *records,
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to