v2: Trigger GPU reset in case of new bad address errors.

Signed-off-by: Andrey Grodzovsky <andrey.grodzov...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 296e2d9..f5f36ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -243,17 +243,40 @@ static int gmc_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
                struct ras_err_data *err_data,
                struct amdgpu_iv_entry *entry)
 {
+       unsigned long new_err_addr_cnt, old_err_addr_cnt;
+       new_err_addr_cnt = 0;
+       old_err_addr_cnt = err_data->err_addr_cnt;
+
        kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
        if (adev->umc.funcs->query_ras_error_count)
                adev->umc.funcs->query_ras_error_count(adev, err_data);
        /* umc query_ras_error_address is also responsible for clearing
         * error status
         */
-       if (adev->umc.funcs->query_ras_error_address)
+       if (adev->umc.funcs->query_ras_error_address) {
+               unsigned long *bps;
+               int i;
+
                adev->umc.funcs->query_ras_error_address(adev, err_data);
 
+               new_err_addr_cnt = err_data->err_addr_cnt - old_err_addr_cnt;
+
+               if (new_err_addr_cnt) {
+                       bps = kcalloc(new_err_addr_cnt, sizeof(*bps), GFP_KERNEL);
+                       if (!bps)
+                               return -ENOMEM;
+
+                       for (i = 0; i < new_err_addr_cnt; i++)
+                               bps[i] = err_data->err_addr[old_err_addr_cnt + i]  >> PAGE_SHIFT;
+
+                       amdgpu_ras_add_bad_pages(adev, bps, new_err_addr_cnt);
+
+                       kfree(bps);
+               }
+       }
+
        /* only uncorrectable error needs gpu reset */
-       if (err_data->ue_count)
+       if (err_data->ue_count || new_err_addr_cnt)
                amdgpu_ras_reset_gpu(adev, 0);
 
        return AMDGPU_RAS_SUCCESS;
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to