The aca handle is introduced by upper caller, it's inappropriate to
poll aca handle to match and validate aca bank, which will cause
unexcepted ras error report.

Signed-off-by: Xiang Liu <xiang....@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 122 ++++++++++--------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h |   2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c |  10 +-
 3 files changed, 58 insertions(+), 76 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
index ffd4c64e123c..b07e101c545d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
@@ -122,6 +122,25 @@ static void aca_smu_bank_dump(struct amdgpu_device *adev, 
int idx, int total, st
                              idx + 1, total, aca_regs[i].name, 
bank->regs[aca_regs[i].reg_idx]);
 }
 
+static bool aca_bank_should_dump(struct amdgpu_device *adev, enum aca_smu_type 
type)
+{
+       struct amdgpu_aca *aca = &adev->aca;
+       bool ret = true;
+
+       /*
+        * Because the UE Valid MCA count will only be cleared after reset,
+        * the aca bank is only dumped once during the gpu recovery stage.
+        */
+       if (type == ACA_SMU_TYPE_UE) {
+               if (amdgpu_ras_intr_triggered())
+                       ret = atomic_cmpxchg(&aca->ue_dump_flag, 0, 1) == 0;
+               else
+                       atomic_set(&aca->ue_dump_flag, 0);
+       }
+
+       return ret;
+}
+
 static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum 
aca_smu_type type,
                                       int start, int count,
                                       struct aca_banks *banks, struct 
ras_query_context *qctx)
@@ -130,6 +149,7 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device 
*adev, enum aca_smu_
        const struct aca_smu_funcs *smu_funcs = aca->smu_funcs;
        struct aca_bank bank;
        int i, max_count, ret;
+       struct aca_bank_node *node;
 
        if (!count)
                return 0;
@@ -159,14 +179,16 @@ static int aca_smu_get_valid_aca_banks(struct 
amdgpu_device *adev, enum aca_smu_
                        return ret;
 
                bank.smu_err_type = type;
-
-               aca_smu_bank_dump(adev, i, count, &bank, qctx);
-
                ret = aca_banks_add_bank(banks, &bank);
                if (ret)
                        return ret;
        }
 
+       i = 0;
+       if (aca_bank_should_dump(adev, type))
+               list_for_each_entry(node, &banks->list, node)
+                       aca_smu_bank_dump(adev, i++, count, &bank, qctx);
+
        return 0;
 }
 
@@ -318,72 +340,29 @@ static int handler_aca_log_bank_error(struct aca_handle 
*handle, struct aca_bank
        return 0;
 }
 
-static int aca_dispatch_bank(struct aca_handle_manager *mgr, struct aca_bank 
*bank,
-                            enum aca_smu_type type, bank_handler_t handler, 
void *data)
-{
-       struct aca_handle *handle;
-       int ret;
-
-       if (list_empty(&mgr->list))
-               return 0;
-
-       list_for_each_entry(handle, &mgr->list, node) {
-               if (!aca_bank_is_valid(handle, bank, type))
-                       continue;
-
-               ret = handler(handle, bank, type, data);
-               if (ret)
-                       return ret;
-       }
-
-       return 0;
-}
-
-static int aca_dispatch_banks(struct aca_handle_manager *mgr, struct aca_banks 
*banks,
+static int aca_dispatch_banks(struct aca_handle *handle, struct aca_banks 
*banks,
                              enum aca_smu_type type, bank_handler_t handler, 
void *data)
 {
        struct aca_bank_node *node;
        struct aca_bank *bank;
-       int ret;
 
-       if (!mgr || !banks)
+       if (!handle || !banks)
                return -EINVAL;
 
        /* pre check to avoid unnecessary operations */
-       if (list_empty(&mgr->list) || list_empty(&banks->list))
+       if (list_empty(&banks->list))
                return 0;
 
        list_for_each_entry(node, &banks->list, node) {
                bank = &node->bank;
 
-               ret = aca_dispatch_bank(mgr, bank, type, handler, data);
-               if (ret)
-                       return ret;
+               if (aca_bank_is_valid(handle, bank, type))
+                       handler(handle, bank, type, data);
        }
 
        return 0;
 }
 
-static bool aca_bank_should_update(struct amdgpu_device *adev, enum 
aca_smu_type type)
-{
-       struct amdgpu_aca *aca = &adev->aca;
-       bool ret = true;
-
-       /*
-        * Because the UE Valid MCA count will only be cleared after reset,
-        * in order to avoid repeated counting of the error count,
-        * the aca bank is only updated once during the gpu recovery stage.
-        */
-       if (type == ACA_SMU_TYPE_UE) {
-               if (amdgpu_ras_intr_triggered())
-                       ret = atomic_cmpxchg(&aca->ue_update_flag, 0, 1) == 0;
-               else
-                       atomic_set(&aca->ue_update_flag, 0);
-       }
-
-       return ret;
-}
-
 static void aca_banks_generate_cper(struct amdgpu_device *adev,
                                    enum aca_smu_type type,
                                    struct aca_banks *banks,
@@ -417,20 +396,14 @@ static void aca_banks_generate_cper(struct amdgpu_device 
*adev,
        }
 }
 
-static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type,
-                           bank_handler_t handler, struct ras_query_context 
*qctx, void *data)
+static int aca_banks_update(struct amdgpu_device *adev, struct aca_handle 
*handle,
+                           enum aca_smu_type type, bank_handler_t handler,
+                           struct ras_query_context *qctx, void *data)
 {
-       struct amdgpu_aca *aca = &adev->aca;
        struct aca_banks banks;
        u32 count = 0;
        int ret;
 
-       if (list_empty(&aca->mgr.list))
-               return 0;
-
-       if (!aca_bank_should_update(adev, type))
-               return 0;
-
        ret = aca_smu_get_valid_aca_count(adev, type, &count);
        if (ret)
                return ret;
@@ -442,15 +415,12 @@ static int aca_banks_update(struct amdgpu_device *adev, 
enum aca_smu_type type,
 
        ret = aca_smu_get_valid_aca_banks(adev, type, 0, count, &banks, qctx);
        if (ret)
-               goto err_release_banks;
+               return ret;
 
-       if (list_empty(&banks.list)) {
-               ret = 0;
-               goto err_release_banks;
-       }
+       if (list_empty(&banks.list))
+               return 0;
 
-       ret = aca_dispatch_banks(&aca->mgr, &banks, type,
-                                handler, data);
+       ret = aca_dispatch_banks(handle, &banks, type, handler, data);
        if (ret)
                goto err_release_banks;
 
@@ -537,7 +507,7 @@ static int __aca_get_error_data(struct amdgpu_device *adev, 
struct aca_handle *h
        }
 
        /* update aca bank to aca source error_cache first */
-       ret = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, 
qctx, NULL);
+       ret = aca_banks_update(adev, handle, smu_type, 
handler_aca_log_bank_error, qctx, NULL);
        if (ret)
                return ret;
 
@@ -730,7 +700,7 @@ int amdgpu_aca_init(struct amdgpu_device *adev)
        struct amdgpu_aca *aca = &adev->aca;
        int ret;
 
-       atomic_set(&aca->ue_update_flag, 0);
+       atomic_set(&aca->ue_dump_flag, 0);
 
        ret = aca_manager_init(&aca->mgr);
        if (ret)
@@ -745,14 +715,14 @@ void amdgpu_aca_fini(struct amdgpu_device *adev)
 
        aca_manager_fini(&aca->mgr);
 
-       atomic_set(&aca->ue_update_flag, 0);
+       atomic_set(&aca->ue_dump_flag, 0);
 }
 
 int amdgpu_aca_reset(struct amdgpu_device *adev)
 {
        struct amdgpu_aca *aca = &adev->aca;
 
-       atomic_set(&aca->ue_update_flag, 0);
+       atomic_set(&aca->ue_dump_flag, 0);
 
        return 0;
 }
@@ -880,12 +850,20 @@ static int handler_aca_bank_dump(struct aca_handle 
*handle, struct aca_bank *ban
 static int aca_dump_show(struct seq_file *m, enum aca_smu_type type)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+       struct aca_handle_manager *mgr = &adev->aca.mgr;
+       struct aca_handle *handle;
        struct aca_dump_context context = {
                .m = m,
                .idx = 0,
        };
 
-       return aca_banks_update(adev, type, handler_aca_bank_dump, NULL, (void 
*)&context);
+       if (list_empty(&mgr->list))
+               return 0;
+
+       list_for_each_entry(handle, &mgr->list, node)
+               aca_banks_update(adev, handle, type, handler_aca_bank_dump, 
NULL, (void *)&context);
+
+       return 0;
 }
 
 static int aca_dump_ce_show(struct seq_file *m, void *unused)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
index 6f62e5d80ed6..e71d6f5afaec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
@@ -202,7 +202,7 @@ struct aca_smu_funcs {
 struct amdgpu_aca {
        struct aca_handle_manager mgr;
        const struct aca_smu_funcs *smu_funcs;
-       atomic_t ue_update_flag;
+       atomic_t ue_dump_flag;
        bool is_enabled;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index c0de682b7774..a4038e92c59e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -876,10 +876,14 @@ static int gfx_v9_4_3_aca_bank_parser(struct aca_handle 
*handle,
                                      void *data)
 {
        struct aca_bank_info info;
-       u64 misc0;
+       u64 misc0, status;
        u32 instlo;
        int ret;
 
+       status = bank->regs[ACA_REG_IDX_STATUS];
+       if (!ACA_REG__STATUS__VAL(status))
+               return 0;
+
        ret = aca_bank_info_decode(bank, &info);
        if (ret)
                return ret;
@@ -894,8 +898,8 @@ static int gfx_v9_4_3_aca_bank_parser(struct aca_handle 
*handle,
        switch (type) {
        case ACA_SMU_TYPE_UE:
                bank->aca_err_type = ACA_ERROR_TYPE_UE;
-               ret = aca_error_cache_log_bank_error(handle, &info,
-                                                    ACA_ERROR_TYPE_UE, 1ULL);
+               if (ACA_REG__STATUS__UC(status) && ACA_REG__STATUS__PCC(status))
+                       ret = aca_error_cache_log_bank_error(handle, &info, 
ACA_ERROR_TYPE_UE, 1);
                break;
        case ACA_SMU_TYPE_CE:
                bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank);
-- 
2.34.1

Reply via email to