From: Hawking Zhang <hawking.zh...@amd.com>

Introduce new functions that generate CPER records: one record
per UE, and a single combined record for CEs and DEs.

v2: return -ENOMEM instead of false
v2: check return value of fill section function

Signed-off-by: Hawking Zhang <hawking.zh...@amd.com>
Signed-off-by: Xiang Liu <xiang....@amd.com>
Reviewed-by: Yang Wang <keivnyang.w...@amd.com>
Reviewed-by: Tao Zhou <tao.zh...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c  |  12 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h  |  12 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c | 108 +++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h |   9 +-
 4 files changed, 128 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
index 1a26b8ad14cb..ed1c20bd8114 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
@@ -30,16 +30,6 @@
 
 typedef int bank_handler_t(struct aca_handle *handle, struct aca_bank *bank, 
enum aca_smu_type type, void *data);
 
-struct aca_banks {
-       int nr_banks;
-       struct list_head list;
-};
-
-struct aca_hwip {
-       int hwid;
-       int mcatype;
-};
-
 static struct aca_hwip aca_hwid_mcatypes[ACA_HWIP_TYPE_COUNT] = {
        ACA_BANK_HWID(SMU,      0x01,   0x01),
        ACA_BANK_HWID(PCS_XGMI, 0x50,   0x00),
@@ -111,7 +101,7 @@ static struct aca_regs_dump {
        {"STATUS",              ACA_REG_IDX_STATUS},
        {"ADDR",                ACA_REG_IDX_ADDR},
        {"MISC",                ACA_REG_IDX_MISC0},
-       {"CONFIG",              ACA_REG_IDX_CONFG},
+       {"CONFIG",              ACA_REG_IDX_CONFIG},
        {"IPID",                ACA_REG_IDX_IPID},
        {"SYND",                ACA_REG_IDX_SYND},
        {"DESTAT",              ACA_REG_IDX_DESTAT},
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
index 3cd0115b0244..b84a3489b116 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
@@ -81,7 +81,7 @@ enum aca_reg_idx {
        ACA_REG_IDX_STATUS              = 1,
        ACA_REG_IDX_ADDR                = 2,
        ACA_REG_IDX_MISC0               = 3,
-       ACA_REG_IDX_CONFG               = 4,
+       ACA_REG_IDX_CONFIG              = 4,
        ACA_REG_IDX_IPID                = 5,
        ACA_REG_IDX_SYND                = 6,
        ACA_REG_IDX_DESTAT              = 8,
@@ -114,6 +114,11 @@ enum aca_smu_type {
        ACA_SMU_TYPE_COUNT,
 };
 
+struct aca_hwip {      /* entry type of aca_hwid_mcatypes[] in amdgpu_aca.c */
+       int hwid;
+       int mcatype;
+};
+
 struct aca_bank {
        enum aca_error_type aca_err_type;
        enum aca_smu_type smu_err_type;
@@ -125,6 +130,11 @@ struct aca_bank_node {
        struct list_head node;
 };
 
+struct aca_banks {
+       int nr_banks;           /* presumably the number of nodes on list - TODO confirm */
+       struct list_head list;  /* struct aca_bank_node entries, linked through ->node */
+};
+
 struct aca_bank_info {
        int die_id;
        int socket_id;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
index 8ce5dc6efcf9..f82aa12a88f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
@@ -21,6 +21,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
+#include <linux/list.h>
 #include "amdgpu.h"
 
 static const guid_t MCE                        = CPER_NOTIFY_MCE;
@@ -257,6 +258,113 @@ struct cper_hdr *amdgpu_cper_alloc_entry(struct 
amdgpu_device *adev,
        return hdr;
 }
 
+/*
+ * Build a fatal-type CPER entry with one crashdump section that carries the
+ * STATUS, ADDR, IPID and SYND registers of @bank.
+ *
+ * Returns 0 on success, -ENOMEM when no entry could be allocated, or the
+ * error code from amdgpu_cper_entry_fill_fatal_section().
+ *
+ * NOTE(review): nothing here releases the allocated entry, neither on the
+ * fill-error path nor after the TODO below - confirm who owns it.
+ */
+int amdgpu_cper_generate_ue_record(struct amdgpu_device *adev,
+                                  struct aca_bank *bank)
+{
+       struct cper_sec_crashdump_reg_data regs;
+       struct cper_hdr *entry;
+       int err;
+
+       entry = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_FATAL, 1);
+       if (entry == NULL) {
+               dev_err(adev->dev, "fail to alloc cper entry for ue record\n");
+               return -ENOMEM;
+       }
+
+       /* Each register is stored as a lo/hi 32-bit pair; all other fields stay zero. */
+       regs = (struct cper_sec_crashdump_reg_data) {
+               .status_lo = lower_32_bits(bank->regs[ACA_REG_IDX_STATUS]),
+               .status_hi = upper_32_bits(bank->regs[ACA_REG_IDX_STATUS]),
+               .addr_lo   = lower_32_bits(bank->regs[ACA_REG_IDX_ADDR]),
+               .addr_hi   = upper_32_bits(bank->regs[ACA_REG_IDX_ADDR]),
+               .ipid_lo   = lower_32_bits(bank->regs[ACA_REG_IDX_IPID]),
+               .ipid_hi   = upper_32_bits(bank->regs[ACA_REG_IDX_IPID]),
+               .synd_lo   = lower_32_bits(bank->regs[ACA_REG_IDX_SYND]),
+               .synd_hi   = upper_32_bits(bank->regs[ACA_REG_IDX_SYND]),
+       };
+
+       amdgpu_cper_entry_fill_hdr(adev, entry, AMDGPU_CPER_TYPE_FATAL, CPER_SEV_FATAL);
+
+       /* The single section of this entry lives at index 0. */
+       err = amdgpu_cper_entry_fill_fatal_section(adev, entry, 0, regs);
+       if (err != 0)
+               return err;
+
+       /*TODO: commit the cper entry to cper ring */
+
+       return 0;
+}
+
+/*
+ * Translate an ACA error type into the CPER severity used for its section.
+ * UE maps to fatal, CE to non-fatal corrected, DEFERRED to non-fatal
+ * uncorrected. Any other value is logged and reported as fatal.
+ */
+static enum cper_error_severity amdgpu_aca_err_type_to_cper_sev(struct amdgpu_device *adev,
+                                                               enum aca_error_type aca_err_type)
+{
+       /* Fatal is both the UE mapping and the fallback for unknown types. */
+       enum cper_error_severity sev = CPER_SEV_FATAL;
+
+       switch (aca_err_type) {
+       case ACA_ERROR_TYPE_UE:
+               break;
+       case ACA_ERROR_TYPE_CE:
+               sev = CPER_SEV_NON_FATAL_CORRECTED;
+               break;
+       case ACA_ERROR_TYPE_DEFERRED:
+               sev = CPER_SEV_NON_FATAL_UNCORRECTED;
+               break;
+       default:
+               dev_err(adev->dev, "Unknown ACA error type!\n");
+               break;
+       }
+
+       return sev;
+}
+
+/*
+ * Build one runtime-type CPER entry holding a section for every bank on
+ * @banks.
+ *
+ * The entry is allocated for @bank_count sections. Its header severity starts
+ * as non-fatal corrected and is raised to non-fatal uncorrected when any bank
+ * on the list is a deferred error; each section carries its own per-bank
+ * severity.
+ *
+ * Returns 0 on success, -ENOMEM when no entry could be allocated, or the
+ * error code from amdgpu_cper_entry_fill_runtime_section().
+ *
+ * NOTE(review): nothing here releases the allocated entry, neither on the
+ * fill-error path nor after the TODO below - confirm who owns it.
+ * NOTE(review): the list length is not checked against @bank_count; confirm
+ * callers always pass the number of nodes on @banks.
+ */
+int amdgpu_cper_generate_ce_records(struct amdgpu_device *adev,
+                                   struct aca_banks *banks,
+                                   uint16_t bank_count)
+{
+       struct cper_hdr *corrected = NULL;
+       enum cper_error_severity sev = CPER_SEV_NON_FATAL_CORRECTED;
+       uint32_t reg_data[CPER_ACA_REG_COUNT] = { 0 };
+       struct aca_bank_node *node;
+       struct aca_bank *bank;
+       uint32_t i = 0; /* index of the next section to fill */
+       int ret;
+
+       corrected = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_RUNTIME, bank_count);
+       if (!corrected) {
+               dev_err(adev->dev, "fail to allocate cper entry for ce records\n");
+               return -ENOMEM;
+       }
+
+       /* Raise severity if any DE is detected in the ACA bank list */
+       list_for_each_entry(node, &banks->list, node) {
+               bank = &node->bank;
+               if (bank->aca_err_type == ACA_ERROR_TYPE_DEFERRED) {
+                       sev = CPER_SEV_NON_FATAL_UNCORRECTED;
+                       break;
+               }
+       }
+
+       amdgpu_cper_entry_fill_hdr(adev, corrected, AMDGPU_CPER_TYPE_RUNTIME, sev);
+
+       /* Combine CE and DE in cper record */
+       list_for_each_entry(node, &banks->list, node) {
+               bank = &node->bank;
+               reg_data[CPER_ACA_REG_CTL_LO]    = lower_32_bits(bank->regs[ACA_REG_IDX_CTL]);
+               reg_data[CPER_ACA_REG_CTL_HI]    = upper_32_bits(bank->regs[ACA_REG_IDX_CTL]);
+               reg_data[CPER_ACA_REG_STATUS_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
+               reg_data[CPER_ACA_REG_STATUS_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
+               reg_data[CPER_ACA_REG_ADDR_LO]   = lower_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
+               reg_data[CPER_ACA_REG_ADDR_HI]   = upper_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
+               reg_data[CPER_ACA_REG_MISC0_LO]  = lower_32_bits(bank->regs[ACA_REG_IDX_MISC0]);
+               reg_data[CPER_ACA_REG_MISC0_HI]  = upper_32_bits(bank->regs[ACA_REG_IDX_MISC0]);
+               reg_data[CPER_ACA_REG_CONFIG_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_CONFIG]);
+               reg_data[CPER_ACA_REG_CONFIG_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_CONFIG]);
+               reg_data[CPER_ACA_REG_IPID_LO]   = lower_32_bits(bank->regs[ACA_REG_IDX_IPID]);
+               reg_data[CPER_ACA_REG_IPID_HI]   = upper_32_bits(bank->regs[ACA_REG_IDX_IPID]);
+               reg_data[CPER_ACA_REG_SYND_LO]   = lower_32_bits(bank->regs[ACA_REG_IDX_SYND]);
+               reg_data[CPER_ACA_REG_SYND_HI]   = upper_32_bits(bank->regs[ACA_REG_IDX_SYND]);
+
+               /* One section per bank: advance the index after each fill. */
+               ret = amdgpu_cper_entry_fill_runtime_section(adev, corrected, i++,
+                               amdgpu_aca_err_type_to_cper_sev(adev, bank->aca_err_type),
+                               reg_data, CPER_ACA_REG_COUNT);
+               if (ret)
+                       return ret;
+       }
+
+       /*TODO: commit the cper entry to cper ring */
+
+       return 0;
+}
+
 int amdgpu_cper_init(struct amdgpu_device *adev)
 {
        mutex_init(&adev->cper.cper_lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h
index 0ae845420983..6860a809f2f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h
@@ -26,6 +26,7 @@
 #define __AMDGPU_CPER_H__
 
 #include "amd_cper.h"
+#include "amdgpu_aca.h"
 
 #define CPER_MAX_ALLOWED_COUNT         0x1000
 #define HDR_LEN                                (sizeof(struct cper_hdr))
@@ -84,7 +85,13 @@ int amdgpu_cper_entry_fill_bad_page_threshold_section(struct 
amdgpu_device *adev
 struct cper_hdr *amdgpu_cper_alloc_entry(struct amdgpu_device *adev,
                                         enum amdgpu_cper_type type,
                                         uint16_t section_count);
-
+/* Each UE is encoded into its own CPER entry: one UE, one entry */
+int amdgpu_cper_generate_ue_record(struct amdgpu_device *adev,
+                                  struct aca_bank *bank);
+/* All CEs and DEs on the bank list are combined into a single CPER entry */
+int amdgpu_cper_generate_ce_records(struct amdgpu_device *adev,
+                                   struct aca_banks *banks,
+                                   uint16_t bank_count);
 int amdgpu_cper_init(struct amdgpu_device *adev);
 int amdgpu_cper_fini(struct amdgpu_device *adev);
 
-- 
2.34.1

Reply via email to