Read CPER raw data from the debugfs node
"/sys/kernel/debug/dri/*/amdgpu_ring_cper".

Signed-off-by: Xiang Liu <[email protected]>
Reviewed-by: Tao Zhou <[email protected]>
Reviewed-by: Yang Wang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  1 +
 .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c    |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c      | 58 +++++++++++++++++++
 3 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 603cc155fd9b..66ccd31f1e8e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -110,6 +110,7 @@
 #include "amdgpu_mca.h"
 #include "amdgpu_aca.h"
 #include "amdgpu_ras.h"
+#include "amdgpu_ras_mgr.h"
 #include "amdgpu_cper.h"
 #include "amdgpu_xcp.h"
 #include "amdgpu_seq64.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 5a7bf0661dbf..011fa4748084 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -770,7 +770,8 @@ amdgpu_ras_eeprom_update_header(struct 
amdgpu_ras_eeprom_control *control)
                        "Saved bad pages %d reaches threshold value %d\n",
                        control->ras_num_bad_pages, 
ras->bad_page_cnt_threshold);
 
-               if (adev->cper.enabled && 
amdgpu_cper_generate_bp_threshold_record(adev))
+               if (adev->cper.enabled && !amdgpu_uniras_enabled(adev) &&
+                   amdgpu_cper_generate_bp_threshold_record(adev))
                        dev_warn(adev->dev, "fail to generate bad page 
threshold cper records\n");
 
                if ((amdgpu_bad_page_threshold != -1) &&
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index f26002b25de4..47488ea22c49 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -491,6 +491,61 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, 
unsigned int vmid,
  */
 #if defined(CONFIG_DEBUG_FS)
 
+/*
+ * Read CPER records for the CPER ring debugfs file through the RAS manager.
+ *
+ * Called from amdgpu_debugfs_ring_read() for AMDGPU_RING_TYPE_CPER rings when
+ * amdgpu_uniras_enabled() is true.
+ *
+ * Note that *offset is not a byte position here: it counts the CPER records
+ * already handed out and is added to the snapshot's start_cper_id to select
+ * where the next read resumes.
+ *
+ * On the first read (*offset == 0) a zero-filled 12-byte ring header is
+ * written in front of the record data (presumably standing in for the
+ * rptr/wptr header of the regular ring file -- confirm against consumers).
+ * The caller's buffer must therefore hold at least those 12 bytes, and only
+ * the space after the header is offered to RAS_CMD__GET_CPER_RECORD.
+ *
+ * Returns the number of bytes reported as written to @buf (including the
+ * header on the first read), 0 when the snapshot contains no records,
+ * -ENOMEM, -EINVAL or -EFAULT on local failures, or the value returned by
+ * amdgpu_ras_mgr_handle_ras_cmd() when a RAS command fails.
+ */
+static ssize_t amdgpu_ras_cper_debugfs_read(struct file *f, char __user *buf,
+                                           size_t size, loff_t *offset)
+{
+       const uint8_t ring_header_size = 12;
+       struct amdgpu_ring *ring = file_inode(f)->i_private;
+       struct ras_cmd_cper_snapshot_req *snapshot_req __free(kfree) =
+               kzalloc(sizeof(*snapshot_req), GFP_KERNEL);
+       struct ras_cmd_cper_snapshot_rsp *snapshot_rsp __free(kfree) =
+               kzalloc(sizeof(*snapshot_rsp), GFP_KERNEL);
+       struct ras_cmd_cper_record_req *record_req __free(kfree) =
+               kzalloc(sizeof(*record_req), GFP_KERNEL);
+       struct ras_cmd_cper_record_rsp *record_rsp __free(kfree) =
+               kzalloc(sizeof(*record_rsp), GFP_KERNEL);
+       uint8_t *ring_header __free(kfree) =
+               kzalloc(ring_header_size, GFP_KERNEL);
+       int r;
+
+       if (!snapshot_req || !snapshot_rsp || !record_req || !record_rsp ||
+           !ring_header)
+               return -ENOMEM;
+
+       if (!(*offset)) {
+               /* The first read must have room for the complete header. */
+               if (size < ring_header_size)
+                       return -EINVAL;
+
+               if (copy_to_user(buf, ring_header, ring_header_size))
+                       return -EFAULT;
+               buf += ring_header_size;
+               /* Records may only use what is left behind the header. */
+               size -= ring_header_size;
+       }
+
+       r = amdgpu_ras_mgr_handle_ras_cmd(ring->adev,
+                                         RAS_CMD__GET_CPER_SNAPSHOT,
+                                         snapshot_req, sizeof(*snapshot_req),
+                                         snapshot_rsp, sizeof(*snapshot_rsp));
+       if (r || !snapshot_rsp->total_cper_num)
+               return r;
+
+       /* The user pointer is handed to the RAS manager as a 64-bit value. */
+       record_req->buf_ptr = (uint64_t)(uintptr_t)buf;
+       record_req->buf_size = size;
+       record_req->cper_start_id = snapshot_rsp->start_cper_id + *offset;
+       record_req->cper_num = snapshot_rsp->total_cper_num;
+       r = amdgpu_ras_mgr_handle_ras_cmd(ring->adev, RAS_CMD__GET_CPER_RECORD,
+                                         record_req, sizeof(*record_req),
+                                         record_rsp, sizeof(*record_rsp));
+       if (r)
+               return r;
+
+       /* Account for the header only on the read that actually emitted it. */
+       r = record_rsp->real_data_size;
+       if (!(*offset))
+               r += ring_header_size;
+       (*offset) += record_rsp->real_cper_num;
+
+       return r;
+}
+
+
 /* Layout of file is 12 bytes consisting of
  * - rptr
  * - wptr
@@ -507,6 +562,9 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, 
char __user *buf,
        loff_t i;
        int r;
 
+       if (ring->funcs->type == AMDGPU_RING_TYPE_CPER && 
amdgpu_uniras_enabled(ring->adev))
+               return amdgpu_ras_cper_debugfs_read(f, buf, size, pos);
+
        if (*pos & 3 || size & 3)
                return -EINVAL;
 
-- 
2.34.1

Reply via email to