Read CPER raw data from the debugfs node
"/sys/kernel/debug/dri/*/amdgpu_ring_cper".

Signed-off-by: Xiang Liu <[email protected]>
Reviewed-by: Tao Zhou <[email protected]>
Reviewed-by: Yang Wang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  1 +
 .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c    |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c      | 87 +++++++++++++++++++
 3 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 603cc155fd9b..66ccd31f1e8e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -110,6 +110,7 @@
 #include "amdgpu_mca.h"
 #include "amdgpu_aca.h"
 #include "amdgpu_ras.h"
+#include "amdgpu_ras_mgr.h"
 #include "amdgpu_cper.h"
 #include "amdgpu_xcp.h"
 #include "amdgpu_seq64.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 5a7bf0661dbf..011fa4748084 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -770,7 +770,8 @@ amdgpu_ras_eeprom_update_header(struct 
amdgpu_ras_eeprom_control *control)
                        "Saved bad pages %d reaches threshold value %d\n",
                        control->ras_num_bad_pages, 
ras->bad_page_cnt_threshold);
 
-               if (adev->cper.enabled && 
amdgpu_cper_generate_bp_threshold_record(adev))
+               if (adev->cper.enabled && !amdgpu_uniras_enabled(adev) &&
+                   amdgpu_cper_generate_bp_threshold_record(adev))
                        dev_warn(adev->dev, "fail to generate bad page 
threshold cper records\n");
 
                if ((amdgpu_bad_page_threshold != -1) &&
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index f26002b25de4..8637d924cd48 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -491,6 +491,90 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, 
unsigned int vmid,
  */
 #if defined(CONFIG_DEBUG_FS)
 
+/*
+ * Serve a read of the CPER ring debugfs node through the RAS manager.
+ *
+ * On the first read (*offset == 0) a zero-filled 12-byte ring header is
+ * written ahead of the record data. *offset advances by the number of CPER
+ * records returned, not by bytes, and selects the next record to fetch.
+ *
+ * Returns the data size reported by the RAS manager (plus the header on the
+ * first read), 0 when the snapshot holds no records, or an error code.
+ */
+static ssize_t amdgpu_ras_cper_debugfs_read(struct file *f, char __user *buf,
+                                           size_t size, loff_t *offset)
+{
+       struct amdgpu_ring *ring = file_inode(f)->i_private;
+       struct ras_cmd_cper_snapshot_req *snapshot_req;
+       struct ras_cmd_cper_snapshot_rsp *snapshot_rsp;
+       struct ras_cmd_cper_record_req *record_req;
+       struct ras_cmd_cper_record_rsp *record_rsp;
+       const uint8_t ring_header_size = 12;
+       uint8_t *ring_header;
+       uint32_t total_cper_num;
+       uint64_t start_cper_id;
+       int r;
+
+       /* Allocate all buffers first so "out" never frees an unset pointer. */
+       ring_header = kzalloc(ring_header_size, GFP_KERNEL);
+       snapshot_req = kzalloc(sizeof(*snapshot_req), GFP_KERNEL);
+       snapshot_rsp = kzalloc(sizeof(*snapshot_rsp), GFP_KERNEL);
+       record_req = kzalloc(sizeof(*record_req), GFP_KERNEL);
+       record_rsp = kzalloc(sizeof(*record_rsp), GFP_KERNEL);
+       if (!ring_header || !snapshot_req || !snapshot_rsp ||
+           !record_req || !record_rsp) {
+               r = -ENOMEM;
+               goto out;
+       }
+
+       if (!(*offset)) {
+               /* The header must fit inside the caller's buffer. */
+               if (size < ring_header_size) {
+                       r = -EINVAL;
+                       goto out;
+               }
+               if (copy_to_user(buf, ring_header, ring_header_size)) {
+                       r = -EFAULT;
+                       goto out;
+               }
+               buf += ring_header_size;
+               /* Only the space left after the header can hold records. */
+               size -= ring_header_size;
+       }
+
+       r = amdgpu_ras_mgr_handle_ras_cmd(ring->adev,
+                                         RAS_CMD__GET_CPER_SNAPSHOT,
+                                         snapshot_req, sizeof(*snapshot_req),
+                                         snapshot_rsp, sizeof(*snapshot_rsp));
+       if (r || !snapshot_rsp->total_cper_num)
+               goto out;
+
+       start_cper_id = snapshot_rsp->start_cper_id;
+       total_cper_num = snapshot_rsp->total_cper_num;
+
+       record_req->buf_ptr = (uint64_t)(uintptr_t)buf;
+       record_req->buf_size = size;
+       record_req->cper_start_id = start_cper_id + *offset;
+       record_req->cper_num = total_cper_num;
+       r = amdgpu_ras_mgr_handle_ras_cmd(ring->adev, RAS_CMD__GET_CPER_RECORD,
+                                         record_req, sizeof(*record_req),
+                                         record_rsp, sizeof(*record_rsp));
+       if (r)
+               goto out;
+
+       r = *offset ? record_rsp->real_data_size :
+                     record_rsp->real_data_size + ring_header_size;
+       (*offset) += record_rsp->real_cper_num;
+
+out:
+       kfree(record_rsp);
+       kfree(record_req);
+       kfree(snapshot_rsp);
+       kfree(snapshot_req);
+       kfree(ring_header);
+       return r;
+}
+
 /* Layout of file is 12 bytes consisting of
  * - rptr
  * - wptr
@@ -507,6 +591,9 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, 
char __user *buf,
        loff_t i;
        int r;
 
+       if (ring->funcs->type == AMDGPU_RING_TYPE_CPER && 
amdgpu_uniras_enabled(ring->adev))
+               return amdgpu_ras_cper_debugfs_read(f, buf, size, pos);
+
        if (*pos & 3 || size & 3)
                return -EINVAL;
 
-- 
2.34.1

Reply via email to