On 3/23/26 15:01, Alex Deucher wrote:
On Fri, Mar 20, 2026 at 4:09 PM Amber Lin <[email protected]> wrote:
Create hung_queue_hqd_info structure and fill in hung queue information
passed by MES, including queue type, pipe id, and queue id.

Suggested-by: Jonathan Kim <[email protected]>
Signed-off-by: Amber Lin <[email protected]>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 13 ++++++++-----
  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 13 +++++++++++++
  2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index bea509f6b3ff..710bca87c32b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -447,7 +447,7 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct 
amdgpu_device *adev,
  {
         struct mes_detect_and_reset_queue_input input;
         u32 *db_array = adev->mes.hung_queue_db_array_cpu_addr[xcc_id];
-       int r, i;
+       int hqd_info_offset = adev->mes.hung_queue_hqd_info_offset, r, i;

         if (!hung_db_num || !hung_db_array)
                 return -EINVAL;
@@ -486,10 +486,13 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct 
amdgpu_device *adev,
                 return r;
         }

-       /*
-        * TODO: return HQD info for MES scheduled user compute queue reset 
cases
-        * stored in hung_db_array hqd info offset to full array size
-        */
+       if (queue_type != AMDGPU_RING_TYPE_COMPUTE) {
+               dev_warn(adev->dev, "Unsupported queue type: %d\n", queue_type);
This function will get called for non-compute queues.  We shouldn't warn here.

Alex
Right, I only considered the case where the caller is KFD and overlooked that it could potentially be gfx too. I'll remove this warning.


Amber
+               return r;
+       }
+
+       for (i = hqd_info_offset; i < hqd_info_offset + *hung_db_num; i++)
+               hung_db_array[i] = db_array[i];

         if (r)
                 dev_err(adev->dev, "failed to reset\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index f80e3aca9c78..2e6ae9f84db0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -170,6 +170,19 @@ struct amdgpu_mes {
         uint64_t            shared_cmd_buf_gpu_addr[AMDGPU_MAX_MES_INST_PIPES];
  };

+struct amdgpu_mes_hung_queue_hqd_info {
+       union {
+               struct {
+                       uint32_t queue_type: 3; // queue type
+                       uint32_t pipe_index: 4; // pipe index
+                       uint32_t queue_index: 8; // queue index
+                       uint32_t reserved: 17;
+               };
+
+               uint32_t bit0_31;
+       };
+};
+
  struct amdgpu_mes_gang {
         int                             gang_id;
         int                             priority;
--
2.43.0


Reply via email to