On 3/23/26 15:01, Alex Deucher wrote:
On Fri, Mar 20, 2026 at 4:09 PM Amber Lin <[email protected]> wrote:
Create the hung_queue_hqd_info structure and fill in the hung queue information
passed by MES, including queue type, pipe id, and queue id.
Suggested-by: Jonathan Kim <[email protected]>
Signed-off-by: Amber Lin <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 13 ++++++++-----
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 13 +++++++++++++
2 files changed, 21 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index bea509f6b3ff..710bca87c32b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -447,7 +447,7 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct
amdgpu_device *adev,
{
struct mes_detect_and_reset_queue_input input;
u32 *db_array = adev->mes.hung_queue_db_array_cpu_addr[xcc_id];
- int r, i;
+ int hqd_info_offset = adev->mes.hung_queue_hqd_info_offset, r, i;
if (!hung_db_num || !hung_db_array)
return -EINVAL;
@@ -486,10 +486,13 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct
amdgpu_device *adev,
return r;
}
- /*
- * TODO: return HQD info for MES scheduled user compute queue reset
cases
- * stored in hung_db_array hqd info offset to full array size
- */
+ if (queue_type != AMDGPU_RING_TYPE_COMPUTE) {
+ dev_warn(adev->dev, "Unsupported queue type: %d\n", queue_type);
This function will get called for non-compute queues. We shouldn't warn here.
Alex
Right, I only considered the caller being KFD and overlooked that it can
potentially be gfx too. I'll remove this warning.
Amber
+ return r;
+ }
+
+ for (i = hqd_info_offset; i < hqd_info_offset + *hung_db_num; i++)
+ hung_db_array[i] = db_array[i];
if (r)
dev_err(adev->dev, "failed to reset\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index f80e3aca9c78..2e6ae9f84db0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -170,6 +170,19 @@ struct amdgpu_mes {
uint64_t shared_cmd_buf_gpu_addr[AMDGPU_MAX_MES_INST_PIPES];
};
+struct amdgpu_mes_hung_queue_hqd_info {
+ union {
+ struct {
+ uint32_t queue_type: 3; // queue type
+ uint32_t pipe_index: 4; // pipe index
+ uint32_t queue_index: 8; // queue index
+ uint32_t reserved: 17;
+ };
+
+ uint32_t bit0_31;
+ };
+};
+
struct amdgpu_mes_gang {
int gang_id;
int priority;
--
2.43.0