[AMD Official Use Only - AMD Internal Distribution Only]

Reviewed-by: Asad Kamal <asad.ka...@amd.com>

Thanks & Regards
Asad

-----Original Message-----
From: Lazar, Lijo <lijo.la...@amd.com>
Sent: Monday, June 9, 2025 10:11 AM
To: amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking <hawking.zh...@amd.com>; Deucher, Alexander 
<alexander.deuc...@amd.com>; Kamal, Asad <asad.ka...@amd.com>
Subject: [PATCH] drm/amdgpu: Suspend IH during mode-2 reset

On multi-AID SoCs, there can be a continuous stream of interrupts from
GC after poison consumption. Suspend IH to disable these interrupts
before doing a mode-2 reset. This avoids conflicting hardware accesses
from interrupt handlers while the reset is ongoing.

Signed-off-by: Lijo Lazar <lijo.la...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/aldebaran.c | 33 ++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
index 49bf8f3a748f..48c09addb29f 100644
--- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
@@ -71,18 +71,29 @@ aldebaran_get_reset_handler(struct amdgpu_reset_control 
*reset_ctl,
        return NULL;
 }

+static inline uint32_t aldebaran_get_ip_block_mask(struct amdgpu_device
+*adev) {
+       uint32_t ip_block_mask = BIT(AMD_IP_BLOCK_TYPE_GFX) |
+                                BIT(AMD_IP_BLOCK_TYPE_SDMA);
+
+       if (adev->aid_mask)
+               ip_block_mask |= BIT(AMD_IP_BLOCK_TYPE_IH);
+
+       return ip_block_mask;
+}
+
 static int aldebaran_mode2_suspend_ip(struct amdgpu_device *adev)  {
+       uint32_t ip_block_mask = aldebaran_get_ip_block_mask(adev);
+       uint32_t ip_block;
        int r, i;

        amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
        amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);

        for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
-               if (!(adev->ip_blocks[i].version->type ==
-                             AMD_IP_BLOCK_TYPE_GFX ||
-                     adev->ip_blocks[i].version->type ==
-                             AMD_IP_BLOCK_TYPE_SDMA))
+               ip_block = BIT(adev->ip_blocks[i].version->type);
+               if (!(ip_block_mask & ip_block))
                        continue;

                r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
@@ -200,8 +211,10 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control 
*reset_ctl,  static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev)  
{
        struct amdgpu_firmware_info *ucode_list[AMDGPU_UCODE_ID_MAXIMUM];
+       uint32_t ip_block_mask = aldebaran_get_ip_block_mask(adev);
        struct amdgpu_firmware_info *ucode;
        struct amdgpu_ip_block *cmn_block;
+       struct amdgpu_ip_block *ih_block;
        int ucode_count = 0;
        int i, r;

@@ -243,6 +256,18 @@ static int aldebaran_mode2_restore_ip(struct amdgpu_device 
*adev)
        if (r)
                return r;

+       if (ip_block_mask & BIT(AMD_IP_BLOCK_TYPE_IH)) {
+               ih_block = amdgpu_device_ip_get_ip_block(adev,
+                                                        AMD_IP_BLOCK_TYPE_IH);
+               if (unlikely(!ih_block)) {
+                       dev_err(adev->dev, "Failed to get IH handle\n");
+                       return -EINVAL;
+               }
+               r = amdgpu_ip_block_resume(ih_block);
+               if (r)
+                       return r;
+       }
+
        /* Reinit GFXHUB */
        adev->gfxhub.funcs->init(adev);
        r = adev->gfxhub.funcs->gart_enable(adev);
--
2.25.1

Reply via email to