From: Jay Cornwall <[email protected]>

S_SETREG_IMM32_B32 does not apply a mask to the MODE bank bits.
SRC2 is consequently unconditionally cleared during context save.

Use S_SETREG_B32 instead to preserve SRC2.

Signed-off-by: Jay Cornwall <[email protected]>
Reviewed-by: Lancelot Six <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
---
 drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h         | 2 +-
 drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 5c6d533a59562..d82ce2f1e9b92 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -4731,7 +4731,7 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
        0xb8eff822, 0xb980f822,
        0x00000000, 0xb8fa2b01,
        0x847a997a, 0x8c6d7a6d,
-       0xb9802b01, 0x00000000,
+       0xbefa0080, 0xb97a2b01,
        0xbefa007e, 0x8b7bff7f,
        0x01ffffff, 0xbefe00c1,
        0xbeff00c1, 0xee0a407a,
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
index a807e7557e93f..d59400d242d19 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
@@ -414,7 +414,8 @@ L_HAVE_VGPRS:
        s_getreg_b32    s_save_tmp, hwreg(HW_REG_WAVE_MODE, 
SQ_WAVE_MODE_DST_SRC0_SRC1_VGPR_MSB_SHIFT, 
SQ_WAVE_MODE_DST_SRC0_SRC1_VGPR_MSB_SIZE)
        s_lshl_b32      s_save_tmp, s_save_tmp, 
S_SAVE_PC_HI_DST_SRC0_SRC1_VGPR_MSB_SHIFT
        s_or_b32        s_save_pc_hi, s_save_pc_hi, s_save_tmp
-       s_setreg_imm32_b32      hwreg(HW_REG_WAVE_MODE, 
SQ_WAVE_MODE_DST_SRC0_SRC1_VGPR_MSB_SHIFT, 
SQ_WAVE_MODE_DST_SRC0_SRC1_VGPR_MSB_SIZE), 0
+       s_mov_b32       s_save_tmp, 0
+       s_setreg_b32    hwreg(HW_REG_WAVE_MODE, 
SQ_WAVE_MODE_DST_SRC0_SRC1_VGPR_MSB_SHIFT, 
SQ_WAVE_MODE_DST_SRC0_SRC1_VGPR_MSB_SIZE), s_save_tmp
 #endif
 
        // Trap temporaries must be saved via VGPR but all VGPRs are in use.
-- 
2.52.0

Reply via email to