From: Jay Cornwall <[email protected]>

- Identify co-issue of S_SET_VGPR_MSB and VALU with banked VGPR
- Restore previous bank setting when exiting the trap

v2:
- Refine VOP3PX2 detection
- Improve load pipelining
- Fix a comment typo

Signed-off-by: Jay Cornwall <[email protected]>
Reviewed-by: Lancelot Six <[email protected]>
Cc: Joseph Greathouse <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
---
 .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h    | 955 ++++++++++--------
 .../amd/amdkfd/cwsr_trap_handler_gfx12.asm    | 166 ++-
 2 files changed, 706 insertions(+), 415 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index d82ce2f1e9b92..dfffda4aa8e21 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -4666,14 +4666,14 @@ static const uint32_t cwsr_trap_gfx9_5_0_hex[] = {
 };
 
 static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
-       0xbfa00001, 0xbfa002a2,
+       0xbfa00001, 0xbfa003b7,
        0xb0804009, 0xb8f8f804,
        0x9178ff78, 0x00008c00,
        0xb8fbf811, 0x8b6eff78,
        0x00004000, 0xbfa10008,
        0x8b6eff7b, 0x00000080,
        0xbfa20018, 0x8b6ea07b,
-       0xbfa2004f, 0xbf830010,
+       0xbfa200e1, 0xbf830010,
        0xb8fbf811, 0xbfa0fffb,
        0x8b6eff7b, 0x00000bd0,
        0xbfa20010, 0xb8eef812,
@@ -4684,7 +4684,7 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
        0xf0000000, 0xbfa20005,
        0x8b6fff6f, 0x00000200,
        0xbfa20002, 0x8b6ea07b,
-       0xbfa20039, 0x9177ff77,
+       0xbfa200cb, 0x9177ff77,
        0x007fc000, 0xb8fa04a1,
        0x847a967a, 0x8c777a77,
        0xb8fa0421, 0x847a957a,
@@ -4710,169 +4710,330 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
        0x80ec886c, 0x82ed806d,
        0xbfa00002, 0x806c846c,
        0x826d806d, 0x8b6dff6d,
-       0x01ffffff, 0x8bfe7e7e,
+       0x01ffffff, 0xb8fbf811,
+       0xbf0d847b, 0xbfa20078,
+       0xf4003eb6, 0xf8000000,
+       0xf4003bb6, 0xf8000008,
+       0xbfc70001, 0x8b76ff7a,
+       0x80000000, 0xbfa20027,
+       0x9376ff7a, 0x00060019,
+       0x81f9a376, 0xbf0b8179,
+       0xbfa20068, 0x81f9ac76,
+       0xbf0b8179, 0xbfa20062,
+       0x81f9b776, 0xbf0b8179,
+       0xbfa2005f, 0x8b76ff7a,
+       0x000001ff, 0xbf06ff76,
+       0x000000fe, 0xbfa2005d,
+       0xbf06ff76, 0x000000ff,
+       0xbfa20057, 0xbf06ff76,
+       0x000000fa, 0xbfa20054,
+       0x81f9ff76, 0x000000e9,
+       0xbf0b8179, 0xbfa20050,
+       0x8b76ff7b, 0xffff0000,
+       0xbf06ff76, 0xbf860000,
+       0xbfa10051, 0x9376ff7b,
+       0x0002000e, 0x8b79ff7b,
+       0x00003f00, 0x85798679,
+       0x8c767976, 0xb9763b01,
+       0xbfa00049, 0x8b76ff7a,
+       0xfc000000, 0xbf06ff76,
+       0xd4000000, 0xbfa20013,
+       0xbf06ff76, 0xc8000000,
+       0xbfa20027, 0x8b76ff7a,
+       0xff000000, 0xbf06ff76,
+       0xcf000000, 0xbfa20039,
+       0x8b79ff7a, 0xffff0000,
+       0xbf06ff79, 0xcc350000,
+       0xbfa20037, 0xbf06ff79,
+       0xcc3a0000, 0xbfa20034,
+       0xbf06ff76, 0xcc000000,
+       0xbfa10031, 0x8b76ff7b,
+       0x000001ff, 0xbf06ff76,
+       0x000000ff, 0xbfa20029,
+       0xbf06ff76, 0x000000fa,
+       0xbfa20026, 0x81f6ff76,
+       0x000000e9, 0xbf0b8176,
+       0xbfa20022, 0x8b76ff7b,
+       0x0003fe00, 0xbf06ff76,
+       0x0001fe00, 0xbfa2001d,
+       0x8b76ff7b, 0x07fc0000,
+       0xbf06ff76, 0x03fc0000,
+       0xbfa20018, 0xbfa00014,
+       0x9376ff7a, 0x00040016,
+       0x81f68176, 0xbf0b8176,
+       0xbfa20012, 0x9376ff7a,
+       0x00050011, 0x81f68176,
+       0xbf0b8176, 0xbfa2000d,
+       0x8b76ff7a, 0x000001ff,
+       0xbf06ff76, 0x000000ff,
+       0xbfa20008, 0x8b76ff7b,
+       0x000001ff, 0xbf06ff76,
+       0x000000ff, 0xbfa20003,
+       0xbfc70000, 0xbefb006e,
+       0xbfa0ffad, 0xbfc70000,
+       0xbefb006f, 0xbfa0ffaa,
+       0xbfc70000, 0x857a9677,
+       0xb97a04a1, 0x857a9577,
+       0xb97a0421, 0x857a8e77,
+       0xb97a3021, 0x8bfe7e7e,
        0x8bea6a6a, 0x85788978,
        0xb9783244, 0xbe804a6c,
        0xb8faf802, 0xbf0d987a,
        0xbfa10001, 0xbfb00000,
        0x8b6dff6d, 0x01ffffff,
        0xbefa0080, 0xb97a0151,
+       0x9177ff77, 0x007fc000,
+       0xb8fa04a1, 0x847a967a,
+       0x8c777a77, 0xb8fa0421,
+       0x847a957a, 0x8c777a77,
+       0xb8fa3021, 0x847a8e7a,
+       0x8c777a77, 0xb980f821,
+       0x00000000, 0xbf0d847b,
+       0xbfa20078, 0xf4003eb6,
+       0xf8000000, 0xf4003bb6,
+       0xf8000008, 0xbfc70001,
+       0x8b76ff7a, 0x80000000,
+       0xbfa20027, 0x9376ff7a,
+       0x00060019, 0x81f9a376,
+       0xbf0b8179, 0xbfa20068,
+       0x81f9ac76, 0xbf0b8179,
+       0xbfa20062, 0x81f9b776,
+       0xbf0b8179, 0xbfa2005f,
+       0x8b76ff7a, 0x000001ff,
+       0xbf06ff76, 0x000000fe,
+       0xbfa2005d, 0xbf06ff76,
+       0x000000ff, 0xbfa20057,
+       0xbf06ff76, 0x000000fa,
+       0xbfa20054, 0x81f9ff76,
+       0x000000e9, 0xbf0b8179,
+       0xbfa20050, 0x8b76ff7b,
+       0xffff0000, 0xbf06ff76,
+       0xbf860000, 0xbfa10051,
+       0x9376ff7b, 0x0002000e,
+       0x8b79ff7b, 0x00003f00,
+       0x85798679, 0x8c767976,
+       0xb9763b01, 0xbfa00049,
+       0x8b76ff7a, 0xfc000000,
+       0xbf06ff76, 0xd4000000,
+       0xbfa20013, 0xbf06ff76,
+       0xc8000000, 0xbfa20027,
+       0x8b76ff7a, 0xff000000,
+       0xbf06ff76, 0xcf000000,
+       0xbfa20039, 0x8b79ff7a,
+       0xffff0000, 0xbf06ff79,
+       0xcc350000, 0xbfa20037,
+       0xbf06ff79, 0xcc3a0000,
+       0xbfa20034, 0xbf06ff76,
+       0xcc000000, 0xbfa10031,
+       0x8b76ff7b, 0x000001ff,
+       0xbf06ff76, 0x000000ff,
+       0xbfa20029, 0xbf06ff76,
+       0x000000fa, 0xbfa20026,
+       0x81f6ff76, 0x000000e9,
+       0xbf0b8176, 0xbfa20022,
+       0x8b76ff7b, 0x0003fe00,
+       0xbf06ff76, 0x0001fe00,
+       0xbfa2001d, 0x8b76ff7b,
+       0x07fc0000, 0xbf06ff76,
+       0x03fc0000, 0xbfa20018,
+       0xbfa00014, 0x9376ff7a,
+       0x00040016, 0x81f68176,
+       0xbf0b8176, 0xbfa20012,
+       0x9376ff7a, 0x00050011,
+       0x81f68176, 0xbf0b8176,
+       0xbfa2000d, 0x8b76ff7a,
+       0x000001ff, 0xbf06ff76,
+       0x000000ff, 0xbfa20008,
+       0x8b76ff7b, 0x000001ff,
+       0xbf06ff76, 0x000000ff,
+       0xbfa20003, 0xbfc70000,
+       0xbefb006e, 0xbfa0ffad,
+       0xbfc70000, 0xbefb006f,
+       0xbfa0ffaa, 0xbfc70000,
        0xbeee007e, 0xbeef007f,
        0xbefe0180, 0xbefe4d84,
        0xbf8a0000, 0x8b7aff7f,
        0x04000000, 0x847a857a,
-       0x8c6d7a6d, 0x9177ff77,
-       0x007fc000, 0xb8fa04a1,
-       0x847a967a, 0x8c777a77,
-       0xb8fa0421, 0x847a957a,
-       0x8c777a77, 0xb8fa3021,
-       0x847a8e7a, 0x8c777a77,
-       0xb980f821, 0x00000000,
-       0xb8eff822, 0xb980f822,
-       0x00000000, 0xb8fa2b01,
-       0x847a997a, 0x8c6d7a6d,
-       0xbefa0080, 0xb97a2b01,
-       0xbefa007e, 0x8b7bff7f,
-       0x01ffffff, 0xbefe00c1,
-       0xbeff00c1, 0xee0a407a,
-       0x000c0000, 0x00000000,
-       0x7e000280, 0xbefe007a,
-       0xbeff007b, 0xb8fb0742,
-       0x847b997b, 0xb8fa3b05,
-       0x807a817a, 0xbf0d997b,
-       0xbfa20002, 0x847a897a,
-       0xbfa00001, 0x847a8a7a,
+       0x8c6d7a6d, 0xb8eff822,
+       0xb980f822, 0x00000000,
+       0xb8fa2b01, 0x847a997a,
+       0x8c6d7a6d, 0xbefa0080,
+       0xb97a2b01, 0xbefa007e,
        0x8b7bff7f, 0x01ffffff,
-       0x807aff7a, 0x000001c0,
-       0x807a7e7a, 0x827b807b,
-       0xd7610000, 0x00010870,
-       0xd7610000, 0x00010a71,
-       0xd7610000, 0x00010c72,
-       0xd7610000, 0x00010e73,
-       0xd7610000, 0x00011074,
-       0xd7610000, 0x00011275,
-       0xd7610000, 0x00011476,
-       0xd7610000, 0x00011677,
-       0xd7610000, 0x00011a79,
-       0xd7610000, 0x00011c7e,
-       0xd7610000, 0x00011e7f,
-       0xbefe00ff, 0x00003fff,
-       0xbeff0080, 0xee0a407a,
-       0x000c0000, 0x00000000,
-       0xd760007a, 0x00011d00,
-       0xd760007b, 0x00011f00,
+       0xbefe00c1, 0xbeff00c1,
+       0xee0a407a, 0x000c0000,
+       0x00000000, 0x7e000280,
        0xbefe007a, 0xbeff007b,
-       0xbef4007e, 0x8b75ff7f,
-       0x01ffffff, 0xbef1007d,
-       0xb8f30742, 0x84739973,
-       0xbefe00c1, 0x857d9973,
-       0x8b7d817d, 0xbf06817d,
-       0xbfa20002, 0xbeff0080,
-       0xbfa00002, 0xbeff00c1,
-       0xbfa0000a, 0xee0a4074,
-       0x008c0000, 0x00008000,
-       0xee0a4074, 0x010c0000,
+       0xb8fb0742, 0x847b997b,
+       0xb8fa3b05, 0x807a817a,
+       0xbf0d997b, 0xbfa20002,
+       0x847a897a, 0xbfa00001,
+       0x847a8a7a, 0x8b7bff7f,
+       0x01ffffff, 0x807aff7a,
+       0x000001c0, 0x807a7e7a,
+       0x827b807b, 0xd7610000,
+       0x00010870, 0xd7610000,
+       0x00010a71, 0xd7610000,
+       0x00010c72, 0xd7610000,
+       0x00010e73, 0xd7610000,
+       0x00011074, 0xd7610000,
+       0x00011275, 0xd7610000,
+       0x00011476, 0xd7610000,
+       0x00011677, 0xd7610000,
+       0x00011a79, 0xd7610000,
+       0x00011c7e, 0xd7610000,
+       0x00011e7f, 0xbefe00ff,
+       0x00003fff, 0xbeff0080,
+       0xee0a407a, 0x000c0000,
+       0x00000000, 0xd760007a,
+       0x00011d00, 0xd760007b,
+       0x00011f00, 0xbefe007a,
+       0xbeff007b, 0xbef4007e,
+       0x8b75ff7f, 0x01ffffff,
+       0xbef1007d, 0xb8f30742,
+       0x84739973, 0xbefe00c1,
+       0x857d9973, 0x8b7d817d,
+       0xbf06817d, 0xbfa20002,
+       0xbeff0080, 0xbfa00002,
+       0xbeff00c1, 0xbfa0000a,
+       0xee0a4074, 0x008c0000,
+       0x00008000, 0xee0a4074,
+       0x010c0000, 0x00010000,
+       0xee0a4074, 0x018c0000,
+       0x00018000, 0xbfa00009,
+       0xee0a4074, 0x008c0000,
        0x00010000, 0xee0a4074,
-       0x018c0000, 0x00018000,
-       0xbfa00009, 0xee0a4074,
-       0x008c0000, 0x00010000,
-       0xee0a4074, 0x010c0000,
-       0x00020000, 0xee0a4074,
-       0x018c0000, 0x00030000,
-       0xb8f03b05, 0x80708170,
-       0xbf0d9973, 0xbfa20002,
-       0x84708970, 0xbfa00001,
-       0x84708a70, 0x8070ff70,
-       0x00000200, 0x7e000280,
-       0x7e020280, 0x7e040280,
-       0xbefd0080, 0xb8faf802,
-       0xbf0c8b7a, 0xbfa20003,
-       0xbe804fc2, 0xbf94fffe,
-       0xbfa10001, 0xbe804ec4,
-       0xbf94fffc, 0xb8faf804,
-       0x8b7aff7a, 0x0001000c,
-       0x9178ff78, 0x0001000c,
-       0x8c787a78, 0xd7610002,
-       0x0000fa71, 0x807d817d,
-       0xd7610002, 0x0000fa6c,
-       0x807d817d, 0x917aff6d,
-       0x80000000, 0xd7610002,
+       0x010c0000, 0x00020000,
+       0xee0a4074, 0x018c0000,
+       0x00030000, 0xb8f03b05,
+       0x80708170, 0xbf0d9973,
+       0xbfa20002, 0x84708970,
+       0xbfa00001, 0x84708a70,
+       0x8070ff70, 0x00000200,
+       0x7e000280, 0x7e020280,
+       0x7e040280, 0xbefd0080,
+       0xb8faf802, 0xbf0c8b7a,
+       0xbfa20003, 0xbe804fc2,
+       0xbf94fffe, 0xbfa10001,
+       0xbe804ec4, 0xbf94fffc,
+       0xb8faf804, 0x8b7aff7a,
+       0x0001000c, 0x9178ff78,
+       0x0001000c, 0x8c787a78,
+       0xd7610002, 0x0000fa71,
+       0x807d817d, 0xd7610002,
+       0x0000fa6c, 0x807d817d,
+       0x917aff6d, 0x80000000,
+       0xd7610002, 0x0000fa7a,
+       0x807d817d, 0xd7610002,
+       0x0000fa6e, 0x807d817d,
+       0xbefa0080, 0xd7610002,
        0x0000fa7a, 0x807d817d,
-       0xd7610002, 0x0000fa6e,
-       0x807d817d, 0xbefa0080,
+       0xd7610002, 0x0000fa78,
+       0x807d817d, 0xb8faf811,
        0xd7610002, 0x0000fa7a,
        0x807d817d, 0xd7610002,
-       0x0000fa78, 0x807d817d,
-       0xb8faf811, 0xd7610002,
+       0x0000fa6f, 0x807d817d,
+       0xb8f1f801, 0x937aff6d,
+       0x00060019, 0x847a8c7a,
+       0x8c717a71, 0xd7610002,
+       0x0000fa71, 0x807d817d,
+       0xb8f1f814, 0xd7610002,
+       0x0000fa71, 0x807d817d,
+       0xb8f1f815, 0xd7610002,
+       0x0000fa71, 0x807d817d,
+       0xb8f1f812, 0xd7610002,
+       0x0000fa71, 0x807d817d,
+       0xb8f1f813, 0xd7610002,
+       0x0000fa71, 0x807d817d,
+       0xb8faf802, 0xd7610002,
        0x0000fa7a, 0x807d817d,
-       0xd7610002, 0x0000fa6f,
-       0x807d817d, 0xb8f1f801,
-       0x937aff6d, 0x00060019,
-       0x847a8c7a, 0x8c717a71,
-       0xd7610002, 0x0000fa71,
-       0x807d817d, 0xb8f1f814,
-       0xd7610002, 0x0000fa71,
-       0x807d817d, 0xb8f1f815,
-       0xd7610002, 0x0000fa71,
-       0x807d817d, 0xb8f1f812,
-       0xd7610002, 0x0000fa71,
-       0x807d817d, 0xb8f1f813,
-       0xd7610002, 0x0000fa71,
-       0x807d817d, 0xb8faf802,
+       0xbefa50c1, 0xbfc70000,
        0xd7610002, 0x0000fa7a,
-       0x807d817d, 0xbefa50c1,
+       0x807d817d, 0xbefa4c88,
        0xbfc70000, 0xd7610002,
        0x0000fa7a, 0x807d817d,
-       0xbefa4c88, 0xbfc70000,
-       0xd7610002, 0x0000fa7a,
-       0x807d817d, 0xbefe00ff,
-       0x0000ffff, 0xbeff0080,
-       0x80767074, 0x82778075,
+       0xbefe00ff, 0x0000ffff,
+       0xbeff0080, 0x80767074,
+       0x82778075, 0xee0a4076,
+       0x010c0000, 0x00000000,
+       0xbefe00c1, 0x7e040280,
+       0xbefa5081, 0xbfc70000,
+       0xd7610002, 0x0001007a,
+       0xbefa5082, 0xbfc70000,
+       0xd7610002, 0x0001027a,
+       0xbefa5083, 0xbfc70000,
+       0xd7610002, 0x0001047a,
+       0xbefa5084, 0xbfc70000,
+       0xd7610002, 0x0001067a,
+       0xbefa5085, 0xbfc70000,
+       0xd7610002, 0x0001087a,
+       0xbefa5086, 0xbfc70000,
+       0xd7610002, 0x00010a7a,
+       0xbefa5087, 0xbfc70000,
+       0xd7610002, 0x00010c7a,
+       0xbefa5088, 0xbfc70000,
+       0xd7610002, 0x00010e7a,
+       0xbefa5089, 0xbfc70000,
+       0xd7610002, 0x0001107a,
+       0xbefa508a, 0xbfc70000,
+       0xd7610002, 0x0001127a,
+       0xbefa508b, 0xbfc70000,
+       0xd7610002, 0x0001147a,
+       0xbefa508c, 0xbfc70000,
+       0xd7610002, 0x0001167a,
+       0xbefa508d, 0xbfc70000,
+       0xd7610002, 0x0001187a,
+       0xbefa508e, 0xbfc70000,
+       0xd7610002, 0x00011a7a,
+       0xbefa508f, 0xbfc70000,
+       0xd7610002, 0x00011c7a,
+       0xbefa5090, 0xbfc70000,
+       0xd7610002, 0x00011e7a,
        0xee0a4076, 0x010c0000,
-       0x00000000, 0xbefe00c1,
-       0x7e040280, 0xbefa5081,
-       0xbfc70000, 0xd7610002,
-       0x0001007a, 0xbefa5082,
-       0xbfc70000, 0xd7610002,
-       0x0001027a, 0xbefa5083,
-       0xbfc70000, 0xd7610002,
-       0x0001047a, 0xbefa5084,
-       0xbfc70000, 0xd7610002,
-       0x0001067a, 0xbefa5085,
-       0xbfc70000, 0xd7610002,
-       0x0001087a, 0xbefa5086,
-       0xbfc70000, 0xd7610002,
-       0x00010a7a, 0xbefa5087,
-       0xbfc70000, 0xd7610002,
-       0x00010c7a, 0xbefa5088,
-       0xbfc70000, 0xd7610002,
-       0x00010e7a, 0xbefa5089,
-       0xbfc70000, 0xd7610002,
-       0x0001107a, 0xbefa508a,
-       0xbfc70000, 0xd7610002,
-       0x0001127a, 0xbefa508b,
-       0xbfc70000, 0xd7610002,
-       0x0001147a, 0xbefa508c,
-       0xbfc70000, 0xd7610002,
-       0x0001167a, 0xbefa508d,
-       0xbfc70000, 0xd7610002,
-       0x0001187a, 0xbefa508e,
-       0xbfc70000, 0xd7610002,
-       0x00011a7a, 0xbefa508f,
-       0xbfc70000, 0xd7610002,
-       0x00011c7a, 0xbefa5090,
-       0xbfc70000, 0xd7610002,
-       0x00011e7a, 0xee0a4076,
-       0x010c0000, 0x00008000,
-       0xb8f03b05, 0x80708170,
-       0xbf0d9973, 0xbfa20002,
-       0x84708970, 0xbfa00001,
-       0x84708a70, 0xbef90080,
-       0xbefd0080, 0xbf800000,
+       0x00008000, 0xb8f03b05,
+       0x80708170, 0xbf0d9973,
+       0xbfa20002, 0x84708970,
+       0xbfa00001, 0x84708a70,
+       0xbef90080, 0xbefd0080,
+       0xbf800000, 0xbe804100,
+       0xbe824102, 0xbe844104,
+       0xbe864106, 0xbe884108,
+       0xbe8a410a, 0xbe8c410c,
+       0xbe8e410e, 0xd7610002,
+       0x0000f200, 0x80798179,
+       0xd7610002, 0x0000f201,
+       0x80798179, 0xd7610002,
+       0x0000f202, 0x80798179,
+       0xd7610002, 0x0000f203,
+       0x80798179, 0xd7610002,
+       0x0000f204, 0x80798179,
+       0xd7610002, 0x0000f205,
+       0x80798179, 0xd7610002,
+       0x0000f206, 0x80798179,
+       0xd7610002, 0x0000f207,
+       0x80798179, 0xd7610002,
+       0x0000f208, 0x80798179,
+       0xd7610002, 0x0000f209,
+       0x80798179, 0xd7610002,
+       0x0000f20a, 0x80798179,
+       0xd7610002, 0x0000f20b,
+       0x80798179, 0xd7610002,
+       0x0000f20c, 0x80798179,
+       0xd7610002, 0x0000f20d,
+       0x80798179, 0xd7610002,
+       0x0000f20e, 0x80798179,
+       0xd7610002, 0x0000f20f,
+       0x80798179, 0xbf06a079,
+       0xbfa10009, 0x80767074,
+       0x82778075, 0xee0a4076,
+       0x010c0000, 0x00000000,
+       0x8070ff70, 0x00000080,
+       0xbef90080, 0x7e040280,
+       0x807d907d, 0xbf0aff7d,
+       0x00000060, 0xbfa2ffb9,
        0xbe804100, 0xbe824102,
        0xbe844104, 0xbe864106,
        0xbe884108, 0xbe8a410a,
-       0xbe8c410c, 0xbe8e410e,
        0xd7610002, 0x0000f200,
        0x80798179, 0xd7610002,
        0x0000f201, 0x80798179,
@@ -4891,63 +5052,39 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
        0xd7610002, 0x0000f20a,
        0x80798179, 0xd7610002,
        0x0000f20b, 0x80798179,
-       0xd7610002, 0x0000f20c,
-       0x80798179, 0xd7610002,
-       0x0000f20d, 0x80798179,
-       0xd7610002, 0x0000f20e,
-       0x80798179, 0xd7610002,
-       0x0000f20f, 0x80798179,
-       0xbf06a079, 0xbfa10009,
+       0xbefe00ff, 0x0000ffff,
        0x80767074, 0x82778075,
        0xee0a4076, 0x010c0000,
-       0x00000000, 0x8070ff70,
-       0x00000080, 0xbef90080,
-       0x7e040280, 0x807d907d,
-       0xbf0aff7d, 0x00000060,
-       0xbfa2ffb9, 0xbe804100,
-       0xbe824102, 0xbe844104,
-       0xbe864106, 0xbe884108,
-       0xbe8a410a, 0xd7610002,
-       0x0000f200, 0x80798179,
-       0xd7610002, 0x0000f201,
-       0x80798179, 0xd7610002,
-       0x0000f202, 0x80798179,
-       0xd7610002, 0x0000f203,
-       0x80798179, 0xd7610002,
-       0x0000f204, 0x80798179,
-       0xd7610002, 0x0000f205,
-       0x80798179, 0xd7610002,
-       0x0000f206, 0x80798179,
-       0xd7610002, 0x0000f207,
-       0x80798179, 0xd7610002,
-       0x0000f208, 0x80798179,
-       0xd7610002, 0x0000f209,
-       0x80798179, 0xd7610002,
-       0x0000f20a, 0x80798179,
-       0xd7610002, 0x0000f20b,
-       0x80798179, 0xbefe00ff,
-       0x0000ffff, 0x80767074,
+       0x00000000, 0xbefe00c1,
+       0x857d9973, 0x8b7d817d,
+       0xbf06817d, 0xbfa20002,
+       0xbeff0080, 0xbfa00001,
+       0xbeff00c1, 0xb8fb4306,
+       0x8b7bc17b, 0xbfa10042,
+       0x8b7aff6d, 0x80000000,
+       0xbfa1003f, 0x847b8a7b,
+       0xb8f03b05, 0x80708170,
+       0xbf0d9973, 0xbfa20002,
+       0x84708970, 0xbfa00001,
+       0x84708a70, 0x8070ff70,
+       0x00000200, 0x8070ff70,
+       0x00000200, 0xd71f0000,
+       0x000100c1, 0xd7200000,
+       0x000200c1, 0x16000084,
+       0x857d9973, 0x8b7d817d,
+       0xbf06817d, 0xbefd0080,
+       0xbfa20015, 0xbe8300ff,
+       0x00000080, 0xbf800000,
+       0xbf800000, 0xbf800000,
+       0xd8d80000, 0x01000000,
+       0xbf8a0000, 0x80767074,
        0x82778075, 0xee0a4076,
-       0x010c0000, 0x00000000,
-       0xbefe00c1, 0x857d9973,
-       0x8b7d817d, 0xbf06817d,
-       0xbfa20002, 0xbeff0080,
-       0xbfa00001, 0xbeff00c1,
-       0xb8fb4306, 0x8b7bc17b,
-       0xbfa10042, 0x8b7aff6d,
-       0x80000000, 0xbfa1003f,
-       0x847b8a7b, 0xb8f03b05,
-       0x80708170, 0xbf0d9973,
-       0xbfa20002, 0x84708970,
-       0xbfa00001, 0x84708a70,
-       0x8070ff70, 0x00000200,
-       0x8070ff70, 0x00000200,
-       0xd71f0000, 0x000100c1,
-       0xd7200000, 0x000200c1,
-       0x16000084, 0x857d9973,
-       0x8b7d817d, 0xbf06817d,
-       0xbefd0080, 0xbfa20015,
-       0xbe8300ff, 0x00000080,
+       0x008c0000, 0x00000000,
+       0x807d037d, 0x80700370,
+       0xd5250000, 0x0001ff00,
+       0x00000080, 0xbf0a7b7d,
+       0xbfa2fff1, 0xbfa00014,
+       0xbe8300ff, 0x00000100,
        0xbf800000, 0xbf800000,
        0xbf800000, 0xd8d80000,
        0x01000000, 0xbf8a0000,
@@ -4955,252 +5092,242 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
        0xee0a4076, 0x008c0000,
        0x00000000, 0x807d037d,
        0x80700370, 0xd5250000,
-       0x0001ff00, 0x00000080,
+       0x0001ff00, 0x00000100,
        0xbf0a7b7d, 0xbfa2fff1,
-       0xbfa00014, 0xbe8300ff,
-       0x00000100, 0xbf800000,
-       0xbf800000, 0xbf800000,
-       0xd8d80000, 0x01000000,
-       0xbf8a0000, 0x80767074,
-       0x82778075, 0xee0a4076,
-       0x008c0000, 0x00000000,
-       0x807d037d, 0x80700370,
-       0xd5250000, 0x0001ff00,
-       0x00000100, 0xbf0a7b7d,
-       0xbfa2fff1, 0xbefe00c1,
-       0x857d9973, 0x8b7d817d,
-       0xbf06817d, 0xbfa20004,
-       0xbef000ff, 0x00000200,
-       0xbeff0080, 0xbfa00003,
-       0xbef000ff, 0x00000400,
-       0xbeff00c1, 0xb8fb3b05,
-       0x807b817b, 0x847b827b,
-       0x857d9973, 0x8b7d817d,
-       0xbf06817d, 0xbfa2001b,
+       0xbefe00c1, 0x857d9973,
+       0x8b7d817d, 0xbf06817d,
+       0xbfa20004, 0xbef000ff,
+       0x00000200, 0xbeff0080,
+       0xbfa00003, 0xbef000ff,
+       0x00000400, 0xbeff00c1,
+       0xb8fb3b05, 0x807b817b,
+       0x847b827b, 0x857d9973,
+       0x8b7d817d, 0xbf06817d,
+       0xbfa2001b, 0xbefd0084,
+       0xbf0a7b7d, 0xbfa10032,
+       0x7e008700, 0x7e028701,
+       0x7e048702, 0x7e068703,
+       0x80767074, 0x82778075,
+       0xee0a4076, 0x000c0000,
+       0x00000000, 0xee0a4076,
+       0x008c0000, 0x00008000,
+       0xee0a4076, 0x010c0000,
+       0x00010000, 0xee0a4076,
+       0x018c0000, 0x00018000,
+       0x807d847d, 0x8070ff70,
+       0x00000200, 0xbf0a7b7d,
+       0xbfa2ffe9, 0xbfa0001a,
        0xbefd0084, 0xbf0a7b7d,
-       0xbfa10032, 0x7e008700,
+       0xbfa10017, 0x7e008700,
        0x7e028701, 0x7e048702,
        0x7e068703, 0x80767074,
        0x82778075, 0xee0a4076,
        0x000c0000, 0x00000000,
        0xee0a4076, 0x008c0000,
-       0x00008000, 0xee0a4076,
-       0x010c0000, 0x00010000,
+       0x00010000, 0xee0a4076,
+       0x010c0000, 0x00020000,
        0xee0a4076, 0x018c0000,
-       0x00018000, 0x807d847d,
-       0x8070ff70, 0x00000200,
+       0x00030000, 0x807d847d,
+       0x8070ff70, 0x00000400,
        0xbf0a7b7d, 0xbfa2ffe9,
-       0xbfa0001a, 0xbefd0084,
-       0xbf0a7b7d, 0xbfa10017,
-       0x7e008700, 0x7e028701,
-       0x7e048702, 0x7e068703,
-       0x80767074, 0x82778075,
-       0xee0a4076, 0x000c0000,
-       0x00000000, 0xee0a4076,
-       0x008c0000, 0x00010000,
-       0xee0a4076, 0x010c0000,
-       0x00020000, 0xee0a4076,
-       0x018c0000, 0x00030000,
-       0x807d847d, 0x8070ff70,
-       0x00000400, 0xbf0a7b7d,
-       0xbfa2ffe9, 0xbfa00180,
-       0xbef4007e, 0x8b75ff7f,
-       0x01ffffff, 0xbef1007f,
-       0xb8f20742, 0x84729972,
-       0x8b6eff7f, 0x04000000,
-       0xbfa10044, 0xbefe00c1,
-       0x857d9972, 0x8b7d817d,
-       0xbf06817d, 0xbfa20002,
-       0xbeff0080, 0xbfa00001,
-       0xbeff00c1, 0xb8ef4306,
-       0x8b6fc16f, 0xbfa10039,
-       0x846f8a6f, 0xb8f83b05,
-       0x80788178, 0xbf0d9972,
-       0xbfa20002, 0x84788978,
-       0xbfa00001, 0x84788a78,
-       0x8078ff78, 0x00000200,
-       0x8078ff78, 0x00000200,
-       0x857d9972, 0x8b7d817d,
-       0xbf06817d, 0xbefd0080,
-       0xd71f0001, 0x000100c1,
-       0xd7200001, 0x000202c1,
-       0x30020282, 0xbfa20012,
-       0x80767874, 0x82778075,
-       0xee0a0076, 0x000c0000,
-       0x00000000, 0xbf8a0000,
-       0xd8340000, 0x00000001,
-       0xd5250001, 0x0001ff01,
-       0x00000080, 0x807dff7d,
-       0x00000080, 0x8078ff78,
-       0x00000080, 0xbf0a6f7d,
-       0xbfa2ffef, 0xbfa00011,
-       0x80767874, 0x82778075,
-       0xee0a0076, 0x000c0000,
-       0x00000000, 0xbf8a0000,
-       0xd8340000, 0x00000001,
-       0xd5250001, 0x0001ff01,
-       0x00000100, 0x807dff7d,
-       0x00000100, 0x8078ff78,
-       0x00000100, 0xbf0a6f7d,
-       0xbfa2ffef, 0xbef80080,
+       0xbfa00180, 0xbef4007e,
+       0x8b75ff7f, 0x01ffffff,
+       0xbef1007f, 0xb8f20742,
+       0x84729972, 0x8b6eff7f,
+       0x04000000, 0xbfa10044,
        0xbefe00c1, 0x857d9972,
        0x8b7d817d, 0xbf06817d,
        0xbfa20002, 0xbeff0080,
        0xbfa00001, 0xbeff00c1,
-       0xb8ef3b05, 0x806f816f,
-       0x846f826f, 0x857d9972,
+       0xb8ef4306, 0x8b6fc16f,
+       0xbfa10039, 0x846f8a6f,
+       0xb8f83b05, 0x80788178,
+       0xbf0d9972, 0xbfa20002,
+       0x84788978, 0xbfa00001,
+       0x84788a78, 0x8078ff78,
+       0x00000200, 0x8078ff78,
+       0x00000200, 0x857d9972,
        0x8b7d817d, 0xbf06817d,
-       0xbfa2002c, 0xbeee0078,
-       0x8078ff78, 0x00000200,
-       0xbefd0084, 0x80767874,
-       0x82778075, 0xee0a0076,
-       0x000c0000, 0x00000000,
-       0xee0a0076, 0x000c0001,
-       0x00008000, 0xee0a0076,
-       0x000c0002, 0x00010000,
-       0xee0a0076, 0x000c0003,
-       0x00018000, 0xbf8a0000,
-       0x7e008500, 0x7e028501,
-       0x7e048502, 0x7e068503,
-       0x807d847d, 0x8078ff78,
-       0x00000200, 0xbf0a6f7d,
-       0xbfa2ffe8, 0x80766e74,
+       0xbefd0080, 0xd71f0001,
+       0x000100c1, 0xd7200001,
+       0x000202c1, 0x30020282,
+       0xbfa20012, 0x80767874,
        0x82778075, 0xee0a0076,
        0x000c0000, 0x00000000,
-       0xee0a0076, 0x000c0001,
-       0x00008000, 0xee0a0076,
-       0x000c0002, 0x00010000,
-       0xee0a0076, 0x000c0003,
-       0x00018000, 0xbf8a0000,
-       0xbfa0002d, 0xbeee0078,
-       0x8078ff78, 0x00000400,
-       0xbefd0084, 0xbf0a6f7d,
-       0xbfa10018, 0x80767874,
+       0xbf8a0000, 0xd8340000,
+       0x00000001, 0xd5250001,
+       0x0001ff01, 0x00000080,
+       0x807dff7d, 0x00000080,
+       0x8078ff78, 0x00000080,
+       0xbf0a6f7d, 0xbfa2ffef,
+       0xbfa00011, 0x80767874,
        0x82778075, 0xee0a0076,
        0x000c0000, 0x00000000,
-       0xee0a0076, 0x000c0001,
+       0xbf8a0000, 0xd8340000,
+       0x00000001, 0xd5250001,
+       0x0001ff01, 0x00000100,
+       0x807dff7d, 0x00000100,
+       0x8078ff78, 0x00000100,
+       0xbf0a6f7d, 0xbfa2ffef,
+       0xbef80080, 0xbefe00c1,
+       0x857d9972, 0x8b7d817d,
+       0xbf06817d, 0xbfa20002,
+       0xbeff0080, 0xbfa00001,
+       0xbeff00c1, 0xb8ef3b05,
+       0x806f816f, 0x846f826f,
+       0x857d9972, 0x8b7d817d,
+       0xbf06817d, 0xbfa2002c,
+       0xbeee0078, 0x8078ff78,
+       0x00000200, 0xbefd0084,
+       0x80767874, 0x82778075,
+       0xee0a0076, 0x000c0000,
+       0x00000000, 0xee0a0076,
+       0x000c0001, 0x00008000,
+       0xee0a0076, 0x000c0002,
        0x00010000, 0xee0a0076,
-       0x000c0002, 0x00020000,
-       0xee0a0076, 0x000c0003,
-       0x00030000, 0xbf8a0000,
-       0x7e008500, 0x7e028501,
-       0x7e048502, 0x7e068503,
-       0x807d847d, 0x8078ff78,
-       0x00000400, 0xbf0a6f7d,
-       0xbfa2ffe8, 0x80766e74,
-       0x82778075, 0xee0a0076,
-       0x000c0000, 0x00000000,
-       0xee0a0076, 0x000c0001,
+       0x000c0003, 0x00018000,
+       0xbf8a0000, 0x7e008500,
+       0x7e028501, 0x7e048502,
+       0x7e068503, 0x807d847d,
+       0x8078ff78, 0x00000200,
+       0xbf0a6f7d, 0xbfa2ffe8,
+       0x80766e74, 0x82778075,
+       0xee0a0076, 0x000c0000,
+       0x00000000, 0xee0a0076,
+       0x000c0001, 0x00008000,
+       0xee0a0076, 0x000c0002,
        0x00010000, 0xee0a0076,
-       0x000c0002, 0x00020000,
-       0xee0a0076, 0x000c0003,
-       0x00030000, 0xbf8a0000,
-       0xb8f83b05, 0x80788178,
-       0xbf0d9972, 0xbfa20002,
-       0x84788978, 0xbfa00001,
-       0x84788a78, 0x8078ff78,
-       0x00000200, 0x80f8ff78,
-       0x00000060, 0x80767874,
-       0x82778075, 0xbefd00ff,
-       0x0000006c, 0xf460403b,
-       0xf8000000, 0xbf8a0000,
-       0x80fd847d, 0xbf800000,
-       0xbe804300, 0xbe824302,
-       0x80f6a076, 0x82f78077,
-       0xf460603b, 0xf8000000,
-       0xbf8a0000, 0x80fd887d,
+       0x000c0003, 0x00018000,
+       0xbf8a0000, 0xbfa0002d,
+       0xbeee0078, 0x8078ff78,
+       0x00000400, 0xbefd0084,
+       0xbf0a6f7d, 0xbfa10018,
+       0x80767874, 0x82778075,
+       0xee0a0076, 0x000c0000,
+       0x00000000, 0xee0a0076,
+       0x000c0001, 0x00010000,
+       0xee0a0076, 0x000c0002,
+       0x00020000, 0xee0a0076,
+       0x000c0003, 0x00030000,
+       0xbf8a0000, 0x7e008500,
+       0x7e028501, 0x7e048502,
+       0x7e068503, 0x807d847d,
+       0x8078ff78, 0x00000400,
+       0xbf0a6f7d, 0xbfa2ffe8,
+       0x80766e74, 0x82778075,
+       0xee0a0076, 0x000c0000,
+       0x00000000, 0xee0a0076,
+       0x000c0001, 0x00010000,
+       0xee0a0076, 0x000c0002,
+       0x00020000, 0xee0a0076,
+       0x000c0003, 0x00030000,
+       0xbf8a0000, 0xb8f83b05,
+       0x80788178, 0xbf0d9972,
+       0xbfa20002, 0x84788978,
+       0xbfa00001, 0x84788a78,
+       0x8078ff78, 0x00000200,
+       0x80f8ff78, 0x00000060,
+       0x80767874, 0x82778075,
+       0xbefd00ff, 0x0000006c,
+       0xf460403b, 0xf8000000,
+       0xbf8a0000, 0x80fd847d,
        0xbf800000, 0xbe804300,
-       0xbe824302, 0xbe844304,
-       0xbe864306, 0x80f6c076,
-       0x82f78077, 0xf460803b,
+       0xbe824302, 0x80f6a076,
+       0x82f78077, 0xf460603b,
        0xf8000000, 0xbf8a0000,
-       0x80fd907d, 0xbf800000,
+       0x80fd887d, 0xbf800000,
        0xbe804300, 0xbe824302,
        0xbe844304, 0xbe864306,
-       0xbe884308, 0xbe8a430a,
-       0xbe8c430c, 0xbe8e430e,
-       0xbf06807d, 0xbfa1ffef,
-       0xb980f801, 0x00000000,
-       0xb8f83b05, 0x80788178,
-       0xbf0d9972, 0xbfa20002,
-       0x84788978, 0xbfa00001,
-       0x84788a78, 0x8078ff78,
-       0x00000200, 0x80767874,
-       0x82778075, 0xbeff0071,
-       0xf4601bfb, 0xf8000000,
-       0xf4601b3b, 0xf8000004,
-       0xf4601b7b, 0xf8000008,
-       0xf4601c3b, 0xf800000c,
-       0xf4601c7b, 0xf8000010,
-       0xf4601ebb, 0xf8000014,
-       0xf4601efb, 0xf8000018,
-       0xf4601e7b, 0xf800001c,
-       0xf4601cfb, 0xf8000020,
-       0xf4601bbb, 0xf8000024,
-       0xbf8a0000, 0xb96ef814,
-       0xf4601bbb, 0xf8000028,
-       0xbf8a0000, 0xb96ef815,
-       0xf4601bbb, 0xf800002c,
-       0xbf8a0000, 0xb96ef812,
-       0xf4601bbb, 0xf8000030,
-       0xbf8a0000, 0xb96ef813,
-       0x8b6eff7f, 0x04000000,
-       0xbfa10022, 0xf4601bbb,
-       0xf8000038, 0xbf8a0000,
-       0xbf0d806e, 0xbfa1001d,
-       0x856e906e, 0x8b6e6e6e,
-       0xbfa10003, 0xbe804ec1,
-       0x816ec16e, 0xbfa0fffb,
-       0xbef800ff, 0x00000080,
-       0xbefd0081, 0xf4601bbb,
-       0xf0000000, 0xbfc70000,
-       0x80788478, 0x937eff6e,
-       0x00070004, 0x847e907e,
-       0x8c7d7e7d, 0xbe80517d,
-       0x917dff7d, 0x007f0000,
-       0x856e906e, 0x8b6e6e6e,
-       0xbfa10003, 0xbe804e7d,
-       0x816ec16e, 0xbfa0fffb,
-       0x807d817d, 0xbf08907d,
-       0xbfa1ffec, 0xf4601bbb,
-       0xf800003c, 0xbfc70000,
-       0xbf0d806e, 0xbfa1000c,
-       0xbf0d9a7f, 0xbfa10002,
-       0xbf068180, 0xbe804fc4,
-       0xbf94fffc, 0xbfa10006,
-       0x856e906e, 0x8b6e6e6e,
-       0xbfa10003, 0xbe804ec3,
-       0x816ec16e, 0xbfa0fffb,
-       0xbefd006f, 0xbefe0070,
-       0xbeff0071, 0xb979f822,
-       0xb97b2011, 0x857b867b,
-       0xb97b0191, 0x857b827b,
-       0xb97bba11, 0xb973f801,
-       0xb8ee3b05, 0x806e816e,
-       0xbf0d9972, 0xbfa20002,
-       0x846e896e, 0xbfa00001,
-       0x846e8a6e, 0x806eff6e,
-       0x000001c0, 0x806e746e,
-       0x826f8075, 0xf4605c37,
-       0xf8000010, 0xf4605d37,
-       0xf8000020, 0xf4601e77,
-       0xf8000034, 0xbf8a0000,
-       0x856e9677, 0xb96e04a1,
-       0x856e9577, 0xb96e0421,
-       0x856e8e77, 0xb96e3021,
-       0x8b6dff6d, 0x01ffffff,
-       0x8bfe7e7e, 0x8bea6a6a,
-       0xb97af804, 0xb8eef802,
-       0xbf0c8b6e, 0xbfa20003,
-       0xbe804fc2, 0xbf94fffe,
-       0xbfa10001, 0xbe804ec4,
-       0xbf94fffc, 0x857a897a,
-       0xb97a0244, 0xbe804a6c,
+       0x80f6c076, 0x82f78077,
+       0xf460803b, 0xf8000000,
+       0xbf8a0000, 0x80fd907d,
+       0xbf800000, 0xbe804300,
+       0xbe824302, 0xbe844304,
+       0xbe864306, 0xbe884308,
+       0xbe8a430a, 0xbe8c430c,
+       0xbe8e430e, 0xbf06807d,
+       0xbfa1ffef, 0xb980f801,
+       0x00000000, 0xb8f83b05,
+       0x80788178, 0xbf0d9972,
+       0xbfa20002, 0x84788978,
+       0xbfa00001, 0x84788a78,
+       0x8078ff78, 0x00000200,
+       0x80767874, 0x82778075,
+       0xbeff0071, 0xf4601bfb,
+       0xf8000000, 0xf4601b3b,
+       0xf8000004, 0xf4601b7b,
+       0xf8000008, 0xf4601c3b,
+       0xf800000c, 0xf4601c7b,
+       0xf8000010, 0xf4601ebb,
+       0xf8000014, 0xf4601efb,
+       0xf8000018, 0xf4601e7b,
+       0xf800001c, 0xf4601cfb,
+       0xf8000020, 0xf4601bbb,
+       0xf8000024, 0xbf8a0000,
+       0xb96ef814, 0xf4601bbb,
+       0xf8000028, 0xbf8a0000,
+       0xb96ef815, 0xf4601bbb,
+       0xf800002c, 0xbf8a0000,
+       0xb96ef812, 0xf4601bbb,
+       0xf8000030, 0xbf8a0000,
+       0xb96ef813, 0x8b6eff7f,
+       0x04000000, 0xbfa10022,
+       0xf4601bbb, 0xf8000038,
+       0xbf8a0000, 0xbf0d806e,
+       0xbfa1001d, 0x856e906e,
+       0x8b6e6e6e, 0xbfa10003,
+       0xbe804ec1, 0x816ec16e,
+       0xbfa0fffb, 0xbef800ff,
+       0x00000080, 0xbefd0081,
+       0xf4601bbb, 0xf0000000,
+       0xbfc70000, 0x80788478,
+       0x937eff6e, 0x00070004,
+       0x847e907e, 0x8c7d7e7d,
+       0xbe80517d, 0x917dff7d,
+       0x007f0000, 0x856e906e,
+       0x8b6e6e6e, 0xbfa10003,
+       0xbe804e7d, 0x816ec16e,
+       0xbfa0fffb, 0x807d817d,
+       0xbf08907d, 0xbfa1ffec,
+       0xf4601bbb, 0xf800003c,
+       0xbfc70000, 0xbf0d806e,
+       0xbfa1000c, 0xbf0d9a7f,
+       0xbfa10002, 0xbf068180,
+       0xbe804fc4, 0xbf94fffc,
+       0xbfa10006, 0x856e906e,
+       0x8b6e6e6e, 0xbfa10003,
+       0xbe804ec3, 0x816ec16e,
+       0xbfa0fffb, 0xbefd006f,
+       0xbefe0070, 0xbeff0071,
+       0xb979f822, 0xb97b2011,
+       0x857b867b, 0xb97b0191,
+       0x857b827b, 0xb97bba11,
+       0xb973f801, 0xb8ee3b05,
+       0x806e816e, 0xbf0d9972,
+       0xbfa20002, 0x846e896e,
+       0xbfa00001, 0x846e8a6e,
+       0x806eff6e, 0x000001c0,
+       0x806e746e, 0x826f8075,
+       0xf4605c37, 0xf8000010,
+       0xf4605d37, 0xf8000020,
+       0xf4601e77, 0xf8000034,
+       0xbf8a0000, 0x856e9677,
+       0xb96e04a1, 0x856e9577,
+       0xb96e0421, 0x856e8e77,
+       0xb96e3021, 0x8b6dff6d,
+       0x01ffffff, 0x8bfe7e7e,
+       0x8bea6a6a, 0xb97af804,
        0xb8eef802, 0xbf0c8b6e,
        0xbfa20003, 0xbe804fc2,
        0xbf94fffe, 0xbfa10001,
        0xbe804ec4, 0xbf94fffc,
-       0xbfb10000, 0xbf9f0000,
+       0x857a897a, 0xb97a0244,
+       0xbe804a6c, 0xb8eef802,
+       0xbf0c8b6e, 0xbfa20003,
+       0xbe804fc2, 0xbf94fffe,
+       0xbfa10001, 0xbe804ec4,
+       0xbf94fffc, 0xbfb10000,
        0xbf9f0000, 0xbf9f0000,
        0xbf9f0000, 0xbf9f0000,
+       0xbf9f0000, 0x00000000,
 };
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
index d59400d242d19..b7b82f1c6072f 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
@@ -73,6 +73,7 @@ var SQ_WAVE_LDS_ALLOC_GRANULARITY             = 10
 #endif
 
 var SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK     = 0xF
+var SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_SHIFT      = 4
 var SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK       = 0x10
 var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT  = 5
 var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK   = 0x20
@@ -362,6 +363,15 @@ L_TRAP_CASE:
 L_EXIT_TRAP:
        s_and_b32       ttmp1, ttmp1, ADDRESS_HI32_MASK
 
+#if HAVE_BANKED_VGPRS
+       s_getreg_b32    s_save_excp_flag_priv, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
+       fixup_vgpr_bank_selection()
+#endif
+
+#if HAVE_XNACK
+       restore_xnack_state_priv(s_save_tmp)
+#endif
+
        // Restore SQ_WAVE_STATUS.
        s_and_b64       exec, exec, exec                                        
// Restore STATUS.EXECZ, not writable by s_setreg_b32
        s_and_b64       vcc, vcc, vcc                                           
// Restore STATUS.VCCZ, not writable by s_setreg_b32
@@ -390,6 +400,14 @@ L_HAVE_VGPRS:
        s_mov_b32       s_save_tmp, 0
        s_setreg_b32    hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, 
SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT, 1), s_save_tmp     //clear saveCtx 
bit
 
+#if HAVE_XNACK
+       save_and_clear_xnack_state_priv(s_save_tmp)
+#endif
+
+#if HAVE_BANKED_VGPRS
+       fixup_vgpr_bank_selection()
+#endif
+
        /* inform SPI the readiness and wait for SPI's go signal */
        s_mov_b32       s_save_exec_lo, exec_lo                                 
//save EXEC and use EXEC for the go signal from SPI
        s_mov_b32       s_save_exec_hi, exec_hi
@@ -404,7 +422,6 @@ L_HAVE_VGPRS:
        s_or_b32        s_save_pc_hi, s_save_pc_hi, s_save_tmp
 
 #if HAVE_XNACK
-       save_and_clear_xnack_state_priv(s_save_tmp)
        s_getreg_b32    s_save_xnack_mask, hwreg(HW_REG_WAVE_XNACK_MASK)
        s_setreg_imm32_b32      hwreg(HW_REG_WAVE_XNACK_MASK), 0
 #endif
@@ -1328,3 +1345,150 @@ L_BARRIER_RESTORE_LOOP:
 
 L_BARRIER_RESTORE_DONE:
 end
+
+#if HAVE_BANKED_VGPRS
+function fixup_vgpr_bank_selection     // If the VALU op at the PC in ttmp[0:1] is followed by S_SET_VGPR_MSB, copy its SIMM[15:8] into MODE[19:12]
+       // PC read may fault if memory violation has been asserted.
+       // In this case no further progress is expected so fixup is not needed.
+       s_bitcmp1_b32   s_save_excp_flag_priv, 
SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_SHIFT
+       s_cbranch_scc1  L_FIXUP_DONE
+
+       // ttmp[0:1]: {7b'0} PC[56:0]
+       // ttmp2, 3, 10, 13, 14, 15: free
+       s_load_b64      [ttmp14, ttmp15], [ttmp0, ttmp1], 0 scope:SCOPE_CU      
// Load the 2 instruction DW we are returning to
+       s_load_b64      [ttmp2, ttmp3], [ttmp0, ttmp1], 8 scope:SCOPE_CU        
// Load the next 2 instruction DW, just in case
+       s_wait_kmcnt    1
+       s_and_b32       ttmp10, ttmp14, 0x80000000                              
// Check bit 31 in the first DWORD
+                                                                               
// SCC set if ttmp10 is != 0, i.e. if bit 31 == 1
+       s_cbranch_scc1  L_FIXUP_NOT_VOP12C                                      
// If bit 31 is 1, we are not VOP1, VOP2, or VOPC
+       // Fall through here means bit 31 == 0, meaning we are VOP1, VOP2, or 
VOPC
+       // Size of instruction depends on Opcode or SRC0_9
+       // Check for VOP2 opcode
+       s_bfe_u32       ttmp10, ttmp14, (25 | (6 << 0x10))                      
// Check bits 30:25 for VOP2 Opcode
+       // VOP2 V_FMAMK_F64 or V_FMAAK_F64 has implied 64-bit literal, 3 DW
+       s_sub_co_i32    ttmp13, ttmp10, 0x23                                    
// V_FMAMK_F64 is 0x23, V_FMAAK_F64 is 0x24
+       s_cmp_le_u32    ttmp13, 0x1                                             
// 0==0x23, 1==0x24
+       s_cbranch_scc1  L_FIXUP_THREE_DWORD                                     
// If either, this is 3 DWORD inst
+       // VOP2 V_FMAMK_F32, V_FMAAK_F32, V_FMAMK_F16, V_FMAAK_F16, 2 DW
+       s_sub_co_i32    ttmp13, ttmp10, 0x2c                                    
// V_FMAMK_F32 is 0x2c, V_FMAAK_F32 is 0x2d
+       s_cmp_le_u32    ttmp13, 0x1                                             
// 0==0x2c, 1==0x2d
+       s_cbranch_scc1  L_FIXUP_TWO_DWORD                                       
// If either, this is 2 DWORD inst
+       s_sub_co_i32    ttmp13, ttmp10, 0x37                                    
// V_FMAMK_F16 is 0x37, V_FMAAK_F16 is 0x38
+       s_cmp_le_u32    ttmp13, 0x1                                             
// 0==0x37, 1==0x38
+       s_cbranch_scc1  L_FIXUP_TWO_DWORD                                       
// If either, this is 2 DWORD inst
+       // Check SRC0_9 for VOP1, VOP2, and VOPC
+       s_and_b32       ttmp10, ttmp14, 0x1ff                                   
// Check bits 8:0 for SRC0_9
+       // Literal constant 64 is 3 DWORDs
+       s_cmp_eq_u32    ttmp10, 0xfe                                            
// 0xfe == 254 == Literal constant64
+       s_cbranch_scc1  L_FIXUP_THREE_DWORD                                     
// 3 DWORD inst
+       // Literal constant 32, DPP16, DPP8, and DPP8FI are 2 DWORDs
+       s_cmp_eq_u32    ttmp10, 0xff                                            
// 0xff == 255 = Literal constant32
+       s_cbranch_scc1  L_FIXUP_TWO_DWORD                                       
// 2 DWORD inst
+       s_cmp_eq_u32    ttmp10, 0xfa                                            
// 0xfa == 250 = DPP16
+       s_cbranch_scc1  L_FIXUP_TWO_DWORD                                       
// 2 DWORD inst
+       s_sub_co_i32    ttmp13, ttmp10, 0xe9                                    
// DPP8 is 0xe9, DPP8FI is 0xea
+       s_cmp_le_u32    ttmp13, 0x1                                             
// 0==0xe9, 1==0xea
+       s_cbranch_scc1  L_FIXUP_TWO_DWORD                                       
// If either, this is 2 DWORD inst
+       // Instruction is 1 DWORD otherwise
+
+L_FIXUP_ONE_DWORD:
+       // Check if TTMP15 contains the value for S_SET_VGPR_MSB instruction
+       s_and_b32       ttmp10, ttmp15, 0xffff0000                              
// Check encoding in upper 16 bits
+       s_cmp_eq_u32    ttmp10, 0xbf860000                                      
// Check if SOPP (9b'10_1111111) and S_SET_VGPR_MSB (7b'0000110)
+       s_cbranch_scc0  L_FIXUP_DONE                                            
// No problem, no fixup needed
+       // VALU op followed by a S_SET_VGPR_MSB. Need to pull SIMM[15:8] to fix 
up MODE.*_VGPR_MSB
+       s_bfe_u32       ttmp10, ttmp15, (14 | (2 << 0x10))                      
// Shift SIMM[15:14] over to 1:0, Dst
+       s_and_b32       ttmp13, ttmp15, 0x3f00                                  
// Mask to get SIMM[13:8] only
+       s_lshr_b32      ttmp13, ttmp13, 6                                       
// Shift SIMM[13:8] into 7:2, Src2, Src1, Src0
+       s_or_b32        ttmp10, ttmp10, ttmp13                                  
// Src2, Src1, Src0, Dst --> format in MODE register
+       s_setreg_b32    hwreg(HW_REG_WAVE_MODE, 12, 8), ttmp10                  
// Write value into MODE[19:12]
+       s_branch        L_FIXUP_DONE
+
+L_FIXUP_NOT_VOP12C:
+       // ttmp[0:1]: {7b'0} PC[56:0]
+       // ttmp2: PC+2 value (not waitcnt'ed yet)
+       // ttmp3: PC+3 value (not waitcnt'ed yet)
+       // ttmp10, ttmp13: free
+       // ttmp14: PC+0 value
+       // ttmp15: PC+1 value
+       // Not VOP1, VOP2, or VOPC.
+       // Check if we are VOP3 or VOP3SD
+       s_and_b32       ttmp10, ttmp14, 0xfc000000                              
// Bits 31:26
+       s_cmp_eq_u32    ttmp10, 0xd4000000                                      
// If 31:26 = 0x35, this is VOP3 or VOP3SD
+       s_cbranch_scc1  L_FIXUP_CHECK_VOP3                                      
// If VOP3 or VOP3SD, need to check SRC2_9, SRC1_9, SRC0_9
+       // Not VOP1, VOP2, VOPC, VOP3, or VOP3SD.
+       // Check for VOPD
+       s_cmp_eq_u32    ttmp10, 0xc8000000                                      
// If 31:26 = 0x32, this is VOPD
+       s_cbranch_scc1  L_FIXUP_CHECK_VOPD                                      
// If VOPD, need to check OpX, OpY, SRCX0 and SRCY0
+       // Not VOP1, VOP2, VOPC, VOP3, VOP3SD, VOPD.
+       // Check if we are VOPD3
+       s_and_b32       ttmp10, ttmp14, 0xff000000                              
// Bits 31:24
+       s_cmp_eq_u32    ttmp10, 0xcf000000                                      
// If 31:24 = 0xcf, this is VOPD3
+       s_cbranch_scc1  L_FIXUP_THREE_DWORD                                     
// If VOPD3, 3 DWORD inst
+       // Not VOP1, VOP2, VOPC, VOP3, VOP3SD, VOPD, or VOPD3.
+       // Might be in VOP3P, but we must ensure we are not VOP3PX2
+       s_and_b32       ttmp13, ttmp14, 0xffff0000                              
// Bits 31:16
+       s_cmp_eq_u32    ttmp13, 0xcc350000                                      
// If 31:16 = 0xcc35, this is VOP3PX2
+       s_cbranch_scc1  L_FIXUP_DONE                                            
// If VOP3PX2, no fixup needed
+       s_cmp_eq_u32    ttmp13, 0xcc3a0000                                      
// If 31:16 = 0xcc3a, this is VOP3PX2
+       s_cbranch_scc1  L_FIXUP_DONE                                            
// If VOP3PX2, no fixup needed
+       // Check if we are VOP3P
+       s_cmp_eq_u32    ttmp10, 0xcc000000                                      
// If 31:24 = 0xcc, this is VOP3P
+       s_cbranch_scc0  L_FIXUP_DONE                                            
// Not in VOP3P, so instruction is not VOP1, VOP2,
+                                                                               
// VOPC, VOP3, VOP3SD, VOP3P, VOPD, or VOPD3
+                                                                               
// No fixup needed.
+       // Fall-through if we are in VOP3P to check SRC2_9, SRC1_9, and SRC0_9
+L_FIXUP_CHECK_VOP3:
+       // Start with Src0, which is in bits 8:0 of second instruction DW, 
ttmp15
+       s_and_b32       ttmp10, ttmp15, 0x1ff                                   
// Mask out unused bits
+       // Src0_9 == Literal constant 32, DPP16, DPP8, and DPP8FI means 3 DWORDs
+       s_cmp_eq_u32    ttmp10, 0xff                                            
// 0xff == 255 = Literal constant32
+       s_cbranch_scc1  L_FIXUP_THREE_DWORD                                     
// 3 DWORD inst
+       s_cmp_eq_u32    ttmp10, 0xfa                                            
// 0xfa == 250 = DPP16
+       s_cbranch_scc1  L_FIXUP_THREE_DWORD                                     
// 3 DWORD inst
+       s_sub_co_i32    ttmp10, ttmp10, 0xe9                                    
// DPP8 is 0xe9, DPP8FI is 0xea
+       s_cmp_le_u32    ttmp10, 0x1                                             
// 0==0xe9, 1==0xea
+       s_cbranch_scc1  L_FIXUP_THREE_DWORD                                     
// If either, this is 3 DWORD inst
+       s_and_b32       ttmp10, ttmp15, 0x3fe00                                 
// Next is Src1, which is in 17:9
+       s_cmp_eq_u32    ttmp10, 0x1fe00                                         
// 0xff == 255 = Literal constant32
+       s_cbranch_scc1  L_FIXUP_THREE_DWORD                                     
// 3 DWORD inst
+       s_and_b32       ttmp10, ttmp15, 0x7fc0000                               
// Next is Src2, which is in 26:18
+       s_cmp_eq_u32    ttmp10, 0x3fc0000                                       
// 0xff == 255 = Literal constant32
+       s_cbranch_scc1  L_FIXUP_THREE_DWORD                                     
// 3 DWORD inst
+       s_branch        L_FIXUP_TWO_DWORD                                       
// No special encodings, VOP3* is 2 Dword
+
+L_FIXUP_CHECK_VOPD:
+       // OpX being V_DUAL_FMA*K_F32 means 3 DWORDs
+       s_bfe_u32       ttmp10, ttmp14, (22 | (4 << 0x10))                      
// OPX is bits 25:22
+       s_sub_co_i32    ttmp10, ttmp10, 0x1                                     
// V_DUAL_FMAAK_F32 is 0x1, V_DUAL_FMAMK_F32 is 0x2
+       s_cmp_le_u32    ttmp10, 0x1                                             
// 0==0x1, 1==0x2
+       s_cbranch_scc1  L_FIXUP_THREE_DWORD                                     
// If either, this is 3 DWORD inst
+       // OpY being V_DUAL_FMA*K_F32 means 3 DWORDs
+       s_bfe_u32       ttmp10, ttmp14, (17 | (5 << 0x10))                      
// OPY is bits 21:17
+       s_sub_co_i32    ttmp10, ttmp10, 0x1                                     
// V_DUAL_FMAAK_F32 is 0x1, V_DUAL_FMAMK_F32 is 0x2
+       s_cmp_le_u32    ttmp10, 0x1                                             
// 0==0x1, 1==0x2
+       s_cbranch_scc1  L_FIXUP_THREE_DWORD                                     
// If either, this is 3 DWORD inst
+       // SRCX0 == Literal constant 32 means 3 DWORDs
+       s_and_b32       ttmp10, ttmp14, 0x1ff                                   
// SRCX0 is in bits 8:0 of 1st DWORD
+       s_cmp_eq_u32    ttmp10, 0xff                                            
// 0xff == 255 = Literal constant32
+       s_cbranch_scc1  L_FIXUP_THREE_DWORD                                     
// 3 DWORD inst
+       // SRCY0 == Literal constant 32 means 3 DWORDs
+       s_and_b32       ttmp10, ttmp15, 0x1ff                                   
// SRCY0 is in bits 8:0 of 2nd DWORD
+       s_cmp_eq_u32    ttmp10, 0xff                                            
// 0xff == 255 = Literal constant32
+       s_cbranch_scc1  L_FIXUP_THREE_DWORD                                     
// 3 DWORD inst
+                                                                               
// If otherwise, no special encodings. Default VOPD is 2 Dword
+                                                                               
// Fall-thru if true, because this is a 2 DWORD inst
+L_FIXUP_TWO_DWORD:
+       s_wait_kmcnt    0                                                       
// Wait for PC+2 and PC+3 to arrive in ttmp2 and ttmp3
+       s_mov_b32       ttmp15, ttmp2                                           
// Move possible S_SET_VGPR_MSB into ttmp15
+       s_branch        L_FIXUP_ONE_DWORD                                       
// Go to common logic that checks if it is S_SET_VGPR_MSB
+
+L_FIXUP_THREE_DWORD:
+       s_wait_kmcnt    0                                                       
// Wait for PC+2 and PC+3 to arrive in ttmp2 and ttmp3
+       s_mov_b32       ttmp15, ttmp3                                           
// Move possible S_SET_VGPR_MSB into ttmp15
+       s_branch        L_FIXUP_ONE_DWORD                                       
// Go to common logic that checks if it is S_SET_VGPR_MSB
+
+L_FIXUP_DONE:
+       s_wait_kmcnt    0                                                       
// Ensure load of ttmp2 and ttmp3 is done
+end
+#endif
-- 
2.52.0

Reply via email to