[llvm-branch-commits] [llvm] [AMDGPU] Test precommit for subreg reload (PR #175001)

Christudasan Devadasan via llvm-branch-commits Mon, 12 Jan 2026 21:08:55 -0800

https://github.com/cdevadas updated 
https://github.com/llvm/llvm-project/pull/175001


>From 74adfaf612775e2ac27f59873b2f61a184cebd53 Mon Sep 17 00:00:00 2001
From: Christudasan Devadasan <[email protected]>
Date: Thu, 8 Jan 2026 06:23:19 +0000
Subject: [PATCH 1/2] [AMDGPU] Test precommit for subreg reload

This test currently fails due to insufficient
registers during allocation. Once the subreg
reload is implemented, it will begin to pass
as the partial reload helps mitigate register
pressure.
---
 ...pressure-mitigation-with-subreg-reload.mir | 37 +++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 
llvm/test/CodeGen/AMDGPU/regpressure-mitigation-with-subreg-reload.mir

diff --git 
a/llvm/test/CodeGen/AMDGPU/regpressure-mitigation-with-subreg-reload.mir 
b/llvm/test/CodeGen/AMDGPU/regpressure-mitigation-with-subreg-reload.mir
new file mode 100644
index 0000000000000..43289230c587d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/regpressure-mitigation-with-subreg-reload.mir
@@ -0,0 +1,37 @@
+# RUN: not llc -mcpu=gfx1100 -mtriple=amdgcn-amd-amdhsa -stress-regalloc=4 
-run-pass=greedy -filetype=null %s 2>&1 | FileCheck %s
+
+# This test demonstrates register pressure mitigation through subreg-aware
+# reloads during register allocation. Currently, the test would fail during
+# RA due to insufficient registers. Subreg-aware reload, when implemented,
+# mitigates register pressure by reloading only the used portions of tuple
+# registers from their spilled locations instead of reloading entire tuples,
+# thereby enhancing register allocation.
+
+# CHECK: error: <unknown>:0:0: ran out of registers during register allocation
+
+---
+name:            subreg-reload
+tracksRegLiveness: true
+machineFunctionInfo:
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0.entry:
+    liveins: $vgpr1, $sgpr0_sgpr1, $sgpr2_sgpr3
+
+    %8:vgpr_32 = COPY $vgpr1
+    %12:vreg_64 = COPY killed renamable $sgpr0_sgpr1
+    %11:vreg_128 = FLAT_LOAD_DWORDX4 %12, 0, 0, implicit $exec, implicit 
$flat_scr
+    undef early-clobber %49.sub0_sub1:vreg_128, $sgpr_null = 
V_MAD_U64_U32_gfx11_e64 %11.sub0, 42, 0, 0, implicit $exec
+    undef %50.sub0:vreg_64 = COPY %49.sub1
+    early-clobber %49.sub1_sub2:vreg_128, $sgpr_null = V_MAD_U64_U32_gfx11_e64 
%11.sub1, 42, %50, 0, implicit $exec
+    early-clobber %15:vreg_64, $sgpr_null = V_MAD_U64_U32_gfx11_e64 %11.sub2, 
42, 0, 0, implicit $exec
+    undef %52.sub0:vreg_64 = COPY %15.sub1
+    early-clobber %27:vreg_64, $sgpr_null = V_MAD_U64_U32_gfx11_e64 %11.sub3, 
42, %52, 0, implicit $exec
+    %49.sub2:vreg_128 = COPY %15.sub0
+    %49.sub3:vreg_128 = COPY %27.sub0
+    $vgpr31 = COPY %8
+    INLINEASM &"; use v1", 1, 327690, $vgpr1
+    FLAT_STORE_DWORDX4 %12, %49, 0, 0, implicit $exec, implicit $flat_scr
+    SI_RETURN
+...

>From 12d1aa0c8430c9d8015bfb285aae7d5e260db8ad Mon Sep 17 00:00:00 2001
From: Christudasan Devadasan <[email protected]>
Date: Thu, 8 Jan 2026 17:30:02 +0000
Subject: [PATCH 2/2] compacted the virt-reg numbers

---
 ...pressure-mitigation-with-subreg-reload.mir | 28 +++++++++----------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git 
a/llvm/test/CodeGen/AMDGPU/regpressure-mitigation-with-subreg-reload.mir 
b/llvm/test/CodeGen/AMDGPU/regpressure-mitigation-with-subreg-reload.mir
index 43289230c587d..a0993d7e35176 100644
--- a/llvm/test/CodeGen/AMDGPU/regpressure-mitigation-with-subreg-reload.mir
+++ b/llvm/test/CodeGen/AMDGPU/regpressure-mitigation-with-subreg-reload.mir
@@ -16,22 +16,22 @@ machineFunctionInfo:
   frameOffsetReg:  '$sgpr33'
   stackPtrOffsetReg: '$sgpr32'
 body:             |
-  bb.0.entry:
+  bb.0:
     liveins: $vgpr1, $sgpr0_sgpr1, $sgpr2_sgpr3
 
-    %8:vgpr_32 = COPY $vgpr1
-    %12:vreg_64 = COPY killed renamable $sgpr0_sgpr1
-    %11:vreg_128 = FLAT_LOAD_DWORDX4 %12, 0, 0, implicit $exec, implicit 
$flat_scr
-    undef early-clobber %49.sub0_sub1:vreg_128, $sgpr_null = 
V_MAD_U64_U32_gfx11_e64 %11.sub0, 42, 0, 0, implicit $exec
-    undef %50.sub0:vreg_64 = COPY %49.sub1
-    early-clobber %49.sub1_sub2:vreg_128, $sgpr_null = V_MAD_U64_U32_gfx11_e64 
%11.sub1, 42, %50, 0, implicit $exec
-    early-clobber %15:vreg_64, $sgpr_null = V_MAD_U64_U32_gfx11_e64 %11.sub2, 
42, 0, 0, implicit $exec
-    undef %52.sub0:vreg_64 = COPY %15.sub1
-    early-clobber %27:vreg_64, $sgpr_null = V_MAD_U64_U32_gfx11_e64 %11.sub3, 
42, %52, 0, implicit $exec
-    %49.sub2:vreg_128 = COPY %15.sub0
-    %49.sub3:vreg_128 = COPY %27.sub0
-    $vgpr31 = COPY %8
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vreg_64 = COPY killed renamable $sgpr0_sgpr1
+    %3:vreg_128 = FLAT_LOAD_DWORDX4 %2, 0, 0, implicit $exec, implicit 
$flat_scr
+    undef early-clobber %4.sub0_sub1:vreg_128, $sgpr_null = 
V_MAD_U64_U32_gfx11_e64 %3.sub0, 42, 0, 0, implicit $exec
+    undef %5.sub0:vreg_64 = COPY %4.sub1
+    early-clobber %4.sub1_sub2:vreg_128, $sgpr_null = V_MAD_U64_U32_gfx11_e64 
%3.sub1, 42, %5, 0, implicit $exec
+    early-clobber %6:vreg_64, $sgpr_null = V_MAD_U64_U32_gfx11_e64 %3.sub2, 
42, 0, 0, implicit $exec
+    undef %7.sub0:vreg_64 = COPY %6.sub1
+    early-clobber %8:vreg_64, $sgpr_null = V_MAD_U64_U32_gfx11_e64 %3.sub3, 
42, %7, 0, implicit $exec
+    %4.sub2:vreg_128 = COPY %6.sub0
+    %4.sub3:vreg_128 = COPY %8.sub0
+    $vgpr31 = COPY %1
     INLINEASM &"; use v1", 1, 327690, $vgpr1
-    FLAT_STORE_DWORDX4 %12, %49, 0, 0, implicit $exec, implicit $flat_scr
+    FLAT_STORE_DWORDX4 %2, %4, 0, 0, implicit $exec, implicit $flat_scr
     SI_RETURN
 ...

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPU] Test precommit for subreg reload (PR #175001)

Reply via email to