[llvm-bugs] [Bug 122905] [GISel] Missing combines to expose common subexpressions and eliminate them

LLVM Bugs via llvm-bugs Tue, 14 Jan 2025 06:17:33 -0800

Issue	122905
Summary	[GISel] Missing combines to expose common subexpressions and eliminate them
Labels	llvm:globalisel
Assignees
Reporter	qcolombet

    When running GISel on the given input IR, we end up generating essentially a one-to-one mapping with the input IR whereas most of the computation is just duplicated.
SDISel performs a much better job by doing `instcombine`-like optimizations that allow it to simplify the IR and eventually exposes the CSE opportunity.


This is not that surprising given that historically GISel has had a garbage in garbage out approach, but it may make sense to strengthen GISel combines for optimized builds.

Note: I observed this with AMDGPU but I suspect it affects all backends.

# To Reproduce #

Download the attached IR or copy paste the snippet below and run
```bash
llc -O3 -march=amdgcn -mcpu=gfx942  -mtriple amdgcn-amd-hmcsa -global-isel=<0|1> repro.ll -o <sd|g>isel.s 
```

# Result #

GISel ends up generating two `fdiv` instructions whereas SDISel is able to CSE the whole thing and produces just one.

GISel:
```asm
        v_div_scale_f32 v1, s[0:1], v0, v0, 1.0
        v_mov_b32_e32 v7, v2
        v_mov_b32_e32 v2, v3
 v_mov_b32_e32 v3, v4
        v_rcp_f32_e32 v4, v1
        v_div_scale_f32 v5, vcc, 1.0, v0, 1.0
        v_fma_f32 v8, -v1, v4, 1.0
 v_fmac_f32_e32 v4, v8, v4
        v_mul_f32_e32 v8, v5, v4
 v_fma_f32 v9, -v1, v8, v5
        v_fmac_f32_e32 v8, v9, v4
 v_fma_f32 v1, -v1, v8, v5
        v_div_fmas_f32 v5, v1, v4, v8
 v_div_fixup_f32 v5, v5, v0, 1.0
        v_div_fmas_f32 v1, v1, v4, v8
 v_div_fixup_f32 v0, v1, v0, 1.0
```

SDISel:
```asm
 v_div_scale_f32 v1, s[0:1], v0, v0, 1.0
        v_rcp_f32_e32 v6, v1
 s_nop 0
        v_fma_f32 v7, -v1, v6, 1.0
        v_fmac_f32_e32 v6, v7, v6
        v_div_scale_f32 v7, vcc, 1.0, v0, 1.0
        v_mul_f32_e32 v8, v7, v6
        v_fma_f32 v9, -v1, v8, v7
        v_fmac_f32_e32 v8, v9, v6
        v_fma_f32 v1, -v1, v8, v7
        v_div_fmas_f32 v1, v1, v6, v8
        v_div_fixup_f32 v0, v1, v0, 1.0
```

# IR Snippet #
```llvm
define void @foo(<1 x float> %in, ptr %out, ptr %out2) {
 %t1174 = insertvalue [4 x <1 x float>] zeroinitializer, <1 x float> %in, 0
 %t1175 = insertvalue [4 x <1 x float>] %t1174, <1 x float> %in, 1
  %t1176 = insertvalue [4 x <1 x float>] %t1175, <1 x float> %in, 2
  %t1177 = insertvalue [4 x <1 x float>] %t1176, <1 x float> %in, 3
  %t1178 = insertvalue [2 x [1 x [4 x [1 x [4 x <1 x float>]]]]] zeroinitializer, [4 x <1 x float>] %t1177, 0, 0, 0, 0
  %t1179 = insertvalue [2 x [1 x [4 x [1 x [4 x <1 x float>]]]]] %t1178, [4 x <1 x float>] %t1177, 0, 0, 1, 0
  %t1180 = insertvalue [2 x [1 x [4 x [1 x [4 x <1 x float>]]]]] %t1179, [4 x <1 x float>] %t1177, 0, 0, 2, 0
  %t1181 = insertvalue [2 x [1 x [4 x [1 x [4 x <1 x float>]]]]] %t1180, [4 x <1 x float>] %t1177, 0, 0, 3, 0
  %t1182 = insertvalue [2 x [1 x [4 x [1 x [4 x <1 x float>]]]]] %t1181, [4 x <1 x float>] %t1177, 1, 0, 0, 0
  %t1183 = insertvalue [2 x [1 x [4 x [1 x [4 x <1 x float>]]]]] %t1182, [4 x <1 x float>] %t1177, 1, 0, 1, 0
  %t1184 = insertvalue [2 x [1 x [4 x [1 x [4 x <1 x float>]]]]] %t1183, [4 x <1 x float>] %t1177, 1, 0, 2, 0
  %t1185 = insertvalue [2 x [1 x [4 x [1 x [4 x <1 x float>]]]]] %t1184, [4 x <1 x float>] %t1177, 1, 0, 3, 0
  %t1186 = extractvalue [2 x [1 x [4 x [1 x [4 x <1 x float>]]]]] %t1178, 0, 0, 0, 0, 0
  %t1187 = fdiv <1 x float> splat (float 1.000000e+00), %t1186
  %t1188 = extractvalue [2 x [1 x [4 x [1 x [4 x <1 x float>]]]]] %t1178, 0, 0, 0, 0, 1
  %t1189 = fdiv <1 x float> splat (float 1.000000e+00), %t1188

  store <1 x float> %t1187, ptr %out
  store <1 x float> %t1189, ptr %out2
  ret void
}
```

# Note #

SDISel is essentially able to do the equivalent of:
```bash
opt -passes=instcombine,early-cse -S repro.ll -o -
```

Which yields:
```llvm
define void @foo(<1 x float> %in, ptr %out, ptr %out2) {
  %t1187 = fdiv <1 x float> splat (float 1.000000e+00), %in
 store <1 x float> %t1187, ptr %out, align 4
  store <1 x float> %t1187, ptr %out2, align 4
  ret void
}
```

[repro.ll.txt](https://github.com/user-attachments/files/18411275/repro.ll.txt)

_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

[llvm-bugs] [Bug 122905] [GISel] Missing combines to expose common subexpressions and eliminate them

Reply via email to