From: Daniel Barboza <[email protected]>
Given a comparison with the format
A << CST1 EQ|NE CST2
Turn it into:
(A & CSTmask) EQ|NE (CST2 >> CST1)
Where 'CSTmask' is a bitmask that keeps only the bits of A that survive
the shift, i.e. it clears the high CST1 bits that the shift would discard.
The idea is that a bit_and is, on most common targets at least, more
efficient than a lshift.
Note that we're not handling the case "A << 1 != CST2" because this is a
pattern that is optimized by tree-ssa-loop-niter.cc, turning a whole
loop into a single CTZ().
Bootstrapped and regression tested on x86. Includes the aarch64 testsuite
changes needed for the regressions pointed out by the Linaro automatic CI
bot.
PR tree-optimization/124019
gcc/ChangeLog:
* match.pd (`A<<CST1 EQ|NE CST2 -> (A&CSTmask) EQ|NE (CST2>>CST1)`):
New pattern.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/cmp_shifted_reg_1.c: Change the shift
immediate to "1" to dodge this optimization and keep
generating the shift and compare.
* gcc.target/aarch64/negs.c: Change the shift immediate to "1" in
negs_si_test3 and negs_di_test3, and switch the comparison
from "== 0" to "!= 0", to dodge this optimization and keep
generating "negs". Update the expected values for both
accordingly.
* gcc.dg/tree-ssa/pr124019.c: New test.
---
Changes from v1:
- Fixed aarch64 regression tests reported by the Linaro CI bot.
- v1 link: https://gcc.gnu.org/pipermail/gcc-patches/2026-February/709529.html
gcc/match.pd | 24 +++++++++++
gcc/testsuite/gcc.dg/tree-ssa/pr124019.c | 40 +++++++++++++++++++
.../gcc.target/aarch64/cmp_shifted_reg_1.c | 4 +-
gcc/testsuite/gcc.target/aarch64/negs.c | 36 +++++++++--------
4 files changed, 86 insertions(+), 18 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr124019.c
diff --git a/gcc/match.pd b/gcc/match.pd
index 7f16fd4e081..b5f5a1cf1ac 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1454,6 +1454,30 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(if (TYPE_UNSIGNED (type))
(bit_and @0 (bit_not (lshift { build_all_ones_cst (type); } @1)))))
+/* PR124019: A << CST1 EQ|NE CST2 -> (A & CSTmask) EQ|NE (CST2 >> CST1).
+   CSTmask keeps the bits of A that survive the shift.  If CST2 has a bit
+   set below bit CST1 the comparison is decided at compile time instead.  */
+(for cmp (eq ne)
+ (simplify
+  (cmp (lshift @0 INTEGER_CST@1) INTEGER_CST@2)
+  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+       && tree_fits_shwi_p (@1)
+       && tree_to_shwi (@1) > 0
+       /* Shifting by the full precision (or more) is undefined.  */
+       && tree_to_shwi (@1) < TYPE_PRECISION (TREE_TYPE (@0))
+       /* "A << 1 != CST2" is used by the trailing zero count loop
+          detection, which generates better code (pr114760-1.c, ntz32_3).  */
+       && !(cmp == NE_EXPR && tree_to_shwi (@1) == 1))
+   (if (tree_to_shwi (@1) > wi::ctz (wi::to_wide (@2)))
+    { constant_boolean_node (cmp == NE_EXPR, type); }
+    (with {
+      tree type0 = TREE_TYPE (@0);
+      unsigned prec = TYPE_PRECISION (type0);
+      wide_int mask = wi::mask (prec - tree_to_shwi (@1), false, prec);
+      wide_int res = wi::lrshift (wi::to_wide (@2), wi::to_wide (@1)); }
+     (cmp (bit_and @0 { wide_int_to_tree (type0, mask); })
+          { wide_int_to_tree (type0, res); }))))))
+
(for bitop (bit_and bit_ior)
cmp (eq ne)
/* PR35691: Transform
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr124019.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr124019.c
new file mode 100644
index 00000000000..bd46864c75b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr124019.c
@@ -0,0 +1,40 @@
+/* { dg-additional-options -O2 } */
+/* { dg-additional-options -fdump-tree-optimized } */
+
+typedef unsigned char uint8_t;
+typedef signed char int8_t;
+
+uint8_t f1(int8_t a) /* EQ, shift by 4: one of the two expected " & 15;" matches.  */
+{
+ int8_t b = a << 4;
+ return b == -128;
+}
+
+uint8_t f2(int8_t a) /* NE, shift by 4: the other expected " & 15;" match.  */
+{
+ int8_t b = a << 4;
+ return b != -128;
+}
+
+uint8_t f3(int8_t a) /* EQ, shift by 6: expected to be tested via " & 3;".  */
+{
+ int8_t b = a << 6;
+ return b == -128;
+}
+
+uint8_t f4(int8_t a) /* EQ, shift by 7: expected to be tested via " & 1;".  */
+{
+ int8_t b = a << 7;
+ return b == -128;
+}
+
+uint8_t f5(int8_t a) /* EQ, shift by 1: only NE is excluded for a shift of 1, so " & 127;" is expected.  */
+{
+ int8_t b = a << 1;
+ return b == -128;
+}
+/* { dg-final { scan-tree-dump-times " & 15;" 2 optimized } } */
+/* { dg-final { scan-tree-dump-times " & 3;" 1 optimized } } */
+/* { dg-final { scan-tree-dump-times " & 1;" 1 optimized } } */
+/* { dg-final { scan-tree-dump-times " & 127;" 1 optimized } } */
+/* { dg-final { scan-tree-dump-times " << " 0 optimized } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/cmp_shifted_reg_1.c
b/gcc/testsuite/gcc.target/aarch64/cmp_shifted_reg_1.c
index cacecf4e71d..3ef02a325f2 100644
--- a/gcc/testsuite/gcc.target/aarch64/cmp_shifted_reg_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/cmp_shifted_reg_1.c
@@ -3,9 +3,9 @@
int f3 (int x, int y)
{
- int res = x << 3;
+ int res = x << 1;
return res != 0;
}
/* We should combine the shift and compare */
-/* { dg-final { scan-assembler "cmp\.*\twzr, w\[0-9\]+, lsl 3" } } */
+/* { dg-final { scan-assembler "cmp\.*\twzr, w\[0-9\]+, lsl 1" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/negs.c
b/gcc/testsuite/gcc.target/aarch64/negs.c
index 1c23041eae7..55b7ec2a0cd 100644
--- a/gcc/testsuite/gcc.target/aarch64/negs.c
+++ b/gcc/testsuite/gcc.target/aarch64/negs.c
@@ -20,14 +20,16 @@ negs_si_test1 (int a, int b, int c)
int
negs_si_test3 (int a, int b, int c)
{
- int d = -(b) << 3;
+ int d = -(b) << 1;
- /* { dg-final { scan-assembler "negs\tw\[0-9\]+, w\[0-9\]+, lsl 3" } } */
- if (d == 0)
- return a + c;
+ /* { dg-final { scan-assembler "negs\tw\[0-9\]+, w\[0-9\]+, lsl 1" } } */
+ if (d != 0)
+ {
+ z = d;
+ return b + c + d;
+ }
- z = d;
- return b + c + d;
+ return a + c;
}
typedef long long s64;
@@ -49,14 +51,16 @@ negs_di_test1 (s64 a, s64 b, s64 c)
s64
negs_di_test3 (s64 a, s64 b, s64 c)
{
- s64 d = -(b) << 3;
+ s64 d = -(b) << 1;
- /* { dg-final { scan-assembler "negs\tx\[0-9\]+, x\[0-9\]+, lsl 3" } } */
- if (d == 0)
- return a + c;
+ /* { dg-final { scan-assembler "negs\tx\[0-9\]+, x\[0-9\]+, lsl 1" } } */
+ if (d != 0)
+ {
+ zz = d;
+ return b + c + d;
+ }
- zz = d;
- return b + c + d;
+ return a + c;
}
int main ()
@@ -73,11 +77,11 @@ int main ()
abort ();
x = negs_si_test3 (13, 14, 5);
- if (x != -93)
+ if (x != -9)
abort ();
x = negs_si_test3 (15, 21, 2);
- if (x != -145)
+ if (x != -19)
abort ();
y = negs_di_test1 (0x20202020ll,
@@ -95,13 +99,13 @@ int main ()
y = negs_di_test3 (0x62523781ll,
0x64234978ll,
0x12345123ll);
- if (y != 0xfffffffd553d4edbll)
+ if (y != 0xffffffffae1107abll)
abort ();
y = negs_di_test3 (0x763526268ll,
0x101010101ll,
0x222222222ll);
- if (y != 0xfffffffb1b1b1b1bll)
+ if (y != 0x0000000121212121ll)
abort ();
return 0;
--
2.43.0