From: Daniel Barboza <[email protected]>

Given a comparison with the format

A << CST1 EQ|NE CST2

Turn it into:

A & CSTmask EQ|NE CST2 >> CST1

Where 'CSTmask' is a bitmask that filters out the bits of A that would be
discarded during the shift. The idea is that a bit_and is, on most common
targets at least, more efficient than an lshift.

Note that we're not handling the case "A << 1 != CST2" because this is a
pattern that is optimized by tree-ssa-loop-niter.cc, turning a whole
loop into a single CTZ().

Bootstrapped and regression tested on x86.  Includes aarch64 regression
test changes pointed out by the Linaro automatic CI bot.

        PR tree-optimization/124019

gcc/ChangeLog:

        * match.pd (`A<<CST1 EQ|NE CST2 -> (A&CSTmask) EQ|NE (CST2>>CST1)`):
        New pattern.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/cmp_shifted_reg_1.c: Changed shift
        immediate to "1" to dodge this optimization and keep
        generating the shift and compare.
        * gcc.target/aarch64/negs.c: Changed shift immediate to "1" in
        negs_si_test3 and negs_di_test3, and switched the comparison
        from "== 0" to "!= 0", to dodge this optimization and keep
        generating "negs".  Expected values for both were updated
        accordingly.
        * gcc.dg/tree-ssa/pr124019.c: New test.
---

Changes from v1:
- Fixed aarch64 regression tests reported by the Linaro CI bot.
- v1 link: https://gcc.gnu.org/pipermail/gcc-patches/2026-February/709529.html

 gcc/match.pd                                  | 24 +++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr124019.c      | 40 +++++++++++++++++++
 .../gcc.target/aarch64/cmp_shifted_reg_1.c    |  4 +-
 gcc/testsuite/gcc.target/aarch64/negs.c       | 36 +++++++++--------
 4 files changed, 86 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr124019.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 7f16fd4e081..b5f5a1cf1ac 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1454,6 +1454,30 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (if (TYPE_UNSIGNED (type))
     (bit_and @0 (bit_not (lshift { build_all_ones_cst (type); } @1)))))
 
+/* PR124019: A << CST1 EQ|NE CST2 -> (A & CSTmask) EQ|NE (CST2 >> CST1);  */
+(for cmp (eq ne)
+ (simplify
+  (cmp (lshift@3 @0 INTEGER_CST@1) INTEGER_CST@2)
+   (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+       && TREE_INT_CST_LOW (@1) <= TYPE_PRECISION (TREE_TYPE (@0))
+       /* The "A << 1 != CST2" pattern is used in trailing zero
+          count detection to determine if a loop consists of a
+          CTZ call, and that generates better code than what
+          we're doing here.  See pr114760-1.c, ntz32_3 () for
+          more info.  */
+       && (!(cmp == NE_EXPR && TREE_INT_CST_LOW (@1) == 1)))
+    (with {
+      tree type0 = TREE_TYPE (@0);
+      tree type3 = TREE_TYPE (@3);
+      unsigned prec = TYPE_PRECISION (type0);
+      unsigned mask_len = prec - TREE_INT_CST_LOW (@1);
+      wide_int cst1_mask = wi::mask (mask_len, false, prec);
+      wide_int res = wi::lrshift (wi::to_wide (@2), wi::to_wide (@1));
+    }
+     (cmp (convert:type3
+           (bit_and @0 { wide_int_to_tree (type0, cst1_mask); }))
+         { wide_int_to_tree (type3, res); })))))
+
 (for bitop (bit_and bit_ior)
      cmp (eq ne)
  /* PR35691: Transform
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr124019.c b/gcc/testsuite/gcc.dg/tree-ssa/pr124019.c
new file mode 100644
index 00000000000..bd46864c75b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr124019.c
@@ -0,0 +1,40 @@
+/* { dg-additional-options -O2 } */
+/* { dg-additional-options -fdump-tree-optimized } */
+
+typedef unsigned char uint8_t;
+typedef signed char int8_t;
+
+uint8_t f1(int8_t a)
+{
+    int8_t b = a << 4;
+    return b == -128;
+}
+
+uint8_t f2(int8_t a)
+{
+    int8_t b = a << 4;
+    return b != -128;
+}
+
+uint8_t f3(int8_t a)
+{
+    int8_t b = a << 6;
+    return b == -128;
+}
+
+uint8_t f4(int8_t a)
+{
+    int8_t b = a << 7;
+    return b == -128;
+}
+
+uint8_t f5(int8_t a)
+{
+    int8_t b = a << 1;
+    return b == -128;
+}
+/* { dg-final { scan-tree-dump-times " & 15;" 2 optimized } } */
+/* { dg-final { scan-tree-dump-times " & 3;" 1 optimized } } */
+/* { dg-final { scan-tree-dump-times " & 1;" 1 optimized } } */
+/* { dg-final { scan-tree-dump-times " & 127;" 1 optimized } } */
+/* { dg-final { scan-tree-dump-times " << " 0 optimized } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/cmp_shifted_reg_1.c b/gcc/testsuite/gcc.target/aarch64/cmp_shifted_reg_1.c
index cacecf4e71d..3ef02a325f2 100644
--- a/gcc/testsuite/gcc.target/aarch64/cmp_shifted_reg_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/cmp_shifted_reg_1.c
@@ -3,9 +3,9 @@
 
 int f3 (int x, int y)
 {
-  int res = x << 3;
+  int res = x << 1;
   return res != 0;
 }
 
 /* We should combine the shift and compare */
-/* { dg-final { scan-assembler "cmp\.*\twzr, w\[0-9\]+, lsl 3" } } */
+/* { dg-final { scan-assembler "cmp\.*\twzr, w\[0-9\]+, lsl 1" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/negs.c b/gcc/testsuite/gcc.target/aarch64/negs.c
index 1c23041eae7..55b7ec2a0cd 100644
--- a/gcc/testsuite/gcc.target/aarch64/negs.c
+++ b/gcc/testsuite/gcc.target/aarch64/negs.c
@@ -20,14 +20,16 @@ negs_si_test1 (int a, int b, int c)
 int
 negs_si_test3 (int a, int b, int c)
 {
-  int d = -(b) << 3;
+  int d = -(b) << 1;
 
-  /* { dg-final { scan-assembler "negs\tw\[0-9\]+, w\[0-9\]+, lsl 3" } } */
-  if (d == 0)
-    return a + c;
+  /* { dg-final { scan-assembler "negs\tw\[0-9\]+, w\[0-9\]+, lsl 1" } } */
+  if (d != 0)
+    {
+      z = d;
+      return b + c + d;
+    }
 
-  z = d;
-    return b + c + d;
+  return a + c;
 }
 
 typedef long long s64;
@@ -49,14 +51,16 @@ negs_di_test1 (s64 a, s64 b, s64 c)
 s64
 negs_di_test3 (s64 a, s64 b, s64 c)
 {
-  s64 d = -(b) << 3;
+  s64 d = -(b) << 1;
 
-  /* { dg-final { scan-assembler "negs\tx\[0-9\]+, x\[0-9\]+, lsl 3" } } */
-  if (d == 0)
-    return a + c;
+  /* { dg-final { scan-assembler "negs\tx\[0-9\]+, x\[0-9\]+, lsl 1" } } */
+  if (d != 0)
+    {
+      zz = d;
+      return b + c + d;
+    }
 
-  zz = d;
-    return b + c + d;
+  return a + c;
 }
 
 int main ()
@@ -73,11 +77,11 @@ int main ()
     abort ();
 
   x = negs_si_test3 (13, 14, 5);
-  if (x != -93)
+  if (x != -9)
     abort ();
 
   x = negs_si_test3 (15, 21, 2);
-  if (x != -145)
+  if (x != -19)
     abort ();
 
   y = negs_di_test1 (0x20202020ll,
@@ -95,13 +99,13 @@ int main ()
   y = negs_di_test3 (0x62523781ll,
                     0x64234978ll,
                     0x12345123ll);
-  if (y != 0xfffffffd553d4edbll)
+  if (y != 0xffffffffae1107abll)
     abort ();
 
   y = negs_di_test3 (0x763526268ll,
                     0x101010101ll,
                     0x222222222ll);
-  if (y != 0xfffffffb1b1b1b1bll)
+  if (y != 0x0000000121212121ll)
     abort ();
 
   return 0;
-- 
2.43.0

Reply via email to