Given a comparison with the format

A << CST1 EQ|NE CST2

Turn it into:

A & CSTmask EQ|NE CST2 >> CST1

Where 'CSTmask' is a bitmask that filters A bits that would be discarded
during the shift. The idea is that a bit_and is, on most common targets
at least, more efficient than a lshift.

Note that we're not handling the case "A << 1 != CST2" because this is a
pattern that is optimized by tree-ssa-loop-niter.cc, turning a whole loop
into a single CTZ().

        PR tree-optimization/124019

gcc/ChangeLog:

        * match.pd (`A<<CST1 EQ|NE CST2 -> (A&CSTmask) EQ|NE (CST2>>CST1)`):
        New pattern.

gcc/testsuite/ChangeLog:

        * gcc.dg/tree-ssa/pr124019.c: New test.
---
 gcc/match.pd                             | 24 ++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr124019.c | 40 ++++++++++++++++++++++++
 2 files changed, 64 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr124019.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 7f16fd4e081..d7dd76fc350 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1454,6 +1454,30 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (if (TYPE_UNSIGNED (type))
     (bit_and @0 (bit_not (lshift { build_all_ones_cst (type); } @1)))))
 
+/* PR124019: A << CST1 EQ|NE CST2 -> (A & CSTmask) EQ|NE (CST2 >> CST1), where CSTmask keeps only the A bits not discarded by the shift.  */
+(for cmp (eq ne)
+ (simplify
+  (cmp (lshift@3 @0 INTEGER_CST@1) INTEGER_CST@2)
+   (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+       && TREE_INT_CST_LOW (@1) <= TYPE_PRECISION (TREE_TYPE (@0))  /* NOTE(review): '<=' admits CST1 == precision, giving a zero-length mask below -- confirm '<' was not intended; also assumes @1 fits a HOST_WIDE_INT.  */
+       /* The "A << 1 != CST2" pattern is used in trailing zero
+          count detection to determine if a loop consists of a
+          CTZ call, and that generates better code than what
+          we're doing here.  See ntz32_3 in pr114760-1.c for
+          more info.  */
+       && (!(cmp == NE_EXPR && TREE_INT_CST_LOW (@1) == 1)))
+    (with {
+      tree type0 = TREE_TYPE (@0);
+      tree type3 = TREE_TYPE (@3);  /* Type of the shift result, i.e. of the compared operand.  */
+      unsigned prec = TYPE_PRECISION (type0);
+      unsigned mask_len = prec - TREE_INT_CST_LOW (@1);  /* Number of A bits that survive the shift.  */
+      wide_int cst1_mask = wi::mask (mask_len, false, prec);  /* Low mask_len bits set.  */
+      wide_int res = wi::lrshift (wi::to_wide (@2), wi::to_wide (@1));  /* NOTE(review): lrshift silently drops CST2's low CST1 bits; if any of them are set, A << CST1 can never equal CST2, so this fold would change semantics -- verify such constants are rejected or folded earlier.  */
+    }
+     (cmp (convert:type3
+           (bit_and @0 { wide_int_to_tree (type0, cst1_mask); }))
+         { wide_int_to_tree (type3, res); })))))
+
 (for bitop (bit_and bit_ior)
      cmp (eq ne)
  /* PR35691: Transform
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr124019.c b/gcc/testsuite/gcc.dg/tree-ssa/pr124019.c
new file mode 100644
index 00000000000..bd46864c75b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr124019.c
@@ -0,0 +1,40 @@
+/* { dg-additional-options -O2 } */
+/* { dg-additional-options -fdump-tree-optimized } */
+
+typedef unsigned char uint8_t;
+typedef signed char int8_t;
+
+uint8_t f1(int8_t a)
+{
+    int8_t b = a << 4;
+    return b == -128;   /* Expect fold: (a & 15) == 8.  */
+}
+
+uint8_t f2(int8_t a)
+{
+    int8_t b = a << 4;
+    return b != -128;   /* '!=' with shift 4 is still folded; only shift-by-1 '!=' is excluded.  */
+}
+
+uint8_t f3(int8_t a)
+{
+    int8_t b = a << 6;
+    return b == -128;   /* Expect fold: (a & 3) == 2.  */
+}
+
+uint8_t f4(int8_t a)
+{
+    int8_t b = a << 7;
+    return b == -128;   /* Expect fold: (a & 1) == 1.  */
+}
+
+uint8_t f5(int8_t a)
+{
+    int8_t b = a << 1;
+    return b == -128;   /* '==' with shift 1 is folded; only the '!=' CTZ idiom is excluded.  */
+}
+/* { dg-final { scan-tree-dump-times " & 15;" 2 optimized } } */
+/* { dg-final { scan-tree-dump-times " & 3;" 1 optimized } } */
+/* { dg-final { scan-tree-dump-times " & 1;" 1 optimized } } */
+/* { dg-final { scan-tree-dump-times " & 127;" 1 optimized } } */
+/* { dg-final { scan-tree-dump-times " << " 0 optimized } } */
-- 
2.43.0

Reply via email to