Given a comparison with the format
A << CST1 EQ|NE CST2
Turn it into:
A & CSTmask EQ|NE CST2 >> CST1
Where 'CSTmask' is a bitmask that filters A bits that would be discarded
during the shift. The idea is that a bit_and is, on most common targets
at least, more efficient than a lshift.
Note that we're not handling the case "A << 1 != CST2" because this is a
pattern that is optimized by tree-ssa-loop-niter.cc, turning a whole loop
into a single CTZ().
PR tree-optimization/124019
gcc/ChangeLog:
* match.pd (`A<<CST1 EQ|NE CST2 -> (A&CSTmask) EQ|NE (CST2>>CST1)`):
New pattern.
gcc/testsuite/ChangeLog:
* gcc.dg/tree-ssa/pr124019.c: New test.
---
gcc/match.pd | 24 ++++++++++++++
gcc/testsuite/gcc.dg/tree-ssa/pr124019.c | 40 ++++++++++++++++++++++++
2 files changed, 64 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr124019.c
diff --git a/gcc/match.pd b/gcc/match.pd
index 7f16fd4e081..d7dd76fc350 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1454,6 +1454,30 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(if (TYPE_UNSIGNED (type))
(bit_and @0 (bit_not (lshift { build_all_ones_cst (type); } @1)))))
+/* PR124019: A << CST1 EQ|NE CST2 -> (A & CSTmask) EQ|NE (CST2 >> CST1); */
+(for cmp (eq ne)
+ (simplify
+  (cmp (lshift @0 INTEGER_CST@1) INTEGER_CST@2)
+  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+       && tree_fits_uhwi_p (@1)
+       && tree_to_uhwi (@1) < TYPE_PRECISION (TREE_TYPE (@0))
+       /* The "A << 1 != CST2" pattern is used in trailing zero
+          count detection to determine if a loop consists of a
+          CTZ call, and that generates better code than what
+          we're doing here.  See ntz32_3 in pr114760-1.c for
+          more info.  */
+       && !(cmp == NE_EXPR && tree_to_uhwi (@1) == 1))
+   (with {
+     tree type0 = TREE_TYPE (@0);
+     unsigned prec = TYPE_PRECISION (type0);
+     unsigned shift = tree_to_uhwi (@1);
+     wide_int mask = wi::mask (prec - shift, false, prec);
+     wide_int res = wi::lrshift (wi::to_wide (@2), shift);
+    }
+    /* Bail out if CST2 has bits set that the shift discards;
+       the comparison result is constant in that case.  */
+    (if (wi::lshift (res, shift) == wi::to_wide (@2))
+     (cmp (bit_and @0 { wide_int_to_tree (type0, mask); })
+      { wide_int_to_tree (type0, res); }))))))
+
(for bitop (bit_and bit_ior)
cmp (eq ne)
/* PR35691: Transform
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr124019.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr124019.c
new file mode 100644
index 00000000000..bd46864c75b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr124019.c
@@ -0,0 +1,40 @@
+/* { dg-additional-options -O2 } */
+/* { dg-additional-options -fdump-tree-optimized } */
+
+typedef unsigned char uint8_t;
+typedef signed char int8_t;
+
+uint8_t f1(int8_t a)
+{
+ int8_t b = a << 4;
+ return b == -128;
+}
+
+uint8_t f2(int8_t a)
+{
+ int8_t b = a << 4;
+ return b != -128;
+}
+
+uint8_t f3(int8_t a)
+{
+ int8_t b = a << 6;
+ return b == -128;
+}
+
+uint8_t f4(int8_t a)
+{
+ int8_t b = a << 7;
+ return b == -128;
+}
+
+uint8_t f5(int8_t a)
+{
+ int8_t b = a << 1;
+ return b == -128;
+}
+/* { dg-final { scan-tree-dump-times " & 15;" 2 optimized } } */
+/* { dg-final { scan-tree-dump-times " & 3;" 1 optimized } } */
+/* { dg-final { scan-tree-dump-times " & 1;" 1 optimized } } */
+/* { dg-final { scan-tree-dump-times " & 127;" 1 optimized } } */
+/* { dg-final { scan-tree-dump-times " << " 0 optimized } } */
--
2.43.0