We have long had the fold:

/* Pattern match
     tem = (sizetype) ptr;
     tem = tem & algn;
     tem = -tem;
     ... = ptr p+ tem;
   and produce the simpler and easier to analyze with respect to alignment
     ... = ptr & ~algn;  */

But the gimple in gcc.target/aarch64/sve/pr98119.c has a variant in
which a constant is added before the conversion, giving:

     tem = (sizetype) (ptr p+ CST);
     tem = tem & algn;
     tem = -tem;
     ... = ptr p+ tem;

The same fold is valid for this variant provided that algn fits within
the trailing zero bits of CST: adding CST then cannot change any of the
bits selected by "& algn", so it has no effect on the result.

Similarly the testcase has:

     tem = (sizetype) (ptr p+ CST1);
     tem = tem & algn;
     tem = CST2 - tem;
     ... = ptr p+ tem;

This folds to:

     ... = (ptr & ~algn) p+ CST2;

if algn fits within the trailing zero bits of both CST1 and CST2.

An alternative would be:

     ... = (ptr p+ CST2) & ~algn;

but I would expect the alignment to be more easily shareable than
the CST2 addition, given that the CST2 addition wasn't being applied
by a POINTER_PLUS_EXPR.

gcc/
        * match.pd: Extend pointer alignment folds so that they handle
        the case where a constant is added before or after the alignment.

gcc/testsuite/
        * gcc.dg/pointer-arith-11.c: New test.
        * gcc.dg/pointer-arith-12.c: Likewise.
---
 gcc/match.pd                            | 27 ++++++++
 gcc/testsuite/gcc.dg/pointer-arith-11.c | 39 ++++++++++++
 gcc/testsuite/gcc.dg/pointer-arith-12.c | 82 +++++++++++++++++++++++++
 3 files changed, 148 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/pointer-arith-11.c
 create mode 100644 gcc/testsuite/gcc.dg/pointer-arith-12.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 3197d1cac75..0dc74cc86f7 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3037,6 +3037,33 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (with { tree algn = wide_int_to_tree (TREE_TYPE (@0), ~wi::to_wide (@1)); }
    (bit_and @0 { algn; })))
 
+/* Also match cases in which a constant is applied:
+
+   (1) tem = (sizetype) ptr;  ---> tem = (sizetype) (ptr p+ CST);
+   (2) tem = -tem             ---> tem = CST - tem;
+
+   and where "& algn" masks only trailing zeros of CST.  (1) then has no
+   effect, whereas (2) adds CST to the result.  */
+(simplify
+ (pointer_plus @0 (negate (bit_and (convert (pointer_plus @0 INTEGER_CST@1))
+                                  INTEGER_CST@2)))
+ (if (tree_int_cst_min_precision (@2, UNSIGNED) <= tree_ctz (@1))
+  (with { tree algn = wide_int_to_tree (TREE_TYPE (@0), ~wi::to_wide (@2)); }
+   (bit_and @0 { algn; }))))
+(simplify
+ (pointer_plus @0 (minus:s INTEGER_CST@1 (bit_and (convert @0) INTEGER_CST@2)))
+ (if (tree_int_cst_min_precision (@2, UNSIGNED) <= tree_ctz (@1))
+  (with { tree algn = wide_int_to_tree (TREE_TYPE (@0), ~wi::to_wide (@2)); }
+   (pointer_plus (bit_and @0 { algn; }) @1))))
+(simplify
+ (pointer_plus @0 (minus:s INTEGER_CST@1
+                          (bit_and (convert (pointer_plus @0 INTEGER_CST@2))
+                                   INTEGER_CST@3)))
+ (with { auto mask_width = tree_int_cst_min_precision (@3, UNSIGNED); }
+  (if (mask_width <= tree_ctz (@1) && mask_width <= tree_ctz (@2))
+   (with { tree algn = wide_int_to_tree (TREE_TYPE (@0), ~wi::to_wide (@3)); }
+    (pointer_plus (bit_and @0 { algn; }) @1)))))
+
 /* Try folding difference of addresses.  */
 (simplify
  (minus (convert ADDR_EXPR@0) (convert (pointer_plus @1 @2)))
diff --git a/gcc/testsuite/gcc.dg/pointer-arith-11.c b/gcc/testsuite/gcc.dg/pointer-arith-11.c
new file mode 100644
index 00000000000..08a295b9453
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pointer-arith-11.c
@@ -0,0 +1,39 @@
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+/* { dg-final { scan-tree-dump-times { & -16B?;} 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times { \+ 16;} 3 "optimized" } } */
+/* { dg-final { scan-tree-dump-not { & 15;} "optimized" } } */
+/* { dg-final { scan-tree-dump-not { \+ 96;} "optimized" } } */
+
+typedef __UINTPTR_TYPE__ uintptr_t;
+
+char *
+f1 (char *x)
+{
+  char *y = x + 32;
+  x += -((uintptr_t) y & 15);
+  return x;
+}
+
+char *
+f2 (char *x)
+{
+  x += 16 - ((uintptr_t) x & 15);
+  return x;
+}
+
+char *
+f3 (char *x)
+{
+  char *y = x + 32;
+  x += 16 - ((uintptr_t) y & 15);
+  return x;
+}
+
+char *
+f4 (char *x)
+{
+  char *y = x + 16;
+  x += 16 - ((uintptr_t) y & 15);
+  return x;
+}
diff --git a/gcc/testsuite/gcc.dg/pointer-arith-12.c b/gcc/testsuite/gcc.dg/pointer-arith-12.c
new file mode 100644
index 00000000000..ebdcbd3c967
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pointer-arith-12.c
@@ -0,0 +1,82 @@
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+/* { dg-final { scan-tree-dump-not { & -16B?;} "optimized" } } */
+/* { dg-final { scan-tree-dump-times { & 15;} 10 "optimized" } } */
+
+typedef __UINTPTR_TYPE__ uintptr_t;
+
+char *
+f1 (char *x)
+{
+  char *y = x + 97;
+  x += -((uintptr_t) y & 15);
+  return x;
+}
+
+char *
+f2 (char *x)
+{
+  char *y = x + 98;
+  x += -((uintptr_t) y & 15);
+  return x;
+}
+
+char *
+f3 (char *x)
+{
+  char *y = x + 100;
+  x += -((uintptr_t) y & 15);
+  return x;
+}
+
+char *
+f4 (char *x)
+{
+  char *y = x + 104;
+  x += -((uintptr_t) y & 15);
+  return x;
+}
+
+char *
+f5 (char *x)
+{
+  x += 1 - ((uintptr_t) x & 15);
+  return x;
+}
+
+char *
+f6 (char *x)
+{
+  x += 2 - ((uintptr_t) x & 15);
+  return x;
+}
+
+char *
+f7 (char *x)
+{
+  x += 4 - ((uintptr_t) x & 15);
+  return x;
+}
+
+char *
+f8 (char *x)
+{
+  x += 8 - ((uintptr_t) x & 15);
+  return x;
+}
+
+char *
+f9 (char *x)
+{
+  char *y = x + 8;
+  x += 16 - ((uintptr_t) y & 15);
+  return x;
+}
+
+char *
+f10 (char *x)
+{
+  char *y = x + 16;
+  x += 8 - ((uintptr_t) y & 15);
+  return x;
+}
-- 
2.25.1

Reply via email to