As Marc suggested, removed the new pointer_diff rule, and add another rule to 
fold
convert-add expression. This new rule is:

  (T)(A * C) +- (T)(B * C) -> (T) ((A +- B) * C)

Regards,
Feng

---
2020-06-01  Feng Xue  <f...@os.amperecomputing.com>

gcc/
        PR tree-optimization/94234
        * match.pd ((T)(A * C) +- (T)(B * C)) -> (T)((A +- B) * C): New
        simplification.
        * ((PTR_A + OFF) - (PTR_B + OFF)) -> (PTR_A - PTR_B): New
        simplification.

gcc/testsuite/
        PR tree-optimization/94234
        * gcc.dg/pr94234.c: New test.
---
 gcc/match.pd                   | 28 ++++++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/pr94234.c | 24 ++++++++++++++++++++++++
 2 files changed, 52 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/pr94234.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 33ee1a920bf..4f340bfe40a 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -2515,6 +2515,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
             && TREE_CODE (@2) == INTEGER_CST
             && tree_int_cst_sign_bit (@2) == 0))
      (minus (convert @1) (convert @2)))))
+   (simplify
+    (pointer_diff (pointer_plus @0 @2) (pointer_plus @1 @2))
+     (pointer_diff @0 @1))
    (simplify
     (pointer_diff (pointer_plus @@0 @1) (pointer_plus @0 @2))
     /* The second argument of pointer_plus must be interpreted as signed, and
@@ -2526,6 +2529,31 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
      (minus (convert (view_convert:stype @1))
            (convert (view_convert:stype @2)))))))

+/* (T)(A * C) +- (T)(B * C) -> (T)((A +- B) * C) and
+   (T)(A * C) +- (T)(A) -> (T)(A * (C +- 1)). */
+(if (INTEGRAL_TYPE_P (type))
+ (for plusminus (plus minus)
+  (simplify
+   (plusminus (convert:s (mult:cs @0 @1)) (convert:s (mult:cs @0 @2)))
+   (if (element_precision (type) <= element_precision (TREE_TYPE (@0))
+       && (TYPE_OVERFLOW_UNDEFINED (type) || TYPE_OVERFLOW_WRAPS (type))
+       && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
+    (convert (mult (plusminus @1 @2) @0))))
+  (simplify
+   (plusminus (convert @0) (convert@2 (mult:c@3 @0 @1)))
+   (if (element_precision (type) <= element_precision (TREE_TYPE (@0))
+       && (TYPE_OVERFLOW_UNDEFINED (type) || TYPE_OVERFLOW_WRAPS (type))
+       && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))
+       && single_use (@2) && single_use (@3))
+    (convert (mult (plusminus { build_one_cst (TREE_TYPE (@1)); } @1) @0))))
+  (simplify
+   (plusminus (convert@2 (mult:c@3 @0 @1)) (convert @0))
+   (if (element_precision (type) <= element_precision (TREE_TYPE (@0))
+       && (TYPE_OVERFLOW_UNDEFINED (type) || TYPE_OVERFLOW_WRAPS (type))
+       && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))
+       && single_use (@2) && single_use (@3))
+    (convert (mult (plusminus @1 { build_one_cst (TREE_TYPE (@1)); }) @0))))))
+
 /* (A * C) +- (B * C) -> (A+-B) * C and (A * C) +- A -> A * (C+-1).
     Modeled after fold_plusminus_mult_expr.  */
 (if (!TYPE_SATURATING (type)
diff --git a/gcc/testsuite/gcc.dg/pr94234.c b/gcc/testsuite/gcc.dg/pr94234.c
new file mode 100644
index 00000000000..3f7c7a5e58f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr94234.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-forwprop1" } */
+
+typedef __SIZE_TYPE__ size_t;
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+
+ptrdiff_t foo (char *a, size_t n)
+{
+  char *b1 = a + 8 * n;
+  char *b2 = a + 8 * (n - 1);
+
+  return b1 - b2;
+}
+
+ptrdiff_t goo (char *a, size_t n, size_t m)
+{
+  char *b1 = a + 8 * n;
+  char *b2 = a + 8 * (n + 1);
+
+  return (b1 + m) - (b2 + m);
+}
+
+/* { dg-final { scan-tree-dump-times "return 8;" 1 "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times "return -8;" 1 "forwprop1" } } */
--


________________________________________
From: Richard Biener <richard.guent...@gmail.com>
Sent: Thursday, June 4, 2020 4:30 PM
To: gcc-patches@gcc.gnu.org
Cc: Feng Xue OS
Subject: Re: [PATCH] Add pattern for pointer-diff on addresses with same 
base/offset (PR 94234)

On Wed, Jun 3, 2020 at 4:33 PM Marc Glisse <marc.gli...@inria.fr> wrote:
>
> On Wed, 3 Jun 2020, Feng Xue OS via Gcc-patches wrote:
>
> >> Ah, looking at the PR, you decided to perform the operation as unsigned
> >> because that has fewer NOP conversions, which, in that particular testcase
> >> where the offsets are originally unsigned, means we simplify better. But I
> >> would expect it to regress other testcases (in particular if the offsets
> >> were originally signed). Also, changing the second argument of
> >> pointer_plus to be signed, as is supposed to eventually happen, would
> >> break your testcase again.
> > The old rule might produce overflow result (offset_a = (signed_int_max)UL,
> > offset_b = 1UL).
>
> signed_int_max-1 does not overflow. But the point is that pointer_plus /
> pointer_diff are defined in a way that if that subtraction would overflow,
> then one of the pointer_plus or pointed_diff would have been undefined
> already. In particular, you cannot have objects larger than half the
> address space, and pointer_plus/pointer_diff have to remain inside an
> object. Doing the subtraction in a signed type keeps (part of) that
> information.
>
> > Additionally, (stype)(offset_a - offset_b) is more compact,
>
> Not if offset_a comes from (utype)a and offset_b from (utype)b with a and
> b signed. Using size_t indices as in the bugzilla testcase is not
> recommended practice. Change it to ssize_t, and we do optimize the
> testcase in CCP1 already.
>
> > there might be
> > further simplification opportunities on offset_a - offset_b, even it is not
> > in form of (A * C - B * C), for example (~A - 1 -> -A). But for old rule, 
> > we have
> > to introduce another rule as (T)A - (T)(B) -> (T)(A - B), which seems to
> > be too generic to benefit performance in all situations.
>
> Sadly, conversions complicate optimizations and are all over the place, we
> need to handle them in more places. I sometimes dream of getting rid of
> NOP conversions, and having a single PLUS_EXPR with some kind of flag
> saying if it can wrap/saturate/trap when seen as a signed/unsigned
> operation, i.e. push the information on the operations instead of objects.
>
> > If the 2nd argument is signed, we can add a specific rule as your suggestion
> > (T)(A * C) - (T)(B * C) -> (T) (A - B) * C.
> >
> >> At the very least we want to keep a comment next to the transformation
> >> explaining the situation.
> >
> >> If there are platforms where the second argument of pointer_plus is a
> >> smaller type than the result of pointer_diff (can this happen? I keep
> >> forgetting all the weird things some platforms do), this version may do an
> >> unsafe zero-extension.
> > If the 2nd argument is a smaller type, this might bring confuse semantic to
> > pointer_plus operator. Suppose the type is a (unsigned) char, the expression
> > "ptr + ((char) -1)" represents ptr + 255 or ptr - 1?
>
> (pointer_plus ptr 255) would mean ptr - 1 on a platform where the second
> argument of pointer_plus has size 1 byte.

Yes.  Note this is the reason for using a signed type for the minus as comment

    /* The second argument of pointer_plus must be interpreted as signed, and
       thus sign-extended if necessary.  */

explains.  That the type of the offset operand in a pointer-plus is always
unsigned is semantically incorrect.  Whenever taken out of context you
have to re-interpret the offset value as a signed entity.

And yes, there do exist targets where pointers are larger than offsets.
The size of pointers (ptr_mode) is specified by POINTER_SIZE
while the pointer-plus offset type is SIZETYPE (not SIZE_TYPE,
the C ptrdiff_t type is derived from SIZE_TYPE).

As Marc said for this particular reason pointer-plus offsets should
be instead forced to have signed type [or not forced a particular
sign and precision so the operands sign specifies how to extend it].
But it's far from a trivial task to rectify this IL mistake.

Richard.

>
>
> Do note that I am not a reviewer, what I say isn't final.
>
> --
> Marc Glisse
From bf6174dd8124d22a2f22a9cff156cfb8bd94de23 Mon Sep 17 00:00:00 2001
From: Feng Xue <f...@os.amperecomputing.com>
Date: Mon, 1 Jun 2020 11:57:35 +0800
Subject: [PATCH] tree-optimization/94234 - add convert-add patterns for
 ptr-diff on addresses with same base and unsigned offset

2020-06-01  Feng Xue  <f...@os.amperecomputing.com>

gcc/
	PR tree-optimization/94234
	* match.pd ((T)(A * C) +- (T)(B * C)) -> (T)((A +- B) * C): New
	simplification.
	* ((PTR_A + OFF) - (PTR_B + OFF)) -> (PTR_A - PTR_B): New
	simplification.

gcc/testsuite/
	PR tree-optimization/94234
	* gcc.dg/pr94234.c: New test.
---
 gcc/match.pd                   | 28 ++++++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/pr94234.c | 24 ++++++++++++++++++++++++
 2 files changed, 52 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/pr94234.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 33ee1a920bf..4f340bfe40a 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -2515,6 +2515,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 	     && TREE_CODE (@2) == INTEGER_CST
 	     && tree_int_cst_sign_bit (@2) == 0))
      (minus (convert @1) (convert @2)))))
+   (simplify
+    (pointer_diff (pointer_plus @0 @2) (pointer_plus @1 @2))
+     (pointer_diff @0 @1))
    (simplify
     (pointer_diff (pointer_plus @@0 @1) (pointer_plus @0 @2))
     /* The second argument of pointer_plus must be interpreted as signed, and
@@ -2526,6 +2529,31 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
      (minus (convert (view_convert:stype @1))
 	    (convert (view_convert:stype @2)))))))
 
+/* (T)(A * C) +- (T)(B * C) -> (T)((A +- B) * C) and
+   (T)(A * C) +- (T)(A) -> (T)(A * (C +- 1)). */
+(if (INTEGRAL_TYPE_P (type))
+ (for plusminus (plus minus)
+  (simplify
+   (plusminus (convert:s (mult:cs @0 @1)) (convert:s (mult:cs @0 @2)))
+   (if (element_precision (type) <= element_precision (TREE_TYPE (@0))
+	&& (TYPE_OVERFLOW_UNDEFINED (type) || TYPE_OVERFLOW_WRAPS (type))
+	&& TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
+    (convert (mult (plusminus @1 @2) @0))))
+  (simplify
+   (plusminus (convert @0) (convert@2 (mult:c@3 @0 @1)))
+   (if (element_precision (type) <= element_precision (TREE_TYPE (@0))
+	&& (TYPE_OVERFLOW_UNDEFINED (type) || TYPE_OVERFLOW_WRAPS (type))
+	&& TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))
+	&& single_use (@2) && single_use (@3))
+    (convert (mult (plusminus { build_one_cst (TREE_TYPE (@1)); } @1) @0))))
+  (simplify
+   (plusminus (convert@2 (mult:c@3 @0 @1)) (convert @0))
+   (if (element_precision (type) <= element_precision (TREE_TYPE (@0))
+	&& (TYPE_OVERFLOW_UNDEFINED (type) || TYPE_OVERFLOW_WRAPS (type))
+	&& TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))
+	&& single_use (@2) && single_use (@3))
+    (convert (mult (plusminus @1 { build_one_cst (TREE_TYPE (@1)); }) @0))))))
+
 /* (A * C) +- (B * C) -> (A+-B) * C and (A * C) +- A -> A * (C+-1).
     Modeled after fold_plusminus_mult_expr.  */
 (if (!TYPE_SATURATING (type)
diff --git a/gcc/testsuite/gcc.dg/pr94234.c b/gcc/testsuite/gcc.dg/pr94234.c
new file mode 100644
index 00000000000..3f7c7a5e58f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr94234.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-forwprop1" } */ 
+
+typedef __SIZE_TYPE__ size_t;
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+
+ptrdiff_t foo (char *a, size_t n)
+{
+  char *b1 = a + 8 * n;
+  char *b2 = a + 8 * (n - 1);
+
+  return b1 - b2;
+}
+
+ptrdiff_t goo (char *a, size_t n, size_t m)
+{
+  char *b1 = a + 8 * n;
+  char *b2 = a + 8 * (n + 1);
+
+  return (b1 + m) - (b2 + m);
+}
+
+/* { dg-final { scan-tree-dump-times "return 8;" 1 "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times "return -8;" 1 "forwprop1" } } */
-- 
2.17.1

Reply via email to