As Marc suggested, removed the new pointer_diff rule, and add another rule to fold convert-add expression. This new rule is:
(T)(A * C) +- (T)(B * C) -> (T) ((A +- B) * C) Regards, Feng --- 2020-06-01 Feng Xue <f...@os.amperecomputing.com> gcc/ PR tree-optimization/94234 * match.pd ((T)(A * C) +- (T)(B * C)) -> (T)((A +- B) * C): New simplification. * ((PTR_A + OFF) - (PTR_B + OFF)) -> (PTR_A - PTR_B): New simplification. gcc/testsuite/ PR tree-optimization/94234 * gcc.dg/pr94234.c: New test. --- gcc/match.pd | 28 ++++++++++++++++++++++++++++ gcc/testsuite/gcc.dg/pr94234.c | 24 ++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/pr94234.c diff --git a/gcc/match.pd b/gcc/match.pd index 33ee1a920bf..4f340bfe40a 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -2515,6 +2515,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) && TREE_CODE (@2) == INTEGER_CST && tree_int_cst_sign_bit (@2) == 0)) (minus (convert @1) (convert @2))))) + (simplify + (pointer_diff (pointer_plus @0 @2) (pointer_plus @1 @2)) + (pointer_diff @0 @1)) (simplify (pointer_diff (pointer_plus @@0 @1) (pointer_plus @0 @2)) /* The second argument of pointer_plus must be interpreted as signed, and @@ -2526,6 +2529,31 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (minus (convert (view_convert:stype @1)) (convert (view_convert:stype @2))))))) +/* (T)(A * C) +- (T)(B * C) -> (T)((A +- B) * C) and + (T)(A * C) +- (T)(A) -> (T)(A * (C +- 1)). */ +(if (INTEGRAL_TYPE_P (type)) + (for plusminus (plus minus) + (simplify + (plusminus (convert:s (mult:cs @0 @1)) (convert:s (mult:cs @0 @2))) + (if (element_precision (type) <= element_precision (TREE_TYPE (@0)) + && (TYPE_OVERFLOW_UNDEFINED (type) || TYPE_OVERFLOW_WRAPS (type)) + && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))) + (convert (mult (plusminus @1 @2) @0)))) + (simplify + (plusminus (convert @0) (convert@2 (mult:c@3 @0 @1))) + (if (element_precision (type) <= element_precision (TREE_TYPE (@0)) + && (TYPE_OVERFLOW_UNDEFINED (type) || TYPE_OVERFLOW_WRAPS (type)) + && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)) + && single_use (@2) && single_use (@3)) + (convert (mult (plusminus { build_one_cst (TREE_TYPE (@1)); } @1) @0)))) + (simplify + (plusminus (convert@2 (mult:c@3 @0 @1)) (convert @0)) + (if (element_precision (type) <= element_precision (TREE_TYPE (@0)) + && (TYPE_OVERFLOW_UNDEFINED (type) || TYPE_OVERFLOW_WRAPS (type)) + && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)) + && single_use (@2) && single_use (@3)) + (convert (mult (plusminus @1 { build_one_cst (TREE_TYPE (@1)); }) @0)))))) + /* (A * C) +- (B * C) -> (A+-B) * C and (A * C) +- A -> A * (C+-1). Modeled after fold_plusminus_mult_expr. */ (if (!TYPE_SATURATING (type) diff --git a/gcc/testsuite/gcc.dg/pr94234.c b/gcc/testsuite/gcc.dg/pr94234.c new file mode 100644 index 00000000000..3f7c7a5e58f --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr94234.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-forwprop1" } */ + +typedef __SIZE_TYPE__ size_t; +typedef __PTRDIFF_TYPE__ ptrdiff_t; + +ptrdiff_t foo (char *a, size_t n) +{ + char *b1 = a + 8 * n; + char *b2 = a + 8 * (n - 1); + + return b1 - b2; +} + +ptrdiff_t goo (char *a, size_t n, size_t m) +{ + char *b1 = a + 8 * n; + char *b2 = a + 8 * (n + 1); + + return (b1 + m) - (b2 + m); +} + +/* { dg-final { scan-tree-dump-times "return 8;" 1 "forwprop1" } } */ +/* { dg-final { scan-tree-dump-times "return -8;" 1 "forwprop1" } } */ -- ________________________________________ From: Richard Biener <richard.guent...@gmail.com> Sent: Thursday, June 4, 2020 4:30 PM To: gcc-patches@gcc.gnu.org Cc: Feng Xue OS Subject: Re: [PATCH] Add pattern for pointer-diff on addresses with same base/offset (PR 94234) On Wed, Jun 3, 2020 at 4:33 PM Marc Glisse <marc.gli...@inria.fr> wrote: > > On Wed, 3 Jun 2020, Feng Xue OS via Gcc-patches wrote: > > >> Ah, looking at the PR, you decided to perform the operation as unsigned > >> because that has fewer NOP conversions, which, in that particular testcase > >> where the offsets are originally unsigned, means we simplify better. But I > >> would expect it to regress other testcases (in particular if the offsets > >> were originally signed). Also, changing the second argument of > >> pointer_plus to be signed, as is supposed to eventually happen, would > >> break your testcase again. > > The old rule might produce overflow result (offset_a = (signed_int_max)UL, > > offset_b = 1UL). > > signed_int_max-1 does not overflow. But the point is that pointer_plus / > pointer_diff are defined in a way that if that subtraction would overflow, > then one of the pointer_plus or pointed_diff would have been undefined > already. In particular, you cannot have objects larger than half the > address space, and pointer_plus/pointer_diff have to remain inside an > object. Doing the subtraction in a signed type keeps (part of) that > information. > > > Additionally, (stype)(offset_a - offset_b) is more compact, > > Not if offset_a comes from (utype)a and offset_b from (utype)b with a and > b signed. Using size_t indices as in the bugzilla testcase is not > recommended practice. Change it to ssize_t, and we do optimize the > testcase in CCP1 already. > > > there might be > > further simplification opportunities on offset_a - offset_b, even it is not > > in form of (A * C - B * C), for example (~A - 1 -> -A). But for old rule, > > we have > > to introduce another rule as (T)A - (T)(B) -> (T)(A - B), which seems to > > be too generic to benefit performance in all situations. > > Sadly, conversions complicate optimizations and are all over the place, we > need to handle them in more places. I sometimes dream of getting rid of > NOP conversions, and having a single PLUS_EXPR with some kind of flag > saying if it can wrap/saturate/trap when seen as a signed/unsigned > operation, i.e. push the information on the operations instead of objects. > > > If the 2nd argument is signed, we can add a specific rule as your suggestion > > (T)(A * C) - (T)(B * C) -> (T) (A - B) * C. > > > >> At the very least we want to keep a comment next to the transformation > >> explaining the situation. > > > >> If there are platforms where the second argument of pointer_plus is a > >> smaller type than the result of pointer_diff (can this happen? I keep > >> forgetting all the weird things some platforms do), this version may do an > >> unsafe zero-extension. > > If the 2nd argument is a smaller type, this might bring confuse semantic to > > pointer_plus operator. Suppose the type is a (unsigned) char, the expression > > "ptr + ((char) -1)" represents ptr + 255 or ptr - 1? > > (pointer_plus ptr 255) would mean ptr - 1 on a platform where the second > argument of pointer_plus has size 1 byte. Yes. Note this is the reason for using a signed type for the minus as comment /* The second argument of pointer_plus must be interpreted as signed, and thus sign-extended if necessary. */ explains. That the type of the offset operand in a pointer-plus is always unsigned is semantically incorrect. Whenever taken out of context you have to re-interpret the offset value as a signed entity. And yes, there do exist targets where pointers are larger than offsets. The size of pointers (ptr_mode) is specified by POINTER_SIZE while the pointer-plus offset type is SIZETYPE (not SIZE_TYPE, the C ptrdiff_t type is derived from SIZE_TYPE). As Marc said for this particular reason pointer-plus offsets should be instead forced to have signed type [or not forced a particular sign and precision so the operands sign specifies how to extend it]. But it's far from a trivial task to rectify this IL mistake. Richard. > > > Do note that I am not a reviewer, what I say isn't final. > > -- > Marc Glisse
From bf6174dd8124d22a2f22a9cff156cfb8bd94de23 Mon Sep 17 00:00:00 2001 From: Feng Xue <f...@os.amperecomputing.com> Date: Mon, 1 Jun 2020 11:57:35 +0800 Subject: [PATCH] tree-optimization/94234 - add convert-add patterns for ptr-diff on addresses with same base and unsigned offset 2020-06-01 Feng Xue <f...@os.amperecomputing.com> gcc/ PR tree-optimization/94234 * match.pd ((T)(A * C) +- (T)(B * C)) -> (T)((A +- B) * C): New simplification. * ((PTR_A + OFF) - (PTR_B + OFF)) -> (PTR_A - PTR_B): New simplification. gcc/testsuite/ PR tree-optimization/94234 * gcc.dg/pr94234.c: New test. --- gcc/match.pd | 28 ++++++++++++++++++++++++++++ gcc/testsuite/gcc.dg/pr94234.c | 24 ++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/pr94234.c diff --git a/gcc/match.pd b/gcc/match.pd index 33ee1a920bf..4f340bfe40a 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -2515,6 +2515,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) && TREE_CODE (@2) == INTEGER_CST && tree_int_cst_sign_bit (@2) == 0)) (minus (convert @1) (convert @2))))) + (simplify + (pointer_diff (pointer_plus @0 @2) (pointer_plus @1 @2)) + (pointer_diff @0 @1)) (simplify (pointer_diff (pointer_plus @@0 @1) (pointer_plus @0 @2)) /* The second argument of pointer_plus must be interpreted as signed, and @@ -2526,6 +2529,31 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (minus (convert (view_convert:stype @1)) (convert (view_convert:stype @2))))))) +/* (T)(A * C) +- (T)(B * C) -> (T)((A +- B) * C) and + (T)(A * C) +- (T)(A) -> (T)(A * (C +- 1)). */ +(if (INTEGRAL_TYPE_P (type)) + (for plusminus (plus minus) + (simplify + (plusminus (convert:s (mult:cs @0 @1)) (convert:s (mult:cs @0 @2))) + (if (element_precision (type) <= element_precision (TREE_TYPE (@0)) + && (TYPE_OVERFLOW_UNDEFINED (type) || TYPE_OVERFLOW_WRAPS (type)) + && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))) + (convert (mult (plusminus @1 @2) @0)))) + (simplify + (plusminus (convert @0) (convert@2 (mult:c@3 @0 @1))) + (if (element_precision (type) <= element_precision (TREE_TYPE (@0)) + && (TYPE_OVERFLOW_UNDEFINED (type) || TYPE_OVERFLOW_WRAPS (type)) + && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)) + && single_use (@2) && single_use (@3)) + (convert (mult (plusminus { build_one_cst (TREE_TYPE (@1)); } @1) @0)))) + (simplify + (plusminus (convert@2 (mult:c@3 @0 @1)) (convert @0)) + (if (element_precision (type) <= element_precision (TREE_TYPE (@0)) + && (TYPE_OVERFLOW_UNDEFINED (type) || TYPE_OVERFLOW_WRAPS (type)) + && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)) + && single_use (@2) && single_use (@3)) + (convert (mult (plusminus @1 { build_one_cst (TREE_TYPE (@1)); }) @0)))))) + /* (A * C) +- (B * C) -> (A+-B) * C and (A * C) +- A -> A * (C+-1). Modeled after fold_plusminus_mult_expr. */ (if (!TYPE_SATURATING (type) diff --git a/gcc/testsuite/gcc.dg/pr94234.c b/gcc/testsuite/gcc.dg/pr94234.c new file mode 100644 index 00000000000..3f7c7a5e58f --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr94234.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-forwprop1" } */ + +typedef __SIZE_TYPE__ size_t; +typedef __PTRDIFF_TYPE__ ptrdiff_t; + +ptrdiff_t foo (char *a, size_t n) +{ + char *b1 = a + 8 * n; + char *b2 = a + 8 * (n - 1); + + return b1 - b2; +} + +ptrdiff_t goo (char *a, size_t n, size_t m) +{ + char *b1 = a + 8 * n; + char *b2 = a + 8 * (n + 1); + + return (b1 + m) - (b2 + m); +} + +/* { dg-final { scan-tree-dump-times "return 8;" 1 "forwprop1" } } */ +/* { dg-final { scan-tree-dump-times "return -8;" 1 "forwprop1" } } */ -- 2.17.1