On Thu, 17 Sep 2020, Michael Matz wrote: > Hello, > > On Wed, 16 Sep 2020, Richard Biener wrote: > > > This removes the canonicalization of X + -CST to X - CST to facilitate > > SLP vectorization analysis where we currently treat the added testcase > > as a two-operation plus blend vectorization opportunity. While there > > may be the possibility to avoid this in the vectorizer itself the > > canonicalization is somewhat premature - its motivation is that > > a FP constant of both signs is common and thus canonicalizing to > > positive values optimizes the constant pool. But if you mix, say, > > x * -7.1 and x - 7.1 this backfires - this instead looks like > > a local optimization to be applied in RTL CSE or even LRA > > load inheritance. I filed PR97071 for this. > > > > Bootstrapped and tested on x86_64-unknown-linux-gnu, any objections? > > You need to adjust the comments, this will be very confusing in a couple > years: > > /* We want to canonicalize to positive real constants. Pretend > that only negative ones can be easily negated. */ > return true;
Yep, fixed. I spotted and changed the above only after testing, and re-testing revealed: FAIL: gcc.dg/extract_recip_4.c scan-tree-dump-times optimized "mult_expr" 7 FAIL: gcc.dg/extract_recip_4.c scan-tree-dump-times optimized "rdiv_expr" 3 FAIL: gcc.dg/pr19988.c scan-tree-dump-times optimized " \\\\* " 2 FAIL: gcc.dg/pr19988.c scan-tree-dump-times original " 1.23" 2 Of these, the gcc.dg/pr19988.c FAIL is now fixed by the FRE patch pushed today. The gcc.dg/extract_recip_4.c FAIL looks quite special (the different canonicalization means hoisting a division no longer happens, which in turn alters the recip pass cost consideration); a slightly altered testcase would already FAIL. Still, I've not pushed this for now; I'll eventually come back to it though. Richard. > > Ciao, > Michael. > > > > > > Thanks, > > Richard. > > > > 2020-09-16 Richard Biener <rguent...@suse.de> > > > > * fold-const.c (fold_binary_loc): Remove condition > > on REAL_CST for A - B -> A + (-B) transform. > > (negate_expr_p): Remove condition on REAL_CST. > > * match.pd (negate_expr_p): Likewise. > > (X + -C -> X - C): Remove. > > > > * gcc.dg/vect/slp-49.c: New testcase. > > * gcc.dg/tree-ssa/pr90356-1.c: Adjust. > > * gcc.dg/tree-ssa/pr90356-3.c: Likewise. > > * gcc.dg/tree-ssa/pr90356-4.c: Likewise. > > * gcc.target/i386/pr54855-3.c: Likewise. > > * gfortran.dg/reassoc_1.f90: Likewise. > > * gfortran.dg/reassoc_2.f90: Likewise. 
> > --- > > gcc/fold-const.c | 7 ++----- > > gcc/match.pd | 11 +---------- > > gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c | 4 ++-- > > gcc/testsuite/gcc.dg/tree-ssa/pr90356-3.c | 2 +- > > gcc/testsuite/gcc.dg/tree-ssa/pr90356-4.c | 2 +- > > gcc/testsuite/gcc.dg/vect/slp-49.c | 15 +++++++++++++++ > > gcc/testsuite/gcc.target/i386/pr54855-3.c | 2 +- > > gcc/testsuite/gfortran.dg/reassoc_1.f90 | 2 +- > > gcc/testsuite/gfortran.dg/reassoc_2.f90 | 2 +- > > 9 files changed, 25 insertions(+), 22 deletions(-) > > create mode 100644 gcc/testsuite/gcc.dg/vect/slp-49.c > > > > diff --git a/gcc/fold-const.c b/gcc/fold-const.c > > index 0cc80adf632..ec369169a4c 100644 > > --- a/gcc/fold-const.c > > +++ b/gcc/fold-const.c > > @@ -401,7 +401,7 @@ negate_expr_p (tree t) > > case REAL_CST: > > /* We want to canonicalize to positive real constants. Pretend > > that only negative ones can be easily negated. */ > > - return REAL_VALUE_NEGATIVE (TREE_REAL_CST (t)); > > + return true; > > > > case COMPLEX_CST: > > return negate_expr_p (TREE_REALPART (t)) > > @@ -10962,10 +10962,7 @@ fold_binary_loc (location_t loc, enum tree_code > > code, tree type, > > /* A - B -> A + (-B) if B is easily negatable. */ > > if (negate_expr_p (op1) > > && ! TYPE_OVERFLOW_SANITIZED (type) > > - && ((FLOAT_TYPE_P (type) > > - /* Avoid this transformation if B is a positive REAL_CST. 
> > */ > > - && (TREE_CODE (op1) != REAL_CST > > - || REAL_VALUE_NEGATIVE (TREE_REAL_CST (op1)))) > > + && (FLOAT_TYPE_P (type) > > || INTEGRAL_TYPE_P (type))) > > return fold_build2_loc (loc, PLUS_EXPR, type, > > fold_convert_loc (loc, type, arg0), > > diff --git a/gcc/match.pd b/gcc/match.pd > > index 7d63bb973cb..9f14d50ae2d 100644 > > --- a/gcc/match.pd > > +++ b/gcc/match.pd > > @@ -1307,8 +1307,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > > (negate @0) > > (if (!TYPE_OVERFLOW_SANITIZED (type)))) > > (match negate_expr_p > > - REAL_CST > > - (if (REAL_VALUE_NEGATIVE (TREE_REAL_CST (t))))) > > + REAL_CST) > > /* VECTOR_CST handling of non-wrapping types would recurse in unsupported > > ways. */ > > (match negate_expr_p > > @@ -3326,14 +3325,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > > > > /* Canonicalization of binary operations. */ > > > > -/* Convert X + -C into X - C. */ > > -(simplify > > - (plus @0 REAL_CST@1) > > - (if (REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1))) > > - (with { tree tem = const_unop (NEGATE_EXPR, type, @1); } > > - (if (!TREE_OVERFLOW (tem) || !flag_trapping_math) > > - (minus @0 { tem; }))))) > > - > > /* Convert x+x into x*2. */ > > (simplify > > (plus @0 @0) > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c > > b/gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c > > index c3a15ea21af..ac95ec56810 100644 > > --- a/gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr90356-1.c > > @@ -2,8 +2,8 @@ > > /* { dg-do compile } */ > > /* { dg-options "-O2 -fno-rounding-math -fsignaling-nans -fsigned-zeros > > -fdump-tree-optimized" } */ > > /* { dg-final { scan-tree-dump-times "x_\[0-9]*.D. \\+ 0.0;" 12 > > "optimized" } } */ > > -/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. - 0.0;" 4 "optimized" } > > } */ > > -/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 16 "optimized" } } */ > > +/* { dg-final { scan-tree-dump-times "y_\[0-9]*.D. 
\\+ -0.0;" 4 > > "optimized" } } */ > > +/* { dg-final { scan-tree-dump-times " \\+ -?0.0;" 16 "optimized" } } */ > > > > double f1 (double x) { return (x + 0.0) + 0.0; } > > double f2 (double y) { return (y + (-0.0)) + (-0.0); } > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr90356-3.c > > b/gcc/testsuite/gcc.dg/tree-ssa/pr90356-3.c > > index e658130c69f..951971b95ee 100644 > > --- a/gcc/testsuite/gcc.dg/tree-ssa/pr90356-3.c > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr90356-3.c > > @@ -1,6 +1,6 @@ > > /* PR tree-optimization/90356 */ > > /* { dg-do compile } */ > > /* { dg-options "-O2 -frounding-math -fsignaling-nans -fsigned-zeros > > -fdump-tree-optimized" } */ > > -/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 32 "optimized" } } */ > > +/* { dg-final { scan-tree-dump-times " \\+ -?0.0;" 32 "optimized" } } */ > > > > #include "pr90356-1.c" > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr90356-4.c > > b/gcc/testsuite/gcc.dg/tree-ssa/pr90356-4.c > > index 126cd10928b..3459ddba356 100644 > > --- a/gcc/testsuite/gcc.dg/tree-ssa/pr90356-4.c > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr90356-4.c > > @@ -1,6 +1,6 @@ > > /* PR tree-optimization/90356 */ > > /* { dg-do compile } */ > > /* { dg-options "-O2 -frounding-math -fno-signaling-nans -fsigned-zeros > > -fdump-tree-optimized" } */ > > -/* { dg-final { scan-tree-dump-times " \[+-] 0.0;" 32 "optimized" } } */ > > +/* { dg-final { scan-tree-dump-times " \\+ -?0.0;" 32 "optimized" } } */ > > > > #include "pr90356-1.c" > > diff --git a/gcc/testsuite/gcc.dg/vect/slp-49.c > > b/gcc/testsuite/gcc.dg/vect/slp-49.c > > new file mode 100644 > > index 00000000000..4b3ec77b99d > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/vect/slp-49.c > > @@ -0,0 +1,15 @@ > > +/* { dg-do compile } */ > > +/* { dg-require-effective-target vect_double } */ > > + > > +void foo (double *x) > > +{ > > + for (int i = 0; i < 1024; ++i) > > + { > > + x[2*i] += 1.0; > > + x[2*i+1] -= 2.0; > > + } > > +} > > + > > +/* { dg-final { scan-tree-dump 
"loop vectorized" "vect" } } */ > > +/* The loop should be SLPed with uniform plus. */ > > +/* { dg-final { scan-tree-dump-not "VEC_PERM" "vect" } } */ > > diff --git a/gcc/testsuite/gcc.target/i386/pr54855-3.c > > b/gcc/testsuite/gcc.target/i386/pr54855-3.c > > index 3c15dfc93d1..802d6882e43 100644 > > --- a/gcc/testsuite/gcc.target/i386/pr54855-3.c > > +++ b/gcc/testsuite/gcc.target/i386/pr54855-3.c > > @@ -1,6 +1,6 @@ > > /* { dg-do compile } */ > > /* { dg-options "-O2 -msse2 -mfpmath=sse" } */ > > -/* { dg-final { scan-assembler-times "subsd" 1 } } */ > > +/* { dg-final { scan-assembler-times "addsd" 1 } } */ > > /* { dg-final { scan-assembler-not "movapd" } } */ > > /* { dg-final { scan-assembler-not "movsd" } } */ > > > > diff --git a/gcc/testsuite/gfortran.dg/reassoc_1.f90 > > b/gcc/testsuite/gfortran.dg/reassoc_1.f90 > > index e3d84c4ffd9..19b47fc7ce6 100644 > > --- a/gcc/testsuite/gfortran.dg/reassoc_1.f90 > > +++ b/gcc/testsuite/gfortran.dg/reassoc_1.f90 > > @@ -9,4 +9,4 @@ end > > > > ! We need an explicit +5 and -5, and an intermediate ((bla)) expression > > ! (the reassoc barrier). Make use of "." matching lineends. > > -! { dg-final { scan-tree-dump "\\\+ 5.*\\\)\\\).* - 5" "optimized" } } > > +! { dg-final { scan-tree-dump "\\\+ 5.*\\\)\\\).* \\\+ -5" "optimized" } } > > diff --git a/gcc/testsuite/gfortran.dg/reassoc_2.f90 > > b/gcc/testsuite/gfortran.dg/reassoc_2.f90 > > index 3e323eb0134..62361619932 100644 > > --- a/gcc/testsuite/gfortran.dg/reassoc_2.f90 > > +++ b/gcc/testsuite/gfortran.dg/reassoc_2.f90 > > @@ -12,4 +12,4 @@ function test(a) > > test = d > > end > > > > -! { dg-final { scan-tree-dump "- 5" "optimized" } } > > +! { dg-final { scan-tree-dump "\\\+ -5" "optimized" } } > > > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany; GF: Felix Imend