On Tue, 14 Aug 2012, William J. Schmidt wrote: > Currently we can insert an initializer that performs a multiply in too > small of a type for correctness. For now, detect the problem and avoid > the optimization when this would happen. Eventually I will fix this up > to cause the multiply to be performed in a sufficiently wide type. > > Bootstrapped and tested on powerpc64-unknown-linux-gnu with no new > regressions. Ok for trunk?
Ok. Thanks, Richard. > Thanks, > Bill > > > gcc: > > 2012-08-14 Bill Schmidt <wschm...@linux.vnet.ibm.com> > > PR tree-optimization/54245 > * gimple-ssa-strength-reduction.c (legal_cast_p_1): New function. > (legal_cast_p): Split out logic to legal_cast_p_1. > (analyze_increments): Avoid introducing multiplies in smaller types. > > > gcc/testsuite: > > 2012-08-14 Bill Schmidt <wschm...@linux.vnet.ibm.com> > > PR tree-optimization/54245 > * gcc.dg/tree-ssa/pr54245.c: New test. > > > Index: gcc/testsuite/gcc.dg/tree-ssa/pr54245.c > =================================================================== > --- gcc/testsuite/gcc.dg/tree-ssa/pr54245.c (revision 0) > +++ gcc/testsuite/gcc.dg/tree-ssa/pr54245.c (revision 0) > @@ -0,0 +1,49 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O1 -fdump-tree-slsr-details" } */ > + > +#include <stdio.h> > + > +#define W1 22725 > +#define W2 21407 > +#define W3 19266 > +#define W6 8867 > + > +void idct_row(short *row, int *dst) > +{ > + int a0, a1, b0, b1; > + > + a0 = W1 * row[0]; > + a1 = a0; > + > + a0 += W2 * row[2]; > + a1 += W6 * row[2]; > + > + b0 = W1 * row[1]; > + b1 = W3 * row[1]; > + > + dst[0] = a0 + b0; > + dst[1] = a0 - b0; > + dst[2] = a1 + b1; > + dst[3] = a1 - b1; > +} > + > +static short block[8] = { 1, 2, 3, 4 }; > + > +int main(void) > +{ > + int out[4]; > + int i; > + > + idct_row(block, out); > + > + for (i = 0; i < 4; i++) > + printf("%d\n", out[i]); > + > + return !(out[2] == 87858 && out[3] == 10794); > +} > + > +/* For now, disable inserting an initializer when the multiplication will > + take place in a smaller type than originally. This test may be deleted > + in future when this case is handled more precisely. */ > +/* { dg-final { scan-tree-dump-times "Inserting initializer" 0 "slsr" } } */ > +/* { dg-final { cleanup-tree-dump "slsr" } } */ > Index: gcc/gimple-ssa-strength-reduction.c > =================================================================== > --- gcc/gimple-ssa-strength-reduction.c (revision 190305) > +++ gcc/gimple-ssa-strength-reduction.c (working copy) > @@ -1089,6 +1089,32 @@ slsr_process_neg (gimple gs, tree rhs1, bool speed > add_cand_for_stmt (gs, c); > } > > +/* Help function for legal_cast_p, operating on two trees. Checks > + whether it's allowable to cast from RHS to LHS. See legal_cast_p > + for more details. */ > + > +static bool > +legal_cast_p_1 (tree lhs, tree rhs) > +{ > + tree lhs_type, rhs_type; > + unsigned lhs_size, rhs_size; > + bool lhs_wraps, rhs_wraps; > + > + lhs_type = TREE_TYPE (lhs); > + rhs_type = TREE_TYPE (rhs); > + lhs_size = TYPE_PRECISION (lhs_type); > + rhs_size = TYPE_PRECISION (rhs_type); > + lhs_wraps = TYPE_OVERFLOW_WRAPS (lhs_type); > + rhs_wraps = TYPE_OVERFLOW_WRAPS (rhs_type); > + > + if (lhs_size < rhs_size > + || (rhs_wraps && !lhs_wraps) > + || (rhs_wraps && lhs_wraps && rhs_size != lhs_size)) > + return false; > + > + return true; > +} > + > /* Return TRUE if GS is a statement that defines an SSA name from > a conversion and is legal for us to combine with an add and multiply > in the candidate table. For example, suppose we have: > @@ -1129,28 +1155,11 @@ slsr_process_neg (gimple gs, tree rhs1, bool speed > static bool > legal_cast_p (gimple gs, tree rhs) > { > - tree lhs, lhs_type, rhs_type; > - unsigned lhs_size, rhs_size; > - bool lhs_wraps, rhs_wraps; > - > if (!is_gimple_assign (gs) > || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (gs))) > return false; > > - lhs = gimple_assign_lhs (gs); > - lhs_type = TREE_TYPE (lhs); > - rhs_type = TREE_TYPE (rhs); > - lhs_size = TYPE_PRECISION (lhs_type); > - rhs_size = TYPE_PRECISION (rhs_type); > - lhs_wraps = TYPE_OVERFLOW_WRAPS (lhs_type); > - rhs_wraps = TYPE_OVERFLOW_WRAPS (rhs_type); > - > - if (lhs_size < rhs_size > - || (rhs_wraps && !lhs_wraps) > - || (rhs_wraps && lhs_wraps && rhs_size != lhs_size)) > - return false; > - > - return true; > + return legal_cast_p_1 (gimple_assign_lhs (gs), rhs); > } > > /* Given GS which is a cast to a scalar integer type, determine whether > @@ -1996,6 +2005,31 @@ analyze_increments (slsr_cand_t first_dep, enum ma > != POINTER_PLUS_EXPR))) > incr_vec[i].cost = COST_NEUTRAL; > > + /* FORNOW: If we need to add an initializer, give up if a cast from > + the candidate's type to its stride's type can lose precision. > + This could eventually be handled better by expressly retaining the > + result of a cast to a wider type in the stride. Example: > + > + short int _1; > + _2 = (int) _1; > + _3 = _2 * 10; > + _4 = x + _3; ADD: x + (10 * _1) : int > + _5 = _2 * 15; > + _6 = x + _3; ADD: x + (15 * _1) : int > + > + Right now replacing _6 would cause insertion of an initializer > + of the form "short int T = _1 * 5;" followed by a cast to > + int, which could overflow incorrectly. Had we recorded _2 or > + (int)_1 as the stride, this wouldn't happen. However, doing > + this breaks other opportunities, so this will require some > + care. */ > + else if (!incr_vec[i].initializer > + && TREE_CODE (first_dep->stride) != INTEGER_CST > + && !legal_cast_p_1 (first_dep->stride, > + gimple_assign_lhs (first_dep->cand_stmt))) > + > + incr_vec[i].cost = COST_INFINITE; > + > /* For any other increment, if this is a multiply candidate, we > must introduce a temporary T and initialize it with > T_0 = stride * increment. When optimizing for speed, walk the > > > -- Richard Guenther <rguent...@suse.de> SUSE / SUSE Labs SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 GF: Jeff Hawn, Jennifer Guild, Felix Imend