On Sat, May 2, 2015 at 2:36 AM, Jeff Law <l...@redhat.com> wrote: > Here's an updated patch to add more type narrowing to match.pd. > > Changes since the last version: > > Slight refactoring of the condition by using types_match as suggested by > Richi. I also applied the new types_match to 2 other patterns in match.pd > where it seemed clearly appropriate. > > Additionally the transformation is restricted by using the new single_use > predicate. I didn't change other patterns in match.pd to use the new > single_use predicate. But some probably could be changed. > > This (of course) continues to pass the bootstrap and regression check for > x86-linux-gnu. > > There's still a ton of work to do in this space. This is meant to be an > incremental stand-alone improvement. > > OK now?
Ok with the {gimple,generic}-match-head.c changes mentioned in the ChangeLog. Thanks, Richard. > > > Jeff > > diff --git a/gcc/ChangeLog b/gcc/ChangeLog > index e006b26..5ee89de 100644 > --- a/gcc/ChangeLog > +++ b/gcc/ChangeLog > @@ -1,3 +1,8 @@ > +2015-05-01 Jeff Law <l...@redhat.com> > + > + * match.pd (bit_and (plus/minus (convert @0) (convert @1) mask): New > + simplifier to narrow arithmetic. > + > 2015-05-01 Rasmus Villemoes <r...@rasmusvillemoes.dk> > > * match.pd: New simplification patterns. > diff --git a/gcc/generic-match-head.c b/gcc/generic-match-head.c > index daa56aa..303b237 100644 > --- a/gcc/generic-match-head.c > +++ b/gcc/generic-match-head.c > @@ -70,4 +70,20 @@ along with GCC; see the file COPYING3. If not see > #include "dumpfile.h" > #include "generic-match.h" > > +/* Routine to determine if the types T1 and T2 are effectively > + the same for GENERIC. */ > > +inline bool > +types_match (tree t1, tree t2) > +{ > + return TYPE_MAIN_VARIANT (t1) == TYPE_MAIN_VARIANT (t2); > +} > + > +/* Return if T has a single use. For GENERIC, we assume this is > + always true. */ > + > +inline bool > +single_use (tree t) > +{ > + return true; > +} > diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c > index c7b2f95..dc13218 100644 > --- a/gcc/gimple-match-head.c > +++ b/gcc/gimple-match-head.c > @@ -861,3 +861,21 @@ do_valueize (tree (*valueize)(tree), tree op) > return op; > } > > +/* Routine to determine if the types T1 and T2 are effectively > + the same for GIMPLE. */ > + > +inline bool > +types_match (tree t1, tree t2) > +{ > + return types_compatible_p (t1, t2); > +} > + > +/* Return if T has a single use. For GIMPLE, we also allow any > + non-SSA_NAME (ie constants) and zero uses to cope with uses > + that aren't linked up yet. */ > + > +inline bool > +single_use (tree t) > +{ > + return TREE_CODE (t) != SSA_NAME || has_zero_uses (t) || has_single_use > (t); > +} > diff --git a/gcc/match.pd b/gcc/match.pd > index 87ecaf1..51a950a 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -289,8 +289,7 @@ along with GCC; see the file COPYING3. If not see > (if (((TREE_CODE (@1) == INTEGER_CST > && INTEGRAL_TYPE_P (TREE_TYPE (@0)) > && int_fits_type_p (@1, TREE_TYPE (@0))) > - || (GIMPLE && types_compatible_p (TREE_TYPE (@0), TREE_TYPE (@1))) > - || (GENERIC && TREE_TYPE (@0) == TREE_TYPE (@1))) > + || types_match (TREE_TYPE (@0), TREE_TYPE (@1))) > /* ??? This transform conflicts with fold-const.c doing > Convert (T)(x & c) into (T)x & (T)c, if c is an integer > constants (if x has signed type, the sign bit cannot be set > @@ -949,8 +948,7 @@ along with GCC; see the file COPYING3. If not see > /* Unordered tests if either argument is a NaN. */ > (simplify > (bit_ior (unordered @0 @0) (unordered @1 @1)) > - (if ((GIMPLE && types_compatible_p (TREE_TYPE (@0), TREE_TYPE (@1))) > - || (GENERIC && TREE_TYPE (@0) == TREE_TYPE (@1))) > + (if (types_match (TREE_TYPE (@0), TREE_TYPE (@1))) > (unordered @0 @1))) > (simplify > (bit_ior:c (unordered @0 @0) (unordered:c@2 @0 @1)) > @@ -1054,7 +1052,7 @@ along with GCC; see the file COPYING3. If not see > operation and convert the result to the desired type. */ > (for op (plus minus) > (simplify > - (convert (op (convert@2 @0) (convert@3 @1))) > + (convert (op@4 (convert@2 @0) (convert@3 @1))) > (if (INTEGRAL_TYPE_P (type) > /* We check for type compatibility between @0 and @1 below, > so there's no need to check that @1/@3 are integral types. */ > @@ -1070,15 +1068,45 @@ along with GCC; see the file COPYING3. If not see > && TYPE_PRECISION (type) == GET_MODE_PRECISION (TYPE_MODE (type)) > /* The inner conversion must be a widening conversion. */ > && TYPE_PRECISION (TREE_TYPE (@2)) > TYPE_PRECISION (TREE_TYPE > (@0)) > - && ((GENERIC > - && (TYPE_MAIN_VARIANT (TREE_TYPE (@0)) > - == TYPE_MAIN_VARIANT (TREE_TYPE (@1))) > - && (TYPE_MAIN_VARIANT (TREE_TYPE (@0)) > - == TYPE_MAIN_VARIANT (type))) > - || (GIMPLE > - && types_compatible_p (TREE_TYPE (@0), TREE_TYPE (@1)) > - && types_compatible_p (TREE_TYPE (@0), type)))) > + && types_match (TREE_TYPE (@0), TREE_TYPE (@1)) > + && types_match (TREE_TYPE (@0), type) > + && single_use (@4)) > (if (TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))) > (convert (op @0 @1))) > (with { tree utype = unsigned_type_for (TREE_TYPE (@0)); } > (convert (op (convert:utype @0) (convert:utype @1))))))) > + > +/* This is another case of narrowing, specifically when there's an outer > + BIT_AND_EXPR which masks off bits outside the type of the innermost > + operands. Like the previous case we have to convert the operands > + to unsigned types to avoid introducing undefined behaviour for the > + arithmetic operation. */ > +(for op (minus plus) > + (simplify > + (bit_and (op@5 (convert@2 @0) (convert@3 @1)) INTEGER_CST@4) > + (if (INTEGRAL_TYPE_P (type) > + /* We check for type compatibility between @0 and @1 below, > + so there's no need to check that @1/@3 are integral types. */ > + && INTEGRAL_TYPE_P (TREE_TYPE (@0)) > + && INTEGRAL_TYPE_P (TREE_TYPE (@2)) > + /* The precision of the type of each operand must match the > + precision of the mode of each operand, similarly for the > + result. */ > + && (TYPE_PRECISION (TREE_TYPE (@0)) > + == GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (@0)))) > + && (TYPE_PRECISION (TREE_TYPE (@1)) > + == GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (@1)))) > + && TYPE_PRECISION (type) == GET_MODE_PRECISION (TYPE_MODE (type)) > + /* The inner conversion must be a widening conversion. */ > + && TYPE_PRECISION (TREE_TYPE (@2)) > TYPE_PRECISION (TREE_TYPE > (@0)) > + && types_match (TREE_TYPE (@0), TREE_TYPE (@1)) > + && (tree_int_cst_min_precision (@4, UNSIGNED) > + <= TYPE_PRECISION (TREE_TYPE (@0))) > + && single_use (@5)) > + (if (TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))) > + (with { tree ntype = TREE_TYPE (@0); } > + (convert (bit_and (op @0 @1) (convert:ntype @4))))) > + (with { tree utype = unsigned_type_for (TREE_TYPE (@0)); } > + (convert (bit_and (op (convert:utype @0) (convert:utype @1)) > + (convert:utype @4))))))) > + > diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog > index 2aedc46..bd32179 100644 > --- a/gcc/testsuite/ChangeLog > +++ b/gcc/testsuite/ChangeLog > @@ -1,3 +1,7 @@ > +2015-05-01 Jeff Law <l...@redhat.com> > + > + * gcc.dg/tree-ssa/shorten-1.c: New test. > + > 2015-05-01 Rasmus Villemoes <r...@rasmusvillemoes.dk> > > * gcc.dg/20150120-1.c: New test. > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/shorten-1.c > b/gcc/testsuite/gcc.dg/tree-ssa/shorten-1.c > new file mode 100644 > index 0000000..cecdd44 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/shorten-1.c > @@ -0,0 +1,78 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-optimized" } */ > + > +extern const unsigned char mode_ibit[]; > +extern const unsigned char mode_fbit[]; > +extern const signed char smode_ibit[]; > +extern const signed char smode_fbit[]; > + > +/* We use bit-and rather than modulo to ensure we're actually > + testing the desired match.pd pattern. */ > +unsigned char > +muufubar (int indx) > +{ > + int ret = (mode_fbit [indx] - mode_ibit [indx]) & 3; > + return ret; > +} > + > +signed char > +msufubar (int indx) > +{ > + int ret = (mode_fbit [indx] - mode_ibit [indx]) & 3; > + return ret; > +} > + > +unsigned char > +musfubar (int indx) > +{ > + int ret = (smode_fbit [indx] - smode_ibit [indx]) & 3; > + return ret; > +} > + > +signed char > +mssfubar (int indx) > +{ > + int ret = (smode_fbit [indx] - smode_ibit [indx]) & 3; > + return ret; > +} > + > + > +unsigned char > +puufubar (int indx) > +{ > + int ret = (mode_fbit [indx] + mode_ibit [indx]) & 3; > + return ret; > +} > + > +signed char > +psufubar (int indx) > +{ > + int ret = (mode_fbit [indx] + mode_ibit [indx]) & 3; > + return ret; > +} > + > +unsigned char > +pusfubar (int indx) > +{ > + int ret = (smode_fbit [indx] + smode_ibit [indx]) & 3; > + return ret; > +} > + > +signed char > +pssfubar (int indx) > +{ > + int ret = (smode_fbit [indx] + smode_ibit [indx]) & 3; > + return ret; > +} > + > +/* The shortening patterns in match.pd should arrange to do the > + arithmetic in char modes and thus any casts to ints should > + have been removed. */ > +/* { dg-final {scan-tree-dump-not "\\(int\\)" "optimized"} } */ > + > +/* We should have casted 4 operands from signed to unsigned char types. */ > +/* { dg-final {scan-tree-dump-times "\\(unsigned char\\)" 8 "optimized" } } > */ > + > +/* And two return values should have been casted from unsigned char to > + a normal char. */ > +/* { dg-final {scan-tree-dump-times "\\(signed char\\)" 4 "optimized" } } > */ >