On Thu, Apr 30, 2015 at 5:52 AM, Jeff Law <l...@redhat.com> wrote:
>
> This is an incremental improvement to the type narrowing in match.pd. It's
> largely based on the pattern I added to fix 47477.
>
> Basically if we have
>
> (bit_and (arith_op (convert A) (convert B)) mask)
>
> Where the conversions are widening and the mask turns off all bits outside
> the original types of A & B, then we can turn that into
>
> (bit_and (arith_op A B) mask)
>
> We may need to convert A & B to an unsigned type with the same
> width/precision as their original type, but that's still better than a
> widening conversion.
>
> Bootstrapped and regression tested on x86_64-linux-gnu.
>
> OK for the trunk?

Without looking too close at this patch I'll note that we might want to
improve the previous one first to also handle a constant 2nd operand
for the operation (your new one also misses that).

I have in my local dev tree (so completely untested...)

@@ -1040,31 +1052,22 @@ (define_operator_list CBRT BUILT_IN_CBRT
    operation and convert the result to the desired type.  */
 (for op (plus minus)
   (simplify
-    (convert (op (convert@2 @0) (convert@3 @1)))
+    (convert (op:c@4 (convert@2 @0) (convert?@3 @1)))
     (if (INTEGRAL_TYPE_P (type)
-        /* We check for type compatibility between @0 and @1 below,
-           so there's no need to check that @1/@3 are integral types.  */
         && INTEGRAL_TYPE_P (TREE_TYPE (@0))
-        && INTEGRAL_TYPE_P (TREE_TYPE (@2))
+        && INTEGRAL_TYPE_P (TREE_TYPE (@4))
         /* The precision of the type of each operand must match the
            precision of the mode of each operand, similarly for the
            result.  */
         && (TYPE_PRECISION (TREE_TYPE (@0))
             == GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (@0))))
-        && (TYPE_PRECISION (TREE_TYPE (@1))
-            == GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (@1))))
-        && TYPE_PRECISION (type) == GET_MODE_PRECISION (TYPE_MODE (type))
         /* The inner conversion must be a widening conversion.  */
         && TYPE_PRECISION (TREE_TYPE (@2)) > TYPE_PRECISION (TREE_TYPE (@0))
-        && ((GENERIC
-             && (TYPE_MAIN_VARIANT (TREE_TYPE (@0))
-                 == TYPE_MAIN_VARIANT (TREE_TYPE (@1)))
-             && (TYPE_MAIN_VARIANT (TREE_TYPE (@0))
-                 == TYPE_MAIN_VARIANT (type)))
-            || (GIMPLE
-                && types_compatible_p (TREE_TYPE (@0), TREE_TYPE (@1))
-                && types_compatible_p (TREE_TYPE (@0), type))))
+        /* The final precision should match that of operand @0.  */
+        && TYPE_PRECISION (type) == TYPE_PRECISION (TREE_TYPE (@0))
+        /* Make sure the wide operation is dead after the transform.  */
+        && (TREE_CODE (@4) != SSA_NAME || has_single_use (@4)))
       (if (TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
-       (convert (op @0 @1)))
+       (convert (op @0 (convert @1))))
       (with { tree utype = unsigned_type_for (TREE_TYPE (@0)); }
        (convert (op (convert:utype @0) (convert:utype @1)))))))

and it was noticed multiple times that the type comparison boiler-plate
needs some helper function.  Like

Index: gimple-match-head.c
===================================================================
--- gimple-match-head.c (revision 222375)
+++ gimple-match-head.c (working copy)
@@ -861,3 +861,8 @@
   return op;
 }

+inline bool
+types_match (tree t1, tree t2)
+{
+  return types_compatible_p (t1, t2);
+}
Index: generic-match-head.c
===================================================================
--- generic-match-head.c        (revision 222375)
+++ generic-match-head.c        (working copy)
@@ -70,4 +70,8 @@
 #include "dumpfile.h"
 #include "generic-match.h"

-
+inline bool
+types_match (tree t1, tree t2)
+{
+  return TYPE_MAIN_VARIANT (t1) == TYPE_MAIN_VARIANT (t2);
+}

and then just use types_match (TREE_TYPE (...), TREE_TYPE (...)) everywhere.

I'll also add to the comment about using has_single_use - that will cause
missed optimizations for pattern uses via gimple_build which adds stmts
to a sequence and does not have SSA operands computed (so everything
will appear as having zero uses).  We need to add an abstraction for the
single-use test as well, like for generic-match-head.c

inline bool
single_use (tree t)
{
  return true;
}

and for gimple-match-head.c

inline bool
single_use (tree t)
{
  return TREE_CODE (t) != SSA_NAME || has_zero_uses (t) || has_single_use (t);
}

And if you'd like to lend helping hands to adding patterns then transitioning
patterns from fold-const.c to match.pd is more appreciated than inventing
new ones ;)

Thanks,
Richard.

>
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index 5c7558a..51f68ab 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,8 @@
> +2015-04-29  Jeff Law  <l...@redhat.com>
> +
> +       * match.pd (bit_and (plus/minus (convert @0) (convert @1)) mask): New
> +       simplifier to narrow arithmetic.
> +
>  2015-04-29  Mikhail Maltsev  <malts...@gmail.com>
>
>         * dojump.c (do_compare_rtx_and_jump): Use std::swap instead of
> diff --git a/gcc/match.pd b/gcc/match.pd
> index fc374de..357e767 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -1068,3 +1068,41 @@ along with GCC; see the file COPYING3.  If not see
>         (convert (op @0 @1)))
>        (with { tree utype = unsigned_type_for (TREE_TYPE (@0)); }
>         (convert (op (convert:utype @0) (convert:utype @1)))))))
> +
> +/* This is another case of narrowing, specifically when there's an outer
> +   BIT_AND_EXPR which masks off bits outside the type of the innermost
> +   operands.   Like the previous case we have to convert the operands
> +   to unsigned types to avoid introducing undefined behaviour for the
> +   arithmetic operation.  */
> +(for op (minus plus)
> +  (simplify
> +    (bit_and (op (convert@2 @0) (convert@3 @1)) INTEGER_CST@4)
> +    (if (INTEGRAL_TYPE_P (type)
> +        /* We check for type compatibility between @0 and @1 below,
> +           so there's no need to check that @1/@3 are integral types.  */
> +        && INTEGRAL_TYPE_P (TREE_TYPE (@0))
> +        && INTEGRAL_TYPE_P (TREE_TYPE (@2))
> +        /* The precision of the type of each operand must match the
> +           precision of the mode of each operand, similarly for the
> +           result.  */
> +        && (TYPE_PRECISION (TREE_TYPE (@0))
> +            == GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (@0))))
> +        && (TYPE_PRECISION (TREE_TYPE (@1))
> +            == GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (@1))))
> +        && TYPE_PRECISION (type) == GET_MODE_PRECISION (TYPE_MODE (type))
> +        /* The inner conversion must be a widening conversion.  */
> +        && TYPE_PRECISION (TREE_TYPE (@2)) > TYPE_PRECISION (TREE_TYPE
> (@0))
> +        && ((GENERIC
> +             && (TYPE_MAIN_VARIANT (TREE_TYPE (@0))
> +                 == TYPE_MAIN_VARIANT (TREE_TYPE (@1))))
> +            || (GIMPLE
> +                && types_compatible_p (TREE_TYPE (@0), TREE_TYPE (@1))))
> +        && (tree_int_cst_min_precision (@4, UNSIGNED)
> +            <= TYPE_PRECISION (TREE_TYPE (@0))))
> +      (if (TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
> +       (with { tree ntype = TREE_TYPE (@0); }
> +         (convert (bit_and (op @0 @1) (convert:ntype @4)))))
> +      (with { tree utype = unsigned_type_for (TREE_TYPE (@0)); }
> +       (convert (bit_and (op (convert:utype @0) (convert:utype @1))
> +                         (convert:utype @4)))))))
> +
> diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
> index 7aebfec..9df76c6 100644
> --- a/gcc/testsuite/ChangeLog
> +++ b/gcc/testsuite/ChangeLog
> @@ -1,3 +1,7 @@
> +2015-04-29  Jeff Law  <l...@redhat.com>
> +
> +       * gcc.dg/tree-ssa/shorten-1.c: New test.
> +
>  2015-04-29  Petar Jovanovic  <petar.jovano...@rt-rk.com>
>
>         * gcc.target/mips/call-from-init.c: New test.
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/shorten-1.c
> b/gcc/testsuite/gcc.dg/tree-ssa/shorten-1.c
> new file mode 100644
> index 0000000..cecdd44
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/shorten-1.c
> @@ -0,0 +1,78 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +extern const unsigned char mode_ibit[];
> +extern const unsigned char mode_fbit[];
> +extern const signed char smode_ibit[];
> +extern const signed char smode_fbit[];
> +
> +/* We use bit-and rather than modulo to ensure we're actually
> +   testing the desired match.pd pattern.  */
> +unsigned char
> +muufubar (int indx)
> +{
> +  int ret = (mode_fbit [indx] - mode_ibit [indx]) & 3;
> +  return ret;
> +}
> +
> +signed char
> +msufubar (int indx)
> +{
> +  int ret = (mode_fbit [indx] - mode_ibit [indx]) & 3;
> +  return ret;
> +}
> +
> +unsigned char
> +musfubar (int indx)
> +{
> +  int ret = (smode_fbit [indx] - smode_ibit [indx]) & 3;
> +  return ret;
> +}
> +
> +signed char
> +mssfubar (int indx)
> +{
> +  int ret = (smode_fbit [indx] - smode_ibit [indx]) & 3;
> +  return ret;
> +}
> +
> +
> +unsigned char
> +puufubar (int indx)
> +{
> +  int ret = (mode_fbit [indx] + mode_ibit [indx]) & 3;
> +  return ret;
> +}
> +
> +signed char
> +psufubar (int indx)
> +{
> +  int ret = (mode_fbit [indx] + mode_ibit [indx]) & 3;
> +  return ret;
> +}
> +
> +unsigned char
> +pusfubar (int indx)
> +{
> +  int ret = (smode_fbit [indx] + smode_ibit [indx]) & 3;
> +  return ret;
> +}
> +
> +signed char
> +pssfubar (int indx)
> +{
> +  int ret = (smode_fbit [indx] + smode_ibit [indx]) & 3;
> +  return ret;
> +}
> +
> +/* The shortening patterns in match.pd should arrange to do the
> +   arithmetic in char modes and thus any casts to ints should
> +   have been removed.  */
> +/* { dg-final {scan-tree-dump-not "\\(int\\)" "optimized"} } */
> +
> +/* We should have cast 8 operands from signed to unsigned char types.  */
> +/* { dg-final {scan-tree-dump-times "\\(unsigned char\\)" 8 "optimized" } }
> */
> +
> +/* And four return values should have been cast from unsigned char to
> +   signed char.  */
> +/* { dg-final {scan-tree-dump-times "\\(signed char\\)" 4 "optimized" } }
> */
>

Reply via email to