[PATCH][match-and-simplify] More flexible 'for', polish match-builtins.pd

Richard Biener Wed, 10 Sep 2014 04:59:31 -0700

This completes match-builtins.pd by handling all builtin variants
in all present patterns.  That requires a little more flexibility
in the 'for' handling to support, for example:


 /* Optimize sqrt(expN(x)) = expN(x*0.5).  */
 (for expfn (BUILT_IN_EXP10F BUILT_IN_EXP10 BUILT_IN_EXP10L
             BUILT_IN_POW10F BUILT_IN_POW10 BUILT_IN_POW10L
             BUILT_IN_EXPF BUILT_IN_EXP BUILT_IN_EXPL
             BUILT_IN_EXP2F BUILT_IN_EXP2 BUILT_IN_EXP2L)
      SQRT (BUILT_IN_SQRTF BUILT_IN_SQRT BUILT_IN_SQRTL)
  (simplify
   (SQRT (expfn @0))
   (expfn (mult @0 { build_real (type, dconsthalf); }))))

without having to repeat the SQRT substitutions four times
to match the expfn substitution length.  Now we just repeat
the substitution vector as needed (but require all lengths
to be a multiple of the smallest one, just as a sanity check).

The patch also fixes the cabs() patterns and completes them
from fold_builtin_cabs.  Similarly, it fixes the
x * pow(x, c) -> pow(x, c+1) pattern to match the one in fold-const.c,
but in addition requires -fno-math-errno (errno differences can
occur - this seems to be a common issue with all math builtin foldings
that are just conditionalized on flag_unsafe_math_optimizations...).

Committed.

Richard.

2014-09-10  Richard Biener  <rguent...@suse.de>

        * genmatch.c (parse_for): Allow more flexible replacement counts.
        * match-builtins.pd: Fix initial patterns and complete them
        from the source functions.  Properly handle all builtin variants.

Index: gcc/genmatch.c
===================================================================
--- gcc/genmatch.c      (revision 215057)
+++ gcc/genmatch.c      (working copy)
@@ -2414,7 +2414,7 @@ parse_for (cpp_reader *r, source_locatio
   vec<const char *> user_ids = vNULL;
   vec< vec<const char *> > opers_vec = vNULL;
   const cpp_token *token;
-  unsigned n_opers = 0;
+  unsigned min_n_opers = 0, max_n_opers = 0;
 
   while (1)
     {
@@ -2445,13 +2445,29 @@ parse_for (cpp_reader *r, source_locatio
          
          opers.safe_push (oper);
        }
+      token = expect (r, CPP_CLOSE_PAREN);
+      if (opers.length () == 0)
+       fatal_at (token, "A user-defined identifier must have at least one 
substitution");
+      if (opers_vec.length () == 0)
+       {
+         min_n_opers = opers.length ();
+         max_n_opers = opers.length ();
+       }
+      else
+       {
+         if (opers.length () % min_n_opers != 0
+             && min_n_opers % opers.length () != 0)
+           fatal_at (token, "All user-defined identifiers must have a "
+                     "multiple number of operator substitutions of the "
+                     "smallest number of substitutions");
+         if (opers.length () < min_n_opers)
+           min_n_opers = opers.length ();
+         else if (opers.length () > max_n_opers)
+           max_n_opers = opers.length ();
+       }
+
       opers_vec.safe_push (opers);
-      if (n_opers == 0)
-       n_opers = opers.length ();
-      else if (n_opers != opers.length ())
-       fatal_at (token, "All user-defined identifiers must have same number of 
operator substitutions");
-      eat_token (r, CPP_CLOSE_PAREN);
-    }    
+    }
 
   if (user_ids.length () == 0)
     fatal_at (token, "for requires at least one user-defined identifier");
@@ -2474,12 +2490,8 @@ parse_for (cpp_reader *r, source_locatio
     {
       simplify *s = for_simplifiers[ix];
 
-      for (unsigned j = 0; j < n_opers; ++j)
+      for (unsigned j = 0; j < max_n_opers; ++j)
        {
-         vec<const char *> opers = vNULL;
-         for (unsigned i = 0; i < opers_vec.length (); ++i)
-           opers.safe_push (opers_vec[i][j]);
-         
          operand *match_op = s->match;
          operand *result_op = s->result;
          vec<if_or_with> ifexpr_vec = vNULL;
@@ -2489,16 +2501,17 @@ parse_for (cpp_reader *r, source_locatio
 
          for (unsigned i = 0; i < n_ids; ++i)
            {
-             match_op = replace_id (match_op, user_ids[i], opers[i]);
-             result_op = replace_id (result_op, user_ids[i], opers[i]);
+             const char *oper = opers_vec[i][j % opers_vec[i].length ()];
+             match_op = replace_id (match_op, user_ids[i], oper);
+             result_op = replace_id (result_op, user_ids[i], oper);
 
              for (unsigned k = 0; k < s->ifexpr_vec.length (); ++k)
-               ifexpr_vec[k].cexpr = replace_id (ifexpr_vec[k].cexpr, 
user_ids[i], opers[i]);
+               ifexpr_vec[k].cexpr = replace_id (ifexpr_vec[k].cexpr, 
user_ids[i], oper);
 
            }
-           simplify *ns = new simplify (s->name, match_op, s->match_location,
-                                        result_op, s->result_location, 
ifexpr_vec);
-           simplifiers.safe_push (ns);
+         simplify *ns = new simplify (s->name, match_op, s->match_location,
+                                      result_op, s->result_location, 
ifexpr_vec);
+         simplifiers.safe_push (ns);
        }
     }
 } 
Index: gcc/match-builtin.pd
===================================================================
--- gcc/match-builtin.pd        (revision 215011)
+++ gcc/match-builtin.pd        (working copy)
@@ -18,29 +18,44 @@ along with GCC; see the file COPYING3.
 <http://www.gnu.org/licenses/>.  */
 
 
-/* ???  For math builtins we fail to properly repeat patterns for
-   all FP type kinds (sqrtf, sqrt, sqrtl).  And we fail to provide
-   a mechanism to iterate two ops in lock-step like
-   (for fn1 in sqrt sqrtf sqrtl and fn2 in pow powf powl ...)
-   if we were to do that repetition semi-manually.
-   We could also automagically use the type of the expr to
-   always do mathfn_built_in at code-gen time and always
-   automagically iterate over kinds (but that's bogus for
-   things like (convert (BUILT_IN_SQRT @0)) -> (BUILT_IN_SQRTF @0).  */
-
-
-/* One builtin function to builtin function.  */
-(simplify
-  (BUILT_IN_CABS (complex:c @0 real_zerop))
+/* From fold_builtin_cabs.  */
+/* If either part is zero, cabs is fabs of the other.  */
+(for CABS (BUILT_IN_CABSF BUILT_IN_CABS BUILT_IN_CABSL)
+ (simplify
+  (CABS (complex:c @0 real_zerop))
   (abs @0))
-/* One builtin function to expr.  */
-(simplify
-  (BUILT_IN_CABS (complex @0 @0))
-  (mult (abs @0) { build_real (TREE_TYPE (@0), real_value_truncate (TYPE_MODE 
(TREE_TYPE (@0)), dconst_sqrt2 ())); }))
-/* One nested fn.  */
-(simplify
-  (mult:c (BUILT_IN_POW @0 @1) @0)
-  (BUILT_IN_POW @0 (PLUS_EXPR @1 { build_one_cst (TREE_TYPE (@1)); })))
+ /* cabs(x+xi) -> fabs(x)*sqrt(2).  */
+ (if (flag_unsafe_math_optimizations)
+  (simplify
+   (CABS (complex @0 @0))
+   (mult (abs @0) { build_real (TREE_TYPE (@0), real_value_truncate (TYPE_MODE 
(TREE_TYPE (@0)), dconst_sqrt2 ())); })))
+ /* Optimize cabs(-z) and cabs(conj(z)) as cabs(z).  */
+ (for op (negate conj)
+  (simplify
+   (CABS (op @0))
+   (CABS @0))))
+/* Don't do this when optimizing for size.  */
+(if (flag_unsafe_math_optimizations && optimize_function_for_speed_p (cfun))
+ (for CABS (BUILT_IN_CABSF BUILT_IN_CABS BUILT_IN_CABSL)
+      SQRT (BUILT_IN_SQRTF BUILT_IN_SQRT BUILT_IN_SQRTL)
+  (simplify
+   (CABS @0)
+   (SQRT (plus
+         /* ???  There is no way to CSE here.  We'd need to support
+            expression captures here, like with
+             (mult (realpart@1 @0) @1) */
+         (mult (realpart @0) (realpart @0))
+         (mult (imagpart @0) (imagpart @0)))))))
+
+/* From fold_binary.  */
+/* Optimize x*pow(x,c) as pow(x,c+1).  */
+(if (flag_unsafe_math_optimizations
+     /* ???  fold-const.c does not check for flag_errno_math.  */
+     && !flag_errno_math)
+ (for POW (BUILT_IN_POW BUILT_IN_POWF BUILT_IN_POWL)
+  (simplify
+   (mult:c (POW @0 REAL_CST@1) @0)
+   (POW @0 (plus @1 { build_one_cst (TREE_TYPE (@1)); })))))
 
 /* From fold_builtin_fabs and fold_builtin_abs.  */
 /* Fold a call to fabs, fabsf or fabsl, to abs, labs, llabs or imaxabs.  */
@@ -50,14 +65,14 @@ along with GCC; see the file COPYING3.
   (abs @0)))
 
 /* From fold_builtin_pow.  */
-/* Optimize pow(1.0,y) = 1.0.  */
-(simplify
- (BUILT_IN_POW real_onep@0 @1)
- @0)
-
 (for POW (BUILT_IN_POWF BUILT_IN_POW BUILT_IN_POWL)
      SQRT (BUILT_IN_SQRTF BUILT_IN_SQRT BUILT_IN_SQRTL)
      CBRT (BUILT_IN_CBRTF BUILT_IN_CBRT BUILT_IN_CBRTL)
+ /* Optimize pow(1.0,y) = 1.0.  */
+ (simplify
+  (POW real_onep@0 @1)
+  @0)
+
 (simplify
  (POW @0 REAL_CST@1)
  (with { REAL_VALUE_TYPE c = TREE_REAL_CST (@1); }
@@ -80,14 +95,14 @@ along with GCC; see the file COPYING3.
         = real_value_truncate (TYPE_MODE (type), dconst_third ()); }
     (if (flag_unsafe_math_optimizations
         && REAL_VALUES_EQUAL (c, dconstroot))
-     (CBRT @0))))))
+     (CBRT @0)))))
 
-/* Strip sign ops from even integer powers.
-   ???  The code in builtins.c manages to perform this recursively
-   through the whole expression in arg0 of pow.  */
-(for sgnop (abs negate)
+ /* Strip sign ops from even integer powers.
+    ???  The code in builtins.c manages to perform this recursively
+    through the whole expression in arg0 of pow.  */
+ (for sgnop (abs negate)
   (simplify
-    (BUILT_IN_POW (sgnop @0) REAL_CST@1)
+    (POW (sgnop @0) REAL_CST@1)
     (with
       { 
        REAL_VALUE_TYPE c = TREE_REAL_CST (@1);
@@ -98,19 +113,26 @@ along with GCC; see the file COPYING3.
       (if (real_identical (&c, &cint)
           && (n & 1) == 0
           && flag_unsafe_math_optimizations)
-       (BUILT_IN_POW @0 @1)))))
+       (POW @0 @1))))))
 
 /* From fold_builtin_sqrt.  */
 (if (flag_unsafe_math_optimizations)
  /* Optimize sqrt(expN(x)) = expN(x*0.5).  */
- (for expfn (BUILT_IN_EXP10 BUILT_IN_POW10 BUILT_IN_EXP BUILT_IN_EXP2)
+ (for expfn (BUILT_IN_EXP10F BUILT_IN_EXP10 BUILT_IN_EXP10L
+            BUILT_IN_POW10F BUILT_IN_POW10 BUILT_IN_POW10L
+            BUILT_IN_EXPF BUILT_IN_EXP BUILT_IN_EXPL
+            BUILT_IN_EXP2F BUILT_IN_EXP2 BUILT_IN_EXP2L)
+      SQRT (BUILT_IN_SQRTF BUILT_IN_SQRT BUILT_IN_SQRTL)
   (simplify
-   (BUILT_IN_SQRT (expfn @0))
+   (SQRT (expfn @0))
    (expfn (mult @0 { build_real (type, dconsthalf); }))))
  /* Optimize sqrt(Nroot(x)) -> pow(x,1/(2*N)).  */
- (for rootfn (BUILT_IN_SQRT BUILT_IN_CBRT)
+ (for rootfn (BUILT_IN_SQRTF BUILT_IN_SQRT BUILT_IN_SQRTL
+             BUILT_IN_CBRTF BUILT_IN_CBRT BUILT_IN_CBRTL)
+      SQRT (BUILT_IN_SQRTF BUILT_IN_SQRT BUILT_IN_SQRTL)
+      POW (BUILT_IN_POWF BUILT_IN_POW BUILT_IN_POWL)
   (simplify
-   (BUILT_IN_SQRT (rootfn @0))
+   (SQRT (rootfn @0))
    (with
     { REAL_VALUE_TYPE dconstroot;
       if (BUILTIN_SQRT_P (rootfn)) dconstroot = dconsthalf;
@@ -118,8 +140,10 @@ along with GCC; see the file COPYING3.
       /* Adjust for the outer root.  */
       SET_REAL_EXP (&dconstroot, REAL_EXP (&dconstroot) - 1);
       dconstroot = real_value_truncate (TYPE_MODE (type), dconstroot); }
-    (BUILT_IN_POW @0 { build_real (type, dconstroot); }))))
+    (POW @0 { build_real (type, dconstroot); }))))
  /* Optimize sqrt(pow(x,y)) = pow(|x|,y*0.5).  */
- (simplify
-  (BUILT_IN_SQRT (BUILT_IN_POW @0 @1))
-  (BUILT_IN_POW (abs @0) (mult @1 { build_real (TREE_TYPE (@1), dconsthalf); 
}))))
+ (for SQRT (BUILT_IN_SQRTF BUILT_IN_SQRT BUILT_IN_SQRTL)
+      POW (BUILT_IN_POWF BUILT_IN_POW BUILT_IN_POWL)
+  (simplify
+   (SQRT (POW @0 @1))
+   (POW (abs @0) (mult @1 { build_real (TREE_TYPE (@1), dconsthalf); })))))

[PATCH][match-and-simplify] More flexible 'for', polish match-builtins.pd

Reply via email to