This completes match-builtin.pd by handling all builtin variants in all present patterns. That requires a little more flexibility in the 'for' handling to support, for example:
  /* Optimize sqrt(expN(x)) = expN(x*0.5). */
  (for expfn (BUILT_IN_EXP10F BUILT_IN_EXP10 BUILT_IN_EXP10L
              BUILT_IN_POW10F BUILT_IN_POW10 BUILT_IN_POW10L
              BUILT_IN_EXPF BUILT_IN_EXP BUILT_IN_EXPL
              BUILT_IN_EXP2F BUILT_IN_EXP2 BUILT_IN_EXP2L)
       SQRT (BUILT_IN_SQRTF BUILT_IN_SQRT BUILT_IN_SQRTL)
   (simplify
    (SQRT (expfn @0))
    (expfn (mult @0 { build_real (type, dconsthalf); }))))

without having to repeat the SQRT substitutions four times to match the expfn substitution length. Now we just repeat the substitution vector as needed (but require all lengths to be a multiple of the smallest one, just as a sanity check).

The patch also fixes the cabs() patterns and completes them from fold_builtin_cabs. Similarly, it fixes the x * pow(x, c) -> pow (x, c+1) pattern to match that from fold-const.c, but in addition requires -fno-math-errno (errno differences can occur - this seems to be a common issue with all math builtin foldings that are just conditionalized on flag_unsafe_math_optimizations...).

Committed.

Richard.

2014-09-10  Richard Biener  <rguent...@suse.de>

	* genmatch.c (parse_for): Allow more flexible replacement counts.
	* match-builtin.pd: Fix initial patterns and complete them from the source functions. Properly handle all builtin variants.

Index: gcc/genmatch.c =================================================================== --- gcc/genmatch.c (revision 215057) +++ gcc/genmatch.c (working copy) @@ -2414,7 +2414,7 @@ parse_for (cpp_reader *r, source_locatio vec<const char *> user_ids = vNULL; vec< vec<const char *> > opers_vec = vNULL; const cpp_token *token; - unsigned n_opers = 0; + unsigned min_n_opers = 0, max_n_opers = 0; while (1) { @@ -2445,13 +2445,29 @@ parse_for (cpp_reader *r, source_locatio opers.safe_push (oper); } + token = expect (r, CPP_CLOSE_PAREN); + if (opers.length () == 0) + fatal_at (token, "A user-defined identifier must have at least one substitution"); + if (opers_vec.length () == 0) + { + min_n_opers = opers.length (); + max_n_opers = opers.length (); + } + else + { + if (opers.length () % min_n_opers != 0 + && min_n_opers % opers.length () != 0) + fatal_at (token, "All user-defined identifiers must have a " + "multiple number of operator substitutions of the " + "smallest number of substitutions"); + if (opers.length () < min_n_opers) + min_n_opers = opers.length (); + else if (opers.length () > max_n_opers) + max_n_opers = opers.length (); + } + opers_vec.safe_push (opers); - if (n_opers == 0) - n_opers = opers.length (); - else if (n_opers != opers.length ()) - fatal_at (token, "All user-defined identifiers must have same number of operator substitutions"); - eat_token (r, CPP_CLOSE_PAREN); - } + } if (user_ids.length () == 0) fatal_at (token, "for requires at least one user-defined identifier"); @@ -2474,12 +2490,8 @@ parse_for (cpp_reader *r, source_locatio { simplify *s = for_simplifiers[ix]; - for (unsigned j = 0; j < n_opers; ++j) + for (unsigned j = 0; j < max_n_opers; ++j) { - vec<const char *> opers = vNULL; - for (unsigned i = 0; i < opers_vec.length (); ++i) - opers.safe_push (opers_vec[i][j]); - operand *match_op = s->match; operand *result_op = s->result; vec<if_or_with> ifexpr_vec = vNULL; @@ -2489,16 +2501,17 @@ parse_for (cpp_reader *r, source_locatio for 
(unsigned i = 0; i < n_ids; ++i) { - match_op = replace_id (match_op, user_ids[i], opers[i]); - result_op = replace_id (result_op, user_ids[i], opers[i]); + const char *oper = opers_vec[i][j % opers_vec[i].length ()]; + match_op = replace_id (match_op, user_ids[i], oper); + result_op = replace_id (result_op, user_ids[i], oper); for (unsigned k = 0; k < s->ifexpr_vec.length (); ++k) - ifexpr_vec[k].cexpr = replace_id (ifexpr_vec[k].cexpr, user_ids[i], opers[i]); + ifexpr_vec[k].cexpr = replace_id (ifexpr_vec[k].cexpr, user_ids[i], oper); } - simplify *ns = new simplify (s->name, match_op, s->match_location, - result_op, s->result_location, ifexpr_vec); - simplifiers.safe_push (ns); + simplify *ns = new simplify (s->name, match_op, s->match_location, + result_op, s->result_location, ifexpr_vec); + simplifiers.safe_push (ns); } } } Index: gcc/match-builtin.pd =================================================================== --- gcc/match-builtin.pd (revision 215011) +++ gcc/match-builtin.pd (working copy) @@ -18,29 +18,44 @@ along with GCC; see the file COPYING3. <http://www.gnu.org/licenses/>. */ -/* ??? For math builtins we fail to properly repeat patterns for - all FP type kinds (sqrtf, sqrt, sqrtl). And we fail to provide - a mechanism to iterate two ops in lock-step like - (for fn1 in sqrt sqrtf sqrtl and fn2 in pow powf powl ...) - if we were to do that repetition semi-manually. - We could also automagically use the type of the expr to - always do mathfn_built_in at code-gen time and always - automagically iterate over kinds (but that's bogus for - things like (convert (BUILT_IN_SQRT @0)) -> (BUILT_IN_SQRTF @0). */ - - -/* One builtin function to builtin function. */ -(simplify - (BUILT_IN_CABS (complex:c @0 real_zerop)) +/* From fold_builtin_cabs. */ +/* If either part is zero, cabs is fabs of the other. */ +(for CABS (BUILT_IN_CABSF BUILT_IN_CABS BUILT_IN_CABSL) + (simplify + (CABS (complex:c @0 real_zerop)) (abs @0)) -/* One builtin function to expr. 
*/ -(simplify - (BUILT_IN_CABS (complex @0 @0)) - (mult (abs @0) { build_real (TREE_TYPE (@0), real_value_truncate (TYPE_MODE (TREE_TYPE (@0)), dconst_sqrt2 ())); })) -/* One nested fn. */ -(simplify - (mult:c (BUILT_IN_POW @0 @1) @0) - (BUILT_IN_POW @0 (PLUS_EXPR @1 { build_one_cst (TREE_TYPE (@1)); }))) + /* cabs(x+xi) -> fabs(x)*sqrt(2). */ + (if (flag_unsafe_math_optimizations) + (simplify + (CABS (complex @0 @0)) + (mult (abs @0) { build_real (TREE_TYPE (@0), real_value_truncate (TYPE_MODE (TREE_TYPE (@0)), dconst_sqrt2 ())); }))) + /* Optimize cabs(-z) and cabs(conj(z)) as cabs(z). */ + (for op (negate conj) + (simplify + (CABS (op @0)) + (CABS @0)))) +/* Don't do this when optimizing for size. */ +(if (flag_unsafe_math_optimizations && optimize_function_for_speed_p (cfun)) + (for CABS (BUILT_IN_CABSF BUILT_IN_CABS BUILT_IN_CABSL) + SQRT (BUILT_IN_SQRTF BUILT_IN_SQRT BUILT_IN_SQRTL) + (simplify + (CABS @0) + (SQRT (plus + /* ??? There is no way to CSE here. We'd need to support + expression captures here, like with + (mult (realpart@1 @0) @1) */ + (mult (realpart @0) (realpart @0)) + (mult (imagpart @0) (imagpart @0))))))) + +/* From fold_binary. */ +/* Optimize x*pow(x,c) as pow(x,c+1). */ +(if (flag_unsafe_math_optimizations + /* ??? fold-const.c does not check for flag_errno_math. */ + && !flag_errno_math) + (for POW (BUILT_IN_POW BUILT_IN_POWF BUILT_IN_POWL) + (simplify + (mult:c (POW @0 REAL_CST@1) @0) + (POW @0 (plus @1 { build_one_cst (TREE_TYPE (@1)); }))))) /* From fold_builtin_fabs and fold_builtin_abs. */ /* Fold a call to fabs, fabsf or fabsl, to abs, labs, llabs or imaxabs. */ @@ -50,14 +65,14 @@ along with GCC; see the file COPYING3. (abs @0))) /* From fold_builtin_pow. */ -/* Optimize pow(1.0,y) = 1.0. */ -(simplify - (BUILT_IN_POW real_onep@0 @1) - @0) - (for POW (BUILT_IN_POWF BUILT_IN_POW BUILT_IN_POWL) SQRT (BUILT_IN_SQRTF BUILT_IN_SQRT BUILT_IN_SQRTL) CBRT (BUILT_IN_CBRTF BUILT_IN_CBRT BUILT_IN_CBRTL) + /* Optimize pow(1.0,y) = 1.0. 
*/ + (simplify + (POW real_onep@0 @1) + @0) + (simplify (POW @0 REAL_CST@1) (with { REAL_VALUE_TYPE c = TREE_REAL_CST (@1); } @@ -80,14 +95,14 @@ along with GCC; see the file COPYING3. = real_value_truncate (TYPE_MODE (type), dconst_third ()); } (if (flag_unsafe_math_optimizations && REAL_VALUES_EQUAL (c, dconstroot)) - (CBRT @0)))))) + (CBRT @0))))) -/* Strip sign ops from even integer powers. - ??? The code in builtins.c manages to perform this recursively - through the whole expression in arg0 of pow. */ -(for sgnop (abs negate) + /* Strip sign ops from even integer powers. + ??? The code in builtins.c manages to perform this recursively + through the whole expression in arg0 of pow. */ + (for sgnop (abs negate) (simplify - (BUILT_IN_POW (sgnop @0) REAL_CST@1) + (POW (sgnop @0) REAL_CST@1) (with { REAL_VALUE_TYPE c = TREE_REAL_CST (@1); @@ -98,19 +113,26 @@ along with GCC; see the file COPYING3. (if (real_identical (&c, &cint) && (n & 1) == 0 && flag_unsafe_math_optimizations) - (BUILT_IN_POW @0 @1))))) + (POW @0 @1)))))) /* From fold_builtin_sqrt. */ (if (flag_unsafe_math_optimizations) /* Optimize sqrt(expN(x)) = expN(x*0.5). */ - (for expfn (BUILT_IN_EXP10 BUILT_IN_POW10 BUILT_IN_EXP BUILT_IN_EXP2) + (for expfn (BUILT_IN_EXP10F BUILT_IN_EXP10 BUILT_IN_EXP10L + BUILT_IN_POW10F BUILT_IN_POW10 BUILT_IN_POW10L + BUILT_IN_EXPF BUILT_IN_EXP BUILT_IN_EXPL + BUILT_IN_EXP2F BUILT_IN_EXP2 BUILT_IN_EXP2L) + SQRT (BUILT_IN_SQRTF BUILT_IN_SQRT BUILT_IN_SQRTL) (simplify - (BUILT_IN_SQRT (expfn @0)) + (SQRT (expfn @0)) (expfn (mult @0 { build_real (type, dconsthalf); })))) /* Optimize sqrt(Nroot(x)) -> pow(x,1/(2*N)). 
*/ - (for rootfn (BUILT_IN_SQRT BUILT_IN_CBRT) + (for rootfn (BUILT_IN_SQRTF BUILT_IN_SQRT BUILT_IN_SQRTL + BUILT_IN_CBRTF BUILT_IN_CBRT BUILT_IN_CBRTL) + SQRT (BUILT_IN_SQRTF BUILT_IN_SQRT BUILT_IN_SQRTL) + POW (BUILT_IN_POWF BUILT_IN_POW BUILT_IN_POWL) (simplify - (BUILT_IN_SQRT (rootfn @0)) + (SQRT (rootfn @0)) (with { REAL_VALUE_TYPE dconstroot; if (BUILTIN_SQRT_P (rootfn)) dconstroot = dconsthalf; @@ -118,8 +140,10 @@ along with GCC; see the file COPYING3. /* Adjust for the outer root. */ SET_REAL_EXP (&dconstroot, REAL_EXP (&dconstroot) - 1); dconstroot = real_value_truncate (TYPE_MODE (type), dconstroot); } - (BUILT_IN_POW @0 { build_real (type, dconstroot); })))) + (POW @0 { build_real (type, dconstroot); })))) /* Optimize sqrt(pow(x,y)) = pow(|x|,y*0.5). */ - (simplify - (BUILT_IN_SQRT (BUILT_IN_POW @0 @1)) - (BUILT_IN_POW (abs @0) (mult @1 { build_real (TREE_TYPE (@1), dconsthalf); })))) + (for SQRT (BUILT_IN_SQRTF BUILT_IN_SQRT BUILT_IN_SQRTL) + POW (BUILT_IN_POWF BUILT_IN_POW BUILT_IN_POWL) + (simplify + (SQRT (POW @0 @1)) + (POW (abs @0) (mult @1 { build_real (TREE_TYPE (@1), dconsthalf); })))))