On Thu, May 26, 2011 at 7:50 PM, William J. Schmidt
<wschm...@linux.vnet.ibm.com> wrote:
>
> On Thu, 2011-05-26 at 11:22 +0200, Richard Guenther wrote:
>
> <snip>
>
>
>> There are several extra pre-conditions in the original code in
>> builtins.c as well as commens for reasonings (yes, there seem
>> to be duplicates of several of the transforms there ...).  Please
>> try to preserve them.  I noticed especially
>>
>>               /* Optimize pow (x, 0.25) into sqrt (sqrt (x)).  Assume on most
>>                  machines that a builtin sqrt instruction is smaller than a
>>                  call to pow with 0.25, so do this optimization even if
>>                  -Os.  */
>>
>> and
>>
>>       /* Check whether we can do cbrt insstead of pow (x, 1./3.) and
>>          cbrt/sqrts instead of pow (x, 1./6.).  */
>>       if (cbrtfn && ! op
>>           && (tree_expr_nonnegative_p (arg0) || !HONOR_NANS (mode)))
>>
>> where you omit the non-negative check and the check for NANs.
>> Note that -funsafe-math-optimizations does not mean you can
>> ignore nans or signed zero issues.  pow(x,1/3) -> cbrt(x) does not
>> need a -funsafe-math-optimization check either.
>
> OK.  Good catch on the non-negative/NaN check -- that just slipped
> through the cracks.  I've fixed this and beefed up the comments as you
> suggest.
>
>>
>> It would be nice if you could re-arrange this function so that before
>> each individual replacement all preconditions are checked (even if
>> that means duplicating some checks and code) - that way
>> reviewing and later maintainance should be much easier.
>>
>
> Done -- you're right, the result is much cleaner.  Hopefully this
> version is easier to follow.
>
> Thanks,
> Bill
>
>
> 2011-05-25  Bill Schmidt  <wschm...@linux.vnet.ibm.com>
>
>        PR tree-optimization/46728
>        * tree-ssa-math-opts.c (powi_as_mults_1): Add gimple_set_location.
>        (powi_as_mults): Add gimple_set_location.
>        (build_and_insert_call): New.
>        (gimple_expand_builtin_pow): Add handling for pow(x,y) when y is
>        0.5, 0.25, 0.75, 1./3., or 1./6.
>
>
> Index: gcc/tree-ssa-math-opts.c
> ===================================================================
> --- gcc/tree-ssa-math-opts.c    (revision 174199)
> +++ gcc/tree-ssa-math-opts.c    (working copy)
> @@ -965,6 +965,7 @@ powi_as_mults_1 (gimple_stmt_iterator *gsi, locati
>     }
>
>   mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target, op0, op1);
> +  gimple_set_location (mult_stmt, loc);
>   gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
>
>   return ssa_target;
> @@ -999,6 +1000,7 @@ powi_as_mults (gimple_stmt_iterator *gsi, location
>   div_stmt = gimple_build_assign_with_ops (RDIV_EXPR, target,
>                                           build_real (type, dconst1),
>                                           result);
> +  gimple_set_location (div_stmt, loc);
>   gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
>
>   return target;
> @@ -1024,6 +1026,34 @@ gimple_expand_builtin_powi (gimple_stmt_iterator *
>   return NULL_TREE;
>  }
>
> +/* Build a gimple call statement that calls FN with argument ARG.
> +   Set the lhs of the call statement to a fresh SSA name for
> +   variable VAR.  If VAR is NULL, first allocate it.  Insert the
> +   statement prior to GSI's current position, and return the fresh
> +   SSA name.  */
> +
> +static tree
> +build_and_insert_call (gimple_stmt_iterator *gsi, tree fn, tree arg,
> +                      tree *var, location_t loc)
> +{
> +  gimple call_stmt;
> +  tree ssa_target;
> +
> +  if (!*var)
> +    {
> +      *var = create_tmp_var (TREE_TYPE (arg), "powroot");
> +      add_referenced_var (*var);
> +    }
> +
> +  call_stmt = gimple_build_call (fn, 1, arg);
> +  ssa_target = make_ssa_name (*var, NULL);
> +  gimple_set_lhs (call_stmt, ssa_target);
> +  gimple_set_location (call_stmt, loc);
> +  gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
> +
> +  return ssa_target;
> +}
> +
>  /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
>    with location info LOC.  If possible, create an equivalent and
>    less expensive sequence of statements prior to GSI, and return an
> @@ -1033,16 +1063,21 @@ static tree
>  gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
>                           tree arg0, tree arg1)
>  {
> -  REAL_VALUE_TYPE c, cint;
> +  REAL_VALUE_TYPE c, cint, dconst1_4, dconst3_4, dconst1_3, dconst1_6;
>   HOST_WIDE_INT n;
> +  tree type, sqrtfn, cbrtfn, sqrt_arg0, sqrt_sqrt, ssa_target;
> +  tree target = NULL_TREE;
> +  enum machine_mode mode;
> +  bool hw_sqrt_exists;
> +  gimple mult_stmt;
>
>   /* If the exponent isn't a constant, there's nothing of interest
>      to be done.  */
>   if (TREE_CODE (arg1) != REAL_CST)
>     return NULL_TREE;
>
> -  /* If the exponent is equivalent to an integer, expand it into
> -     multiplies when profitable.  */
> +  /* If the exponent is equivalent to an integer, expand to an optimal
> +     multiplication sequence when profitable.  */
>   c = TREE_REAL_CST (arg1);
>   n = real_to_integer (&c);
>   real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
> @@ -1054,6 +1089,97 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *g
>              && powi_cost (n) <= POWI_MAX_MULTS)))
>     return gimple_expand_builtin_powi (gsi, loc, arg0, n);
>
> +  /* Attempt various optimizations using sqrt and cbrt.  */
> +  type = TREE_TYPE (arg0);
> +  mode = TYPE_MODE (type);
> +  sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
> +
> +  /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
> +     unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
> +     sqrt(-0) = -0.  */
> +  if (sqrtfn
> +      && REAL_VALUES_EQUAL (c, dconsthalf)
> +      && (flag_unsafe_math_optimizations
> +         || !HONOR_SIGNED_ZEROS (mode)))

Drop flag_unsafe_math_optimizations - the replacement isn't safe
for -funsafe-math-optimizations -fsigned-zeros.

> +    return build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
> +
> +  /* Optimize pow(x,0.25) = sqrt(sqrt(x)).  Assume on most machines that
> +     a builtin sqrt instruction is smaller than a call to pow with 0.25,
> +     so do this optimization even if -Os.  Don't do this optimization
> +     if we don't have a hardware sqrt insn.  */
> +  dconst1_4 = dconst1;
> +  SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
> +  hw_sqrt_exists = optab_handler(sqrt_optab, mode) != CODE_FOR_nothing;
> +
> +  if (flag_unsafe_math_optimizations
> +      && sqrtfn
> +      && REAL_VALUES_EQUAL (c, dconst1_4)
> +      && hw_sqrt_exists)
> +    {
> +      /* sqrt(x)  */
> +      sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
> +
> +      /* sqrt(sqrt(x))  */
> +      return build_and_insert_call (gsi, sqrtfn, sqrt_arg0, &target, loc);
> +    }
> +
> +  /* Optimize pow(x,0.75) = sqrt(x) * sqrt(sqrt(x)) unless we are
> +     optimizing for space.  Don't do this optimization if we don't have
> +     a hardware sqrt insn.  */
> +  real_from_integer (&dconst3_4, VOIDmode, 3, 0, 0);
> +  SET_REAL_EXP (&dconst3_4, REAL_EXP (&dconst3_4) - 2);
> +
> +  if (flag_unsafe_math_optimizations
> +      && sqrtfn
> +      && optimize_function_for_speed_p (cfun)
> +      && REAL_VALUES_EQUAL (c, dconst3_4)
> +      && hw_sqrt_exists)
> +    {
> +      /* sqrt(x)  */
> +      sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
> +
> +      /* sqrt(sqrt(x))  */
> +      sqrt_sqrt = build_and_insert_call (gsi, sqrtfn, sqrt_arg0, &target, 
> loc);
> +
> +      /* sqrt(x) * sqrt(sqrt(x))  */
> +      ssa_target = make_ssa_name (target, NULL);
> +      mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target,
> +                                               sqrt_arg0, sqrt_sqrt);
> +      gimple_set_location (mult_stmt, loc);
> +      gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
> +
> +      return ssa_target;
> +    }
> +
> +  /* Optimize pow(x,1./3.) = cbrt(x).  This is always safe.  */
> +  cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
> +  dconst1_3 = real_value_truncate (mode, dconst_third ());
> +
> +  if (cbrtfn
> +      && (tree_expr_nonnegative_p (arg0) || !HONOR_NANS (mode))
> +      && REAL_VALUES_EQUAL (c, dconst1_3))
> +    return build_and_insert_call (gsi, cbrtfn, arg0, &target, loc);

As Joseph said this needs to be conditional on flag_unsafe_math_optimization
because of the inexactness of 1./3..
Please add rationale as explained by Joseph for the HONOR_NANS
check.

Now, the tree_expr_nonnegative_p check will always return false
because arg0 is a gimple value.  We would need to implement
either some propagation engine to derive properties for floating
point entities or mimic what tree_expr_nonnegative_p does by
looking at defining statements.  A very simple variant could be
like

bool
gimple_val_nonnegative_p (tree val)
{
  if (tree_expr_nonnegative_p (val))
    return true;

  if (TREE_CODE (val) == SSA_NAME)
   {
     gimple def_stmt = SSA_NAME_DEF_STMT (val);
     if (is_gimple_assign (def_stmt))
       return gimple_assign_rhs_code (def_stmt) == ABS_EXPR;
     else if (is_gimple_call (def_stmt))
       {
          tree fndecl = gimple_call_fndecl (def_stmt);
          if (fndecl
              && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
            {
               switch (DECL_FUNCTION_CODE (fndecl))
                 {
                 CASE_FLT_FN (BUILT_IN_FABS):
                     return true;
                 default:;
                 }
            }
       }
   }

  return false;
}

suitable for gimple-fold.c.

Alternatively you can drop the call and add a FIXME comment.

> +
> +  /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
> +     if we don't have a hardware sqrt insn.  */
> +  dconst1_6 = dconst1_3;
> +  SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
> +
> +  if (flag_unsafe_math_optimizations
> +      && sqrtfn
> +      && cbrtfn
> +      && (tree_expr_nonnegative_p (arg0) || !HONOR_NANS (mode))

Likewise.

Ok with these changes.

Thanks,
Richard.

> +      && optimize_function_for_speed_p (cfun)
> +      && hw_sqrt_exists
> +      && REAL_VALUES_EQUAL (c, dconst1_6))
> +    {
> +      /* sqrt(x)  */
> +      sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
> +
> +      /* cbrt(sqrt(x))  */
> +      return build_and_insert_call (gsi, cbrtfn, sqrt_arg0, &target, loc);
> +    }
> +
>   return NULL_TREE;
>  }

Reply via email to