On Wed, 28 Mar 2018, Jakub Jelinek wrote:

> On Wed, Mar 28, 2018 at 09:46:52AM +0200, Richard Biener wrote:
> > > +/* Return true if pow(cst, x) should be optimized into exp(log(cst) * 
> > > x).  */
> > > +
> > > +static bool
> > > +optimize_pow_to_exp (tree arg0, tree arg1)
> > > +{
> > > +  return false;
> > > +}
> > 
> > So instead of this simply guard the pattern with #if GIMPLE as we already
> > do for some.  That saves in generic-match.c code size.
> 
> Ok.
> > > --- gcc/gimple-match-head.c.jj    2018-02-13 09:33:31.107560174 +0100
> > > +++ gcc/gimple-match-head.c       2018-03-27 18:48:21.205369113 +0200
> > > @@ -840,3 +840,55 @@ canonicalize_math_after_vectorization_p
> > >  {
> > >    return !cfun || (cfun->curr_properties & PROP_gimple_lvec) != 0;
> > >  }
> > > +
> > > +/* Return true if pow(cst, x) should be optimized into exp(log(cst) * x).
> > > +   As a workaround for SPEC CPU2017 628.pop2_s, don't do it if arg0
> > > +   is 10.0, arg1 = phi_res + cst1 and phi_res = PHI <cst2, ...>
> > > +   where cst1 + cst2 is an exact integer, because then pow (10.0, arg1)
> > > +   will likely be exact, while exp (log (10.0) * arg1) might be not. */
> > 
> > So this looks somewhat like a very specific SPEC hack to me.
> > Can we make this sligtly more generic and consider
> > 
> >  pow (integer, cst1 [+- cst2])
> > 
> > where the 2nd arg is an exact integer?  Basically allow not only
> > 10 for arg0 but any real_is_integer plus allow arg1 to be directly
> > fed by the PHI, not only via an intermediate plus (here also consider
> > a minus, negate or other binary op we can fed to const_binop).
> 
> Like this?  I'm just handling PLUS_EXPR and MINUS_EXPR and no +/- at all.
> Furthermore, I've moved it to the exp replacement only, if we do exp2, it is
> fine to optimize it even if cst1 + cst2 is integer, it is just the exp and
> log that isn't exact for originally integral values.

Yes, that looks good.

> > Btw, did anybody file a defect with SPEC for this?
> 
> I don't really have any SPEC experience, so will defer that to those
> involved with SPEC benchmarking.  The fix would be
> to add some small epsilon to the
>        if (chlcnc(m) .le. chlamnt .and. &
>            chlamnt .le. chlcnc(m+1) ) then
> comparisons, but bet they'll argue that the upstream pop does it that way.

Probably...

Thanks,
Richard.

> 2018-03-28  Jakub Jelinek  <ja...@redhat.com>
> 
>       PR tree-optimization/82004
>       * gimple-match-head.c (optimize_pow_to_exp): New function.
>       * match.pd (pow(C,x) -> exp(log(C)*x)): Wrap with #if GIMPLE.
>       Don't fold to exp if optimize_pow_to_exp is false.
> 
>       * gcc.dg/pr82004.c: New test.
> 
> --- gcc/gimple-match-head.c.jj        2018-03-27 19:08:32.047824741 +0200
> +++ gcc/gimple-match-head.c   2018-03-28 15:30:42.687565271 +0200
> @@ -840,3 +840,71 @@ canonicalize_math_after_vectorization_p
>  {
>    return !cfun || (cfun->curr_properties & PROP_gimple_lvec) != 0;
>  }
> +
> +/* Return true if pow(cst, x) should be optimized into exp(log(cst) * x).
> +   As a workaround for SPEC CPU2017 628.pop2_s, don't do it if arg0
> +   is an exact integer, arg1 = phi_res +/- cst1 and phi_res = PHI <cst2, ...>
> +   where cst2 +/- cst1 is an exact integer, because then pow (arg0, arg1)
> +   will likely be exact, while exp (log (arg0) * arg1) might be not.
> +   Also don't do it if arg1 is phi_res above and cst2 is an exact integer.  
> */
> +
> +static bool
> +optimize_pow_to_exp (tree arg0, tree arg1)
> +{
> +  gcc_assert (TREE_CODE (arg0) == REAL_CST);
> +  if (!real_isinteger (TREE_REAL_CST_PTR (arg0), TYPE_MODE (TREE_TYPE 
> (arg0))))
> +    return true;
> +
> +  if (TREE_CODE (arg1) != SSA_NAME)
> +    return true;
> +
> +  gimple *def = SSA_NAME_DEF_STMT (arg1);
> +  gphi *phi = dyn_cast <gphi *> (def);
> +  tree cst1 = NULL_TREE;
> +  enum tree_code code = ERROR_MARK;
> +  if (!phi)
> +    {
> +      if (!is_gimple_assign (def))
> +     return true;
> +      code = gimple_assign_rhs_code (def);
> +      switch (code)
> +     {
> +     case PLUS_EXPR:
> +     case MINUS_EXPR:
> +       break;
> +     default:
> +       return true;
> +     }
> +      if (TREE_CODE (gimple_assign_rhs1 (def)) != SSA_NAME
> +       || TREE_CODE (gimple_assign_rhs2 (def)) != REAL_CST)
> +     return true;
> +
> +      cst1 = gimple_assign_rhs2 (def);
> +
> +      phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def)));
> +      if (!phi)
> +     return true;
> +    }
> +
> +  tree cst2 = NULL_TREE;
> +  int n = gimple_phi_num_args (phi);
> +  for (int i = 0; i < n; i++)
> +    {
> +      tree arg = PHI_ARG_DEF (phi, i);
> +      if (TREE_CODE (arg) != REAL_CST)
> +     continue;
> +      else if (cst2 == NULL_TREE)
> +     cst2 = arg;
> +      else if (!operand_equal_p (cst2, arg, 0))
> +     return true;
> +    }
> +
> +  if (cst1 && cst2)
> +    cst2 = const_binop (code, TREE_TYPE (cst2), cst2, cst1);
> +  if (cst2
> +      && TREE_CODE (cst2) == REAL_CST
> +      && real_isinteger (TREE_REAL_CST_PTR (cst2),
> +                      TYPE_MODE (TREE_TYPE (cst2))))
> +    return false;
> +  return true;
> +}
> --- gcc/match.pd.jj   2018-03-27 19:08:36.336826318 +0200
> +++ gcc/match.pd      2018-03-28 15:19:50.208231654 +0200
> @@ -4006,6 +4006,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   /* pow(C,x) -> exp(log(C)*x) if C > 0,
>      or if C is a positive power of 2,
>      pow(C,x) -> exp2(log2(C)*x).  */
> +#if GIMPLE
>   (for pows (POW)
>        exps (EXP)
>        logs (LOG)
> @@ -4035,8 +4036,10 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>        }
>       }
>       (if (!use_exp2)
> -      (exps (mult (logs @0) @1))
> +      (if (optimize_pow_to_exp (@0, @1))
> +       (exps (mult (logs @0) @1)))
>        (exp2s (mult (log2s @0) @1)))))))
> +#endif
>  
>   /* pow(C,x)*expN(y) -> expN(logN(C)*x+y) if C > 0.  */
>   (for pows (POW)
> --- gcc/testsuite/gcc.dg/pr82004.c.jj 2018-03-28 15:16:09.417118760 +0200
> +++ gcc/testsuite/gcc.dg/pr82004.c    2018-03-28 15:16:09.417118760 +0200
> @@ -0,0 +1,32 @@
> +/* PR tree-optimization/82004 */
> +/* { dg-do run } */
> +/* { dg-options "-Ofast" } */
> +
> +extern double log10 (double);
> +extern double pow (double, double);
> +
> +__attribute__((noipa)) void
> +bar (double x)
> +{
> +  if (x < 0.001)
> +    __builtin_abort ();
> +  asm volatile ("" : : : "memory");
> +}
> +
> +int
> +main ()
> +{
> +  double d = 0.001;
> +  double e = 10.0;
> +  double f = (log10 (e) - log10 (d)) / 400.0;
> +  double g = log10 (d) - f;
> +  volatile int q = 0;
> +  int i;
> +  if (__builtin_expect (q == 0, 0))
> +    for (i = 0; i < 400; ++i)
> +      {
> +     g = g + f;
> +     bar (pow (10.0, g));
> +      }
> +  return 0;  
> +}
> 
> 
>       Jakub
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)

Reply via email to