On Tue, Oct 27, 2020 at 6:32 AM Alexandre Oliva <ol...@adacore.com> wrote:
>
> On Oct 23, 2020, Richard Biener <richard.guent...@gmail.com> wrote:
>
> > Can you move it one pass further after sink please?
>
> I did, but it didn't solve the recip regressions that my first attempt
> brought about.
>
> > Also I don't
> > remember exactly but does pass_sincos only handle sin/cos unifying?
>
> It rearranges some powi computations, and that's what breaks recip.  It
> adds a copy of y*y in extract_recip_[34], we'd need a forwprop or
> similar to get rid of the trivial copy before recip could do its job
> properly again.
>
> So I figured I'd try to cse type conversions before sincos, and that
> turned out to be pretty easy, and it didn't regress anything.
>
> Regstrapped on x86_64-linux-gnu.  Ok to install?
>
>
> CSE conversions within sincos
>
> From: Alexandre Oliva <ol...@adacore.com>
>
> On platforms in which Aux_[Real_Type] involves non-NOP conversions
> (e.g., between single- and double-precision, or between short float
> and float), the conversions before the calls are CSEd too late for
> sincos to combine calls.
>
> This patch enables the sincos pass to CSE type casts used as arguments
> to eligible calls before looking for other calls using the same
> operand.
>
>
> for  gcc/ChangeLog
>
>         * tree-ssa-math-opts.c (sincos_stats): Rename inserted to
>         sincos_inserted.  Add conv_inserted.
>         (maybe_record_sincos): Rename to...
>         (maybe_record_stmt): ... this.
>         (execute_cse_conv_1): New.
>         (execute_cse_sincos_1): Call it.  Adjust.
>         (pass_cse_sincos::execute): Adjust.  Report conv_inserted.
>
> for  gcc/testsuite/ChangeLog
>
>         * gnat.dg/sin_cos.ads: New.
>         * gnat.dg/sin_cos.adb: New.
>         * gcc.dg/sin_cos.c: New.
> ---
>  gcc/testsuite/gcc.dg/sin_cos.c    |   41 +++++++++++++
>  gcc/testsuite/gnat.dg/sin_cos.adb |   14 ++++
>  gcc/testsuite/gnat.dg/sin_cos.ads |    4 +
>  gcc/tree-ssa-math-opts.c          |  119 
> +++++++++++++++++++++++++++++++++++--
>  4 files changed, 171 insertions(+), 7 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/sin_cos.c
>  create mode 100644 gcc/testsuite/gnat.dg/sin_cos.adb
>  create mode 100644 gcc/testsuite/gnat.dg/sin_cos.ads
>
> diff --git a/gcc/testsuite/gcc.dg/sin_cos.c b/gcc/testsuite/gcc.dg/sin_cos.c
> new file mode 100644
> index 00000000..aa71dca
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/sin_cos.c
> @@ -0,0 +1,41 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +/* This maps to essentially the same gimple that is generated for
> +   gnat.dg/sin_cos.adb, on platforms that use the wraplf variant of
> +   Ada.Numerics.Aux_Float.  The value of EPSILON is not relevant to
> +   the test, but the test must be there to keep the conversions in
> +   different BBs long enough to trigger the problem that prevented the
> +   sincos optimization, because the arguments passed to sin and cos
> +   didn't get unified into a single SSA_NAME in time for sincos.  */
> +
> +#include <math.h>
> +
> +#define EPSILON 3.4526697709225118160247802734375e-4
> +
> +static float my_sinf(float x) {
> +  return (float) sin ((double) x);
> +}
> +
> +static float wrap_sinf(float x) {
> +  if (fabs (x) < EPSILON)
> +    return 0;
> +  return my_sinf (x);
> +}
> +
> +static float my_cosf(float x) {
> +  return (float) cos ((double) x);
> +}
> +
> +static float wrap_cosf(float x) {
> +  if (fabs (x) < EPSILON)
> +    return 1;
> +  return my_cosf (x);
> +}
> +
> +float my_sin_cos(float x, float *s, float *c) {
> +  *s = wrap_sinf (x);
> +  *c = wrap_cosf (x);
> +}
> +
> +/* { dg-final { scan-assembler "sincos\|cexp" { target *-linux-gnu* 
> *-w64-mingw* *-*-vxworks* } } } */
> diff --git a/gcc/testsuite/gnat.dg/sin_cos.adb 
> b/gcc/testsuite/gnat.dg/sin_cos.adb
> new file mode 100644
> index 00000000..6e18df9
> --- /dev/null
> +++ b/gcc/testsuite/gnat.dg/sin_cos.adb
> @@ -0,0 +1,14 @@
> +--  { dg-do compile }
> +--  { dg-options "-O2 -gnatn" }
> +
> +with Ada.Numerics.Elementary_Functions;
> +use Ada.Numerics.Elementary_Functions;
> +package body Sin_Cos is
> +   procedure Sin_Cos (Angle : T; SinA, CosA : out T) is
> +   begin
> +      SinA := Sin (Angle);
> +      CosA := Cos (Angle);
> +   end;
> +end Sin_Cos;
> +
> +--  { dg-final { scan-assembler "sincos\|cexp" { target *-linux-gnu* 
> *-w64-mingw* *-*-vxworks* } } }
> diff --git a/gcc/testsuite/gnat.dg/sin_cos.ads 
> b/gcc/testsuite/gnat.dg/sin_cos.ads
> new file mode 100644
> index 00000000..a0eff3d
> --- /dev/null
> +++ b/gcc/testsuite/gnat.dg/sin_cos.ads
> @@ -0,0 +1,4 @@
> +package Sin_Cos is
> +   subtype T is Float;
> +   procedure Sin_Cos (Angle : T; SinA, CosA : out T);
> +end Sin_Cos;
> diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
> index 90dfb98..a32f5ca 100644
> --- a/gcc/tree-ssa-math-opts.c
> +++ b/gcc/tree-ssa-math-opts.c
> @@ -186,7 +186,11 @@ static struct
>  static struct
>  {
>    /* Number of cexpi calls inserted.  */
> -  int inserted;
> +  int sincos_inserted;
> +
> +  /* Number of conversions inserted.  */
> +  int conv_inserted;
> +
>  } sincos_stats;
>
>  static struct
> @@ -1106,7 +1110,7 @@ make_pass_cse_reciprocals (gcc::context *ctxt)
>     statements in the vector.  */
>
>  static bool
> -maybe_record_sincos (vec<gimple *> *stmts,
> +maybe_record_stmt (vec<gimple *> *stmts,
>                      basic_block *top_bb, gimple *use_stmt)
>  {
>    basic_block use_bb = gimple_bb (use_stmt);
> @@ -1126,6 +1130,103 @@ maybe_record_sincos (vec<gimple *> *stmts,
>    return true;
>  }
>
> +/* If NAME is the result of a type conversion, look for other
> +   conversions from the same source to the same type and CSE them.
> +   Replace results of conversions in sin, cos and cexpi calls with the
> +   CSEd SSA_NAME.  Return a SSA_NAME that holds the result of the
> +   conversion to be used instead of NAME.  */
> +
> +static tree
> +execute_cse_conv_1 (tree name)
> +{
> +  if (SSA_NAME_IS_DEFAULT_DEF (name))
> +    return name;
> +
> +  gimple *def_stmt = SSA_NAME_DEF_STMT (name);
> +
> +  if (!gimple_assign_cast_p (def_stmt))
> +    return name;
> +
> +  tree src = gimple_assign_rhs1 (def_stmt);

For trapping math SRC may be a constant?  Better be safe
and guard against TREE_CODE (src) != SSA_NAME.

You also want to guard against SSA_NAME_OCCURS_IN_ABNORMAL_PHI (src)
since you cannot generally propagate or move uses of those.  That
applies to 'name' as well, and ...

> +  int count = 0;
> +  imm_use_iterator use_iter;
> +  gimple *use_stmt;
> +  auto_vec<gimple *> stmts;
> +  basic_block top_bb = NULL;
> +
> +  /* Record equivalent conversions from the same source.  */
> +  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
> +    {
> +      if (!gimple_assign_cast_p (use_stmt))
> +       continue;

... the lhs of the other conversions.

> +      if (!types_compatible_p (TREE_TYPE (name),
> +                              TREE_TYPE (gimple_assign_lhs (use_stmt))))
> +       continue;
> +
> +      gcc_checking_assert (gimple_assign_rhs1 (use_stmt)
> +                          == gimple_assign_rhs1 (def_stmt));
> +
> +      if (maybe_record_stmt (&stmts, &top_bb, use_stmt))
> +       count++;
> +    }
> +
> +  if (count <= 1)
> +    return name;
> +
> +  /* Build and insert a new conversion.  */
> +  name = make_temp_ssa_name (TREE_TYPE (name), def_stmt, "convtmp");
> +  gimple *stmt = gimple_build_assign (name,
> +                                     gimple_assign_rhs_code (def_stmt),
> +                                     gimple_assign_rhs1 (def_stmt));
> +
> +  gimple_stmt_iterator gsi;
> +  if (!SSA_NAME_IS_DEFAULT_DEF (name)
> +      && gimple_code (def_stmt) != GIMPLE_PHI
> +      && gimple_bb (def_stmt) == top_bb)
> +    gsi = gsi_for_stmt (def_stmt);

So I think this will go wrong if def_stmt is in a BB like

    def1 = (T) src;
    def2 = (T) src;

and def_stmt is def2.  I think you need to look at the
definition of SRC instead, so

   if (!SSA_NAME_IS_DEFAULT_DEF (src)
       && gimple_code (SSA_NAME_DEF_STMT (src)) != GIMPLE_PHI
       && gimple_bb (SSA_NAME_DEF_STMT (src)) == top_bb)
   {
     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (src);
     gsi_insert_after (...);

> +  else
> +    gsi = gsi_after_labels (top_bb);
> +  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
> +  update_stmt (stmt);
> +  sincos_stats.conv_inserted++;
> +
> +  /* And adjust the recorded old conversion sites.  */
> +  for (int i = 0; stmts.iterate (i, &use_stmt); ++i)
> +    {
> +      tree lhs = gimple_assign_lhs (use_stmt);
> +      gimple_assign_set_rhs1 (use_stmt, name);
> +      gimple_assign_set_rhs_code (use_stmt, SSA_NAME);
> +      update_stmt (use_stmt);
> +
> +      /* Replace uses of LHS with NAME in sin, cos and cexpi calls
> +        right away, so that we can recognize them as taking the same
> +        arguments.  */
> +      FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs)
> +       {
> +         if (gimple_code (use_stmt) != GIMPLE_CALL
> +             || !gimple_call_lhs (use_stmt))
> +           continue;

Any reason you are not replacing all uses via replace_uses_by
and removing the old conversion stmts?  OK, removing might
wreck the upthread iterator so replacing with GIMPLE_NOP
is the usual trick then.

> +         switch (gimple_call_combined_fn (use_stmt))
> +           {
> +           CASE_CFN_COS:
> +           CASE_CFN_SIN:
> +           CASE_CFN_CEXPI:
> +             gcc_checking_assert (gimple_call_arg (use_stmt, 0) == lhs);
> +             gimple_call_set_arg (use_stmt, 0, name);
> +             update_stmt (use_stmt);
> +             break;
> +
> +           default:
> +             continue;
> +           }
> +       }
> +    }
> +
> +  return name;
> +}
> +
>  /* Look for sin, cos and cexpi calls with the same argument NAME and
>     create a single call to cexpi CSEing the result in this case.
>     We first walk over all immediate uses of the argument collecting
> @@ -1147,6 +1248,8 @@ execute_cse_sincos_1 (tree name)
>    int i;
>    bool cfg_changed = false;
>
> +  name = execute_cse_conv_1 (name);
> +
>    FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
>      {
>        if (gimple_code (use_stmt) != GIMPLE_CALL
> @@ -1156,15 +1259,15 @@ execute_cse_sincos_1 (tree name)
>        switch (gimple_call_combined_fn (use_stmt))
>         {
>         CASE_CFN_COS:
> -         seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
> +         seen_cos |= maybe_record_stmt (&stmts, &top_bb, use_stmt) ? 1 : 0;
>           break;
>
>         CASE_CFN_SIN:
> -         seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
> +         seen_sin |= maybe_record_stmt (&stmts, &top_bb, use_stmt) ? 1 : 0;
>           break;
>
>         CASE_CFN_CEXPI:
> -         seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 
> 0;
> +         seen_cexpi |= maybe_record_stmt (&stmts, &top_bb, use_stmt) ? 1 : 0;
>           break;
>
>         default:;
> @@ -1208,7 +1311,7 @@ execute_cse_sincos_1 (tree name)
>        gsi = gsi_after_labels (top_bb);
>        gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
>      }
> -  sincos_stats.inserted++;
> +  sincos_stats.sincos_inserted++;
>
>    /* And adjust the recorded old call sites.  */
>    for (i = 0; stmts.iterate (i, &use_stmt); ++i)
> @@ -2295,7 +2398,9 @@ pass_cse_sincos::execute (function *fun)
>      }
>
>    statistics_counter_event (fun, "sincos statements inserted",
> -                           sincos_stats.inserted);
> +                           sincos_stats.sincos_inserted);
> +  statistics_counter_event (fun, "conv statements inserted",
> +                           sincos_stats.conv_inserted);
>
>    return cfg_changed ? TODO_cleanup_cfg : 0;
>  }
>
>
> --
> Alexandre Oliva, happy hacker
> https://FSFLA.org/blogs/lxo/
> Free Software Activist
> GNU Toolchain Engineer

Reply via email to