Hi!

On 2020-05-11T16:51:41+0200, Richard Biener <rguent...@suse.de> wrote:
> This enhances store-order preserving store motion to handle the case
> of non-invariant dependent stores in the sequence of unconditionally
> executed stores on exit by re-issueing them as part of the sequence
> of stores on the exit.  This fixes the observed regression of
> gcc.target/i386/pr64110.c which relies on store-motion of 'b'
> for a loop like
>
>   for (int i = 0; i < j; ++i)
>     *b++ = x;
>
> where for correctness we now no longer apply store-motion.  With
> the patch we emit the correct
>
>   tem = b;
>   for (int i = 0; i < j; ++i)
>     {
>       tem = tem + 1;
>       *tem = x;
>     }
>   b = tem;
>   *tem = x;
>
> preserving the original order of stores.  A testcase reflecting
> the miscompilation done by earlier GCC is added as well.
>
> This also fixes the reported ICE in PR95025 and adds checking code
> to catch it earlier - the issue was not-supported refs propagation
> leaving stray refs in the sequence.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

I noticed that since this commit b6ff3ddecfa93d53867afaaa078f85fc848abbbd
"tree-optimization/94988 - enhance SM some more", for
'gcc.dg/graphite/pr80906.c' XPASSes what you recently had XFAILed in
commit 283cb9ea6293e813e48a1b769e1e0779918ea20a "tree-optimization/57359
- rewrite SM code".  I've thus pushed commit
2210ef7d3d68a027ec16476825049567953c7fa4 "Un-XFAIL
'gcc.dg/graphite/pr80906.c'", see attached.  I assume but have not
verified that's the expected/correct thing vs. your commit introducing a
bug.  ;-)


Grüße
 Thomas


> 2020-05-11  Richard Biener  <rguent...@suse.de>
>
>       PR tree-optimization/94988
>       PR tree-optimization/95025
>       * tree-ssa-loop-im.c (seq_entry): Make a struct, add from.
>       (sm_seq_push_down): Take extra parameter denoting where we
>       moved the ref to.
>       (execute_sm_exit): Re-issue sm_other stores in the correct
>       order.
>       (sm_seq_valid_bb): When always executed, allow sm_other to
>       prevail inbetween sm_ord and record their stored value.
>       (hoist_memory_references): Adjust refs_not_supported propagation
>       and prune sm_other from the end of the ordered sequences.
>
>       * gcc.dg/torture/pr94988.c: New testcase.
>       * gcc.dg/torture/pr95025.c: Likewise.
>       * gcc.dg/torture/pr95045.c: Likewise.
>       * g++.dg/asan/pr95025.C: New testcase.
> ---
>  gcc/testsuite/g++.dg/asan/pr95025.C    |  28 ++++
>  gcc/testsuite/gcc.dg/torture/pr94988.c |  20 +++
>  gcc/testsuite/gcc.dg/torture/pr95025.c |  13 ++
>  gcc/testsuite/gcc.dg/torture/pr95045.c |  29 ++++
>  gcc/tree-ssa-loop-im.c                 | 177 ++++++++++++++++++-------
>  5 files changed, 218 insertions(+), 49 deletions(-)
>  create mode 100644 gcc/testsuite/g++.dg/asan/pr95025.C
>  create mode 100644 gcc/testsuite/gcc.dg/torture/pr94988.c
>  create mode 100644 gcc/testsuite/gcc.dg/torture/pr95025.c
>  create mode 100644 gcc/testsuite/gcc.dg/torture/pr95045.c
>
> diff --git a/gcc/testsuite/g++.dg/asan/pr95025.C 
> b/gcc/testsuite/g++.dg/asan/pr95025.C
> new file mode 100644
> index 00000000000..dabb7e92f82
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/asan/pr95025.C
> @@ -0,0 +1,28 @@
> +// { dg-do compile }
> +// { dg-options "-O2 -fsanitize=address" }
> +
> +struct a {
> +    int b;
> +} * c;
> +struct d {
> +    d *e;
> +};
> +struct f {
> +    bool done;
> +    d *g;
> +};
> +int h;
> +int i(f *j) {
> +    if (j->g) {
> +     j->g = j->g->e;
> +     return h;
> +    }
> +    j->done = true;
> +    return 0;
> +}
> +void k(bool j) { c->b = j; }
> +void l() {
> +    f a;
> +    for (; !(&a)->done; i(&a))
> +      k(true);
> +}
> diff --git a/gcc/testsuite/gcc.dg/torture/pr94988.c 
> b/gcc/testsuite/gcc.dg/torture/pr94988.c
> new file mode 100644
> index 00000000000..1ee99fea5ce
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/torture/pr94988.c
> @@ -0,0 +1,20 @@
> +/* { dg-do run } */
> +
> +short *b;
> +
> +void __attribute__((noipa))
> +bar (short x, int j)
> +{
> +  for (int i = 0; i < j; ++i)
> +    *b++ = x;
> +}
> +
> +int
> +main()
> +{
> +  b = (short *)&b;
> +  bar (0, 1);
> +  if ((short)(__UINTPTR_TYPE__)b != 0)
> +    __builtin_abort ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.dg/torture/pr95025.c 
> b/gcc/testsuite/gcc.dg/torture/pr95025.c
> new file mode 100644
> index 00000000000..5834dc04887
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/torture/pr95025.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +
> +static int a;
> +short b;
> +int *c;
> +void d() {
> +    for (;; a -= 1)
> +      for (; b; b += 1) {
> +       *c ^= 5;
> +       if (a)
> +         return;
> +      }
> +}
> diff --git a/gcc/testsuite/gcc.dg/torture/pr95045.c 
> b/gcc/testsuite/gcc.dg/torture/pr95045.c
> new file mode 100644
> index 00000000000..9f591beb6be
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/torture/pr95045.c
> @@ -0,0 +1,29 @@
> +/* { dg-do run } */
> +
> +int a, c, f;
> +long b;
> +char d;
> +int e[3];
> +int g[9][3][2];
> +int main()
> +{
> +    {
> +h:
> +      for (f = 0; f <= 5; f++) {
> +       b = 3;
> +       for (; b >= 0; b--) {
> +           e[2] = d = 0;
> +           for (; d <= 3; d++) {
> +               g[8][2][0] = e[1] = c = 0;
> +               for (; c <= 1; c++)
> +                 e[c + 1] = g[d + 5][2][c] = 4;
> +           }
> +           if (a)
> +             goto h;
> +       }
> +      }
> +    }
> +  if (e[2] != 4)
> +    __builtin_abort ();
> +  return 0;
> +}
> diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
> index 2aabb54c98d..bb78dfb2ce8 100644
> --- a/gcc/tree-ssa-loop-im.c
> +++ b/gcc/tree-ssa-loop-im.c
> @@ -2209,7 +2209,14 @@ execute_sm (class loop *loop, im_mem_ref *ref,
>         able to execute in arbitrary order with respect to other stores
>     sm_other is used for stores we do not try to apply store motion to.  */
>  enum sm_kind { sm_ord, sm_unord, sm_other };
> -typedef std::pair<unsigned, sm_kind> seq_entry;
> +struct seq_entry
> +{
> +  seq_entry (unsigned f, sm_kind k, tree fr = NULL)
> +    : first (f), second (k), from (fr) {}
> +  unsigned first;
> +  sm_kind second;
> +  tree from;
> +};
>
>  static void
>  execute_sm_exit (class loop *loop, edge ex, vec<seq_entry> &seq,
> @@ -2218,35 +2225,54 @@ execute_sm_exit (class loop *loop, edge ex, 
> vec<seq_entry> &seq,
>    /* Sink the stores to exit from the loop.  */
>    for (unsigned i = seq.length (); i > 0; --i)
>      {
> -      if (seq[i-1].second != kind)
> -     continue;
>        im_mem_ref *ref = memory_accesses.refs_list[seq[i-1].first];
> -      sm_aux *aux = *aux_map.get (ref);
> -      if (!aux->store_flag)
> +      if (seq[i-1].second == sm_other)
>       {
> -       gassign *store;
> -       store = gimple_build_assign (unshare_expr (ref->mem.ref),
> -                                    aux->tmp_var);
> +       gcc_assert (kind == sm_ord && seq[i-1].from != NULL_TREE);
> +       if (dump_file && (dump_flags & TDF_DETAILS))
> +         {
> +           fprintf (dump_file, "Re-issueing dependent store of ");
> +           print_generic_expr (dump_file, ref->mem.ref);
> +           fprintf (dump_file, " from loop %d on exit %d -> %d\n",
> +                    loop->num, ex->src->index, ex->dest->index);
> +         }
> +       gassign *store = gimple_build_assign (unshare_expr (ref->mem.ref),
> +                                             seq[i-1].from);
>         gsi_insert_on_edge (ex, store);
>       }
>        else
> -     execute_sm_if_changed (ex, ref->mem.ref, aux->tmp_var, aux->store_flag,
> -                            loop_preheader_edge (loop), &aux->flag_bbs);
> +     {
> +       sm_aux *aux = *aux_map.get (ref);
> +       if (!aux->store_flag)
> +         {
> +           gassign *store;
> +           store = gimple_build_assign (unshare_expr (ref->mem.ref),
> +                                        aux->tmp_var);
> +           gsi_insert_on_edge (ex, store);
> +         }
> +       else
> +         execute_sm_if_changed (ex, ref->mem.ref, aux->tmp_var,
> +                                aux->store_flag,
> +                                loop_preheader_edge (loop), &aux->flag_bbs);
> +     }
>      }
>  }
>
>  /* Push the SM candidate at index PTR in the sequence SEQ down until
>     we hit the next SM candidate.  Return true if that went OK and
> -   false if we could not disambiguate agains another unrelated ref.  */
> +   false if we could not disambiguate agains another unrelated ref.
> +   Update *AT to the index where the candidate now resides.  */
>
>  static bool
> -sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr)
> +sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr, unsigned *at)
>  {
> +  *at = ptr;
>    for (; ptr > 0; --ptr)
>      {
>        seq_entry &new_cand = seq[ptr];
>        seq_entry &against = seq[ptr-1];
> -      if (against.second == sm_ord)
> +      if (against.second == sm_ord
> +       || (against.second == sm_other && against.from != NULL_TREE))
>       /* Found the tail of the sequence.  */
>       break;
>        if (!refs_independent_p (memory_accesses.refs_list[new_cand.first],
> @@ -2255,6 +2281,7 @@ sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr)
>       /* ???  Prune new_cand from the list of refs to apply SM to.  */
>       return false;
>        std::swap (new_cand, against);
> +      *at = ptr - 1;
>      }
>    return true;
>  }
> @@ -2367,37 +2394,41 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, 
> tree vdef,
>                    not order-preserving SM code.  */
>                 if (first_edge_seq[i].first != edge_seq[i].first)
>                   {
> -                   bitmap_set_bit (refs_not_supported,
> -                                   first_edge_seq[i].first);
> -                   bitmap_set_bit (refs_not_supported, edge_seq[i].first);
> -                   first_edge_seq[i].second = sm_unord;
> +                   if (first_edge_seq[i].second == sm_ord)
> +                     bitmap_set_bit (refs_not_supported,
> +                                     first_edge_seq[i].first);
> +                   if (edge_seq[i].second == sm_ord)
> +                     bitmap_set_bit (refs_not_supported, edge_seq[i].first);
> +                   first_edge_seq[i].second = sm_other;
>                   }
> -               /* sm_unord prevails.  */
> +               /* sm_other prevails.  */
>                 else if (first_edge_seq[i].second != edge_seq[i].second)
>                   {
>                     /* This is just an optimization.  */
>                     gcc_assert (bitmap_bit_p (refs_not_supported,
>                                               first_edge_seq[i].first));
> -                   first_edge_seq[i].second = sm_unord;
> +                   first_edge_seq[i].second = sm_other;
>                   }
>               }
> -           /* Any excess elements become sm_unord since they are now
> +           /* Any excess elements become sm_other since they are now
>                coonditionally executed.  */
>             if (first_edge_seq.length () > edge_seq.length ())
>               {
>                 for (unsigned i = edge_seq.length ();
>                      i < first_edge_seq.length (); ++i)
>                   {
> -                   bitmap_set_bit (refs_not_supported,
> -                                   first_edge_seq[i].first);
> -                   first_edge_seq[i].second = sm_unord;
> +                   if (first_edge_seq[i].second == sm_ord)
> +                     bitmap_set_bit (refs_not_supported,
> +                                     first_edge_seq[i].first);
> +                   first_edge_seq[i].second = sm_other;
>                   }
>               }
>             else if (edge_seq.length () > first_edge_seq.length ())
>               {
>                 for (unsigned i = first_edge_seq.length ();
>                      i < edge_seq.length (); ++i)
> -                 bitmap_set_bit (refs_not_supported, edge_seq[i].first);
> +                 if (edge_seq[i].second == sm_ord)
> +                   bitmap_set_bit (refs_not_supported, edge_seq[i].first);
>               }
>           }
>         /* Use the sequence from the first edge and push SMs down.  */
> @@ -2407,17 +2438,13 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, 
> tree vdef,
>               break;
>             unsigned id = first_edge_seq[i].first;
>             seq.safe_push (first_edge_seq[i]);
> +           unsigned new_idx;
>             if (first_edge_seq[i].second == sm_ord
> -               && !sm_seq_push_down (seq, seq.length () - 1))
> +               && !sm_seq_push_down (seq, seq.length () - 1, &new_idx))
>               {
>                 bitmap_set_bit (refs_not_supported, id);
> -               /* ???  Mark it sm_unord but it's now "somewhere" ... */
> -               for (unsigned i = seq.length (); i != 0; --i)
> -                 if (seq[i - 1].first == id)
> -                   {
> -                     seq[i - 1].second = sm_unord;
> -                     break;
> -                   }
> +               /* Mark it sm_other.  */
> +               seq[new_idx].second = sm_other;
>               }
>           }
>         return 1;
> @@ -2429,21 +2456,21 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, 
> tree vdef,
>        /* One of the stores we want to apply SM to and we've not yet seen.  */
>        else if (bitmap_clear_bit (refs_not_in_seq, data->ref))
>       {
> -       seq.safe_push (std::make_pair (data->ref, sm_ord));
> +       seq.safe_push (seq_entry (data->ref, sm_ord));
>
>         /* 1) push it down the queue until a SMed
>            and not ignored ref is reached, skipping all not SMed refs
>            and ignored refs via non-TBAA disambiguation.  */
> -       if (!sm_seq_push_down (seq, seq.length () - 1))
> +       unsigned new_idx;
> +       if (!sm_seq_push_down (seq, seq.length () - 1, &new_idx)
> +           /* If that fails but we did not fork yet continue, we'll see
> +              to re-materialize all of the stores in the sequence then.
> +              Further stores will only be pushed up to this one.  */
> +           && forked)
>           {
>             bitmap_set_bit (refs_not_supported, data->ref);
> -           /* ???  Mark it sm_unord but it's now "somewhere" ... */
> -           for (unsigned i = seq.length (); i != 0; --i)
> -             if (seq[i - 1].first == data->ref)
> -               {
> -                 seq[i - 1].second = sm_unord;
> -                 break;
> -               }
> +           /* Mark it sm_other.  */
> +           seq[new_idx].second = sm_other;
>           }
>
>         /* 2) check whether we've seen all refs we want to SM and if so
> @@ -2453,7 +2480,8 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, tree 
> vdef,
>       }
>        else
>       /* Another store not part of the final sequence.  Simply push it.  */
> -     seq.safe_push (std::make_pair (data->ref, sm_other));
> +     seq.safe_push (seq_entry (data->ref, sm_other,
> +                               gimple_assign_rhs1 (def)));
>
>        vdef = gimple_vuse (def);
>      }
> @@ -2513,21 +2541,72 @@ hoist_memory_references (class loop *loop, bitmap 
> mem_refs,
>        std::pair<edge, vec<seq_entry> > *seq;
>        FOR_EACH_VEC_ELT (sms, i, seq)
>       {
> +       bool need_to_push = false;
>         for (unsigned i = 0; i < seq->second.length (); ++i)
>           {
> -           if (seq->second[i].second == sm_other)
> +           sm_kind kind = seq->second[i].second;
> +           if (kind == sm_other && seq->second[i].from == NULL_TREE)
>               break;
>             unsigned id = seq->second[i].first;
> -           if (bitmap_bit_p (refs_not_supported, id))
> -             seq->second[i].second = sm_other;
> -           else if (!sm_seq_push_down (seq->second, i))
> +           unsigned new_idx;
> +           if (kind == sm_ord
> +               && bitmap_bit_p (refs_not_supported, id))
>               {
> -               if (bitmap_set_bit (refs_not_supported, id))
> -                 changed = true;
> +               seq->second[i].second = sm_other;
> +               gcc_assert (seq->second[i].from == NULL_TREE);
> +               need_to_push = true;
> +             }
> +           else if (need_to_push
> +                    && !sm_seq_push_down (seq->second, i, &new_idx))
> +             {
> +               /* We need to push down both sm_ord and sm_other
> +                  but for the latter we need to disqualify all
> +                  following refs.  */
> +               if (kind == sm_ord)
> +                 {
> +                   if (bitmap_set_bit (refs_not_supported, id))
> +                     changed = true;
> +                   seq->second[new_idx].second = sm_other;
> +                 }
> +               else
> +                 {
> +                   for (unsigned j = seq->second.length () - 1;
> +                        j > new_idx; --j)
> +                     if (seq->second[j].second == sm_ord
> +                         && bitmap_set_bit (refs_not_supported,
> +                                            seq->second[j].first))
> +                       changed = true;
> +                   seq->second.truncate (new_idx);
> +                   break;
> +                 }
>               }
>           }
>       }
>      }
> +  std::pair<edge, vec<seq_entry> > *seq;
> +  FOR_EACH_VEC_ELT (sms, i, seq)
> +    {
> +      /* Prune sm_other from the end.  */
> +      while (!seq->second.is_empty ()
> +          && seq->second.last ().second == sm_other)
> +     seq->second.pop ();
> +      /* Prune duplicates from the start.  */
> +      auto_bitmap seen (&lim_bitmap_obstack);
> +      unsigned j, k;
> +      for (j = k = 0; j < seq->second.length (); ++j)
> +     if (bitmap_set_bit (seen, seq->second[j].first))
> +       {
> +         if (k != j)
> +           seq->second[k] = seq->second[j];
> +         ++k;
> +       }
> +      seq->second.truncate (k);
> +      /* And verify.  */
> +      seq_entry *e;
> +      FOR_EACH_VEC_ELT (seq->second, j, e)
> +     gcc_assert (e->second == sm_ord
> +                 || (e->second == sm_other && e->from != NULL_TREE));
> +    }
>
>    /* Verify dependence for refs we cannot handle with the order preserving
>       code (refs_not_supported) or prune them from mem_refs.  */
> @@ -2540,7 +2619,7 @@ hoist_memory_references (class loop *loop, bitmap 
> mem_refs,
>        /* We've now verified store order for ref with respect to all other
>        stores in the loop does not matter.  */
>        else
> -     unord_refs.safe_push (std::make_pair (i, sm_unord));
> +     unord_refs.safe_push (seq_entry (i, sm_unord));
>      }
>
>    hash_map<im_mem_ref *, sm_aux *> aux_map;


-----------------
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter
>From 2210ef7d3d68a027ec16476825049567953c7fa4 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge <tho...@codesourcery.com>
Date: Sat, 6 Jun 2020 18:23:28 +0200
Subject: [PATCH] Un-XFAIL 'gcc.dg/graphite/pr80906.c'

The recent commit b6ff3ddecfa93d53867afaaa078f85fc848abbbd
"tree-optimization/94988 - enhance SM some more" fixed this.

	gcc/testsuite/
	PR tree-optimization/94988
	* gcc.dg/graphite/pr80906.c: Un-XFAIL.
---
 gcc/testsuite/gcc.dg/graphite/pr80906.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/graphite/pr80906.c b/gcc/testsuite/gcc.dg/graphite/pr80906.c
index 85981b1c587a..59c7f59cadff 100644
--- a/gcc/testsuite/gcc.dg/graphite/pr80906.c
+++ b/gcc/testsuite/gcc.dg/graphite/pr80906.c
@@ -25,5 +25,4 @@ ec (int lh[][2])
   return c5 + m3;
 }
 
-/* We cannot perform a required store motion.  */
-/* { dg-final { scan-tree-dump "isl AST to Gimple succeeded" "graphite" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "isl AST to Gimple succeeded" "graphite" } } */
-- 
2.27.0

Reply via email to