On Tue, 16 Jun 2020, Thomas Schwinge wrote: > Hi! > > On 2020-05-11T16:51:41+0200, Richard Biener <rguent...@suse.de> wrote: > > This enhances store-order preserving store motion to handle the case > > of non-invariant dependent stores in the sequence of unconditionally > > executed stores on exit by re-issueing them as part of the sequence > > of stores on the exit. This fixes the observed regression of > > gcc.target/i386/pr64110.c which relies on store-motion of 'b' > > for a loop like > > > > for (int i = 0; i < j; ++i) > > *b++ = x; > > > > where for correctness we now no longer apply store-motion. With > > the patch we emit the correct > > > > tem = b; > > for (int i = 0; i < j; ++i) > > { > > tem = tem + 1; > > *tem = x; > > } > > b = tem; > > *tem = x; > > > > preserving the original order of stores. A testcase reflecting > > the miscompilation done by earlier GCC is added as well. > > > > This also fixes the reported ICE in PR95025 and adds checking code > > to catch it earlier - the issue was not-supported refs propagation > > leaving stray refs in the sequence. > > > > Bootstrapped and tested on x86_64-unknown-linux-gnu, applied. > > I noticed that since this commit b6ff3ddecfa93d53867afaaa078f85fc848abbbd > "tree-optimization/94988 - enhance SM some more", for > 'gcc.dg/graphite/pr80906.c' XPASSes what you recently had XFAILed in > commit 283cb9ea6293e813e48a1b769e1e0779918ea20a "tree-optimization/57359 > - rewrite SM code". I've thus pushed commit > 2210ef7d3d68a027ec16476825049567953c7fa4 "Un-XFAIL > 'gcc.dg/graphite/pr80906.c'", see attached. I assume but have not > verified that's the expected/correct thing vs. your commit introducing a > bug. ;-)
Ah, sorry - I also noticed this and wanted to update the test but keep forgetting. So thanks for doing it. We're indeed again doing the expected store motion thanks to the folloup enhancements. Richard > > Grüße > Thomas > > > > 2020-05-11 Richard Biener <rguent...@suse.de> > > > > PR tree-optimization/94988 > > PR tree-optimization/95025 > > * tree-ssa-loop-im.c (seq_entry): Make a struct, add from. > > (sm_seq_push_down): Take extra parameter denoting where we > > moved the ref to. > > (execute_sm_exit): Re-issue sm_other stores in the correct > > order. > > (sm_seq_valid_bb): When always executed, allow sm_other to > > prevail inbetween sm_ord and record their stored value. > > (hoist_memory_references): Adjust refs_not_supported propagation > > and prune sm_other from the end of the ordered sequences. > > > > * gcc.dg/torture/pr94988.c: New testcase. > > * gcc.dg/torture/pr95025.c: Likewise. > > * gcc.dg/torture/pr95045.c: Likewise. > > * g++.dg/asan/pr95025.C: New testcase. > > --- > > gcc/testsuite/g++.dg/asan/pr95025.C | 28 ++++ > > gcc/testsuite/gcc.dg/torture/pr94988.c | 20 +++ > > gcc/testsuite/gcc.dg/torture/pr95025.c | 13 ++ > > gcc/testsuite/gcc.dg/torture/pr95045.c | 29 ++++ > > gcc/tree-ssa-loop-im.c | 177 ++++++++++++++++++------- > > 5 files changed, 218 insertions(+), 49 deletions(-) > > create mode 100644 gcc/testsuite/g++.dg/asan/pr95025.C > > create mode 100644 gcc/testsuite/gcc.dg/torture/pr94988.c > > create mode 100644 gcc/testsuite/gcc.dg/torture/pr95025.c > > create mode 100644 gcc/testsuite/gcc.dg/torture/pr95045.c > > > > diff --git a/gcc/testsuite/g++.dg/asan/pr95025.C > > b/gcc/testsuite/g++.dg/asan/pr95025.C > > new file mode 100644 > > index 00000000000..dabb7e92f82 > > --- /dev/null > > +++ b/gcc/testsuite/g++.dg/asan/pr95025.C > > @@ -0,0 +1,28 @@ > > +// { dg-do compile } > > +// { dg-options "-O2 -fsanitize=address" } > > + > > +struct a { > > + int b; > > +} * c; > > +struct d { > > + d *e; > > +}; > > +struct f { > > + bool done; > > + d *g; > > +}; > > +int h; > > +int i(f *j) { > > + if (j->g) { > > + j->g = j->g->e; > > + return h; > > + } > > + j->done = true; > > + return 0; > > +} > > +void k(bool j) { c->b = j; } > > +void l() { > > + f a; > > + for (; !(&a)->done; i(&a)) > > + k(true); > > +} > > diff --git a/gcc/testsuite/gcc.dg/torture/pr94988.c > > b/gcc/testsuite/gcc.dg/torture/pr94988.c > > new file mode 100644 > > index 00000000000..1ee99fea5ce > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/torture/pr94988.c > > @@ -0,0 +1,20 @@ > > +/* { dg-do run } */ > > + > > +short *b; > > + > > +void __attribute__((noipa)) > > +bar (short x, int j) > > +{ > > + for (int i = 0; i < j; ++i) > > + *b++ = x; > > +} > > + > > +int > > +main() > > +{ > > + b = (short *)&b; > > + bar (0, 1); > > + if ((short)(__UINTPTR_TYPE__)b != 0) > > + __builtin_abort (); > > + return 0; > > +} > > diff --git a/gcc/testsuite/gcc.dg/torture/pr95025.c > > b/gcc/testsuite/gcc.dg/torture/pr95025.c > > new file mode 100644 > > index 00000000000..5834dc04887 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/torture/pr95025.c > > @@ -0,0 +1,13 @@ > > +/* { dg-do compile } */ > > + > > +static int a; > > +short b; > > +int *c; > > +void d() { > > + for (;; a -= 1) > > + for (; b; b += 1) { > > + *c ^= 5; > > + if (a) > > + return; > > + } > > +} > > diff --git a/gcc/testsuite/gcc.dg/torture/pr95045.c > > b/gcc/testsuite/gcc.dg/torture/pr95045.c > > new file mode 100644 > > index 00000000000..9f591beb6be > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/torture/pr95045.c > > @@ -0,0 +1,29 @@ > > +/* { dg-do run } */ > > + > > +int a, c, f; > > +long b; > > +char d; > > +int e[3]; > > +int g[9][3][2]; > > +int main() > > +{ > > + { > > +h: > > + for (f = 0; f <= 5; f++) { > > + b = 3; > > + for (; b >= 0; b--) { > > + e[2] = d = 0; > > + for (; d <= 3; d++) { > > + g[8][2][0] = e[1] = c = 0; > > + for (; c <= 1; c++) > > + e[c + 1] = g[d + 5][2][c] = 4; > > + } > > + if (a) > > + goto h; > > + } > > + } > > + } > > + if (e[2] != 4) > > + __builtin_abort (); > > + return 0; > > +} > > diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c > > index 2aabb54c98d..bb78dfb2ce8 100644 > > --- a/gcc/tree-ssa-loop-im.c > > +++ b/gcc/tree-ssa-loop-im.c > > @@ -2209,7 +2209,14 @@ execute_sm (class loop *loop, im_mem_ref *ref, > > able to execute in arbitrary order with respect to other stores > > sm_other is used for stores we do not try to apply store motion to. */ > > enum sm_kind { sm_ord, sm_unord, sm_other }; > > -typedef std::pair<unsigned, sm_kind> seq_entry; > > +struct seq_entry > > +{ > > + seq_entry (unsigned f, sm_kind k, tree fr = NULL) > > + : first (f), second (k), from (fr) {} > > + unsigned first; > > + sm_kind second; > > + tree from; > > +}; > > > > static void > > execute_sm_exit (class loop *loop, edge ex, vec<seq_entry> &seq, > > @@ -2218,35 +2225,54 @@ execute_sm_exit (class loop *loop, edge ex, > > vec<seq_entry> &seq, > > /* Sink the stores to exit from the loop. */ > > for (unsigned i = seq.length (); i > 0; --i) > > { > > - if (seq[i-1].second != kind) > > - continue; > > im_mem_ref *ref = memory_accesses.refs_list[seq[i-1].first]; > > - sm_aux *aux = *aux_map.get (ref); > > - if (!aux->store_flag) > > + if (seq[i-1].second == sm_other) > > { > > - gassign *store; > > - store = gimple_build_assign (unshare_expr (ref->mem.ref), > > - aux->tmp_var); > > + gcc_assert (kind == sm_ord && seq[i-1].from != NULL_TREE); > > + if (dump_file && (dump_flags & TDF_DETAILS)) > > + { > > + fprintf (dump_file, "Re-issueing dependent store of "); > > + print_generic_expr (dump_file, ref->mem.ref); > > + fprintf (dump_file, " from loop %d on exit %d -> %d\n", > > + loop->num, ex->src->index, ex->dest->index); > > + } > > + gassign *store = gimple_build_assign (unshare_expr (ref->mem.ref), > > + seq[i-1].from); > > gsi_insert_on_edge (ex, store); > > } > > else > > - execute_sm_if_changed (ex, ref->mem.ref, aux->tmp_var, > > aux->store_flag, > > - loop_preheader_edge (loop), &aux->flag_bbs); > > + { > > + sm_aux *aux = *aux_map.get (ref); > > + if (!aux->store_flag) > > + { > > + gassign *store; > > + store = gimple_build_assign (unshare_expr (ref->mem.ref), > > + aux->tmp_var); > > + gsi_insert_on_edge (ex, store); > > + } > > + else > > + execute_sm_if_changed (ex, ref->mem.ref, aux->tmp_var, > > + aux->store_flag, > > + loop_preheader_edge (loop), > > &aux->flag_bbs); > > + } > > } > > } > > > > /* Push the SM candidate at index PTR in the sequence SEQ down until > > we hit the next SM candidate. Return true if that went OK and > > - false if we could not disambiguate agains another unrelated ref. */ > > + false if we could not disambiguate agains another unrelated ref. > > + Update *AT to the index where the candidate now resides. */ > > > > static bool > > -sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr) > > +sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr, unsigned *at) > > { > > + *at = ptr; > > for (; ptr > 0; --ptr) > > { > > seq_entry &new_cand = seq[ptr]; > > seq_entry &against = seq[ptr-1]; > > - if (against.second == sm_ord) > > + if (against.second == sm_ord > > + || (against.second == sm_other && against.from != NULL_TREE)) > > /* Found the tail of the sequence. */ > > break; > > if (!refs_independent_p (memory_accesses.refs_list[new_cand.first], > > @@ -2255,6 +2281,7 @@ sm_seq_push_down (vec<seq_entry> &seq, unsigned ptr) > > /* ??? Prune new_cand from the list of refs to apply SM to. */ > > return false; > > std::swap (new_cand, against); > > + *at = ptr - 1; > > } > > return true; > > } > > @@ -2367,37 +2394,41 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, > > tree vdef, > > not order-preserving SM code. */ > > if (first_edge_seq[i].first != edge_seq[i].first) > > { > > - bitmap_set_bit (refs_not_supported, > > - first_edge_seq[i].first); > > - bitmap_set_bit (refs_not_supported, edge_seq[i].first); > > - first_edge_seq[i].second = sm_unord; > > + if (first_edge_seq[i].second == sm_ord) > > + bitmap_set_bit (refs_not_supported, > > + first_edge_seq[i].first); > > + if (edge_seq[i].second == sm_ord) > > + bitmap_set_bit (refs_not_supported, > > edge_seq[i].first); > > + first_edge_seq[i].second = sm_other; > > } > > - /* sm_unord prevails. */ > > + /* sm_other prevails. */ > > else if (first_edge_seq[i].second != edge_seq[i].second) > > { > > /* This is just an optimization. */ > > gcc_assert (bitmap_bit_p (refs_not_supported, > > first_edge_seq[i].first)); > > - first_edge_seq[i].second = sm_unord; > > + first_edge_seq[i].second = sm_other; > > } > > } > > - /* Any excess elements become sm_unord since they are now > > + /* Any excess elements become sm_other since they are now > > coonditionally executed. */ > > if (first_edge_seq.length () > edge_seq.length ()) > > { > > for (unsigned i = edge_seq.length (); > > i < first_edge_seq.length (); ++i) > > { > > - bitmap_set_bit (refs_not_supported, > > - first_edge_seq[i].first); > > - first_edge_seq[i].second = sm_unord; > > + if (first_edge_seq[i].second == sm_ord) > > + bitmap_set_bit (refs_not_supported, > > + first_edge_seq[i].first); > > + first_edge_seq[i].second = sm_other; > > } > > } > > else if (edge_seq.length () > first_edge_seq.length ()) > > { > > for (unsigned i = first_edge_seq.length (); > > i < edge_seq.length (); ++i) > > - bitmap_set_bit (refs_not_supported, edge_seq[i].first); > > + if (edge_seq[i].second == sm_ord) > > + bitmap_set_bit (refs_not_supported, edge_seq[i].first); > > } > > } > > /* Use the sequence from the first edge and push SMs down. */ > > @@ -2407,17 +2438,13 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, > > tree vdef, > > break; > > unsigned id = first_edge_seq[i].first; > > seq.safe_push (first_edge_seq[i]); > > + unsigned new_idx; > > if (first_edge_seq[i].second == sm_ord > > - && !sm_seq_push_down (seq, seq.length () - 1)) > > + && !sm_seq_push_down (seq, seq.length () - 1, &new_idx)) > > { > > bitmap_set_bit (refs_not_supported, id); > > - /* ??? Mark it sm_unord but it's now "somewhere" ... */ > > - for (unsigned i = seq.length (); i != 0; --i) > > - if (seq[i - 1].first == id) > > - { > > - seq[i - 1].second = sm_unord; > > - break; > > - } > > + /* Mark it sm_other. */ > > + seq[new_idx].second = sm_other; > > } > > } > > return 1; > > @@ -2429,21 +2456,21 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, > > tree vdef, > > /* One of the stores we want to apply SM to and we've not yet seen. > > */ > > else if (bitmap_clear_bit (refs_not_in_seq, data->ref)) > > { > > - seq.safe_push (std::make_pair (data->ref, sm_ord)); > > + seq.safe_push (seq_entry (data->ref, sm_ord)); > > > > /* 1) push it down the queue until a SMed > > and not ignored ref is reached, skipping all not SMed refs > > and ignored refs via non-TBAA disambiguation. */ > > - if (!sm_seq_push_down (seq, seq.length () - 1)) > > + unsigned new_idx; > > + if (!sm_seq_push_down (seq, seq.length () - 1, &new_idx) > > + /* If that fails but we did not fork yet continue, we'll see > > + to re-materialize all of the stores in the sequence then. > > + Further stores will only be pushed up to this one. */ > > + && forked) > > { > > bitmap_set_bit (refs_not_supported, data->ref); > > - /* ??? Mark it sm_unord but it's now "somewhere" ... */ > > - for (unsigned i = seq.length (); i != 0; --i) > > - if (seq[i - 1].first == data->ref) > > - { > > - seq[i - 1].second = sm_unord; > > - break; > > - } > > + /* Mark it sm_other. */ > > + seq[new_idx].second = sm_other; > > } > > > > /* 2) check whether we've seen all refs we want to SM and if so > > @@ -2453,7 +2480,8 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, > > tree vdef, > > } > > else > > /* Another store not part of the final sequence. Simply push it. */ > > - seq.safe_push (std::make_pair (data->ref, sm_other)); > > + seq.safe_push (seq_entry (data->ref, sm_other, > > + gimple_assign_rhs1 (def))); > > > > vdef = gimple_vuse (def); > > } > > @@ -2513,21 +2541,72 @@ hoist_memory_references (class loop *loop, bitmap > > mem_refs, > > std::pair<edge, vec<seq_entry> > *seq; > > FOR_EACH_VEC_ELT (sms, i, seq) > > { > > + bool need_to_push = false; > > for (unsigned i = 0; i < seq->second.length (); ++i) > > { > > - if (seq->second[i].second == sm_other) > > + sm_kind kind = seq->second[i].second; > > + if (kind == sm_other && seq->second[i].from == NULL_TREE) > > break; > > unsigned id = seq->second[i].first; > > - if (bitmap_bit_p (refs_not_supported, id)) > > - seq->second[i].second = sm_other; > > - else if (!sm_seq_push_down (seq->second, i)) > > + unsigned new_idx; > > + if (kind == sm_ord > > + && bitmap_bit_p (refs_not_supported, id)) > > { > > - if (bitmap_set_bit (refs_not_supported, id)) > > - changed = true; > > + seq->second[i].second = sm_other; > > + gcc_assert (seq->second[i].from == NULL_TREE); > > + need_to_push = true; > > + } > > + else if (need_to_push > > + && !sm_seq_push_down (seq->second, i, &new_idx)) > > + { > > + /* We need to push down both sm_ord and sm_other > > + but for the latter we need to disqualify all > > + following refs. */ > > + if (kind == sm_ord) > > + { > > + if (bitmap_set_bit (refs_not_supported, id)) > > + changed = true; > > + seq->second[new_idx].second = sm_other; > > + } > > + else > > + { > > + for (unsigned j = seq->second.length () - 1; > > + j > new_idx; --j) > > + if (seq->second[j].second == sm_ord > > + && bitmap_set_bit (refs_not_supported, > > + seq->second[j].first)) > > + changed = true; > > + seq->second.truncate (new_idx); > > + break; > > + } > > } > > } > > } > > } > > + std::pair<edge, vec<seq_entry> > *seq; > > + FOR_EACH_VEC_ELT (sms, i, seq) > > + { > > + /* Prune sm_other from the end. */ > > + while (!seq->second.is_empty () > > + && seq->second.last ().second == sm_other) > > + seq->second.pop (); > > + /* Prune duplicates from the start. */ > > + auto_bitmap seen (&lim_bitmap_obstack); > > + unsigned j, k; > > + for (j = k = 0; j < seq->second.length (); ++j) > > + if (bitmap_set_bit (seen, seq->second[j].first)) > > + { > > + if (k != j) > > + seq->second[k] = seq->second[j]; > > + ++k; > > + } > > + seq->second.truncate (k); > > + /* And verify. */ > > + seq_entry *e; > > + FOR_EACH_VEC_ELT (seq->second, j, e) > > + gcc_assert (e->second == sm_ord > > + || (e->second == sm_other && e->from != NULL_TREE)); > > + } > > > > /* Verify dependence for refs we cannot handle with the order preserving > > code (refs_not_supported) or prune them from mem_refs. */ > > @@ -2540,7 +2619,7 @@ hoist_memory_references (class loop *loop, bitmap > > mem_refs, > > /* We've now verified store order for ref with respect to all other > > stores in the loop does not matter. */ > > else > > - unord_refs.safe_push (std::make_pair (i, sm_unord)); > > + unord_refs.safe_push (seq_entry (i, sm_unord)); > > } > > > > hash_map<im_mem_ref *, sm_aux *> aux_map; > > > ----------------- > Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany > Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, > Alexander Walter > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)