On 23/09/2024 11:31, Alex Coplan wrote: > Hi, > > As the PR shows, pair-fusion was tricking memory_modified_in_insn_p into > returning false when a common base register (in this case, x1) was > modified between the mem and the store insn. This lead to wrong code as > the accesses really did alias. > > To avoid this sort of problem, this patch avoids invoking RTL alias > analysis altogether (and assume an alias conflict) if the two insns to > be compared share a common address register R, and the insns see different > definitions of R (i.e. it was modified in between). > > Bootstrapped/regtested on aarch64-linux-gnu (all languages, both regular > bootstrap and LTO+PGO bootstrap). OK for trunk?
Ping: https://gcc.gnu.org/pipermail/gcc-patches/2024-September/663600.html I realise it was bad timing on my part sending this just after Richard (S) went away for a week, sorry about that! Alex > > Thanks, > Alex > > gcc/ChangeLog: > > PR rtl-optimization/116783 > * pair-fusion.cc (def_walker::cand_addr_uses): New. > (def_walker::def_walker): Add parameter for candidate address > uses. > (def_walker::alias_conflict_p): Declare. > (def_walker::addr_reg_conflict_p): New. > (def_walker::conflict_p): New. > (store_walker::store_walker): Add parameter for candidate > address uses and pass to base ctor. > (store_walker::conflict_p): Rename to ... > (store_walker::alias_conflict_p): ... this. > (load_walker::load_walker): Add parameter for candidate > address uses and pass to base ctor. > (load_walker::conflict_p): Rename to ... > (load_walker::alias_conflict_p): ... this. > (pair_fusion_bb_info::try_fuse_pair): Collect address register > uses for candidate insns and pass down to alias walkers. > > gcc/testsuite/ChangeLog: > > PR rtl-optimization/116783 > * g++.dg/torture/pr116783.C: New test. > diff --git a/gcc/pair-fusion.cc b/gcc/pair-fusion.cc > index cb0374f426b..b1ea611bacd 100644 > --- a/gcc/pair-fusion.cc > +++ b/gcc/pair-fusion.cc > @@ -2089,11 +2089,80 @@ protected: > > def_iter_t def_iter; > insn_info *limit; > - def_walker (def_info *def, insn_info *limit) : > - def_iter (def), limit (limit) {} > + > + // Array of register uses from the candidate insn which occur in MEMs. > + use_array cand_addr_uses; > + > + def_walker (def_info *def, insn_info *limit, use_array addr_uses) : > + def_iter (def), limit (limit), cand_addr_uses (addr_uses) {} > > virtual bool iter_valid () const { return *def_iter; } > > + // Implemented in {load,store}_walker. > + virtual bool alias_conflict_p (int &budget) const = 0; > + > + // Return true if the current (walking) INSN () uses a register R inside a > + // MEM, where R is also used inside a MEM by the (static) candidate insn, > and > + // those uses see different definitions of that register. In this case we > + // can't rely on RTL alias analysis, and for now we conservatively assume > that > + // there is an alias conflict. See PR116783. > + bool addr_reg_conflict_p () const > + { > + use_array curr_insn_uses = insn ()->uses (); > + auto cand_use_iter = cand_addr_uses.begin (); > + auto insn_use_iter = curr_insn_uses.begin (); > + while (cand_use_iter != cand_addr_uses.end () > + && insn_use_iter != curr_insn_uses.end ()) > + { > + auto insn_use = *insn_use_iter; > + auto cand_use = *cand_use_iter; > + if (insn_use->regno () > cand_use->regno ()) > + cand_use_iter++; > + else if (insn_use->regno () < cand_use->regno ()) > + insn_use_iter++; > + else > + { > + // As it stands I believe the alias code (memory_modified_in_insn_p) > + // doesn't look at insn notes such as REG_EQU{IV,AL}, so it should > + // be safe to skip over uses that only occur in notes. > + if (insn_use->includes_address_uses () > + && !insn_use->only_occurs_in_notes () > + && insn_use->def () != cand_use->def ()) > + { > + if (dump_file) > + { > + fprintf (dump_file, > + "assuming aliasing of cand i%d and i%d:\n" > + "-> insns see different defs of common addr reg > r%u\n" > + "-> ", > + cand_use->insn ()->uid (), insn_use->insn ()->uid > (), > + insn_use->regno ()); > + > + // Note that while the following sequence could be made more > + // concise by eliding pp_string calls into the pp_printf > + // calls, doing so triggers -Wformat-diag. > + pretty_printer pp; > + pp_string (&pp, "["); > + pp_access (&pp, cand_use, 0); > + pp_string (&pp, "] in "); > + pp_printf (&pp, "i%d", cand_use->insn ()->uid ()); > + pp_string (&pp, " vs ["); > + pp_access (&pp, insn_use, 0); > + pp_string (&pp, "] in "); > + pp_printf (&pp, "i%d", insn_use->insn ()->uid ()); > + fprintf (dump_file, "%s\n", pp_formatted_text (&pp)); > + } > + return true; > + } > + > + cand_use_iter++; > + insn_use_iter++; > + } > + } > + > + return false; > + } > + > public: > insn_info *insn () const override { return (*def_iter)->insn (); } > void advance () override { def_iter++; } > @@ -2107,6 +2176,14 @@ public: > else > return *(insn ()) < *limit; > } > + > + bool conflict_p (int &budget) const override final > + { > + if (addr_reg_conflict_p ()) > + return true; > + > + return alias_conflict_p (budget); > + } > }; > > // alias_walker that iterates over stores. > @@ -2117,12 +2194,14 @@ class store_walker : public def_walker<reverse> > InsnPredicate tombstone_p; > > public: > - store_walker (def_info *mem_def, rtx mem, insn_info *limit_insn, > + store_walker (def_info *mem_def, rtx mem, > + use_array addr_uses, > + insn_info *limit_insn, > InsnPredicate tombstone_fn) : > - def_walker<reverse> (mem_def, limit_insn), > + def_walker<reverse> (mem_def, limit_insn, addr_uses), > cand_mem (mem), tombstone_p (tombstone_fn) {} > > - bool conflict_p (int &budget) const override final > + bool alias_conflict_p (int &budget) const override final > { > if (tombstone_p (this->insn ())) > return false; > @@ -2159,13 +2238,14 @@ public: > return (*use_iter)->insn (); > } > > - bool conflict_p (int &budget) const override final > + bool alias_conflict_p (int &budget) const override final > { > return load_modified_by_store_p (insn (), cand_store, budget); > } > > - load_walker (def_info *def, insn_info *store, insn_info *limit_insn) > - : Base (def, limit_insn), > + load_walker (def_info *def, insn_info *store, use_array addr_uses, > + insn_info *limit_insn) > + : Base (def, limit_insn, addr_uses), > use_iter (Base::start_use_chain (this->def_iter)), > cand_store (store) {} > }; > @@ -2544,11 +2624,37 @@ pair_fusion_bb_info::try_fuse_pair (bool load_p, > unsigned access_size, > && bitmap_bit_p (&m_tombstone_bitmap, insn->uid ()); > }; > > + // Maximum number of distinct regnos we expect to appear in a single > + // MEM (and thus in a candidate insn). > + static constexpr int max_mem_regs = 2; > + auto_vec<access_info *, max_mem_regs> addr_use_vec[2]; > + use_array addr_uses[2]; > + > + // Collect the lists of register uses that occur in the candidate MEMs. > + for (int i = 0; i < 2; i++) > + { > + // N.B. it's safe for us to ignore uses that only occur in notes > + // here (e.g. in a REG_EQUIV expression) since we only pass the > + // MEM down to the alias machinery, so it can't see any insn-level > + // notes. > + for (auto use : insns[i]->uses ()) > + if (use->is_reg () > + && use->includes_address_uses () > + && !use->only_occurs_in_notes ()) > + { > + gcc_checking_assert (addr_use_vec[i].length () < max_mem_regs); > + addr_use_vec[i].quick_push (use); > + } > + addr_uses[i] = use_array (addr_use_vec[i]); > + } > + > store_walker<false, decltype(tombstone_p)> > - forward_store_walker (mem_defs[0], cand_mems[0], insns[1], tombstone_p); > + forward_store_walker (mem_defs[0], cand_mems[0], addr_uses[0], insns[1], > + tombstone_p); > > store_walker<true, decltype(tombstone_p)> > - backward_store_walker (mem_defs[1], cand_mems[1], insns[0], tombstone_p); > + backward_store_walker (mem_defs[1], cand_mems[1], addr_uses[1], insns[0], > + tombstone_p); > > alias_walker *walkers[4] = {}; > if (mem_defs[0]) > @@ -2562,8 +2668,10 @@ pair_fusion_bb_info::try_fuse_pair (bool load_p, > unsigned access_size, > { > // We want to find any loads hanging off the first store. > mem_defs[0] = memory_access (insns[0]->defs ()); > - load_walker<false> forward_load_walker (mem_defs[0], insns[0], > insns[1]); > - load_walker<true> backward_load_walker (mem_defs[1], insns[1], > insns[0]); > + load_walker<false> forward_load_walker (mem_defs[0], insns[0], > + addr_uses[0], insns[1]); > + load_walker<true> backward_load_walker (mem_defs[1], insns[1], > + addr_uses[1], insns[0]); > walkers[2] = &forward_load_walker; > walkers[3] = &backward_load_walker; > m_pass->do_alias_analysis (alias_hazards, walkers, load_p); > diff --git a/gcc/testsuite/g++.dg/torture/pr116783.C > b/gcc/testsuite/g++.dg/torture/pr116783.C > new file mode 100644 > index 00000000000..6d59159459d > --- /dev/null > +++ b/gcc/testsuite/g++.dg/torture/pr116783.C > @@ -0,0 +1,98 @@ > +// { dg-do run } > +// { dg-additional-options "-fstack-protector-strong > -fno-late-combine-instructions" } > +// { dg-require-effective-target fstack_protector } > +// { dg-require-effective-target c++11 } > + > +struct Private { > + char data[24]{}; > + long moved_from : 4; > + Private() : moved_from (0) {} > +}; > + > +struct QVariant { > + __attribute__((noipa)) > + ~QVariant() { > + if (!d.moved_from && d.data[0] != 42) > + __builtin_abort (); > + } > + __attribute__((noipa)) > + QVariant() { > + d.data[0] = 42; > + } > + __attribute__((noipa)) > + QVariant(QVariant &other) : d(other.d) {} > + QVariant(QVariant &&other) : d(other.d) { > + other.d = Private(); > + other.d.moved_from = true; > + } > + QVariant &operator=(QVariant); > + Private d; > +}; > + > +QVariant id (QVariant v) { return v; } > +QVariant &QVariant::operator=(QVariant other) > +{ > + id(other); > + return *this; > +} > + > +template <typename T> struct QList { > + T d; > + struct const_iterator { > + T *ptr; > + T &operator*() { return *ptr; } > + __attribute__((noipa)) > + bool operator!=(const_iterator other) { return ptr != other.ptr; } > + void operator++() { ptr++; } > + }; > + __attribute__((noipa)) > + T at() { return d; } > + const_iterator begin() { return const_iterator { &d }; } > + const_iterator end() { return const_iterator { &d }; } > +}; > +struct QArrayDataPointer { > + int d; > + int *ptr; > + long size; > +}; > + > +QArrayDataPointer null_qadp; > + > +struct QString { > + __attribute__((noipa)) > + QList<QString> split() const { > + return QList<QString> {null_qadp}; > + } > + __attribute__((noipa)) > + friend bool operator==(QString, QString) { return true; } > + > + __attribute__((noipa)) > + QString(QArrayDataPointer dp) : d(dp) {} > + QArrayDataPointer d; > +}; > + > +__attribute__((noipa)) > +QString as_qstr (QVariant *v) > +{ > + return QString (null_qadp); > +} > + > +int *getNode(const QString &tagContent) { > + auto expr = tagContent.split(); > + auto blockName = expr.at(); > + auto loadedBlocksVariant = QVariant (); > + QList<QVariant> blockVariantList; > + for (auto &item : blockVariantList) { > + auto blockNodeName = as_qstr (&item); > + blockNodeName == blockName; > + QString q(null_qadp); > + } > + loadedBlocksVariant = QVariant(); > + return nullptr; > +} > + > +int main(void) > +{ > + QString foo(null_qadp); > + getNode(foo); > +}