> Hi. > > The patch makes signification LTO WPA speed up for godot project > from 76s to 65s. The patch uses more smart data structure for > value numbering algorithm that we use. > > Patch can bootstrap on x86_64-linux-gnu and survives regression tests. > > Ready to be installed?
If I get it right, this replaces reference vectors by a central hash map. I wonder when you need to many querries to them? I would expect the subdividing algorithm to travel the reference lists quite sequentially... Honza > Thanks, > Martin > > gcc/ChangeLog: > > 2019-06-03 Martin Liska <mli...@suse.cz> > > * ipa-icf.h (struct sem_usage_pair_hash): New. > (sem_usage_pair_hash::hash): Likewise. > (sem_usage_pair_hash::equal): Likewise. > (struct sem_usage_hash): Likewise. > * ipa-icf.c (sem_item::sem_item): Initialize > referenced_by_count. > (sem_item::add_reference): Register a reference > in ref_map and not in target->usages. > (sem_item::setup): Remove initialization of > dead vectors. > (sem_item::~sem_item): Remove usage of dead vectors. > (sem_item::dump): Remove dump of references. > (sem_item_optimizer::sem_item_optimizer): Initialize > m_references. > (sem_item_optimizer::read_section): Remove useless > dump. > (sem_item_optimizer::parse_funcs_and_vars): Likewise here. > (sem_item_optimizer::build_graph): Pass m_references > to ::add_reference. > (sem_item_optimizer::verify_classes): Remove usage of dead > vectors. > (sem_item_optimizer::traverse_congruence_split): Return true > when a class is split. > (sem_item_optimizer::do_congruence_step_for_index): Use > hash_map for look up of (sem_item *, index). That brings > significant speed up. > (sem_item_optimizer::do_congruence_step): Return true > when a split is done. > (congruence_class::is_class_used): Use referenced_by_count. > --- > gcc/ipa-icf.c | 102 ++++++++++++++++++++------------------------------ > gcc/ipa-icf.h | 45 ++++++++++++++++++---- > 2 files changed, 78 insertions(+), 69 deletions(-) > > > diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c > index 074181491da..a4705eee936 100644 > --- a/gcc/ipa-icf.c > +++ b/gcc/ipa-icf.c > @@ -138,14 +138,15 @@ sem_usage_pair::sem_usage_pair (sem_item *_item, > unsigned int _index) > } > > sem_item::sem_item (sem_item_type _type, bitmap_obstack *stack) > -: type (_type), m_hash (-1), m_hash_set (false) > +: type (_type), referenced_by_count (0), m_hash (-1), m_hash_set (false) > { > setup (stack); > } > > sem_item::sem_item (sem_item_type _type, symtab_node *_node, > bitmap_obstack *stack) > -: type (_type), node (_node), m_hash (-1), m_hash_set (false) > +: type (_type), node (_node), referenced_by_count (0), m_hash (-1), > + m_hash_set (false) > { > decl = node->decl; > setup (stack); > @@ -154,13 +155,18 @@ sem_item::sem_item (sem_item_type _type, symtab_node > *_node, > /* Add reference to a semantic TARGET. */ > > void > -sem_item::add_reference (sem_item *target) > +sem_item::add_reference (ref_map *refs, > + sem_item *target) > { > - refs.safe_push (target); > - unsigned index = refs.length (); > - target->usages.safe_push (new sem_usage_pair(this, index)); > + unsigned index = reference_count++; > + bool existed; > + > + vec<sem_item *> &v > + = refs->get_or_insert (new sem_usage_pair (target, index), &existed); > + v.safe_push (this); > bitmap_set_bit (target->usage_index_bitmap, index); > refs_set.add (target->node); > + ++target->referenced_by_count; > } > > /* Initialize internal data structures. Bitmap STACK is used for > @@ -171,20 +177,14 @@ sem_item::setup (bitmap_obstack *stack) > { > gcc_checking_assert (node); > > - refs.create (0); > + reference_count = 0; > tree_refs.create (0); > - usages.create (0); > usage_index_bitmap = BITMAP_ALLOC (stack); > } > > sem_item::~sem_item () > { > - for (unsigned i = 0; i < usages.length (); i++) > - delete usages[i]; > - > - refs.release (); > tree_refs.release (); > - usages.release (); > > BITMAP_FREE (usage_index_bitmap); > } > @@ -199,13 +199,6 @@ sem_item::dump (void) > fprintf (dump_file, "[%s] %s (tree:%p)\n", type == FUNC ? "func" : > "var", > node->dump_name (), (void *) node->decl); > fprintf (dump_file, " hash: %u\n", get_hash ()); > - fprintf (dump_file, " references: "); > - > - for (unsigned i = 0; i < refs.length (); i++) > - fprintf (dump_file, "%s%s ", refs[i]->node->name (), > - i < refs.length() - 1 ? "," : ""); > - > - fprintf (dump_file, "\n"); > } > } > > @@ -2230,7 +2223,7 @@ unsigned int sem_item_optimizer::class_id = 0; > > sem_item_optimizer::sem_item_optimizer () > : worklist (0), m_classes (0), m_classes_count (0), m_cgraph_node_hooks > (NULL), > - m_varpool_node_hooks (NULL), m_merged_variables () > + m_varpool_node_hooks (NULL), m_merged_variables (), m_references () > { > m_items.create (0); > bitmap_obstack_initialize (&m_bmstack); > @@ -2341,13 +2334,8 @@ sem_item_optimizer::read_section (lto_file_decl_data > *file_data, > node = lto_symtab_encoder_deref (encoder, index); > > hashval_t hash = streamer_read_uhwi (&ib_main); > - > gcc_assert (node->definition); > > - if (dump_file) > - fprintf (dump_file, "Symbol added: %s (tree: %p)\n", > - node->dump_asm_name (), (void *) node->decl); > - > if (is_a<cgraph_node *> (node)) > { > cgraph_node *cnode = dyn_cast <cgraph_node *> (node); > @@ -2611,12 +2599,6 @@ sem_item_optimizer::parse_funcs_and_vars (void) > { > m_items.safe_push (f); > m_symtab_node_map.put (cnode, f); > - > - if (dump_file) > - fprintf (dump_file, "Parsed function:%s\n", f->node->asm_name ()); > - > - if (dump_file && (dump_flags & TDF_DETAILS)) > - f->dump_to_file (dump_file); > } > else if (dump_file) > fprintf (dump_file, "Not parsed function:%s\n", cnode->asm_name ()); > @@ -2745,7 +2727,7 @@ sem_item_optimizer::build_graph (void) > sem_item **slot = m_symtab_node_map.get > (e->callee->ultimate_alias_target ()); > if (slot) > - item->add_reference (*slot); > + item->add_reference (&m_references, *slot); > > e = e->next_callee; > } > @@ -2757,7 +2739,7 @@ sem_item_optimizer::build_graph (void) > sem_item **slot = m_symtab_node_map.get > (ref->referred->ultimate_alias_target ()); > if (slot) > - item->add_reference (*slot); > + item->add_reference (&m_references, *slot); > } > } > } > @@ -2987,13 +2969,6 @@ sem_item_optimizer::verify_classes (void) > > gcc_assert (item); > gcc_assert (item->cls == cls); > - > - for (unsigned k = 0; k < item->usages.length (); k++) > - { > - sem_usage_pair *usage = item->usages[k]; > - gcc_assert (usage->item->index_in_class > - < usage->item->cls->members.length ()); > - } > } > } > } > @@ -3106,10 +3081,11 @@ sem_item_optimizer::traverse_congruence_split > (congruence_class * const &cls, > /* Release class if not presented in work list. */ > if (!in_worklist) > delete cls; > - } > > + return true; > + } > > - return true; > + return false; > } > > /* Compare function for sorting pairs in do_congruence_step_f. */ > @@ -3131,7 +3107,7 @@ sem_item_optimizer::sort_congruence_split (const void > *a_, const void *b_) > /* Tests if a class CLS used as INDEXth splits any congruence classes. > Bitmap stack BMSTACK is used for bitmap allocation. */ > > -void > +bool > sem_item_optimizer::do_congruence_step_for_index (congruence_class *cls, > unsigned int index) > { > @@ -3140,31 +3116,32 @@ sem_item_optimizer::do_congruence_step_for_index > (congruence_class *cls, > for (unsigned int i = 0; i < cls->members.length (); i++) > { > sem_item *item = cls->members[i]; > + sem_usage_pair needle (item, index); > + vec<sem_item *> *callers = m_references.get (&needle); > + if (callers == NULL) > + continue; > > - /* Iterate all usages that have INDEX as usage of the item. */ > - for (unsigned int j = 0; j < item->usages.length (); j++) > + for (unsigned int j = 0; j < callers->length (); j++) > { > - sem_usage_pair *usage = item->usages[j]; > - > - if (usage->index != index) > + sem_item *caller = (*callers)[j]; > + if (caller->cls->members.length () < 2) > continue; > - > - bitmap *slot = split_map.get (usage->item->cls); > + bitmap *slot = split_map.get (caller->cls); > bitmap b; > > if(!slot) > { > b = BITMAP_ALLOC (&m_bmstack); > - split_map.put (usage->item->cls, b); > + split_map.put (caller->cls, b); > } > else > b = *slot; > > - gcc_checking_assert (usage->item->cls); > - gcc_checking_assert (usage->item->index_in_class > - < usage->item->cls->members.length ()); > + gcc_checking_assert (caller->cls); > + gcc_checking_assert (caller->index_in_class > + < caller->cls->members.length ()); > > - bitmap_set_bit (b, usage->item->index_in_class); > + bitmap_set_bit (b, caller->index_in_class); > } > } > > @@ -3180,12 +3157,16 @@ sem_item_optimizer::do_congruence_step_for_index > (congruence_class *cls, > pair.cls = cls; > > splitter_class_removed = false; > + bool r = false; > for (unsigned i = 0; i < to_split.length (); ++i) > - traverse_congruence_split (to_split[i].first, to_split[i].second, &pair); > + r |= traverse_congruence_split (to_split[i].first, to_split[i].second, > + &pair); > > /* Bitmap clean-up. */ > split_map.traverse <traverse_split_pair *, > sem_item_optimizer::release_split_map> (NULL); > + > + return r; > } > > /* Every usage of a congruence class CLS is a candidate that can split the > @@ -3206,9 +3187,8 @@ sem_item_optimizer::do_congruence_step > (congruence_class *cls) > EXECUTE_IF_SET_IN_BITMAP (usage, 0, i, bi) > { > if (dump_file && (dump_flags & TDF_DETAILS)) > - fprintf (dump_file, " processing congruence step for class: %u, " > - "index: %u\n", cls->id, i); > - > + fprintf (dump_file, " processing congruence step for class: %u " > + "(%u items), index: %u\n", cls->id, cls->members.length (), i); > do_congruence_step_for_index (cls, i); > > if (splitter_class_removed) > @@ -3648,7 +3628,7 @@ bool > congruence_class::is_class_used (void) > { > for (unsigned int i = 0; i < members.length (); i++) > - if (members[i]->usages.length ()) > + if (members[i]->referenced_by_count) > return true; > > return false; > diff --git a/gcc/ipa-icf.h b/gcc/ipa-icf.h > index 27d588c9c12..ede4c94dbd3 100644 > --- a/gcc/ipa-icf.h > +++ b/gcc/ipa-icf.h > @@ -126,7 +126,6 @@ struct symbol_compare_hash : nofree_ptr_hash > <symbol_compare_collection> > } > }; > > - > /* Semantic item usage pair. */ > class sem_usage_pair > { > @@ -141,6 +140,32 @@ public: > unsigned int index; > }; > > +struct sem_usage_pair_hash : pointer_hash <sem_usage_pair> > +{ > + static inline hashval_t hash (sem_usage_pair *); > + static inline bool equal (sem_usage_pair *, sem_usage_pair *); > +}; > + > +inline hashval_t > +sem_usage_pair_hash::hash (sem_usage_pair *pair) > +{ > + inchash::hash hstate; > + > + hstate.add_ptr (pair->item); > + hstate.add_int (pair->index); > + > + return hstate.end (); > +} > + > +inline bool > +sem_usage_pair_hash::equal (sem_usage_pair *p1, sem_usage_pair *p2) > +{ > + return p1->item == p2->item && p1->index == p2->index; > +} > + > +struct sem_usage_hash : sem_usage_pair_hash, typed_delete_remove > <sem_usage_pair> {}; > +typedef hash_map<sem_usage_hash, auto_vec<sem_item *> > ref_map; > + > typedef std::pair<symtab_node *, symtab_node *> symtab_pair; > > /* Semantic item is a base class that encapsulates all shared functionality > @@ -168,7 +193,7 @@ public: > virtual void init (void) = 0; > > /* Add reference to a semantic TARGET. */ > - void add_reference (sem_item *target); > + void add_reference (ref_map *map, sem_item *target); > > /* Fast equality function based on knowledge known in WPA. */ > virtual bool equals_wpa (sem_item *item, > @@ -216,8 +241,9 @@ public: > /* Declaration tree node. */ > tree decl; > > - /* Semantic references used that generate congruence groups. */ > - vec <sem_item *> refs; > + /* Number of references to a semantic symbols (function calls, > + variable references). */ > + unsigned reference_count; > > /* Pointer to a congruence class the item belongs to. */ > congruence_class *cls; > @@ -225,9 +251,6 @@ public: > /* Index of the item in a class belonging to. */ > unsigned int index_in_class; > > - /* List of semantic items where the instance is used. */ > - vec <sem_usage_pair *> usages; > - > /* A bitmap with indices of all classes referencing this item. */ > bitmap usage_index_bitmap; > > @@ -239,6 +262,9 @@ public: > > /* Temporary hash used where hash values of references are added. */ > hashval_t global_hash; > + > + /* Number of references to this symbol. */ > + unsigned referenced_by_count; > protected: > /* Cached, once calculated hash for the item. */ > > @@ -581,7 +607,7 @@ private: > > /* Tests if a class CLS used as INDEXth splits any congruence classes. > Bitmap stack BMSTACK is used for bitmap allocation. */ > - void do_congruence_step_for_index (congruence_class *cls, unsigned int > index); > + bool do_congruence_step_for_index (congruence_class *cls, unsigned int > index); > > /* Makes pairing between a congruence class CLS and semantic ITEM. */ > static void add_item_to_class (congruence_class *cls, sem_item *item); > @@ -644,6 +670,9 @@ private: > /* Vector of merged variables. Needed for fixup of points-to-analysis > info. */ > vec <symtab_pair> m_merged_variables; > + > + /* Hash map will all references. */ > + ref_map m_references; > }; // class sem_item_optimizer > > } // ipa_icf namespace >