https://gcc.gnu.org/g:582adc3c97b1e0c42243b106384eb66b7618a682
commit r17-629-g582adc3c97b1e0c42243b106384eb66b7618a682 Author: Richard Sandiford <[email protected]> Date: Wed May 20 17:36:02 2026 +0100 cselib: Avoid repeated discard_useless_locs When bootstrapping with RTL checking enabled, a lot of the compile time for insn-extract.cc is spent in var-tracking (~75% on cfarm425, an aarch64 box, and ~42% on my local x86 box). The top of the aarch64 profile is: 49.80% references_value_p(rtx_def const*, int) 26.40% discard_useless_locs(cselib_val**, void*) var-tracking uses cselib to track equivalences between values. However, whereas most cselib passes clear the tables after each bb/ebb, var-tracking instead retains invariant values for the whole function. It does this by arranging for invariant values to be moved from the main hash table to cselib_preserved_hash_table. cselib_preserved_hash_table is then the first table that is consulted during a lookup (cselib_find_slot). This allows new non-invariant locations to be added to an entry of cselib_preserved_hash_table. These non-invariant locations might be invalidated later, in the usual way. At the end of each block, cselib_preserve_only_values invalidates all registers and memory. It then goes through cselib_preserved_hash_table to remove locations that have become useless through invalidation. This never makes a whole value useless, because the invariant locations still hold. The issue in insn-extract.cc is that we have many blocks and soon have many entries in cselib_preserved_hash_table. Very few of those entries are used in each block, but every location of every entry is examined after each block. This patch adds a bit to cselib_val to say whether every location only references preserved values, and thus will never have useless locations in its current form. The patch then uses this information to maintain a list of entries in cselib_preserve_only_values that might have useless locations. remove_useless_values can then iterate over this list instead of the whole hash table. This required finding space for two new bits in cselib_val. Since there are no holes, I ended up stealing two bits from the hash. On cfarm425, the effect is to go from: variable tracking : 709.47 ( 75%) 35M ( 1%) to: variable tracking : 1.08 ( 0%) 35M ( 1%) There was no change to the final object file. gcc/ * cselib.h (cselib_val::HASH_MASK): New static constant. (cselib_val::hash): Turn into a 30-bit bitfield. (cselib_val::in_preserved_table_p): New bitfield. (cselib_val::all_locs_preserved_p): Likewise. * cselib.cc: Include rtl-iter.h. (cselib_preserved_prune_list): New variable. (cselib_clear_all_locs_preserved): New function. (new_elt_loc_list): Call it when modifying a value's location list. (preserve_constants_and_equivs): Set in_preserved_table_p when moving a value to the preserved hash table. Also push such values onto cselib_preserved_prune_list. (cselib_find_slot): Mask out the upper 2 bits of the hash. (discard_useless_locs): New overload, split out from original hash table traverse callback. Use an inline rtl iteration instead of calling references_value_p. Record whether the retained location only reference preserved value. (remove_useless_values): Iterate over cselib_preserved_prune_list instead of the hash table itself. Remove a value from the list if all remaining locations only reference preserved value. (new_cselib_val): Initialize the new bitfields. (cselib_finish): Free cselib_preserved_prune_list. Diff: --- gcc/cselib.cc | 111 ++++++++++++++++++++++++++++++++++++++++++++++++++++------ gcc/cselib.h | 12 ++++++- 2 files changed, 111 insertions(+), 12 deletions(-) diff --git a/gcc/cselib.cc b/gcc/cselib.cc index af8b237722e3..e029d827fcdd 100644 --- a/gcc/cselib.cc +++ b/gcc/cselib.cc @@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see #include "function-abi.h" #include "alias.h" #include "predict.h" +#include "rtl-iter.h" /* A list of cselib_val structures. */ struct elt_list @@ -177,6 +178,16 @@ static hash_table<cselib_hasher> *cselib_hash_table; /* A table to hold preserved values. */ static hash_table<cselib_hasher> *cselib_preserved_hash_table; +/* The subset of cselib_preserved_hash_table that might have useless locations. + It excludes values for which all_locs_preserved_p is true. + + This is an important compile-time optimization for inputs that have + many preserved values and many basic blocks (such as insn-extract.cc + at the time of writing, especially with RTL checking enabled). + If remove_useless_values iterated over the whole hash table for every + block, it would repeat a lot of useless and cache-unfriendly work. */ +static vec<cselib_val *> cselib_preserved_prune_list; + /* The unique id that the next create value will take. */ static unsigned int next_uid; @@ -304,6 +315,21 @@ new_elt_list (struct elt_list *next, cselib_val *elt) return el; } +/* Record that all_locs_preserved_p might no longer hold for VAL. */ + +static inline void +cselib_clear_all_locs_preserved (cselib_val *val) +{ + if (val->all_locs_preserved_p) + { + val->all_locs_preserved_p = false; + if (val->in_preserved_table_p) + /* VAL would have been removed from cselib_preserved_prune_list + but now needs to be considered by remove_useless_values. */ + cselib_preserved_prune_list.safe_push (val); + } +} + /* Allocate a struct elt_loc_list with LOC and prepend it to VAL's loc list. */ @@ -355,6 +381,7 @@ new_elt_loc_list (cselib_val *val, rtx loc) auto *el_val = CSELIB_VAL_PTR (el->loc); gcc_checking_assert (el_val->locs->loc == loc); el_val->locs->loc = val->val_rtx; + cselib_clear_all_locs_preserved (el_val); } } el->next = val->locs; @@ -388,6 +415,7 @@ new_elt_loc_list (cselib_val *val, rtx loc) el->setting_insn = cselib_current_insn; el->next = NULL; loc_val->locs = el; + cselib_clear_all_locs_preserved (loc_val); } el = elt_loc_list_pool.allocate (); @@ -395,6 +423,7 @@ new_elt_loc_list (cselib_val *val, rtx loc) el->setting_insn = cselib_current_insn; el->next = next; val->locs = el; + cselib_clear_all_locs_preserved (val); } /* Promote loc L to a nondebug cselib_current_insn if L is marked as @@ -526,6 +555,9 @@ preserve_constants_and_equivs (cselib_val **x, void *info ATTRIBUTE_UNUSED) v->hash, INSERT); gcc_assert (!*slot); *slot = v; + v->in_preserved_table_p = true; + if (!v->all_locs_preserved_p) + cselib_preserved_prune_list.safe_push (v); } cselib_hash_table->clear_slot (x); @@ -626,6 +658,7 @@ cselib_find_slot (machine_mode mode, rtx x, hashval_t hash, { cselib_val **slot = NULL; cselib_hasher::key lookup = { mode, x, memmode }; + hash &= cselib_val::HASH_MASK; if (cselib_preserve_constants) slot = cselib_preserved_hash_table->find_slot_with_hash (&lookup, hash, NO_INSERT); @@ -674,26 +707,51 @@ cselib_useless_value_p (cselib_val *v) && !SP_DERIVED_VALUE_P (v->val_rtx)); } -/* For all locations found in X, delete locations that reference useless - values (i.e. values without any location). Called through - htab_traverse. */ +/* For all locations found in V, delete locations that reference useless + values (i.e. values without any location). */ -int -discard_useless_locs (cselib_val **x, void *info ATTRIBUTE_UNUSED) +static void +discard_useless_locs (cselib_val *v) { - cselib_val *v = *x; struct elt_loc_list **p = &v->locs; bool had_locs = v->locs != NULL; rtx_insn *setting_insn = v->locs ? v->locs->setting_insn : NULL; + if (v->all_locs_preserved_p) + return; + + bool all_locs_preserved_p = true; while (*p) { - if (references_value_p ((*p)->loc, 1)) - unchain_one_elt_loc_list (p); + /* True if every value referenced by (*p)->loc is preserved. */ + bool preserved_p = true; + bool keep_p = true; + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, (*p)->loc, ALL) + { + const_rtx x = *iter; + if (GET_CODE (x) == VALUE && !PRESERVED_VALUE_P (x)) + { + preserved_p = false; + if (CSELIB_VAL_PTR (x)->locs == 0) + { + keep_p = false; + break; + } + } + } + if (keep_p) + { + all_locs_preserved_p &= preserved_p; + p = &(*p)->next; + } else - p = &(*p)->next; + unchain_one_elt_loc_list (p); } + if (all_locs_preserved_p) + v->all_locs_preserved_p = true; + if (had_locs && cselib_useless_value_p (v)) { if (setting_insn && DEBUG_INSN_P (setting_insn)) @@ -702,6 +760,14 @@ discard_useless_locs (cselib_val **x, void *info ATTRIBUTE_UNUSED) n_useless_values++; values_became_useless = 1; } +} + +/* A hash-table traversal callback for the above. */ + +int +discard_useless_locs (cselib_val **x, void *info ATTRIBUTE_UNUSED) +{ + discard_useless_locs (*x); return 1; } @@ -755,8 +821,28 @@ remove_useless_values (void) *p = &dummy_val; if (cselib_preserve_constants) - cselib_preserved_hash_table->traverse <void *, - discard_useless_locs> (NULL); + { + /* Apply discard_useless_locs to each element of + cselib_preserved_prune_list. Remove from consideration any values + whose locations only reference preserved values, since those + locations will never be useless in their current form. */ + unsigned int len = cselib_preserved_prune_list.length (); + unsigned int dest_i = 0; + unsigned int src_i = 0; + for (; src_i < len; ++src_i) + { + auto *val = cselib_preserved_prune_list[src_i]; + discard_useless_locs (val); + if (!val->all_locs_preserved_p) + { + if (dest_i < src_i) + cselib_preserved_prune_list[dest_i] = val; + dest_i += 1; + } + } + if (src_i != dest_i) + cselib_preserved_prune_list.truncate (dest_i); + } gcc_assert (!values_became_useless); n_useless_values += n_useless_debug_values; @@ -1610,6 +1696,8 @@ new_cselib_val (hashval_t hash, machine_mode mode, rtx x) gcc_assert (next_uid); e->hash = hash; + e->in_preserved_table_p = false; + e->all_locs_preserved_p = false; e->uid = next_uid++; /* We use an alloc pool to allocate this RTL construct because it accounts for about 8% of the overall memory usage. We know @@ -3455,6 +3543,7 @@ cselib_finish (void) cselib_hash_table = NULL; if (preserved) delete cselib_preserved_hash_table; + cselib_preserved_prune_list.release (); cselib_preserved_hash_table = NULL; free (used_regs); used_regs = 0; diff --git a/gcc/cselib.h b/gcc/cselib.h index dac51e141558..69256a02632a 100644 --- a/gcc/cselib.h +++ b/gcc/cselib.h @@ -23,8 +23,18 @@ along with GCC; see the file COPYING3. If not see /* Describe a value. */ struct cselib_val { + /* A mask equivalent of HASH's bitfield width. */ + static const unsigned int HASH_MASK = 0x3fffffff; + /* The hash value. */ - unsigned int hash; + unsigned int hash : 30; + + /* True if this value is entered in cselib_preserved_hash_table. */ + unsigned int in_preserved_table_p : 1; + + /* True if every value referenced by every element of LOCS is known + to be a preserved value. */ + unsigned int all_locs_preserved_p : 1; /* A unique id assigned to values. */ int uid;
