This reduces WPA memory usage at stream-out time by avoiding to allocate the streamer cache node array and by freeing the global out-decl-states hash tables (we do that already for the fn-decl-states).
LTO bootstrapped and bootstrapped on x86_64-unknown-linux-gnu, testing in progress. Ok? Not sure if it will make a notable difference. The pointer-map overhead is at least 4 times of that of the vector, if we make pointer-map behave like hash_table (3/4 full) or htab_t (half full) then that would improve (we could even make that configurable at pointer-set/map construction time). Like with Index: gcc/pointer-set.c =================================================================== --- gcc/pointer-set.c (revision 209018) +++ gcc/pointer-set.c (working copy) @@ -125,7 +125,7 @@ pointer_set_insert (struct pointer_set_t /* For simplicity, expand the set even if P is already there. This can be superfluous but can happen at most once. */ - if (pset->n_elements > pset->n_slots / 4) + if (pset->n_elements * 4 > pset->n_slots * 3) { size_t old_n_slots = pset->n_slots; const void **old_slots = pset->slots; Index: gcc/pointer-set.h =================================================================== --- gcc/pointer-set.h (revision 209018) +++ gcc/pointer-set.h (working copy) @@ -109,7 +109,7 @@ pointer_map<T>::insert (const void *p, b /* For simplicity, expand the map even if P is already there. This can be superfluous but can happen at most once. */ /* ??? Fugly that we have to inline that here. */ - if (n_elements > n_slots / 4) + if (n_elements * 4 > n_slots * 3) { size_t old_n_slots = n_slots; const void **old_keys = slots; might be worth checking how much memory we save from the above. Thanks, Richard. 2014-04-03 Richard Biener <rguent...@suse.de> * tree-streamer.h (struct streamer_tree_cache_d): Add next_idx member. (streamer_tree_cache_create): Adjust. * tree-streamer.c (streamer_tree_cache_add_to_node_array): Adjust to allow optional nodes array. (streamer_tree_cache_insert_1): Use next_idx to assign idx. (streamer_tree_cache_append): Likewise. (streamer_tree_cache_create): Create nodes array optionally as specified by parameter. * lto-streamer-out.c (create_output_block): Avoid maintaining the node array in the writer cache. (DFS_write_tree): Remove assertion. (produce_asm_for_decls): Free the out decl state hash table early. * lto-streamer-in.c (lto_data_in_create): Adjust for streamer_tree_cache_create prototype change. Index: gcc/tree-streamer.c =================================================================== *** gcc/tree-streamer.c (revision 209018) --- gcc/tree-streamer.c (working copy) *************** static void *** 101,120 **** streamer_tree_cache_add_to_node_array (struct streamer_tree_cache_d *cache, unsigned ix, tree t, hashval_t hash) { ! /* Make sure we're either replacing an old element or ! appending consecutively. */ ! gcc_assert (ix <= cache->nodes.length ()); ! ! if (ix == cache->nodes.length ()) { ! cache->nodes.safe_push (t); ! if (cache->hashes.exists ()) ! cache->hashes.safe_push (hash); } ! else { ! cache->nodes[ix] = t; ! if (cache->hashes.exists ()) cache->hashes[ix] = hash; } } --- 101,119 ---- streamer_tree_cache_add_to_node_array (struct streamer_tree_cache_d *cache, unsigned ix, tree t, hashval_t hash) { ! /* We're either replacing an old element or appending consecutively. */ ! if (cache->nodes.exists ()) { ! if (cache->nodes.length () == ix) ! cache->nodes.safe_push (t); ! else ! cache->nodes[ix] = t; } ! if (cache->hashes.exists ()) { ! if (cache->hashes.length () == ix) ! cache->hashes.safe_push (hash); ! else cache->hashes[ix] = hash; } } *************** streamer_tree_cache_insert_1 (struct str *** 146,152 **** { /* Determine the next slot to use in the cache. */ if (insert_at_next_slot_p) ! ix = cache->nodes.length (); else ix = *ix_p; *slot = ix; --- 145,151 ---- { /* Determine the next slot to use in the cache. */ if (insert_at_next_slot_p) ! ix = cache->next_idx++; else ix = *ix_p; *slot = ix; *************** void *** 211,217 **** streamer_tree_cache_append (struct streamer_tree_cache_d *cache, tree t, hashval_t hash) { ! unsigned ix = cache->nodes.length (); if (!cache->node_map) streamer_tree_cache_add_to_node_array (cache, ix, t, hash); else --- 210,216 ---- streamer_tree_cache_append (struct streamer_tree_cache_d *cache, tree t, hashval_t hash) { ! unsigned ix = cache->next_idx++; if (!cache->node_map) streamer_tree_cache_add_to_node_array (cache, ix, t, hash); else *************** preload_common_nodes (struct streamer_tr *** 326,332 **** /* Create a cache of pickled nodes. */ struct streamer_tree_cache_d * ! streamer_tree_cache_create (bool with_hashes, bool with_map) { struct streamer_tree_cache_d *cache; --- 325,331 ---- /* Create a cache of pickled nodes. */ struct streamer_tree_cache_d * ! streamer_tree_cache_create (bool with_hashes, bool with_map, bool with_vec) { struct streamer_tree_cache_d *cache; *************** streamer_tree_cache_create (bool with_ha *** 334,340 **** if (with_map) cache->node_map = new pointer_map<unsigned>; ! cache->nodes.create (165); if (with_hashes) cache->hashes.create (165); --- 333,341 ---- if (with_map) cache->node_map = new pointer_map<unsigned>; ! cache->next_idx = 0; ! if (with_vec) ! cache->nodes.create (165); if (with_hashes) cache->hashes.create (165); Index: gcc/tree-streamer.h =================================================================== *** gcc/tree-streamer.h (revision 209018) --- gcc/tree-streamer.h (working copy) *************** struct streamer_tree_cache_d *** 52,57 **** --- 52,60 ---- vec<tree> nodes; /* The node hashes (if available). */ vec<hashval_t> hashes; + + /* Next index to assign. */ + unsigned next_idx; }; /* Return true if tree node EXPR should be streamed as a builtin. For *************** void streamer_tree_cache_append (struct *** 97,103 **** hashval_t); bool streamer_tree_cache_lookup (struct streamer_tree_cache_d *, tree, unsigned *); ! struct streamer_tree_cache_d *streamer_tree_cache_create (bool, bool); void streamer_tree_cache_delete (struct streamer_tree_cache_d *); /* Return the tree node at slot IX in CACHE. */ --- 100,106 ---- hashval_t); bool streamer_tree_cache_lookup (struct streamer_tree_cache_d *, tree, unsigned *); ! struct streamer_tree_cache_d *streamer_tree_cache_create (bool, bool, bool); void streamer_tree_cache_delete (struct streamer_tree_cache_d *); /* Return the tree node at slot IX in CACHE. */ Index: gcc/lto-streamer-out.c =================================================================== *** gcc/lto-streamer-out.c (revision 209018) --- gcc/lto-streamer-out.c (working copy) *************** create_output_block (enum lto_section_ty *** 79,85 **** ob->decl_state = lto_get_out_decl_state (); ob->main_stream = XCNEW (struct lto_output_stream); ob->string_stream = XCNEW (struct lto_output_stream); ! ob->writer_cache = streamer_tree_cache_create (!flag_wpa, true); if (section_type == LTO_section_function_body) ob->cfg_stream = XCNEW (struct lto_output_stream); --- 79,85 ---- ob->decl_state = lto_get_out_decl_state (); ob->main_stream = XCNEW (struct lto_output_stream); ob->string_stream = XCNEW (struct lto_output_stream); ! ob->writer_cache = streamer_tree_cache_create (!flag_wpa, true, false); if (section_type == LTO_section_function_body) ob->cfg_stream = XCNEW (struct lto_output_stream); *************** DFS_write_tree (struct output_block *ob, *** 1277,1283 **** ??? We still wrap these in LTO_tree_scc so at the input side we can properly identify the tree we want to ultimatively return. */ - size_t old_len = ob->writer_cache->nodes.length (); if (size == 1) lto_output_tree_1 (ob, expr, scc_hash, ref_p, this_ref_p); else --- 1277,1282 ---- *************** DFS_write_tree (struct output_block *ob, *** 1315,1321 **** streamer_write_zero (ob); } } - gcc_assert (old_len + size == ob->writer_cache->nodes.length ()); /* Finally truncate the vector. */ sccstack.truncate (first); --- 1314,1319 ---- *************** produce_asm_for_decls (void) *** 2423,2432 **** gcc_assert (!alias_pairs); ! /* Write the global symbols. */ out_state = lto_get_out_decl_state (); ! num_fns = lto_function_decl_states.length (); lto_output_decl_state_streams (ob, out_state); for (idx = 0; idx < num_fns; idx++) { fn_out_state = --- 2453,2470 ---- gcc_assert (!alias_pairs); ! /* Get rid of the global decl state hash tables to save some memory. */ out_state = lto_get_out_decl_state (); ! for (int i = 0; i < LTO_N_DECL_STREAMS; i++) ! if (out_state->streams[i].tree_hash_table) ! { ! delete out_state->streams[i].tree_hash_table; ! out_state->streams[i].tree_hash_table = NULL; ! } ! ! /* Write the global symbols. */ lto_output_decl_state_streams (ob, out_state); + num_fns = lto_function_decl_states.length (); for (idx = 0; idx < num_fns; idx++) { fn_out_state = Index: gcc/lto-streamer-in.c =================================================================== *** gcc/lto-streamer-in.c (revision 209018) --- gcc/lto-streamer-in.c (working copy) *************** lto_data_in_create (struct lto_file_decl *** 1365,1372 **** data_in->strings = strings; data_in->strings_len = len; data_in->globals_resolution = resolutions; ! data_in->reader_cache = streamer_tree_cache_create (false, false); ! return data_in; } --- 1365,1371 ---- data_in->strings = strings; data_in->strings_len = len; data_in->globals_resolution = resolutions; ! data_in->reader_cache = streamer_tree_cache_create (false, false, true); return data_in; }