Hi, this patch orders symbols where we copy sections to match the order of files in the command line. This optimizes streaming process since we are not opening and closing files randomly and also we read them more sequentially. This saves some kernel time though I think more can be done if we avoid doing pair of mmap/unmap for every file section we read.
We also read files in random order in ipa-cp and during devirt. I guess also summary streaming can be refactored to stream all summaries for a given file instead of reading one sumarry from all files. Bootstrapped/regtested x86_64-linux, plan to commit it this afternoon if there are no complains. Honza * lto-common.c (lto_file_finalize): Add order attribute. (lto_create_files_from_ids): Pass order. (lto_file_read): UPdate call of lto_create_files_from_ids. * lto-streamer-out.c (output_constructor): Push CTORS_OUT timevar. (cmp_symbol_files): New. (lto_output): Copy sections in file order. * lto-streamer.h (lto_file_decl_data): Add field order. Index: lto/lto-common.c =================================================================== --- lto/lto-common.c (revision 276986) +++ lto/lto-common.c (working copy) @@ -2177,7 +2177,8 @@ create_subid_section_table (struct lto_s /* Read declarations and other initializations for a FILE_DATA. */ static void -lto_file_finalize (struct lto_file_decl_data *file_data, lto_file *file) +lto_file_finalize (struct lto_file_decl_data *file_data, lto_file *file, + int order) { const char *data; size_t len; @@ -2195,6 +2196,7 @@ lto_file_finalize (struct lto_file_decl_ file_data->renaming_hash_table = lto_create_renaming_table (); file_data->file_name = file->filename; + file_data->order = order; #ifdef ACCEL_COMPILER lto_input_mode_table (file_data); #else @@ -2231,9 +2233,9 @@ lto_file_finalize (struct lto_file_decl_ static int lto_create_files_from_ids (lto_file *file, struct lto_file_decl_data *file_data, - int *count) + int *count, int order) { - lto_file_finalize (file_data, file); + lto_file_finalize (file_data, file, order); if (symtab->dump_file) fprintf (symtab->dump_file, "Creating file %s with sub id " HOST_WIDE_INT_PRINT_HEX "\n", @@ -2285,9 +2287,10 @@ lto_file_read (lto_file *file, FILE *res lto_resolution_read (file_ids, resolution_file, file); /* Finalize each lto file for each submodule in the merged object. */ + int order = 0; for (file_data = file_list.first; file_data != NULL; file_data = file_data->next) - lto_create_files_from_ids (file, file_data, count); + lto_create_files_from_ids (file, file_data, count, order++); splay_tree_delete (file_ids); htab_delete (section_hash_table); Index: lto-streamer-out.c =================================================================== --- lto-streamer-out.c (revision 276986) +++ lto-streamer-out.c (working copy) @@ -2217,6 +2224,7 @@ output_constructor (struct varpool_node fprintf (streamer_dump_file, "\nStreaming constructor of %s\n", node->name ()); + timevar_push (TV_IPA_LTO_CTORS_OUT); ob = create_output_block (LTO_section_function_body); clear_line_info (ob); @@ -2236,6 +2244,7 @@ output_constructor (struct varpool_node if (streamer_dump_file) fprintf (streamer_dump_file, "Finished streaming %s\n", node->name ()); + timevar_pop (TV_IPA_LTO_CTORS_OUT); } @@ -2416,6 +2425,30 @@ produce_lto_section () destroy_output_block (ob); } +/* Compare symbols to get them sorted by filename (to optimize streaming) */ + +static int +cmp_symbol_files (const void *pn1, const void *pn2) +{ + const symtab_node *n1 = *(const symtab_node * const *)pn1; + const symtab_node *n2 = *(const symtab_node * const *)pn2; + + int file_order1 = n1->lto_file_data ? n1->lto_file_data->order : -1; + int file_order2 = n2->lto_file_data ? n2->lto_file_data->order : -1; + + /* Order files same way as they appeared in the command line to reduce + seeking while copying sections. */ + if (file_order1 != file_order2) + return file_order1 - file_order2; + + /* Order within static library. */ + if (n1->lto_file_data && n1->lto_file_data->id != n2->lto_file_data->id) + return n1->lto_file_data->id - n2->lto_file_data->id; + + /* And finaly order by the definition order. */ + return n1->order - n2->order; +} + /* Main entry point from the pass manager. */ void @@ -2424,8 +2457,9 @@ lto_output (void) struct lto_out_decl_state *decl_state; bitmap output = NULL; bitmap_obstack output_obstack; - int i, n_nodes; + unsigned int i, n_nodes; lto_symtab_encoder_t encoder = lto_get_out_decl_state ()->symtab_node_encoder; + auto_vec<symtab_node *> symbols_to_copy; prune_offload_funcs (); @@ -2441,32 +2475,17 @@ lto_output (void) produce_lto_section (); n_nodes = lto_symtab_encoder_size (encoder); - /* Process only the functions with bodies. */ + /* Prepare vector of functions to output and then sort it to optimize + section copying. */ for (i = 0; i < n_nodes; i++) { symtab_node *snode = lto_symtab_encoder_deref (encoder, i); + if (snode->alias) + continue; if (cgraph_node *node = dyn_cast <cgraph_node *> (snode)) { - if (lto_symtab_encoder_encode_body_p (encoder, node) - && !node->alias) - { - if (flag_checking) - gcc_assert (bitmap_set_bit (output, DECL_UID (node->decl))); - decl_state = lto_new_out_decl_state (); - lto_push_out_decl_state (decl_state); - if (gimple_has_body_p (node->decl) - || (!flag_wpa - && flag_incremental_link != INCREMENTAL_LINK_LTO) - /* Thunks have no body but they may be synthetized - at WPA time. */ - || DECL_ARGUMENTS (node->decl)) - output_function (node); - else - copy_function_or_variable (node); - gcc_assert (lto_get_out_decl_state () == decl_state); - lto_pop_out_decl_state (); - lto_record_function_out_decl_state (node->decl, decl_state); - } + if (lto_symtab_encoder_encode_body_p (encoder, node)) + symbols_to_copy.safe_push (node); } else if (varpool_node *node = dyn_cast <varpool_node *> (snode)) { @@ -2476,27 +2495,42 @@ lto_output (void) if (ctor && !in_lto_p) walk_tree (&ctor, wrap_refs, NULL, NULL); if (get_symbol_initial_value (encoder, node->decl) == error_mark_node - && lto_symtab_encoder_encode_initializer_p (encoder, node) - && !node->alias) - { - timevar_push (TV_IPA_LTO_CTORS_OUT); - if (flag_checking) - gcc_assert (bitmap_set_bit (output, DECL_UID (node->decl))); - decl_state = lto_new_out_decl_state (); - lto_push_out_decl_state (decl_state); - if (DECL_INITIAL (node->decl) != error_mark_node - || (!flag_wpa - && flag_incremental_link != INCREMENTAL_LINK_LTO)) - output_constructor (node); - else - copy_function_or_variable (node); - gcc_assert (lto_get_out_decl_state () == decl_state); - lto_pop_out_decl_state (); - lto_record_function_out_decl_state (node->decl, decl_state); - timevar_pop (TV_IPA_LTO_CTORS_OUT); - } + && lto_symtab_encoder_encode_initializer_p (encoder, node)) + symbols_to_copy.safe_push (node); } } + symbols_to_copy.qsort (cmp_symbol_files); + for (i = 0; i < symbols_to_copy.length (); i++) + { + symtab_node *snode = symbols_to_copy[i]; + cgraph_node *cnode; + varpool_node *vnode; + + if (flag_checking) + gcc_assert (bitmap_set_bit (output, DECL_UID (snode->decl))); + + decl_state = lto_new_out_decl_state (); + lto_push_out_decl_state (decl_state); + + if ((cnode = dyn_cast <cgraph_node *> (snode)) + && (gimple_has_body_p (cnode->decl) + || (!flag_wpa + && flag_incremental_link != INCREMENTAL_LINK_LTO) + /* Thunks have no body but they may be synthetized + at WPA time. */ + || DECL_ARGUMENTS (cnode->decl))) + output_function (cnode); + else if ((vnode = dyn_cast <varpool_node *> (snode)) + && (DECL_INITIAL (vnode->decl) != error_mark_node + || (!flag_wpa + && flag_incremental_link != INCREMENTAL_LINK_LTO))) + output_constructor (vnode); + else + copy_function_or_variable (snode); + gcc_assert (lto_get_out_decl_state () == decl_state); + lto_pop_out_decl_state (); + lto_record_function_out_decl_state (snode->decl, decl_state); + } /* Emit the callgraph after emitting function bodies. This needs to be done now to make sure that all the statements in every function Index: lto-streamer.h =================================================================== --- lto-streamer.h (revision 276986) +++ lto-streamer.h (working copy) @@ -603,6 +603,9 @@ struct GTY(()) lto_file_decl_data /* Linked list used temporarily in reader */ struct lto_file_decl_data *next; + /* Order in which the file appears on the command line. */ + int order; + /* Sub ID for merged objects. */ unsigned HOST_WIDE_INT id;