On Fri, 6 Dec 2013, Jan Hubicka wrote: > > On Thu, 21 Nov 2013, Jan Hubicka wrote: > > > > > > > > > > Why do you need an additional -fparallelism? Wouldn't > > > > -fwpa=... be a better match, matching -flto=...? As we already > > > > pass down a -fwpa option to WPA this would make things easier, no? > > > > > > My plan was to possibly use the same option later for parallelizing more > > > parts of > > > the compiler, not only WPA streaming. Streaming in may have some chance if we > > > get > > > into thread safety of GGC or move a sufficient amount of stuff out of GGC. > > > Also > > > we can parallelize the inliner heuristic or IPA-PTA if it ever works. So > > > it > > > would make sense with -flto-partition=none and perhaps with local > > > optimization, > > > too. > > > > I'd like to drop -flto-partition=none eventually. It's just one more > > path through the compiler to support ... > > > > > But I can definitely update the patch to use -fwpa=N and we can deal with > > > this > > > once this becomes real. (i.e. I have no clue how to parallelize the inliner > > > without > > > making its decisions dependent on the parallelism and degrading as > > > parallelism > > > increases, nor do I have real plans for the stream-in procedure) > > > > Please. > > > > Hi, > here is the updated patch. Sorry for taking time, I should have more time for > hacking again > now...
Ok. Thanks, Richard. > Honza > > * lto-cgraph.c (asm_nodes_output): Make global. > * lto-wrapper.c (run_gcc): Pass down paralelizm to WPA. > * lto.c (lto_parallelism): New static var. > (do_stream_out, wait_for_child, stream_out): New static functions. > (lto_wpa_write_files): Add support for parallel streaming. > (do_whole_program_analysis): Set parallelism. > * lang.opt (fwpa): Add parameter. > * lto-lang.c (lto_handle_option): Handle flag_wpa. > (lto_init): Update use of flag_wpa. > * lto-streamer.h (asm_nodes_output): Declare. > Index: lto-cgraph.c > =================================================================== > *** lto-cgraph.c (revision 205646) > --- lto-cgraph.c (working copy) > *************** along with GCC; see the file COPYING3. > *** 53,58 **** > --- 53,61 ---- > #include "pass_manager.h" > #include "ipa-utils.h" > > + /* True when asm nodes has been output. */ > + bool asm_nodes_output = false; > + > static void output_cgraph_opt_summary (void); > static void input_cgraph_opt_summary (vec<symtab_node *> nodes); > > *************** output_symtab (void) > *** 889,895 **** > lto_symtab_encoder_iterator lsei; > int i, n_nodes; > lto_symtab_encoder_t encoder; > - static bool asm_nodes_output = false; > > if (flag_wpa) > output_cgraph_opt_summary (); > --- 892,897 ---- > Index: lto-wrapper.c > =================================================================== > *** lto-wrapper.c (revision 205646) > --- lto-wrapper.c (working copy) > *************** run_gcc (unsigned argc, char *argv[]) > *** 745,751 **** > tmp += list_option_len; > strcpy (tmp, ltrans_output_file); > > ! obstack_ptr_grow (&argv_obstack, "-fwpa"); > } > > /* Append the input objects and possible preceding arguments. */ > --- 746,761 ---- > tmp += list_option_len; > strcpy (tmp, ltrans_output_file); > > ! if (jobserver) > ! obstack_ptr_grow (&argv_obstack, xstrdup ("-fwpa=jobserver")); > ! else if (parallel > 1) > ! { > ! char buf[256]; > ! sprintf (buf, "-fwpa=%i", parallel); > ! 
obstack_ptr_grow (&argv_obstack, xstrdup (buf)); > ! } > ! else > ! obstack_ptr_grow (&argv_obstack, "-fwpa"); > } > > /* Append the input objects and possible preceding arguments. */ > Index: lto/lto.c > =================================================================== > *** lto/lto.c (revision 205646) > --- lto/lto.c (working copy) > *************** along with GCC; see the file COPYING3. > *** 53,58 **** > --- 53,61 ---- > /* Vector to keep track of external variables we've seen so far. */ > vec<tree, va_gc> *lto_global_var_decls; > > + /* Number of parallel tasks to run, -1 if we want to use GNU Make > jobserver. */ > + static int lto_parallelism; > + > static GTY(()) tree first_personality_decl; > > /* Returns a hash code for P. */ > *************** cmp_partitions_order (const void *a, con > *** 2454,2459 **** > --- 2457,2554 ---- > return orderb - ordera; > } > > + /* Actually stream out ENCODER into TEMP_FILENAME. */ > + > + static void > + do_stream_out (char *temp_filename, lto_symtab_encoder_t encoder) > + { > + lto_file *file = lto_obj_file_open (temp_filename, true); > + if (!file) > + fatal_error ("lto_obj_file_open() failed"); > + lto_set_current_out_file (file); > + > + ipa_write_optimization_summaries (encoder); > + > + lto_set_current_out_file (NULL); > + lto_obj_file_close (file); > + free (file); > + } > + > + /* Wait for forked process and signal errors. */ > + #ifdef HAVE_WORKING_FORK > + static void > + wait_for_child () > + { > + int status; > + do > + { > + int w = waitpid(0, &status, WUNTRACED | WCONTINUED); > + if (w == -1) > + fatal_error ("waitpid failed"); > + > + if (WIFEXITED (status) && WEXITSTATUS (status)) > + fatal_error ("streaming subprocess failed"); > + else if (WIFSIGNALED (status)) > + fatal_error ("streaming subprocess was killed by signal"); > + } > + while (!WIFEXITED(status) && !WIFSIGNALED(status)); > + } > + #endif > + > + /* Stream out ENCODER into TEMP_FILENAME > + Fork if that seems to help. 
*/ > + > + static void > + stream_out (char *temp_filename, lto_symtab_encoder_t encoder, bool last) > + { > + #ifdef HAVE_WORKING_FORK > + static int nruns; > + > + if (!lto_parallelism || lto_parallelism == 1) > + { > + do_stream_out (temp_filename, encoder); > + return; > + } > + > + /* Do not run more than LTO_PARALLELISM streamings > + FIXME: we ignore limits on jobserver. */ > + if (lto_parallelism > 0 && nruns >= lto_parallelism) > + { > + wait_for_child (); > + nruns --; > + } > + /* If this is not the last parallel partition, execute new > + streaming process. */ > + if (!last) > + { > + pid_t cpid = fork (); > + > + if (!cpid) > + { > + setproctitle ("lto1-wpa-streaming"); > + do_stream_out (temp_filename, encoder); > + exit (0); > + } > + /* Fork failed; lets do the job ourseleves. */ > + else if (cpid == -1) > + do_stream_out (temp_filename, encoder); > + else > + nruns++; > + } > + /* Last partition; stream it and wait for all children to die. */ > + else > + { > + int i; > + do_stream_out (temp_filename, encoder); > + for (i = 0; i < nruns; i++) > + wait_for_child (); > + } > + asm_nodes_output = true; > + #else > + do_stream_out (temp_filename, encoder); > + #endif > + } > + > /* Write all output files in WPA mode and the file with the list of > LTRANS units. */ > > *************** static void > *** 2461,2478 **** > lto_wpa_write_files (void) > { > unsigned i, n_sets; > - lto_file *file; > ltrans_partition part; > FILE *ltrans_output_list_stream; > char *temp_filename; > size_t blen; > > /* Open the LTRANS output list. 
*/ > if (!ltrans_output_list) > fatal_error ("no LTRANS output list filename provided"); > - ltrans_output_list_stream = fopen (ltrans_output_list, "w"); > - if (ltrans_output_list_stream == NULL) > - fatal_error ("opening LTRANS output list %s: %m", ltrans_output_list); > > timevar_push (TV_WHOPR_WPA); > > --- 2556,2570 ---- > lto_wpa_write_files (void) > { > unsigned i, n_sets; > ltrans_partition part; > FILE *ltrans_output_list_stream; > char *temp_filename; > + vec <char *>temp_filenames = vNULL; > size_t blen; > > /* Open the LTRANS output list. */ > if (!ltrans_output_list) > fatal_error ("no LTRANS output list filename provided"); > > timevar_push (TV_WHOPR_WPA); > > *************** lto_wpa_write_files (void) > *** 2508,2521 **** > : cmp_partitions_order); > for (i = 0; i < n_sets; i++) > { > - size_t len; > ltrans_partition part = ltrans_partitions[i]; > > /* Write all the nodes in SET. */ > sprintf (temp_filename + blen, "%u.o", i); > - file = lto_obj_file_open (temp_filename, true); > - if (!file) > - fatal_error ("lto_obj_file_open() failed"); > > if (!quiet_flag) > fprintf (stderr, " %s (%s %i insns)", temp_filename, part->name, > part->insns); > --- 2600,2609 ---- > *************** lto_wpa_write_files (void) > *** 2557,2577 **** > } > gcc_checking_assert (lto_symtab_encoder_size (part->encoder) || !i); > > ! lto_set_current_out_file (file); > ! > ! ipa_write_optimization_summaries (part->encoder); > > - lto_set_current_out_file (NULL); > - lto_obj_file_close (file); > - free (file); > part->encoder = NULL; > > ! len = strlen (temp_filename); > ! if (fwrite (temp_filename, 1, len, ltrans_output_list_stream) < len > || fwrite ("\n", 1, 1, ltrans_output_list_stream) < 1) > fatal_error ("writing to LTRANS output list %s: %m", > ltrans_output_list); > } > > lto_stats.num_output_files += n_sets; > > --- 2645,2669 ---- > } > gcc_checking_assert (lto_symtab_encoder_size (part->encoder) || !i); > > ! 
stream_out (temp_filename, part->encoder, i == n_sets - 1); > > part->encoder = NULL; > > ! temp_filenames.safe_push (xstrdup (temp_filename)); > ! } > ! ltrans_output_list_stream = fopen (ltrans_output_list, "w"); > ! if (ltrans_output_list_stream == NULL) > ! fatal_error ("opening LTRANS output list %s: %m", ltrans_output_list); > ! for (i = 0; i < n_sets; i++) > ! { > ! unsigned int len = strlen (temp_filenames[i]); > ! if (fwrite (temp_filenames[i], 1, len, ltrans_output_list_stream) < > len > || fwrite ("\n", 1, 1, ltrans_output_list_stream) < 1) > fatal_error ("writing to LTRANS output list %s: %m", > ltrans_output_list); > + free (temp_filenames[i]); > } > + temp_filenames.release(); > > lto_stats.num_output_files += n_sets; > > *************** do_whole_program_analysis (void) > *** 3126,3131 **** > --- 3218,3235 ---- > { > symtab_node *node; > > + lto_parallelism = 1; > + > + /* TODO: jobserver communicatoin is not supported, yet. */ > + if (!strcmp (flag_wpa, "jobserver")) > + lto_parallelism = -1; > + else > + { > + lto_parallelism = atoi (flag_wpa); > + if (lto_parallelism <= 0) > + lto_parallelism = 0; > + } > + > timevar_start (TV_PHASE_OPT_GEN); > > /* Note that since we are in WPA mode, materialize_cgraph will not > Index: lto/lang.opt > =================================================================== > *** lto/lang.opt (revision 205646) > --- lto/lang.opt (working copy) > *************** LTO Joined Var(ltrans_output_list) > *** 33,41 **** > Specify a file to which a list of files output by LTRANS is written. > > fwpa > ! LTO Driver Report Var(flag_wpa) > Run the link-time optimizer in whole program analysis (WPA) mode. > > fresolution= > LTO Joined > The resolution file > --- 33,45 ---- > Specify a file to which a list of files output by LTRANS is written. > > fwpa > ! LTO Driver Report > Run the link-time optimizer in whole program analysis (WPA) mode. 
> > + fwpa= > + LTO Driver RejectNegative Joined Var(flag_wpa) > + Whole program analysis (WPA) mode with number of parallel jobs specified. > + > fresolution= > LTO Joined > The resolution file > Index: lto/lto-lang.c > =================================================================== > *** lto/lto-lang.c (revision 205646) > --- lto/lto-lang.c (working copy) > *************** lto_handle_option (size_t scode, const c > *** 749,754 **** > --- 749,758 ---- > warn_psabi = value; > break; > > + case OPT_fwpa: > + flag_wpa = value ? "" : NULL; > + break; > + > default: > break; > } > *************** static bool > *** 1148,1154 **** > lto_init (void) > { > /* We need to generate LTO if running in WPA mode. */ > ! flag_generate_lto = flag_wpa; > > /* Create the basic integer types. */ > build_common_tree_nodes (flag_signed_char, /*short_double=*/false); > --- 1152,1158 ---- > lto_init (void) > { > /* We need to generate LTO if running in WPA mode. */ > ! flag_generate_lto = (flag_wpa != NULL); > > /* Create the basic integer types. */ > build_common_tree_nodes (flag_signed_char, /*short_double=*/false); > Index: lto-streamer.h > =================================================================== > *** lto-streamer.h (revision 205646) > --- lto-streamer.h (working copy) > *************** void lto_output_location (struct output_ > *** 873,878 **** > --- 873,879 ---- > > > /* In lto-cgraph.c */ > + extern bool asm_nodes_output; > lto_symtab_encoder_t lto_symtab_encoder_new (bool); > int lto_symtab_encoder_encode (lto_symtab_encoder_t, symtab_node *); > void lto_symtab_encoder_delete (lto_symtab_encoder_t); > > -- Richard Biener <rguent...@suse.de> SUSE / SUSE Labs SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 GF: Jeff Hawn, Jennifer Guild, Felix Imend"orffer