> > Why do you need an additional -fparallelism? Wouldn't > -fwpa=... be a better match, matching -flto=...? As we already > pass down a -fwpa option to WPA this would make things easier, no?
My plan was to possibly use the same option later for parallelizing more parts of the compiler, not only WPA streaming. Streaming in may have some chance if we get into thread safety of GGC or move a sufficient amount of stuff out of GGC. Also we could parallelize the inliner heuristics or IPA-PTA, if that ever works. So the option would make sense with -flto-partition=none and perhaps with local optimization, too. But I can definitely update the patch to use -fwpa=N and we can deal with this once it becomes real. (I.e. I have no clue how to parallelize the inliner without making its decisions depend on the degree of parallelism and degrade as parallelism increases, nor do I have real plans for the stream-in procedure.) Honza > > Thanks, > Richard. > > > Honza > > > > * lto-cgraph.c (asm_nodes_output): Make global. > > * lto-streamer.h (asm_nodes_output): Declare. > > * lto-wrapper.c (parallel, jobserver): Make global. > > (run_gcc): Pass down -fparallelism > > > > * lto.c (lto_parallelism): New variable. > > (do_stream_out): New function. > > (stream_out): New function. > > (lto_wpa_write_files): Use it. > > * lang.opt (fparallelism): New. > > * lto.h (lto_parallelism): Declare. > > * lto-lang.c (lto_handle_option): Add fparalelism. > > > > Index: lto-cgraph.c > > =================================================================== > > --- lto-cgraph.c (revision 201891) > > +++ lto-cgraph.c (working copy) > > @@ -50,6 +50,9 @@ along with GCC; see the file COPYING3. > > #include "context.h" > > #include "pass_manager.h" > > > > +/* True when asm nodes has been output. 
*/ > > +bool asm_nodes_output = false; > > + > > static void output_cgraph_opt_summary (void); > > static void input_cgraph_opt_summary (vec<symtab_node> nodes); > > > > @@ -852,7 +855,6 @@ output_symtab (void) > > lto_symtab_encoder_iterator lsei; > > int i, n_nodes; > > lto_symtab_encoder_t encoder; > > - static bool asm_nodes_output = false; > > > > if (flag_wpa) > > output_cgraph_opt_summary (); > > Index: lto-streamer.h > > =================================================================== > > --- lto-streamer.h (revision 201891) > > +++ lto-streamer.h (working copy) > > @@ -870,6 +870,7 @@ void lto_output_location (struct output_ > > > > > > /* In lto-cgraph.c */ > > +extern bool asm_nodes_output; > > lto_symtab_encoder_t lto_symtab_encoder_new (bool); > > int lto_symtab_encoder_encode (lto_symtab_encoder_t, symtab_node); > > void lto_symtab_encoder_delete (lto_symtab_encoder_t); > > Index: lto-wrapper.c > > =================================================================== > > --- lto-wrapper.c (revision 201891) > > +++ lto-wrapper.c (working copy) > > @@ -56,6 +56,9 @@ along with GCC; see the file COPYING3. > > > > int debug; /* true if -save-temps. */ > > int verbose; /* true if -v. */ > > +int parallel = 0; /* number of parallel builds specified > > + by -flto=N */ > > +int jobserver = 0; /* true if -flto=jobserver was used. */ > > > > enum lto_mode_d { > > LTO_MODE_NONE, /* Not doing LTO. 
*/ > > @@ -445,8 +448,6 @@ run_gcc (unsigned argc, char *argv[]) > > char *list_option_full = NULL; > > const char *linker_output = NULL; > > const char *collect_gcc, *collect_gcc_options; > > - int parallel = 0; > > - int jobserver = 0; > > bool no_partition = false; > > struct cl_decoded_option *fdecoded_options = NULL; > > unsigned int fdecoded_options_count = 0; > > @@ -630,6 +631,16 @@ run_gcc (unsigned argc, char *argv[]) > > if (parallel <= 1) > > parallel = 0; > > } > > + if (jobserver) > > + { > > + obstack_ptr_grow (&argv_obstack, xstrdup > > ("-fparallelism=jobserver")); > > + } > > + else if (parallel > 1) > > + { > > + char buf[256]; > > + sprintf (buf, "-fparallelism=%i", parallel); > > + obstack_ptr_grow (&argv_obstack, xstrdup (buf)); > > + } > > /* Fallthru. */ > > > > case OPT_flto: > > Index: lto/lto.c > > =================================================================== > > --- lto/lto.c (revision 201891) > > +++ lto/lto.c (working copy) > > @@ -49,6 +49,9 @@ along with GCC; see the file COPYING3. > > #include "context.h" > > #include "pass_manager.h" > > > > +/* Number of parallel tasks to run, -1 if we want to use GNU Make > > jobserver. */ > > +int lto_parallelism; > > + > > static GTY(()) tree first_personality_decl; > > > > /* Returns a hash code for P. */ > > @@ -3002,6 +3005,98 @@ cmp_partitions_order (const void *a, con > > return orderb - ordera; > > } > > > > +/* Actually stream out ENCODER into TEMP_FILENAME. */ > > + > > +void > > +do_stream_out (char *temp_filename, lto_symtab_encoder_t encoder) > > +{ > > + lto_file *file = lto_obj_file_open (temp_filename, true); > > + if (!file) > > + fatal_error ("lto_obj_file_open() failed"); > > + lto_set_current_out_file (file); > > + > > + ipa_write_optimization_summaries (encoder); > > + > > + lto_set_current_out_file (NULL); > > + lto_obj_file_close (file); > > + free (file); > > +} > > + > > +/* Wait for forked process and signal errors. 
*/ > > +#ifdef HAVE_WORKING_FORK > > +void > > +wait_for_child () > > +{ > > + int status; > > + do > > + { > > + int w = waitpid(0, &status, WUNTRACED | WCONTINUED); > > + if (w == -1) > > + fatal_error ("waitpid failed"); > > + > > + if (WIFEXITED (status) && WEXITSTATUS (status)) > > + fatal_error ("streaming subprocess failed"); > > + else if (WIFSIGNALED (status)) > > + fatal_error ("streaming subprocess was killed by signal"); > > + } > > + while (!WIFEXITED(status) && !WIFSIGNALED(status)); > > +} > > +#endif > > + > > +/* Stream out ENCODER into TEMP_FILENAME > > + Fork if that seems to help. */ > > + > > +void > > +stream_out (char *temp_filename, lto_symtab_encoder_t encoder, bool last) > > +{ > > +#ifdef HAVE_WORKING_FORK > > + static int nruns; > > + > > + if (!lto_parallelism || lto_parallelism == 1) > > + { > > + do_stream_out (temp_filename, encoder); > > + return; > > + } > > + > > + /* Do not run more than LTO_PARALLELISM streamings > > + FIXME: we ignore limits on jobserver. */ > > + if (lto_parallelism > 0 && nruns >= lto_parallelism) > > + { > > + wait_for_child (); > > + nruns --; > > + } > > + /* If this is not the last parallel partition, execute new > > + streaming process. */ > > + if (!last) > > + { > > + pid_t cpid = fork (); > > + > > + if (!cpid) > > + { > > + setproctitle ("lto1-wpa-streaming"); > > + do_stream_out (temp_filename, encoder); > > + exit (0); > > + } > > + /* Fork failed; lets do the job ourseleves. */ > > + else if (cpid == -1) > > + do_stream_out (temp_filename, encoder); > > + else > > + nruns++; > > + } > > + /* Last partition; stream it and wait for all children to die. */ > > + else > > + { > > + int i; > > + do_stream_out (temp_filename, encoder); > > + for (i = 0; i < nruns; i++) > > + wait_for_child (); > > + } > > + asm_nodes_output = true; > > +#else > > + do_stream_out (temp_filename, encoder); > > +#endif > > +} > > + > > /* Write all output files in WPA mode and the file with the list of > > LTRANS units. 
*/ > > > > @@ -3009,18 +3104,15 @@ static void > > lto_wpa_write_files (void) > > { > > unsigned i, n_sets; > > - lto_file *file; > > ltrans_partition part; > > FILE *ltrans_output_list_stream; > > char *temp_filename; > > + vec <char *>temp_filenames = vNULL; > > size_t blen; > > > > /* Open the LTRANS output list. */ > > if (!ltrans_output_list) > > fatal_error ("no LTRANS output list filename provided"); > > - ltrans_output_list_stream = fopen (ltrans_output_list, "w"); > > - if (ltrans_output_list_stream == NULL) > > - fatal_error ("opening LTRANS output list %s: %m", ltrans_output_list); > > > > timevar_push (TV_WHOPR_WPA); > > > > @@ -3056,14 +3148,10 @@ lto_wpa_write_files (void) > > : cmp_partitions_order); > > for (i = 0; i < n_sets; i++) > > { > > - size_t len; > > ltrans_partition part = ltrans_partitions[i]; > > > > /* Write all the nodes in SET. */ > > sprintf (temp_filename + blen, "%u.o", i); > > - file = lto_obj_file_open (temp_filename, true); > > - if (!file) > > - fatal_error ("lto_obj_file_open() failed"); > > > > if (!quiet_flag) > > fprintf (stderr, " %s (%s %i insns)", temp_filename, part->name, > > part->insns); > > @@ -3105,21 +3193,25 @@ lto_wpa_write_files (void) > > } > > gcc_checking_assert (lto_symtab_encoder_size (part->encoder) || !i); > > > > - lto_set_current_out_file (file); > > - > > - ipa_write_optimization_summaries (part->encoder); > > + stream_out (temp_filename, part->encoder, i == n_sets - 1); > > > > - lto_set_current_out_file (NULL); > > - lto_obj_file_close (file); > > - free (file); > > part->encoder = NULL; > > > > - len = strlen (temp_filename); > > - if (fwrite (temp_filename, 1, len, ltrans_output_list_stream) < len > > + temp_filenames.safe_push (xstrdup (temp_filename)); > > + } > > + ltrans_output_list_stream = fopen (ltrans_output_list, "w"); > > + if (ltrans_output_list_stream == NULL) > > + fatal_error ("opening LTRANS output list %s: %m", ltrans_output_list); > > + for (i = 0; i < n_sets; i++) > > + { > > + 
unsigned int len = strlen (temp_filenames[i]); > > + if (fwrite (temp_filenames[i], 1, len, ltrans_output_list_stream) < > > len > > || fwrite ("\n", 1, 1, ltrans_output_list_stream) < 1) > > fatal_error ("writing to LTRANS output list %s: %m", > > ltrans_output_list); > > + free (temp_filenames[i]); > > } > > + temp_filenames.release(); > > > > lto_stats.num_output_files += n_sets; > > > > Index: lto/lang.opt > > =================================================================== > > --- lto/lang.opt (revision 201891) > > +++ lto/lang.opt (working copy) > > @@ -32,6 +32,10 @@ fltrans-output-list= > > LTO Joined Var(ltrans_output_list) > > Specify a file to which a list of files output by LTRANS is written. > > > > +fparallelism= > > +LTO Joined > > +Run the link-time optimizer in whole program analysis (WPA) mode. > > + > > fwpa > > LTO Driver Report Var(flag_wpa) > > Run the link-time optimizer in whole program analysis (WPA) mode. > > Index: lto/lto.h > > =================================================================== > > --- lto/lto.h (revision 201891) > > +++ lto/lto.h (working copy) > > @@ -39,6 +39,7 @@ extern const char *resolution_file_name; > > extern tree lto_eh_personality (void); > > extern void lto_main (void); > > extern void lto_read_all_file_options (void); > > +extern int lto_parallelism; > > > > /* In lto-elf.c or lto-coff.c */ > > extern lto_file *lto_obj_file_open (const char *filename, bool writable); > > Index: lto/lto-lang.c > > =================================================================== > > --- lto/lto-lang.c (revision 201891) > > +++ lto/lto-lang.c (working copy) > > @@ -735,6 +735,19 @@ lto_handle_option (size_t scode, const c > > warn_psabi = value; > > break; > > > > + case OPT_fparallelism_: > > + if (!arg) > > + lto_parallelism = 1; > > + else if (!strcmp (arg, "jobserver")) > > + lto_parallelism = -1; > > + else > > + { > > + lto_parallelism = atoi (arg); > > + if (lto_parallelism <= 0) > > + lto_parallelism = 0; > > + } 
> > + break; > > + > > default: > > break; > > } > > > > > > -- > Richard Biener <rguent...@suse.de> > SUSE / SUSE Labs > SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 > GF: Jeff Hawn, Jennifer Guild, Felix Imend"orffer