On Thu, 21 Nov 2013, Jan Hubicka wrote: > > > > Why do you need an additional -fparallelism? Wouldn't > > -fwpa=... be a better match, matching -flto=...? As we already > > pass down a -fwpa option to WPA this would make things easier, no? > > My plan was to possibly use the same option later for parallelizing more parts of > the compiler, not only WPA streaming. Streaming in may have some chance if we get > into thread safety of GGC or move a sufficient amount of stuff out of GGC. Also > we can parallelize the inliner heuristic or IPA-PTA if it ever works. So it > would make sense with -flto-partition=none and perhaps with local > optimization, too.
I'd like to drop -flto-partition=none eventually. It's just one more path through the compiler to support ... > But I can definitely update the patch to use -fwpa=N and we can deal with this > once this becomes real. (i.e. I have no clue how to parallelize the inliner > without making its decisions dependent on the parallelism and declining with > parallelism increased, nor do I have real plans for the stream-in procedure) Please. Richard. > Honza > > > > Thanks, > > Richard. > > > > > Honza > > > > > > * lto-cgraph.c (asm_nodes_output): Make global. > > > * lto-streamer.h (asm_nodes_output): Declare. > > > * lto-wrapper.c (parallel, jobserver): Make global. > > > (run_gcc): Pass down -fparallelism > > > > > > * lto.c (lto_parallelism): New variable. > > > (do_stream_out): New function. > > > (stream_out): New function. > > > (lto_wpa_write_files): Use it. > > > * lang.opt (fparallelism): New. > > > * lto.h (lto_parallelism): Declare. > > > * lto-lang.c (lto_handle_option): Add fparallelism. > > > > > > Index: lto-cgraph.c > > > =================================================================== > > > --- lto-cgraph.c (revision 201891) > > > +++ lto-cgraph.c (working copy) > > > @@ -50,6 +50,9 @@ along with GCC; see the file COPYING3. > > > #include "context.h" > > > #include "pass_manager.h" > > > > > > +/* True when asm nodes has been output. 
*/ > > > +bool asm_nodes_output = false; > > > + > > > static void output_cgraph_opt_summary (void); > > > static void input_cgraph_opt_summary (vec<symtab_node> nodes); > > > > > > @@ -852,7 +855,6 @@ output_symtab (void) > > > lto_symtab_encoder_iterator lsei; > > > int i, n_nodes; > > > lto_symtab_encoder_t encoder; > > > - static bool asm_nodes_output = false; > > > > > > if (flag_wpa) > > > output_cgraph_opt_summary (); > > > Index: lto-streamer.h > > > =================================================================== > > > --- lto-streamer.h (revision 201891) > > > +++ lto-streamer.h (working copy) > > > @@ -870,6 +870,7 @@ void lto_output_location (struct output_ > > > > > > > > > /* In lto-cgraph.c */ > > > +extern bool asm_nodes_output; > > > lto_symtab_encoder_t lto_symtab_encoder_new (bool); > > > int lto_symtab_encoder_encode (lto_symtab_encoder_t, symtab_node); > > > void lto_symtab_encoder_delete (lto_symtab_encoder_t); > > > Index: lto-wrapper.c > > > =================================================================== > > > --- lto-wrapper.c (revision 201891) > > > +++ lto-wrapper.c (working copy) > > > @@ -56,6 +56,9 @@ along with GCC; see the file COPYING3. > > > > > > int debug; /* true if -save-temps. */ > > > int verbose; /* true if -v. */ > > > +int parallel = 0; /* number of parallel builds > > > specified > > > + by -flto=N */ > > > +int jobserver = 0; /* true if -flto=jobserver was > > > used. */ > > > > > > enum lto_mode_d { > > > LTO_MODE_NONE, /* Not doing LTO. 
*/ > > > @@ -445,8 +448,6 @@ run_gcc (unsigned argc, char *argv[]) > > > char *list_option_full = NULL; > > > const char *linker_output = NULL; > > > const char *collect_gcc, *collect_gcc_options; > > > - int parallel = 0; > > > - int jobserver = 0; > > > bool no_partition = false; > > > struct cl_decoded_option *fdecoded_options = NULL; > > > unsigned int fdecoded_options_count = 0; > > > @@ -630,6 +631,16 @@ run_gcc (unsigned argc, char *argv[]) > > > if (parallel <= 1) > > > parallel = 0; > > > } > > > + if (jobserver) > > > + { > > > + obstack_ptr_grow (&argv_obstack, xstrdup > > > ("-fparallelism=jobserver")); > > > + } > > > + else if (parallel > 1) > > > + { > > > + char buf[256]; > > > + sprintf (buf, "-fparallelism=%i", parallel); > > > + obstack_ptr_grow (&argv_obstack, xstrdup (buf)); > > > + } > > > /* Fallthru. */ > > > > > > case OPT_flto: > > > Index: lto/lto.c > > > =================================================================== > > > --- lto/lto.c (revision 201891) > > > +++ lto/lto.c (working copy) > > > @@ -49,6 +49,9 @@ along with GCC; see the file COPYING3. > > > #include "context.h" > > > #include "pass_manager.h" > > > > > > +/* Number of parallel tasks to run, -1 if we want to use GNU Make > > > jobserver. */ > > > +int lto_parallelism; > > > + > > > static GTY(()) tree first_personality_decl; > > > > > > /* Returns a hash code for P. */ > > > @@ -3002,6 +3005,98 @@ cmp_partitions_order (const void *a, con > > > return orderb - ordera; > > > } > > > > > > +/* Actually stream out ENCODER into TEMP_FILENAME. 
*/ > > > + > > > +void > > > +do_stream_out (char *temp_filename, lto_symtab_encoder_t encoder) > > > +{ > > > + lto_file *file = lto_obj_file_open (temp_filename, true); > > > + if (!file) > > > + fatal_error ("lto_obj_file_open() failed"); > > > + lto_set_current_out_file (file); > > > + > > > + ipa_write_optimization_summaries (encoder); > > > + > > > + lto_set_current_out_file (NULL); > > > + lto_obj_file_close (file); > > > + free (file); > > > +} > > > + > > > +/* Wait for forked process and signal errors. */ > > > +#ifdef HAVE_WORKING_FORK > > > +void > > > +wait_for_child () > > > +{ > > > + int status; > > > + do > > > + { > > > + int w = waitpid(0, &status, WUNTRACED | WCONTINUED); > > > + if (w == -1) > > > + fatal_error ("waitpid failed"); > > > + > > > + if (WIFEXITED (status) && WEXITSTATUS (status)) > > > + fatal_error ("streaming subprocess failed"); > > > + else if (WIFSIGNALED (status)) > > > + fatal_error ("streaming subprocess was killed by signal"); > > > + } > > > + while (!WIFEXITED(status) && !WIFSIGNALED(status)); > > > +} > > > +#endif > > > + > > > +/* Stream out ENCODER into TEMP_FILENAME > > > + Fork if that seems to help. */ > > > + > > > +void > > > +stream_out (char *temp_filename, lto_symtab_encoder_t encoder, bool last) > > > +{ > > > +#ifdef HAVE_WORKING_FORK > > > + static int nruns; > > > + > > > + if (!lto_parallelism || lto_parallelism == 1) > > > + { > > > + do_stream_out (temp_filename, encoder); > > > + return; > > > + } > > > + > > > + /* Do not run more than LTO_PARALLELISM streamings > > > + FIXME: we ignore limits on jobserver. */ > > > + if (lto_parallelism > 0 && nruns >= lto_parallelism) > > > + { > > > + wait_for_child (); > > > + nruns --; > > > + } > > > + /* If this is not the last parallel partition, execute new > > > + streaming process. 
*/ > > > + if (!last) > > > + { > > > + pid_t cpid = fork (); > > > + > > > + if (!cpid) > > > + { > > > + setproctitle ("lto1-wpa-streaming"); > > > + do_stream_out (temp_filename, encoder); > > > + exit (0); > > > + } > > > + /* Fork failed; lets do the job ourseleves. */ > > > + else if (cpid == -1) > > > + do_stream_out (temp_filename, encoder); > > > + else > > > + nruns++; > > > + } > > > + /* Last partition; stream it and wait for all children to die. */ > > > + else > > > + { > > > + int i; > > > + do_stream_out (temp_filename, encoder); > > > + for (i = 0; i < nruns; i++) > > > + wait_for_child (); > > > + } > > > + asm_nodes_output = true; > > > +#else > > > + do_stream_out (temp_filename, encoder); > > > +#endif > > > +} > > > + > > > /* Write all output files in WPA mode and the file with the list of > > > LTRANS units. */ > > > > > > @@ -3009,18 +3104,15 @@ static void > > > lto_wpa_write_files (void) > > > { > > > unsigned i, n_sets; > > > - lto_file *file; > > > ltrans_partition part; > > > FILE *ltrans_output_list_stream; > > > char *temp_filename; > > > + vec <char *>temp_filenames = vNULL; > > > size_t blen; > > > > > > /* Open the LTRANS output list. */ > > > if (!ltrans_output_list) > > > fatal_error ("no LTRANS output list filename provided"); > > > - ltrans_output_list_stream = fopen (ltrans_output_list, "w"); > > > - if (ltrans_output_list_stream == NULL) > > > - fatal_error ("opening LTRANS output list %s: %m", > > > ltrans_output_list); > > > > > > timevar_push (TV_WHOPR_WPA); > > > > > > @@ -3056,14 +3148,10 @@ lto_wpa_write_files (void) > > > : cmp_partitions_order); > > > for (i = 0; i < n_sets; i++) > > > { > > > - size_t len; > > > ltrans_partition part = ltrans_partitions[i]; > > > > > > /* Write all the nodes in SET. 
*/ > > > sprintf (temp_filename + blen, "%u.o", i); > > > - file = lto_obj_file_open (temp_filename, true); > > > - if (!file) > > > - fatal_error ("lto_obj_file_open() failed"); > > > > > > if (!quiet_flag) > > > fprintf (stderr, " %s (%s %i insns)", temp_filename, part->name, > > > part->insns); > > > @@ -3105,21 +3193,25 @@ lto_wpa_write_files (void) > > > } > > > gcc_checking_assert (lto_symtab_encoder_size (part->encoder) || > > > !i); > > > > > > - lto_set_current_out_file (file); > > > - > > > - ipa_write_optimization_summaries (part->encoder); > > > + stream_out (temp_filename, part->encoder, i == n_sets - 1); > > > > > > - lto_set_current_out_file (NULL); > > > - lto_obj_file_close (file); > > > - free (file); > > > part->encoder = NULL; > > > > > > - len = strlen (temp_filename); > > > - if (fwrite (temp_filename, 1, len, ltrans_output_list_stream) < len > > > + temp_filenames.safe_push (xstrdup (temp_filename)); > > > + } > > > + ltrans_output_list_stream = fopen (ltrans_output_list, "w"); > > > + if (ltrans_output_list_stream == NULL) > > > + fatal_error ("opening LTRANS output list %s: %m", > > > ltrans_output_list); > > > + for (i = 0; i < n_sets; i++) > > > + { > > > + unsigned int len = strlen (temp_filenames[i]); > > > + if (fwrite (temp_filenames[i], 1, len, ltrans_output_list_stream) > > > < len > > > || fwrite ("\n", 1, 1, ltrans_output_list_stream) < 1) > > > fatal_error ("writing to LTRANS output list %s: %m", > > > ltrans_output_list); > > > + free (temp_filenames[i]); > > > } > > > + temp_filenames.release(); > > > > > > lto_stats.num_output_files += n_sets; > > > > > > Index: lto/lang.opt > > > =================================================================== > > > --- lto/lang.opt (revision 201891) > > > +++ lto/lang.opt (working copy) > > > @@ -32,6 +32,10 @@ fltrans-output-list= > > > LTO Joined Var(ltrans_output_list) > > > Specify a file to which a list of files output by LTRANS is written. 
> > > > > > +fparallelism= > > > +LTO Joined > > > +Run the link-time optimizer in whole program analysis (WPA) mode. > > > + > > > fwpa > > > LTO Driver Report Var(flag_wpa) > > > Run the link-time optimizer in whole program analysis (WPA) mode. > > > Index: lto/lto.h > > > =================================================================== > > > --- lto/lto.h (revision 201891) > > > +++ lto/lto.h (working copy) > > > @@ -39,6 +39,7 @@ extern const char *resolution_file_name; > > > extern tree lto_eh_personality (void); > > > extern void lto_main (void); > > > extern void lto_read_all_file_options (void); > > > +extern int lto_parallelism; > > > > > > /* In lto-elf.c or lto-coff.c */ > > > extern lto_file *lto_obj_file_open (const char *filename, bool writable); > > > Index: lto/lto-lang.c > > > =================================================================== > > > --- lto/lto-lang.c (revision 201891) > > > +++ lto/lto-lang.c (working copy) > > > @@ -735,6 +735,19 @@ lto_handle_option (size_t scode, const c > > > warn_psabi = value; > > > break; > > > > > > + case OPT_fparallelism_: > > > + if (!arg) > > > + lto_parallelism = 1; > > > + else if (!strcmp (arg, "jobserver")) > > > + lto_parallelism = -1; > > > + else > > > + { > > > + lto_parallelism = atoi (arg); > > > + if (lto_parallelism <= 0) > > > + lto_parallelism = 0; > > > + } > > > + break; > > > + > > > default: > > > break; > > > } > > > > > > > > > > -- > > Richard Biener <rguent...@suse.de> > > SUSE / SUSE Labs > > SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 > > GF: Jeff Hawn, Jennifer Guild, Felix Imend"orffer > > -- Richard Biener <rguent...@suse.de> SUSE / SUSE Labs SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 GF: Jeff Hawn, Jennifer Guild, Felix Imend"orffer