On Thu, 21 Nov 2013, Jan Hubicka wrote:

> > 
> > Why do you need an additional -fparallelism?  Wouldn't
> > -fwpa=... be a better match, matching -flto=...?  As we already
> > pass down a -fwpa option to WPA this would make things easier, no?
> 
> My plan was to possibly use the same option later for parallelizing more parts
> of the compiler, not only WPA streaming. Streaming in may have some chance if
> we get into thread safety of GGC or move a sufficient amount of stuff out of
> GGC.  Also we can parallelize the inliner heuristic or IPA-PTA if it ever
> works. So it would make sense with -flto-partition=none and perhaps with local
> optimization, too.

I'd like to drop -flto-partition=none eventually.  It's just one more
path through the compiler to support ...

> But I can definitely update the patch to use -fwpa=N and we can deal with this
> once this becomes real. (i.e. I have no clue how to parallelize the inliner
> without making its decisions dependent on the parallelism and declining in
> quality as the parallelism is increased, nor do I have real plans for the
> stream-in procedure.)

Please.

Richard.

> Honza
> > 
> > Thanks,
> > Richard.
> > 
> > > Honza
> > > 
> > >   * lto-cgraph.c (asm_nodes_output): Make global.
> > >   * lto-streamer.h (asm_nodes_output): Declare.
> > >   * lto-wrapper.c (parallel, jobserver): Make global.
> > >   (run_gcc): Pass down -fparallelism
> > > 
> > >   * lto.c (lto_parallelism): New variable.
> > >   (do_stream_out): New function.
> > >   (stream_out): New function.
> > >   (lto_wpa_write_files): Use it.
> > >   * lang.opt (fparallelism): New.
> > >   * lto.h (lto_parallelism): Declare.
> > >   * lto-lang.c (lto_handle_option): Add fparallelism.
> > > 
> > > Index: lto-cgraph.c
> > > ===================================================================
> > > --- lto-cgraph.c  (revision 201891)
> > > +++ lto-cgraph.c  (working copy)
> > > @@ -50,6 +50,9 @@ along with GCC; see the file COPYING3.
> > >  #include "context.h"
> > >  #include "pass_manager.h"
> > >  
> > > +/* True when asm nodes has been output.  */
> > > +bool asm_nodes_output = false;
> > > +
> > >  static void output_cgraph_opt_summary (void);
> > >  static void input_cgraph_opt_summary (vec<symtab_node>  nodes);
> > >  
> > > @@ -852,7 +855,6 @@ output_symtab (void)
> > >    lto_symtab_encoder_iterator lsei;
> > >    int i, n_nodes;
> > >    lto_symtab_encoder_t encoder;
> > > -  static bool asm_nodes_output = false;
> > >  
> > >    if (flag_wpa)
> > >      output_cgraph_opt_summary ();
> > > Index: lto-streamer.h
> > > ===================================================================
> > > --- lto-streamer.h        (revision 201891)
> > > +++ lto-streamer.h        (working copy)
> > > @@ -870,6 +870,7 @@ void lto_output_location (struct output_
> > >  
> > >  
> > >  /* In lto-cgraph.c  */
> > > +extern bool asm_nodes_output;
> > >  lto_symtab_encoder_t lto_symtab_encoder_new (bool);
> > >  int lto_symtab_encoder_encode (lto_symtab_encoder_t, symtab_node);
> > >  void lto_symtab_encoder_delete (lto_symtab_encoder_t);
> > > Index: lto-wrapper.c
> > > ===================================================================
> > > --- lto-wrapper.c (revision 201891)
> > > +++ lto-wrapper.c (working copy)
> > > @@ -56,6 +56,9 @@ along with GCC; see the file COPYING3.
> > >  
> > >  int debug;                               /* true if -save-temps.  */
> > >  int verbose;                             /* true if -v.  */
> > > +int parallel = 0;                        /* number of parallel builds 
> > > specified
> > > +                                    by -flto=N  */
> > > +int jobserver = 0;                       /* true if -flto=jobserver was 
> > > used.  */
> > >  
> > >  enum lto_mode_d {
> > >    LTO_MODE_NONE,                 /* Not doing LTO.  */
> > > @@ -445,8 +448,6 @@ run_gcc (unsigned argc, char *argv[])
> > >    char *list_option_full = NULL;
> > >    const char *linker_output = NULL;
> > >    const char *collect_gcc, *collect_gcc_options;
> > > -  int parallel = 0;
> > > -  int jobserver = 0;
> > >    bool no_partition = false;
> > >    struct cl_decoded_option *fdecoded_options = NULL;
> > >    unsigned int fdecoded_options_count = 0;
> > > @@ -630,6 +631,16 @@ run_gcc (unsigned argc, char *argv[])
> > >         if (parallel <= 1)
> > >           parallel = 0;
> > >       }
> > > +   if (jobserver)
> > > +     {
> > > +       obstack_ptr_grow (&argv_obstack, xstrdup 
> > > ("-fparallelism=jobserver"));
> > > +     }
> > > +   else if (parallel > 1)
> > > +     {
> > > +       char buf[256];
> > > +       sprintf (buf, "-fparallelism=%i", parallel);
> > > +       obstack_ptr_grow (&argv_obstack, xstrdup (buf));
> > > +     }
> > >     /* Fallthru.  */
> > >  
> > >   case OPT_flto:
> > > Index: lto/lto.c
> > > ===================================================================
> > > --- lto/lto.c     (revision 201891)
> > > +++ lto/lto.c     (working copy)
> > > @@ -49,6 +49,9 @@ along with GCC; see the file COPYING3.
> > >  #include "context.h"
> > >  #include "pass_manager.h"
> > >  
> > > +/* Number of parallel tasks to run, -1 if we want to use GNU Make 
> > > jobserver.  */
> > > +int lto_parallelism;
> > > +
> > >  static GTY(()) tree first_personality_decl;
> > >  
> > >  /* Returns a hash code for P.  */
> > > @@ -3002,6 +3005,98 @@ cmp_partitions_order (const void *a, con
> > >    return orderb - ordera;
> > >  }
> > >  
> > > +/* Actually stream out ENCODER into TEMP_FILENAME.  */
> > > +
> > > +void
> > > +do_stream_out (char *temp_filename, lto_symtab_encoder_t encoder)
> > > +{
> > > +  lto_file *file = lto_obj_file_open (temp_filename, true);
> > > +  if (!file)
> > > +    fatal_error ("lto_obj_file_open() failed");
> > > +  lto_set_current_out_file (file);
> > > +
> > > +  ipa_write_optimization_summaries (encoder);
> > > +
> > > +  lto_set_current_out_file (NULL);
> > > +  lto_obj_file_close (file);
> > > +  free (file);
> > > +}
> > > +
> > > +/* Wait for forked process and signal errors.  */
> > > +#ifdef HAVE_WORKING_FORK
> > > +void
> > > +wait_for_child ()
> > > +{
> > > +  int status;
> > > +  do
> > > +    {
> > > +      int w = waitpid(0, &status, WUNTRACED | WCONTINUED);
> > > +      if (w == -1)
> > > + fatal_error ("waitpid failed");
> > > +
> > > +      if (WIFEXITED (status) && WEXITSTATUS (status))
> > > + fatal_error ("streaming subprocess failed");
> > > +      else if (WIFSIGNALED (status))
> > > + fatal_error ("streaming subprocess was killed by signal");
> > > +    }
> > > +  while (!WIFEXITED(status) && !WIFSIGNALED(status));
> > > +}
> > > +#endif
> > > +
> > > +/* Stream out ENCODER into TEMP_FILENAME
> > > +   Fork if that seems to help.  */
> > > +
> > > +void
> > > +stream_out (char *temp_filename, lto_symtab_encoder_t encoder, bool last)
> > > +{
> > > +#ifdef HAVE_WORKING_FORK
> > > +  static int nruns;
> > > +
> > > +  if (!lto_parallelism || lto_parallelism == 1)
> > > +    {
> > > +      do_stream_out (temp_filename, encoder);
> > > +      return;
> > > +    }
> > > +
> > > +  /* Do not run more than LTO_PARALLELISM streamings
> > > +     FIXME: we ignore limits on jobserver.  */
> > > +  if (lto_parallelism > 0 && nruns >= lto_parallelism)
> > > +    {
> > > +      wait_for_child ();
> > > +      nruns --;
> > > +    }
> > > +  /* If this is not the last parallel partition, execute new
> > > +     streaming process.  */
> > > +  if (!last)
> > > +    {
> > > +      pid_t cpid = fork ();
> > > +
> > > +      if (!cpid)
> > > + {
> > > +   setproctitle ("lto1-wpa-streaming");
> > > +   do_stream_out (temp_filename, encoder);
> > > +   exit (0);
> > > + }
> > > +      /* Fork failed; lets do the job ourseleves.  */
> > > +      else if (cpid == -1)
> > > +        do_stream_out (temp_filename, encoder);
> > > +      else
> > > + nruns++;
> > > +    }
> > > +  /* Last partition; stream it and wait for all children to die.  */
> > > +  else
> > > +    {
> > > +      int i;
> > > +      do_stream_out (temp_filename, encoder);
> > > +      for (i = 0; i < nruns; i++)
> > > + wait_for_child ();
> > > +    }
> > > +  asm_nodes_output = true;
> > > +#else
> > > +  do_stream_out (temp_filename, encoder);
> > > +#endif
> > > +}
> > > +
> > >  /* Write all output files in WPA mode and the file with the list of
> > >     LTRANS units.  */
> > >  
> > > @@ -3009,18 +3104,15 @@ static void
> > >  lto_wpa_write_files (void)
> > >  {
> > >    unsigned i, n_sets;
> > > -  lto_file *file;
> > >    ltrans_partition part;
> > >    FILE *ltrans_output_list_stream;
> > >    char *temp_filename;
> > > +  vec <char *>temp_filenames = vNULL;
> > >    size_t blen;
> > >  
> > >    /* Open the LTRANS output list.  */
> > >    if (!ltrans_output_list)
> > >      fatal_error ("no LTRANS output list filename provided");
> > > -  ltrans_output_list_stream = fopen (ltrans_output_list, "w");
> > > -  if (ltrans_output_list_stream == NULL)
> > > -    fatal_error ("opening LTRANS output list %s: %m", 
> > > ltrans_output_list);
> > >  
> > >    timevar_push (TV_WHOPR_WPA);
> > >  
> > > @@ -3056,14 +3148,10 @@ lto_wpa_write_files (void)
> > >                      : cmp_partitions_order);
> > >    for (i = 0; i < n_sets; i++)
> > >      {
> > > -      size_t len;
> > >        ltrans_partition part = ltrans_partitions[i];
> > >  
> > >        /* Write all the nodes in SET.  */
> > >        sprintf (temp_filename + blen, "%u.o", i);
> > > -      file = lto_obj_file_open (temp_filename, true);
> > > -      if (!file)
> > > - fatal_error ("lto_obj_file_open() failed");
> > >  
> > >        if (!quiet_flag)
> > >   fprintf (stderr, " %s (%s %i insns)", temp_filename, part->name, 
> > > part->insns);
> > > @@ -3105,21 +3193,25 @@ lto_wpa_write_files (void)
> > >   }
> > >        gcc_checking_assert (lto_symtab_encoder_size (part->encoder) || 
> > > !i);
> > >  
> > > -      lto_set_current_out_file (file);
> > > -
> > > -      ipa_write_optimization_summaries (part->encoder);
> > > +      stream_out (temp_filename, part->encoder, i == n_sets - 1);
> > >  
> > > -      lto_set_current_out_file (NULL);
> > > -      lto_obj_file_close (file);
> > > -      free (file);
> > >        part->encoder = NULL;
> > >  
> > > -      len = strlen (temp_filename);
> > > -      if (fwrite (temp_filename, 1, len, ltrans_output_list_stream) < len
> > > +      temp_filenames.safe_push (xstrdup (temp_filename));
> > > +    }
> > > +  ltrans_output_list_stream = fopen (ltrans_output_list, "w");
> > > +  if (ltrans_output_list_stream == NULL)
> > > +    fatal_error ("opening LTRANS output list %s: %m", 
> > > ltrans_output_list);
> > > +  for (i = 0; i < n_sets; i++)
> > > +    {
> > > +      unsigned int len = strlen (temp_filenames[i]);
> > > +      if (fwrite (temp_filenames[i], 1, len, ltrans_output_list_stream) 
> > > < len
> > >     || fwrite ("\n", 1, 1, ltrans_output_list_stream) < 1)
> > >   fatal_error ("writing to LTRANS output list %s: %m",
> > >                ltrans_output_list);
> > > +     free (temp_filenames[i]);
> > >      }
> > > +  temp_filenames.release();
> > >  
> > >    lto_stats.num_output_files += n_sets;
> > >  
> > > Index: lto/lang.opt
> > > ===================================================================
> > > --- lto/lang.opt  (revision 201891)
> > > +++ lto/lang.opt  (working copy)
> > > @@ -32,6 +32,10 @@ fltrans-output-list=
> > >  LTO Joined Var(ltrans_output_list)
> > >  Specify a file to which a list of files output by LTRANS is written.
> > >  
> > > +fparallelism=
> > > +LTO Joined
> > > +Run the link-time optimizer in whole program analysis (WPA) mode.
> > > +
> > >  fwpa
> > >  LTO Driver Report Var(flag_wpa)
> > >  Run the link-time optimizer in whole program analysis (WPA) mode.
> > > Index: lto/lto.h
> > > ===================================================================
> > > --- lto/lto.h     (revision 201891)
> > > +++ lto/lto.h     (working copy)
> > > @@ -39,6 +39,7 @@ extern const char *resolution_file_name;
> > >  extern tree lto_eh_personality (void);
> > >  extern void lto_main (void);
> > >  extern void lto_read_all_file_options (void);
> > > +extern int lto_parallelism;
> > >  
> > >  /* In lto-elf.c or lto-coff.c  */
> > >  extern lto_file *lto_obj_file_open (const char *filename, bool writable);
> > > Index: lto/lto-lang.c
> > > ===================================================================
> > > --- lto/lto-lang.c        (revision 201891)
> > > +++ lto/lto-lang.c        (working copy)
> > > @@ -735,6 +735,19 @@ lto_handle_option (size_t scode, const c
> > >        warn_psabi = value;
> > >        break;
> > >  
> > > +    case OPT_fparallelism_:
> > > +      if (!arg)
> > > + lto_parallelism = 1;
> > > +      else if (!strcmp (arg, "jobserver"))
> > > + lto_parallelism = -1;
> > > +      else
> > > + {
> > > +   lto_parallelism = atoi (arg);
> > > +   if (lto_parallelism <= 0)
> > > +     lto_parallelism = 0;
> > > + }
> > > +      break;
> > > +
> > >      default:
> > >        break;
> > >      }
> > > 
> > > 
> > 
> > -- 
> > Richard Biener <rguent...@suse.de>
> > SUSE / SUSE Labs
> > SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
> > GF: Jeff Hawn, Jennifer Guild, Felix Imend"orffer
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imend"orffer

Reply via email to