On Fri, 6 Dec 2013, Jan Hubicka wrote:

> > On Thu, 21 Nov 2013, Jan Hubicka wrote:
> > 
> > > > 
> > > > Why do you need an additional -fparallelism?  Wouldn't
> > > > -fwpa=... be a better match, matching -flto=...?  As we already
> > > > pass down a -fwpa option to WPA this would make things easier, no?
> > > 
> > > My plan was to possibly use the same option later for parallelizing more 
> > > parts of
> > > the compiler, not only WPA streaming. Streaming in may have some chance if we 
> > > get
> > > into thread safety of GGC or move a sufficient amount of stuff out of GGC.  
> > > Also
> > > we can parallelize the inliner heuristic or IPA-PTA if it ever works. So 
> > > it
> > > would make sense with -flto-partition=none and perhaps with local 
> > > optimization,
> > > too.
> > 
> > I'd like to drop -flto-partition=none eventually.  It's just one more
> > path through the compiler to support ...
> > 
> > > But I can definitely update the patch to use -fwpa=N and we can deal with 
> > > this
> > > once this becomes real. (i.e. I have no clue how to parallelize the inliner 
> > > without
> > > making its decisions dependent on the parallelism and declining as 
> > > parallelism
> > > increases, nor do I have real plans for the stream-in procedure)
> > 
> > Please.
> > 
> 
> Hi,
> here is updated patch. Sorry for taking time, I should have more time for 
> hacking again
> now...

Ok.

Thanks,
Richard.

> Honza
> 
>       * lto-cgraph.c (asm_nodes_output): Make global.
>       * lto-wrapper.c (run_gcc): Pass down parallelism to WPA.
>       * lto.c (lto_parallelism): New static var.
>       (do_stream_out, wait_for_child, stream_out): New static functions.
>       (lto_wpa_write_files): Add support for parallel streaming.
>       (do_whole_program_analysis): Set parallelism.
>       * lang.opt (fwpa): Add parameter.
>       * lto-lang.c (lto_handle_option): Handle flag_wpa.
>       (lto_init): Update use of flag_wpa.
>       * lto-streamer.h (asm_nodes_output): Declare.
> Index: lto-cgraph.c
> ===================================================================
> *** lto-cgraph.c      (revision 205646)
> --- lto-cgraph.c      (working copy)
> *************** along with GCC; see the file COPYING3.
> *** 53,58 ****
> --- 53,61 ----
>   #include "pass_manager.h"
>   #include "ipa-utils.h"
>   
> + /* True when asm nodes have been output.  */
> + bool asm_nodes_output = false;
> + 
>   static void output_cgraph_opt_summary (void);
>   static void input_cgraph_opt_summary (vec<symtab_node *>  nodes);
>   
> *************** output_symtab (void)
> *** 889,895 ****
>     lto_symtab_encoder_iterator lsei;
>     int i, n_nodes;
>     lto_symtab_encoder_t encoder;
> -   static bool asm_nodes_output = false;
>   
>     if (flag_wpa)
>       output_cgraph_opt_summary ();
> --- 892,897 ----
> Index: lto-wrapper.c
> ===================================================================
> *** lto-wrapper.c     (revision 205646)
> --- lto-wrapper.c     (working copy)
> *************** run_gcc (unsigned argc, char *argv[])
> *** 745,751 ****
>         tmp += list_option_len;
>         strcpy (tmp, ltrans_output_file);
>   
> !       obstack_ptr_grow (&argv_obstack, "-fwpa");
>       }
>   
>     /* Append the input objects and possible preceding arguments.  */
> --- 746,761 ----
>         tmp += list_option_len;
>         strcpy (tmp, ltrans_output_file);
>   
> !       if (jobserver)
> !     obstack_ptr_grow (&argv_obstack, xstrdup ("-fwpa=jobserver"));
> !       else if (parallel > 1)
> !     {
> !       char buf[256];
> !       sprintf (buf, "-fwpa=%i", parallel);
> !       obstack_ptr_grow (&argv_obstack, xstrdup (buf));
> !     }
> !       else
> !         obstack_ptr_grow (&argv_obstack, "-fwpa");
>       }
>   
>     /* Append the input objects and possible preceding arguments.  */
> Index: lto/lto.c
> ===================================================================
> *** lto/lto.c (revision 205646)
> --- lto/lto.c (working copy)
> *************** along with GCC; see the file COPYING3.
> *** 53,58 ****
> --- 53,61 ----
>   /* Vector to keep track of external variables we've seen so far.  */
>   vec<tree, va_gc> *lto_global_var_decls;
>   
> + /* Number of parallel tasks to run, -1 if we want to use GNU Make 
> jobserver.  */
> + static int lto_parallelism;
> + 
>   static GTY(()) tree first_personality_decl;
>   
>   /* Returns a hash code for P.  */
> *************** cmp_partitions_order (const void *a, con
> *** 2454,2459 ****
> --- 2457,2554 ----
>     return orderb - ordera;
>   }
>   
> + /* Actually stream out ENCODER into TEMP_FILENAME.  */
> + 
> + static void
> + do_stream_out (char *temp_filename, lto_symtab_encoder_t encoder)
> + {
> +   lto_file *file = lto_obj_file_open (temp_filename, true);
> +   if (!file)
> +     fatal_error ("lto_obj_file_open() failed");
> +   lto_set_current_out_file (file);
> + 
> +   ipa_write_optimization_summaries (encoder);
> + 
> +   lto_set_current_out_file (NULL);
> +   lto_obj_file_close (file);
> +   free (file);
> + }
> + 
> + /* Wait for forked process and signal errors.  */
> + #ifdef HAVE_WORKING_FORK
> + static void
> + wait_for_child ()
> + {
> +   int status;
> +   do
> +     {
> +       int w = waitpid(0, &status, WUNTRACED | WCONTINUED);
> +       if (w == -1)
> +     fatal_error ("waitpid failed");
> + 
> +       if (WIFEXITED (status) && WEXITSTATUS (status))
> +     fatal_error ("streaming subprocess failed");
> +       else if (WIFSIGNALED (status))
> +     fatal_error ("streaming subprocess was killed by signal");
> +     }
> +   while (!WIFEXITED(status) && !WIFSIGNALED(status));
> + }
> + #endif
> + 
> + /* Stream out ENCODER into TEMP_FILENAME
> +    Fork if that seems to help.  */
> + 
> + static void
> + stream_out (char *temp_filename, lto_symtab_encoder_t encoder, bool last)
> + {
> + #ifdef HAVE_WORKING_FORK
> +   static int nruns;
> + 
> +   if (!lto_parallelism || lto_parallelism == 1)
> +     {
> +       do_stream_out (temp_filename, encoder);
> +       return;
> +     }
> + 
> +   /* Do not run more than LTO_PARALLELISM streamings
> +      FIXME: we ignore limits on jobserver.  */
> +   if (lto_parallelism > 0 && nruns >= lto_parallelism)
> +     {
> +       wait_for_child ();
> +       nruns --;
> +     }
> +   /* If this is not the last parallel partition, execute new
> +      streaming process.  */
> +   if (!last)
> +     {
> +       pid_t cpid = fork ();
> + 
> +       if (!cpid)
> +     {
> +       setproctitle ("lto1-wpa-streaming");
> +       do_stream_out (temp_filename, encoder);
> +       exit (0);
> +     }
> +       /* Fork failed; lets do the job ourseleves.  */
> +       else if (cpid == -1)
> +         do_stream_out (temp_filename, encoder);
> +       else
> +     nruns++;
> +     }
> +   /* Last partition; stream it and wait for all children to die.  */
> +   else
> +     {
> +       int i;
> +       do_stream_out (temp_filename, encoder);
> +       for (i = 0; i < nruns; i++)
> +     wait_for_child ();
> +     }
> +   asm_nodes_output = true;
> + #else
> +   do_stream_out (temp_filename, encoder);
> + #endif
> + }
> + 
>   /* Write all output files in WPA mode and the file with the list of
>      LTRANS units.  */
>   
> *************** static void
> *** 2461,2478 ****
>   lto_wpa_write_files (void)
>   {
>     unsigned i, n_sets;
> -   lto_file *file;
>     ltrans_partition part;
>     FILE *ltrans_output_list_stream;
>     char *temp_filename;
>     size_t blen;
>   
>     /* Open the LTRANS output list.  */
>     if (!ltrans_output_list)
>       fatal_error ("no LTRANS output list filename provided");
> -   ltrans_output_list_stream = fopen (ltrans_output_list, "w");
> -   if (ltrans_output_list_stream == NULL)
> -     fatal_error ("opening LTRANS output list %s: %m", ltrans_output_list);
>   
>     timevar_push (TV_WHOPR_WPA);
>   
> --- 2556,2570 ----
>   lto_wpa_write_files (void)
>   {
>     unsigned i, n_sets;
>     ltrans_partition part;
>     FILE *ltrans_output_list_stream;
>     char *temp_filename;
> +   vec <char *>temp_filenames = vNULL;
>     size_t blen;
>   
>     /* Open the LTRANS output list.  */
>     if (!ltrans_output_list)
>       fatal_error ("no LTRANS output list filename provided");
>   
>     timevar_push (TV_WHOPR_WPA);
>   
> *************** lto_wpa_write_files (void)
> *** 2508,2521 ****
>                          : cmp_partitions_order);
>     for (i = 0; i < n_sets; i++)
>       {
> -       size_t len;
>         ltrans_partition part = ltrans_partitions[i];
>   
>         /* Write all the nodes in SET.  */
>         sprintf (temp_filename + blen, "%u.o", i);
> -       file = lto_obj_file_open (temp_filename, true);
> -       if (!file)
> -     fatal_error ("lto_obj_file_open() failed");
>   
>         if (!quiet_flag)
>       fprintf (stderr, " %s (%s %i insns)", temp_filename, part->name, 
> part->insns);
> --- 2600,2609 ----
> *************** lto_wpa_write_files (void)
> *** 2557,2577 ****
>       }
>         gcc_checking_assert (lto_symtab_encoder_size (part->encoder) || !i);
>   
> !       lto_set_current_out_file (file);
> ! 
> !       ipa_write_optimization_summaries (part->encoder);
>   
> -       lto_set_current_out_file (NULL);
> -       lto_obj_file_close (file);
> -       free (file);
>         part->encoder = NULL;
>   
> !       len = strlen (temp_filename);
> !       if (fwrite (temp_filename, 1, len, ltrans_output_list_stream) < len
>         || fwrite ("\n", 1, 1, ltrans_output_list_stream) < 1)
>       fatal_error ("writing to LTRANS output list %s: %m",
>                    ltrans_output_list);
>       }
>   
>     lto_stats.num_output_files += n_sets;
>   
> --- 2645,2669 ----
>       }
>         gcc_checking_assert (lto_symtab_encoder_size (part->encoder) || !i);
>   
> !       stream_out (temp_filename, part->encoder, i == n_sets - 1);
>   
>         part->encoder = NULL;
>   
> !       temp_filenames.safe_push (xstrdup (temp_filename));
> !     }
> !   ltrans_output_list_stream = fopen (ltrans_output_list, "w");
> !   if (ltrans_output_list_stream == NULL)
> !     fatal_error ("opening LTRANS output list %s: %m", ltrans_output_list);
> !   for (i = 0; i < n_sets; i++)
> !     {
> !       unsigned int len = strlen (temp_filenames[i]);
> !       if (fwrite (temp_filenames[i], 1, len, ltrans_output_list_stream) < 
> len
>         || fwrite ("\n", 1, 1, ltrans_output_list_stream) < 1)
>       fatal_error ("writing to LTRANS output list %s: %m",
>                    ltrans_output_list);
> +      free (temp_filenames[i]);
>       }
> +   temp_filenames.release();
>   
>     lto_stats.num_output_files += n_sets;
>   
> *************** do_whole_program_analysis (void)
> *** 3126,3131 ****
> --- 3218,3235 ----
>   {
>     symtab_node *node;
>   
> +   lto_parallelism = 1;
> + 
> +   /* TODO: jobserver communicatoin is not supported, yet.  */
> +   if (!strcmp (flag_wpa, "jobserver"))
> +     lto_parallelism = -1;
> +   else
> +     {
> +       lto_parallelism = atoi (flag_wpa);
> +       if (lto_parallelism <= 0)
> +     lto_parallelism = 0;
> +     }
> + 
>     timevar_start (TV_PHASE_OPT_GEN);
>   
>     /* Note that since we are in WPA mode, materialize_cgraph will not
> Index: lto/lang.opt
> ===================================================================
> *** lto/lang.opt      (revision 205646)
> --- lto/lang.opt      (working copy)
> *************** LTO Joined Var(ltrans_output_list)
> *** 33,41 ****
>   Specify a file to which a list of files output by LTRANS is written.
>   
>   fwpa
> ! LTO Driver Report Var(flag_wpa)
>   Run the link-time optimizer in whole program analysis (WPA) mode.
>   
>   fresolution=
>   LTO Joined
>   The resolution file
> --- 33,45 ----
>   Specify a file to which a list of files output by LTRANS is written.
>   
>   fwpa
> ! LTO Driver Report
>   Run the link-time optimizer in whole program analysis (WPA) mode.
>   
> + fwpa=
> + LTO Driver RejectNegative Joined Var(flag_wpa)
> + Whole program analysis (WPA) mode with number of parallel jobs specified.
> + 
>   fresolution=
>   LTO Joined
>   The resolution file
> Index: lto/lto-lang.c
> ===================================================================
> *** lto/lto-lang.c    (revision 205646)
> --- lto/lto-lang.c    (working copy)
> *************** lto_handle_option (size_t scode, const c
> *** 749,754 ****
> --- 749,758 ----
>         warn_psabi = value;
>         break;
>   
> +     case OPT_fwpa:
> +       flag_wpa = value ? "" : NULL;
> +       break;
> + 
>       default:
>         break;
>       }
> *************** static bool
> *** 1148,1154 ****
>   lto_init (void)
>   {
>     /* We need to generate LTO if running in WPA mode.  */
> !   flag_generate_lto = flag_wpa;
>   
>     /* Create the basic integer types.  */
>     build_common_tree_nodes (flag_signed_char, /*short_double=*/false);
> --- 1152,1158 ----
>   lto_init (void)
>   {
>     /* We need to generate LTO if running in WPA mode.  */
> !   flag_generate_lto = (flag_wpa != NULL);
>   
>     /* Create the basic integer types.  */
>     build_common_tree_nodes (flag_signed_char, /*short_double=*/false);
> Index: lto-streamer.h
> ===================================================================
> *** lto-streamer.h    (revision 205646)
> --- lto-streamer.h    (working copy)
> *************** void lto_output_location (struct output_
> *** 873,878 ****
> --- 873,879 ----
>   
>   
>   /* In lto-cgraph.c  */
> + extern bool asm_nodes_output;
>   lto_symtab_encoder_t lto_symtab_encoder_new (bool);
>   int lto_symtab_encoder_encode (lto_symtab_encoder_t, symtab_node *);
>   void lto_symtab_encoder_delete (lto_symtab_encoder_t);
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer

Reply via email to