> 2014-09-27 Ilya Verbin <ilya.ver...@intel.com> > Ilya Tocar <ilya.to...@intel.com> > Andrey Turetskiy <andrey.turets...@intel.com> > Bernd Schmidt <ber...@codesourcery.com> > gcc/ > * cgraph.h (symtab_node): Add need_dump flag. > * cgraphunit.c: Include lto-section-names.h. > (initialize_offload): New function. > (ipa_passes): Initialize offload and call ipa_write_summaries if there > is something to write to OMP_SECTION_NAME_PREFIX sections. > (symbol_table::compile): Call lto_streamer_hooks_init under flag_openmp. > * ipa-inline-analysis.c (inline_generate_summary): Do not exit under > flag_openmp. > (inline_free_summary): Always remove hooks. > * lto-cgraph.c (lto_set_symtab_encoder_in_partition): Exit if there is > no need to encode the node. > (referenced_from_other_partition_p, reachable_from_other_partition_p): > Ignore references from non-target functions to target functions if we > are streaming out target-side bytecode (offload lto mode). > (select_what_to_dump): New function. > * lto-section-names.h (OMP_SECTION_NAME_PREFIX): Define. > (section_name_prefix): Declare. > * lto-streamer.c (offload_lto_mode): New variable. > (section_name_prefix): New variable. > (lto_get_section_name): Use section_name_prefix instead of > LTO_SECTION_NAME_PREFIX. > * lto-streamer.h (select_what_to_dump): Declare. > (offload_lto_mode): Declare. > * omp-low.c (is_targetreg_ctx): New function. > (create_omp_child_function, check_omp_nesting_restrictions): Use it. > (expand_omp_target): Set mark_force_output for the target functions. > (lower_omp_critical): Add target attribute for omp critical symbol. > * passes.c (ipa_write_summaries): Call select_what_to_dump. > gcc/lto/ > * lto-object.c (lto_obj_add_section): Use section_name_prefix instead of > LTO_SECTION_NAME_PREFIX. > * lto-partition.c (add_symbol_to_partition_1): Always set > node->need_dump to true. > (lto_promote_cross_file_statics): Call select_what_to_dump. 
> * lto.c (lto_section_with_id): Use section_name_prefix instead of > LTO_SECTION_NAME_PREFIX. > (read_cgraph_and_symbols): Read OMP_SECTION_NAME_PREFIX sections, if > being built as an offload compiler. > > Thanks, > -- Ilya > > --- > > diff --git a/gcc/cgraph.h b/gcc/cgraph.h > index 7481906..9ab970d 100644 > --- a/gcc/cgraph.h > +++ b/gcc/cgraph.h > @@ -444,6 +444,11 @@ public: > /* Set when init priority is set. */ > unsigned in_init_priority_hash : 1; > > + /* Set when symbol needs to be dumped into LTO bytecode for LTO, > + or in pragma omp target case, for separate compilation targeting > + a different architecture. */ > + unsigned need_dump : 1;
To me, "dump" implies a debug dump. LTO output is usually called streaming, so perhaps need_lto_streaming? > +/* Check whether there is at least one function or global variable to > offload. > + */ > + > +static bool > +initialize_offload (void) Perhaps have_offload_p? Nothing is initialized here... > +{ > + bool have_offload = false; > + struct cgraph_node *node; > + struct varpool_node *vnode; > + > + FOR_EACH_DEFINED_FUNCTION (node) > + if (lookup_attribute ("omp declare target", DECL_ATTRIBUTES > (node->decl))) > + { > + have_offload = true; > + break; > + } > + > + FOR_EACH_DEFINED_VARIABLE (vnode) > + { > + if (!lookup_attribute ("omp declare target", > + DECL_ATTRIBUTES (vnode->decl)) > + || TREE_CODE (vnode->decl) != VAR_DECL > + || DECL_SIZE (vnode->decl) == 0) > + continue; > + have_offload = true; > + } > + > + return have_offload; > +} > + > static void > ipa_passes (void) > { > + bool have_offload = false; > gcc::pass_manager *passes = g->get_passes (); > > set_cfun (NULL); > @@ -2004,6 +2036,14 @@ ipa_passes (void) > gimple_register_cfg_hooks (); > bitmap_obstack_initialize (NULL); > > + if (!in_lto_p && flag_openmp) > + { > + have_offload = initialize_offload (); > + /* OpenMP offloading requires LTO infrastructure. */ > + if (have_offload) > + flag_generate_lto = 1; > + } > + > invoke_plugin_callbacks (PLUGIN_ALL_IPA_PASSES_START, NULL); > > if (!in_lto_p) > @@ -2041,7 +2081,20 @@ ipa_passes (void) > targetm.asm_out.lto_start (); > > if (!in_lto_p) > - ipa_write_summaries (); > + { > + if (have_offload) > + { > + offload_lto_mode = true; > + section_name_prefix = OMP_SECTION_NAME_PREFIX; > + ipa_write_summaries (); > + } > + if (flag_lto) > + { > + offload_lto_mode = false; > + section_name_prefix = LTO_SECTION_NAME_PREFIX; > + ipa_write_summaries (); > + } How does LTO combine with offloading? 
> @@ -4325,11 +4325,6 @@ void > inline_free_summary (void) > { > struct cgraph_node *node; > - if (!inline_edge_summary_vec.exists ()) > - return; > - FOR_EACH_DEFINED_FUNCTION (node) > - if (!node->alias) > - reset_inline_summary (node); > if (function_insertion_hook_holder) > symtab->remove_cgraph_insertion_hook (function_insertion_hook_holder); > function_insertion_hook_holder = NULL; > @@ -4345,6 +4340,11 @@ inline_free_summary (void) > if (edge_duplication_hook_holder) > symtab->remove_edge_duplication_hook (edge_duplication_hook_holder); > edge_duplication_hook_holder = NULL; > + if (!inline_edge_summary_vec.exists ()) > + return; > + FOR_EACH_DEFINED_FUNCTION (node) > + if (!node->alias) > + reset_inline_summary (node); Why is this needed? > diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c > index 0584946..78b7fc8 100644 > --- a/gcc/lto-cgraph.c > +++ b/gcc/lto-cgraph.c > @@ -239,6 +239,9 @@ void > lto_set_symtab_encoder_in_partition (lto_symtab_encoder_t encoder, > symtab_node *node) > { > + /* Ignore not needed nodes. */ > + if (!node->need_dump) > + return; I think this should be done on the caller side (in the loop deciding what to output) rather than in this simple data-structure accessor. > int index = lto_symtab_encoder_encode (encoder, node); > encoder->nodes[index].in_partition = true; > } > @@ -321,6 +324,12 @@ referenced_from_other_partition_p (symtab_node *node, > lto_symtab_encoder_t encod > > for (i = 0; node->iterate_referring (i, ref); i++) > { > + /* Ignore references from non-target functions in offload lto mode. */ > + if (offload_lto_mode > + && !lookup_attribute ("omp declare target", > + DECL_ATTRIBUTES (ref->referring->decl))) > + continue; Those are quite busy loops; you may consider making offload a flag. Why can't you test need_dump here? I think you also need to run free lang data when you decide to stream something. Otherwise the cgraph bits seem reasonable. I think Richi will want to comment on the LTO part. Honza