On Wed, 5 Nov 2014, Ilya Verbin wrote: > On 03 Nov 10:24, Jakub Jelinek wrote: > > On Tue, Oct 28, 2014 at 10:30:47PM +0300, Ilya Verbin wrote: > > > @@ -474,6 +475,13 @@ cgraph_node::create (tree decl) > > > gcc_assert (TREE_CODE (decl) == FUNCTION_DECL); > > > > > > node->decl = decl; > > > + > > > + if (lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl))) > > > + { > > > + node->offloadable = 1; > > > + g->have_offload = true; > > > + } > > > > I wonder if we shouldn't optimize here and call lookup_attribute only > > if there is a chance that the attribute might be present, so guard with > > flag_openmp (and flag_openacc later on?). During LTO the cgraph nodes > > are streamed in and supposedly the flag offloadable too. > > > > > @@ -2129,8 +2141,12 @@ symbol_table::compile (void) > > > fprintf (stderr, "Performing interprocedural optimizations\n"); > > > state = IPA; > > > > > > + /* OpenMP offloading requires LTO infrastructure. */ > > > + if (!in_lto_p && flag_openmp && g->have_offload) > > > + flag_generate_lto = 1; > > > > On the other hand, do you need flag_openmp here? Supposedly g->have_offload > > would already have been set if needed. > > Done, flag_openmp moved from symbol_table::compile to cgraph_node::create and > varpool_node::get_create. OK for trunk?
Yes. > Maybe also with this change? > > diff --git a/gcc/omp-low.c b/gcc/omp-low.c > index 4e9ed25..beae5b5 100644 > --- a/gcc/omp-low.c > +++ b/gcc/omp-low.c > @@ -1653,8 +1653,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) > if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP > && DECL_P (decl) > && is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)) > - && lookup_attribute ("omp declare target", > - DECL_ATTRIBUTES (decl))) > + && varpool_node::get_create (decl)->offloadable) > break; > if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP > && OMP_CLAUSE_MAP_KIND (c) == OMP_CLAUSE_MAP_POINTER) > @@ -1794,8 +1793,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) > decl = OMP_CLAUSE_DECL (c); > if (DECL_P (decl) > && is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)) > - && lookup_attribute ("omp declare target", > - DECL_ATTRIBUTES (decl))) > + && varpool_node::get_create (decl)->offloadable) > break; > if (DECL_P (decl)) > { Yes please. Please make sure that regular LTO bootstrap still works - LTO is only tested lightly in the testsuite. Thanks, Richard. > Thanks, > -- Ilya > > > --- > > diff --git a/gcc/cgraph.c b/gcc/cgraph.c > index 9a47ba2..a491886 100644 > --- a/gcc/cgraph.c > +++ b/gcc/cgraph.c > @@ -70,6 +70,7 @@ along with GCC; see the file COPYING3. If not see > #include "tree-dfa.h" > #include "profile.h" > #include "params.h" > +#include "context.h" > > /* FIXME: Only for PROP_loops, but cgraph shouldn't have to know about this. 
> */ > #include "tree-pass.h" > @@ -474,6 +475,14 @@ cgraph_node::create (tree decl) > gcc_assert (TREE_CODE (decl) == FUNCTION_DECL); > > node->decl = decl; > + > + if (flag_openmp > + && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl))) > + { > + node->offloadable = 1; > + g->have_offload = true; > + } > + > node->register_symbol (); > > if (DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl)) == > FUNCTION_DECL) > diff --git a/gcc/cgraph.h b/gcc/cgraph.h > index 377adce..4988f2d 100644 > --- a/gcc/cgraph.h > +++ b/gcc/cgraph.h > @@ -463,6 +463,13 @@ public: > /* Set when init priority is set. */ > unsigned in_init_priority_hash : 1; > > + /* Set when symbol needs to be streamed into LTO bytecode for LTO, or in > case > + of offloading, for separate compilation for a different target. */ > + unsigned need_lto_streaming : 1; > + > + /* Set when symbol can be streamed into bytecode for offloading. */ > + unsigned offloadable : 1; > + > > /* Ordering of all symtab entries. */ > int order; > diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c > index 3e76bf0..83ab419 100644 > --- a/gcc/cgraphunit.c > +++ b/gcc/cgraphunit.c > @@ -218,6 +218,7 @@ along with GCC; see the file COPYING3. If not see > #include "tree-nested.h" > #include "gimplify.h" > #include "dbgcnt.h" > +#include "lto-section-names.h" > > /* Queue of cgraph nodes scheduled to be added into cgraph. 
This is a > secondary queue used during optimization to accommodate passes that > @@ -2049,7 +2050,18 @@ ipa_passes (void) > targetm.asm_out.lto_start (); > > if (!in_lto_p) > - ipa_write_summaries (); > + { > + if (g->have_offload) > + { > + section_name_prefix = OFFLOAD_SECTION_NAME_PREFIX; > + ipa_write_summaries (true); > + } > + if (flag_lto) > + { > + section_name_prefix = LTO_SECTION_NAME_PREFIX; > + ipa_write_summaries (false); > + } > + } > > if (flag_generate_lto) > targetm.asm_out.lto_end (); > @@ -2129,8 +2141,12 @@ symbol_table::compile (void) > fprintf (stderr, "Performing interprocedural optimizations\n"); > state = IPA; > > + /* Offloading requires LTO infrastructure. */ > + if (!in_lto_p && g->have_offload) > + flag_generate_lto = 1; > + > /* If LTO is enabled, initialize the streamer hooks needed by GIMPLE. */ > - if (flag_lto) > + if (flag_generate_lto) > lto_streamer_hooks_init (); > > /* Don't run the IPA passes if there was any error or sorry messages. */ > diff --git a/gcc/context.c b/gcc/context.c > index 5339e28..9279be4 100644 > --- a/gcc/context.c > +++ b/gcc/context.c > @@ -30,6 +30,8 @@ gcc::context *g; > > gcc::context::context () > { > + have_offload = false; > + > /* The pass manager's constructor uses the dump manager (to set up > dumps for the various passes), so the dump manager must be set up > before the pass manager. */ > diff --git a/gcc/context.h b/gcc/context.h > index b8fb439..689ae5a 100644 > --- a/gcc/context.h > +++ b/gcc/context.h > @@ -33,6 +33,9 @@ class context > public: > context (); > > + /* The flag shows if there are symbols to be streamed for offloading. */ > + bool have_offload; > + > /* Pass-management. 
*/ > > pass_manager *get_passes () { gcc_assert (m_passes); return m_passes; } > diff --git a/gcc/ipa-inline-analysis.c b/gcc/ipa-inline-analysis.c > index 7da02cd..6cb2057 100644 > --- a/gcc/ipa-inline-analysis.c > +++ b/gcc/ipa-inline-analysis.c > @@ -4021,7 +4021,7 @@ inline_generate_summary (void) > > /* When not optimizing, do not bother to analyze. Inlining is still done > because edge redirection needs to happen there. */ > - if (!optimize && !flag_lto && !flag_wpa) > + if (!optimize && !flag_generate_lto && !flag_wpa) > return; > > function_insertion_hook_holder = > @@ -4336,11 +4336,6 @@ void > inline_free_summary (void) > { > struct cgraph_node *node; > - if (!inline_edge_summary_vec.exists ()) > - return; > - FOR_EACH_DEFINED_FUNCTION (node) > - if (!node->alias) > - reset_inline_summary (node); > if (function_insertion_hook_holder) > symtab->remove_cgraph_insertion_hook (function_insertion_hook_holder); > function_insertion_hook_holder = NULL; > @@ -4356,6 +4351,11 @@ inline_free_summary (void) > if (edge_duplication_hook_holder) > symtab->remove_edge_duplication_hook (edge_duplication_hook_holder); > edge_duplication_hook_holder = NULL; > + if (!inline_edge_summary_vec.exists ()) > + return; > + FOR_EACH_DEFINED_FUNCTION (node) > + if (!node->alias) > + reset_inline_summary (node); > vec_free (inline_summary_vec); > inline_edge_summary_vec.release (); > if (edge_predicate_pool) > diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c > index 3071f0c..45655ba 100644 > --- a/gcc/lto-cgraph.c > +++ b/gcc/lto-cgraph.c > @@ -326,6 +326,11 @@ referenced_from_other_partition_p (symtab_node *node, > lto_symtab_encoder_t encod > > for (i = 0; node->iterate_referring (i, ref); i++) > { > + /* Ignore references from non-offloadable nodes while streaming NODE > into > + offload LTO section. 
*/ > + if (!ref->referring->need_lto_streaming) > + continue; > + > if (ref->referring->in_other_partition > || !lto_symtab_encoder_in_partition_p (encoder, ref->referring)) > return true; > @@ -344,9 +349,16 @@ reachable_from_other_partition_p (struct cgraph_node > *node, lto_symtab_encoder_t > if (node->global.inlined_to) > return false; > for (e = node->callers; e; e = e->next_caller) > - if (e->caller->in_other_partition > - || !lto_symtab_encoder_in_partition_p (encoder, e->caller)) > - return true; > + { > + /* Ignore references from non-offloadable nodes while streaming NODE > into > + offload LTO section. */ > + if (!e->caller->need_lto_streaming) > + continue; > + > + if (e->caller->in_other_partition > + || !lto_symtab_encoder_in_partition_p (encoder, e->caller)) > + return true; > + } > return false; > } > > @@ -808,6 +820,16 @@ create_references (lto_symtab_encoder_t encoder, > symtab_node *node) > lto_symtab_encoder_encode (encoder, ref->referred); > } > > +/* Select what needs to be streamed out. In regular lto mode stream > everything. > + In offload lto mode stream only nodes marked as offloadable. */ > +void > +select_what_to_stream (bool offload_lto_mode) > +{ > + struct symtab_node *snode; > + FOR_EACH_SYMBOL (snode) > + snode->need_lto_streaming = !offload_lto_mode || snode->offloadable; > +} > + > /* Find all symbols we want to stream into given partition and insert them > to encoders. 
> > @@ -834,6 +856,8 @@ compute_ltrans_boundary (lto_symtab_encoder_t in_encoder) > !lsei_end_p (lsei); lsei_next_function_in_partition (&lsei)) > { > struct cgraph_node *node = lsei_cgraph_node (lsei); > + if (!node->need_lto_streaming) > + continue; > add_node_to (encoder, node, true); > lto_set_symtab_encoder_in_partition (encoder, node); > create_references (encoder, node); > @@ -850,6 +874,8 @@ compute_ltrans_boundary (lto_symtab_encoder_t in_encoder) > { > varpool_node *vnode = lsei_varpool_node (lsei); > > + if (!vnode->need_lto_streaming) > + continue; > lto_set_symtab_encoder_in_partition (encoder, vnode); > lto_set_symtab_encoder_encode_initializer (encoder, vnode); > create_references (encoder, vnode); > diff --git a/gcc/lto-section-names.h b/gcc/lto-section-names.h > index cb75230..f5dbed2 100644 > --- a/gcc/lto-section-names.h > +++ b/gcc/lto-section-names.h > @@ -25,6 +25,11 @@ along with GCC; see the file COPYING3. If not see > name for the functions and static_initializers. For other types of > sections a '.' and the section type are appended. */ > #define LTO_SECTION_NAME_PREFIX ".gnu.lto_" > +#define OFFLOAD_SECTION_NAME_PREFIX ".gnu.offload_lto_" > + > +/* Can be either OFFLOAD_SECTION_NAME_PREFIX when we stream IR for offload > + compiler, or LTO_SECTION_NAME_PREFIX for LTO case. */ > +extern const char *section_name_prefix; > > /* Segment name for LTO sections. This is only used for Mach-O. */ > > diff --git a/gcc/lto-streamer.c b/gcc/lto-streamer.c > index cb647bd..79c137d 100644 > --- a/gcc/lto-streamer.c > +++ b/gcc/lto-streamer.c > @@ -56,6 +56,7 @@ struct lto_stats_d lto_stats; > static bitmap_obstack lto_obstack; > static bool lto_obstack_initialized; > > +const char *section_name_prefix = LTO_SECTION_NAME_PREFIX; > > /* Return a string representing LTO tag TAG. */ > > @@ -185,7 +186,7 @@ lto_get_section_name (int section_type, const char *name, > struct lto_file_decl_d > sprintf (post, "." 
HOST_WIDE_INT_PRINT_HEX_PURE, f->id); > else > sprintf (post, "." HOST_WIDE_INT_PRINT_HEX_PURE, get_random_seed > (false)); > - return concat (LTO_SECTION_NAME_PREFIX, sep, add, post, NULL); > + return concat (section_name_prefix, sep, add, post, NULL); > } > > > diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h > index 63e4b32..0b3fb6a 100644 > --- a/gcc/lto-streamer.h > +++ b/gcc/lto-streamer.h > @@ -832,6 +832,7 @@ bool referenced_from_this_partition_p (symtab_node *, > bool reachable_from_this_partition_p (struct cgraph_node *, > lto_symtab_encoder_t); > lto_symtab_encoder_t compute_ltrans_boundary (lto_symtab_encoder_t encoder); > +void select_what_to_stream (bool); > > > /* In lto-symtab.c. */ > diff --git a/gcc/lto/lto-object.c b/gcc/lto/lto-object.c > index 28b459c..637d1f2 100644 > --- a/gcc/lto/lto-object.c > +++ b/gcc/lto/lto-object.c > @@ -238,8 +238,7 @@ lto_obj_add_section (void *data, const char *name, off_t > offset, > void **slot; > struct lto_section_list *list = loasd->list; > > - if (strncmp (name, LTO_SECTION_NAME_PREFIX, > - strlen (LTO_SECTION_NAME_PREFIX)) != 0) > + if (strncmp (name, section_name_prefix, strlen (section_name_prefix))) > return 1; > > new_name = xstrdup (name); > diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c > index b647275..6290b23 100644 > --- a/gcc/lto/lto-partition.c > +++ b/gcc/lto/lto-partition.c > @@ -928,6 +928,8 @@ lto_promote_cross_file_statics (void) > > gcc_assert (flag_wpa); > > + select_what_to_stream (false); > + > /* First compute boundaries. 
*/ > n_sets = ltrans_partitions.length (); > for (i = 0; i < n_sets; i++) > diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c > index 1234cee..0451e71 100644 > --- a/gcc/lto/lto.c > +++ b/gcc/lto/lto.c > @@ -2127,7 +2127,7 @@ lto_section_with_id (const char *name, unsigned > HOST_WIDE_INT *id) > { > const char *s; > > - if (strncmp (name, LTO_SECTION_NAME_PREFIX, strlen > (LTO_SECTION_NAME_PREFIX))) > + if (strncmp (name, section_name_prefix, strlen (section_name_prefix))) > return 0; > s = strrchr (name, '.'); > return s && sscanf (s, "." HOST_WIDE_INT_PRINT_HEX_PURE, id) == 1; > @@ -2902,6 +2902,10 @@ read_cgraph_and_symbols (unsigned nfiles, const char > **fnames) > > timevar_push (TV_IPA_LTO_DECL_IN); > > +#ifdef ACCEL_COMPILER > + section_name_prefix = OFFLOAD_SECTION_NAME_PREFIX; > +#endif > + > real_file_decl_data > = decl_data = ggc_cleared_vec_alloc<lto_file_decl_data_ptr> (nfiles + 1); > real_file_count = nfiles; > diff --git a/gcc/omp-low.c b/gcc/omp-low.c > index fe9bf80..1404b5e 100644 > --- a/gcc/omp-low.c > +++ b/gcc/omp-low.c > @@ -81,6 +81,7 @@ along with GCC; see the file COPYING3. If not see > #include "tree-nested.h" > #include "tree-eh.h" > #include "cilk.h" > +#include "context.h" > > > /* Lowering of OpenMP parallel and workshare constructs proceeds in two > @@ -268,6 +269,16 @@ is_parallel_ctx (omp_context *ctx) > } > > > +/* Return true if CTX is for an omp target region. */ > + > +static inline bool > +is_targetreg_ctx (omp_context *ctx) > +{ > + return gimple_code (ctx->stmt) == GIMPLE_OMP_TARGET > + && gimple_omp_target_kind (ctx->stmt) == GF_OMP_TARGET_KIND_REGION; > +} > + > + > /* Return true if CTX is for an omp task. 
*/ > > static inline bool > @@ -1933,26 +1944,19 @@ create_omp_child_function (omp_context *ctx, bool > task_copy) > DECL_EXTERNAL (decl) = 0; > DECL_CONTEXT (decl) = NULL_TREE; > DECL_INITIAL (decl) = make_node (BLOCK); > - bool target_p = false; > - if (lookup_attribute ("omp declare target", > - DECL_ATTRIBUTES (current_function_decl))) > - target_p = true; > + if (cgraph_node::get (current_function_decl)->offloadable) > + cgraph_node::get_create (decl)->offloadable = 1; > else > { > omp_context *octx; > for (octx = ctx; octx; octx = octx->outer) > - if (gimple_code (octx->stmt) == GIMPLE_OMP_TARGET > - && gimple_omp_target_kind (octx->stmt) > - == GF_OMP_TARGET_KIND_REGION) > + if (is_targetreg_ctx (octx)) > { > - target_p = true; > + cgraph_node::get_create (decl)->offloadable = 1; > + g->have_offload = true; > break; > } > } > - if (target_p) > - DECL_ATTRIBUTES (decl) > - = tree_cons (get_identifier ("omp declare target"), > - NULL_TREE, DECL_ATTRIBUTES (decl)); > > t = build_decl (DECL_SOURCE_LOCATION (decl), > RESULT_DECL, NULL_TREE, void_type_node); > @@ -2658,8 +2662,7 @@ check_omp_nesting_restrictions (gimple stmt, > omp_context *ctx) > break; > case GIMPLE_OMP_TARGET: > for (; ctx != NULL; ctx = ctx->outer) > - if (gimple_code (ctx->stmt) == GIMPLE_OMP_TARGET > - && gimple_omp_target_kind (ctx->stmt) == GF_OMP_TARGET_KIND_REGION) > + if (is_targetreg_ctx (ctx)) > { > const char *name; > switch (gimple_omp_target_kind (stmt)) > @@ -8276,6 +8279,7 @@ expand_omp_target (struct omp_region *region) > if (kind == GF_OMP_TARGET_KIND_REGION) > { > unsigned srcidx, dstidx, num; > + struct cgraph_node *node; > > /* If the target region needs data sent from the parent > function, then the very first statement (except possible > @@ -8407,6 +8411,11 @@ expand_omp_target (struct omp_region *region) > push_cfun (child_cfun); > cgraph_edge::rebuild_edges (); > > + /* Prevent IPA from removing child_fn as unreachable, since there are > no > + refs from the parent 
function to child_fn in offload LTO mode. */ > + node = cgraph_node::get (child_fn); > + node->mark_force_output (); > + > /* Some EH regions might become dead, see PR34608. If > pass_cleanup_cfg isn't the first pass to happen with the > new child, these dead EH edges might cause problems. > @@ -9277,6 +9286,17 @@ lower_omp_critical (gimple_stmt_iterator *gsi_p, > omp_context *ctx) > DECL_COMMON (decl) = 1; > DECL_ARTIFICIAL (decl) = 1; > DECL_IGNORED_P (decl) = 1; > + > + /* If '#pragma omp critical' is inside target region, the symbol must > + be marked for offloading. */ > + omp_context *octx; > + for (octx = ctx->outer; octx; octx = octx->outer) > + if (is_targetreg_ctx (octx)) > + { > + varpool_node::get_create (decl)->offloadable = 1; > + break; > + } > + > varpool_node::finalize_decl (decl); > > splay_tree_insert (critical_name_mutexes, (splay_tree_key) name, > diff --git a/gcc/passes.c b/gcc/passes.c > index 8432de8..bd4031b 100644 > --- a/gcc/passes.c > +++ b/gcc/passes.c > @@ -2303,7 +2303,7 @@ ipa_write_summaries_1 (lto_symtab_encoder_t encoder) > /* Write out summaries for all the nodes in the callgraph. 
*/ > > void > -ipa_write_summaries (void) > +ipa_write_summaries (bool offload_lto_mode) > { > lto_symtab_encoder_t encoder; > int i, order_pos; > @@ -2314,6 +2314,8 @@ ipa_write_summaries (void) > if (!flag_generate_lto || seen_error ()) > return; > > + select_what_to_stream (offload_lto_mode); > + > encoder = lto_symtab_encoder_new (false); > > /* Create the callgraph set in the same order used in > @@ -2340,15 +2342,16 @@ ipa_write_summaries (void) > renumber_gimple_stmt_uids (); > pop_cfun (); > } > - if (node->definition) > + if (node->definition && node->need_lto_streaming) > lto_set_symtab_encoder_in_partition (encoder, node); > } > > FOR_EACH_DEFINED_FUNCTION (node) > - if (node->alias) > + if (node->alias && node->need_lto_streaming) > lto_set_symtab_encoder_in_partition (encoder, node); > FOR_EACH_DEFINED_VARIABLE (vnode) > - lto_set_symtab_encoder_in_partition (encoder, vnode); > + if (vnode->need_lto_streaming) > + lto_set_symtab_encoder_in_partition (encoder, vnode); > > ipa_write_summaries_1 (compute_ltrans_boundary (encoder)); > > diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h > index 3db1a08..cbed6e7 100644 > --- a/gcc/tree-pass.h > +++ b/gcc/tree-pass.h > @@ -594,7 +594,7 @@ extern void pass_fini_dump_file (opt_pass *); > extern const char *get_current_pass_name (void); > extern void print_current_pass (FILE *); > extern void debug_pass (void); > -extern void ipa_write_summaries (void); > +extern void ipa_write_summaries (bool); > extern void ipa_write_optimization_summaries (struct lto_symtab_encoder_d *); > extern void ipa_read_summaries (void); > extern void ipa_read_optimization_summaries (void); > diff --git a/gcc/varpool.c b/gcc/varpool.c > index 50b5665..c508bf9 100644 > --- a/gcc/varpool.c > +++ b/gcc/varpool.c > @@ -40,6 +40,7 @@ along with GCC; see the file COPYING3. 
If not see > #include "gimple.h" > #include "lto-streamer.h" > #include "hash-set.h" > +#include "context.h" > > const char * const tls_model_names[]={"none", "tls-emulated", "tls-real", > "tls-global-dynamic", "tls-local-dynamic", > @@ -155,6 +156,14 @@ varpool_node::get_create (tree decl) > > node = varpool_node::create_empty (); > node->decl = decl; > + > + if (flag_openmp > + && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl))) > + { > + node->offloadable = 1; > + g->have_offload = true; > + } > + > node->register_symbol (); > return node; > } > -- Richard Biener <rguent...@suse.de> SUSE LINUX GmbH, GF: Jeff Hawn, Jennifer Guild, Felix Imendoerffer, HRB 21284 (AG Nuernberg) Maxfeldstrasse 5, 90409 Nuernberg, Germany