On 27 Mar 17:16, Jakub Jelinek wrote: > On Thu, Mar 27, 2014 at 08:13:00PM +0400, Ilya Verbin wrote: > > On 27 Mar 15:02, Jakub Jelinek wrote: > > > The tables need to be created before IPA, that way it really shouldn't > > > matter in what order you emit them. E.g. the outlined target functions > > > could be added to the table during ompexp pass which actually creates the > > > outlined functions, the vars need to be added before target lto or host > > > lto > > > is streamed. > > > > For host tables it's ok, but when target compiler will create tables with > > functions? > > It reads bytecode from target_lto sections, so it never executes ompexp > > pass. > > Which is why the table created for host by the ompexp pass should be > streamed into the target_lto sections (marked specially somehow, special > attribute or whatever), and then corresponding target table created from > that, rather then created from some possibly different ordering there. > > Jakub
Hi Jakub, Could you please take a look at this patch? It fixes the ordering issue in the tables stated above, and passes all the tests that I have. But I'm not sure about its correctness from the architectural point of view. --- gcc/lto-cgraph.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++ gcc/lto-section-in.c | 3 +- gcc/lto-streamer-out.c | 2 ++ gcc/lto-streamer.h | 3 ++ gcc/lto/lto.c | 2 ++ gcc/omp-low.c | 68 +++++++----------------------------- 6 files changed, 115 insertions(+), 56 deletions(-) diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c index 544f04b..3d6637e 100644 --- a/gcc/lto-cgraph.c +++ b/gcc/lto-cgraph.c @@ -82,6 +82,8 @@ enum LTO_symtab_tags LTO_symtab_last_tag }; +extern vec<tree, va_gc> *offload_funcs, *offload_vars; + /* Create a new symtab encoder. if FOR_INPUT, the encoder allocate only datastructures needed to read the symtab. */ @@ -958,6 +960,51 @@ output_symtab (void) output_refs (encoder); } +void +output_offload_tables (void) +{ + /* Collect all omp-target global variables to offload_vars, if they have not + been gathered earlier by input_offload_tables. */ + if (vec_safe_is_empty (offload_vars)) + { + struct varpool_node *vnode; + FOR_EACH_DEFINED_VARIABLE (vnode) + { + if (!lookup_attribute ("omp declare target", + DECL_ATTRIBUTES (vnode->decl)) + || TREE_CODE (vnode->decl) != VAR_DECL + || DECL_SIZE (vnode->decl) == 0) + continue; + vec_safe_push (offload_vars, vnode->decl); + } + } + + if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty (offload_vars)) + return; + + struct lto_simple_output_block *ob + = lto_create_simple_output_block (LTO_section_offload_table); + + for (unsigned i = 0; i < vec_safe_length (offload_funcs); i++) + { + streamer_write_enum (ob->main_stream, LTO_symtab_tags, + LTO_symtab_last_tag, LTO_symtab_unavail_node); + lto_output_fn_decl_index (ob->decl_state, ob->main_stream, + (*offload_funcs)[i]); + } + + for (unsigned i = 0; i < vec_safe_length (offload_vars); i++) + { + streamer_write_enum (ob->main_stream, LTO_symtab_tags, + LTO_symtab_last_tag, LTO_symtab_variable); + lto_output_var_decl_index (ob->decl_state, ob->main_stream, + (*offload_vars)[i]); + } + + streamer_write_uhwi_stream (ob->main_stream, 0); + lto_destroy_simple_output_block (ob); +} + /* Overwrite the information in NODE based on FILE_DATA, TAG, FLAGS, STACK_SIZE, SELF_TIME and SELF_SIZE. This is called either to initialize NODE or to replace the values in it, for instance because the first @@ -1611,6 +1658,52 @@ input_symtab (void) } } +void +input_offload_tables (void) +{ + struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data (); + struct lto_file_decl_data *file_data; + unsigned int j = 0; + + while ((file_data = file_data_vec[j++])) + { + const char *data; + size_t len; + struct lto_input_block *ib + = lto_create_simple_input_block (file_data, LTO_section_offload_table, + &data, &len); + if (!ib) + continue; + + enum LTO_symtab_tags tag + = streamer_read_enum (ib, LTO_symtab_tags, LTO_symtab_last_tag); + while (tag) + { + if (tag == LTO_symtab_unavail_node) + { + int decl_index = streamer_read_uhwi (ib); + tree fn_decl + = lto_file_decl_data_get_fn_decl (file_data, decl_index); + vec_safe_push (offload_funcs, fn_decl); + } + else if (tag == LTO_symtab_variable) + { + int decl_index = streamer_read_uhwi (ib); + tree var_decl + = lto_file_decl_data_get_var_decl (file_data, decl_index); + vec_safe_push (offload_vars, var_decl); + } + else + fatal_error ("invalid offload table in %s", file_data->file_name); + + tag = streamer_read_enum (ib, LTO_symtab_tags, LTO_symtab_last_tag); + } + + lto_destroy_simple_input_block (file_data, LTO_section_offload_table, + ib, data, len); + } +} + /* True when we need optimization summary for NODE. */ static int diff --git a/gcc/lto-section-in.c b/gcc/lto-section-in.c index 9aa7639..df2fd8f 100644 --- a/gcc/lto-section-in.c +++ b/gcc/lto-section-in.c @@ -60,7 +60,8 @@ const char *lto_section_name[LTO_N_SECTION_TYPES] = "opts", "cgraphopt", "inline", - "ipcp_trans" + "ipcp_trans", + "offload_table" }; diff --git a/gcc/lto-streamer-out.c b/gcc/lto-streamer-out.c index 0f37f1c..2358a5e 100644 --- a/gcc/lto-streamer-out.c +++ b/gcc/lto-streamer-out.c @@ -2072,6 +2072,8 @@ lto_output (void) statements using the statement UIDs. */ output_symtab (); + output_offload_tables (); + #ifdef ENABLE_CHECKING lto_bitmap_free (output); #endif diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h index b1dc7dc..edc5be4 100644 --- a/gcc/lto-streamer.h +++ b/gcc/lto-streamer.h @@ -247,6 +247,7 @@ enum lto_section_type LTO_section_cgraph_opt_sum, LTO_section_inline_summary, LTO_section_ipcp_transform, + LTO_section_offload_table, LTO_N_SECTION_TYPES /* Must be last. */ }; @@ -883,6 +884,8 @@ bool lto_symtab_encoder_encode_initializer_p (lto_symtab_encoder_t, varpool_node *); void output_symtab (void); void input_symtab (void); +void output_offload_tables (void); +void input_offload_tables (void); bool referenced_from_other_partition_p (struct ipa_ref_list *, lto_symtab_encoder_t); bool reachable_from_other_partition_p (struct cgraph_node *, diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c index 8aaf8d3..7a2506d 100644 --- a/gcc/lto/lto.c +++ b/gcc/lto/lto.c @@ -3020,6 +3020,8 @@ read_cgraph_and_symbols (unsigned nfiles, const char **fnames) /* Read the symtab. */ input_symtab (); + input_offload_tables (); + /* Store resolutions into the symbol table. */ FOR_EACH_SYMBOL (snode) diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 45a8eb2..117021d 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -224,6 +224,9 @@ static tree scan_omp_1_op (tree *, int *, void *); /* Holds a decl for __OPENMP_TARGET__. */ static GTY(()) tree offload_symbol_decl; +/* Holds offload tables with decls. */ +vec<tree, va_gc> *offload_funcs, *offload_vars; + /* Get the __OPENMP_TARGET__ symbol. */ static tree get_offload_symbol_decl (void) @@ -8548,6 +8551,9 @@ expand_omp_target (struct omp_region *region) DECL_STRUCT_FUNCTION (child_fn)->curr_properties = cfun->curr_properties; cgraph_add_new_function (child_fn, true); + /* Add the new function to the offload table. */ + vec_safe_push (offload_funcs, child_fn); + /* Fix the callgraph edges for child_cfun. Those for cfun will be fixed in a following pass. */ push_cfun (child_cfun); @@ -12849,71 +12855,23 @@ add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls, void omp_finish_file (void) { - struct cgraph_node *node; - struct varpool_node *vnode; const char *funcs_section_name = OFFLOAD_FUNC_TABLE_SECTION_NAME; const char *vars_section_name = OFFLOAD_VAR_TABLE_SECTION_NAME; - vec<tree, va_gc> *v_funcs, *v_vars; - - vec_alloc (v_vars, 0); - vec_alloc (v_funcs, 0); - - /* Collect all omp-target functions. */ - FOR_EACH_DEFINED_FUNCTION (node) - { - /* TODO: This check could fail on functions, created by omp - parallel/task pragmas. It's better to name outlined for offloading - functions in some different way and to check here the function name. - It could be something like "*_omp_tgtfn" in contrast with "*_omp_fn" - for functions from omp parallel/task pragmas. */ - if (!lookup_attribute ("omp declare target", - DECL_ATTRIBUTES (node->decl)) - || !DECL_ARTIFICIAL (node->decl)) - continue; - vec_safe_push (v_funcs, node->decl); - } - /* Collect all omp-target global variables. */ - FOR_EACH_DEFINED_VARIABLE (vnode) - { - if (!lookup_attribute ("omp declare target", - DECL_ATTRIBUTES (vnode->decl)) - || TREE_CODE (vnode->decl) != VAR_DECL - || DECL_SIZE (vnode->decl) == 0) - continue; - vec_safe_push (v_vars, vnode->decl); - } - unsigned num_vars = vec_safe_length (v_vars); - unsigned num_funcs = vec_safe_length (v_funcs); + unsigned num_funcs = vec_safe_length (offload_funcs); + unsigned num_vars = vec_safe_length (offload_vars); - if (num_vars == 0 && num_funcs == 0) + if (num_funcs == 0 && num_vars == 0) return; -#ifdef ACCEL_COMPILER - /* Decls are placed in reversed order in fat-objects, so we need to - revert them back if we compile target. */ - for (unsigned i = 0; i < num_funcs / 2; i++) - { - tree it = (*v_funcs)[i]; - (*v_funcs)[i] = (*v_funcs)[num_funcs - i - 1]; - (*v_funcs)[num_funcs - i - 1] = it; - } - for (unsigned i = 0; i < num_vars / 2; i++) - { - tree it = (*v_vars)[i]; - (*v_vars)[i] = (*v_vars)[num_vars - i - 1]; - (*v_vars)[num_vars - i - 1] = it; - } -#endif - if (targetm_common.have_named_sections) { vec<constructor_elt, va_gc> *v_f, *v_v; vec_alloc (v_f, num_funcs); vec_alloc (v_v, num_vars * 2); - add_decls_addresses_to_decl_constructor (v_funcs, v_f); - add_decls_addresses_to_decl_constructor (v_vars, v_v); + add_decls_addresses_to_decl_constructor (offload_funcs, v_f); + add_decls_addresses_to_decl_constructor (offload_vars, v_v); tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node, num_vars * 2); @@ -12946,12 +12904,12 @@ omp_finish_file (void) { for (unsigned i = 0; i < num_funcs; i++) { - tree it = (*v_funcs)[i]; + tree it = (*offload_funcs)[i]; targetm.record_offload_symbol (it); } for (unsigned i = 0; i < num_vars; i++) { - tree it = (*v_vars)[i]; + tree it = (*offload_vars)[i]; targetm.record_offload_symbol (it); } } -- 1.7.11.7 Thanks, -- Ilya