On 27 Mar 17:16, Jakub Jelinek wrote:
> On Thu, Mar 27, 2014 at 08:13:00PM +0400, Ilya Verbin wrote:
> > On 27 Mar 15:02, Jakub Jelinek wrote:
> > > The tables need to be created before IPA, that way it really shouldn't
> > > matter in what order you emit them.  E.g. the outlined target functions
> > > could be added to the table during ompexp pass which actually creates the
> > > outlined functions, the vars need to be added before target lto or host
> > > lto is streamed.
> > 
> > For host tables it's ok, but when will the target compiler create the
> > tables with functions?  It reads bytecode from target_lto sections, so it
> > never executes the ompexp pass.
> 
> Which is why the table created for host by the ompexp pass should be
> streamed into the target_lto sections (marked specially somehow, special
> attribute or whatever), and then corresponding target table created from
> that, rather than created from some possibly different ordering there.
> 
>       Jakub

Hi Jakub,

Could you please take a look at this patch?  It fixes the table-ordering issue
described above and passes all the tests that I have, but I'm not sure whether
the approach is correct from an architectural point of view.


---
 gcc/lto-cgraph.c       | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++
 gcc/lto-section-in.c   |  3 +-
 gcc/lto-streamer-out.c |  2 ++
 gcc/lto-streamer.h     |  3 ++
 gcc/lto/lto.c          |  2 ++
 gcc/omp-low.c          | 68 +++++++-----------------------------
 6 files changed, 115 insertions(+), 56 deletions(-)

diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c
index 544f04b..3d6637e 100644
--- a/gcc/lto-cgraph.c
+++ b/gcc/lto-cgraph.c
@@ -82,6 +82,8 @@ enum LTO_symtab_tags
   LTO_symtab_last_tag
 };
 
+extern vec<tree, va_gc> *offload_funcs, *offload_vars;
+
 /* Create a new symtab encoder.
    if FOR_INPUT, the encoder allocate only datastructures needed
    to read the symtab.  */
@@ -958,6 +960,51 @@ output_symtab (void)
   output_refs (encoder);
 }
 
+void
+output_offload_tables (void)
+{
+  /* Collect all omp-target global variables to offload_vars, if they have not
+     been gathered earlier by input_offload_tables.  */
+  if (vec_safe_is_empty (offload_vars))
+    {
+      struct varpool_node *vnode;
+      FOR_EACH_DEFINED_VARIABLE (vnode)
+       {
+         if (!lookup_attribute ("omp declare target",
+                                DECL_ATTRIBUTES (vnode->decl))
+             || TREE_CODE (vnode->decl) != VAR_DECL
+             || DECL_SIZE (vnode->decl) == 0)
+           continue;
+         vec_safe_push (offload_vars, vnode->decl);
+       }
+    }
+
+  if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty (offload_vars))
+    return;
+
+  struct lto_simple_output_block *ob
+    = lto_create_simple_output_block (LTO_section_offload_table);
+
+  for (unsigned i = 0; i < vec_safe_length (offload_funcs); i++)
+    {
+      streamer_write_enum (ob->main_stream, LTO_symtab_tags,
+                          LTO_symtab_last_tag, LTO_symtab_unavail_node);
+      lto_output_fn_decl_index (ob->decl_state, ob->main_stream,
+                               (*offload_funcs)[i]);
+    }
+
+  for (unsigned i = 0; i < vec_safe_length (offload_vars); i++)
+    {
+      streamer_write_enum (ob->main_stream, LTO_symtab_tags,
+                          LTO_symtab_last_tag, LTO_symtab_variable);
+      lto_output_var_decl_index (ob->decl_state, ob->main_stream,
+                                (*offload_vars)[i]);
+    }
+
+  streamer_write_uhwi_stream (ob->main_stream, 0);
+  lto_destroy_simple_output_block (ob);
+}
+
 /* Overwrite the information in NODE based on FILE_DATA, TAG, FLAGS,
    STACK_SIZE, SELF_TIME and SELF_SIZE.  This is called either to initialize
    NODE or to replace the values in it, for instance because the first
@@ -1611,6 +1658,52 @@ input_symtab (void)
     }
 }
 
+void
+input_offload_tables (void)
+{
+  struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
+  struct lto_file_decl_data *file_data;
+  unsigned int j = 0;
+
+  while ((file_data = file_data_vec[j++]))
+    {
+      const char *data;
+      size_t len;
+      struct lto_input_block *ib
+       = lto_create_simple_input_block (file_data, LTO_section_offload_table,
+                                        &data, &len);
+      if (!ib)
+       continue;
+
+      enum LTO_symtab_tags tag
+       = streamer_read_enum (ib, LTO_symtab_tags, LTO_symtab_last_tag);
+      while (tag)
+       {
+         if (tag == LTO_symtab_unavail_node)
+           {
+             int decl_index = streamer_read_uhwi (ib);
+             tree fn_decl
+               = lto_file_decl_data_get_fn_decl (file_data, decl_index);
+             vec_safe_push (offload_funcs, fn_decl);
+           }
+         else if (tag == LTO_symtab_variable)
+           {
+             int decl_index = streamer_read_uhwi (ib);
+             tree var_decl
+               = lto_file_decl_data_get_var_decl (file_data, decl_index);
+             vec_safe_push (offload_vars, var_decl);
+           }
+         else
+           fatal_error ("invalid offload table in %s", file_data->file_name);
+
+         tag = streamer_read_enum (ib, LTO_symtab_tags, LTO_symtab_last_tag);
+       }
+
+      lto_destroy_simple_input_block (file_data, LTO_section_offload_table,
+                                     ib, data, len);
+    }
+}
+
 /* True when we need optimization summary for NODE.  */
 
 static int
diff --git a/gcc/lto-section-in.c b/gcc/lto-section-in.c
index 9aa7639..df2fd8f 100644
--- a/gcc/lto-section-in.c
+++ b/gcc/lto-section-in.c
@@ -60,7 +60,8 @@ const char *lto_section_name[LTO_N_SECTION_TYPES] =
   "opts",
   "cgraphopt",
   "inline",
-  "ipcp_trans"
+  "ipcp_trans",
+  "offload_table"
 };
 
 
diff --git a/gcc/lto-streamer-out.c b/gcc/lto-streamer-out.c
index 0f37f1c..2358a5e 100644
--- a/gcc/lto-streamer-out.c
+++ b/gcc/lto-streamer-out.c
@@ -2072,6 +2072,8 @@ lto_output (void)
      statements using the statement UIDs.  */
   output_symtab ();
 
+  output_offload_tables ();
+
 #ifdef ENABLE_CHECKING
   lto_bitmap_free (output);
 #endif
diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h
index b1dc7dc..edc5be4 100644
--- a/gcc/lto-streamer.h
+++ b/gcc/lto-streamer.h
@@ -247,6 +247,7 @@ enum lto_section_type
   LTO_section_cgraph_opt_sum,
   LTO_section_inline_summary,
   LTO_section_ipcp_transform,
+  LTO_section_offload_table,
   LTO_N_SECTION_TYPES          /* Must be last.  */
 };
 
@@ -883,6 +884,8 @@ bool lto_symtab_encoder_encode_initializer_p (lto_symtab_encoder_t,
                                              varpool_node *);
 void output_symtab (void);
 void input_symtab (void);
+void output_offload_tables (void);
+void input_offload_tables (void);
 bool referenced_from_other_partition_p (struct ipa_ref_list *,
                                        lto_symtab_encoder_t);
 bool reachable_from_other_partition_p (struct cgraph_node *,
diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
index 8aaf8d3..7a2506d 100644
--- a/gcc/lto/lto.c
+++ b/gcc/lto/lto.c
@@ -3020,6 +3020,8 @@ read_cgraph_and_symbols (unsigned nfiles, const char **fnames)
   /* Read the symtab.  */
   input_symtab ();
 
+  input_offload_tables ();
+
   /* Store resolutions into the symbol table.  */
 
   FOR_EACH_SYMBOL (snode)
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 45a8eb2..117021d 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -224,6 +224,9 @@ static tree scan_omp_1_op (tree *, int *, void *);
 /* Holds a decl for __OPENMP_TARGET__.  */
 static GTY(()) tree offload_symbol_decl;
 
+/* Holds offload tables with decls.  */
+vec<tree, va_gc> *offload_funcs, *offload_vars;
+
 /* Get the __OPENMP_TARGET__ symbol.  */
 static tree
 get_offload_symbol_decl (void)
@@ -8548,6 +8551,9 @@ expand_omp_target (struct omp_region *region)
       DECL_STRUCT_FUNCTION (child_fn)->curr_properties = cfun->curr_properties;
       cgraph_add_new_function (child_fn, true);
 
+      /* Add the new function to the offload table.  */
+      vec_safe_push (offload_funcs, child_fn);
+
       /* Fix the callgraph edges for child_cfun.  Those for cfun will be
         fixed in a following pass.  */
       push_cfun (child_cfun);
@@ -12849,71 +12855,23 @@ add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls,
 void
 omp_finish_file (void)
 {
-  struct cgraph_node *node;
-  struct varpool_node *vnode;
   const char *funcs_section_name = OFFLOAD_FUNC_TABLE_SECTION_NAME;
   const char *vars_section_name = OFFLOAD_VAR_TABLE_SECTION_NAME;
-  vec<tree, va_gc> *v_funcs, *v_vars;
-
-  vec_alloc (v_vars, 0);
-  vec_alloc (v_funcs, 0);
-
-  /* Collect all omp-target functions.  */
-  FOR_EACH_DEFINED_FUNCTION (node)
-    {
-      /* TODO: This check could fail on functions, created by omp
-        parallel/task pragmas.  It's better to name outlined for offloading
-        functions in some different way and to check here the function name.
-        It could be something like "*_omp_tgtfn" in contrast with "*_omp_fn"
-        for functions from omp parallel/task pragmas.  */
-      if (!lookup_attribute ("omp declare target",
-                            DECL_ATTRIBUTES (node->decl))
-         || !DECL_ARTIFICIAL (node->decl))
-       continue;
-      vec_safe_push (v_funcs, node->decl);
-    }
-  /* Collect all omp-target global variables.  */
-  FOR_EACH_DEFINED_VARIABLE (vnode)
-    {
-      if (!lookup_attribute ("omp declare target",
-                            DECL_ATTRIBUTES (vnode->decl))
-         || TREE_CODE (vnode->decl) != VAR_DECL
-         || DECL_SIZE (vnode->decl) == 0)
-       continue;
 
-      vec_safe_push (v_vars, vnode->decl);
-    }
-  unsigned num_vars = vec_safe_length (v_vars);
-  unsigned num_funcs = vec_safe_length (v_funcs);
+  unsigned num_funcs = vec_safe_length (offload_funcs);
+  unsigned num_vars = vec_safe_length (offload_vars);
 
-  if (num_vars == 0 && num_funcs == 0)
+  if (num_funcs == 0 && num_vars == 0)
     return;
 
-#ifdef ACCEL_COMPILER
-  /* Decls are placed in reversed order in fat-objects, so we need to
-     revert them back if we compile target.  */
-  for (unsigned i = 0; i < num_funcs / 2; i++)
-    {
-      tree it = (*v_funcs)[i];
-      (*v_funcs)[i] = (*v_funcs)[num_funcs - i - 1];
-      (*v_funcs)[num_funcs - i - 1] = it;
-    }
-  for (unsigned i = 0; i < num_vars / 2; i++)
-    {
-      tree it = (*v_vars)[i];
-      (*v_vars)[i] = (*v_vars)[num_vars - i - 1];
-      (*v_vars)[num_vars - i - 1] = it;
-    }
-#endif
-
   if (targetm_common.have_named_sections)
     {
       vec<constructor_elt, va_gc> *v_f, *v_v;
       vec_alloc (v_f, num_funcs);
       vec_alloc (v_v, num_vars * 2);
 
-      add_decls_addresses_to_decl_constructor (v_funcs, v_f);
-      add_decls_addresses_to_decl_constructor (v_vars, v_v);
+      add_decls_addresses_to_decl_constructor (offload_funcs, v_f);
+      add_decls_addresses_to_decl_constructor (offload_vars, v_v);
 
       tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node,
                                                    num_vars * 2);
@@ -12946,12 +12904,12 @@ omp_finish_file (void)
     {
       for (unsigned i = 0; i < num_funcs; i++)
        {
-         tree it = (*v_funcs)[i];
+         tree it = (*offload_funcs)[i];
          targetm.record_offload_symbol (it);
        }  
       for (unsigned i = 0; i < num_vars; i++)
        {
-         tree it = (*v_vars)[i];
+         tree it = (*offload_vars)[i];
          targetm.record_offload_symbol (it);
        }  
     }
-- 
1.7.11.7


Thanks,
  -- Ilya

Reply via email to