Hi Jakub,
how about the following patch, which is kind of a combination of the
two? Namely, avoiding the output of artificial, read-only, nonglobal
variables – and marking all remaining variables and all functions with
node->force_output. As the LTO writing happens earlier, I only do it
there.

Tobias

On 6/8/20 1:11 PM, Jakub Jelinek wrote:
On Mon, Jun 08, 2020 at 12:44:31PM +0200, Tobias Burnus wrote:
As side-remark or follow up: I have also experimented
with the attached patch.
On the host side, the omp_finish_file call in toplev.c comes
late enough that the variable is gone and one no longer
writes it to the var table.
However, the write_lto() → output_offload_tables() call is
so early that both the offload table and the variable are
still written. – Hence, this patch fails at run time as
the two tables host_table & target_data have a different size.
I think this patch is the right thing to do, just needs to be slightly
extended.
If we do the decision at output_offload_tables, then for the
vars we choose to keep in the tables, we should set
node->force_output, so that from that point on we don't try to optimize it
away. Similarly with functions.
Jakub
-----------------
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander
Walter
openmp: don't add artificial const decl to offload table (PRs 94848 + 95551)
gcc/ChangeLog:
PR lto/94848
PR middle-end/95551
* omp-offload.c (add_decls_addresses_to_decl_constructor,
omp_finish_file): Skip readonly artificial nonglobal variables.
* lto-cgraph.c (output_offload_tables): Likewise; set force_output
for variables and functions.
libgomp/ChangeLog:
PR lto/94848
PR middle-end/95551
* testsuite/libgomp.fortran/target-var.f90: New test.
gcc/lto-cgraph.c | 19 +++++++++++---
gcc/omp-offload.c | 15 ++++++++++-
libgomp/testsuite/libgomp.fortran/target-var.f90 | 32 ++++++++++++++++++++++++
3 files changed, 61 insertions(+), 5 deletions(-)
diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c
index a671c671fa7..edac1b3e79f 100644
--- a/gcc/lto-cgraph.c
+++ b/gcc/lto-cgraph.c
@@ -1069,18 +1069,29 @@ output_offload_tables (void)
for (unsigned i = 0; i < vec_safe_length (offload_funcs); i++)
{
+ tree it = (*offload_funcs)[i];
+ symtab_node *node = symtab_node::get (it);
+ if (!node)
+ continue;
+ node->force_output = true;
streamer_write_enum (ob->main_stream, LTO_symtab_tags,
LTO_symtab_last_tag, LTO_symtab_unavail_node);
- lto_output_fn_decl_ref (ob->decl_state, ob->main_stream,
- (*offload_funcs)[i]);
+ lto_output_fn_decl_ref (ob->decl_state, ob->main_stream, it);
}
for (unsigned i = 0; i < vec_safe_length (offload_vars); i++)
{
+ /* See also omp_finish_file and add_decls_addresses_to_decl_constructor
+ both in omp-offload.c; note that this function is called earlier. */
+ tree it = (*offload_vars)[i];
+ symtab_node *node = symtab_node::get (it);
+ if (!node || (TREE_READONLY (it) && DECL_ARTIFICIAL (it)
+ && TREE_CODE (DECL_CONTEXT (it)) == FUNCTION_DECL))
+ continue;
+ node->force_output = true;
streamer_write_enum (ob->main_stream, LTO_symtab_tags,
LTO_symtab_last_tag, LTO_symtab_variable);
- lto_output_var_decl_ref (ob->decl_state, ob->main_stream,
- (*offload_vars)[i]);
+ lto_output_var_decl_ref (ob->decl_state, ob->main_stream, it);
}
streamer_write_uhwi_stream (ob->main_stream, 0);
diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c
index b2df91a5724..51348c195bb 100644
--- a/gcc/omp-offload.c
+++ b/gcc/omp-offload.c
@@ -125,6 +125,13 @@ add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls,
#endif
&& lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it));
+ /* See also omp_finish_file and output_offload_tables in lto-cgraph.c. */
+ if (!symtab_node::get (it)
+ || (is_var && TREE_READONLY (it)
+ && DECL_ARTIFICIAL (it)
+ && TREE_CODE (DECL_CONTEXT (it)) == FUNCTION_DECL))
+ continue;
+
tree size = NULL_TREE;
if (is_var)
size = fold_convert (const_ptr_type_node, DECL_SIZE_UNIT (it));
@@ -341,7 +348,7 @@ omp_finish_file (void)
add_decls_addresses_to_decl_constructor (offload_vars, v_v);
tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node,
- num_vars * 2);
+ vec_safe_length (v_v));
tree funcs_decl_type = build_array_type_nelts (pointer_sized_int_node,
num_funcs);
SET_TYPE_ALIGN (vars_decl_type, TYPE_ALIGN (pointer_sized_int_node));
@@ -381,6 +388,12 @@ omp_finish_file (void)
for (unsigned i = 0; i < num_vars; i++)
{
tree it = (*offload_vars)[i];
+ /* See also add_decls_addresses_to_decl_constructor
+ and output_offload_tables in lto-cgraph.c. */
+ if (!symtab_node::get (it)
+ || (TREE_READONLY (it) && DECL_ARTIFICIAL (it)
+ && TREE_CODE (DECL_CONTEXT (it)) == FUNCTION_DECL))
+ continue;
#ifdef ACCEL_COMPILER
if (DECL_HAS_VALUE_EXPR_P (it)
&& lookup_attribute ("omp declare target link",
diff --git a/libgomp/testsuite/libgomp.fortran/target-var.f90 b/libgomp/testsuite/libgomp.fortran/target-var.f90
new file mode 100644
index 00000000000..5e5ccd47c96
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/target-var.f90
@@ -0,0 +1,32 @@
+! { dg-additional-options "-O3" }
+!
+! With -O3 the static local variable A.10 generated for
+! the array constructor [-2, -4, ..., -20] is optimized
+! away - which has to be handled in the offload_vars table.
+!
+program main
+ implicit none (type, external)
+ integer :: j
+ integer, allocatable :: A(:)
+
+ A = [(3*j, j=1, 10)]
+ call bar (A)
+ deallocate (A)
+contains
+ subroutine bar (array)
+ integer :: i
+ integer :: array(:)
+
+ !$omp target map(from:array)
+ !$acc parallel copyout(array)
+ array = [(-2*i, i = 1, size(array))]
+ !$omp do private(array)
+ !$acc loop gang private(array)
+ do i = 1, 10
+ array(i) = 9*i
+ end do
+ if (any (array /= [(-2*i, i = 1, 10)])) error stop 2
+ !$omp end target
+ !$acc end parallel
+ end subroutine bar
+end