On 08/02/16 14:00, Tom de Vries wrote:
Hi,
when running libgomp.c testsuite with "-flto -flto-partition=1to1
-fno-toplevel-reorder" we run into many compilation failures like this:
...
/tmp/xxxxxxxx.ltrans0.ltrans.o:(.gnu.offload_funcs+0x1a0): undefined
reference to `MAIN__._omp_fn.0'^M
...
The problem is that the offload table is in one lto partition, and the
function listed in the offload table is in another, without the function
having been promoted to be visible in the other partition.
The patch fixes this by promoting the symbols in the offload table such
that they're visible in all partitions.
Bootstrapped and reg-tested on x86_64.
Build for nvidia accelerator and reg-tested libgomp with various lto
settings.
Added multi-source testcase target-3{7,8}.c that triggers the PR for
intelmicemul accelerator.
OK for trunk, stage1 (or stage4, if that's appropriate)?
Thanks,
- Tom
Mark offload symbols as global in lto
2016-02-08 Tom de Vries <t...@codesourcery.com>
PR lto/69607
* lto-partition.c (promote_offload_tables): New function.
* lto-partition.h (promote_offload_tables): Declare.
* lto.c (do_whole_program_analysis): call promote_offload_tables.
* testsuite/libgomp.c/target-36.c: New test.
* testsuite/libgomp.c/target-37.c: New test.
* testsuite/libgomp.c/target-38.c: New test.
---
gcc/lto/lto-partition.c | 28 ++++++++++
gcc/lto/lto-partition.h | 1 +
gcc/lto/lto.c | 2 +
libgomp/testsuite/libgomp.c/target-36.c | 4 ++
libgomp/testsuite/libgomp.c/target-37.c | 98 +++++++++++++++++++++++++++++++++
libgomp/testsuite/libgomp.c/target-38.c | 95 ++++++++++++++++++++++++++++++++
6 files changed, 228 insertions(+)
diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c
index 9eb63c2..56598d4 100644
--- a/gcc/lto/lto-partition.c
+++ b/gcc/lto/lto-partition.c
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see
#include "ipa-prop.h"
#include "ipa-inline.h"
#include "lto-partition.h"
+#include "omp-low.h"
vec<ltrans_partition> ltrans_partitions;
@@ -1003,6 +1004,33 @@ promote_symbol (symtab_node *node)
"Promoting as hidden: %s\n", node->name ());
}
+/* Promote the symbols in the offload tables. */
+
+void
+promote_offload_tables (void)
+{
+ if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty (offload_vars))
+ return;
+
+ for (unsigned i = 0; i < vec_safe_length (offload_funcs); i++)
+ {
+ tree fn_decl = (*offload_funcs)[i];
+ cgraph_node *node = cgraph_node::get (fn_decl);
+ if (node->externally_visible)
+ continue;
+ promote_symbol (node);
+ }
+
+ for (unsigned i = 0; i < vec_safe_length (offload_vars); i++)
+ {
+ tree var_decl = (*offload_vars)[i];
+ varpool_node *node = varpool_node::get (var_decl);
+ if (node->externally_visible)
+ continue;
+ promote_symbol (node);
+ }
+}
+
/* Return true if NODE needs named section even if it won't land in the partition
symbol table.
FIXME: we should really not use named sections for inline clones and master
diff --git a/gcc/lto/lto-partition.h b/gcc/lto/lto-partition.h
index 31e3764..1a38126 100644
--- a/gcc/lto/lto-partition.h
+++ b/gcc/lto/lto-partition.h
@@ -36,6 +36,7 @@ extern vec<ltrans_partition> ltrans_partitions;
void lto_1_to_1_map (void);
void lto_max_map (void);
void lto_balanced_map (int);
+extern void promote_offload_tables (void);
void lto_promote_cross_file_statics (void);
void free_ltrans_partitions (void);
void lto_promote_statics_nonwpa (void);
diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
index 9dd513f..2736c5c 100644
--- a/gcc/lto/lto.c
+++ b/gcc/lto/lto.c
@@ -3138,6 +3138,8 @@ do_whole_program_analysis (void)
to globals with hidden visibility because they are accessed from multiple
partitions. */
lto_promote_cross_file_statics ();
+ /* Promote all the offload symbols. */
+ promote_offload_tables ();
timevar_pop (TV_WHOPR_PARTITIONING);
timevar_stop (TV_PHASE_OPT_GEN);
diff --git a/libgomp/testsuite/libgomp.c/target-36.c b/libgomp/testsuite/libgomp.c/target-36.c
new file mode 100644
index 0000000..bafb718
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-36.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target lto } } */
+/* { dg-additional-options "-flto -flto-partition=1to1 -fno-toplevel-reorder" } */
+
+#include "target-1.c"
diff --git a/libgomp/testsuite/libgomp.c/target-37.c b/libgomp/testsuite/libgomp.c/target-37.c
new file mode 100644
index 0000000..1edb21e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-37.c
@@ -0,0 +1,98 @@
+/* { dg-do run { target lto } } */
+/* { dg-additional-sources "target-38.c" } */
+/* { dg-additional-options "-flto -flto-partition=1to1 -fno-toplevel-reorder" } */
+
+extern
+#ifdef __cplusplus
+"C"
+#endif
+void abort (void);
+
+void
+fn1 (double *x, double *y, int z)
+{
+ int i;
+ for (i = 0; i < z; i++)
+ {
+ x[i] = i & 31;
+ y[i] = (i & 63) - 30;
+ }
+}
+
+#pragma omp declare target
+static int tgtv = 6;
+static int
+tgt (void)
+{
+ #pragma omp atomic update
+ tgtv++;
+ return 0;
+}
+#pragma omp end declare target
+
+static double
+fn2 (int x, int y, int z)
+{
+ double b[1024], c[1024], s = 0;
+ int i, j;
+ fn1 (b, c, x);
+ #pragma omp target data map(to: b)
+ {
+ #pragma omp target map(tofrom: c, s)
+ #pragma omp teams num_teams(y) thread_limit(z) reduction(+:s) firstprivate(x)
+ #pragma omp distribute dist_schedule(static, 4) collapse(1)
+ for (j=0; j < x; j += y)
+ #pragma omp parallel for reduction(+:s)
+ for (i = j; i < j + y; i++)
+ tgt (), s += b[i] * c[i];
+ #pragma omp target update from(b, tgtv)
+ }
+ return s;
+}
+
+static double
+fn3 (int x)
+{
+ double b[1024], c[1024], s = 0;
+ int i;
+ fn1 (b, c, x);
+ #pragma omp target map(to: b, c) map(tofrom:s)
+ #pragma omp parallel for reduction(+:s)
+ for (i = 0; i < x; i++)
+ tgt (), s += b[i] * c[i];
+ return s;
+}
+
+static double
+fn4 (int x, double *p)
+{
+ double b[1024], c[1024], d[1024], s = 0;
+ int i;
+ fn1 (b, c, x);
+ fn1 (d + x, p + x, x);
+ #pragma omp target map(to: b, c[0:x], d[x:x]) map(to:p[x:64 + (x & 31)]) \
+ map(tofrom: s)
+ #pragma omp parallel for reduction(+:s)
+ for (i = 0; i < x; i++)
+ s += b[i] * c[i] + d[x + i] + p[x + i];
+ return s;
+}
+
+extern int other_main (void);
+
+int
+main ()
+{
+ double a = fn2 (128, 4, 6);
+ int b = tgtv;
+ double c = fn3 (61);
+ #pragma omp target update from(tgtv)
+ int d = tgtv;
+ double e[1024];
+ double f = fn4 (64, e);
+ if (a != 13888.0 || b != 6 + 128 || c != 4062.0 || d != 6 + 128 + 61
+ || f != 8032.0)
+ abort ();
+ other_main ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/target-38.c b/libgomp/testsuite/libgomp.c/target-38.c
new file mode 100644
index 0000000..15e69c4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-38.c
@@ -0,0 +1,95 @@
+/* { dg-skip-if "additional source" { *-*-* } } */
+
+extern
+#ifdef __cplusplus
+"C"
+#endif
+void abort (void);
+
+static void
+fna1 (double *x, double *y, int z)
+{
+ int i;
+ for (i = 0; i < z; i++)
+ {
+ x[i] = i & 31;
+ y[i] = (i & 63) - 30;
+ }
+}
+
+#pragma omp declare target
+static int tgtva = 6;
+static int
+tgta (void)
+{
+ #pragma omp atomic update
+ tgtva++;
+ return 0;
+}
+#pragma omp end declare target
+
+double
+fna2 (int x, int y, int z)
+{
+ double b[1024], c[1024], s = 0;
+ int i, j;
+ fna1 (b, c, x);
+ #pragma omp target data map(to: b)
+ {
+ #pragma omp target map(tofrom: c, s)
+ #pragma omp teams num_teams(y) thread_limit(z) reduction(+:s) firstprivate(x)
+ #pragma omp distribute dist_schedule(static, 4) collapse(1)
+ for (j=0; j < x; j += y)
+ #pragma omp parallel for reduction(+:s)
+ for (i = j; i < j + y; i++)
+ tgta (), s += b[i] * c[i];
+ #pragma omp target update from(b, tgtva)
+ }
+ return s;
+}
+
+static double
+fna3 (int x)
+{
+ double b[1024], c[1024], s = 0;
+ int i;
+ fna1 (b, c, x);
+ #pragma omp target map(to: b, c) map(tofrom:s)
+ #pragma omp parallel for reduction(+:s)
+ for (i = 0; i < x; i++)
+ tgta (), s += b[i] * c[i];
+ return s;
+}
+
+static double
+fna4 (int x, double *p)
+{
+ double b[1024], c[1024], d[1024], s = 0;
+ int i;
+ fna1 (b, c, x);
+ fna1 (d + x, p + x, x);
+ #pragma omp target map(to: b, c[0:x], d[x:x]) map(to:p[x:64 + (x & 31)]) \
+ map(tofrom: s)
+ #pragma omp parallel for reduction(+:s)
+ for (i = 0; i < x; i++)
+ s += b[i] * c[i] + d[x + i] + p[x + i];
+ return s;
+}
+
+extern int other_main (void);
+
+int
+other_main (void)
+{
+ double a = fna2 (128, 4, 6);
+ int b = tgtva;
+ double c = fna3 (61);
+ #pragma omp target update from(tgtva)
+ int d = tgtva;
+ double e[1024];
+ double f = fna4 (64, e);
+ if (a != 13888.0 || b != 6 + 128 || c != 4062.0 || d != 6 + 128 + 61
+ || f != 8032.0)
+ abort ();
+ return 0;
+}