Hi,
this work-in-progress patch implements a new option
-foffload-alias=<none|pointer|all>.
The option -foffload-alias=none instructs the compiler to assume that
objects references and pointer dereferences in an offload region do not
alias.
The option -foffload-alias=pointer instructs the compiler to assume that
objects references in an offload region do not alias.
The option -foffload-alias=all instructs the compiler to make no
assumptions about aliasing in offload regions.
The default value is -foffload-alias=none.
The patch works by adding restrict to the types of the fields used to
pass data to an offloading region.
Atm, the kernels-loop-offload-alias-ptr.c test-case passes, but the
kernels-loop-offload-alias-none.c test-case fails. For the latter, the
required amount of restrict is added, but it has no effect. I've
reported this in a more basic form in PR67742: "3rd-level restrict ignored".
Thanks,
- Tom
Implement -foffload-alias
2015-09-28 Tom de Vries <t...@codesourcery.com>
* common.opt (foffload-alias): New option.
* flag-types.h (enum offload_alias): New enum.
* omp-low.c (is_gimple_oacc_offload): New function.
(install_var_field): Handle flag_offload_alias.
* doc/invoke.texi (@item Code Generation Options): Add -foffload-alias.
(@item -foffload-alias): New item.
* c-c++-common/goacc/kernels-loop-offload-alias-none.c: New test.
* c-c++-common/goacc/kernels-loop-offload-alias-ptr.c: New test.
---
gcc/common.opt | 16 ++++++
gcc/doc/invoke.texi | 11 ++++
gcc/flag-types.h | 7 +++
gcc/omp-low.c | 24 ++++++++-
.../goacc/kernels-loop-offload-alias-none.c | 63 ++++++++++++++++++++++
.../goacc/kernels-loop-offload-alias-ptr.c | 52 ++++++++++++++++++
6 files changed, 172 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-none.c
create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-ptr.c
diff --git a/gcc/common.opt b/gcc/common.opt
index 290b6b3..28977a4 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1730,6 +1730,22 @@ Enum(offload_abi) String(ilp32) Value(OFFLOAD_ABI_ILP32)
EnumValue
Enum(offload_abi) String(lp64) Value(OFFLOAD_ABI_LP64)
+foffload-alias=
+Common Joined RejectNegative Enum(offload_alias) Var(flag_offload_alias) Init(OFFLOAD_ALIAS_NONE)
+-foffload-alias=[all|pointer|none] Assume non-aliasing in an offload region
+
+Enum
+Name(offload_alias) Type(enum offload_alias) UnknownError(unknown offload aliasing %qs)
+
+EnumValue
+Enum(offload_alias) String(all) Value(OFFLOAD_ALIAS_ALL)
+
+EnumValue
+Enum(offload_alias) String(pointer) Value(OFFLOAD_ALIAS_POINTER)
+
+EnumValue
+Enum(offload_alias) String(none) Value(OFFLOAD_ALIAS_NONE)
+
fomit-frame-pointer
Common Report Var(flag_omit_frame_pointer) Optimization
When possible do not generate stack frames
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 909a453..a5ab785 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1136,6 +1136,7 @@ See S/390 and zSeries Options.
-finstrument-functions-exclude-function-list=@var{sym},@var{sym},@dots{} @gol
-finstrument-functions-exclude-file-list=@var{file},@var{file},@dots{} @gol
-fno-common -fno-ident @gol
+-foffload-alias=@r{[}none@r{|}pointer@r{|}all@r{]} @gol
-fpcc-struct-return -fpic -fPIC -fpie -fPIE -fno-plt @gol
-fno-jump-tables @gol
-frecord-gcc-switches @gol
@@ -23695,6 +23696,16 @@ The options @option{-ftrapv} and @option{-fwrapv} override each other, so using
using @option{-ftrapv} @option{-fwrapv} @option{-fno-wrapv} on the command-line
results in @option{-ftrapv} being effective.
+@item -foffload-alias=@r{[}none@r{|}pointer@r{|}all@r{]}
+@opindex -foffload-alias
+The option @option{-foffload-alias=none} instructs the compiler to assume that
+objects references and pointer dereferences in an offload region do not alias.
+The option @option{-foffload-alias=pointer} instruct the compiler to assume that
+objects references in an offload region are presumed unaliased. The option
+@option{-foffload-alias=all} instructs the compiler to make no assumtions about
+aliasing in offload regions. The default value is
+@option{-foffload-alias=none}.
+
@item -fexceptions
@opindex fexceptions
Enable exception handling. Generates extra code needed to propagate
diff --git a/gcc/flag-types.h b/gcc/flag-types.h
index ac9ca0b..e8e672d 100644
--- a/gcc/flag-types.h
+++ b/gcc/flag-types.h
@@ -286,5 +286,12 @@ enum gfc_convert
GFC_FLAG_CONVERT_LITTLE
};
+enum offload_alias
+{
+ OFFLOAD_ALIAS_ALL,
+ OFFLOAD_ALIAS_POINTER,
+ OFFLOAD_ALIAS_NONE
+};
+
#endif /* ! GCC_FLAG_TYPES_H */
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index a5904eb..9cbba1f 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -1140,6 +1140,16 @@ use_pointer_for_field (tree decl, omp_context *shared_ctx)
return false;
}
+static bool
+is_gimple_oacc_offload (const gimple *stmt)
+{
+ return (gimple_code (stmt) == GIMPLE_OMP_TARGET
+ && (((gimple_omp_target_kind (stmt)
+ == GF_OMP_TARGET_KIND_OACC_PARALLEL)
+ || (gimple_omp_target_kind (stmt)
+ == GF_OMP_TARGET_KIND_OACC_KERNELS))));
+}
+
/* Construct a new automatic decl similar to VAR. */
static tree
@@ -1283,6 +1293,7 @@ static void
install_var_field (tree var, bool by_ref, int mask, omp_context *ctx)
{
tree field, type, sfield = NULL_TREE;
+ bool in_oacc_offload = is_gimple_oacc_offload (ctx->stmt);
gcc_assert ((mask & 1) == 0
|| !splay_tree_lookup (ctx->field_map, (splay_tree_key) var));
@@ -1298,7 +1309,18 @@ install_var_field (tree var, bool by_ref, int mask, omp_context *ctx)
type = build_pointer_type (build_pointer_type (type));
}
else if (by_ref)
- type = build_pointer_type (type);
+ {
+ if (in_oacc_offload
+ && flag_offload_alias == OFFLOAD_ALIAS_NONE
+ && POINTER_TYPE_P (type))
+ type = build_qualified_type (type, TYPE_QUAL_RESTRICT);
+
+ type = build_pointer_type (type);
+
+ if (in_oacc_offload
+ && flag_offload_alias != OFFLOAD_ALIAS_ALL)
+ type = build_qualified_type (type, TYPE_QUAL_RESTRICT);
+ }
else if ((mask & 3) == 1 && is_reference (var))
type = TREE_TYPE (type);
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-none.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-none.c
new file mode 100644
index 0000000..918bced
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-none.c
@@ -0,0 +1,63 @@
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+/* { dg-additional-options "-fdump-tree-alias-all" } */
+/* { dg-additional-options "-foffload-alias=none" } */
+
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+int
+main (void)
+{
+ unsigned int *a;
+ unsigned int *b;
+ unsigned int *c;
+
+ a = (unsigned int *)malloc (N * sizeof (unsigned int));
+ b = (unsigned int *)malloc (N * sizeof (unsigned int));
+ c = (unsigned int *)malloc (N * sizeof (unsigned int));
+
+ for (COUNTERTYPE i = 0; i < N; i++)
+ a[i] = i * 2;
+
+ for (COUNTERTYPE i = 0; i < N; i++)
+ b[i] = i * 4;
+
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
+ {
+ for (COUNTERTYPE ii = 0; ii < N; ii++)
+ c[ii] = a[ii] + b[ii];
+ }
+
+ for (COUNTERTYPE i = 0; i < N; i++)
+ if (c[i] != a[i] + b[i])
+ abort ();
+
+ free (a);
+ free (b);
+ free (c);
+
+ return 0;
+}
+
+/* Check that only one loop is analyzed, and that it can be parallelized. */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops_oacc_kernels" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */
+
+/* Check that the loop has been split off into a function. */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
+
+/* { dg-final { scan-tree-dump-times "(?n)pragma omp target oacc_parallel.*num_gangs\\(32\\)" 1 "parloops_oacc_kernels" } } */
+
+/* { dg-final { scan-tree-dump-times "clique 1 base 1" 3 "alias" } } */
+/* { dg-final { scan-tree-dump-times "clique 1 base 2" 1 "alias" } } */
+/* { dg-final { scan-tree-dump-times "clique 1 base 3" 1 "alias" } } */
+/* { dg-final { scan-tree-dump-times "clique 1 base 4" 1 "alias" } } */
+/* { dg-final { scan-tree-dump-times "clique 1 base 5" 1 "alias" } } */
+/* { dg-final { scan-tree-dump-times "clique 1 base 6" 1 "alias" } } */
+/* { dg-final { scan-tree-dump-times "clique 1 base 7" 1 "alias" } } */
+/* { dg-final { scan-tree-dump-times "(?n)clique .* base .*" 9 "alias" } } */
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-ptr.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-ptr.c
new file mode 100644
index 0000000..029d77c
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-ptr.c
@@ -0,0 +1,52 @@
+/* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+/* { dg-additional-options "-fdump-tree-alias-all" } */
+/* { dg-additional-options "-foffload-alias=pointer" } */
+
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+unsigned int a[N];
+unsigned int b[N];
+unsigned int c[N];
+
+int
+main (void)
+{
+ for (COUNTERTYPE i = 0; i < N; i++)
+ a[i] = i * 2;
+
+ for (COUNTERTYPE i = 0; i < N; i++)
+ b[i] = i * 4;
+
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
+ {
+ for (COUNTERTYPE ii = 0; ii < N; ii++)
+ c[ii] = a[ii] + b[ii];
+ }
+
+ for (COUNTERTYPE i = 0; i < N; i++)
+ if (c[i] != a[i] + b[i])
+ abort ();
+
+ return 0;
+}
+
+/* Check that only one loop is analyzed, and that it can be parallelized. */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops_oacc_kernels" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */
+
+/* Check that the loop has been split off into a function. */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
+
+/* { dg-final { scan-tree-dump-times "(?n)pragma omp target oacc_parallel.*num_gangs\\(32\\)" 1 "parloops_oacc_kernels" } } */
+
+/* { dg-final { scan-tree-dump-times "clique 1 base 1" 3 "alias" } } */
+/* { dg-final { scan-tree-dump-times "clique 1 base 2" 1 "alias" } } */
+/* { dg-final { scan-tree-dump-times "clique 1 base 3" 1 "alias" } } */
+/* { dg-final { scan-tree-dump-times "clique 1 base 4" 1 "alias" } } */
+/* { dg-final { scan-tree-dump-times "(?n)clique .* base .*" 6 "alias" } } */
--
1.9.1