In the future, kernels regions will be transformed into data regions containing
a sequence of serial and parallel offloaded regions. This first patch sets up a
new pass that is responsible for this transformation, and in a first step
constructs the new data region containing a parallel region with the original
kernels region's body.
2019-07-16 Gergö Barany <ge...@codesourcery.com>
gcc/
* Makefile.in: Add...
* omp-oacc-kernels.c: ... this new file for the kernels conversion
pass.
* flag-types.h (enum openacc_kernels): Add "split" style. Adjust
all users.
* doc/invoke.texi (-fopenacc-kernels): Update.
* passes.def: Add pass_convert_oacc_kernels to pipeline.
* tree-pass.h (make_pass_convert_oacc_kernels): Add declaration.
gcc/c-family/
* c.opt (fopenacc-kernels): Document. Add 'split' option.
gcc/fortran/
* lang.opt (fopenacc-kernels): Document.
gcc/testsuite/
* c-c++-common/goacc/kernels-conversion.c: New test.
* gfortran.dg/goacc/kernels-conversion.f95: Likewise.
* c-c++-common/goacc/if-clause-2.c: Update.
* gfortran.dg/goacc/kernels-tree.f95: Likewise.
---
gcc/Makefile.in | 2 +
gcc/c-family/c.opt | 6 +-
gcc/doc/invoke.texi | 13 +-
gcc/flag-types.h | 1 +
gcc/fortran/lang.opt | 3 +-
gcc/omp-oacc-kernels.c | 245 +++++++++++++++++++++
gcc/passes.def | 1 +
gcc/testsuite/c-c++-common/goacc/if-clause-2.c | 7 +
.../c-c++-common/goacc/kernels-conversion.c | 36 +++
.../gfortran.dg/goacc/kernels-conversion.f95 | 33 +++
gcc/testsuite/gfortran.dg/goacc/kernels-tree.f95 | 6 +
gcc/tree-pass.h | 1 +
12 files changed, 351 insertions(+), 3 deletions(-)
create mode 100644 gcc/omp-oacc-kernels.c
create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-conversion.c
create mode 100644 gcc/testsuite/gfortran.dg/goacc/kernels-conversion.f95
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 597dc01..82537f6 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1432,6 +1432,7 @@ OBJS = \
omp-general.o \
omp-grid.o \
omp-low.o \
+ omp-oacc-kernels.o \
omp-simd-clone.o \
opt-problem.o \
optabs.o \
@@ -2560,6 +2561,7 @@ GTFILES = $(CPPLIB_H) $(srcdir)/input.h
$(srcdir)/coretypes.h \
$(srcdir)/omp-offload.c \
$(srcdir)/omp-expand.c \
$(srcdir)/omp-low.c \
+ $(srcdir)/omp-oacc-kernels.c \
$(srcdir)/targhooks.c $(out_file) $(srcdir)/passes.c $(srcdir)/cgraphunit.c \
$(srcdir)/cgraphclones.c \
$(srcdir)/tree-phinodes.c \
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 4bdacb6..a193875 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -1689,12 +1689,16 @@ C ObjC C++ ObjC++ LTO Joined Var(flag_openacc_dims)
Specify default OpenACC compute dimensions.
fopenacc-kernels=
-C ObjC C++ ObjC++ RejectNegative Joined Enum(openacc_kernels)
Var(flag_openacc_kernels) Init(OPENACC_KERNELS_PARLOOPS) Undocumented
+C ObjC C++ ObjC++ RejectNegative Joined Enum(openacc_kernels)
Var(flag_openacc_kernels) Init(OPENACC_KERNELS_PARLOOPS)
+-fopenacc-kernels=[split|parloops] Configure OpenACC 'kernels' constructs
handling.
Enum
Name(openacc_kernels) Type(enum openacc_kernels)
EnumValue
+Enum(openacc_kernels) String(split) Value(OPENACC_KERNELS_SPLIT)
+
+EnumValue
Enum(openacc_kernels) String(parloops) Value(OPENACC_KERNELS_PARLOOPS)
fopenmp
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 0c20cb6..ec98ab6 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -198,7 +198,7 @@ in the following sections.
-aux-info @var{filename} -fallow-parameterless-variadic-functions @gol
-fno-asm -fno-builtin -fno-builtin-@var{function} -fgimple@gol
-fhosted -ffreestanding @gol
--fopenacc -fopenacc-dim=@var{geom} @gol
+-fopenacc -fopenacc-dim=@var{geom} -fopenacc-kernels=@var{style} @gol
-fopenmp -fopenmp-simd @gol
-fms-extensions -fplan9-extensions -fsso-struct=@var{endianness} @gol
-fallow-single-precision -fcond-mismatch -flax-vector-conversions @gol
@@ -2193,6 +2193,17 @@ not explicitly specify. The @var{geom} value is a
triple of
':'-separated sizes, in order 'gang', 'worker' and, 'vector'. A size
can be omitted, to use a target-specific default value.
+@item -fopenacc-kernels=@var{style}
+@opindex fopenacc-kernels
+@cindex OpenACC accelerator programming
+Configure OpenACC 'kernels' constructs handling.
+With @option{-fopenacc-kernels=split}, OpenACC 'kernels' constructs
+are split into a sequence of compute constructs, each then handled
+individually.
+With @option{-fopenacc-kernels=parloops}, the whole OpenACC
+'kernels' constructs is handled by the @samp{parloops} pass.
+This is the default.
+
@item -fopenmp
@opindex fopenmp
@cindex OpenMP parallel
diff --git a/gcc/flag-types.h b/gcc/flag-types.h
index 24a80858..ce32607 100644
--- a/gcc/flag-types.h
+++ b/gcc/flag-types.h
@@ -358,6 +358,7 @@ enum cf_protection_level
/* OpenACC 'kernels' constructs handling. */
enum openacc_kernels
{
+ OPENACC_KERNELS_SPLIT,
OPENACC_KERNELS_PARLOOPS
};
#endif /* ! GCC_FLAG_TYPES_H */
diff --git a/gcc/fortran/lang.opt b/gcc/fortran/lang.opt
index 73e88fd..e7e277a 100644
--- a/gcc/fortran/lang.opt
+++ b/gcc/fortran/lang.opt
@@ -663,7 +663,8 @@ Fortran LTO Joined Var(flag_openacc_dims)
; Documented in C
fopenacc-kernels=
-Fortran RejectNegative Joined Enum(openacc_kernels) Var(flag_openacc_kernels)
Init(OPENACC_KERNELS_PARLOOPS) Undocumented
+Fortran RejectNegative Joined Enum(openacc_kernels) Var(flag_openacc_kernels)
Init(OPENACC_KERNELS_PARLOOPS)
+; Documented in C
fopenmp
Fortran LTO
diff --git a/gcc/omp-oacc-kernels.c b/gcc/omp-oacc-kernels.c
new file mode 100644
index 0000000..d180377
--- /dev/null
+++ b/gcc/omp-oacc-kernels.c
@@ -0,0 +1,245 @@
+/* Transformation pass for OpenACC kernels regions. Converts a kernels
+ region into a series of smaller parallel regions. There is a parallel
+ region for each parallelizable loop nest, as well as a "gang-single"
+ parallel region for each non-parallelizable piece of code.
+
+ Contributed by Gergö Barany <ge...@codesourcery.com> and
+ Thomas Schwinge <tho...@codesourcery.com>
+
+ Copyright (C) 2019 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "target.h"
+#include "tree.h"
+#include "gimple.h"
+#include "tree-pass.h"
+#include "cgraph.h"
+#include "fold-const.h"
+#include "gimplify.h"
+#include "gimple-iterator.h"
+#include "gimple-walk.h"
+#include "gomp-constants.h"
+
+/* This is a preprocessing pass to be run immediately before lower_omp. It
+ will convert OpenACC "kernels" regions into sequences of "parallel"
+ regions.
+ For now, the translation is as follows:
+ - The entire kernels region is turned into a data region with clauses
+ taken from the kernels region. New "create" clauses are added for all
+ variables declared at the top level in the kernels region. */
+
+/* Transform KERNELS_REGION, which is an OpenACC kernels region, into a data
+ region containing the original kernels region. */
+
+static gimple *
+transform_kernels_region (gimple *kernels_region)
+{
+ gcc_checking_assert (gimple_omp_target_kind (kernels_region)
+ == GF_OMP_TARGET_KIND_OACC_KERNELS);
+
+ /* Collect the kernels region's data clauses and create the new data
+ region with those clauses. */
+ tree kernels_clauses = gimple_omp_target_clauses (kernels_region);
+ tree data_clauses = NULL;
+ for (tree c = kernels_clauses; c; c = OMP_CLAUSE_CHAIN (c))
+ {
+ /* Certain map clauses are copied to the enclosing data region. Any
+ non-data clause remains on the kernels region. */
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP)
+ {
+ tree decl = OMP_CLAUSE_DECL (c);
+ HOST_WIDE_INT kind = OMP_CLAUSE_MAP_KIND (c);
+ switch (kind)
+ {
+ default:
+ if (kind == GOMP_MAP_ALLOC &&
+ integer_zerop (OMP_CLAUSE_SIZE (c)))
+ /* ??? This is an alloc clause for mapping a pointer whose
+ target is already mapped. We leave these on the inner
+ parallel regions because moving them to the outer data
+ region causes runtime errors. */
+ break;
+
+ /* For non-artificial variables, and for non-declaration
+ expressions like A[0:n], copy the clause to the data
+ region. */
+ if ((DECL_P (decl) && !DECL_ARTIFICIAL (decl))
+ || !DECL_P (decl))
+ {
+ tree new_clause = build_omp_clause (OMP_CLAUSE_LOCATION (c),
+ OMP_CLAUSE_MAP);
+ OMP_CLAUSE_SET_MAP_KIND (new_clause, kind);
+ /* This must be unshared here to avoid "incorrect sharing
+ of tree nodes" errors from verify_gimple. */
+ OMP_CLAUSE_DECL (new_clause) = unshare_expr (decl);
+ OMP_CLAUSE_SIZE (new_clause) = OMP_CLAUSE_SIZE (c);
+ OMP_CLAUSE_CHAIN (new_clause) = data_clauses;
+ data_clauses = new_clause;
+
+ /* Now that this data is mapped, the inner data clause on
+ the kernels region can become a present clause. */
+ OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_FORCE_PRESENT);
+ }
+ break;
+
+ case GOMP_MAP_POINTER:
+ case GOMP_MAP_TO_PSET:
+ case GOMP_MAP_FORCE_TOFROM:
+ case GOMP_MAP_FIRSTPRIVATE_POINTER:
+ case GOMP_MAP_FIRSTPRIVATE_REFERENCE:
+ /* ??? Copying these map kinds leads to internal compiler
+ errors in later passes. */
+ break;
+ }
+ }
+ else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IF)
+ {
+ /* If there is an if clause, it must also be present on the
+ enclosing data region. Temporarily remove the if clause's
+ chain to avoid copying it. */
+ tree saved_chain = OMP_CLAUSE_CHAIN (c);
+ OMP_CLAUSE_CHAIN (c) = NULL;
+ tree new_if_clause = unshare_expr (c);
+ OMP_CLAUSE_CHAIN (c) = saved_chain;
+ OMP_CLAUSE_CHAIN (new_if_clause) = data_clauses;
+ data_clauses = new_if_clause;
+ }
+ }
+ /* Restore the original order of the clauses. */
+ data_clauses = nreverse (data_clauses);
+
+ gimple *data_region
+ = gimple_build_omp_target (NULL, GF_OMP_TARGET_KIND_OACC_DATA_KERNELS,
+ data_clauses);
+ gimple_set_location (data_region, gimple_location (kernels_region));
+
+ /* For now, just construct a new parallel region inside the data region. */
+ gimple *inner_region
+ = gimple_build_omp_target (NULL, GF_OMP_TARGET_KIND_OACC_PARALLEL,
+ kernels_clauses);
+ gimple_set_location (inner_region, gimple_location (kernels_region));
+ gimple_omp_set_body (inner_region, gimple_omp_body (kernels_region));
+
+ gbind *bind = gimple_build_bind (NULL, NULL, NULL);
+ gimple_bind_add_stmt (bind, inner_region);
+
+ /* Put the transformed pieces together. The entire body of the region is
+ wrapped in a try-finally statement that calls __builtin_GOACC_data_end
+ for cleanup. */
+ tree data_end_fn = builtin_decl_explicit (BUILT_IN_GOACC_DATA_END);
+ gimple *call = gimple_build_call (data_end_fn, 0);
+ gimple_seq cleanup = NULL;
+ gimple_seq_add_stmt (&cleanup, call);
+ gimple *try_stmt = gimple_build_try (bind, cleanup, GIMPLE_TRY_FINALLY);
+ gimple_omp_set_body (data_region, try_stmt);
+
+ return data_region;
+}
+
+/* Helper function of convert_oacc_kernels for walking the tree, calling
+ transform_kernels_region on each kernels region found. */
+
+static tree
+scan_kernels (gimple_stmt_iterator *gsi_p, bool *handled_ops_p,
+ struct walk_stmt_info *)
+{
+ gimple *stmt = gsi_stmt (*gsi_p);
+ *handled_ops_p = false;
+
+ int kind;
+ switch (gimple_code (stmt))
+ {
+ case GIMPLE_OMP_TARGET:
+ kind = gimple_omp_target_kind (stmt);
+ if (kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
+ {
+ gimple *new_region = transform_kernels_region (stmt);
+ gsi_replace (gsi_p, new_region, false);
+ *handled_ops_p = true;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return NULL;
+}
+
+/* Find and transform OpenACC kernels regions in the current function. */
+
+static unsigned int
+convert_oacc_kernels (void)
+{
+ struct walk_stmt_info wi;
+ gimple_seq body = gimple_body (current_function_decl);
+
+ memset (&wi, 0, sizeof (wi));
+ walk_gimple_seq_mod (&body, scan_kernels, NULL, &wi);
+
+ gimple_set_body (current_function_decl, body);
+
+ return 0;
+}
+
+namespace {
+
+const pass_data pass_data_convert_oacc_kernels =
+{
+ GIMPLE_PASS, /* type */
+ "convert_oacc_kernels", /* name */
+ OPTGROUP_OMP, /* optinfo_flags */
+ TV_NONE, /* tv_id */
+ PROP_gimple_any, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+class pass_convert_oacc_kernels : public gimple_opt_pass
+{
+public:
+ pass_convert_oacc_kernels (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_convert_oacc_kernels, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *)
+ {
+ return (flag_openacc
+ && flag_openacc_kernels == OPENACC_KERNELS_SPLIT);
+ }
+ virtual unsigned int execute (function *)
+ {
+ return convert_oacc_kernels ();
+ }
+
+}; // class pass_convert_oacc_kernels
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_convert_oacc_kernels (gcc::context *ctxt)
+{
+ return new pass_convert_oacc_kernels (ctxt);
+}
diff --git a/gcc/passes.def b/gcc/passes.def
index 1a7fd14..7cee52b 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_warn_unused_result);
NEXT_PASS (pass_diagnose_omp_blocks);
NEXT_PASS (pass_diagnose_tm_blocks);
+ NEXT_PASS (pass_convert_oacc_kernels);
NEXT_PASS (pass_lower_omp);
NEXT_PASS (pass_lower_cf);
NEXT_PASS (pass_lower_tm);
diff --git a/gcc/testsuite/c-c++-common/goacc/if-clause-2.c
b/gcc/testsuite/c-c++-common/goacc/if-clause-2.c
index 5ab8459..e17b5dd 100644
--- a/gcc/testsuite/c-c++-common/goacc/if-clause-2.c
+++ b/gcc/testsuite/c-c++-common/goacc/if-clause-2.c
@@ -1,3 +1,6 @@
+/* { dg-additional-options "-fopenacc-kernels=split" } */
+/* { dg-additional-options "-fdump-tree-convert_oacc_kernels" } */
+
void
f (short c)
{
@@ -9,3 +12,7 @@ f (short c)
;
#pragma acc update device(c) if(c)
}
+
+/* Verify that the 'if' clause gets duplicated.
+ { dg-final { scan-tree-dump-times "#pragma omp target oacc_data_kernels
if\\(" 1 "convert_oacc_kernels" } }
+ { dg-final { scan-tree-dump-times "#pragma omp target
oacc_parallel_kernels_gang_single .* if\\(" 1 "convert_oacc_kernels" } } */
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-conversion.c
b/gcc/testsuite/c-c++-common/goacc/kernels-conversion.c
new file mode 100644
index 0000000..c75db37
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-conversion.c
@@ -0,0 +1,36 @@
+/* { dg-additional-options "-fopenacc-kernels=split" } */
+/* { dg-additional-options "-fdump-tree-convert_oacc_kernels" } */
+
+#define N 1024
+
+unsigned int a[N];
+
+int
+main (void)
+{
+ int i;
+ unsigned int sum = 1;
+
+#pragma acc kernels copyin(a[0:N]) copy(sum)
+ {
+ #pragma acc loop
+ for (i = 0; i < N; ++i)
+ sum += a[i];
+
+ sum++;
+
+ #pragma acc loop
+ for (i = 0; i < N; ++i)
+ sum += a[i];
+ }
+
+ return 0;
+}
+
+/* Check that the kernels region is split into a data region and an enclosed
+ parallel region. */
+/* { dg-final { scan-tree-dump-times "oacc_data_kernels" 1
"convert_oacc_kernels" } } */
+/* { dg-final { scan-tree-dump-times "oacc_parallel" 1 "convert_oacc_kernels" }
} */
+
+/* Check that the original kernels region is removed. */
+/* { dg-final { scan-tree-dump-not "oacc_kernels" "convert_oacc_kernels" } } */
diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-conversion.f95
b/gcc/testsuite/gfortran.dg/goacc/kernels-conversion.f95
new file mode 100644
index 0000000..8c66330
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/kernels-conversion.f95
@@ -0,0 +1,33 @@
+! { dg-additional-options "-fopenacc-kernels=split" }
+! { dg-additional-options "-fdump-tree-convert_oacc_kernels" }
+
+program main
+ implicit none
+ integer, parameter :: N = 1024
+ integer, dimension (1:N) :: a
+ integer :: i, sum
+
+ !$acc kernels copyin(a(1:N)) copy(sum)
+
+ !$acc loop
+ do i = 1, N
+ sum = sum + a(i)
+ end do
+
+ sum = sum + 1
+
+ !$acc loop
+ do i = 1, N
+ sum = sum + a(i)
+ end do
+
+ !$acc end kernels
+end program main
+
+! Check that the kernels region is split into a data region and an enclosed
+! parallel region.
+! { dg-final { scan-tree-dump-times "oacc_data_kernels" 1
"convert_oacc_kernels" } }
+! { dg-final { scan-tree-dump-times "oacc_parallel" 1 "convert_oacc_kernels" }
}
+
+! Check that the original kernels region is removed.
+! { dg-final { scan-tree-dump-not "oacc_kernels" "convert_oacc_kernels" } }
diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-tree.f95
b/gcc/testsuite/gfortran.dg/goacc/kernels-tree.f95
index a70f1e7..b83ca2d 100644
--- a/gcc/testsuite/gfortran.dg/goacc/kernels-tree.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/kernels-tree.f95
@@ -1,5 +1,7 @@
! { dg-do compile }
! { dg-additional-options "-fdump-tree-original" }
+! { dg-additional-options "-fopenacc-kernels=split" }
+! { dg-additional-options "-fdump-tree-convert_oacc_kernels" }
program test
implicit none
@@ -33,3 +35,7 @@ end program test
! { dg-final { scan-tree-dump-times "map\\(alloc:t\\)" 1 "original" } }
! { dg-final { scan-tree-dump-times "map\\(force_deviceptr:u\\)" 1 "original"
} }
+
+! Verify that the 'if' clause gets duplicated.
+! { dg-final { scan-tree-dump-times "#pragma omp target oacc_data_kernels
if\\(" 1 "convert_oacc_kernels" } }
+! { dg-final { scan-tree-dump-times "#pragma omp target
oacc_parallel_kernels_gang_single .* if\\(" 1 "convert_oacc_kernels" } }
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 1c8df3d..5fd8c2c 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -412,6 +412,7 @@ extern gimple_opt_pass *make_pass_lower_switch_O0
(gcc::context *ctxt);
extern gimple_opt_pass *make_pass_lower_vector (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_lower_vector_ssa (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_lower_omp (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_convert_oacc_kernels (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_diagnose_omp_blocks (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_expand_omp (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_expand_omp_ssa (gcc::context *ctxt);
--
2.8.1