This patch is in preparation for changes that will cut up OpenACC kernels
regions into individual parts. For the new sub-regions that will be generated,
this adds the following new kinds of OpenACC regions for internal use:
- GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED for parts of kernels
regions to be executed in gang-redundant mode
- GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE for parts of kernels
regions to be executed in gang-single mode
- GF_OMP_TARGET_KIND_OACC_DATA_KERNELS for data regions generated around the
body of a kernels region
2019-07-16 Thomas Schwinge <tho...@codesourcery.com>
gcc/
* gimple.h (enum gf_mask): Add new target kinds
GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED,
GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE, and
GF_OMP_TARGET_KIND_OACC_DATA_KERNELS.
(is_gimple_omp_oacc): Handle new target kinds.
(is_gimple_omp_offloaded): Likewise.
* gimple-pretty-print.c (dump_gimple_omp_target): Likewise.
* omp-expand.c (expand_omp_target): Likewise.
(build_omp_regions_1): Likewise.
(omp_make_gimple_edges): Likewise.
* omp-low.c (is_oacc_parallel): Likewise.
(was_originally_oacc_kernels): New function.
(scan_omp_for): Update check for illegal nesting.
(check_omp_nesting_restrictions): Handle new target kinds.
(lower_oacc_reductions): Likewise.
(lower_omp_target): Likewise.
* omp-offload.c (execute_oacc_device_lower): Likewise.
---
gcc/gimple-pretty-print.c | 9 +++++++++
gcc/gimple.h | 14 +++++++++++++
gcc/omp-expand.c | 34 ++++++++++++++++++++++++++++----
gcc/omp-low.c | 50 ++++++++++++++++++++++++++++++++++++++++++-----
gcc/omp-offload.c | 20 +++++++++++++++++++
5 files changed, 118 insertions(+), 9 deletions(-)
diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c
index ce339ee..cf4d0e0 100644
--- a/gcc/gimple-pretty-print.c
+++ b/gcc/gimple-pretty-print.c
@@ -1691,6 +1691,15 @@ dump_gimple_omp_target (pretty_printer *buffer,
gomp_target *gs,
case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
kind = " oacc_host_data";
break;
+ case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+ kind = " oacc_parallel_kernels_parallelized";
+ break;
+ case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
+ kind = " oacc_parallel_kernels_gang_single";
+ break;
+ case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
+ kind = " oacc_data_kernels";
+ break;
default:
gcc_unreachable ();
}
diff --git a/gcc/gimple.h b/gcc/gimple.h
index 47070e7..d8423be 100644
--- a/gcc/gimple.h
+++ b/gcc/gimple.h
@@ -184,6 +184,15 @@ enum gf_mask {
GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA = 9,
GF_OMP_TARGET_KIND_OACC_DECLARE = 10,
GF_OMP_TARGET_KIND_OACC_HOST_DATA = 11,
+ /* A GF_OMP_TARGET_KIND_OACC_PARALLEL that originates from a 'kernels'
+ construct, parallelized. */
+ GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED = 12,
+ /* A GF_OMP_TARGET_KIND_OACC_PARALLEL that originates from a 'kernels'
+ construct, "gang-single". */
+ GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE = 13,
+ /* A GF_OMP_TARGET_KIND_OACC_DATA that originates from a 'kernels'
+ construct. */
+ GF_OMP_TARGET_KIND_OACC_DATA_KERNELS = 14,
GF_OMP_TEAMS_GRID_PHONY = 1 << 0,
GF_OMP_TEAMS_HOST = 1 << 1,
@@ -6479,6 +6488,9 @@ is_gimple_omp_oacc (const gimple *stmt)
case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
case GF_OMP_TARGET_KIND_OACC_DECLARE:
case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
+ case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+ case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
+ case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
return true;
default:
return false;
@@ -6503,6 +6515,8 @@ is_gimple_omp_offloaded (const gimple *stmt)
case GF_OMP_TARGET_KIND_REGION:
case GF_OMP_TARGET_KIND_OACC_PARALLEL:
case GF_OMP_TARGET_KIND_OACC_KERNELS:
+ case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+ case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
return true;
default:
return false;
diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c
index c007ec1..7e4d5a8 100644
--- a/gcc/omp-expand.c
+++ b/gcc/omp-expand.c
@@ -7914,6 +7914,8 @@ expand_omp_target (struct omp_region *region)
case GF_OMP_TARGET_KIND_ENTER_DATA:
case GF_OMP_TARGET_KIND_EXIT_DATA:
case GF_OMP_TARGET_KIND_OACC_PARALLEL:
+ case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+ case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
case GF_OMP_TARGET_KIND_OACC_KERNELS:
case GF_OMP_TARGET_KIND_OACC_UPDATE:
case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
@@ -7923,6 +7925,7 @@ expand_omp_target (struct omp_region *region)
case GF_OMP_TARGET_KIND_DATA:
case GF_OMP_TARGET_KIND_OACC_DATA:
case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
+ case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
data_region = true;
break;
default:
@@ -7945,16 +7948,30 @@ expand_omp_target (struct omp_region *region)
entry_bb = region->entry;
exit_bb = region->exit;
- if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
+ /* Further down, all OpenACC compute constructs will be mapped to
+ BUILT_IN_GOACC_PARALLEL, and to distinguish between them, we now attach
+ attributes. */
+ switch (gimple_omp_target_kind (entry_stmt))
{
+ case GF_OMP_TARGET_KIND_OACC_KERNELS:
mark_loops_in_oacc_kernels_region (region->entry, region->exit);
- /* Further down, both OpenACC kernels and OpenACC parallel constructs
- will be mappted to BUILT_IN_GOACC_PARALLEL, and to distinguish the
- two, there is an "oacc kernels" attribute set for OpenACC kernels. */
DECL_ATTRIBUTES (child_fn)
= tree_cons (get_identifier ("oacc kernels"),
NULL_TREE, DECL_ATTRIBUTES (child_fn));
+ break;
+ case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+ DECL_ATTRIBUTES (child_fn)
+ = tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
+ NULL_TREE, DECL_ATTRIBUTES (child_fn));
+ break;
+ case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
+ DECL_ATTRIBUTES (child_fn)
+ = tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
+ NULL_TREE, DECL_ATTRIBUTES (child_fn));
+ break;
+ default:
+ break;
}
if (offloaded)
@@ -8159,10 +8176,13 @@ expand_omp_target (struct omp_region *region)
break;
case GF_OMP_TARGET_KIND_OACC_KERNELS:
case GF_OMP_TARGET_KIND_OACC_PARALLEL:
+ case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+ case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
start_ix = BUILT_IN_GOACC_PARALLEL;
break;
case GF_OMP_TARGET_KIND_OACC_DATA:
case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
+ case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
start_ix = BUILT_IN_GOACC_DATA_START;
break;
case GF_OMP_TARGET_KIND_OACC_UPDATE:
@@ -8916,6 +8936,9 @@ build_omp_regions_1 (basic_block bb, struct omp_region
*parent,
case GF_OMP_TARGET_KIND_OACC_KERNELS:
case GF_OMP_TARGET_KIND_OACC_DATA:
case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
+ case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+ case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
+ case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
break;
case GF_OMP_TARGET_KIND_UPDATE:
case GF_OMP_TARGET_KIND_ENTER_DATA:
@@ -9170,6 +9193,9 @@ omp_make_gimple_edges (basic_block bb, struct omp_region
**region,
case GF_OMP_TARGET_KIND_OACC_KERNELS:
case GF_OMP_TARGET_KIND_OACC_DATA:
case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
+ case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+ case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
+ case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
break;
case GF_OMP_TARGET_KIND_UPDATE:
case GF_OMP_TARGET_KIND_ENTER_DATA:
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index a855c5b..623da18 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -178,8 +178,12 @@ is_oacc_parallel (omp_context *ctx)
{
enum gimple_code outer_type = gimple_code (ctx->stmt);
return ((outer_type == GIMPLE_OMP_TARGET)
- && (gimple_omp_target_kind (ctx->stmt)
- == GF_OMP_TARGET_KIND_OACC_PARALLEL));
+ && ((gimple_omp_target_kind (ctx->stmt)
+ == GF_OMP_TARGET_KIND_OACC_PARALLEL)
+ || (gimple_omp_target_kind (ctx->stmt)
+ == GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED)
+ || (gimple_omp_target_kind (ctx->stmt)
+ == GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE)));
}
/* Return true if CTX corresponds to an oacc kernels region. */
@@ -193,6 +197,22 @@ is_oacc_kernels (omp_context *ctx)
== GF_OMP_TARGET_KIND_OACC_KERNELS));
}
+/* Return true if CTX's statement is a GIMPLE_OMP_TARGET of one of the kinds
+ derived from a 'kernels' construct: parallelized, gang-single, or data. */
+
+static bool
+was_originally_oacc_kernels (omp_context *ctx)
+{
+ enum gimple_code outer_type = gimple_code (ctx->stmt);
+ return ((outer_type == GIMPLE_OMP_TARGET)
+ && ((gimple_omp_target_kind (ctx->stmt)
+ == GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED)
+ || (gimple_omp_target_kind (ctx->stmt)
+ == GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE)
+ || (gimple_omp_target_kind (ctx->stmt)
+ == GF_OMP_TARGET_KIND_OACC_DATA_KERNELS)));
+}
+
/* If DECL is the artificial dummy VAR_DECL created for non-static
data member privatization, return the underlying "this" parameter,
otherwise return NULL. */
@@ -2319,7 +2339,8 @@ scan_omp_for (gomp_for *stmt, omp_context *outer_ctx)
{
omp_context *tgt = enclosing_target_ctx (outer_ctx);
- if (!tgt || is_oacc_parallel (tgt))
+ if (!tgt || (is_oacc_parallel (tgt)
+ && !was_originally_oacc_kernels (tgt)))
for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
{
char const *check = NULL;
@@ -2752,6 +2773,8 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context
*ctx)
{
case GF_OMP_TARGET_KIND_OACC_PARALLEL:
case GF_OMP_TARGET_KIND_OACC_KERNELS:
+ case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+ case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
ok = true;
break;
@@ -3207,6 +3230,11 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context
*ctx)
case GF_OMP_TARGET_KIND_OACC_DECLARE: stmt_name = "declare"; break;
case GF_OMP_TARGET_KIND_OACC_HOST_DATA: stmt_name = "host_data";
break;
+ case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+ case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
+ case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
+ /* These three cases arise from kernels conversion. */
+ stmt_name = "kernels"; break;
default: gcc_unreachable ();
}
switch (gimple_omp_target_kind (ctx->stmt))
@@ -3220,6 +3248,11 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context
*ctx)
case GF_OMP_TARGET_KIND_OACC_DATA: ctx_stmt_name = "data"; break;
case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
ctx_stmt_name = "host_data"; break;
+ case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+ case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
+ case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
+ /* These three cases arise from kernels conversion. */
+ ctx_stmt_name = "kernels"; break;
default: gcc_unreachable ();
}
@@ -6375,8 +6408,12 @@ lower_oacc_reductions (location_t loc, tree clauses, tree
level, bool inner,
break;
case GIMPLE_OMP_TARGET:
- if (gimple_omp_target_kind (probe->stmt)
- != GF_OMP_TARGET_KIND_OACC_PARALLEL)
+ if ((gimple_omp_target_kind (probe->stmt)
+ != GF_OMP_TARGET_KIND_OACC_PARALLEL)
+ && (gimple_omp_target_kind (probe->stmt)
+ !=
GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED)
+ && (gimple_omp_target_kind (probe->stmt)
+ !=
GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE))
goto do_lookup;
cls = gimple_omp_target_clauses (probe->stmt);
@@ -11027,11 +11064,14 @@ lower_omp_target (gimple_stmt_iterator *gsi_p,
omp_context *ctx)
case GF_OMP_TARGET_KIND_OACC_UPDATE:
case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
case GF_OMP_TARGET_KIND_OACC_DECLARE:
+ case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+ case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
data_region = false;
break;
case GF_OMP_TARGET_KIND_DATA:
case GF_OMP_TARGET_KIND_OACC_DATA:
case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
+ case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
data_region = true;
break;
default:
diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c
index da788d9..4ebfa83 100644
--- a/gcc/omp-offload.c
+++ b/gcc/omp-offload.c
@@ -1525,6 +1525,20 @@ execute_oacc_device_lower ()
bool is_oacc_kernels_parallelized
= (lookup_attribute ("oacc kernels parallelized",
DECL_ATTRIBUTES (current_function_decl)) != NULL);
+ if (is_oacc_kernels_parallelized)
+ gcc_checking_assert (is_oacc_kernels);
+ bool is_oacc_parallel_kernels_parallelized
+ = (lookup_attribute ("oacc parallel_kernels_parallelized",
+ DECL_ATTRIBUTES (current_function_decl)) != NULL);
+ if (is_oacc_parallel_kernels_parallelized)
+ gcc_checking_assert (!is_oacc_kernels);
+ bool is_oacc_parallel_kernels_gang_single
+ = (lookup_attribute ("oacc parallel_kernels_gang_single",
+ DECL_ATTRIBUTES (current_function_decl)) != NULL);
+ if (is_oacc_parallel_kernels_gang_single)
+ gcc_checking_assert (!is_oacc_kernels);
+ gcc_checking_assert (!(is_oacc_parallel_kernels_parallelized
+ && is_oacc_parallel_kernels_gang_single));
/* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
kernels, so remove the parallelism dimensions function attributes
@@ -1548,6 +1562,12 @@ execute_oacc_device_lower ()
fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
(is_oacc_kernels_parallelized
? "parallelized" : "unparallelized"));
+ else if (is_oacc_parallel_kernels_parallelized)
+ fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
+ "parallel_kernels_parallelized");
+ else if (is_oacc_parallel_kernels_gang_single)
+ fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
+ "parallel_kernels_gang_single");
else
fprintf (dump_file, "Function is OpenACC parallel offload\n");
}
--
2.8.1