Loops in gang-single parts of kernels regions cannot be executed in
gang-redundant mode. If the user specified gang clauses on such loops, emit a
warning and remove these clauses. Adjust automatic partitioning to exclude gang
partitioning in gang-single regions.
2019-07-16 Gergö Barany <ge...@codesourcery.com>
gcc/
* omp-oacc-kernels.c (visit_loops_in_gang_single_region): Emit warning on
conditionally executed code with a gang clause.
(make_loops_gang_single): New function.
(add_parent_or_loop_num_clause): New function.
(adjust_nested_loop_clauses_wi_info): New struct.
(adjust_nested_loop_clauses): New function.
(transform_kernels_loop_clauses): Add worker and vector clause parameters,
emit error on illegal nesting.
(make_gang_parallel_loop_region): Likewise.
(decompose_kernels_region_body): Separate out gang/worker/vector clauses
for separate handling; add call to make_loops_gang_single.
* omp-offload.c (oacc_loop_auto_partitions): Add and propagate
is_oacc_gang_single parameter.
(oacc_loop_partition): Likewise.
(execute_oacc_device_lower): Adjust call to oacc_loop_partition.
---
gcc/omp-oacc-kernels.c | 380 ++++++++++++++++++++++++++++++++++++++++++++-----
gcc/omp-offload.c | 22 ++-
2 files changed, 364 insertions(+), 38 deletions(-)
diff --git a/gcc/omp-oacc-kernels.c b/gcc/omp-oacc-kernels.c
index 80a82fa..11a960c 100644
--- a/gcc/omp-oacc-kernels.c
+++ b/gcc/omp-oacc-kernels.c
@@ -59,7 +59,14 @@ along with GCC; see the file COPYING3. If not see
- Any sequences of other code (non-loops, non-OpenACC loops) are wrapped
in new "gang-single" parallel regions: Worker/vector annotations are
copied from the original kernels region if present, but num_gangs is
- explicitly set to 1. */
+ explicitly set to 1.
+ - Both points above only apply at the topmost level in the region, i.e.,
+ the transformation does not introduce new parallel regions inside
+ nested statement bodies. In particular, this means that a
+ gang-parallelizable loop inside an if statement is "gang-serialized" by
+ the transformation.
+ The transformation visits loops inside such new gang-single-regions and
+ removes and warns about any gang annotations. */
/* Helper function for decompose_kernels_region_body. If STMT contains a
"top-level" OMP_FOR statement, returns a pointer to that statement;
@@ -122,6 +129,67 @@ top_level_omp_for_in_stmt (gimple *stmt)
return NULL;
}
+/* Callback for the statement walk started by make_loops_gang_single.  When
+   the statement at GSI_P is an OpenACC loop (GIMPLE_OMP_FOR) carrying a gang
+   clause, warn about the clause and unlink it from the loop's clause chain.
+   *HANDLED_OPS_P is set for loops and cleared for every other statement.
+   Always returns NULL.  */
+
+static tree
+visit_loops_in_gang_single_region (gimple_stmt_iterator *gsi_p,
+                                   bool *handled_ops_p,
+                                   struct walk_stmt_info *)
+{
+  gimple *stmt = gsi_stmt (*gsi_p);
+
+  if (gimple_code (stmt) != GIMPLE_OMP_FOR)
+    {
+      *handled_ops_p = false;
+      return NULL;
+    }
+
+  tree clauses = gimple_omp_for_clauses (stmt);
+  /* LINK addresses the chain slot that holds the clause being inspected, so
+     unlinking the head and unlinking an inner clause are the same store.  */
+  for (tree *link = &clauses; *link != NULL; link = &OMP_CLAUSE_CHAIN (*link))
+    {
+      if (OMP_CLAUSE_CODE (*link) != OMP_CLAUSE_GANG)
+        continue;
+
+      /* A gang clause makes no sense inside a gang-single region: warn and
+         drop it.  Only the first gang clause found is removed.  */
+      warning_at (gimple_location (stmt), 0,
+                  "conditionally executed loop in kernels region"
+                  " will be executed in a single gang;"
+                  " ignoring %<gang%> clause");
+      *link = OMP_CLAUSE_CHAIN (*link);
+      break;
+    }
+  gimple_omp_for_set_clauses (stmt, clauses);
+
+  /* No loop nested inside this one can be gang-partitioned, so there is no
+     need to look any deeper.  */
+  *handled_ops_p = true;
+  return NULL;
+}
+
+/* Walk the statement at GSI, which is expected to sit inside a gang-single
+   region, together with everything nested in it.  Every OpenACC loop found
+   with a gang clause gets a warning and loses that clause.  */
+
+static void
+make_loops_gang_single (gimple_stmt_iterator gsi)
+{
+  struct walk_stmt_info walk_info;
+
+  memset (&walk_info, 0, sizeof walk_info);
+  walk_gimple_stmt (&gsi, visit_loops_in_gang_single_region, NULL,
+                    &walk_info);
+}
+
/* Construct a "gang-single" OpenACC parallel region at LOC containing the
STMTS. The newly created region is annotated with CLAUSES, which must
not contain a num_gangs clause, and an additional "num_gangs(1)" clause
@@ -150,45 +218,253 @@ make_gang_single_region (location_t loc, gimple_seq stmts, tree clauses)
return single_region;
}
+/* Helper function for make_gang_parallel_loop_region.  Prepends at most one
+   num_gangs (num_workers, vector_length) clause to CLAUSES and returns the
+   resulting chain.  If the parent region supplied PARENT_CLAUSE, an unshared
+   copy of it is used.  Otherwise, if the loop supplied LOOP_CLAUSE
+   ("gang(num: N)" or similar for workers or vectors), a fresh clause with
+   code CLAUSE_CODE is built from its numeric argument.  If neither exists,
+   CLAUSES is returned unchanged.  */
+
+static tree
+add_parent_or_loop_num_clause (tree parent_clause, tree loop_clause,
+                               omp_clause_code clause_code, tree clauses)
+{
+  tree head;
+
+  if (parent_clause != NULL)
+    head = unshare_expr (parent_clause);
+  else if (loop_clause != NULL)
+    {
+      /* The kernels region has no clause of this kind, but the loop itself
+         carried a numeric argument.  Honor it by putting the corresponding
+         clause on the parallel region.  */
+      head = build_omp_clause (OMP_CLAUSE_LOCATION (loop_clause), clause_code);
+      OMP_CLAUSE_OPERAND (head, 0) = OMP_CLAUSE_OPERAND (loop_clause, 0);
+    }
+  else
+    return clauses;
+
+  OMP_CLAUSE_CHAIN (head) = clauses;
+  return head;
+}
+
+/* Helper for make_gang_parallel_loop_region, looking for "gang(num: N)",
+ "worker(num: N)" or "vector(length: N)" clauses in nested loops. Removes
+ the numeric argument, transferring it to the enclosing parallel region
+ (via WI->INFO). If numeric arguments within the same loop nest conflict,
+ emits an error.
+
+ The walk callback adjust_nested_loop_clauses also decides whether to add
+ an auto clause on each of these nested loops. It adds an auto clause
+ unless there is already an independent/seq/auto clause.
+ NOTE(review): earlier wording also listed gang/worker/vector annotations
+ as suppressing the auto clause, but the callback only checks
+ independent/seq/auto -- confirm which behavior is intended. */
+
+/* Data handed to adjust_nested_loop_clauses through WI->INFO. Each member
+ points at a clause variable owned by the caller
+ (transform_kernels_loop_clauses passes the addresses of its local
+ loop_gang_clause, loop_worker_clause and loop_vector_clause). The pointed-to
+ variable is NULL until a clause of that kind with a numeric argument has
+ been recorded. */
+struct adjust_nested_loop_clauses_wi_info
+{
+ tree *loop_gang_clause_ptr;
+ tree *loop_worker_clause_ptr;
+ tree *loop_vector_clause_ptr;
+};
+
+/* Statement-walk callback; WI->INFO must point to an
+ adjust_nested_loop_clauses_wi_info. Statements other than GIMPLE_OMP_FOR
+ are ignored. For a loop, every gang/worker/vector clause with a numeric
+ argument is either recorded in the caller's slot (first occurrence) or
+ compared against the recorded one (later occurrences, with an error on
+ mismatch); in both cases the argument is removed from the loop's clause.
+ An auto clause is prepended unless the loop already has an
+ independent/seq/auto clause. The unnamed handled-ops flag is never
+ written. Always returns NULL. */
+static tree
+adjust_nested_loop_clauses (gimple_stmt_iterator *gsi_p, bool *,
+ struct walk_stmt_info *wi)
+{
+ struct adjust_nested_loop_clauses_wi_info *wi_info
+ = (struct adjust_nested_loop_clauses_wi_info *) wi->info;
+ gimple *stmt = gsi_stmt (*gsi_p);
+
+ if (gimple_code (stmt) == GIMPLE_OMP_FOR)
+ {
+ bool add_auto_clause = true;
+ tree loop_clauses = gimple_omp_for_clauses (stmt);
+ tree loop_clause = loop_clauses;
+ for (; loop_clause; loop_clause = OMP_CLAUSE_CHAIN (loop_clause))
+ {
+ /* Stays NULL unless this clause is a gang/worker/vector clause. */
+ tree *outer_clause_ptr = NULL;
+ switch (OMP_CLAUSE_CODE (loop_clause))
+ {
+ case OMP_CLAUSE_GANG:
+ outer_clause_ptr = wi_info->loop_gang_clause_ptr;
+ break;
+ case OMP_CLAUSE_WORKER:
+ outer_clause_ptr = wi_info->loop_worker_clause_ptr;
+ break;
+ case OMP_CLAUSE_VECTOR:
+ outer_clause_ptr = wi_info->loop_vector_clause_ptr;
+ break;
+ case OMP_CLAUSE_INDEPENDENT:
+ case OMP_CLAUSE_SEQ:
+ case OMP_CLAUSE_AUTO:
+ add_auto_clause = false;
+ /* FALLTHRU */
+ default:
+ break;
+ }
+ if (outer_clause_ptr != NULL)
+ {
+ if (OMP_CLAUSE_OPERAND (loop_clause, 0) != NULL
+ && *outer_clause_ptr == NULL)
+ {
+ /* Transfer the clause to the enclosing parallel region
+ and remove the numerical argument from the loop. */
+ *outer_clause_ptr = unshare_expr (loop_clause);
+ OMP_CLAUSE_OPERAND (loop_clause, 0) = NULL;
+ }
+ /* Reaching the second test below with a non-NULL loop argument
+ means the branch above was not taken, so *OUTER_CLAUSE_PTR is
+ non-NULL and safe to inspect. */
+ else if (OMP_CLAUSE_OPERAND (loop_clause, 0) != NULL &&
+ OMP_CLAUSE_OPERAND (*outer_clause_ptr, 0) != NULL)
+ {
+ /* See if both of these are the same constant. If they
+ aren't, emit an error. */
+ tree old_op = OMP_CLAUSE_OPERAND (*outer_clause_ptr, 0);
+ tree new_op = OMP_CLAUSE_OPERAND (loop_clause, 0);
+ if (!(cst_and_fits_in_hwi (old_op) &&
+ cst_and_fits_in_hwi (new_op) &&
+ int_cst_value (old_op) == int_cst_value (new_op)))
+ {
+ const char *clause_name
+ = omp_clause_code_name[OMP_CLAUSE_CODE (loop_clause)];
+ error_at (gimple_location (stmt),
+ "cannot honor conflicting %qs annotation",
+ clause_name);
+ inform (OMP_CLAUSE_LOCATION (*outer_clause_ptr),
+ "location of the previous annotation "
+ "in the same loop nest");
+ }
+ /* Whether or not the values agreed, the loop keeps only the
+ argument-less clause. */
+ OMP_CLAUSE_OPERAND (loop_clause, 0) = NULL;
+ }
+ }
+ }
+ if (add_auto_clause)
+ {
+ /* Prepend the new auto clause to the loop's existing clauses. */
+ tree auto_clause
+ = build_omp_clause (gimple_location (stmt), OMP_CLAUSE_AUTO);
+ OMP_CLAUSE_CHAIN (auto_clause) = loop_clauses;
+ gimple_omp_for_set_clauses (stmt, auto_clause);
+ }
+ }
+
+ return NULL;
+}
+
/* Helper for make_region_loop_nest. Transform OpenACC 'kernels'/'loop'
construct clauses into OpenACC 'parallel'/'loop' construct ones. */
+/* OMP_FOR is the outermost loop of the nest. NUM_GANGS_CLAUSE,
+ NUM_WORKERS_CLAUSE and VECTOR_LENGTH_CLAUSE are the optional clauses taken
+ from the kernels region; CLAUSES is the clause chain for the new parallel
+ region. Numeric arguments of gang/worker/vector clauses on OMP_FOR and on
+ loops nested in it are stripped from the loops; where the kernels region
+ has no corresponding clause, they are turned into
+ num_gangs/num_workers/vector_length clauses. OMP_FOR gets an auto clause
+ unless it already has an independent/seq/auto clause. Returns CLAUSES
+ with the num_gangs/num_workers/vector_length clauses prepended. */
static tree
transform_kernels_loop_clauses (gimple *omp_for,
tree num_gangs_clause,
+ tree num_workers_clause,
+ tree vector_length_clause,
tree clauses)
{
/* If this loop in a kernels region does not have an explicit
"independent", "seq", or "auto" clause, we must give it an explicit
- "auto" clause. */
+ "auto" clause.
+ We also check for "gang(num: N)" clauses. These must not appear in
+ kernels regions that have their own "num_gangs" clause. Otherwise, they
+ must be converted and put on the region; similarly for workers and
+ vectors. */
bool add_auto_clause = true;
+ tree loop_gang_clause = NULL, loop_worker_clause = NULL,
+ loop_vector_clause = NULL;
tree loop_clauses = gimple_omp_for_clauses (omp_for);
- for (tree c = loop_clauses; c; c = OMP_CLAUSE_CHAIN (c))
+ for (tree loop_clause = loop_clauses;
+ loop_clause;
+ loop_clause = OMP_CLAUSE_CHAIN (loop_clause))
{
- if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_AUTO
- || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_INDEPENDENT
- || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SEQ)
- {
- add_auto_clause = false;
- break;
+ /* Look for gang, worker, and vector clauses. */
+ bool found_num_clause = false;
+ tree *clause_ptr, clause_to_check;
+ switch (OMP_CLAUSE_CODE (loop_clause))
+ {
+ case OMP_CLAUSE_GANG:
+ found_num_clause = true;
+ clause_ptr = &loop_gang_clause;
+ clause_to_check = num_gangs_clause;
+ break;
+ case OMP_CLAUSE_WORKER:
+ found_num_clause = true;
+ clause_ptr = &loop_worker_clause;
+ clause_to_check = num_workers_clause;
+ break;
+ case OMP_CLAUSE_VECTOR:
+ found_num_clause = true;
+ clause_ptr = &loop_vector_clause;
+ clause_to_check = vector_length_clause;
+ break;
+ case OMP_CLAUSE_INDEPENDENT:
+ case OMP_CLAUSE_SEQ:
+ case OMP_CLAUSE_AUTO:
+ add_auto_clause = false;
+ /* FALLTHRU */
+ default:
+ break;
}
- }
+ if (found_num_clause && OMP_CLAUSE_OPERAND (loop_clause, 0) != NULL)
+ {
+ if (clause_to_check)
+ {
+ const char *clause_name
+ = omp_clause_code_name[OMP_CLAUSE_CODE (loop_clause)];
+ const char *parent_clause_name
+ = omp_clause_code_name[OMP_CLAUSE_CODE (clause_to_check)];
+ error_at (OMP_CLAUSE_LOCATION (loop_clause),
+ "argument not permitted on %qs clause"
+ " in OpenACC %<kernels%> region with a %qs clause",
+ clause_name, parent_clause_name);
+ inform (OMP_CLAUSE_LOCATION (clause_to_check),
+ "location of OpenACC %<kernels%> region");
+ }
+ /* Copy the gang(N)/worker(N)/vector(N) clause to the enclosing
+ parallel region. */
+ *clause_ptr = unshare_expr (loop_clause);
+ OMP_CLAUSE_CHAIN (*clause_ptr) = NULL;
+ /* Leave a gang/worker/vector clause on the loop, but without a
+ numeric argument. */
+ OMP_CLAUSE_OPERAND (loop_clause, 0) = NULL;
+ }
+ }
if (add_auto_clause)
{
tree auto_clause = build_omp_clause (gimple_location (omp_for),
OMP_CLAUSE_AUTO);
OMP_CLAUSE_CHAIN (auto_clause) = loop_clauses;
- gimple_omp_for_set_clauses (omp_for, auto_clause);
- }
-
- /* If the kernels region had a num_gangs clause, add that to this new
- parallel region. */
- if (num_gangs_clause != NULL)
- {
- tree parallel_num_gangs_clause = unshare_expr (num_gangs_clause);
- OMP_CLAUSE_CHAIN (parallel_num_gangs_clause) = clauses;
- clauses = parallel_num_gangs_clause;
+ loop_clauses = auto_clause;
}
+ gimple_omp_for_set_clauses (omp_for, loop_clauses);
+ /* We must also recurse into the loop; it might contain nested loops
+ having their own "worker(num: W)" or "vector(length: V)" annotations.
+ Turn these into worker/vector annotations on the parallel region. */
+ struct walk_stmt_info wi;
+ memset (&wi, 0, sizeof (wi));
+ struct adjust_nested_loop_clauses_wi_info wi_info;
+ wi_info.loop_gang_clause_ptr = &loop_gang_clause;
+ wi_info.loop_worker_clause_ptr = &loop_worker_clause;
+ wi_info.loop_vector_clause_ptr = &loop_vector_clause;
+ wi.info = &wi_info;
+ gimple *body = gimple_omp_body (omp_for);
+ walk_gimple_seq (body, adjust_nested_loop_clauses, NULL, &wi);
+ /* Forget any collected loop clause that has no numeric argument, so that
+ add_parent_or_loop_num_clause does not build an argument-less
+ num_gangs/num_workers/vector_length clause from it. Only the
+ loop-derived clause is dropped; a clause inherited from the kernels
+ region is kept. */
+ if (loop_gang_clause != NULL &&
+ OMP_CLAUSE_OPERAND (loop_gang_clause, 0) == NULL)
+ loop_gang_clause = NULL;
+ if (loop_worker_clause != NULL &&
+ OMP_CLAUSE_OPERAND (loop_worker_clause, 0) == NULL)
+ loop_worker_clause = NULL;
+ if (loop_vector_clause != NULL &&
+ OMP_CLAUSE_OPERAND (loop_vector_clause, 0) == NULL)
+ loop_vector_clause = NULL;
+
+ /* If the kernels region had num_gangs, num_workers, vector_length clauses,
+ add these to this new parallel region. */
+ clauses
+ = add_parent_or_loop_num_clause (num_gangs_clause, loop_gang_clause,
+ OMP_CLAUSE_NUM_GANGS, clauses);
+ clauses
+ = add_parent_or_loop_num_clause (num_workers_clause, loop_worker_clause,
+ OMP_CLAUSE_NUM_WORKERS, clauses);
+ clauses
+ = add_parent_or_loop_num_clause (vector_length_clause, loop_vector_clause,
+ OMP_CLAUSE_VECTOR_LENGTH, clauses);
return clauses;
}
@@ -197,18 +473,33 @@ transform_kernels_loop_clauses (gimple *omp_for,
STMT, which must be identical to, or a bind containing, the loop OMP_FOR
with OpenACC loop annotations.
- The newly created region is annotated with the optional NUM_GANGS_CLAUSE
- as well as the other CLAUSES, which must not contain a num_gangs clause. */
+ The NUM_GANGS_CLAUSE, NUM_WORKERS_CLAUSE, and VECTOR_LENGTH_CLAUSE are
+ optional clauses from the original kernels region and must not be
+ contained in the other CLAUSES. The newly created region is annotated
+ with the optional NUM_GANGS_CLAUSE as well as the other CLAUSES. If there
+ is no NUM_GANGS_CLAUSE but the loop has a "gang(num: N)" clause, that is
+ converted to a "num_gangs(N)" clause on the new region, and similarly for
+ workers and vectors.
+
+ The outermost loop gets an auto clause unless there already is an
+ independent/seq/auto clause or a gang/worker/vector annotation. Nested
+ loops inside OMP_FOR are treated similarly by the
+ adjust_nested_loop_clauses function. */
static gimple *
make_gang_parallel_loop_region (gimple *omp_for, gimple *stmt,
- tree num_gangs_clause, tree clauses)
+ tree num_gangs_clause,
+ tree num_workers_clause,
+ tree vector_length_clause,
+ tree clauses)
{
/* This correctly unshares the entire clause chain rooted here. */
clauses = unshare_expr (clauses);
clauses = transform_kernels_loop_clauses (omp_for,
num_gangs_clause,
+ num_workers_clause,
+ vector_length_clause,
clauses);
/* Now build the parallel region containing this loop. */
@@ -596,23 +887,43 @@ decompose_kernels_region_body (gimple *kernels_region, tree kernels_clauses)
location_t loc = gimple_location (kernels_region);
/* The kernels clauses will be propagated to the child clauses unmodified,
- except that that num_gangs clause will only be added to loop regions.
- The other regions are "gang-single" and get an explicit num_gangs(1)
- clause. So separate out the num_gangs clause here. */
- tree num_gangs_clause = NULL, prev_clause = NULL;
+ except that the num_gangs, num_workers, and vector_length clauses will
+ only be added to loop regions. The other regions are "gang-single" and
+ get an explicit num_gangs(1) clause. So separate out the num_gangs,
+ num_workers, and vector_length clauses here. */
+ tree num_gangs_clause = NULL, num_workers_clause = NULL,
+ vector_length_clause = NULL;
+ tree prev_clause = NULL, next_clause = NULL;
tree parallel_clauses = kernels_clauses;
- for (tree c = parallel_clauses; c; c = OMP_CLAUSE_CHAIN (c))
+ for (tree c = parallel_clauses; c; c = next_clause)
{
- if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_NUM_GANGS)
+ /* Preserve this here, as we might NULL it later. */
+ next_clause = OMP_CLAUSE_CHAIN (c);
+
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_NUM_GANGS
+ || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_NUM_WORKERS
+ || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_VECTOR_LENGTH)
{
/* Cut this clause out of the chain. */
- num_gangs_clause = c;
if (prev_clause != NULL)
OMP_CLAUSE_CHAIN (prev_clause) = OMP_CLAUSE_CHAIN (c);
else
kernels_clauses = OMP_CLAUSE_CHAIN (c);
- OMP_CLAUSE_CHAIN (num_gangs_clause) = NULL;
- break;
+ OMP_CLAUSE_CHAIN (c) = NULL;
+ switch (OMP_CLAUSE_CODE (c))
+ {
+ case OMP_CLAUSE_NUM_GANGS:
+ num_gangs_clause = c;
+ break;
+ case OMP_CLAUSE_NUM_WORKERS:
+ num_workers_clause = c;
+ break;
+ case OMP_CLAUSE_VECTOR_LENGTH:
+ vector_length_clause = c;
+ break;
+ default:
+ gcc_unreachable ();
+ }
}
else
prev_clause = c;
@@ -735,6 +1046,8 @@ decompose_kernels_region_body (gimple *kernels_region, tree kernels_clauses)
gimple *parallel_region
= make_gang_parallel_loop_region (omp_for, stmt,
num_gangs_clause,
+ num_workers_clause,
+ vector_length_clause,
kernels_clauses);
gimple_seq_add_stmt (&region_body, parallel_region);
}
@@ -752,6 +1065,9 @@ decompose_kernels_region_body (gimple *kernels_region, tree kernels_clauses)
&& DECL_ARTIFICIAL (gimple_assign_lhs (stmt)));
if (!is_simple_assignment)
only_simple_assignments = false;
+ /* Remove and issue warnings about gang clauses on any OpenACC
+ loops nested inside this sequentially executed statement. */
+ make_loops_gang_single (gsi);
}
}
diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c
index 4ebfa83..23d7455 100644
--- a/gcc/omp-offload.c
+++ b/gcc/omp-offload.c
@@ -1310,7 +1310,7 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
static unsigned
oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask,
- bool outer_assign)
+ bool outer_assign, bool is_oacc_gang_single)
{
bool assign = (loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT);
bool noisy = true;
@@ -1328,6 +1328,10 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask,
non-innermost available level. */
unsigned this_mask = GOMP_DIM_MASK (GOMP_DIM_GANG);
+ /* Gang partitioning is not available in a gang-single region. */
+ if (is_oacc_gang_single)
+ this_mask = GOMP_DIM_MASK (GOMP_DIM_WORKER);
+
/* Find the first outermost available partition. */
while (this_mask <= outer_mask)
this_mask <<= 1;
@@ -1357,7 +1361,8 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask,
{
unsigned tmp_mask = outer_mask | loop->mask | loop->e_mask;
loop->inner = oacc_loop_auto_partitions (loop->child, tmp_mask,
- outer_assign | assign);
+ outer_assign | assign,
+ is_oacc_gang_single);
}
if (assign && (!loop->mask || (tiling && !loop->e_mask) || !outer_assign))
@@ -1416,7 +1421,8 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask,
if (loop->sibling)
inner_mask |= oacc_loop_auto_partitions (loop->sibling,
- outer_mask, outer_assign);
+ outer_mask, outer_assign,
+ is_oacc_gang_single);
inner_mask |= loop->inner | loop->mask | loop->e_mask;
@@ -1427,14 +1433,16 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask,
axes. Return mask of partitioning. */
+/* IS_OACC_GANG_SINGLE is not interpreted here; it is passed on unchanged to
+ oacc_loop_auto_partitions. */
static unsigned
-oacc_loop_partition (oacc_loop *loop, unsigned outer_mask)
+oacc_loop_partition (oacc_loop *loop, unsigned outer_mask,
+ bool is_oacc_gang_single)
{
unsigned mask_all = oacc_loop_fixed_partitions (loop, outer_mask);
+ /* The GOMP_DIM_MAX bit in the fixed-partitioning result is used as a flag
+ rather than a real dimension: when set, it is cleared again and the
+ automatic partitioning pass is run (presumably it marks loops that still
+ need automatic assignment -- confirm in oacc_loop_fixed_partitions). */
if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX))
{
mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX);
- mask_all |= oacc_loop_auto_partitions (loop, outer_mask, false);
+ mask_all |= oacc_loop_auto_partitions (loop, outer_mask, false,
+ is_oacc_gang_single);
}
return mask_all;
}
@@ -1573,7 +1581,9 @@ execute_oacc_device_lower ()
}
unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
- unsigned used_mask = oacc_loop_partition (loops, outer_mask);
+ unsigned used_mask = oacc_loop_partition (loops, outer_mask,
+ is_oacc_parallel_kernels_gang_single);
+
/* OpenACC kernels constructs are special: they currently don't use the
generic oacc_loop infrastructure and attribute/dimension processing. */
if (is_oacc_kernels && is_oacc_kernels_parallelized)
--
2.8.1