I've committed this patch, which simplifies the generation of openacc reduction
code. Primarily three changes
1) Don't have a fake gang reduction outside of worker & vector loops. Deal with
the receiver object directly. I.e. 'ref_to_res' need not be a null pointer for
vector and worker loops.
2) Create a local private instance for all cases of reference var reductions,
not just those in vector & worker loops
3) Generate the sequences of reduction functions in one go, rather than multiple
scans of the reduction clauses.
nathan
2015-09-22 Nathan Sidwell <nat...@codesourcery.com>
* omp-low.c (oacc_lid, oacc_gang_reduction_init,
oacc_gang_reduction_fini): Delete.
(lower_oacc_reductions): Reimplement to generate all 4 reduction
functions in one go.
(oacc_fake_gang_reduction): Delete.
(lower_oacc_head_tail): New. Create both head and tail sequences
in one go.
(lower_oacc_loop_helper): Delete.
(lower_reduction_clauses): Don't lower oacc reductions here.
(lower_oacc_loop_enter_exit): Delete.
(lower_omp_for): Update to use lower_oacc_head_tail.
(lower_omp_target): Update for lower_oacc_reductions change.
* gimplify.c (localize_reductions): Remove default arg, update
callers. Always localize reference reductions.l
* config/nvptx/nvptx.c (nvptx_goacc_reduction_setup): Check for
receiver object read for non-gang reductions.
(nvptx_goacc_reduction_teardown): Likewise.
(nvptx_goacc_reduction_init): Copy var for gang reductions lacking
a receiver object.
(nvptx_goacc_reduction_fini): Likewise.
Index: gimplify.c
===================================================================
--- gimplify.c (revision 227929)
+++ gimplify.c (working copy)
@@ -7431,31 +7431,12 @@ localize_reductions_r (tree *tp, int *wa
which are not associated with acc loops. */
static void
-localize_reductions (tree *expr_p, bool target = false)
+localize_reductions (tree *expr_p, bool target)
{
tree clauses = target ? OMP_CLAUSES (*expr_p) : OMP_FOR_CLAUSES (*expr_p);
tree c, var, type, new_var;
struct privatize_reduction pr;
- int gwv_cur = 0;
- int mask_wv =
- GOMP_DIM_MASK (GOMP_DIM_WORKER) | GOMP_DIM_MASK (GOMP_DIM_VECTOR);
-
- /* Non-vector and worker reduction do not need to be localized. */
- for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
- {
- enum omp_clause_code cc = OMP_CLAUSE_CODE (c);
-
- if (cc == OMP_CLAUSE_GANG)
- gwv_cur |= GOMP_DIM_MASK (GOMP_DIM_GANG);
- else if (cc == OMP_CLAUSE_WORKER)
- gwv_cur |= GOMP_DIM_MASK (GOMP_DIM_WORKER);
- else if (cc == OMP_CLAUSE_VECTOR)
- gwv_cur |= GOMP_DIM_MASK (GOMP_DIM_VECTOR);
- }
-
- if (!(gwv_cur & mask_wv) && target == false)
- return;
-
+
for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
{
@@ -7525,7 +7506,7 @@ gimplify_omp_for (tree *expr_p, gimple_s
}
if (ork == ORK_OACC)
- localize_reductions (expr_p);
+ localize_reductions (expr_p, false);
/* Set OMP_CLAUSE_LINEAR_NO_COPYIN flag on explicit linear
clause for the IV. */
Index: config/nvptx/nvptx.c
===================================================================
--- config/nvptx/nvptx.c (revision 227929)
+++ config/nvptx/nvptx.c (working copy)
@@ -4581,6 +4581,14 @@ nvptx_goacc_reduction_setup (gimple call
push_gimplify_context (true);
+ if (level != GOMP_DIM_GANG)
+ {
+ tree ref_to_res = gimple_call_arg (call, 0);
+
+ if (!integer_zerop (ref_to_res))
+ var = build_simple_mem_ref (ref_to_res);
+ }
+
if (level == GOMP_DIM_WORKER)
{
tree ptr = make_ssa_name (build_pointer_type (TREE_TYPE (var)));
@@ -4687,7 +4695,16 @@ nvptx_goacc_reduction_init (gimple call)
add_phi_arg (phi, var, nop_edge, gimple_location (call));
}
else
- gimplify_assign (lhs, init, &seq);
+ {
+ if (level == GOMP_DIM_GANG)
+ {
+ tree ref_to_res = gimple_call_arg (call, 0);
+ if (integer_zerop (ref_to_res))
+ init = var;
+ }
+
+ gimplify_assign (lhs, init, &seq);
+ }
pop_gimplify_context (NULL);
gsi_replace_with_seq (&gsi, seq, true);
@@ -4763,11 +4780,9 @@ nvptx_goacc_reduction_fini (gimple call)
}
else
{
- tree accum;
+ tree accum = NULL_TREE;
- if (level == GOMP_DIM_GANG)
- accum = build_simple_mem_ref (ref_to_res);
- else if (level == GOMP_DIM_WORKER)
+ if (level == GOMP_DIM_WORKER)
{
tree ptr = make_ssa_name (build_pointer_type (TREE_TYPE (var)));
tree call = nvptx_get_worker_red_addr_fn (var, rid, lid);
@@ -4775,13 +4790,19 @@ nvptx_goacc_reduction_fini (gimple call)
gimplify_assign (ptr, call, &seq);
accum = build_simple_mem_ref (ptr);
}
+ else if (integer_zerop (ref_to_res))
+ r = var;
else
- gcc_unreachable ();
+ accum = build_simple_mem_ref (ref_to_res);
- TREE_THIS_VOLATILE (accum) = 1;
- r = make_ssa_name (TREE_TYPE (var));
- gimplify_assign (r, fold_build2 (op, TREE_TYPE (var), accum, var), &seq);
- gimplify_assign (accum, r, &seq);
+ if (accum)
+ {
+ TREE_THIS_VOLATILE (accum) = 1;
+ r = make_ssa_name (TREE_TYPE (var));
+ gimplify_assign (r, fold_build2 (op, TREE_TYPE (var), accum, var),
+ &seq);
+ gimplify_assign (accum, r, &seq);
+ }
}
if (lhs)
@@ -4843,6 +4864,14 @@ nvptx_goacc_reduction_teardown (gimple c
else
r = var;
+ if (level != GOMP_DIM_GANG)
+ {
+ tree ref_to_res = gimple_call_arg (call, 0);
+
+ if (!integer_zerop (ref_to_res))
+ gimplify_assign (build_simple_mem_ref (ref_to_res), r, &seq);
+ }
+
if (lhs)
gimplify_assign (lhs, r, &seq);
Index: omp-low.c
===================================================================
--- omp-low.c (revision 227929)
+++ omp-low.c (working copy)
@@ -245,10 +245,6 @@ static struct omp_region *root_omp_regio
static bitmap task_shared_vars;
static vec<omp_context *> taskreg_contexts;
-static int oacc_lid;
-static gimple_seq oacc_gang_reduction_init = NULL;
-static gimple_seq oacc_gang_reduction_fini = NULL;
-
static void scan_omp (gimple_seq *, omp_context *);
static tree scan_omp_1_op (tree *, int *, void *);
@@ -4723,170 +4719,197 @@ expand_oacc_get_thread_num (gimple_seq *
return res;
}
-/* Lowering code for OpenACC reductions. This function takes as input an
- internal function IFN (one of IFN_GOACC_REDUCTION_SETUP,
- IFN_GOACC_REDUCTION_INIT, IFN_GOACC_REDUCTION_FINI or
- IFN_GOACC_REDUCTION_TEARDOWN), a GOMP_DIM LOOP_DIM, the CLAUSES associated
- with the acc construct, a gimple sequence ILIST, an omp_context CTX.
- WRITE_BACK specifies whether code for a reduction should be emitted.
- E.g., calls to GOACC_REDUCTION_FINI may need to be done in both
- lower_omp_reductions and lower_omp_target and/or lower_omp_for due to
- predication constraints. */
-
-static void
-lower_oacc_reductions (enum internal_fn ifn, int loop_dim, tree clauses,
- gimple_seq *ilist, omp_context *ctx, bool write_back)
-{
- tree orig, var, ref_to_res, call, dim;
- tree c, tcode, gwv, rid, lid = build_int_cst (integer_type_node, oacc_lid);
- int oacc_rid, i;
- unsigned mask = extract_oacc_loop_mask (ctx);
- gimple_seq red_seq = NULL;
- int num_reductions = 0;
- enum tree_code rcode;
-
- /* Remove the outer-most level of parallelism from the loop. */
- for (i = GOMP_DIM_MAX-1; i >= 0; i--)
- if (GOMP_DIM_MASK (i) & mask)
- {
- mask &= ~GOMP_DIM_MASK (i);
- break;
- }
-
- /* Update the write-back status if this loop contains more than one
- level of parallelism associated with it. */
- if (!write_back && (mask & GOMP_DIM_MASK (loop_dim)))
- write_back = true;
-
- if (ctx->reductions == 0)
- return;
-
- dim = build_int_cst (integer_type_node, loop_dim);
+/* Lower the OpenACC reductions of CLAUSES for compute axis DIM. INNER
+ is true if this is an inner axis of a multi-axis loop. FORK and
+ JOIN are (optional) fork and join markers. Generate the
+ before-loop forking sequence in FORK_SEQ and the after-loop joining
+ sequence to JOIN_SEQ. The general form of these sequences is
+
+ GOACC_LOCK_INIT
+ GOACC_REDUCTION_SETUP
+ GOACC_FORK
+ GOACC_REDUCTION_INIT
+ ...
+ GOACC_LOCK
+ GOACC_REDUCTION_FINI
+ GOACC_UNLOCK
+ GOACC_JOIN
+ GOACC_REDUCTION_TEARDOWN. */
+
+static void
+lower_oacc_reductions (location_t loc, tree clauses, unsigned dim, bool inner,
+ gcall *fork, gcall *join, gimple_seq *fork_seq,
+ gimple_seq *join_seq, omp_context *ctx)
+{
+ static unsigned oacc_lid = 0;
+
+ gimple_seq before_fork = NULL;
+ gimple_seq after_fork = NULL;
+ gimple_seq before_join = NULL;
+ gimple_seq after_join = NULL;
+ unsigned count = 0;
+ tree lid = build_int_cst (unsigned_type_node, oacc_lid++);
+ tree level = build_int_cst (unsigned_type_node, dim);
- /* Call GOACC_LOCK_INIT. */
- if (ifn == IFN_GOACC_REDUCTION_SETUP)
- {
- call = build_call_expr_internal_loc (UNKNOWN_LOCATION,
- IFN_GOACC_LOCK_INIT,
- void_type_node, 2, dim, lid);
- gimplify_and_add (call, ilist);
- }
+ for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
+ {
+ tree orig = OMP_CLAUSE_DECL (c);
+ tree var = OMP_CLAUSE_REDUCTION_PRIVATE_DECL (c);
+ tree ref_to_res = NULL_TREE;
+
+ if (!var)
+ var = maybe_lookup_decl (orig, ctx);
+ if (!var)
+ var = orig;
- for (c = clauses, oacc_rid = 0;
- c && write_back;
- c = OMP_CLAUSE_CHAIN (c), oacc_rid++)
- {
- if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION)
- continue;
+ if (!inner)
+ {
+ /* See if an outer construct also reduces this variable. */
+ omp_context *outer = ctx;
- orig = OMP_CLAUSE_DECL (c);
+ while (omp_context *probe = outer->outer)
+ {
+ enum gimple_code type = gimple_code (probe->stmt);
+ tree cls;
- if (loop_dim == GOMP_DIM_GANG && is_oacc_reduction_private (orig, ctx)
- && !is_oacc_parallel (ctx))
- continue;
+ switch (type)
+ {
+ case GIMPLE_OMP_FOR:
+ cls = gimple_omp_for_clauses (probe->stmt);
+ break;
- var = OMP_CLAUSE_REDUCTION_PRIVATE_DECL (c);
- if (var == NULL_TREE)
- var = maybe_lookup_decl (orig, ctx);
- if (var == NULL_TREE)
- var = orig;
+ case GIMPLE_OMP_TARGET:
+ if (gimple_omp_target_kind (probe->stmt)
+ != GF_OMP_TARGET_KIND_OACC_PARALLEL)
+ goto do_lookup;
- if (is_oacc_parallel (ctx))
- {
- ref_to_res = build_receiver_ref (orig, false, ctx);
+ cls = gimple_omp_target_clauses (probe->stmt);
+ break;
- if (is_reference (orig))
- ref_to_res = build_simple_mem_ref (ref_to_res);
- }
- else if (loop_dim == GOMP_DIM_GANG)
- ref_to_res = build_fold_addr_expr (build_outer_var_ref (orig, ctx));
- else
- ref_to_res = integer_zero_node;
+ default:
+ goto do_lookup;
+ }
+
+ outer = probe;
+ for (; cls; cls = OMP_CLAUSE_CHAIN (cls))
+ if (OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_REDUCTION
+ && orig == OMP_CLAUSE_DECL (cls))
+ goto has_outer_reduction;
+ }
- rcode = OMP_CLAUSE_REDUCTION_CODE (c);
- if (rcode == MINUS_EXPR)
- rcode = PLUS_EXPR;
+ do_lookup:
+
+ /* This is the outermost construct with this reduction,
+ see if there's a mapping for it. */
+ if (maybe_lookup_field (orig, outer))
+ ref_to_res = build_receiver_ref (orig, false, outer);
- if (is_reference (var))
- var = build_simple_mem_ref (var);
+ has_outer_reduction:;
+ }
+ gcc_assert (!is_reference (var));
+ if (!ref_to_res)
+ ref_to_res = integer_zero_node;
+ else if (is_reference (orig))
+ ref_to_res = build_simple_mem_ref (ref_to_res);
+
+ unsigned rcode = OMP_CLAUSE_REDUCTION_CODE (c);
+ if (rcode == MINUS_EXPR)
+ rcode = PLUS_EXPR;
+ tree op = build_int_cst (unsigned_type_node, rcode);
+ tree rid = build_int_cst (unsigned_type_node, count);
+
+ tree setup = build_call_expr_internal_loc
+ (loc, IFN_GOACC_REDUCTION_SETUP, TREE_TYPE (var), 6,
+ unshare_expr (ref_to_res), var, level, op, lid, rid);
+ tree init = build_call_expr_internal_loc
+ (loc, IFN_GOACC_REDUCTION_INIT, TREE_TYPE (var), 6,
+ unshare_expr (ref_to_res), var, level, op, lid, rid);
+ tree fini = build_call_expr_internal_loc
+ (loc, IFN_GOACC_REDUCTION_FINI, TREE_TYPE (var), 6,
+ unshare_expr (ref_to_res), var, level, op, lid, rid);
+ tree teardown = build_call_expr_internal_loc
+ (loc, IFN_GOACC_REDUCTION_TEARDOWN, TREE_TYPE (var), 6,
+ ref_to_res, var, level, op, lid, rid);
+
+ gimplify_assign (var, setup, &before_fork);
+ gimplify_assign (var, init, &after_fork);
+ gimplify_assign (var, fini, &before_join);
+ gimplify_assign (var, teardown, &after_join);
+ count++;
+ }
- tcode = build_int_cst (integer_type_node, rcode);
- rid = build_int_cst (integer_type_node, oacc_rid);
- gwv = build_int_cst (integer_type_node, loop_dim);
- call = build_call_expr_internal_loc (UNKNOWN_LOCATION, ifn,
- TREE_TYPE (var), 6, ref_to_res,
- var, gwv, tcode, lid, rid);
- gimplify_assign (var, call, &red_seq);
- num_reductions++;
+ /* Now stitch things together. */
+ if (count)
+ {
+ gcall *init = gimple_build_call_internal
+ (IFN_GOACC_LOCK_INIT, 2, level, lid);
+ gimple_seq_add_stmt (fork_seq, init);
}
+ gimple_seq_add_seq (fork_seq, before_fork);
+ if (fork)
+ gimple_seq_add_stmt (fork_seq, fork);
+ gimple_seq_add_seq (fork_seq, after_fork);
- if (num_reductions)
+ if (count)
{
- /* Call GOACC_LOCK. */
- if (ifn == IFN_GOACC_REDUCTION_FINI && write_back)
- {
- call = build_call_expr_internal_loc (UNKNOWN_LOCATION,
- IFN_GOACC_LOCK, void_type_node,
- 2, dim, lid);
- gimplify_and_add (call, ilist);
- }
-
- gimple_seq_add_seq (ilist, red_seq);
-
- /* Call GOACC_UNLOCK. */
- if (ifn == IFN_GOACC_REDUCTION_FINI && write_back)
- {
- dim = build_int_cst (integer_type_node, loop_dim);
- call = build_call_expr_internal_loc (UNKNOWN_LOCATION,
- IFN_GOACC_UNLOCK,
- void_type_node, 2, dim, lid);
- gimplify_and_add (call, ilist);
- }
+ gcall *init = gimple_build_call_internal
+ (IFN_GOACC_LOCK, 2, level, lid);
+ gimple_seq_add_stmt (join_seq, init);
+ }
+ gimple_seq_add_seq (join_seq, before_join);
+ if (count)
+ {
+ gcall *init = gimple_build_call_internal
+ (IFN_GOACC_UNLOCK, 2, level, lid);
+ gimple_seq_add_stmt (join_seq, init);
}
+ if (join)
+ gimple_seq_add_stmt (join_seq, join);
+ gimple_seq_add_seq (join_seq, after_join);
}
-/* Determine if a fake gang loop is necessary for an OpenACC reduction. */
+/* Generate the before and after OpenACC loop sequences. CLAUSES are
+ the loop clauses, from which we extract reductions. Initialize
+ HEAD and TAIL. */
-static bool
-oacc_fake_gang_reduction (omp_context *ctx)
+static void
+lower_oacc_head_tail (location_t loc, tree clauses,
+ gimple_seq *head, gimple_seq *tail, omp_context *ctx)
{
- if ((ctx->gwv_below & GOMP_DIM_MASK (GOMP_DIM_GANG)) == 0)
- return true;
-
- return false;
-}
-
-/* Helper function for lower_goacc_loop_*. ILIST is the gimple sequence
- corresponding to private reductions. OLIST is for the copy reductions. */
+ unsigned mask = ctx->gwv_this;
+ unsigned ix;
+ bool inner = false;
-static unsigned
-lower_oacc_loop_helper (tree clauses, gimple_seq *ilist, gimple_seq *olist,
- omp_context *ctx, enum internal_fn f1,
- enum internal_fn f2, unsigned fork_join,
- unsigned loop_dim, unsigned loop_mask,
- bool emit_f1)
-{
- tree gwv;
- gcall *call;
- unsigned orig_mask = extract_oacc_loop_mask (ctx);
-
- lower_oacc_reductions (f1, loop_dim, clauses, ilist, ctx, emit_f1);
- gwv = build_int_cst (unsigned_type_node, loop_dim);
- call = gimple_build_call_internal
- (IFN_UNIQUE, 2, build_int_cst (unsigned_type_node, fork_join), gwv);
- gimple_seq_add_stmt (ilist, call);
- lower_oacc_reductions (f2, loop_dim, clauses, ilist, ctx, true);
- loop_mask = loop_mask & ~GOMP_DIM_MASK (loop_dim);
-
- if ((orig_mask & GOMP_DIM_MASK (GOMP_DIM_GANG)) == 0
- && loop_dim != GOMP_DIM_GANG && loop_mask == 0
- && oacc_fake_gang_reduction (ctx))
- {
- lower_oacc_reductions (f1, GOMP_DIM_GANG, clauses, olist, ctx, true);
- lower_oacc_reductions (f2, GOMP_DIM_GANG, clauses, olist, ctx, true);
- }
+ if (ctx->outer && gimple_code (ctx->outer->stmt) == GIMPLE_OMP_FOR)
+ mask &= ~ctx->outer->gwv_this;
+
+ for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
+ if (mask & GOMP_DIM_MASK (ix))
+ {
+ tree level = build_int_cst (unsigned_type_node, ix);
+ gcall *fork = gimple_build_call_internal
+ (IFN_UNIQUE, 2,
+ build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_FORK), level);
+ gcall *join = gimple_build_call_internal
+ (IFN_UNIQUE, 2,
+ build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_JOIN), level);
+ gimple_seq fork_seq = NULL;
+ gimple_seq join_seq = NULL;
+
+ gimple_set_location (fork, loc);
+ gimple_set_location (join, loc);
+ lower_oacc_reductions (loc, clauses, ix, inner,
+ fork, join, &fork_seq, &join_seq, ctx);
+
+ /* Append this level to head. */
+ gimple_seq_add_seq (head, fork_seq);
+ /* Prepend it to tail. */
+ gimple_seq_add_seq (&join_seq, *tail);
+ *tail = join_seq;
- return loop_mask;
+ inner = true;
+ }
}
/* Generate code to implement the REDUCTION clauses. OpenACC reductions
@@ -4903,21 +4926,7 @@ lower_reduction_clauses (tree clauses, g
/* OpenACC loop reductions are handled elsewhere. */
if (is_gimple_omp_oacc (ctx->stmt))
- {
- unsigned loop_dim, loop_mask = extract_oacc_loop_mask (ctx);
-
- if (loop_mask == 0)
- return;
-
- for (loop_dim = GOMP_DIM_MAX; --loop_dim; )
- if (loop_mask & GOMP_DIM_MASK (loop_dim))
- break;
-
- lower_oacc_reductions (IFN_GOACC_REDUCTION_FINI, loop_dim, clauses,
- stmt_seqp, ctx, true);
-
- return;
- }
+ return;
/* SIMD reductions are handled in lower_rec_input_clauses. */
if (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
@@ -11178,44 +11187,6 @@ lower_omp_for_lastprivate (struct omp_fo
}
}
-/* Lower code for OpenACC for entry and exit to an oacc loop. This function
- is responsible for setting up reductions and placing markers to GOACC_FORK
- and GOACC_JOIN.
-*/
-
-static void
-lower_oacc_loop_enter_exit (bool enter_loop, tree clauses, gimple_seq *ilist,
- omp_context *ctx)
-{
- unsigned loop_dim_mask = extract_oacc_loop_mask (ctx);
-
- if (loop_dim_mask == 0)
- return;
-
- if (enter_loop)
- {
- for (int i = GOMP_DIM_GANG; i < GOMP_DIM_MAX; i++)
- if (loop_dim_mask & GOMP_DIM_MASK (i))
- loop_dim_mask =
- lower_oacc_loop_helper (clauses, ilist, &oacc_gang_reduction_init,
- ctx, IFN_GOACC_REDUCTION_SETUP,
- IFN_GOACC_REDUCTION_INIT,
- IFN_UNIQUE_OACC_FORK, i, loop_dim_mask,
- enter_loop);
- }
- else
- {
- for (int i = GOMP_DIM_MAX; i-- != GOMP_DIM_GANG;)
- if (loop_dim_mask & GOMP_DIM_MASK (i))
- loop_dim_mask =
- lower_oacc_loop_helper (clauses, ilist, &oacc_gang_reduction_fini,
- ctx, IFN_GOACC_REDUCTION_FINI,
- IFN_GOACC_REDUCTION_TEARDOWN,
- IFN_UNIQUE_OACC_JOIN, i, loop_dim_mask,
- enter_loop);
- }
-}
-
/* Lower code for an OMP loop directive. */
static void
@@ -11225,12 +11196,9 @@ lower_omp_for (gimple_stmt_iterator *gsi
struct omp_for_data fd, *fdp = NULL;
gomp_for *stmt = as_a <gomp_for *> (gsi_stmt (*gsi_p));
gbind *new_stmt;
- gimple_seq omp_for_body, body, dlist, header, exit;
+ gimple_seq omp_for_body, body, dlist;
+ gimple_seq oacc_head = NULL, oacc_tail = NULL;
size_t i;
- int loop_mask = extract_oacc_loop_mask (ctx);
-
- if (is_gimple_omp_oacc (ctx->stmt))
- oacc_lid++;
push_gimplify_context ();
@@ -11305,21 +11273,6 @@ lower_omp_for (gimple_stmt_iterator *gsi
/* The pre-body and input clauses go before the lowered GIMPLE_OMP_FOR. */
dlist = NULL;
body = NULL;
- header = NULL;
-
- if (is_gimple_omp_oacc (ctx->stmt))
- {
- lower_oacc_loop_enter_exit (true, gimple_omp_for_clauses (stmt),
- &header, ctx);
- if (loop_mask & GOMP_DIM_MASK (GOMP_DIM_GANG)
- || (oacc_fake_gang_reduction (ctx) && loop_mask == ctx->gwv_this))
- {
- gimple_seq_add_seq (&body, oacc_gang_reduction_init);
- oacc_gang_reduction_init = NULL;
- }
- }
-
- gimple_seq_add_seq (&body, header);
lower_rec_input_clauses (gimple_omp_for_clauses (stmt), &body, &dlist, ctx,
fdp);
@@ -11352,6 +11305,15 @@ lower_omp_for (gimple_stmt_iterator *gsi
/* Once lowered, extract the bounds and clauses. */
extract_omp_for_data (stmt, &fd, NULL);
+ if (is_gimple_omp_oacc (ctx->stmt))
+ lower_oacc_head_tail (gimple_location (stmt),
+ gimple_omp_for_clauses (stmt),
+ &oacc_head, &oacc_tail, ctx);
+
+ /* Add OpenACC partitioning & reduction markers just before the loop */
+ if (oacc_head)
+ gimple_seq_add_seq (&body, oacc_head);
+
lower_omp_for_lastprivate (&fd, &body, &dlist, ctx);
gimple_seq_add_stmt (&body, stmt);
@@ -11374,19 +11336,9 @@ lower_omp_for (gimple_stmt_iterator *gsi
gimple_seq_add_stmt (&body, gimple_build_omp_return (fd.have_nowait));
maybe_add_implicit_barrier_cancel (ctx, &body);
- if (is_gimple_omp_oacc (ctx->stmt))
- {
- exit = NULL;
- lower_oacc_loop_enter_exit (false, gimple_omp_for_clauses (stmt),
- &exit, ctx);
- gimple_seq_add_seq (&body, exit);
- if (loop_mask & GOMP_DIM_MASK (GOMP_DIM_GANG)
- || (oacc_fake_gang_reduction (ctx) && loop_mask == ctx->gwv_this))
- {
- gimple_seq_add_seq (&body, oacc_gang_reduction_fini);
- oacc_gang_reduction_fini = NULL;
- }
- }
+ /* Add OpenACC joining and reduction markers just after the loop. */
+ if (oacc_tail)
+ gimple_seq_add_seq (&body, oacc_tail);
pop_gimplify_context (new_stmt);
@@ -12072,18 +12024,13 @@ lower_omp_target (gimple_stmt_iterator *
irlist = NULL;
orlist = NULL;
- if (is_oacc_parallel (ctx) && ctx->reductions)
- {
- lower_oacc_reductions (IFN_GOACC_REDUCTION_SETUP, GOMP_DIM_GANG,
- clauses, &irlist, ctx, true);
- lower_oacc_reductions (IFN_GOACC_REDUCTION_INIT, GOMP_DIM_GANG,
- clauses, &irlist, ctx, true);
- lower_oacc_reductions (IFN_GOACC_REDUCTION_FINI, GOMP_DIM_GANG,
- clauses, &orlist, ctx, true);
- lower_oacc_reductions (IFN_GOACC_REDUCTION_TEARDOWN, GOMP_DIM_GANG,
- clauses, &orlist, ctx, true);
- }
-
+ if (is_oacc_parallel (ctx))
+ /* If there are reductions on the offloaded region itself, treat
+ them as a dummy GANG loop. */
+ lower_oacc_reductions (gimple_location (ctx->stmt), clauses,
+ GOMP_DIM_GANG, false, NULL, NULL,
+ &irlist, &orlist, ctx);
+
if (offloaded)
{
/* Declare all the variables created by mapping and the variables