I've applied this patch to gomp4 branch. It's the reworking of IFN_UNIQUE
suggested by Richard & Jakub.
1) IFN_UNIQUE is a ctrl-altering call, and thus ends up at the end of a BB.
2) tracer only needs to check that stmt (and it's already looking at it for
other reasons)
3) IFN_UNIQUE is no longer ECF_LEAF
4) Inserted a data dependency chain to the head & tail call sequence. The 2nd
param is the result of the previous call in the chain.
Preparing updated trunk patches now ...
nathan
2015-10-25 Nathan Sidwell <nat...@codesourcery.com>
* internal-fn.def (IFN_UNIQUE): Not a leaf.
(IFN_UNIQUE, IFN_GOACC_LOOP): Move sub codes to ...
* internal-fn.h (enum ifn_unique_kind, enum ifn_goacc_loop_kind):
... here. New enums.
* internal-fn.c (expand_UNIQUE): Deal with data dependency var.
* tree-cfg.c (gimple_call_initialize_ctrl_altering): Check for
unique internal fn call.
* config/nvptx/nvptx.md (oacc_fork, oacc_join): Deal with data
dependency src & dest.
* config/nvptx/nvptx.c (nvptx_xform_fork_join): Rename to ...
(nvptx_goacc_fork_join): ... here. Skip data dependency arg.
* tracer.c (ignore_bb_p): Just look at last stmt for UNIQUE.
* omp-low.c (lower_oacc_head_mark): Take data dependency arg.
Use quick_push.
(lower_oacc_loop_marker): Take data dependency arg.
(lower_oacc_head_tail): Insert data dependency var.
(new_oacc_loop): Adjust arg numbering.
(dump_oacc_loop_part): Cope with block-straddling sequences.
(oacc_loop_discover_walk): Likewise.
(oacc_loop_xform_head_tail): Likewise.
(execute_oacc_device_lower): Use two bools for scanning &
deletion.
Index: gcc/config/nvptx/nvptx.md
===================================================================
--- gcc/config/nvptx/nvptx.md (revision 229276)
+++ gcc/config/nvptx/nvptx.md (working copy)
@@ -1400,20 +1400,28 @@
)
(define_expand "oacc_fork"
- [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
- UNSPECV_FORKED)]
+ [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
+ (match_operand:SI 1 "nvptx_general_operand" ""))
+ (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
+ UNSPECV_FORKED)]
""
{
- nvptx_expand_oacc_fork (INTVAL (operands[0]));
+ if (operands[0] != const0_rtx)
+ emit_move_insn (operands[0], operands[1]);
+ nvptx_expand_oacc_fork (INTVAL (operands[2]));
DONE;
})
(define_expand "oacc_join"
- [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
- UNSPECV_JOIN)]
+ [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
+ (match_operand:SI 1 "nvptx_general_operand" ""))
+ (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
+ UNSPECV_JOIN)]
""
{
- nvptx_expand_oacc_join (INTVAL (operands[0]));
+ if (operands[0] != const0_rtx)
+ emit_move_insn (operands[0], operands[1]);
+ nvptx_expand_oacc_join (INTVAL (operands[2]));
DONE;
})
Index: gcc/config/nvptx/nvptx.c
===================================================================
--- gcc/config/nvptx/nvptx.c (revision 229276)
+++ gcc/config/nvptx/nvptx.c (working copy)
@@ -4296,10 +4296,10 @@ nvptx_dim_limit (unsigned axis)
/* Determine whether fork & joins are needed. */
static bool
-nvptx_xform_fork_join (gcall *call, const int dims[],
+nvptx_goacc_fork_join (gcall *call, const int dims[],
bool ARG_UNUSED (is_fork))
{
- tree arg = gimple_call_arg (call, 1);
+ tree arg = gimple_call_arg (call, 2);
unsigned axis = TREE_INT_CST_LOW (arg);
/* We only care about worker and vector partitioning. */
@@ -4844,7 +4844,7 @@ nvptx_use_anchors_for_symbol (const_rtx
#define TARGET_GOACC_DIM_LIMIT nvptx_dim_limit
#undef TARGET_GOACC_FORK_JOIN
-#define TARGET_GOACC_FORK_JOIN nvptx_xform_fork_join
+#define TARGET_GOACC_FORK_JOIN nvptx_goacc_fork_join
#undef TARGET_GOACC_REDUCTION
#define TARGET_GOACC_REDUCTION nvptx_goacc_reduction
Index: gcc/tracer.c
===================================================================
--- gcc/tracer.c (revision 229276)
+++ gcc/tracer.c (working copy)
@@ -93,25 +93,20 @@ bb_seen_p (basic_block bb)
static bool
ignore_bb_p (basic_block bb)
{
- gimple_stmt_iterator gsi;
- gimple *g;
-
if (bb->index < NUM_FIXED_BLOCKS)
return true;
if (optimize_bb_for_size_p (bb))
return true;
- /* A transaction is a single entry multiple exit region. It must be
- duplicated in its entirety or not at all. */
- g = last_stmt (CONST_CAST_BB (bb));
- if (g && gimple_code (g) == GIMPLE_TRANSACTION)
- return true;
-
- /* Ignore blocks containing non-clonable function calls. */
- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ if (gimple *g = last_stmt (CONST_CAST_BB (bb)))
{
- g = gsi_stmt (gsi);
+ /* A transaction is a single entry multiple exit region. It
+ must be duplicated in its entirety or not at all. */
+ if (gimple_code (g) == GIMPLE_TRANSACTION)
+ return true;
+ /* An IFN_UNIQUE call must be duplicated as part of its group,
+ or not at all. */
if (is_gimple_call (g) && gimple_call_internal_p (g)
&& gimple_call_internal_unique_p (g))
return true;
Index: gcc/internal-fn.def
===================================================================
--- gcc/internal-fn.def (revision 229276)
+++ gcc/internal-fn.def (working copy)
@@ -70,20 +70,8 @@ DEF_INTERNAL_FN (GOACC_DATA_END_WITH_ARG
/* An unduplicable, uncombinable function. Generally used to preserve
a CFG property in the face of jump threading, tail merging or
other such optimizations. The first argument distinguishes
- between uses. Other arguments are as needed for use. The return
- type depends on use too. */
-DEF_INTERNAL_FN (UNIQUE, ECF_NOTHROW | ECF_LEAF, NULL)
-#define IFN_UNIQUE_UNSPEC 0 /* Undifferentiated UNIQUE. */
-
-/* FORK and JOIN mark the points at which OpenACC partitioned
- execution is entered or exited. They take an INTEGER_CST argument,
- indicating the axis of forking or joining and return nothing. */
-#define IFN_UNIQUE_OACC_FORK 1
-#define IFN_UNIQUE_OACC_JOIN 2
-/* HEAD_MARK and TAIL_MARK are used to demark the sequence entering or
- leaving partitioned execution. */
-#define IFN_UNIQUE_OACC_HEAD_MARK 3
-#define IFN_UNIQUE_OACC_TAIL_MARK 4
+ between uses. See internal-fn.h for usage. */
+DEF_INTERNAL_FN (UNIQUE, ECF_NOTHROW, NULL)
/* DIM_SIZE and DIM_POS return the size of a particular compute
dimension and the executing thread's position within that
@@ -112,28 +100,5 @@ DEF_INTERNAL_FN (GOACC_REDUCTION_INIT, E
DEF_INTERNAL_FN (GOACC_REDUCTION_FINI, ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOACC_REDUCTION_TEARDOWN, ECF_NOTHROW, NULL)
-/* OpenACC looping abstraction. Allows the precise stepping of
- the compute geometry over the loop iterations to be deferred until
- it is known which compiler is generating the code. The action is
- encoded in a constant first argument.
-
- CHUNK_MAX = LOOP (CODE_CHUNKS, DIR, RANGE, STEP, CHUNK_SIZE, MASK)
- STEP = LOOP (CODE_STEP, DIR, RANGE, STEP, CHUNK_SIZE, MASK)
- OFFSET = LOOP (CODE_OFFSET, DIR, RANGE, STEP, CHUNK_SIZE, MASK, CHUNK_NO)
- BOUND = LOOP (CODE_BOUND, DIR, RANGE, STEP, CHUNK_SIZE, MASK, OFFSET)
-
- DIR - +1 for up loop, -1 for down loop
- RANGE - Range of loop (END - BASE)
- STEP - iteration step size
- CHUNKING - size of chunking, (constant zero for no chunking)
- CHUNK_NO - chunk number
- MASK - partitioning mask.
-
- TODO: The partitioning mask and chunk size are a transition stage,
- they will be removed once the required infrastructure is in place. */
-
+/* OpenACC looping abstraction. See internal-fn.h for usage. */
DEF_INTERNAL_FN (GOACC_LOOP, ECF_PURE | ECF_NOTHROW, NULL)
-#define IFN_GOACC_LOOP_CHUNKS 0 /* Number of chunks. */
-#define IFN_GOACC_LOOP_STEP 1 /* Size of each thread's step. */
-#define IFN_GOACC_LOOP_OFFSET 2 /* Initial iteration value. */
-#define IFN_GOACC_LOOP_BOUND 3 /* Limit of iteration value. */
Index: gcc/omp-low.c
===================================================================
--- gcc/omp-low.c (revision 229276)
+++ gcc/omp-low.c (working copy)
@@ -5517,16 +5517,17 @@ lower_oacc_reductions (location_t loc, t
be partitioned over. */
static unsigned
-lower_oacc_head_mark (location_t loc, tree clauses,
+lower_oacc_head_mark (location_t loc, tree ddvar, tree clauses,
gimple_seq *seq, omp_context *ctx)
{
unsigned levels = 0;
unsigned tag = 0;
tree gang_static = NULL_TREE;
- auto_vec<tree, 1> args;
+ auto_vec<tree, 5> args;
args.quick_push (build_int_cst
(integer_type_node, IFN_UNIQUE_OACC_HEAD_MARK));
+ args.quick_push (ddvar);
for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
{
switch (OMP_CLAUSE_CODE (c))
@@ -5594,13 +5595,14 @@ lower_oacc_head_mark (location_t loc, tr
if (!levels)
levels++;
- args.safe_push (build_int_cst (integer_type_node, levels));
- args.safe_push (build_int_cst (integer_type_node, tag));
+ args.quick_push (build_int_cst (integer_type_node, levels));
+ args.quick_push (build_int_cst (integer_type_node, tag));
if (gang_static)
- args.safe_push (gang_static);
+ args.quick_push (gang_static);
gcall *call = gimple_build_call_internal_vec (IFN_UNIQUE, args);
gimple_set_location (call, loc);
+ gimple_set_lhs (call, ddvar);
gimple_seq_add_stmt (seq, call);
return levels;
@@ -5610,15 +5612,17 @@ lower_oacc_head_mark (location_t loc, tr
partitioning level of the enclosed region. */
static void
-lower_oacc_loop_marker (location_t loc, bool head, tree tofollow,
- gimple_seq *seq)
+lower_oacc_loop_marker (location_t loc, tree ddvar, bool head,
+ tree tofollow, gimple_seq *seq)
{
- tree marker = build_int_cst
- (integer_type_node, (head ? IFN_UNIQUE_OACC_HEAD_MARK
- : IFN_UNIQUE_OACC_TAIL_MARK));
- gcall *call = gimple_build_call_internal
- (IFN_UNIQUE, 1 + (tofollow != NULL_TREE), marker, tofollow);
+ int marker_kind = (head ? IFN_UNIQUE_OACC_HEAD_MARK
+ : IFN_UNIQUE_OACC_TAIL_MARK);
+ tree marker = build_int_cst (integer_type_node, marker_kind);
+ int nargs = 2 + (tofollow != NULL_TREE);
+ gcall *call = gimple_build_call_internal (IFN_UNIQUE, nargs,
+ marker, ddvar, tofollow);
gimple_set_location (call, loc);
+ gimple_set_lhs (call, ddvar);
gimple_seq_add_stmt (seq, call);
}
@@ -5631,32 +5635,38 @@ lower_oacc_head_tail (location_t loc, tr
gimple_seq *head, gimple_seq *tail, omp_context *ctx)
{
bool inner = false;
- unsigned count = lower_oacc_head_mark (loc, clauses, head, ctx);
-
+ tree ddvar = create_tmp_var (integer_type_node, ".data_dep");
+ gimple_seq_add_stmt (head, gimple_build_assign (ddvar, integer_zero_node));
+
+ unsigned count = lower_oacc_head_mark (loc, ddvar, clauses, head, ctx);
if (!count)
- lower_oacc_loop_marker (loc, false, integer_zero_node, tail);
+ lower_oacc_loop_marker (loc, ddvar, false, integer_zero_node, tail);
+
+ tree fork_kind = build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_FORK);
+ tree join_kind = build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_JOIN);
for (unsigned done = 1; count; count--, done++)
{
- tree place = build_int_cst (integer_type_node, -1);
- gcall *fork = gimple_build_call_internal
- (IFN_UNIQUE, 2,
- build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_FORK), place);
- gcall *join = gimple_build_call_internal
- (IFN_UNIQUE, 2,
- build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_JOIN), place);
gimple_seq fork_seq = NULL;
gimple_seq join_seq = NULL;
+ tree place = build_int_cst (integer_type_node, -1);
+ gcall *fork = gimple_build_call_internal (IFN_UNIQUE, 3,
+ fork_kind, ddvar, place);
gimple_set_location (fork, loc);
+ gimple_set_lhs (fork, ddvar);
+
+ gcall *join = gimple_build_call_internal (IFN_UNIQUE, 3,
+ join_kind, ddvar, place);
gimple_set_location (join, loc);
+ gimple_set_lhs (join, ddvar);
/* Mark the beginning of this level sequence. */
if (inner)
- lower_oacc_loop_marker (loc, true,
+ lower_oacc_loop_marker (loc, ddvar, true,
build_int_cst (integer_type_node, count),
&fork_seq);
- lower_oacc_loop_marker (loc, false,
+ lower_oacc_loop_marker (loc, ddvar, false,
build_int_cst (integer_type_node, done),
&join_seq);
@@ -5673,8 +5683,8 @@ lower_oacc_head_tail (location_t loc, tr
}
/* Mark the end of the sequence. */
- lower_oacc_loop_marker (loc, true, NULL_TREE, head);
- lower_oacc_loop_marker (loc, false, NULL_TREE, tail);
+ lower_oacc_loop_marker (loc, ddvar, true, NULL_TREE, head);
+ lower_oacc_loop_marker (loc, ddvar, false, NULL_TREE, tail);
}
/* Generate code to implement the REDUCTION clauses. */
@@ -19167,11 +19177,11 @@ new_oacc_loop (oacc_loop *parent, gcall
/* TODO: This is where device_type flattening would occur for the loop
flags. */
- loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 2));
+ loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3));
tree chunk_size = integer_zero_node;
if (loop->flags & OLF_GANG_STATIC)
- chunk_size = gimple_call_arg (marker, 3);
+ chunk_size = gimple_call_arg (marker, 4);
loop->chunk_size = chunk_size;
return loop;
@@ -19223,25 +19233,27 @@ static void
dump_oacc_loop_part (FILE *file, gcall *from, int depth,
const char *title, int level)
{
- gimple_stmt_iterator gsi = gsi_for_stmt (from);
unsigned code = TREE_INT_CST_LOW (gimple_call_arg (from, 0));
fprintf (file, "%*s%s-%d:\n", depth * 2, "", title, level);
- for (gimple *stmt = from; ;)
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
{
- print_gimple_stmt (file, stmt, depth * 2 + 2, 0);
- gsi_next (&gsi);
- stmt = gsi_stmt (gsi);
+ gimple *stmt = gsi_stmt (gsi);
- if (!is_gimple_call (stmt))
- continue;
+ if (is_gimple_call (stmt)
+ && gimple_call_internal_p (stmt)
+ && gimple_call_internal_fn (stmt) == IFN_UNIQUE)
+ {
+ unsigned c = TREE_INT_CST_LOW (gimple_call_arg (stmt, 0));
- gcall *call = as_a <gcall *> (stmt);
-
- if (gimple_call_internal_p (call)
- && gimple_call_internal_fn (call) == IFN_UNIQUE
- && code == TREE_INT_CST_LOW (gimple_call_arg (call, 0)))
- break;
+ if (c == code && stmt != from)
+ break;
+ }
+ print_gimple_stmt (file, stmt, depth * 2 + 2, 0);
+
+ gsi_next (&gsi);
+ while (gsi_end_p (gsi))
+ gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
}
}
@@ -19295,12 +19307,14 @@ debug_oacc_loop (oacc_loop *loop)
static void
oacc_loop_discover_walk (oacc_loop *loop, basic_block bb)
{
+ int marker = 0;
+ int remaining = 0;
+
if (bb->flags & BB_VISITED)
return;
- bb->flags |= BB_VISITED;
- int marker = 0;
- int remaining = 0;
+ follow:
+ bb->flags |= BB_VISITED;
/* Scan for loop markers. */
for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
@@ -19331,7 +19345,7 @@ oacc_loop_discover_walk (oacc_loop *loop
if (code == IFN_UNIQUE_OACC_HEAD_MARK
|| code == IFN_UNIQUE_OACC_TAIL_MARK)
{
- if (gimple_call_num_args (call) == 1)
+ if (gimple_call_num_args (call) == 2)
{
gcc_assert (marker && !remaining);
marker = 0;
@@ -19342,7 +19356,7 @@ oacc_loop_discover_walk (oacc_loop *loop
}
else
{
- int count = TREE_INT_CST_LOW (gimple_call_arg (call, 1));
+ int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
if (!marker)
{
@@ -19363,7 +19377,12 @@ oacc_loop_discover_walk (oacc_loop *loop
}
}
}
- gcc_assert (!remaining && !marker);
+ if (remaining || marker)
+ {
+ bb = single_succ (bb);
+ gcc_assert (single_pred_p (bb) && !(bb->flags & BB_VISITED));
+ goto follow;
+ }
/* Walk successor blocks. */
edge e;
@@ -19424,50 +19443,35 @@ oacc_loop_discovery ()
static void
oacc_loop_xform_head_tail (gcall *from, int level)
{
- gimple_stmt_iterator gsi = gsi_for_stmt (from);
unsigned code = TREE_INT_CST_LOW (gimple_call_arg (from, 0));
tree replacement = build_int_cst (unsigned_type_node, level);
- for (gimple *stmt = from; ;)
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
{
- gsi_next (&gsi);
- stmt = gsi_stmt (gsi);
-
- if (!is_gimple_call (stmt))
- continue;
-
- gcall *call = as_a <gcall *> (stmt);
+ gimple *stmt = gsi_stmt (gsi);
- if (!gimple_call_internal_p (call))
- continue;
-
- switch (gimple_call_internal_fn (call))
- {
- case IFN_UNIQUE:
- {
- unsigned c = TREE_INT_CST_LOW (gimple_call_arg (call, 0));
-
- if (c == code)
- goto break2;
-
- if (c == IFN_UNIQUE_OACC_FORK || c == IFN_UNIQUE_OACC_JOIN)
- *gimple_call_arg_ptr (call, 1) = replacement;
- }
- break;
-
- case IFN_GOACC_REDUCTION_SETUP:
- case IFN_GOACC_REDUCTION_INIT:
- case IFN_GOACC_REDUCTION_FINI:
- case IFN_GOACC_REDUCTION_TEARDOWN:
- *gimple_call_arg_ptr (call, 2) = replacement;
- break;
-
- default:
- break;
+ if (!is_gimple_call (stmt)
+ || !gimple_call_internal_p (stmt))
+ ;
+ else if (gimple_call_internal_fn (stmt) == IFN_UNIQUE)
+ {
+ unsigned c = TREE_INT_CST_LOW (gimple_call_arg (stmt, 0));
+
+ if (c == IFN_UNIQUE_OACC_FORK || c == IFN_UNIQUE_OACC_JOIN)
+ *gimple_call_arg_ptr (stmt, 2) = replacement;
+ else if (c == code && stmt != from)
+ break;
}
- }
+ else if (gimple_call_internal_fn (stmt) == IFN_GOACC_REDUCTION_SETUP
+ || gimple_call_internal_fn (stmt) == IFN_GOACC_REDUCTION_INIT
+ || gimple_call_internal_fn (stmt) == IFN_GOACC_REDUCTION_FINI
+ || gimple_call_internal_fn (stmt) == IFN_GOACC_REDUCTION_TEARDOWN)
+ *gimple_call_arg_ptr (stmt, 2) = replacement;
- break2:;
+ gsi_next (&gsi);
+ while (gsi_end_p (gsi))
+ gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
+ }
}
/* Transform the IFN_GOACC_LOOP internal functions by providing the
@@ -19875,7 +19879,7 @@ execute_oacc_device_lower ()
/* Rewind to allow rescan. */
gsi_prev (&gsi);
- int rescan = 0;
+ bool rescan = false, remove = false;
unsigned ifn_code = gimple_call_internal_fn (call);
switch (ifn_code)
@@ -19885,16 +19889,17 @@ execute_oacc_device_lower ()
case IFN_GOACC_DIM_POS:
case IFN_GOACC_DIM_SIZE:
if (gimple_call_lhs (call) == NULL_TREE)
- rescan = -1;
- else if (oacc_xform_dim (call, dims, ifn_code == IFN_GOACC_DIM_POS))
- rescan = 1;
+ remove = true;
+ else if (oacc_xform_dim (call, dims,
+ ifn_code == IFN_GOACC_DIM_POS))
+ rescan = true;
break;
case IFN_GOACC_LOOP:
oacc_xform_loop (call);
- rescan = 1;
+ rescan = true;
break;
-
+
case IFN_GOACC_REDUCTION_SETUP:
case IFN_GOACC_REDUCTION_INIT:
case IFN_GOACC_REDUCTION_FINI:
@@ -19908,7 +19913,7 @@ execute_oacc_device_lower ()
default_goacc_reduction (call);
else
targetm.goacc.reduction (call);
- rescan = 1;
+ rescan = true;
break;
case IFN_UNIQUE:
@@ -19919,16 +19924,16 @@ execute_oacc_device_lower ()
{
case IFN_UNIQUE_OACC_FORK:
case IFN_UNIQUE_OACC_JOIN:
- if (integer_minus_onep (gimple_call_arg (call, 1)))
- rescan = -1;
+ if (integer_minus_onep (gimple_call_arg (call, 2)))
+ remove = true;
else if (targetm.goacc.fork_join
(call, dims, code == IFN_UNIQUE_OACC_FORK))
- rescan = -1;
+ remove = true;
break;
case IFN_UNIQUE_OACC_HEAD_MARK:
case IFN_UNIQUE_OACC_TAIL_MARK:
- rescan = -1;
+ remove = true;
break;
}
break;
@@ -19942,16 +19947,24 @@ execute_oacc_device_lower ()
/* Undo the rewind. */
gsi_next (&gsi);
- if (!rescan)
- /* If not rescanning, advance over the call. */
- gsi_next (&gsi);
- else if (rescan < 0)
+ if (remove)
{
if (gimple_vdef (call))
replace_uses_by (gimple_vdef (call),
gimple_vuse (call));
- gsi_remove (&gsi, true);
+ if (gimple_call_lhs (call))
+ {
+ /* Propagate the data dependency var. */
+ gimple *ass = gimple_build_assign (gimple_call_lhs (call),
+ gimple_call_arg (call, 1));
+ gsi_replace (&gsi, ass, false);
+ }
+ else
+ gsi_remove (&gsi, true);
}
+ else if (!rescan)
+ /* If not rescanning, advance over the call. */
+ gsi_next (&gsi);
}
free_oacc_loop (loops);
Index: gcc/tree-cfg.c
===================================================================
--- gcc/tree-cfg.c (revision 229276)
+++ gcc/tree-cfg.c (working copy)
@@ -487,7 +487,11 @@ gimple_call_initialize_ctrl_altering (gi
|| ((flags & ECF_TM_BUILTIN)
&& is_tm_ending_fndecl (gimple_call_fndecl (stmt)))
/* BUILT_IN_RETURN call is same as return statement. */
- || gimple_call_builtin_p (stmt, BUILT_IN_RETURN))
+ || gimple_call_builtin_p (stmt, BUILT_IN_RETURN)
+ /* IFN_UNIQUE should be the last insn, to make checking for it
+ as cheap as possible. */
+ || (gimple_call_internal_p (stmt)
+ && gimple_call_internal_unique_p (stmt)))
gimple_call_set_ctrl_altering (stmt, true);
else
gimple_call_set_ctrl_altering (stmt, false);
Index: gcc/internal-fn.c
===================================================================
--- gcc/internal-fn.c (revision 229276)
+++ gcc/internal-fn.c (working copy)
@@ -1962,8 +1962,9 @@ static void
expand_UNIQUE (gcall *stmt)
{
rtx pattern = NULL_RTX;
+ int code = TREE_INT_CST_LOW (gimple_call_arg (stmt, 0));
- switch (TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)))
+ switch (code)
{
default:
gcc_unreachable ();
@@ -1975,21 +1976,34 @@ expand_UNIQUE (gcall *stmt)
break;
case IFN_UNIQUE_OACC_FORK:
+ case IFN_UNIQUE_OACC_JOIN:
+ {
+ tree lhs = gimple_call_lhs (stmt);
+ rtx target = const0_rtx;
+
+ if (lhs)
+ target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+
+ rtx data_dep = expand_normal (gimple_call_arg (stmt, 1));
+ rtx axis = expand_normal (gimple_call_arg (stmt, 2));
+
+ if (code == IFN_UNIQUE_OACC_FORK)
+ {
#ifdef HAVE_oacc_fork
- pattern = expand_normal (gimple_call_arg (stmt, 1));
- pattern = gen_oacc_fork (pattern);
+ pattern = gen_oacc_fork (target, data_dep, axis);
#else
- gcc_unreachable ();
+ gcc_unreachable ();
#endif
- break;
-
- case IFN_UNIQUE_OACC_JOIN:
+ }
+ else
+ {
#ifdef HAVE_oacc_join
- pattern = expand_normal (gimple_call_arg (stmt, 1));
- pattern = gen_oacc_join (pattern);
+ pattern = gen_oacc_join (target, data_dep, axis);
#else
- gcc_unreachable ();
+ gcc_unreachable ();
#endif
+ }
+ }
break;
}
Index: gcc/internal-fn.h
===================================================================
--- gcc/internal-fn.h (revision 229276)
+++ gcc/internal-fn.h (working copy)
@@ -20,6 +20,52 @@ along with GCC; see the file COPYING3.
#ifndef GCC_INTERNAL_FN_H
#define GCC_INTERNAL_FN_H
+/* INTEGER_CST values for IFN_UNIQUE function arg-0. */
+enum ifn_unique_kind {
+ IFN_UNIQUE_UNSPEC, /* Undifferentiated UNIQUE. */
+
+ /* FORK and JOIN mark the points at which OpenACC partitioned
+ execution is entered or exited.
+ return: data dependency value
+ arg-1: data dependency var
+ arg-2: INTEGER_CST argument, indicating the axis. */
+ IFN_UNIQUE_OACC_FORK,
+ IFN_UNIQUE_OACC_JOIN,
+
+ /* HEAD_MARK and TAIL_MARK are used to demark the sequence entering
+ or leaving partitioned execution.
+ return: data dependency value
+ arg-1: data dependency var
+ arg-2: INTEGER_CST argument, remaining markers in this sequence
+ arg-3...: varargs on primary header */
+ IFN_UNIQUE_OACC_HEAD_MARK,
+ IFN_UNIQUE_OACC_TAIL_MARK
+};
+
+/* INTEGER_CST values for IFN_GOACC_LOOP arg-0. Allows the precise
+ stepping of the compute geometry over the loop iterations to be
+ deferred until it is known which compiler is generating the code.
+ The action is encoded in a constant first argument.
+
+ CHUNK_MAX = LOOP (CODE_CHUNKS, DIR, RANGE, STEP, CHUNK_SIZE, MASK)
+ STEP = LOOP (CODE_STEP, DIR, RANGE, STEP, CHUNK_SIZE, MASK)
+ OFFSET = LOOP (CODE_OFFSET, DIR, RANGE, STEP, CHUNK_SIZE, MASK, CHUNK_NO)
+ BOUND = LOOP (CODE_BOUND, DIR, RANGE, STEP, CHUNK_SIZE, MASK, OFFSET)
+
+ DIR - +1 for up loop, -1 for down loop
+ RANGE - Range of loop (END - BASE)
+ STEP - iteration step size
+ CHUNK_SIZE - size of chunking, (constant zero for no chunking)
+ CHUNK_NO - chunk number
+ MASK - partitioning mask. */
+
+enum ifn_goacc_loop_kind {
+ IFN_GOACC_LOOP_CHUNKS, /* Number of chunks. */
+ IFN_GOACC_LOOP_STEP, /* Size of each thread's step. */
+ IFN_GOACC_LOOP_OFFSET, /* Initial iteration value. */
+ IFN_GOACC_LOOP_BOUND /* Limit of iteration value. */
+};
+
/* Initialize internal function tables. */
extern void init_internal_fns ();