I've committed this to gomp4 branch. It replaces the regular builtins
__builtin_GOACC_nid/__builtin_GOACC_id with internal functions IFN_OACC_DIM_SIZE
and IFN_OACC_DIM_POS -- moving further away from the PTX-specific naming of id &
nid. These functions should never turn into library calls or be accessible by
the user.
A later patch will optimize the OACC_DIM_SIZE function in the oacc-xform pass.
nathan
2015-08-03 Nathan Sidwell <nat...@codesourcery.com>
gcc/
* internal-fn.def (GOACC_DIM_SIZE, GOACC_DFIM_POS): New.
* internal-fn.c (expand_GOACC_DIM_SIZE, expand_GOACC_DIM_POS): New.
* config/nvptx.md (UNSPEC_NID, UNSPEC_ID): Rename to ...
(UNSPEC_DIM_SIZE, UNSPEC_DIM_POS): ... here.
(oacc_nid, oacc_id): Rename to ...
(oacc_dim_size, oacc_dim_pos): ... here. Adjust.
* config/nvptx.c (nvptx_single): Adjust.
* omp-low.c (expand_oacc_get_num_threads,
expand_oacc_get_thread_num, oacc_init_count_vars): Use new
internal builtins.
* omp-builtins.def (BUILT_IN_GOACC_ID, BUILT_IN_GOACC_NID): Delete.
* builtins.c (expand_oacc_id): Delete.
(expand_builtin, is_simpe_biltin): Adjust.
libgomp/
* testuite/libgomp.oacc-c-c++-common/gang-static-2.c: Use asm insert.
Index: gcc/internal-fn.def
===================================================================
--- gcc/internal-fn.def (revision 226515)
+++ gcc/internal-fn.def (working copy)
@@ -66,3 +66,5 @@ DEF_INTERNAL_FN (VA_ARG, ECF_NOTHROW | E
DEF_INTERNAL_FN (GOACC_DATA_END_WITH_ARG, ECF_NOTHROW, ".r")
DEF_INTERNAL_FN (GOACC_FORK, ECF_NOTHROW | ECF_LEAF, ".")
DEF_INTERNAL_FN (GOACC_JOIN, ECF_NOTHROW | ECF_LEAF, ".")
+DEF_INTERNAL_FN (GOACC_DIM_SIZE, ECF_CONST | ECF_NOTHROW | ECF_LEAF, ".")
+DEF_INTERNAL_FN (GOACC_DIM_POS, ECF_NOTHROW | ECF_LEAF, ".")
Index: gcc/omp-builtins.def
===================================================================
--- gcc/omp-builtins.def (revision 226515)
+++ gcc/omp-builtins.def (working copy)
@@ -58,10 +58,6 @@ DEF_GOACC_BUILTIN_FNSPEC (BUILT_IN_GOACC
DEF_GOACC_BUILTIN (BUILT_IN_GOACC_WAIT, "GOACC_wait",
BT_FN_VOID_INT_INT_VAR,
ATTR_NOTHROW_LIST)
-DEF_GOACC_BUILTIN (BUILT_IN_GOACC_ID, "GOACC_id",
- BT_FN_UINT_UINT, ATTR_NOTHROW_LEAF_LIST)
-DEF_GOACC_BUILTIN (BUILT_IN_GOACC_NID, "GOACC_nid",
- BT_FN_UINT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_GOACC_BUILTIN (BUILT_IN_GOACC_GET_GANGLOCAL_PTR, "GOACC_get_ganglocal_ptr",
BT_FN_PTR, ATTR_NOTHROW_LEAF_LIST)
DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DEVICEPTR, "GOACC_deviceptr",
Index: gcc/config/nvptx/nvptx.md
===================================================================
--- gcc/config/nvptx/nvptx.md (revision 226515)
+++ gcc/config/nvptx/nvptx.md (working copy)
@@ -49,7 +49,7 @@
UNSPEC_ALLOCA
- UNSPEC_NID
+ UNSPEC_DIM_SIZE
UNSPEC_SHARED_DATA
@@ -65,7 +65,7 @@
UNSPECV_CAS
UNSPECV_XCHG
UNSPECV_BARSYNC
- UNSPECV_ID
+ UNSPECV_DIM_POS
UNSPECV_FORK
UNSPECV_FORKED
@@ -1335,9 +1335,10 @@
DONE;
})
-(define_insn "oacc_nid"
+(define_insn "oacc_dim_size"
[(set (match_operand:SI 0 "nvptx_register_operand" "")
- (unspec:SI [(match_operand:SI 1 "const_int_operand" "")] UNSPEC_NID))]
+ (unspec:SI [(match_operand:SI 1 "const_int_operand" "")]
+ UNSPEC_DIM_SIZE))]
""
{
static const char *const asms[] =
@@ -1349,10 +1350,10 @@
return asms[INTVAL (operands[1])];
})
-(define_insn "oacc_id"
+(define_insn "oacc_dim_pos"
[(set (match_operand:SI 0 "nvptx_register_operand" "")
(unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "")]
- UNSPECV_ID))]
+ UNSPECV_DIM_POS))]
""
{
static const char *const asms[] =
Index: gcc/config/nvptx/nvptx.c
===================================================================
--- gcc/config/nvptx/nvptx.c (revision 226515)
+++ gcc/config/nvptx/nvptx.c (working copy)
@@ -2771,7 +2771,7 @@ nvptx_single (unsigned mask, basic_block
rtx pred = gen_reg_rtx (BImode);
rtx_code_label *label = gen_label_rtx ();
- emit_insn_before (gen_oacc_id (id, GEN_INT (mode)), head);
+ emit_insn_before (gen_oacc_dim_pos (id, GEN_INT (mode)), head);
rtx cond = gen_rtx_SET (pred, gen_rtx_NE (BImode, id, const0_rtx));
emit_insn_before (cond, head);
rtx br;
Index: gcc/internal-fn.c
===================================================================
--- gcc/internal-fn.c (revision 226515)
+++ gcc/internal-fn.c (working copy)
@@ -1984,6 +1984,42 @@ expand_GOACC_JOIN (gcall *stmt ATTRIBUTE
#endif
}
+static void
+expand_GOACC_DIM_SIZE (gcall *stmt)
+{
+ tree lhs = gimple_call_lhs (stmt);
+
+ if (!lhs)
+ return;
+
+ rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+ rtx val = expand_expr (gimple_call_arg (stmt, 0), NULL_RTX,
+ VOIDmode, EXPAND_NORMAL);
+#ifdef HAVE_oacc_dim_size
+ emit_insn (gen_oacc_dim_size (target, val));
+#else
+ emit_move_insn (target, const1_rtx);
+#endif
+}
+
+static void
+expand_GOACC_DIM_POS (gcall *stmt)
+{
+ tree lhs = gimple_call_lhs (stmt);
+
+ if (!lhs)
+ return;
+
+ rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+ rtx val = expand_expr (gimple_call_arg (stmt, 0), NULL_RTX,
+ VOIDmode, EXPAND_NORMAL);
+#ifdef HAVE_oacc_dim_pos
+ emit_insn (gen_oacc_dim_pos (target, val));
+#else
+ emit_move_insn (target, const0_rtx);
+#endif
+}
+
/* Routines to expand each internal function, indexed by function number.
Each routine has the prototype:
Index: gcc/builtins.c
===================================================================
--- gcc/builtins.c (revision 226515)
+++ gcc/builtins.c (working copy)
@@ -5921,59 +5921,6 @@ expand_builtin_acc_on_device (tree exp,
return target;
}
-/* Expand a thread-id/thread-count builtin for OpenACC. */
-
-static rtx
-expand_oacc_id (enum built_in_function fcode, tree exp, rtx target)
-{
- tree arg0 = CALL_EXPR_ARG (exp, 0);
- rtx result = const0_rtx;
- rtx arg;
-
- arg = expand_normal (arg0);
-
- if (GET_CODE (arg) != CONST_INT || UINTVAL (arg) >= GOMP_DIM_MAX)
- {
- error ("argument to %D must be constant in range 0 to %d",
- get_callee_fndecl (exp), GOMP_DIM_MAX - 1);
- return result;
- }
-
- enum insn_code icode = CODE_FOR_nothing;
- switch (fcode)
- {
- case BUILT_IN_GOACC_NID:
-#ifdef HAVE_oacc_nid
- icode = CODE_FOR_oacc_nid;
-#endif
- result = const1_rtx;
- break;
- case BUILT_IN_GOACC_ID:
-#ifdef HAVE_oacc_id
- icode = CODE_FOR_oacc_id;
-#endif
- break;
- default:
- gcc_unreachable ();
- break;
- }
-
- if (icode != CODE_FOR_nothing)
- {
- machine_mode mode = insn_data[icode].operand[0].mode;
- rtx tmp = target;
- if (!REG_P (tmp) || GET_MODE (tmp) != mode)
- tmp = gen_reg_rtx (mode);
- rtx insn = GEN_FCN (icode) (tmp, arg);
- if (insn != NULL_RTX)
- {
- emit_insn (insn);
- return tmp;
- }
- }
- return result;
-}
-
static rtx
expand_oacc_ganglocal_ptr (rtx target ATTRIBUTE_UNUSED)
{
@@ -7135,10 +7082,6 @@ expand_builtin (tree exp, rtx target, rt
return target;
break;
- case BUILT_IN_GOACC_ID:
- case BUILT_IN_GOACC_NID:
- return expand_oacc_id (fcode, exp, target);
-
case BUILT_IN_GOACC_GET_GANGLOCAL_PTR:
target = expand_oacc_ganglocal_ptr (target);
if (target)
@@ -12497,8 +12440,6 @@ is_simple_builtin (tree decl)
case BUILT_IN_EH_FILTER:
case BUILT_IN_EH_POINTER:
case BUILT_IN_EH_COPY_VALUES:
- /* Just a special register read. */
- case BUILT_IN_GOACC_NID:
return true;
default:
Index: gcc/omp-low.c
===================================================================
--- gcc/omp-low.c (revision 226515)
+++ gcc/omp-low.c (working copy)
@@ -4676,7 +4676,6 @@ static tree
expand_oacc_get_num_threads (gimple_seq *seq, int gwv_bits)
{
tree res = build_int_cst (unsigned_type_node, 1);
- tree decl = builtin_decl_explicit (BUILT_IN_GOACC_NID);
unsigned ix;
for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
@@ -4684,7 +4683,7 @@ expand_oacc_get_num_threads (gimple_seq
{
tree arg = build_int_cst (unsigned_type_node, ix);
tree count = create_tmp_var (unsigned_type_node);
- gimple call = gimple_build_call (decl, 1, arg);
+ gimple call = gimple_build_call_internal (IFN_GOACC_DIM_SIZE, 1, arg);
gimple_call_set_lhs (call, count);
gimple_seq_add_stmt (seq, call);
@@ -4702,8 +4701,6 @@ static tree
expand_oacc_get_thread_num (gimple_seq *seq, int gwv_bits)
{
tree res = NULL_TREE;
- tree id_decl = builtin_decl_explicit (BUILT_IN_GOACC_ID);
- tree nid_decl = builtin_decl_explicit (BUILT_IN_GOACC_NID);
unsigned ix;
/* Start at gang level, and examine relevant dimension indices. */
@@ -4717,7 +4714,8 @@ expand_oacc_get_thread_num (gimple_seq *
/* We had an outer index, so scale that by the size of
this dimension. */
tree n = create_tmp_var (unsigned_type_node);
- gimple call = gimple_build_call (nid_decl, 1, arg);
+ gimple call
+ = gimple_build_call_internal (IFN_GOACC_DIM_SIZE, 1, arg);
gimple_call_set_lhs (call, n);
gimple_seq_add_stmt (seq, call);
@@ -4726,7 +4724,7 @@ expand_oacc_get_thread_num (gimple_seq *
/* Determine index in this dimension. */
tree id = create_tmp_var (unsigned_type_node);
- gimple call = gimple_build_call (id_decl, 1, arg);
+ gimple call = gimple_build_call_internal (IFN_GOACC_DIM_POS, 1, arg);
gimple_call_set_lhs (call, id);
gimple_seq_add_stmt (seq, call);
@@ -11671,8 +11669,6 @@ lower_omp_taskreg (gimple_stmt_iterator
static void
oacc_init_count_vars (omp_context *ctx, tree clauses ATTRIBUTE_UNUSED)
{
- tree getid = builtin_decl_explicit (BUILT_IN_GOACC_ID);
- tree getnid = builtin_decl_explicit (BUILT_IN_GOACC_NID);
tree worker_var, worker_count;
if (ctx->gwv_this & GOMP_DIM_MASK (GOMP_DIM_WORKER))
@@ -11682,11 +11678,11 @@ oacc_init_count_vars (omp_context *ctx,
worker_var = create_tmp_var (unsigned_type_node, ".worker");
worker_count = create_tmp_var (unsigned_type_node, ".workercount");
- gimple call1 = gimple_build_call (getid, 1, arg);
+ gimple call1 = gimple_build_call_internal (IFN_GOACC_DIM_POS, 1, arg);
gimple_call_set_lhs (call1, worker_var);
gimple_seq_add_stmt (&ctx->ganglocal_init, call1);
- gimple call2 = gimple_build_call (getnid, 1, arg);
+ gimple call2 = gimple_build_call_internal (IFN_GOACC_DIM_SIZE, 1, arg);
gimple_call_set_lhs (call2, worker_count);
gimple_seq_add_stmt (&ctx->ganglocal_init, call2);
}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c (revision 226515)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c (working copy)
@@ -1,9 +1,17 @@
/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-O2" } */
#include <assert.h>
+#include <openacc.h>
#define N 100
+#define GANG_ID(I) \
+ (acc_on_device (acc_device_nvidia) \
+ ? ({unsigned __r; \
+ __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (__r)); \
+ __r; }) : (I))
+
int
test_static(int *a, int num_gangs, int sarg)
{
@@ -35,38 +43,38 @@ main ()
#pragma acc parallel loop gang (static:*) num_gangs (10)
for (i = 0; i < 100; i++)
- a[i] = __builtin_GOACC_id (0);
+ a[i] = GANG_ID (i);
test_nonstatic (a, 10);
#pragma acc parallel loop gang (static:1) num_gangs (10)
for (i = 0; i < 100; i++)
- a[i] = __builtin_GOACC_id (0);
+ a[i] = GANG_ID (i);
test_static (a, 10, 1);
#pragma acc parallel loop gang (static:2) num_gangs (10)
for (i = 0; i < 100; i++)
- a[i] = __builtin_GOACC_id (0);
+ a[i] = GANG_ID (i);
test_static (a, 10, 2);
#pragma acc parallel loop gang (static:5) num_gangs (10)
for (i = 0; i < 100; i++)
- a[i] = __builtin_GOACC_id (0);
+ a[i] = GANG_ID (i);
test_static (a, 10, 5);
#pragma acc parallel loop gang (static:20) num_gangs (10)
for (i = 0; i < 100; i++)
- a[i] = __builtin_GOACC_id (0);
+ a[i] = GANG_ID (i);
test_static (a, 10, 20);
/* Non-static gang. */
#pragma acc parallel loop gang num_gangs (10)
for (i = 0; i < 100; i++)
- a[i] = __builtin_GOACC_id (0);
+ a[i] = GANG_ID (i);
test_nonstatic (a, 10);