On 01/06/2018 12:36 PM, Jakub Jelinek wrote:
On Sat, Jan 06, 2018 at 09:21:59AM +0100, Tom de Vries wrote:
this patch adds the following builtins in C/C++:
- __builtin_goacc_gang_id
- __builtin_goacc_worker_id
- __builtin_goacc_vector_id
- __builtin_goacc_gang_size
- __builtin_goacc_worker_size
- __builtin_goacc_vector_size
I wonder if it wouldn't be better to have just 2 builtins instead of 6,
with one argument (required to be constant) - the kind of parallelism
you're interested in, to avoid the inflation of the builtins.
Like so:
- __built_goacc_id
- __built_goacc_size
?
I've added:
- doc entries
- error checking for invalid use of builtins
- testcases c-c++-common/goacc/builtin-goacc-id-size{,-2}.c
I've also realized that folding these builtins is not what we want, so
I've removed that bit, and the mapping is now done in
expand_builtin_goacc_id_size.
Bootstrapped and reg-tested on x86_64.
Build and reg-tested on x86_64 with nvptx accelerator.
OK for trunk?
Thanks,
- Tom
Add __builtin_goacc_{id,size}
2018-01-06 Tom de Vries <t...@codesourcery.com>
PR libgomp/82428
* builtins.def (DEF_GOACC_BUILTIN_ONLY): Define.
* omp-builtins.def (BUILT_IN_GOACC_ID, BUILT_IN_GOACC_SIZE): New
builtin.
* builtins.c (expand_builtin_goacc_id_size): New function.
(expand_builtin): Call expand_builtin_goacc_id_size.
* doc/extend.texi (Other Builtins): Add __builtin_goacc_id and
__builtin_goacc_size.
* f95-lang.c (DEF_GOACC_BUILTIN_ONLY): Define.
* c-c++-common/goacc/builtin-goacc-id-size-2.c: New test.
* c-c++-common/goacc/builtin-goacc-id-size.c: New test.
* testsuite/libgomp.oacc-c-c++-common/gang-static-2.c: Use
__builtin_goacc_{id,size}.
* testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c: Same.
* testsuite/libgomp.oacc-c-c++-common/loop-g-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/loop-g-2.c: Same.
* testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c: Same.
* testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c: Same.
* testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/loop-v-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/loop-w-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Same.
* testsuite/libgomp.oacc-c-c++-common/routine-g-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/routine-v-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/routine-w-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c: Same.
* testsuite/libgomp.oacc-c-c++-common/tile-1.c: Same.
---
gcc/builtins.c | 91 ++++++++++++++++++++++
gcc/builtins.def | 4 +
gcc/doc/extend.texi | 10 +++
gcc/fortran/f95-lang.c | 4 +
gcc/omp-builtins.def | 5 ++
.../c-c++-common/goacc/builtin-goacc-id-size-2.c | 37 +++++++++
.../c-c++-common/goacc/builtin-goacc-id-size.c | 79 +++++++++++++++++++
.../libgomp.oacc-c-c++-common/gang-static-2.c | 21 ++---
.../libgomp.oacc-c-c++-common/loop-auto-1.c | 18 ++---
.../libgomp.oacc-c-c++-common/loop-dim-default.c | 14 ++--
.../testsuite/libgomp.oacc-c-c++-common/loop-g-1.c | 17 ++--
.../testsuite/libgomp.oacc-c-c++-common/loop-g-2.c | 16 ++--
.../libgomp.oacc-c-c++-common/loop-gwv-1.c | 17 ++--
.../libgomp.oacc-c-c++-common/loop-red-g-1.c | 16 ++--
.../libgomp.oacc-c-c++-common/loop-red-gwv-1.c | 16 ++--
.../libgomp.oacc-c-c++-common/loop-red-v-1.c | 16 ++--
.../libgomp.oacc-c-c++-common/loop-red-v-2.c | 16 ++--
.../libgomp.oacc-c-c++-common/loop-red-w-1.c | 16 ++--
.../libgomp.oacc-c-c++-common/loop-red-w-2.c | 16 ++--
.../libgomp.oacc-c-c++-common/loop-red-wv-1.c | 12 +--
.../testsuite/libgomp.oacc-c-c++-common/loop-v-1.c | 16 ++--
.../testsuite/libgomp.oacc-c-c++-common/loop-w-1.c | 16 ++--
.../libgomp.oacc-c-c++-common/loop-wv-1.c | 16 ++--
.../libgomp.oacc-c-c++-common/parallel-dims.c | 19 +----
.../libgomp.oacc-c-c++-common/routine-g-1.c | 18 ++---
.../libgomp.oacc-c-c++-common/routine-gwv-1.c | 18 ++---
.../libgomp.oacc-c-c++-common/routine-v-1.c | 18 ++---
.../libgomp.oacc-c-c++-common/routine-w-1.c | 18 ++---
.../libgomp.oacc-c-c++-common/routine-wv-1.c | 18 ++---
.../libgomp.oacc-c-c++-common/routine-wv-2.c | 19 ++---
.../testsuite/libgomp.oacc-c-c++-common/tile-1.c | 15 ++--
31 files changed, 402 insertions(+), 230 deletions(-)
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 98eb804..34bb97f 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -70,6 +70,8 @@ along with GCC; see the file COPYING3. If not see
#include "case-cfn-macros.h"
#include "gimple-fold.h"
#include "intl.h"
+#include "gomp-constants.h"
+#include "omp-general.h"
struct target_builtins default_target_builtins;
#if SWITCHABLE_TARGET
@@ -6602,6 +6604,91 @@ expand_stack_save (void)
return ret;
}
+/* Emit code to get the openacc gang, worker or vector id or size. */
+
+static rtx
+expand_builtin_goacc_id_size (tree exp, rtx target, int ignore)
+{
+ tree fndecl = get_callee_fndecl (exp);
+
+ const char *name;
+ switch (DECL_FUNCTION_CODE (fndecl))
+ {
+ case BUILT_IN_GOACC_ID:
+ name = "__built_goacc_id";
+ break;
+ case BUILT_IN_GOACC_SIZE:
+ name = "__built_goacc_size";
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (oacc_get_fn_attrib (current_function_decl) == NULL_TREE)
+ {
+ error ("%s only supported in openacc code", name);
+ return const0_rtx;
+ }
+
+ tree arg = CALL_EXPR_ARG (exp, 0);
+ if (TREE_CODE (arg) != INTEGER_CST)
+ {
+ error ("non-constant argument 0 to %s", name);
+ return const0_rtx;
+ }
+
+ int dim = TREE_INT_CST_LOW (arg);
+ switch (dim)
+ {
+ case GOMP_DIM_GANG:
+ case GOMP_DIM_WORKER:
+ case GOMP_DIM_VECTOR:
+ break;
+ default:
+ error ("illegal argument 0 to %s", name);
+ return const0_rtx;
+ }
+
+ if (ignore)
+ return target;
+
+ if (!targetm.have_oacc_dim_size ())
+ {
+ rtx retval;
+ switch (DECL_FUNCTION_CODE (fndecl))
+ {
+ case BUILT_IN_GOACC_ID:
+ retval = const0_rtx;
+ break;
+ case BUILT_IN_GOACC_SIZE:
+ retval = GEN_INT (1);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_move_insn (target, retval);
+ return target;
+ }
+
+ rtx reg = MEM_P (target) ? gen_reg_rtx (GET_MODE (target)) : target;
+ switch (DECL_FUNCTION_CODE (fndecl))
+ {
+ case BUILT_IN_GOACC_ID:
+ emit_insn (targetm.gen_oacc_dim_pos (reg, GEN_INT (dim)));
+ break;
+ case BUILT_IN_GOACC_SIZE:
+ emit_insn (targetm.gen_oacc_dim_size (reg, GEN_INT (dim)));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (reg != target)
+ emit_move_insn (target, reg);
+
+ return target;
+}
/* Expand an expression EXP that calls a built-in function,
with result going to TARGET if that's convenient
@@ -7732,6 +7819,10 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
folding. */
break;
+ case BUILT_IN_GOACC_ID:
+ case BUILT_IN_GOACC_SIZE:
+ return expand_builtin_goacc_id_size (exp, target, ignore);
+
default: /* just do library call, if unknown builtin */
break;
}
diff --git a/gcc/builtins.def b/gcc/builtins.def
index 671097e..263dfc7ea 100644
--- a/gcc/builtins.def
+++ b/gcc/builtins.def
@@ -214,6 +214,10 @@ along with GCC; see the file COPYING3. If not see
#define DEF_GOACC_BUILTIN_COMPILER(ENUM, NAME, TYPE, ATTRS) \
DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE, \
flag_openacc, true, true, ATTRS, false, true)
+#undef DEF_GOACC_BUILTIN_ONLY
+#define DEF_GOACC_BUILTIN_ONLY(ENUM, NAME, TYPE, ATTRS) \
+ DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, BT_LAST, \
+ false, false, true, ATTRS, false, flag_openacc)
#undef DEF_GOMP_BUILTIN
#define DEF_GOMP_BUILTIN(ENUM, NAME, TYPE, ATTRS) \
DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE, \
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 2a553ad..ebe3265 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -12335,6 +12335,16 @@ Similar to @code{__builtin_bswap32}, except the argument and return types
are 64 bit.
@end deftypefn
+@deftypefn {Built-in Function} int __builtin_goacc_id (int x)
+Returns the openacc gang, worker or vector id depending on whether @var{x} is
+GOMP_DIM_GANG, GOMP_DIM_WORKER or GOMP_DIM_GANG.
+@end deftypefn
+
+@deftypefn {Built-in Function} int __builtin_goacc_size (int x)
+Returns the openacc gang, worker or vector size depending on whether @var{x} is
+GOMP_DIM_GANG, GOMP_DIM_WORKER or GOMP_DIM_GANG.
+@end deftypefn
+
@node Target Builtins
@section Built-in Functions Specific to Particular Target Machines
diff --git a/gcc/fortran/f95-lang.c b/gcc/fortran/f95-lang.c
index dc9a1ae..60a28b8 100644
--- a/gcc/fortran/f95-lang.c
+++ b/gcc/fortran/f95-lang.c
@@ -1202,6 +1202,10 @@ gfc_init_builtin_functions (void)
#undef DEF_GOACC_BUILTIN_COMPILER
#define DEF_GOACC_BUILTIN_COMPILER(code, name, type, attr) \
gfc_define_builtin (name, builtin_types[type], code, name, attr);
+#undef DEF_GOACC_BUILTIN_ONLY
+#define DEF_GOACC_BUILTIN_ONLY(code, name, type, attr) \
+ gfc_define_builtin ("__builtin_" name, builtin_types[type], code, NULL, \
+ attr);
#undef DEF_GOMP_BUILTIN
#define DEF_GOMP_BUILTIN(code, name, type, attr) /* ignore */
#include "../omp-builtins.def"
diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def
index 69b73f4..b145709 100644
--- a/gcc/omp-builtins.def
+++ b/gcc/omp-builtins.def
@@ -51,6 +51,11 @@ DEF_GOACC_BUILTIN (BUILT_IN_GOACC_WAIT, "GOACC_wait",
DEF_GOACC_BUILTIN_COMPILER (BUILT_IN_ACC_ON_DEVICE, "acc_on_device",
BT_FN_INT_INT, ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GOACC_BUILTIN_ONLY (BUILT_IN_GOACC_ID, "goacc_id",
+ BT_FN_INT_INT, ATTR_NOTHROW_LEAF_LIST)
+DEF_GOACC_BUILTIN_ONLY (BUILT_IN_GOACC_SIZE, "goacc_size",
+ BT_FN_INT_INT, ATTR_NOTHROW_LEAF_LIST)
+
DEF_GOMP_BUILTIN (BUILT_IN_OMP_GET_THREAD_NUM, "omp_get_thread_num",
BT_FN_INT, ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_GOMP_BUILTIN (BUILT_IN_OMP_GET_NUM_THREADS, "omp_get_num_threads",
diff --git a/gcc/testsuite/c-c++-common/goacc/builtin-goacc-id-size-2.c b/gcc/testsuite/c-c++-common/goacc/builtin-goacc-id-size-2.c
new file mode 100644
index 0000000..80a3f72
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/builtin-goacc-id-size-2.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2" } */
+
+#include "../../../../include/gomp-constants.h"
+
+void
+foo (void)
+{
+ __builtin_goacc_id (GOMP_DIM_GANG);
+ /* { dg-error "__built_goacc_id only supported in openacc code" "" { target *-*-* } .-1 } */
+
+ __builtin_goacc_size (GOMP_DIM_GANG);
+ /* { dg-error "__built_goacc_size only supported in openacc code" "" { target *-*-* } .-1 } */
+}
+
+#pragma acc routine
+void
+foo2 (int arg)
+{
+ __builtin_goacc_id (arg);
+ /* { dg-error "non-constant argument 0 to __built_goacc_id" "" { target *-*-* } .-1 } */
+
+ __builtin_goacc_size (arg);
+ /* { dg-error "non-constant argument 0 to __built_goacc_size" "" { target *-*-* } .-1 } */
+
+ __builtin_goacc_id (-1);
+ /* { dg-error "illegal argument 0 to __built_goacc_id" "" { target *-*-* } .-1 } */
+
+ __builtin_goacc_id (-1);
+ /* { dg-error "illegal argument 0 to __built_goacc_id" "" { target *-*-* } .-1 } */
+
+ __builtin_goacc_size (-1);
+ /* { dg-error "illegal argument 0 to __built_goacc_size" "" { target *-*-* } .-1 } */
+
+ __builtin_goacc_size (3);
+ /* { dg-error "illegal argument 0 to __built_goacc_size" "" { target *-*-* } .-1 } */
+}
diff --git a/gcc/testsuite/c-c++-common/goacc/builtin-goacc-id-size.c b/gcc/testsuite/c-c++-common/goacc/builtin-goacc-id-size.c
new file mode 100644
index 0000000..d1a836a
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/builtin-goacc-id-size.c
@@ -0,0 +1,79 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2" } */
+
+#include "../../../../include/gomp-constants.h"
+
+#pragma acc routine
+int
+foo (void)
+{
+ int res;
+
+ __builtin_goacc_id (GOMP_DIM_GANG);
+ __builtin_goacc_id (GOMP_DIM_WORKER);
+ __builtin_goacc_id (GOMP_DIM_VECTOR);
+
+ __builtin_goacc_size (GOMP_DIM_GANG);
+ __builtin_goacc_size (GOMP_DIM_WORKER);
+ __builtin_goacc_size (GOMP_DIM_VECTOR);
+
+ res += __builtin_goacc_id (GOMP_DIM_GANG);
+ res += __builtin_goacc_id (GOMP_DIM_WORKER);
+ res += __builtin_goacc_id (GOMP_DIM_VECTOR);
+
+ res += __builtin_goacc_size (GOMP_DIM_GANG);
+ res += __builtin_goacc_size (GOMP_DIM_WORKER);
+ res += __builtin_goacc_size (GOMP_DIM_VECTOR);
+
+ return res;
+}
+
+void
+foo2 (void)
+{
+ int res;
+
+#pragma acc parallel
+ {
+ __builtin_goacc_id (GOMP_DIM_GANG);
+ __builtin_goacc_id (GOMP_DIM_WORKER);
+ __builtin_goacc_id (GOMP_DIM_VECTOR);
+
+ __builtin_goacc_size (GOMP_DIM_GANG);
+ __builtin_goacc_size (GOMP_DIM_WORKER);
+ __builtin_goacc_size (GOMP_DIM_VECTOR);
+
+ res += __builtin_goacc_id (GOMP_DIM_GANG);
+ res += __builtin_goacc_id (GOMP_DIM_WORKER);
+ res += __builtin_goacc_id (GOMP_DIM_VECTOR);
+
+ res += __builtin_goacc_size (GOMP_DIM_GANG);
+ res += __builtin_goacc_size (GOMP_DIM_WORKER);
+ res += __builtin_goacc_size (GOMP_DIM_VECTOR);
+ }
+}
+
+void
+foo3 (void)
+{
+ int res;
+
+#pragma acc kernels
+ {
+ __builtin_goacc_id (GOMP_DIM_GANG);
+ __builtin_goacc_id (GOMP_DIM_WORKER);
+ __builtin_goacc_id (GOMP_DIM_VECTOR);
+
+ __builtin_goacc_size (GOMP_DIM_GANG);
+ __builtin_goacc_size (GOMP_DIM_WORKER);
+ __builtin_goacc_size (GOMP_DIM_VECTOR);
+
+ res += __builtin_goacc_id (GOMP_DIM_GANG);
+ res += __builtin_goacc_id (GOMP_DIM_WORKER);
+ res += __builtin_goacc_id (GOMP_DIM_VECTOR);
+
+ res += __builtin_goacc_size (GOMP_DIM_GANG);
+ res += __builtin_goacc_size (GOMP_DIM_WORKER);
+ res += __builtin_goacc_size (GOMP_DIM_VECTOR);
+ }
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c
index 6de739a..06a0fad 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c
@@ -1,25 +1,23 @@
-/* { dg-do run { target openacc_nvidia_accel_selected } } */
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <assert.h>
#include <openacc.h>
+#include <gomp-constants.h>
#define N 100
#define GANG_ID(I) \
- (acc_on_device (acc_device_nvidia) \
- ? ({unsigned __r; \
- __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (__r)); \
- __r; }) : (I))
+ (acc_on_device (acc_device_not_host) \
+ ? __builtin_goacc_id (GOMP_DIM_GANG) \
+ : (I))
void
test_static(int *a, int num_gangs, int sarg)
{
int i, j;
- if (sarg == 0)
+ if (acc_on_device (acc_device_host))
+ return;
+
+ if (sarg == 0)
sarg = 1;
for (i = 0; i < N / sarg; i++)
@@ -32,6 +30,9 @@ test_nonstatic(int *a, int gangs)
{
int i, j;
+ if (acc_on_device (acc_device_host))
+ return;
+
for (i = 0; i < N; i+=gangs)
for (j = 0; j < gangs; j++)
assert (a[i+j] == i/gangs);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c
index 863b6b3..5051083 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c
@@ -1,11 +1,8 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
/* { dg-additional-options "-fopenacc-dim=32" } */
#include <stdio.h>
#include <openacc.h>
+#include <gomp-constants.h>
int check (const int *ary, int size, int gp, int wp, int vp)
{
@@ -79,15 +76,12 @@ static int __attribute__((noinline)) place ()
{
int r = 0;
- if (acc_on_device (acc_device_nvidia))
- {
- int g = 0, w = 0, v = 0;
+ int g = 0, w = 0, v = 0;
+ g = __builtin_goacc_id (GOMP_DIM_GANG);
+ w = __builtin_goacc_id (GOMP_DIM_WORKER);
+ v = __builtin_goacc_id (GOMP_DIM_VECTOR);
+ r = (g << 16) | (w << 8) | v;
- __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
- __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
- __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
- r = (g << 16) | (w << 8) | v;
- }
return r;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c
index e2b08c3..c13024d 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c
@@ -1,25 +1,23 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler. */
-/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
/* { dg-additional-options "-fopenacc-dim=16:16" } */
#include <openacc.h>
#include <alloca.h>
#include <string.h>
#include <stdio.h>
+#include <gomp-constants.h>
#pragma acc routine
static int __attribute__ ((noinline)) coord ()
{
int res = 0;
- if (acc_on_device (acc_device_nvidia))
+ if (acc_on_device (acc_device_not_host))
{
- int g = 0, w = 0, v = 0;
+ int g, w, v;
- __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
- __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
- __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+ g = __builtin_goacc_id (GOMP_DIM_GANG);
+ w = __builtin_goacc_id (GOMP_DIM_WORKER);
+ v = __builtin_goacc_id (GOMP_DIM_VECTOR);
res = (1 << 24) | (g << 16) | (w << 8) | v;
}
return res;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c
index ae1d588..00d660c 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
#define N (32*32*32+17)
int main ()
@@ -20,13 +18,12 @@ int main ()
#pragma acc loop gang
for (unsigned ix = 0; ix < N; ix++)
{
- if (__builtin_acc_on_device (5))
+ if (acc_on_device (acc_device_not_host))
{
- int g = 0, w = 0, v = 0;
-
- __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
- __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
- __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+ int g, w, v;
+ g = __builtin_goacc_id (GOMP_DIM_GANG);
+ w = __builtin_goacc_id (GOMP_DIM_WORKER);
+ v = __builtin_goacc_id (GOMP_DIM_VECTOR);
ary[ix] = (g << 16) | (w << 8) | v;
ondev = 1;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c
index c06d861..db73f44 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
#define N (32*32*32+17)
int main ()
@@ -20,13 +18,13 @@ int main ()
#pragma acc loop gang (static:1)
for (unsigned ix = 0; ix < N; ix++)
{
- if (__builtin_acc_on_device (5))
+ if (acc_on_device (acc_device_not_host))
{
- int g = 0, w = 0, v = 0;
+ int g, w, v;
- __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
- __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
- __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+ g = __builtin_goacc_id (GOMP_DIM_GANG);
+ w = __builtin_goacc_id (GOMP_DIM_WORKER);
+ v = __builtin_goacc_id (GOMP_DIM_VECTOR);
ary[ix] = (g << 16) | (w << 8) | v;
ondev = 1;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c
index 42b612a..fe6e696 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
#define N (32*32*32+17)
int main ()
@@ -20,13 +18,14 @@ int main ()
#pragma acc loop gang worker vector
for (unsigned ix = 0; ix < N; ix++)
{
- if (__builtin_acc_on_device (5))
+ if (acc_on_device (acc_device_not_host))
{
- int g = 0, w = 0, v = 0;
+ int g, w, v;
+
+ g = __builtin_goacc_id (GOMP_DIM_GANG);
+ w = __builtin_goacc_id (GOMP_DIM_WORKER);
+ v = __builtin_goacc_id (GOMP_DIM_VECTOR);
- __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
- __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
- __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
ary[ix] = (g << 16) | (w << 8) | v;
ondev = 1;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c
index 929e01c..95f1118 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
#define N (32*32*32+17)
int main ()
@@ -18,13 +16,13 @@ int main ()
{
int val = ix;
- if (__builtin_acc_on_device (5))
+ if (acc_on_device (acc_device_not_host))
{
- int g = 0, w = 0, v = 0;
+ int g, w, v;
- __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
- __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
- __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+ g = __builtin_goacc_id (GOMP_DIM_GANG);
+ w = __builtin_goacc_id (GOMP_DIM_WORKER);
+ v = __builtin_goacc_id (GOMP_DIM_VECTOR);
val = (g << 16) | (w << 8) | v;
ondev = 1;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
index 4ae4b7c..5f26939 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
#define N (32*32*32+17)
int main ()
@@ -18,13 +16,13 @@ int main ()
{
int val = ix;
- if (__builtin_acc_on_device (5))
+ if (acc_on_device (acc_device_not_host))
{
- int g = 0, w = 0, v = 0;
+ int g, w, v;
- __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
- __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
- __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+ g = __builtin_goacc_id (GOMP_DIM_GANG);
+ w = __builtin_goacc_id (GOMP_DIM_WORKER);
+ v = __builtin_goacc_id (GOMP_DIM_VECTOR);
val = (g << 16) | (w << 8) | v;
ondev = 1;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c
index 0556455..a11f1f9 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
#define N (32*32*32+17)
@@ -19,13 +17,13 @@ int main ()
{
int val = ix;
- if (__builtin_acc_on_device (5))
+ if (acc_on_device (acc_device_not_host))
{
- int g = 0, w = 0, v = 0;
+ int g, w, v;
- __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
- __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
- __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+ g = __builtin_goacc_id (GOMP_DIM_GANG);
+ w = __builtin_goacc_id (GOMP_DIM_WORKER);
+ v = __builtin_goacc_id (GOMP_DIM_VECTOR);
val = (g << 16) | (w << 8) | v;
ondev = 1;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c
index 16d8f9f..79a9ea8 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
#define N (32*32*32+17)
@@ -21,13 +19,13 @@ int main ()
{
int val = ix;
- if (__builtin_acc_on_device (5))
+ if (acc_on_device (acc_device_not_host))
{
- int g = 0, w = 0, v = 0;
+ int g, w, v;
- __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
- __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
- __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+ g = __builtin_goacc_id (GOMP_DIM_GANG);
+ w = __builtin_goacc_id (GOMP_DIM_WORKER);
+ v = __builtin_goacc_id (GOMP_DIM_VECTOR);
val = (g << 16) | (w << 8) | v;
ondev = 1;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c
index 19021d9..28b0c35 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
#define N (32*32*32+17)
int main ()
@@ -18,13 +16,13 @@ int main ()
{
int val = ix;
- if (__builtin_acc_on_device (5))
+ if (acc_on_device (acc_device_not_host))
{
- int g = 0, w = 0, v = 0;
+ int g, w, v;
- __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
- __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
- __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+ g = __builtin_goacc_id (GOMP_DIM_GANG);
+ w = __builtin_goacc_id (GOMP_DIM_WORKER);
+ v = __builtin_goacc_id (GOMP_DIM_VECTOR);
val = (g << 16) | (w << 8) | v;
ondev = 1;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c
index f0c9d81..ad7a32b 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
#define N (32*32*32+17)
int main ()
@@ -20,13 +18,13 @@ int main ()
{
int val = ix;
- if (__builtin_acc_on_device (5))
+ if (acc_on_device (acc_device_not_host))
{
- int g = 0, w = 0, v = 0;
+ int g, w, v;
- __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
- __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
- __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+ g = __builtin_goacc_id (GOMP_DIM_GANG);
+ w = __builtin_goacc_id (GOMP_DIM_WORKER);
+ v = __builtin_goacc_id (GOMP_DIM_VECTOR);
val = (g << 16) | (w << 8) | v;
ondev = 1;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c
index 0fec2dc..0ba4b15 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c
@@ -2,6 +2,8 @@
/* { dg-additional-options "-O2" } */
#include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
#define N (32*32*32+17)
int main ()
@@ -17,13 +19,13 @@ int main ()
{
int val = ix;
- if (__builtin_acc_on_device (5))
+ if (acc_on_device (acc_device_not_host))
{
- int g = 0, w = 0, v = 0;
+ int g, w, v;
- __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
- __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
- __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+ g = __builtin_goacc_id (GOMP_DIM_GANG);
+ w = __builtin_goacc_id (GOMP_DIM_WORKER);
+ v = __builtin_goacc_id (GOMP_DIM_VECTOR);
val = (g << 16) | (w << 8) | v;
ondev = 1;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c
index 2974807..869cd20 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
#define N (32*32*32+17)
int main ()
@@ -20,13 +18,13 @@ int main ()
#pragma acc loop vector
for (unsigned ix = 0; ix < N; ix++)
{
- if (__builtin_acc_on_device (5))
+ if (acc_on_device (acc_device_not_host))
{
- int g = 0, w = 0, v = 0;
+ int g, w, v;
- __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
- __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
- __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+ g = __builtin_goacc_id (GOMP_DIM_GANG);
+ w = __builtin_goacc_id (GOMP_DIM_WORKER);
+ v = __builtin_goacc_id (GOMP_DIM_VECTOR);
ary[ix] = (g << 16) | (w << 8) | v;
ondev = 1;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c
index 33b6eae..1167ba4 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
#define N (32*32*32+17)
int main ()
@@ -20,13 +18,13 @@ int main ()
#pragma acc loop worker
for (unsigned ix = 0; ix < N; ix++)
{
- if (__builtin_acc_on_device (5))
+ if (acc_on_device (acc_device_not_host))
{
- int g = 0, w = 0, v = 0;
+ int g, w, v;
- __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
- __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
- __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+ g = __builtin_goacc_id (GOMP_DIM_GANG);
+ w = __builtin_goacc_id (GOMP_DIM_WORKER);
+ v = __builtin_goacc_id (GOMP_DIM_VECTOR);
ary[ix] = (g << 16) | (w << 8) | v;
ondev = 1;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c
index 578cfad..aa665c5 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
#define N (32*32*32+17)
int main ()
@@ -20,13 +18,13 @@ int main ()
#pragma acc loop worker vector
for (unsigned ix = 0; ix < N; ix++)
{
- if (__builtin_acc_on_device (5))
+ if (acc_on_device (acc_device_not_host))
{
- int g = 0, w = 0, v = 0;
+ int g, w, v;
- __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
- __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
- __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+ g = __builtin_goacc_id (GOMP_DIM_GANG);
+ w = __builtin_goacc_id (GOMP_DIM_WORKER);
+ v = __builtin_goacc_id (GOMP_DIM_VECTOR);
ary[ix] = (g << 16) | (w << 8) | v;
ondev = 1;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
index 8308f7c..49c59cf 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
@@ -3,6 +3,7 @@
#include <limits.h>
#include <openacc.h>
+#include <gomp-constants.h>
/* TODO: "(int) acc_device_*" casts because of the C++ acc_on_device wrapper
not behaving as expected for -O0. */
@@ -12,11 +13,7 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_gang ()
if (acc_on_device ((int) acc_device_host))
return 0;
else if (acc_on_device ((int) acc_device_nvidia))
- {
- unsigned int r;
- asm volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (r));
- return r;
- }
+ return __builtin_goacc_id (GOMP_DIM_GANG);
else
__builtin_abort ();
}
@@ -27,11 +24,7 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_worker ()
if (acc_on_device ((int) acc_device_host))
return 0;
else if (acc_on_device ((int) acc_device_nvidia))
- {
- unsigned int r;
- asm volatile ("mov.u32 %0,%%tid.y;" : "=r" (r));
- return r;
- }
+ return __builtin_goacc_id (GOMP_DIM_WORKER);
else
__builtin_abort ();
}
@@ -42,11 +35,7 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_vector ()
if (acc_on_device ((int) acc_device_host))
return 0;
else if (acc_on_device ((int) acc_device_nvidia))
- {
- unsigned int r;
- asm volatile ("mov.u32 %0,%%tid.x;" : "=r" (r));
- return r;
- }
+ return __builtin_goacc_id (GOMP_DIM_VECTOR);
else
__builtin_abort ();
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c
index b6ab713..244f6cc 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
#define N (32*32*32+17)
@@ -12,13 +10,13 @@ void __attribute__ ((noinline)) gang (int ary[N])
#pragma acc loop gang
for (unsigned ix = 0; ix < N; ix++)
{
- if (__builtin_acc_on_device (5))
+ if (acc_on_device (acc_device_not_host))
{
- int g = 0, w = 0, v = 0;
+ int g, w, v;
- __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
- __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
- __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+ g = __builtin_goacc_id (GOMP_DIM_GANG);
+ w = __builtin_goacc_id (GOMP_DIM_WORKER);
+ v = __builtin_goacc_id (GOMP_DIM_VECTOR);
ary[ix] = (g << 16) | (w << 8) | v;
}
else
@@ -38,7 +36,7 @@ int main ()
#pragma acc parallel num_gangs(32) copy(ary) copy(ondev)
{
- ondev = __builtin_acc_on_device (5);
+ ondev = acc_on_device (acc_device_not_host);
gang (ary);
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c
index ace2f49..bd784c5 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
#define N (32*32*32+17)
@@ -12,13 +10,13 @@ void __attribute__ ((noinline)) gang (int ary[N])
#pragma acc loop gang worker vector
for (unsigned ix = 0; ix < N; ix++)
{
- if (__builtin_acc_on_device (5))
+ if (acc_on_device (acc_device_not_host))
{
- int g = 0, w = 0, v = 0;
+ int g, w, v;
- __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
- __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
- __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+ g = __builtin_goacc_id (GOMP_DIM_GANG);
+ w = __builtin_goacc_id (GOMP_DIM_WORKER);
+ v = __builtin_goacc_id (GOMP_DIM_VECTOR);
ary[ix] = (g << 16) | (w << 8) | v;
}
else
@@ -38,7 +36,7 @@ int main ()
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev)
{
- ondev = __builtin_acc_on_device (5);
+ ondev = acc_on_device (acc_device_not_host);
gang (ary);
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c
index 2503e8d..bec68fe 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
#define N (32*32*32+17)
@@ -12,13 +10,13 @@ void __attribute__ ((noinline)) vector (int ary[N])
#pragma acc loop vector
for (unsigned ix = 0; ix < N; ix++)
{
- if (__builtin_acc_on_device (5))
+ if (acc_on_device (acc_device_not_host))
{
- int g = 0, w = 0, v = 0;
+ int g, w, v;
- __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
- __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
- __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+ g = __builtin_goacc_id (GOMP_DIM_GANG);
+ w = __builtin_goacc_id (GOMP_DIM_WORKER);
+ v = __builtin_goacc_id (GOMP_DIM_VECTOR);
ary[ix] = (g << 16) | (w << 8) | v;
}
else
@@ -38,7 +36,7 @@ int main ()
#pragma acc parallel vector_length(32) copy(ary) copy(ondev)
{
- ondev = __builtin_acc_on_device (5);
+ ondev = acc_on_device (acc_device_not_host);
vector (ary);
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c
index 80cd462..bbdd158 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
#define N (32*32*32+17)
@@ -12,13 +10,13 @@ void __attribute__ ((noinline)) worker (int ary[N])
#pragma acc loop worker
for (unsigned ix = 0; ix < N; ix++)
{
- if (__builtin_acc_on_device (5))
+ if (acc_on_device (acc_device_not_host))
{
- int g = 0, w = 0, v = 0;
+ int g, w, v;
- __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
- __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
- __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+ g = __builtin_goacc_id (GOMP_DIM_GANG);
+ w = __builtin_goacc_id (GOMP_DIM_WORKER);
+ v = __builtin_goacc_id (GOMP_DIM_VECTOR);
ary[ix] = (g << 16) | (w << 8) | v;
}
else
@@ -38,7 +36,7 @@ int main ()
#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev)
{
- ondev = __builtin_acc_on_device (5);
+ ondev = acc_on_device (acc_device_not_host);
worker (ary);
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c
index 5e45fad..50b6027 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c
@@ -1,8 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
#define N (32*32*32+17)
@@ -12,13 +10,13 @@ void __attribute__ ((noinline)) worker (int ary[N])
#pragma acc loop worker vector
for (unsigned ix = 0; ix < N; ix++)
{
- if (__builtin_acc_on_device (5))
+ if (acc_on_device (acc_device_not_host))
{
- int g = 0, w = 0, v = 0;
+ int g, w, v;
- __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
- __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
- __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+ g = __builtin_goacc_id (GOMP_DIM_GANG);
+ w = __builtin_goacc_id (GOMP_DIM_WORKER);
+ v = __builtin_goacc_id (GOMP_DIM_VECTOR);
ary[ix] = (g << 16) | (w << 8) | v;
}
else
@@ -38,7 +36,7 @@ int main ()
#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev)
{
- ondev = __builtin_acc_on_device (5);
+ ondev = acc_on_device (acc_device_not_host);
worker (ary);
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c
index b5cbc90..1a7bece 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c
@@ -1,9 +1,6 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
#include <openacc.h>
+#include <gomp-constants.h>
#define NUM_WORKERS 16
#define NUM_VECTORS 32
@@ -11,15 +8,13 @@
#define HEIGHT 32
#define WORK_ID(I,N) \
- (acc_on_device (acc_device_nvidia) \
- ? ({unsigned __r; \
- __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (__r)); \
- __r; }) : (I % N))
+ (acc_on_device (acc_device_not_host) \
+ ? __builtin_goacc_id (GOMP_DIM_WORKER) \
+ : (I % N))
#define VEC_ID(I,N) \
- (acc_on_device (acc_device_nvidia) \
- ? ({unsigned __r; \
- __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (__r)); \
- __r; }) : (I % N))
+ (acc_on_device (acc_device_not_host) \
+ ? __builtin_goacc_id (GOMP_DIM_VECTOR) \
+ : (I % N))
#pragma acc routine worker
void __attribute__ ((noinline))
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c
index 8dcb956..2ffc096 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c
@@ -1,11 +1,8 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
/* { dg-additional-options "-fopenacc-dim=32" } */
#include <stdio.h>
#include <openacc.h>
+#include <gomp-constants.h>
static int check (const int *ary, int size, int gp, int wp, int vp)
{
@@ -79,13 +76,13 @@ static int __attribute__((noinline)) place ()
{
int r = 0;
- if (acc_on_device (acc_device_nvidia))
+ if (acc_on_device (acc_device_not_host))
{
- int g = 0, w = 0, v = 0;
+ int g, w, v;
- __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
- __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
- __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+ g = __builtin_goacc_id (GOMP_DIM_GANG);
+ w = __builtin_goacc_id (GOMP_DIM_WORKER);
+ v = __builtin_goacc_id (GOMP_DIM_VECTOR);
r = (g << 16) | (w << 8) | v;
}
return r;