Hi,
the openacc standard states: If the acc_on_device routine has
a compile-time constant argument, it evaluates at compile time to a
constant.
The purpose of this is to remove non-applicable device-specific code
during compilation. In the case of asm insns which are device-specific,
removal is even needed to be able to compile for host.
When optimizing, the compiler complies with this requirement, through
gimple_fold_builtin_acc_on_device and following optimizations. But that
doesn't work at -O0.
Consequenly, a test-case like f.i. loop-auto-1.c that has
device-specific asm insns:
...
#pragma acc routine seq
static int __attribute__((noinline)) place ()
{
int r = 0;
if (acc_on_device (acc_device_nvidia))
{
int g = 0, w = 0, v = 0;
__asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
__asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
__asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
r = (g << 16) | (w << 8) | v;
}
return r;
}
...
skips -O0:
...
/* This code uses nvptx inline assembly guarded with acc_on_device,
which is not optimized away at -O0, and then confuses the target
assembler.
{ dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
...
This patch adds folding of acc_on_device with constant argument at -O0.
This folding is done by fold_builtin_acc_on_device_cst_arg during
pass_oacc_device_lower, which also propagates the folded value to it's
uses, which allows TODO_cleanup_cfg to remove the dead code.
This solution works fine for C, but for C++ things are a bit more
complicated. In C, the 'int acc_on_device (acc_device_t)' maps onto the
'int __builtin_acc_on_device (int)', but for C++ that's not the case.
The current solution for that problem is an inline function in
openacc.h, but at -O0 that adds too much indirection to still be able to
remove the dead code. The easiest solution is:
...
#define acc_on_device(dev) __builtin_acc_on_device ((int)dev)
...
but that's not strictly compliant with the openacc standard, which
requires an openacc interface function 'int
acc_on_device(acc_device_t)', not a macro.
So we end up with a kludge in oacc_xform_acc_on_device that maps the
openacc interface function acc_on_device onto the builtin function.
Bootstrapped and reg-tested on x86_64.
Build and reg-tested for x86_64 with nvptx accelerator.
OK for trunk?
Thanks,
- Tom
Fold acc_on_device with const arg
2017-12-22 Tom de Vries <t...@codesourcery.com>
PR libgomp/82391
* omp-offload.c (fold_builtin_acc_on_device_cst_arg)
(oacc_xform_acc_on_device, oacc_device_lower_non_offloaded): New
function.
(execute_oacc_device_lower): Call oacc_device_lower_non_offloaded.
Call oacc_xform_acc_on_device.
* openacc.h [__cplusplus] (acc_on_device (int)): Remove.
[__cplusplus] (acc_on_device (acc_device_t)): Remove definition, and
declare instead with __builtin_acc_on_device attributes.
* testsuite/libgomp.oacc-c-c++-common/acc-on-device-4.c: New test.
* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Remove int casts
from args of acc_on_device calls.
* testsuite/libgomp.oacc-c-c++-common/gang-static-2.c: Remove skip for
-O0.
* testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c: Same.
* testsuite/libgomp.oacc-c-c++-common/loop-g-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/loop-g-2.c: Same.
* testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c: Same.
* testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c: Same.
* testsuite/libgomp.oacc-c-c++-common/loop-v-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/loop-w-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/routine-g-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/routine-v-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/routine-w-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c: Same.
* testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c: Same.
* testsuite/libgomp.oacc-c-c++-common/tile-1.c: Same.
---
gcc/omp-offload.c | 121 ++++++++++++++++++++-
libgomp/openacc.h | 14 +--
.../libgomp.oacc-c-c++-common/acc-on-device-4.c | 18 +++
.../libgomp.oacc-c-c++-common/gang-static-2.c | 3 -
.../libgomp.oacc-c-c++-common/loop-auto-1.c | 4 -
.../libgomp.oacc-c-c++-common/loop-dim-default.c | 3 -
.../testsuite/libgomp.oacc-c-c++-common/loop-g-1.c | 4 -
.../testsuite/libgomp.oacc-c-c++-common/loop-g-2.c | 4 -
.../libgomp.oacc-c-c++-common/loop-gwv-1.c | 4 -
.../libgomp.oacc-c-c++-common/loop-red-g-1.c | 4 -
.../libgomp.oacc-c-c++-common/loop-red-gwv-1.c | 4 -
.../libgomp.oacc-c-c++-common/loop-red-v-1.c | 4 -
.../libgomp.oacc-c-c++-common/loop-red-v-2.c | 4 -
.../libgomp.oacc-c-c++-common/loop-red-w-1.c | 4 -
.../libgomp.oacc-c-c++-common/loop-red-w-2.c | 4 -
.../testsuite/libgomp.oacc-c-c++-common/loop-v-1.c | 4 -
.../testsuite/libgomp.oacc-c-c++-common/loop-w-1.c | 4 -
.../libgomp.oacc-c-c++-common/loop-wv-1.c | 4 -
.../libgomp.oacc-c-c++-common/parallel-dims.c | 14 +--
.../libgomp.oacc-c-c++-common/routine-g-1.c | 4 -
.../libgomp.oacc-c-c++-common/routine-gwv-1.c | 4 -
.../libgomp.oacc-c-c++-common/routine-v-1.c | 4 -
.../libgomp.oacc-c-c++-common/routine-w-1.c | 4 -
.../libgomp.oacc-c-c++-common/routine-wv-1.c | 4 -
.../libgomp.oacc-c-c++-common/routine-wv-2.c | 4 -
.../testsuite/libgomp.oacc-c-c++-common/tile-1.c | 4 -
26 files changed, 146 insertions(+), 107 deletions(-)
diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c
index 9d5b8be..0bcbde2 100644
--- a/gcc/omp-offload.c
+++ b/gcc/omp-offload.c
@@ -52,6 +52,8 @@ along with GCC; see the file COPYING3. If not see
#include "stringpool.h"
#include "attribs.h"
#include "cfgloop.h"
+#include "gimple-fold.h"
+#include "tree-ssa-propagate.h"
/* Describe the OpenACC looping structure of a function. The entire
function is held in a 'NULL' loop. */
@@ -1451,6 +1453,116 @@ default_goacc_reduction (gcall *call)
gsi_replace_with_seq (&gsi, seq, true);
}
+/* Fold a call to __builtin_acc_on_device with constant argument.
+ The openacc standard states: if the acc_on_device routine has a
+ compile-time constant argument, it evaluates at compile time to a
+ constant. The purpose of this is to remove non-applicable device-specific
+ code during compilation. In the case of asm insns which are
+ device-specific, removal is even needed to be able to compile for host. */
+
+static bool
+fold_builtin_acc_on_device_cst_arg (gimple_stmt_iterator *gsi, tree arg0)
+{
+ if (TREE_CODE (arg0) != INTEGER_CST)
+ return false;
+ HOST_WIDE_INT val = tree_to_shwi (arg0);
+
+ unsigned val_host, val_dev;
+#ifdef ACCEL_COMPILER
+ val_host = GOMP_DEVICE_NOT_HOST;
+ val_dev = ACCEL_COMPILER_acc_device;
+#else
+ val_host = GOMP_DEVICE_HOST;
+ val_dev = GOMP_DEVICE_NONE;
+#endif
+ bool res = val == val_host || val == val_dev;
+
+ tree replacement = res ? integer_one_node : integer_zero_node;
+
+ /* Propagate the acc_on_device result to its uses. If it's propagated to a
+ condition, then TODO_cleanup_cfg will eliminate the dead code. */
+ gimple *stmt = gsi_stmt (*gsi);
+ tree lhs = gimple_call_lhs (stmt);
+ imm_use_iterator iter;
+ gimple *use_stmt;
+ use_operand_p use_p;
+ FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
+ {
+ FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
+ propagate_value (use_p, replacement);
+
+ update_stmt (use_stmt);
+ }
+
+ replace_call_with_value (gsi, replacement);
+ return true;
+}
+
+/* Do oacc transformations for acc_on_device calls. */
+
+static void
+oacc_xform_acc_on_device (gimple_stmt_iterator *gsi)
+{
+ gimple *stmt = gsi_stmt (*gsi);
+ gcall *call = as_a <gcall *> (stmt);
+
+ /* Kludge: The openacc standard declares a function
+ 'int acc_on_device (acc_device_t)', but we have a builtin
+ 'int __builtin_acc_on_device (int)'. When compiling for c++, these are
+ distinct functions, so here we map the former onto the latter. */
+ tree acc_on_device_id = get_identifier ("acc_on_device");
+ tree acc_device_t_id = get_identifier ("acc_device_t");
+ tree fndecl = gimple_call_fndecl (call);
+ if (fndecl)
+ {
+ tree fntype = TREE_TYPE (fndecl);
+ tree fnrettype = TREE_TYPE (fntype);
+ tree fnargstypes = TYPE_ARG_TYPES (fntype);
+ tree fnargtype = (fnargstypes != NULL_TREE
+ ? TREE_VALUE (fnargstypes)
+ : NULL_TREE);
+ bool one_arg = (fnargtype != NULL_TREE
+ && TREE_CHAIN (fnargstypes) != NULL_TREE
+ && VOID_TYPE_P (TREE_VALUE (TREE_CHAIN (fnargstypes))));
+ if (DECL_NAME (fndecl) == acc_on_device_id
+ && fnrettype == integer_type_node
+ && one_arg
+ && TREE_CODE (fnargtype) == ENUMERAL_TYPE
+ && TYPE_IDENTIFIER (fnargtype) == acc_device_t_id)
+ {
+ tree builtin_fndecl
+ = builtin_decl_explicit (BUILT_IN_ACC_ON_DEVICE);
+ gimple_call_set_fndecl (call, builtin_fndecl);
+ }
+ }
+
+ if (gimple_call_builtin_p (call, BUILT_IN_NORMAL))
+ {
+ enum built_in_function fcode
+ = DECL_FUNCTION_CODE (gimple_call_fndecl (call));
+ if (fcode == BUILT_IN_ACC_ON_DEVICE)
+ fold_builtin_acc_on_device_cst_arg (gsi, gimple_call_arg (stmt, 0));
+ }
+}
+
+/* Do oacc transformations for the host fallback. */
+
+static void
+oacc_device_lower_non_offloaded (void)
+{
+ basic_block bb;
+ FOR_ALL_BB_FN (bb, cfun)
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+ gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+ if (!is_gimple_call (stmt))
+ continue;
+
+ oacc_xform_acc_on_device (&gsi);
+ }
+}
+
/* Main entry point for oacc transformations which run on the device
compiler after LTO, so we know what the target device is at this
point (including the host fallback). */
@@ -1461,8 +1573,11 @@ execute_oacc_device_lower ()
tree attrs = oacc_get_fn_attrib (current_function_decl);
if (!attrs)
- /* Not an offloaded function. */
- return 0;
+ {
+ /* Not an offloaded function. */
+ oacc_device_lower_non_offloaded ();
+ return 0;
+ }
/* Parse the default dim argument exactly once. */
if ((const void *)flag_openacc_dims != &flag_openacc_dims)
@@ -1551,6 +1666,8 @@ execute_oacc_device_lower ()
continue;
}
+ oacc_xform_acc_on_device (&gsi);
+
gcall *call = as_a <gcall *> (stmt);
if (!gimple_call_internal_p (call))
{
diff --git a/libgomp/openacc.h b/libgomp/openacc.h
index 137e2c1..0e6904e 100644
--- a/libgomp/openacc.h
+++ b/libgomp/openacc.h
@@ -82,9 +82,7 @@ void acc_async_wait_all (void) __GOACC_NOTHROW;
void acc_wait_all_async (int) __GOACC_NOTHROW;
void acc_init (acc_device_t) __GOACC_NOTHROW;
void acc_shutdown (acc_device_t) __GOACC_NOTHROW;
-#ifdef __cplusplus
-int acc_on_device (int __arg) __GOACC_NOTHROW;
-#else
+#ifndef __cplusplus
int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW;
#endif
void *acc_malloc (size_t) __GOACC_NOTHROW;
@@ -117,14 +115,8 @@ int acc_set_cuda_stream (int, void *) __GOACC_NOTHROW;
#ifdef __cplusplus
}
-
-/* Forwarding function with correctly typed arg. */
-
-#pragma acc routine seq
-inline int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW
-{
- return acc_on_device ((int) __arg);
-}
+int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW __attribute__((const))
+ __attribute__((leaf));
#endif
#endif /* _OPENACC_H */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device-4.c
new file mode 100644
index 0000000..873663a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device-4.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O0" } */
+/* { dg-additional-options "-fdump-tree-oaccdevlow" } */
+
+#include <openacc.h>
+
+extern void bar ();
+
+void
+foo (void)
+{
+ if (!acc_on_device (acc_device_host))
+ bar ();
+}
+
+/* { dg-final { scan-tree-dump-not "acc_on_device" "oaccdevlow" } } */
+/* { dg-final { scan-tree-dump-not "bar" "oaccdevlow" } } */
+/* { dg-final { scan-tree-dump "_\[0-9\] = 1" "oaccdevlow" } } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c
index ce9632c..487f079 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c
@@ -1,7 +1,4 @@
/* { dg-do run { target openacc_nvidia_accel_selected } } */
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
#include <assert.h>
#include <openacc.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c
index 863b6b3..fd0e19c 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c
@@ -1,7 +1,3 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
/* { dg-additional-options "-fopenacc-dim=32" } */
#include <stdio.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c
index e2b08c3..d1b8c64 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c
@@ -1,6 +1,3 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler. */
-/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
/* { dg-additional-options "-fopenacc-dim=16:16" } */
#include <openacc.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c
index ae1d588..18ed9d1 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c
@@ -1,7 +1,3 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
#define N (32*32*32+17)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c
index c06d861..df2f1ea 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c
@@ -1,7 +1,3 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
#define N (32*32*32+17)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c
index 42b612a..acc5512 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c
@@ -1,7 +1,3 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
#define N (32*32*32+17)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c
index 929e01c..fa7ee2e 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c
@@ -1,7 +1,3 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
#define N (32*32*32+17)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
index 4ae4b7c..b18876b 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
@@ -1,7 +1,3 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
#define N (32*32*32+17)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c
index 0556455..9ca78bd 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c
@@ -1,7 +1,3 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
#define N (32*32*32+17)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c
index 16d8f9f..c0ff3eb 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c
@@ -1,7 +1,3 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
#define N (32*32*32+17)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c
index 19021d9..c273b4e 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c
@@ -1,7 +1,3 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
#define N (32*32*32+17)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c
index f0c9d81..05974fb 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c
@@ -1,7 +1,3 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
#define N (32*32*32+17)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c
index 2974807..99ec21c 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c
@@ -1,7 +1,3 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
#define N (32*32*32+17)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c
index 33b6eae..e3621e3 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c
@@ -1,7 +1,3 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
#define N (32*32*32+17)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c
index 578cfad..d69735a 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c
@@ -1,7 +1,3 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
#define N (32*32*32+17)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
index 8308f7c..1c48ab3 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
@@ -4,14 +4,12 @@
#include <limits.h>
#include <openacc.h>
-/* TODO: "(int) acc_device_*" casts because of the C++ acc_on_device wrapper
- not behaving as expected for -O0. */
#pragma acc routine seq
static unsigned int __attribute__ ((optimize ("O2"))) acc_gang ()
{
- if (acc_on_device ((int) acc_device_host))
+ if (acc_on_device (acc_device_host))
return 0;
- else if (acc_on_device ((int) acc_device_nvidia))
+ else if (acc_on_device (acc_device_nvidia))
{
unsigned int r;
asm volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (r));
@@ -24,9 +22,9 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_gang ()
#pragma acc routine seq
static unsigned int __attribute__ ((optimize ("O2"))) acc_worker ()
{
- if (acc_on_device ((int) acc_device_host))
+ if (acc_on_device (acc_device_host))
return 0;
- else if (acc_on_device ((int) acc_device_nvidia))
+ else if (acc_on_device (acc_device_nvidia))
{
unsigned int r;
asm volatile ("mov.u32 %0,%%tid.y;" : "=r" (r));
@@ -39,9 +37,9 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_worker ()
#pragma acc routine seq
static unsigned int __attribute__ ((optimize ("O2"))) acc_vector ()
{
- if (acc_on_device ((int) acc_device_host))
+ if (acc_on_device (acc_device_host))
return 0;
- else if (acc_on_device ((int) acc_device_nvidia))
+ else if (acc_on_device (acc_device_nvidia))
{
unsigned int r;
asm volatile ("mov.u32 %0,%%tid.x;" : "=r" (r));
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c
index b6ab713..0043e84 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c
@@ -1,7 +1,3 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
#define N (32*32*32+17)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c
index ace2f49..ecb2931 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c
@@ -1,7 +1,3 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
#define N (32*32*32+17)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c
index 2503e8d..801aa14 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c
@@ -1,7 +1,3 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
#define N (32*32*32+17)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c
index 80cd462..097ebdb 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c
@@ -1,7 +1,3 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
#define N (32*32*32+17)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c
index 5e45fad..d62b0e2 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c
@@ -1,7 +1,3 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
#define N (32*32*32+17)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c
index b5cbc90..820f149 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c
@@ -1,7 +1,3 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
#include <stdio.h>
#include <openacc.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c
index 8dcb956..8549e8a 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c
@@ -1,7 +1,3 @@
-/* This code uses nvptx inline assembly guarded with acc_on_device, which is
- not optimized away at -O0, and then confuses the target assembler.
- { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-
/* { dg-additional-options "-fopenacc-dim=32" } */
#include <stdio.h>