Hi Chung-Lin!

On Thu, 06 Dec 2018 21:42:14 +0100, I wrote:
> On Tue, 25 Sep 2018 21:09:49 +0800, Chung-Lin Tang <chunglin_t...@mentor.com> wrote:
> > Also included in this patch is the code for the acc_get/set_default_async
> > API functions in OpenACC 2.5.
> > It's a minor part of this patch, but since some code was merged together,
> > I'm submitting it together here.
>
> As I requested, I'm reviewing those changes separately, and have backed
> out those changes in my working copy.
... as follows: commit 79b89a5214dc2624a52f0593bbfad5cefed0c025 Author: Thomas Schwinge <tho...@codesourcery.com> Date: Thu Dec 6 15:57:46 2018 +0100 into async re-work: revert default_async changes --- include/gomp-constants.h | 1 - libgomp/libgomp.map | 4 - libgomp/oacc-async.c | 19 +- libgomp/oacc-init.c | 2 - libgomp/oacc-int.h | 3 - libgomp/openacc.f90 | 22 +- libgomp/openacc.h | 3 - libgomp/openacc_lib.h | 13 - .../libgomp.oacc-c-c++-common/asyncwait-2.c | 904 --------------------- 9 files changed, 2 insertions(+), 969 deletions(-) diff --git include/gomp-constants.h include/gomp-constants.h index acd25851bcc7..1021306ed661 100644 --- include/gomp-constants.h +++ include/gomp-constants.h @@ -160,7 +160,6 @@ enum gomp_map_kind /* Asynchronous behavior. Keep in sync with libgomp/{openacc.h,openacc.f90,openacc_lib.h}:acc_async_t. */ -#define GOMP_ASYNC_DEFAULT 0 #define GOMP_ASYNC_NOVAL -1 #define GOMP_ASYNC_SYNC -2 diff --git libgomp/libgomp.map libgomp/libgomp.map index c5e1b876fccd..d2381da3bf07 100644 --- libgomp/libgomp.map +++ libgomp/libgomp.map @@ -464,12 +464,8 @@ OACC_2.5 { acc_delete_finalize_async_32_h_; acc_delete_finalize_async_64_h_; acc_delete_finalize_async_array_h_; - acc_get_default_async; - acc_get_default_async_h_; acc_memcpy_from_device_async; acc_memcpy_to_device_async; - acc_set_default_async; - acc_set_default_async_h_; acc_update_device_async; acc_update_device_async_32_h_; acc_update_device_async_64_h_; diff --git libgomp/oacc-async.c libgomp/oacc-async.c index 68aaf199a27e..553082fe3d4a 100644 --- libgomp/oacc-async.c +++ libgomp/oacc-async.c @@ -60,7 +60,7 @@ lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async) /* The special value acc_async_noval (-1) maps to the thread-specific default async stream. 
*/ if (async == acc_async_noval) - async = thr->default_async; + async = 0; //TODO thr->default_async; if (async == acc_async_sync) return NULL; @@ -221,23 +221,6 @@ acc_wait_all_async (int async) gomp_mutex_unlock (&thr->dev->openacc.async.lock); } -int -acc_get_default_async (void) -{ - struct goacc_thread *thr = get_goacc_thread (); - return thr->default_async; -} - -void -acc_set_default_async (int async) -{ - if (async < acc_async_sync) - gomp_fatal ("invalid async argument: %d", async); - - struct goacc_thread *thr = get_goacc_thread (); - thr->default_async = async; -} - static void goacc_async_unmap_tgt (void *ptr) { diff --git libgomp/oacc-init.c libgomp/oacc-init.c index 2c2f91ce3c2c..c40f48829078 100644 --- libgomp/oacc-init.c +++ libgomp/oacc-init.c @@ -426,8 +426,6 @@ goacc_attach_host_thread_to_device (int ord) thr->target_tls = acc_dev->openacc.create_thread_data_func (ord); - - thr->default_async = acc_async_default; } /* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of diff --git libgomp/oacc-int.h libgomp/oacc-int.h index 3354eb654ce9..97f3fc8a61ed 100644 --- libgomp/oacc-int.h +++ libgomp/oacc-int.h @@ -73,9 +73,6 @@ struct goacc_thread /* Target-specific data (used by plugin). */ void *target_tls; - - /* Default OpenACC async queue for current thread, exported to plugin. */ - int default_async; }; #if defined HAVE_TLS || defined USE_EMUTLS diff --git libgomp/openacc.f90 libgomp/openacc.f90 index 7d31ee689479..7c809fe00738 100644 --- libgomp/openacc.f90 +++ libgomp/openacc.f90 @@ -51,10 +51,9 @@ module openacc_kinds integer, parameter :: acc_handle_kind = int32 - public :: acc_async_default, acc_async_noval, acc_async_sync + public :: acc_async_noval, acc_async_sync ! Keep in sync with include/gomp-constants.h. 
- integer (acc_handle_kind), parameter :: acc_async_default = 0 integer (acc_handle_kind), parameter :: acc_async_noval = -1 integer (acc_handle_kind), parameter :: acc_async_sync = -2 @@ -93,16 +92,6 @@ module openacc_internal integer (acc_device_kind) d end function - subroutine acc_set_default_async_h (a) - import - integer a - end subroutine - - function acc_get_default_async_h () - import - integer acc_get_default_async_h - end function - function acc_async_test_h (a) logical acc_async_test_h integer a @@ -731,7 +720,6 @@ module openacc public :: acc_get_num_devices, acc_set_device_type, acc_get_device_type public :: acc_set_device_num, acc_get_device_num, acc_async_test - public :: acc_set_default_async, acc_get_default_async public :: acc_async_test_all public :: acc_wait, acc_async_wait, acc_wait_async public :: acc_wait_all, acc_async_wait_all, acc_wait_all_async @@ -764,14 +752,6 @@ module openacc procedure :: acc_get_device_num_h end interface - interface acc_set_default_async - procedure :: acc_set_default_async_h - end interface - - interface acc_get_default_async - procedure :: acc_get_default_async_h - end interface - interface acc_async_test procedure :: acc_async_test_h end interface diff --git libgomp/openacc.h libgomp/openacc.h index ede59d76c862..f61bb77f9f3e 100644 --- libgomp/openacc.h +++ libgomp/openacc.h @@ -63,7 +63,6 @@ typedef enum acc_device_t { typedef enum acc_async_t { /* Keep in sync with include/gomp-constants.h. 
*/ - acc_async_default = 0, acc_async_noval = -1, acc_async_sync = -2 } acc_async_t; @@ -73,8 +72,6 @@ void acc_set_device_type (acc_device_t) __GOACC_NOTHROW; acc_device_t acc_get_device_type (void) __GOACC_NOTHROW; void acc_set_device_num (int, acc_device_t) __GOACC_NOTHROW; int acc_get_device_num (acc_device_t) __GOACC_NOTHROW; -void acc_set_default_async (int) __GOACC_NOTHROW; -int acc_get_default_async (void) __GOACC_NOTHROW; int acc_async_test (int) __GOACC_NOTHROW; int acc_async_test_all (void) __GOACC_NOTHROW; void acc_wait (int) __GOACC_NOTHROW; diff --git libgomp/openacc_lib.h libgomp/openacc_lib.h index 75a693937967..820d987d72e2 100644 --- libgomp/openacc_lib.h +++ libgomp/openacc_lib.h @@ -46,7 +46,6 @@ integer, parameter :: acc_handle_kind = 4 ! Keep in sync with include/gomp-constants.h. - integer (acc_handle_kind), parameter :: acc_async_default = 0 integer (acc_handle_kind), parameter :: acc_async_noval = -1 integer (acc_handle_kind), parameter :: acc_async_sync = -2 @@ -90,18 +89,6 @@ end function end interface - interface acc_set_default_async - subroutine acc_set_default_async_h (a) - integer a - end subroutine - end interface - - interface acc_get_default_async - function acc_get_default_async_h () - integer acc_get_default_async_h - end function - end interface - interface acc_async_test function acc_async_test_h (a) logical acc_async_test_h diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-2.c libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-2.c deleted file mode 100644 index 94205407d41d..000000000000 --- libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-2.c +++ /dev/null @@ -1,904 +0,0 @@ -/* { dg-do run { target openacc_nvidia_accel_selected } } */ -/* { dg-additional-options "-lcuda" } */ - -#include <openacc.h> -#include <stdlib.h> -#include <cuda.h> - -#include <stdio.h> -#include <time.h> -#include <sys/time.h> - -int -main (int argc, char **argv) -{ - CUresult r; - CUstream stream1; - int N = 128; //1024 * 
1024; - float *a, *b, *c, *d, *e; - int i; - int nbytes; - - srand (time (NULL)); - int s = rand () % 100; - - acc_init (acc_device_nvidia); - - nbytes = N * sizeof (float); - - a = (float *) malloc (nbytes); - b = (float *) malloc (nbytes); - c = (float *) malloc (nbytes); - d = (float *) malloc (nbytes); - e = (float *) malloc (nbytes); - - for (i = 0; i < N; i++) - { - a[i] = 3.0; - b[i] = 0.0; - } - - acc_set_default_async (s); - -#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N) - { - -#pragma acc parallel async - { - int ii; - - for (ii = 0; ii < N; ii++) - b[ii] = a[ii]; - } - -#pragma acc wait - - } - - for (i = 0; i < N; i++) - { - if (a[i] != 3.0) - abort (); - - if (b[i] != 3.0) - abort (); - } - - for (i = 0; i < N; i++) - { - a[i] = 2.0; - b[i] = 0.0; - } - -#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N) - { - -#pragma acc parallel async - { - int ii; - - for (ii = 0; ii < N; ii++) - b[ii] = a[ii]; - } - -#pragma acc wait (s) - - } - - for (i = 0; i < N; i++) - { - if (a[i] != 2.0) - abort (); - - if (b[i] != 2.0) - abort (); - } - - for (i = 0; i < N; i++) - { - a[i] = 3.0; - b[i] = 0.0; - c[i] = 0.0; - d[i] = 0.0; - } - -#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N) - { - -#pragma acc parallel async - { - int ii; - - for (ii = 0; ii < N; ii++) - b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; - } - -#pragma acc parallel async - { - int ii; - - for (ii = 0; ii < N; ii++) - c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; - } - - -#pragma acc parallel async - { - int ii; - - for (ii = 0; ii < N; ii++) - d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii]; - } - -#pragma acc wait (s) - - } - - for (i = 0; i < N; i++) - { - if (a[i] != 3.0) - abort (); - - if (b[i] != 9.0) - abort (); - - if (c[i] != 4.0) - abort (); - - if (d[i] != 1.0) - abort (); - } - - for (i = 0; i < N; i++) - { - a[i] = 2.0; - b[i] = 0.0; - c[i] = 0.0; - d[i] = 0.0; - e[i] = 0.0; - } - -#pragma acc data copy (a[0:N], b[0:N], c[0:N], 
d[0:N], e[0:N]) copyin (N) - { - -#pragma acc parallel async - { - int ii; - - for (ii = 0; ii < N; ii++) - b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; - } - -#pragma acc parallel async - { - int ii; - - for (ii = 0; ii < N; ii++) - c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; - } - -#pragma acc parallel async - { - int ii; - - for (ii = 0; ii < N; ii++) - d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii]; - } - -#pragma acc parallel wait (s) async (s) - { - int ii; - - for (ii = 0; ii < N; ii++) - e[ii] = a[ii] + b[ii] + c[ii] + d[ii]; - } - -#pragma acc wait (s) - - } - - for (i = 0; i < N; i++) - { - if (a[i] != 2.0) - abort (); - - if (b[i] != 4.0) - abort (); - - if (c[i] != 4.0) - abort (); - - if (d[i] != 1.0) - abort (); - - if (e[i] != 11.0) - abort (); - } - - - r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuStreamCreate failed: %d\n", r); - abort (); - } - - acc_set_cuda_stream (1, stream1); - - for (i = 0; i < N; i++) - { - a[i] = 5.0; - b[i] = 0.0; - } - -#pragma acc data copy (a[0:N], b[0:N]) copyin (N) - { - -#pragma acc parallel async - { - int ii; - - for (ii = 0; ii < N; ii++) - b[ii] = a[ii]; - } - -#pragma acc wait (s) - - } - - for (i = 0; i < N; i++) - { - if (a[i] != 5.0) - abort (); - - if (b[i] != 5.0) - abort (); - } - - for (i = 0; i < N; i++) - { - a[i] = 7.0; - b[i] = 0.0; - c[i] = 0.0; - d[i] = 0.0; - } - -#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N) - { - -#pragma acc parallel async - { - int ii; - - for (ii = 0; ii < N; ii++) - b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; - } - -#pragma acc parallel async - { - int ii; - - for (ii = 0; ii < N; ii++) - c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; - } - -#pragma acc parallel async - { - int ii; - - for (ii = 0; ii < N; ii++) - d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii]; - } - -#pragma acc wait (s) - - } - - for (i = 0; i < N; i++) - { - if (a[i] != 7.0) - abort (); - - if (b[i] != 
49.0) - abort (); - - if (c[i] != 4.0) - abort (); - - if (d[i] != 1.0) - abort (); - } - - for (i = 0; i < N; i++) - { - a[i] = 3.0; - b[i] = 0.0; - c[i] = 0.0; - d[i] = 0.0; - e[i] = 0.0; - } - -#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N) - { - -#pragma acc parallel async - { - int ii; - - for (ii = 0; ii < N; ii++) - b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; - } - -#pragma acc parallel async - { - int ii; - - for (ii = 0; ii < N; ii++) - c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; - } - -#pragma acc parallel async - { - int ii; - - for (ii = 0; ii < N; ii++) - d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii]; - } - -#pragma acc parallel wait (s) async (s) - { - int ii; - - for (ii = 0; ii < N; ii++) - e[ii] = a[ii] + b[ii] + c[ii] + d[ii]; - } - -#pragma acc wait (s) - - } - - for (i = 0; i < N; i++) - { - if (a[i] != 3.0) - abort (); - - if (b[i] != 9.0) - abort (); - - if (c[i] != 4.0) - abort (); - - if (d[i] != 1.0) - abort (); - - if (e[i] != 17.0) - abort (); - } - - for (i = 0; i < N; i++) - { - a[i] = 4.0; - b[i] = 0.0; - c[i] = 0.0; - d[i] = 0.0; - e[i] = 0.0; - } - -#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N) - { - -#pragma acc parallel async - { - int ii; - - for (ii = 0; ii < N; ii++) - b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; - } - -#pragma acc parallel async - { - int ii; - - for (ii = 0; ii < N; ii++) - c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; - } - -#pragma acc update host (a[0:N], b[0:N], c[0:N]) wait (s) - - } - - for (i = 0; i < N; i++) - { - if (a[i] != 4.0) - abort (); - - if (b[i] != 16.0) - abort (); - - if (c[i] != 4.0) - abort (); - } - - - for (i = 0; i < N; i++) - { - a[i] = 5.0; - b[i] = 0.0; - c[i] = 0.0; - d[i] = 0.0; - e[i] = 0.0; - } - -#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N) - { - -#pragma acc parallel async - { - int ii; - - for (ii = 0; ii < N; ii++) - b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; - } - -#pragma acc parallel async - { - int ii; - - for 
(ii = 0; ii < N; ii++) - c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; - } - -#pragma acc update host (a[0:N], b[0:N], c[0:N]) async - -#pragma acc wait (s) - - } - - for (i = 0; i < N; i++) - { - if (a[i] != 5.0) - abort (); - - if (b[i] != 25.0) - abort (); - - if (c[i] != 4.0) - abort (); - } - - for (i = 0; i < N; i++) - { - a[i] = 3.0; - b[i] = 0.0; - } - -#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N) - { - -#pragma acc kernels async - { - int ii; - - for (ii = 0; ii < N; ii++) - b[ii] = a[ii]; - } - -#pragma acc wait - - } - - for (i = 0; i < N; i++) - { - if (a[i] != 3.0) - abort (); - - if (b[i] != 3.0) - abort (); - } - - for (i = 0; i < N; i++) - { - a[i] = 2.0; - b[i] = 0.0; - } - -#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N) - { - -#pragma acc kernels async - { - int ii; - - for (ii = 0; ii < N; ii++) - b[ii] = a[ii]; - } - -#pragma acc wait (s) - - } - - for (i = 0; i < N; i++) - { - if (a[i] != 2.0) - abort (); - - if (b[i] != 2.0) - abort (); - } - - for (i = 0; i < N; i++) - { - a[i] = 3.0; - b[i] = 0.0; - c[i] = 0.0; - d[i] = 0.0; - } - -#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N) - { - -#pragma acc kernels async - { - int ii; - - for (ii = 0; ii < N; ii++) - b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; - } - -#pragma acc kernels async - { - int ii; - - for (ii = 0; ii < N; ii++) - c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; - } - - -#pragma acc kernels async - { - int ii; - - for (ii = 0; ii < N; ii++) - d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii]; - } - -#pragma acc wait (s) - - } - - for (i = 0; i < N; i++) - { - if (a[i] != 3.0) - abort (); - - if (b[i] != 9.0) - abort (); - - if (c[i] != 4.0) - abort (); - - if (d[i] != 1.0) - abort (); - } - - for (i = 0; i < N; i++) - { - a[i] = 2.0; - b[i] = 0.0; - c[i] = 0.0; - d[i] = 0.0; - e[i] = 0.0; - } - -#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N) - { - -#pragma acc kernels async - { - int ii; - - for 
(ii = 0; ii < N; ii++) - b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; - } - -#pragma acc kernels async - { - int ii; - - for (ii = 0; ii < N; ii++) - c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; - } - -#pragma acc kernels async - { - int ii; - - for (ii = 0; ii < N; ii++) - d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii]; - } - -#pragma acc kernels wait (s) async (s) - { - int ii; - - for (ii = 0; ii < N; ii++) - e[ii] = a[ii] + b[ii] + c[ii] + d[ii]; - } - -#pragma acc wait (s) - - } - - for (i = 0; i < N; i++) - { - if (a[i] != 2.0) - abort (); - - if (b[i] != 4.0) - abort (); - - if (c[i] != 4.0) - abort (); - - if (d[i] != 1.0) - abort (); - - if (e[i] != 11.0) - abort (); - } - - - r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuStreamCreate failed: %d\n", r); - abort (); - } - - acc_set_cuda_stream (1, stream1); - - for (i = 0; i < N; i++) - { - a[i] = 5.0; - b[i] = 0.0; - } - -#pragma acc data copy (a[0:N], b[0:N]) copyin (N) - { - -#pragma acc kernels async - { - int ii; - - for (ii = 0; ii < N; ii++) - b[ii] = a[ii]; - } - -#pragma acc wait (s) - - } - - for (i = 0; i < N; i++) - { - if (a[i] != 5.0) - abort (); - - if (b[i] != 5.0) - abort (); - } - - for (i = 0; i < N; i++) - { - a[i] = 7.0; - b[i] = 0.0; - c[i] = 0.0; - d[i] = 0.0; - } - -#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N) - { - -#pragma acc kernels async - { - int ii; - - for (ii = 0; ii < N; ii++) - b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; - } - -#pragma acc kernels async - { - int ii; - - for (ii = 0; ii < N; ii++) - c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; - } - -#pragma acc kernels async - { - int ii; - - for (ii = 0; ii < N; ii++) - d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii]; - } - -#pragma acc wait (s) - - } - - for (i = 0; i < N; i++) - { - if (a[i] != 7.0) - abort (); - - if (b[i] != 49.0) - abort (); - - if (c[i] != 4.0) - abort (); - - if (d[i] != 1.0) - abort (); - } - 
- for (i = 0; i < N; i++) - { - a[i] = 3.0; - b[i] = 0.0; - c[i] = 0.0; - d[i] = 0.0; - e[i] = 0.0; - } - -#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N) - { - -#pragma acc kernels async - { - int ii; - - for (ii = 0; ii < N; ii++) - b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; - } - -#pragma acc kernels async - { - int ii; - - for (ii = 0; ii < N; ii++) - c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; - } - -#pragma acc kernels async - { - int ii; - - for (ii = 0; ii < N; ii++) - d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii]; - } - -#pragma acc kernels wait (s) async (s) - { - int ii; - - for (ii = 0; ii < N; ii++) - e[ii] = a[ii] + b[ii] + c[ii] + d[ii]; - } - -#pragma acc wait (s) - - } - - for (i = 0; i < N; i++) - { - if (a[i] != 3.0) - abort (); - - if (b[i] != 9.0) - abort (); - - if (c[i] != 4.0) - abort (); - - if (d[i] != 1.0) - abort (); - - if (e[i] != 17.0) - abort (); - } - - for (i = 0; i < N; i++) - { - a[i] = 4.0; - b[i] = 0.0; - c[i] = 0.0; - d[i] = 0.0; - e[i] = 0.0; - } - -#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N) - { - -#pragma acc kernels async - { - int ii; - - for (ii = 0; ii < N; ii++) - b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; - } - -#pragma acc kernels async - { - int ii; - - for (ii = 0; ii < N; ii++) - c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; - } - -#pragma acc update host (a[0:N], b[0:N], c[0:N]) wait (s) - - } - - for (i = 0; i < N; i++) - { - if (a[i] != 4.0) - abort (); - - if (b[i] != 16.0) - abort (); - - if (c[i] != 4.0) - abort (); - } - - - for (i = 0; i < N; i++) - { - a[i] = 5.0; - b[i] = 0.0; - c[i] = 0.0; - d[i] = 0.0; - e[i] = 0.0; - } - -#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N) - { - -#pragma acc kernels async - { - int ii; - - for (ii = 0; ii < N; ii++) - b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; - } - -#pragma acc kernels async - { - int ii; - - for (ii = 0; ii < N; ii++) - c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; - } - -#pragma acc update 
host (a[0:N], b[0:N], c[0:N]) async - -#pragma acc wait (s) - - } - - for (i = 0; i < N; i++) - { - if (a[i] != 5.0) - abort (); - - if (b[i] != 25.0) - abort (); - - if (c[i] != 4.0) - abort (); - } - - acc_shutdown (acc_device_nvidia); - - return 0; -} Grüße Thomas