Hi, the attached patch adds tests for the if, async, and wait clauses of the OpenACC kernels directive to the libgomp C/C++ (libgomp.oacc-c-c++-common) and Fortran (libgomp.oacc-fortran) test suites.
Jim
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c index 22cef6d..d478ce2 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c @@ -460,6 +460,438 @@ main (int argc, char **argv) abort (); } + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 0.0; + } + +#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N) + { + +#pragma acc kernels async + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc wait + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 3.0) + abort (); + } + + for (i = 0; i < N; i++) + { + a[i] = 2.0; + b[i] = 0.0; + } + +#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N) + { + +#pragma acc kernels async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc wait (1) + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 2.0) + abort (); + + if (b[i] != 2.0) + abort (); + } + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 0.0; + c[i] = 0.0; + d[i] = 0.0; + } + +#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N) + { + +#pragma acc kernels async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; + } + +#pragma acc kernels async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; + } + + +#pragma acc kernels async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii]; + } + +#pragma acc wait (1) + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 9.0) + abort (); + + if (c[i] != 4.0) + abort (); + + if (d[i] != 1.0) + abort (); + } + + for (i = 0; i < N; i++) + { + a[i] = 2.0; + b[i] = 0.0; + c[i] = 0.0; + d[i] = 0.0; + e[i] = 0.0; + } + +#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) 
copyin (N) + { + +#pragma acc kernels async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; + } + +#pragma acc kernels async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; + } + +#pragma acc kernels async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii]; + } + +#pragma acc kernels wait (1) async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + e[ii] = a[ii] + b[ii] + c[ii] + d[ii]; + } + +#pragma acc wait (1) + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 2.0) + abort (); + + if (b[i] != 4.0) + abort (); + + if (c[i] != 4.0) + abort (); + + if (d[i] != 1.0) + abort (); + + if (e[i] != 11.0) + abort (); + } + + + r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + acc_set_cuda_stream (1, stream1); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 0.0; + } + +#pragma acc data copy (a[0:N], b[0:N]) copyin (N) + { + +#pragma acc kernels async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc wait (1) + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort (); + + if (b[i] != 5.0) + abort (); + } + + for (i = 0; i < N; i++) + { + a[i] = 7.0; + b[i] = 0.0; + c[i] = 0.0; + d[i] = 0.0; + } + +#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N) + { + +#pragma acc kernels async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; + } + +#pragma acc kernels async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; + } + +#pragma acc kernels async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii]; + } + +#pragma acc wait (1) + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 7.0) + abort (); + + if (b[i] 
!= 49.0) + abort (); + + if (c[i] != 4.0) + abort (); + + if (d[i] != 1.0) + abort (); + } + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 0.0; + c[i] = 0.0; + d[i] = 0.0; + e[i] = 0.0; + } + +#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N) + { + +#pragma acc kernels async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; + } + +#pragma acc kernels async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; + } + +#pragma acc kernels async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii]; + } + +#pragma acc kernels wait (1) async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + e[ii] = a[ii] + b[ii] + c[ii] + d[ii]; + } + +#pragma acc wait (1) + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 9.0) + abort (); + + if (c[i] != 4.0) + abort (); + + if (d[i] != 1.0) + abort (); + + if (e[i] != 17.0) + abort (); + } + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 0.0; + c[i] = 0.0; + d[i] = 0.0; + e[i] = 0.0; + } + +#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N) + { + +#pragma acc kernels async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; + } + +#pragma acc kernels async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; + } + +#pragma acc update host (a[0:N], b[0:N], c[0:N]) wait (1) + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort (); + + if (b[i] != 16.0) + abort (); + + if (c[i] != 4.0) + abort (); + } + + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 0.0; + c[i] = 0.0; + d[i] = 0.0; + e[i] = 0.0; + } + +#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N) + { + +#pragma acc kernels async (1) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; + } + +#pragma acc kernels async (1) + 
{ + int ii; + + for (ii = 0; ii < N; ii++) + c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; + } + +#pragma acc update host (a[0:N], b[0:N], c[0:N]) async (1) + +#pragma acc wait (1) + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort (); + + if (b[i] != 25.0) + abort (); + + if (c[i] != 4.0) + abort (); + } + acc_shutdown (acc_device_nvidia); return 0; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/if-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/if-1.c index 184b355..5478bb6 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/if-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/if-1.c @@ -609,5 +609,357 @@ main(int argc, char **argv) abort (); #endif + for (i = 0; i < N; i++) + a[i] = 4.0; + +#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(1) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + +#if ACC_MEM_SHARED + exp = 5.0; +#else + exp = 4.0; +#endif + + for (i = 0; i < N; i++) + { + if (b[i] != exp) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 16.0; + +#pragma acc kernels if(0) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 17.0) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 8.0; + +#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(one) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + +#if ACC_MEM_SHARED + exp = 9.0; +#else + exp = 8.0; +#endif + + for (i = 0; i < N; i++) + { + if (b[i] != exp) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 22.0; + +#pragma acc kernels if(zero) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 23.0) + 
abort(); + } + + for (i = 0; i < N; i++) + a[i] = 16.0; + +#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(true) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + +#if ACC_MEM_SHARED + exp = 17.0; +#else + exp = 16.0; +#endif + + for (i = 0; i < N; i++) + { + if (b[i] != exp) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 76.0; + +#pragma acc kernels if(false) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 77.0) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 22.0; + + n = 1; + +#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(n) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + +#if ACC_MEM_SHARED + exp = 23.0; +#else + exp = 22.0; +#endif + + for (i = 0; i < N; i++) + { + if (b[i] != exp) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 18.0; + + n = 0; + +#pragma acc kernels if(n) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 19.0) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 49.0; + + n = 1; + +#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(n + n) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + +#if ACC_MEM_SHARED + exp = 50.0; +#else + exp = 49.0; +#endif + + for (i = 0; i < N; i++) + { + if (b[i] != exp) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 38.0; + + n = 0; + +#pragma acc kernels if(n + n) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + 
{ + if (b[i] != 39.0) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 91.0; + +#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(-2) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + +#if ACC_MEM_SHARED + exp = 92.0; +#else + exp = 91.0; +#endif + + for (i = 0; i < N; i++) + { + if (b[i] != exp) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 43.0; + +#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(one == 1) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + +#if ACC_MEM_SHARED + exp = 44.0; +#else + exp = 43.0; +#endif + + for (i = 0; i < N; i++) + { + if (b[i] != exp) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 87.0; + +#pragma acc kernels if(one == 0) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 88.0) + abort(); + } + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 9.0; + } + +#if ACC_MEM_SHARED + exp = 0.0; + exp2 = 0.0; +#else + acc_map_data (a, d_a, N * sizeof (float)); + acc_map_data (b, d_b, N * sizeof (float)); + exp = 3.0; + exp2 = 9.0; +#endif + return 0; } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-1.f90 index b6e637b..01728bd 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-1.f90 @@ -132,4 +132,126 @@ program asyncwait if (d(i) .ne. 1.0) call abort if (e(i) .ne. 11.0) call abort end do + + a(:) = 3.0 + b(:) = 0.0 + + !$acc data copy (a(1:N)) copy (b(1:N)) + + !$acc kernels async + !$acc loop + do i = 1, N + b(i) = a(i) + end do + !$acc end kernels + + !$acc wait + !$acc end data + + do i = 1, N + if (a(i) .ne. 3.0) call abort + if (b(i) .ne. 
3.0) call abort + end do + + a(:) = 2.0 + b(:) = 0.0 + + !$acc data copy (a(1:N)) copy (b(1:N)) + + !$acc kernels async (1) + !$acc loop + do i = 1, N + b(i) = a(i) + end do + !$acc end kernels + + !$acc wait (1) + !$acc end data + + do i = 1, N + if (a(i) .ne. 2.0) call abort + if (b(i) .ne. 2.0) call abort + end do + + a(:) = 3.0 + b(:) = 0.0 + c(:) = 0.0 + d(:) = 0.0 + + !$acc data copy (a(1:N)) copy (b(1:N)) copy (c(1:N)) copy (d(1:N)) + + !$acc kernels async (1) + do i = 1, N + b(i) = (a(i) * a(i) * a(i)) / a(i) + end do + !$acc end kernels + + !$acc kernels async (1) + do i = 1, N + c(i) = (a(i) * 4) / a(i) + end do + !$acc end kernels + + !$acc kernels async (1) + !$acc loop + do i = 1, N + d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i) + end do + !$acc end kernels + + !$acc wait (1) + !$acc end data + + do i = 1, N + if (a(i) .ne. 3.0) call abort + if (b(i) .ne. 9.0) call abort + if (c(i) .ne. 4.0) call abort + if (d(i) .ne. 1.0) call abort + end do + + a(:) = 2.0 + b(:) = 0.0 + c(:) = 0.0 + d(:) = 0.0 + e(:) = 0.0 + + !$acc data copy (a(1:N), b(1:N), c(1:N), d(1:N), e(1:N)) + + !$acc kernels async (1) + do i = 1, N + b(i) = (a(i) * a(i) * a(i)) / a(i) + end do + !$acc end kernels + + !$acc kernels async (1) + !$acc loop + do i = 1, N + c(i) = (a(i) * 4) / a(i) + end do + !$acc end kernels + + !$acc kernels async (1) + !$acc loop + do i = 1, N + d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i) + end do + !$acc end kernels + + !$acc kernels wait (1) async (1) + !$acc loop + do i = 1, N + e(i) = a(i) + b(i) + c(i) + d(i) + end do + !$acc end kernels + + !$acc wait (1) + !$acc end data + + do i = 1, N + if (a(i) .ne. 2.0) call abort + if (b(i) .ne. 4.0) call abort + if (c(i) .ne. 4.0) call abort + if (d(i) .ne. 1.0) call abort + if (e(i) .ne. 
11.0) call abort + end do end program asyncwait diff --git a/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-2.f90 index bade52b..fe131b6 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-2.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-2.f90 @@ -1,6 +1,6 @@ ! { dg-do run } -program parallel_wait +program asyncwait integer, parameter :: N = 64 real, allocatable :: a(:), b(:), c(:) integer i @@ -33,8 +33,33 @@ program parallel_wait do i = 1, N if (c(i) .ne. 2.0) call abort end do + + !$acc kernels async (0) + !$acc loop + do i = 1, N + a(i) = 1 + end do + !$acc end kernels + + !$acc kernels async (1) + !$acc loop + do i = 1, N + b(i) = 1 + end do + !$acc end kernels + + !$acc kernels wait (0, 1) + !$acc loop + do i = 1, N + c(i) = a(i) + b(i) + end do + !$acc end kernels + + do i = 1, N + if (c(i) .ne. 2.0) call abort + end do deallocate (a) deallocate (b) deallocate (c) -end program parallel_wait +end program asyncwait diff --git a/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-3.f90 index d48dc11..fa96a01 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-3.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-3.f90 @@ -1,6 +1,6 @@ ! { dg-do run } -program parallel_wait +program asyncwait integer, parameter :: N = 64 real, allocatable :: a(:), b(:), c(:) integer i @@ -35,8 +35,35 @@ program parallel_wait do i = 1, N if (c(i) .ne. 2.0) call abort end do + + !$acc kernels async (0) + !$acc loop + do i = 1, N + a(i) = 1 + end do + !$acc end kernels + + !$acc kernels async (1) + !$acc loop + do i = 1, N + b(i) = 1 + end do + !$acc end kernels + + !$acc wait (0, 1) + + !$acc kernels + !$acc loop + do i = 1, N + c(i) = a(i) + b(i) + end do + !$acc end kernels + + do i = 1, N + if (c(i) .ne. 
2.0) call abort + end do deallocate (a) deallocate (b) deallocate (c) -end program parallel_wait +end program asyncwait diff --git a/libgomp/testsuite/libgomp.oacc-fortran/if-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/if-1.f90 index 1729a3b..e54c1b2 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/if-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/if-1.f90 @@ -450,4 +450,437 @@ program main if (acc_is_present (b) .eqv. .TRUE.) call abort #endif + a(:) = 4.0 + + !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (1 == 1) + do i = 1, N + if (acc_on_device (acc_device_host) .eqv. .TRUE.) then + b(i) = a(i) + 1 + else + b(i) = a(i) + end if + end do + !$acc end kernels + +#if ACC_MEM_SHARED + exp = 5.0 +#else + exp = 4.0 +#endif + + do i = 1, N + if (b(i) .ne. exp) call abort + end do + + a(:) = 16.0 + + !$acc kernels if (0 == 1) + do i = 1, N + if (acc_on_device (acc_device_host) .eqv. .TRUE.) then + b(i) = a(i) + 1 + else + b(i) = a(i) + end if + end do + !$acc end kernels + + do i = 1, N + if (b(i) .ne. 17.0) call abort + end do + + a(:) = 8.0 + + !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (one == 1) + do i = 1, N + if (acc_on_device (acc_device_host) .eqv. .TRUE.) then + b(i) = a(i) + 1 + else + b(i) = a(i) + end if + end do + !$acc end kernels + +#if ACC_MEM_SHARED + exp = 9.0 +#else + exp = 8.0 +#endif + + do i = 1, N + if (b(i) .ne. exp) call abort + end do + + a(:) = 22.0 + + !$acc kernels if (zero == 1) + do i = 1, N + if (acc_on_device (acc_device_host) .eqv. .TRUE.) then + b(i) = a(i) + 1 + else + b(i) = a(i) + end if + end do + !$acc end kernels + + do i = 1, N + if (b(i) .ne. 23.0) call abort + end do + + a(:) = 16.0 + + !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (.TRUE.) + do i = 1, N + if (acc_on_device (acc_device_host) .eqv. .TRUE.) then + b(i) = a(i) + 1 + else + b(i) = a(i) + end if + end do + !$acc end kernels + +#if ACC_MEM_SHARED + exp = 17.0; +#else + exp = 16.0; +#endif + + do i = 1, N + if (b(i) .ne. 
exp) call abort + end do + + a(:) = 76.0 + + !$acc kernels if (.FALSE.) + do i = 1, N + if (acc_on_device (acc_device_host) .eqv. .TRUE.) then + b(i) = a(i) + 1 + else + b(i) = a(i) + end if + end do + !$acc end kernels + + do i = 1, N + if (b(i) .ne. 77.0) call abort + end do + + a(:) = 22.0 + + nn = 1 + + !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (nn == 1) + do i = 1, N + if (acc_on_device (acc_device_host) .eqv. .TRUE.) then + b(i) = a(i) + 1 + else + b(i) = a(i) + end if + end do + !$acc end kernels + +#if ACC_MEM_SHARED + exp = 23.0; +#else + exp = 22.0; +#endif + + do i = 1, N + if (b(i) .ne. exp) call abort + end do + + a(:) = 18.0 + + nn = 0 + + !$acc kernels if (nn == 1) + do i = 1, N + if (acc_on_device (acc_device_host) .eqv. .TRUE.) then + b(i) = a(i) + 1 + else + b(i) = a(i) + end if + end do + !$acc end kernels + + do i = 1, N + if (b(i) .ne. 19.0) call abort + end do + + a(:) = 49.0 + + nn = 1 + + !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if ((nn + nn) > 0) + do i = 1, N + if (acc_on_device (acc_device_host) .eqv. .TRUE.) then + b(i) = a(i) + 1 + else + b(i) = a(i) + end if + end do + !$acc end kernels + +#if ACC_MEM_SHARED + exp = 50.0 +#else + exp = 49.0 +#endif + + do i = 1, N + if (b(i) .ne. exp) call abort + end do + + a(:) = 38.0 + + nn = 0; + + !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if ((nn + nn) > 0) + do i = 1, N + if (acc_on_device (acc_device_host) .eqv. .TRUE.) then + b(i) = a(i) + 1 + else + b(i) = a(i) + end if + end do + !$acc end kernels + + do i = 1, N + if (b(i) .ne. 39.0) call abort + end do + + a(:) = 91.0 + + !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (-2 > 0) + do i = 1, N + if (acc_on_device (acc_device_host) .eqv. .TRUE.) then + b(i) = a(i) + 1 + else + b(i) = a(i) + end if + end do + !$acc end kernels + + do i = 1, N + if (b(i) .ne. 92.0) call abort + end do + + a(:) = 43.0 + + !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (one == 1) + do i = 1, N + if (acc_on_device (acc_device_host) .eqv. 
.TRUE.) then + b(i) = a(i) + 1 + else + b(i) = a(i) + end if + end do + !$acc end kernels + +#if ACC_MEM_SHARED + exp = 44.0 +#else + exp = 43.0 +#endif + + do i = 1, N + if (b(i) .ne. exp) call abort + end do + + a(:) = 87.0 + + !$acc kernels if (one == 0) + do i = 1, N + if (acc_on_device (acc_device_host) .eqv. .TRUE.) then + b(i) = a(i) + 1 + else + b(i) = a(i) + end if + end do + !$acc end kernels + + do i = 1, N + if (b(i) .ne. 88.0) call abort + end do + + a(:) = 3.0 + b(:) = 9.0 + +#if ACC_MEM_SHARED + exp = 0.0 + exp2 = 0.0 +#else + call acc_copyin (a, sizeof (a)) + call acc_copyin (b, sizeof (b)) + exp = 3.0; + exp2 = 9.0; +#endif + + !$acc update device (a(1:N), b(1:N)) if (1 == 1) + + a(:) = 0.0 + b(:) = 0.0 + + !$acc update host (a(1:N), b(1:N)) if (1 == 1) + + do i = 1, N + if (a(i) .ne. exp) call abort + if (b(i) .ne. exp2) call abort + end do + + a(:) = 6.0 + b(:) = 12.0 + + !$acc update device (a(1:N), b(1:N)) if (0 == 1) + + a(:) = 0.0 + b(:) = 0.0 + + !$acc update host (a(1:N), b(1:N)) if (1 == 1) + + do i = 1, N + if (a(i) .ne. exp) call abort + if (b(i) .ne. exp2) call abort + end do + + a(:) = 26.0 + b(:) = 21.0 + + !$acc update device (a(1:N), b(1:N)) if (1 == 1) + + a(:) = 0.0 + b(:) = 0.0 + + !$acc update host (a(1:N), b(1:N)) if (0 == 1) + + do i = 1, N + if (a(i) .ne. 0.0) call abort + if (b(i) .ne. 0.0) call abort + end do + +#if !ACC_MEM_SHARED + call acc_copyout (a, sizeof (a)) + call acc_copyout (b, sizeof (b)) +#endif + + a(:) = 4.0 + b(:) = 0.0 + + !$acc data copyin (a(1:N)) copyout (b(1:N)) if (1 == 1) + + !$acc kernels present (a(1:N)) + do i = 1, N + b(i) = a(i) + end do + !$acc end kernels + !$acc end data + + do i = 1, N + if (b(i) .ne. 4.0) call abort + end do + + a(:) = 8.0 + b(:) = 1.0 + + !$acc data copyin (a(1:N)) copyout (b(1:N)) if (0 == 1) + +#if !ACC_MEM_SHARED + if (acc_is_present (a) .eqv. .TRUE.) call abort + if (acc_is_present (b) .eqv. .TRUE.) 
call abort +#endif + + !$acc end data + + a(:) = 18.0 + b(:) = 21.0 + + !$acc data copyin (a(1:N)) if (1 == 1) + +#if !ACC_MEM_SHARED + if (acc_is_present (a) .eqv. .FALSE.) call abort +#endif + + !$acc data copyout (b(1:N)) if (0 == 1) +#if !ACC_MEM_SHARED + if (acc_is_present (b) .eqv. .TRUE.) call abort +#endif + !$acc data copyout (b(1:N)) if (1 == 1) + + !$acc kernels present (a(1:N)) present (b(1:N)) + do i = 1, N + b(i) = a(i) + end do + !$acc end kernels + + !$acc end data + +#if !ACC_MEM_SHARED + if (acc_is_present (b) .eqv. .TRUE.) call abort +#endif + !$acc end data + !$acc end data + + do i = 1, N + if (b(1) .ne. 18.0) call abort + end do + + !$acc enter data copyin (b(1:N)) if (0 == 1) + +#if !ACC_MEM_SHARED + if (acc_is_present (b) .eqv. .TRUE.) call abort +#endif + + !$acc exit data delete (b(1:N)) if (0 == 1) + + !$acc enter data copyin (b(1:N)) if (1 == 1) + +#if !ACC_MEM_SHARED + if (acc_is_present (b) .eqv. .FALSE.) call abort +#endif + + !$acc exit data delete (b(1:N)) if (1 == 1) + +#if !ACC_MEM_SHARED + if (acc_is_present (b) .eqv. .TRUE.) call abort +#endif + + !$acc enter data copyin (b(1:N)) if (zero == 1) + +#if !ACC_MEM_SHARED + if (acc_is_present (b) .eqv. .TRUE.) call abort +#endif + + !$acc exit data delete (b(1:N)) if (zero == 1) + + !$acc enter data copyin (b(1:N)) if (one == 1) + +#if !ACC_MEM_SHARED + if (acc_is_present (b) .eqv. .FALSE.) call abort +#endif + + !$acc exit data delete (b(1:N)) if (one == 1) + +#if !ACC_MEM_SHARED + if (acc_is_present (b) .eqv. .TRUE.) call abort +#endif + + !$acc enter data copyin (b(1:N)) if (one == 0) + +#if !ACC_MEM_SHARED + if (acc_is_present (b) .eqv. .TRUE.) call abort +#endif + + !$acc exit data delete (b(1:N)) if (one == 0) + + !$acc enter data copyin (b(1:N)) if (one == 1) + +#if !ACC_MEM_SHARED + if (acc_is_present (b) .eqv. .FALSE.) call abort +#endif + + !$acc exit data delete (b(1:N)) if (one == 1) + +#if !ACC_MEM_SHARED + if (acc_is_present (b) .eqv. .TRUE.) 
call abort +#endif + end program main