Hi Jakub, this patch has been posted before, but hasn't really been reviewed yet: https://gcc.gnu.org/ml/gcc-patches/2016-08/msg01927.html
This has been deployed on gomp-4_0-branch for a long time, and was re-tested on current trunk, test results okay. Is this okay for trunk? Thanks, Chung-Lin 2016-06-13 Cesar Philippidis <ce...@codesourcery.com> Thomas Schwinge <tho...@codesourcery.com> Chung-Lin Tang <clt...@codesourcery.com> libgomp/ * oacc-parallel.c (find_pset): Adjust and rename from... (find_pointer): ...this function. (GOACC_enter_exit_data): Handle GOMP_MAP_TO and GOMP_MAP_ALLOC, adjust find_pointer calls into find_pset, adjust pointer map handling, add acc_is_present guards to calls to gomp_acc_insert_pointer and gomp_acc_remove_pointer. * testsuite/libgomp.oacc-c-c++-common/data-2.c: Update test. * testsuite/libgomp.oacc-c-c++-common/enter-data.c: New test. * testsuite/libgomp.oacc-fortran/data-2.f90: Update test.
Index: oacc-parallel.c =================================================================== --- oacc-parallel.c (revision 249147) +++ oacc-parallel.c (working copy) @@ -38,8 +38,11 @@ #include <stdarg.h> #include <assert.h> +/* Returns the number of mappings associated with the pointer or pset. PSET + have three mappings, whereas pointer have two. */ + static int -find_pset (int pos, size_t mapnum, unsigned short *kinds) +find_pointer (int pos, size_t mapnum, unsigned short *kinds) { if (pos + 1 >= mapnum) return 0; @@ -46,7 +49,12 @@ static int unsigned char kind = kinds[pos+1] & 0xff; - return kind == GOMP_MAP_TO_PSET; + if (kind == GOMP_MAP_TO_PSET) + return 3; + else if (kind == GOMP_MAP_POINTER) + return 2; + + return 0; } static void goacc_wait (int async, int num_waits, va_list *ap); @@ -298,7 +306,9 @@ GOACC_enter_exit_data (int device, size_t mapnum, if (kind == GOMP_MAP_FORCE_ALLOC || kind == GOMP_MAP_FORCE_PRESENT - || kind == GOMP_MAP_FORCE_TO) + || kind == GOMP_MAP_FORCE_TO + || kind == GOMP_MAP_TO + || kind == GOMP_MAP_ALLOC) { data_enter = true; break; @@ -312,6 +322,15 @@ GOACC_enter_exit_data (int device, size_t mapnum, kind); } + /* In c, non-pointers and arrays are represented by a single data clause. + Dynamically allocated arrays and subarrays are represented by a data + clause followed by an internal GOMP_MAP_POINTER. + + In fortran, scalars and not allocated arrays are represented by a + single data clause. Allocated arrays and subarrays have three mappings: + 1) the original data clause, 2) a PSET 3) a pointer to the array data. + */ + if (data_enter) { for (i = 0; i < mapnum; i++) @@ -318,25 +337,24 @@ GOACC_enter_exit_data (int device, size_t mapnum, { unsigned char kind = kinds[i] & 0xff; - /* Scan for PSETs. */ - int psets = find_pset (i, mapnum, kinds); + /* Scan for pointers and PSETs. */ + int pointer = find_pointer (i, mapnum, kinds); - if (!psets) + if (!pointer) { switch (kind) { - case GOMP_MAP_POINTER: - gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i], - &kinds[i]); + case GOMP_MAP_ALLOC: + acc_present_or_create (hostaddrs[i], sizes[i]); break; case GOMP_MAP_FORCE_ALLOC: acc_create (hostaddrs[i], sizes[i]); break; - case GOMP_MAP_FORCE_PRESENT: + case GOMP_MAP_TO: acc_present_or_copyin (hostaddrs[i], sizes[i]); break; case GOMP_MAP_FORCE_TO: - acc_present_or_copyin (hostaddrs[i], sizes[i]); + acc_copyin (hostaddrs[i], sizes[i]); break; default: gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", @@ -346,12 +364,16 @@ GOACC_enter_exit_data (int device, size_t mapnum, } else { - gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]); + if (!acc_is_present (hostaddrs[i], sizes[i])) + { + gomp_acc_insert_pointer (pointer, &hostaddrs[i], + &sizes[i], &kinds[i]); + } /* Increment 'i' by two because OpenACC requires fortran arrays to be contiguous, so each PSET is associated with one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and one MAP_POINTER. */ - i += 2; + i += pointer - 1; } } } @@ -360,19 +382,15 @@ GOACC_enter_exit_data (int device, size_t mapnum, { unsigned char kind = kinds[i] & 0xff; - int psets = find_pset (i, mapnum, kinds); + int pointer = find_pointer (i, mapnum, kinds); - if (!psets) + if (!pointer) { switch (kind) { - case GOMP_MAP_POINTER: - gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff) - == GOMP_MAP_FORCE_FROM, - async, 1); - break; case GOMP_MAP_DELETE: - acc_delete (hostaddrs[i], sizes[i]); + if (acc_is_present (hostaddrs[i], sizes[i])) + acc_delete (hostaddrs[i], sizes[i]); break; case GOMP_MAP_FORCE_FROM: acc_copyout (hostaddrs[i], sizes[i]); @@ -385,10 +403,14 @@ GOACC_enter_exit_data (int device, size_t mapnum, } else { - gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff) - == GOMP_MAP_FORCE_FROM, async, 3); - /* See the above comment. */ - i += 2; + if (acc_is_present (hostaddrs[i], sizes[i])) + { + gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff) + == GOMP_MAP_FORCE_FROM, async, + pointer); + /* See the above comment. */ + } + i += pointer - 1; } } Index: testsuite/libgomp.oacc-c-c++-common/data-2.c =================================================================== --- testsuite/libgomp.oacc-c-c++-common/data-2.c (revision 249147) +++ testsuite/libgomp.oacc-c-c++-common/data-2.c (working copy) @@ -3,6 +3,7 @@ /* { dg-do run } */ #include <stdlib.h> +#include <openacc.h> int main (int argc, char **argv) @@ -32,7 +33,7 @@ main (int argc, char **argv) for (i = 0; i < N; i++) b[i] = a[i]; -#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) wait async +#pragma acc exit data copyout (a[0:N], b[0:N]) delete (N) wait async #pragma acc wait for (i = 0; i < N; i++) @@ -46,6 +47,32 @@ main (int argc, char **argv) for (i = 0; i < N; i++) { + a[i] = 3.0; + b[i] = 0.0; + } + +#pragma acc enter data copyin (a[0:N]) async +#pragma acc enter data copyin (b[0:N]) async wait +#pragma acc enter data copyin (N) async wait +#pragma acc parallel async wait present (a[0:N]) present (b[0:N]) present (N) +#pragma acc loop + for (i = 0; i < N; i++) + b[i] = a[i]; + +#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) delete (N) wait async +#pragma acc wait + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 3.0) + abort (); + } + + for (i = 0; i < N; i++) + { a[i] = 2.0; b[i] = 0.0; } @@ -56,7 +83,7 @@ main (int argc, char **argv) for (i = 0; i < N; i++) b[i] = a[i]; -#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) wait (1) async (1) +#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) delete (N) wait (1) async (1) #pragma acc wait (1) for (i = 0; i < N; i++) @@ -93,7 +120,7 @@ main (int argc, char **argv) for (i = 0; i < N; i++) d[i] = ((a[i] * a[i] + a[i]) / a[i]) - a[i]; -#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) copyout (c[0:N]) copyout (d[0:N]) wait (1, 2, 3) async (1) +#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) copyout (c[0:N]) copyout (d[0:N]) delete (N) wait (1, 2, 3) async (1) #pragma acc wait (1) for (i = 0; i < N; i++) @@ -161,5 +188,156 @@ main (int argc, char **argv) abort (); } +#if !ACC_MEM_SHARED + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 0.0; + } + +#pragma acc enter data present_or_copyin (a[0:N]) + + if (!acc_is_present (a, nbytes)) + abort (); + +#pragma acc exit data copyout (a[0:N]) + + if (acc_is_present (a, nbytes)) + abort (); + +#pragma acc exit data delete (a[0:N]) + + if (acc_is_present (a, nbytes)) + abort (); + +#pragma acc enter data present_or_copyin (a[0:N], b[0:N]) + + if (!acc_is_present (a, nbytes)) + abort (); + + if (!acc_is_present (b, nbytes)) + abort (); + +#pragma acc exit data copyout (a[0:N], b[0:N]) + + if (acc_is_present (a, nbytes)) + abort (); + + if (acc_is_present (b, nbytes)) + abort (); + +#pragma acc exit data delete (a[0:N], b[0:N]) + + if (acc_is_present (a, nbytes)) + abort (); + + if (acc_is_present (b, nbytes)) + abort (); + +#pragma acc enter data present_or_create (a[0:N], b[0:N]) + + if (!acc_is_present (a, nbytes)) + abort (); + + if (!acc_is_present (b, nbytes)) + abort (); + +#pragma acc exit data copyout (a[0:N], b[0:N]) + + if (acc_is_present (a, nbytes)) + abort (); + + if (acc_is_present (b, nbytes)) + abort (); + +#pragma acc enter data present_or_create (a[0:N], b[0:N]) + + if (!acc_is_present (a, nbytes)) + abort (); + + if (!acc_is_present (b, nbytes)) + abort (); + +#pragma acc enter data present_or_create (a[0:N], b[0:N]) + + if (!acc_is_present (a, nbytes)) + abort (); + + if (!acc_is_present (b, nbytes)) + abort (); + +#pragma acc exit data delete (a[0:N], b[0:N]) + + if (acc_is_present (a, nbytes)) + abort (); + + if (acc_is_present (b, nbytes)) + abort (); + +#pragma acc exit data delete (a[0:N], b[0:N]) + + if (acc_is_present (a, nbytes)) + abort (); + + if (acc_is_present (b, nbytes)) + abort (); + +#pragma acc enter data create (a[0:N], b[0:N]) + + if (!acc_is_present (a, nbytes)) + abort (); + + if (!acc_is_present (b, nbytes)) + abort (); + +#pragma acc enter data present_or_copyin (a[0:N], b[0:N]) + + if (!acc_is_present (a, nbytes)) + abort (); + + if (!acc_is_present (b, nbytes)) + abort (); + +#pragma acc exit data delete (a[0:N], b[0:N]) + + if (acc_is_present (a, nbytes)) + abort (); + + if (acc_is_present (b, nbytes)) + abort (); + +#pragma acc exit data delete (a[0:N], b[0:N]) + + if (acc_is_present (a, nbytes)) + abort (); + + if (acc_is_present (b, nbytes)) + abort (); + +#pragma acc enter data present_or_copyin (a[0:N]) + + if (!acc_is_present (a, nbytes)) + abort (); + + if (acc_is_present (b, nbytes)) + abort (); + +#pragma acc exit data delete (a[0:N], b[0:N]) + + if (acc_is_present (a, nbytes)) + abort (); + + if (acc_is_present (b, nbytes)) + abort (); + +#pragma acc exit data delete (a[0:N], b[0:N]) + + if (acc_is_present (a, nbytes)) + abort (); + + if (acc_is_present (b, nbytes)) + abort (); +#endif + return 0; } Index: testsuite/libgomp.oacc-c-c++-common/enter-data.c =================================================================== --- testsuite/libgomp.oacc-c-c++-common/enter-data.c (nonexistent) +++ testsuite/libgomp.oacc-c-c++-common/enter-data.c (working copy) @@ -0,0 +1,23 @@ +/* This test verifies that the present data clauses to acc enter data + don't cause duplicate mapping failures at runtime. */ + +/* { dg-do run } */ + +#include <stdlib.h> + +int +main (void) +{ + int a; + +#pragma acc enter data copyin (a) +#pragma acc enter data pcopyin (a) +#pragma acc enter data pcreate (a) +#pragma acc exit data delete (a) + +#pragma acc enter data create (a) +#pragma acc enter data pcreate (a) +#pragma acc exit data delete (a) + + return 0; +} Index: testsuite/libgomp.oacc-fortran/data-2.f90 =================================================================== --- testsuite/libgomp.oacc-fortran/data-2.f90 (revision 249147) +++ testsuite/libgomp.oacc-fortran/data-2.f90 (working copy) @@ -1,9 +1,16 @@ ! { dg-do run } +! { dg-additional-options "-cpp" } program test + use openacc integer, parameter :: N = 8 real, allocatable :: a(:,:), b(:,:) + real, allocatable :: c(:), d(:) + integer i, j + i = 0 + j = 0 + allocate (a(N,N)) allocate (b(N,N)) @@ -12,7 +19,7 @@ program test !$acc enter data copyin (a(1:N,1:N), b(1:N,1:N)) - !$acc parallel + !$acc parallel present (a(1:N,1:N), b(1:N,1:N)) do i = 1, n do j = 1, n b(j,i) = a (j,i) @@ -28,4 +35,171 @@ program test if (b(j,i) .ne. 3.0) call abort end do end do + + allocate (c(N)) + allocate (d(N)) + + c(:) = 3.0 + d(:) = 0.0 + + !$acc enter data copyin (c(1:N)) create (d(1:N)) async + !$acc wait + + !$acc parallel present (c(1:N), d(1:N)) + do i = 1, N + d(i) = c(i) + 1 + end do + !$acc end parallel + + !$acc exit data copyout (c(1:N), d(1:N)) async + !$acc wait + + do i = 1, N + if (d(i) .ne. 4.0) call abort + end do + + c(:) = 3.0 + d(:) = 0.0 + + !$acc enter data copyin (c(1:N)) async + !$acc enter data create (d(1:N)) wait + !$acc wait + + !$acc parallel present (c(1:N), d(1:N)) + do i = 1, N + d(i) = c(i) + 1 + end do + !$acc end parallel + + !$acc exit data delete (c(1:N)) copyout (d(1:N)) async + !$acc exit data async + !$acc wait + + do i = 1, N + if (d(i) .ne. 4.0) call abort + end do + +#if !ACC_MEM_SHARED + + c(:) = 3.0 + d(:) = 0.0 + + !$acc enter data present_or_copyin (c(0:N)) + + if (acc_is_present (c) .eqv. .FALSE.) call abort + + !$acc exit data copyout (c(0:N)) + + if (acc_is_present (c) .eqv. .TRUE.) call abort + + !$acc exit data delete (c(0:N)) + + if (acc_is_present (c) .eqv. .TRUE.) call abort + + do i = 1, N + if (c(i) .ne. 3.0) call abort + end do + + c(:) = 5.0 + d(:) = 9.0 + + !$acc enter data present_or_copyin (c(0:N), d(0:N)) + + if (acc_is_present (c) .eqv. .FALSE.) call abort + if (acc_is_present (d) .eqv. .FALSE.) call abort + + !$acc exit data copyout (c(0:N), d(0:N)) + + if (acc_is_present (c) .eqv. .TRUE.) call abort + if (acc_is_present (d) .eqv. .TRUE.) call abort + + !$acc exit data delete (c(0:N), d(0:N)) + + if (acc_is_present (c) .eqv. .TRUE.) call abort + if (acc_is_present (d) .eqv. .TRUE.) call abort + + do i = 1, N + if (c(i) .ne. 5.0) call abort + if (d(i) .ne. 9.0) call abort + end do + + !$acc enter data present_or_create (c(0:N), d(0:N)) + + if (acc_is_present (c) .eqv. .FALSE.) call abort + if (acc_is_present (d) .eqv. .FALSE.) call abort + + !$acc parallel present (c(0:N), d(0:N)) + do i = 1, N + c(i) = 1.0; + d(i) = 2.0; + end do + !$acc end parallel + + !$acc exit data copyout (c(0:N), d(0:N)) + + if (acc_is_present (c) .eqv. .TRUE.) call abort + if (acc_is_present (d) .eqv. .TRUE.) call abort + + do i = 1, N + if (c(i) .ne. 1.0) call abort + if (d(i) .ne. 2.0) call abort + end do + + !$acc enter data present_or_create (c(0:N), d(0:N)) + + if (acc_is_present (c) .eqv. .FALSE.) call abort + if (acc_is_present (d) .eqv. .FALSE.) call abort + + !$acc enter data present_or_create (c(0:N), d(0:N)) + + if (acc_is_present (c) .eqv. .FALSE.) call abort + if (acc_is_present (d) .eqv. .FALSE.) call abort + + !$acc exit data delete (c(0:N), d(0:N)) + + if (acc_is_present (c) .eqv. .TRUE.) call abort + if (acc_is_present (d) .eqv. .TRUE.) call abort + + !$acc exit data delete (c(0:N), d(0:N)) + + if (acc_is_present (c) .eqv. .TRUE.) call abort + if (acc_is_present (d) .eqv. .TRUE.) call abort + + !$acc enter data create (c(0:N), d(0:N)) + + if (acc_is_present (c) .eqv. .FALSE.) call abort + if (acc_is_present (d) .eqv. .FALSE.) call abort + + !$acc enter data present_or_copyin (c(0:N), d(0:N)) + + if (acc_is_present (c) .eqv. .FALSE.) call abort + if (acc_is_present (d) .eqv. .FALSE.) call abort + + !$acc exit data delete (c(0:N), d(0:N)) + + if (acc_is_present (c) .eqv. .TRUE.) call abort + if (acc_is_present (d) .eqv. .TRUE.) call abort + + !$acc exit data delete (c(0:N), d(0:N)) + + if (acc_is_present (c) .eqv. .TRUE.) call abort + if (acc_is_present (d) .eqv. .TRUE.) call abort + + !$acc enter data present_or_copyin (c(0:N)) + + if (acc_is_present (c) .eqv. .FALSE.) call abort + if (acc_is_present (d) .eqv. .TRUE.) call abort + + !$acc exit data delete (c(0:N), d(0:N)) + + if (acc_is_present (c) .eqv. .TRUE.) call abort + if (acc_is_present (d) .eqv. .TRUE.) call abort + + !$acc exit data delete (c(0:N), d(0:N)) + + if (acc_is_present (c) .eqv. .TRUE.) call abort + if (acc_is_present (d) .eqv. .TRUE.) call abort + +#endif + end program test