Hi Thomas, this is the updated Fortran deviceptr patche, originated from Cesar, and one of the tests was from James Norris: https://gcc.gnu.org/ml/gcc-patches/2018-05/msg00286.html https://gcc.gnu.org/ml/gcc-patches/2018-08/msg00532.html
There were a few style cleanups, but the goal of modification is the same: to use only one clause to represent Fortran deviceptr, and to preserve it during gimplification. Because of this modification, and as we discussed earlier, the handle_ftn_pointers() code in libgomp/oacc-parallel.c appeared to be no longer needed. I have remove them in this patch, and tested libgomp without regressions. Also, I've added a new libgomp.oacc-fortran/deviceptr-2.f90 testcase that actually copies out and verifies the deviceptr computation. Is this okay for trunk now? Thanks, Chung-Lin 2019-10-18 Cesar Philippidis <ce...@codesourcery.com> Chung-Lin Tang <clt...@codesourcery.com> gcc/fortran/ * trans-openmp.c (gfc_omp_finish_clause): Don't create pointer data mappings for deviceptr clauses. (gfc_trans_omp_clauses): Likewise. gcc/ * gimplify.c (enum gimplify_omp_var_data): Add GOVD_DEVICETPR. (omp_notice_variable): Add GOVD_DEVICEPTR attribute when appropriate. (gimplify_scan_omp_clauses): Likewise. (gimplify_adjust_omp_clauses_1): Set GOMP_MAP_FORCE_DEVICEPTR for implicit deviceptr mappings. gcc/testsuite/ * c-c++-common/goacc/deviceptr-4.c: Update expected data mapping. 2019-10-18 Chung-Lin Tang <clt...@codesourcery.com> James Norris <jnor...@codesourcery.com> libgomp/ * oacc-parallel.c (handle_ftn_pointers): Delete function. (GOACC_parallel_keyed): Remove call to handle_ftn_pointers. * testsuite/libgomp.oacc-fortran/deviceptr-1.f90: New test. * testsuite/libgomp.oacc-fortran/deviceptr-2.f90: New test.
Index: gcc/fortran/trans-openmp.c =================================================================== --- gcc/fortran/trans-openmp.c (revision 277155) +++ gcc/fortran/trans-openmp.c (working copy) @@ -1099,7 +1099,8 @@ gfc_omp_clause_dtor (tree clause, tree decl) void gfc_omp_finish_clause (tree c, gimple_seq *pre_p) { - if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP) + if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP + || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FORCE_DEVICEPTR) return; tree decl = OMP_CLAUSE_DECL (c); @@ -2173,6 +2174,12 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp if (n->expr == NULL || n->expr->ref->u.ar.type == AR_FULL) { if (POINTER_TYPE_P (TREE_TYPE (decl)) + && n->u.map_op == OMP_MAP_FORCE_DEVICEPTR) + { + OMP_CLAUSE_DECL (node) = decl; + goto finalize_map_clause; + } + else if (POINTER_TYPE_P (TREE_TYPE (decl)) && (gfc_omp_privatize_by_reference (decl) || GFC_DECL_GET_SCALAR_POINTER (decl) || GFC_DECL_GET_SCALAR_ALLOCATABLE (decl) @@ -2346,6 +2353,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp OMP_CLAUSE_SIZE (node3) = fold_build2 (MINUS_EXPR, sizetype, ptr, ptr2); } + finalize_map_clause: switch (n->u.map_op) { case OMP_MAP_ALLOC: Index: gcc/gimplify.c =================================================================== --- gcc/gimplify.c (revision 277155) +++ gcc/gimplify.c (working copy) @@ -123,6 +123,9 @@ enum gimplify_omp_var_data /* Flag for GOVD_REDUCTION: inscan seen in {in,ex}clusive clause. */ GOVD_REDUCTION_INSCAN = 0x2000000, + /* Flag for OpenACC deviceptrs. */ + GOVD_DEVICEPTR = 0x4000000, + GOVD_DATA_SHARE_CLASS = (GOVD_SHARED | GOVD_PRIVATE | GOVD_FIRSTPRIVATE | GOVD_LASTPRIVATE | GOVD_REDUCTION | GOVD_LINEAR | GOVD_LOCAL) @@ -7426,6 +7429,7 @@ omp_notice_variable (struct gimplify_omp_ctx *ctx, error ("variable %qE declared in enclosing " "%<host_data%> region", DECL_NAME (decl)); nflags |= GOVD_MAP; + nflags |= (n2->value & GOVD_DEVICEPTR); if (octx->region_type == ORT_ACC_DATA && (n2->value & GOVD_MAP_0LEN_ARRAY)) nflags |= GOVD_MAP_0LEN_ARRAY; @@ -8943,6 +8947,8 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_se if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ALWAYS_TO || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ALWAYS_TOFROM) flags |= GOVD_MAP_ALWAYS_TO; + else if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FORCE_DEVICEPTR) + flags |= GOVD_DEVICEPTR; goto do_add; case OMP_CLAUSE_DEPEND: @@ -9727,7 +9733,8 @@ gimplify_adjust_omp_clauses_1 (splay_tree_node n, | GOVD_MAP_FORCE | GOVD_MAP_FORCE_PRESENT | GOVD_MAP_ALLOC_ONLY - | GOVD_MAP_FROM_ONLY)) + | GOVD_MAP_FROM_ONLY + | GOVD_DEVICEPTR)) { case 0: kind = GOMP_MAP_TOFROM; @@ -9750,6 +9757,9 @@ gimplify_adjust_omp_clauses_1 (splay_tree_node n, case GOVD_MAP_FORCE_PRESENT: kind = GOMP_MAP_FORCE_PRESENT; break; + case GOVD_DEVICEPTR: + kind = GOMP_MAP_FORCE_DEVICEPTR; + break; default: gcc_unreachable (); } Index: gcc/testsuite/c-c++-common/goacc/deviceptr-4.c =================================================================== --- gcc/testsuite/c-c++-common/goacc/deviceptr-4.c (revision 277155) +++ gcc/testsuite/c-c++-common/goacc/deviceptr-4.c (working copy) @@ -8,4 +8,4 @@ subr (int *a) a[0] += 1.0; } -/* { dg-final { scan-tree-dump-times "#pragma omp target oacc_parallel.*map\\(tofrom:a" 1 "gimple" } } */ +/* { dg-final { scan-tree-dump-times "#pragma omp target oacc_parallel.*map\\(force_deviceptr:a" 1 "gimple" } } */ Index: libgomp/oacc-parallel.c =================================================================== --- libgomp/oacc-parallel.c (revision 277155) +++ libgomp/oacc-parallel.c (working copy) @@ -66,51 +66,6 @@ find_pointer (int pos, size_t mapnum, unsigned sho return 0; } -/* Handle the mapping pair that are presented when a - deviceptr clause is used with Fortran. */ - -static void -handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes, - unsigned short *kinds) -{ - int i; - - for (i = 0; i < mapnum; i++) - { - unsigned short kind1 = kinds[i] & 0xff; - - /* Handle Fortran deviceptr clause. */ - if (kind1 == GOMP_MAP_FORCE_DEVICEPTR) - { - unsigned short kind2; - - if (i < (signed)mapnum - 1) - kind2 = kinds[i + 1] & 0xff; - else - kind2 = 0xffff; - - if (sizes[i] == sizeof (void *)) - continue; - - /* At this point, we're dealing with a Fortran deviceptr. - If the next element is not what we're expecting, then - this is an instance of where the deviceptr variable was - not used within the region and the pointer was removed - by the gimplifier. */ - if (kind2 == GOMP_MAP_POINTER - && sizes[i + 1] == 0 - && hostaddrs[i] == *(void **)hostaddrs[i + 1]) - { - kinds[i+1] = kinds[i]; - sizes[i+1] = sizeof (void *); - } - - /* Invalidate the entry. */ - hostaddrs[i] = NULL; - } - } -} - static void goacc_wait (int async, int num_waits, va_list *ap); @@ -203,8 +158,6 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (voi goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, &api_info); - handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds); - /* Host fallback if "if" clause is false or if the current device is set to the host. */ if (flags & GOACC_FLAG_HOST_FALLBACK) Index: libgomp/testsuite/libgomp.oacc-fortran/deviceptr-1.f90 =================================================================== --- libgomp/testsuite/libgomp.oacc-fortran/deviceptr-1.f90 (nonexistent) +++ libgomp/testsuite/libgomp.oacc-fortran/deviceptr-1.f90 (working copy) @@ -0,0 +1,197 @@ +! { dg-do run } + +! Test the deviceptr clause with various directives +! and in combination with other directives where +! the deviceptr variable is implied. + +subroutine subr1 (a, b) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + integer :: b(N) + integer :: i = 0 + + !$acc data deviceptr (a) + + !$acc parallel copy (b) + do i = 1, N + a(i) = i * 2 + b(i) = a(i) + end do + !$acc end parallel + + !$acc end data + +end subroutine + +subroutine subr2 (a, b) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + !$acc declare deviceptr (a) + integer :: b(N) + integer :: i = 0 + + !$acc parallel copy (b) + do i = 1, N + a(i) = i * 4 + b(i) = a(i) + end do + !$acc end parallel + +end subroutine + +subroutine subr3 (a, b) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + !$acc declare deviceptr (a) + integer :: b(N) + integer :: i = 0 + + !$acc kernels copy (b) + do i = 1, N + a(i) = i * 8 + b(i) = a(i) + end do + !$acc end kernels + +end subroutine + +subroutine subr4 (a, b) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + integer :: b(N) + integer :: i = 0 + + !$acc parallel deviceptr (a) copy (b) + do i = 1, N + a(i) = i * 16 + b(i) = a(i) + end do + !$acc end parallel + +end subroutine + +subroutine subr5 (a, b) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + integer :: b(N) + integer :: i = 0 + + !$acc kernels deviceptr (a) copy (b) + do i = 1, N + a(i) = i * 32 + b(i) = a(i) + end do + !$acc end kernels + +end subroutine + +subroutine subr6 (a, b) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + integer :: b(N) + integer :: i = 0 + + !$acc parallel deviceptr (a) copy (b) + do i = 1, N + b(i) = i + end do + !$acc end parallel + +end subroutine + +subroutine subr7 (a, b) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + integer :: b(N) + integer :: i = 0 + + !$acc data deviceptr (a) + + !$acc parallel copy (b) + do i = 1, N + a(i) = i * 2 + b(i) = a(i) + end do + !$acc end parallel + + !$acc parallel copy (b) + do i = 1, N + a(i) = b(i) * 2 + b(i) = a(i) + end do + !$acc end parallel + + !$acc end data + +end subroutine + +program main + use iso_c_binding, only: c_ptr, c_f_pointer + implicit none + type (c_ptr) :: cp + integer, parameter :: N = 8 + integer, pointer :: fp(:) + integer :: i = 0 + integer :: b(N) + + interface + function acc_malloc (s) bind (C) + use iso_c_binding, only: c_ptr, c_size_t + integer (c_size_t), value :: s + type (c_ptr) :: acc_malloc + end function + end interface + + cp = acc_malloc (N * sizeof (fp(N))) + call c_f_pointer (cp, fp, [N]) + + call subr1 (fp, b) + + do i = 1, N + if (b(i) .ne. i * 2) call abort + end do + + call subr2 (fp, b) + + do i = 1, N + if (b(i) .ne. i * 4) call abort + end do + + call subr3 (fp, b) + + do i = 1, N + if (b(i) .ne. i * 8) call abort + end do + + call subr4 (fp, b) + + do i = 1, N + if (b(i) .ne. i * 16) call abort + end do + + call subr5 (fp, b) + + do i = 1, N + if (b(i) .ne. i * 32) call abort + end do + + call subr6 (fp, b) + + do i = 1, N + if (b(i) .ne. i) call abort + end do + + call subr7 (fp, b) + + do i = 1, N + if (b(i) .ne. i * 4) call abort + end do + +end program main Index: libgomp/testsuite/libgomp.oacc-fortran/deviceptr-2.f90 =================================================================== --- libgomp/testsuite/libgomp.oacc-fortran/deviceptr-2.f90 (nonexistent) +++ libgomp/testsuite/libgomp.oacc-fortran/deviceptr-2.f90 (working copy) @@ -0,0 +1,54 @@ +! { dg-do run } + +! Test deviceptr clause to see if computation on device memory array +! and copy back to host memory works. + +subroutine process_by_openacc (a, c) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + integer :: i = 0 + integer :: c + + !$acc parallel deviceptr (a) + do i = 1, N + a(i) = i * c + end do + !$acc end parallel + +end subroutine + +program main + use iso_c_binding, only: c_ptr, c_f_pointer, c_loc + implicit none + type (c_ptr) :: cp + integer, parameter :: N = 8 + integer, pointer :: fp(:) + integer, target :: res(N) + integer :: i + + interface + function acc_malloc (s) bind (C) + use iso_c_binding, only: c_ptr, c_size_t + integer (c_size_t), value :: s + type (c_ptr) :: acc_malloc + end function acc_malloc + + subroutine acc_memcpy_from_device (d, s, sz) bind (C) + use iso_c_binding, only: c_ptr, c_size_t + type (c_ptr), value :: d, s + integer (c_size_t), value :: sz + end subroutine acc_memcpy_from_device + end interface + + cp = acc_malloc (N * sizeof (fp(N))) + call c_f_pointer (cp, fp, [N]) + + call process_by_openacc (fp, 1234) + call acc_memcpy_from_device (c_loc (res), cp, N * sizeof (fp(N))) + + do i = 1, N + if (res(i) .ne. i * 1234) call abort + end do + +end program main