I've applied this patch to gomp-4_0-branch to add support for fortran allocatable scalars inside OpenACC declare constructs. In order to update the declared variable on the device, the fortran FE now uses the GOMP_MAP_ALWAYS_POINTERS for declared allocatable scalars. That necessitated some minor tweaking in the runtime. Note that I didn't add support for allocatable derived types in this patch as the OpenACC 2.5 spec is still unclear on them.
Included in this patch is a bug fix for non-declared allocatable scalars. Specifically, prior to this patch, the gimplifier would treat allocatable scalar variables like C pointers, so only the pointer would be updated on the accelerator and not the value being pointed to. If the user explicitly specified a data clause for the variable, the fortran FE itself would generate code to copy the value being pointed to along with the pointer itself. I.e. this bug only affects implicit firstprivate allocatable scalar variables. To resolve this issue, this patch teaches lower_omp_target to pass the value being pointed to by a GOMP_MAP_FIRSTPRIVATE_INT data mapping instead of the pointer itself to the accelerator, then on entry to the offloaded region, lower_omp_target will create a local copy of the pointed to value on target and along with a local pointer to it. Cesar
2017-04-19 Cesar Philippidis <ce...@codesourcery.com> gcc/fortran/ * trans-decl.c (add_clause): Populate sym->backend_decl so that it can be used to determine if two symbols are unique. * trans-openmp.c (gfc_trans_oacc_executable_directive): Use GOMP_MAP_ALWAYS_POINTER for fortran allocatable pointers. * trans-stmt.c (gfc_trans_allocate): Call gfc_trans_oacc_declare_allocate for declared allocatable pointers. (gfc_trans_deallocate): Likewise. gcc/ * omp-low.c (convert_to_firstprivate_int): Handle pointer types. (convert_from_firstprivate_int): Likewise. Create local storage for the values being pointed too. Add new argument orig_type. (lower_omp_target): Allow pointer types with GOMP_MAP_FIRSTPRIVATE_INT. gcc/testsuite/ * gfortran.dg/goacc/declare-allocatable-1.f90: Update test case to exercise allocatable scalars. libgomp/ * oacc-parallel.c (GOACC_enter_exit_data): Add support for GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}. (GOACC_update): Add support for GOMP_MAP_ALWAYS_POINTER. * testsuite/libgomp.oacc-fortran/allocatable-scalar.f90: New test. * testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90: New test. * testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90: New test. * testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90: New test. diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c index b4db6b0..c0efc1a 100644 --- a/gcc/fortran/trans-decl.c +++ b/gcc/fortran/trans-decl.c @@ -5935,6 +5935,9 @@ add_clause (gfc_symbol *sym, gfc_omp_map_op map_op) if (!module_oacc_clauses) module_oacc_clauses = gfc_get_omp_clauses (); + if (sym->backend_decl == NULL) + gfc_get_symbol_decl (sym); + for (n = module_oacc_clauses->lists[OMP_LIST_MAP]; n != NULL; n = n->next) if (n->sym->backend_decl == sym->backend_decl) return; diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c index 238eebe..3718da2 100644 --- a/gcc/fortran/trans-openmp.c +++ b/gcc/fortran/trans-openmp.c @@ -3336,6 +3336,18 @@ gfc_trans_oacc_executable_directive (gfc_code *code) gfc_start_block (&block); oacc_clauses = gfc_trans_omp_clauses (&block, code->ext.omp_clauses, code->loc); + + /* Promote GOMP_MAP_FIRSTPRIVATE_POINTER to GOMP_MAP_ALWAYS_POINTER for + variables inside OpenACC update directives. */ + if (code->op == EXEC_OACC_UPDATE) + for (tree c = oacc_clauses; c; c = OMP_CLAUSE_CHAIN (c)) + { + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER) + OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_ALWAYS_POINTER); + } + + stmt = build1_loc (input_location, construct_code, void_type_node, oacc_clauses); gfc_add_expr_to_block (&block, stmt); diff --git a/gcc/fortran/trans-stmt.c b/gcc/fortran/trans-stmt.c index dcf17694..faf19e6 100644 --- a/gcc/fortran/trans-stmt.c +++ b/gcc/fortran/trans-stmt.c @@ -5883,6 +5883,10 @@ gfc_trans_allocate (gfc_code * code) tmp = gfc_nullify_alloc_comp (expr->ts.u.derived, tmp, 0); gfc_add_expr_to_block (&se.pre, tmp); } + + /* Allocate memory for OpenACC declared variables. */ + if (expr->symtree->n.sym->attr.oacc_declare_create) + gfc_trans_oacc_declare_allocate (&se.pre, expr, true); } else { @@ -6360,6 +6364,10 @@ gfc_trans_deallocate (gfc_code *code) } else { + /* Deallocate memory for OpenACC declared variables. */ + if (expr->symtree->n.sym->attr.oacc_declare_create) + gfc_trans_oacc_declare_allocate (&se.pre, expr, false); + tmp = gfc_deallocate_scalar_with_status (se.expr, pstat, false, al->expr, al->expr->ts); gfc_add_expr_to_block (&se.pre, tmp); diff --git a/gcc/omp-low.c b/gcc/omp-low.c index a584a44..5c41edc 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -16568,7 +16568,7 @@ convert_to_firstprivate_int (tree var, gimple_seq *gs) if (INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type)) { - if (is_reference (var)) + if (is_reference (var) || POINTER_TYPE_P (type)) { tmp = create_tmp_var (type); gimplify_assign (tmp, build_simple_mem_ref (var), gs); @@ -16605,7 +16605,8 @@ convert_to_firstprivate_int (tree var, gimple_seq *gs) /* Like convert_to_firstprivate_int, but restore the original type. */ static tree -convert_from_firstprivate_int (tree var, bool is_ref, gimple_seq *gs) +convert_from_firstprivate_int (tree var, tree orig_type, bool is_ref, + gimple_seq *gs) { tree type = TREE_TYPE (var); tree new_type = NULL_TREE; @@ -16614,7 +16615,31 @@ convert_from_firstprivate_int (tree var, bool is_ref, gimple_seq *gs) gcc_assert (TREE_CODE (var) == MEM_REF); var = TREE_OPERAND (var, 0); - if (INTEGRAL_TYPE_P (var) || POINTER_TYPE_P (type)) + if (is_ref || POINTER_TYPE_P (orig_type)) + { + tree_code code = NOP_EXPR; + + if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == COMPLEX_TYPE) + code = VIEW_CONVERT_EXPR; + + if (code == VIEW_CONVERT_EXPR + && TYPE_SIZE (type) != TYPE_SIZE (orig_type)) + { + tree ptype = build_pointer_type (type); + var = fold_build1 (code, ptype, build_fold_addr_expr (var)); + var = build_simple_mem_ref (var); + } + else + var = fold_build1 (code, type, var); + + tree inst = create_tmp_var (type); + gimplify_assign (inst, var, gs); + var = build_fold_addr_expr (inst); + + return var; + } + + if (INTEGRAL_TYPE_P (var)) return fold_convert (type, var); switch (tree_to_uhwi (TYPE_SIZE (type))) @@ -16631,13 +16656,6 @@ convert_from_firstprivate_int (tree var, bool is_ref, gimple_seq *gs) gimplify_assign (tmp, var, gs); var = fold_build1 (VIEW_CONVERT_EXPR, type, tmp); - if (is_ref) - { - tmp = create_tmp_var (build_pointer_type (type)); - gimplify_assign (tmp, build_fold_addr_expr (var), gs); - var = tmp; - } - return var; } @@ -16846,16 +16864,19 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) { gcc_assert (is_gimple_omp_oacc (ctx->stmt)); if (oacc_firstprivate_int) - x = convert_from_firstprivate_int (x, is_reference (var), + x = convert_from_firstprivate_int (x, TREE_TYPE (new_var), + is_reference (var), &fplist); - else if (is_reference (new_var) - && TREE_CODE (var_type) != POINTER_TYPE) + else if (is_reference (new_var)) { /* Create a local object to hold the instance value. */ const char *id = IDENTIFIER_POINTER (DECL_NAME (new_var)); tree inst = create_tmp_var (TREE_TYPE (var_type), id); - gimplify_assign (inst, fold_indirect_ref (x), &fplist); + if (TREE_CODE (var_type) == POINTER_TYPE) + gimplify_assign (inst, x, &fplist); + else + gimplify_assign (inst, fold_indirect_ref (x), &fplist); x = build_fold_addr_expr (inst); } gimplify_assign (new_var, x, &fplist); @@ -17103,8 +17124,9 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) } else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE) { + tree new_var = lookup_decl (var, ctx); tree type = TREE_TYPE (var); - tree inner_type = is_reference (var) + tree inner_type = is_reference (new_var) ? TREE_TYPE (type) : type; gcc_checking_assert (is_gimple_omp_oacc (ctx->stmt)); if ((TREE_CODE (inner_type) == REAL_TYPE diff --git a/gcc/testsuite/gfortran.dg/goacc/declare-allocatable-1.f90 b/gcc/testsuite/gfortran.dg/goacc/declare-allocatable-1.f90 index b6bb6b3..5349e0d 100644 --- a/gcc/testsuite/gfortran.dg/goacc/declare-allocatable-1.f90 +++ b/gcc/testsuite/gfortran.dg/goacc/declare-allocatable-1.f90 @@ -6,20 +6,20 @@ program allocate implicit none - integer, allocatable :: a(:) + integer, allocatable :: a(:), b integer, parameter :: n = 100 integer i - !$acc declare create(a) + !$acc declare create(a,b) - allocate (a(n)) + allocate (a(n), b) - !$acc parallel loop copyout(a) + !$acc parallel loop copyout(a, b) do i = 1, n - a(i) = i + a(i) = b end do - deallocate (a) + deallocate (a, b) end program allocate -! { dg-final { scan-tree-dump-times "pragma acc enter data map.declare_allocate" 1 "original" } } -! { dg-final { scan-tree-dump-times "pragma acc exit data map.declare_deallocate" 1 "original" } } +! { dg-final { scan-tree-dump-times "pragma acc enter data map.declare_allocate" 2 "original" } } +! { dg-final { scan-tree-dump-times "pragma acc exit data map.declare_deallocate" 2 "original" } } diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index 3962076..66acdf6 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -523,6 +523,7 @@ GOACC_enter_exit_data (int device, size_t mapnum, { switch (kind) { + case GOMP_MAP_DECLARE_ALLOCATE: case GOMP_MAP_ALLOC: acc_present_or_create (hostaddrs[i], sizes[i]); break; @@ -574,6 +575,7 @@ GOACC_enter_exit_data (int device, size_t mapnum, if (acc_is_present (hostaddrs[i], sizes[i])) acc_delete (hostaddrs[i], sizes[i]); break; + case GOMP_MAP_DECLARE_DEALLOCATE: case GOMP_MAP_FROM: case GOMP_MAP_FORCE_FROM: acc_copyout (hostaddrs[i], sizes[i]); @@ -655,6 +657,7 @@ GOACC_update (int device, size_t mapnum, acc_dev->openacc.async_set_async_func (async); + bool update_device = false; for (i = 0; i < mapnum; ++i) { unsigned char kind = kinds[i] & 0xff; @@ -665,11 +668,31 @@ GOACC_update (int device, size_t mapnum, case GOMP_MAP_TO_PSET: break; + case GOMP_MAP_ALWAYS_POINTER: + if (update_device) + { + /* Save the contents of the host pointer. */ + void *dptr = acc_deviceptr (hostaddrs[i-1]); + uintptr_t t = *(uintptr_t *) hostaddrs[i]; + + /* Update the contents of the host pointer to reflect + the value of the allocated device memory in the + previous pointer. */ + *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr; + acc_update_device (hostaddrs[i], sizeof (uintptr_t)); + + /* Restore the host pointer. */ + *(uintptr_t *) hostaddrs[i] = t; + } + break; + case GOMP_MAP_FORCE_TO: + update_device = true; acc_update_device (hostaddrs[i], sizes[i]); break; case GOMP_MAP_FORCE_FROM: + update_device = false; acc_update_self (hostaddrs[i], sizes[i]); break; diff --git a/libgomp/testsuite/libgomp.oacc-fortran/allocatable-scalar.f90 b/libgomp/testsuite/libgomp.oacc-fortran/allocatable-scalar.f90 new file mode 100644 index 0000000..8386c5d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/allocatable-scalar.f90 @@ -0,0 +1,30 @@ +program main + implicit none + integer, parameter :: n = 100 + integer, allocatable :: a, c + integer :: i, b(n) + + allocate (a) + + a = 50 + + !$acc parallel loop + do i = 1, n; + b(i) = a + end do + + do i = 1, n + if (b(i) /= a) call abort + end do + + allocate (c) + + print *, loc (c) + !$acc parallel copyout(c) num_gangs(1) + c = a + !$acc end parallel + + if (c /= a) call abort + + deallocate (a, c) +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90 new file mode 100644 index 0000000..3521a7f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90 @@ -0,0 +1,48 @@ +! Test declare create with allocatable scalars. + +! { dg-do run } + +program main + use openacc + implicit none + integer, parameter :: n = 100 + integer, allocatable :: a, c + integer :: i, b(n) + !$acc declare create (c) + + allocate (a) + + a = 50 + + !$acc parallel loop firstprivate(a) + do i = 1, n; + b(i) = a + end do + + do i = 1, n + if (b(i) /= a) call abort + end do + + allocate (c) + a = 100 + + if (.not.acc_is_present(c)) call abort + + !$acc parallel num_gangs(1) present(c) + c = a + !$acc end parallel + + !$acc update host(c) + if (c /= a) call abort + + !$acc parallel loop + do i = 1, n + b(i) = c + end do + + do i = 1, n + if (b(i) /= a) call abort + end do + + deallocate (a, c) +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90 new file mode 100644 index 0000000..919146a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90 @@ -0,0 +1,218 @@ +! Test declare create with allocatable arrays. + +! { dg-do run } + +module vars + implicit none + integer, parameter :: n = 100 + real*8, allocatable :: a, b(:) + !$acc declare create (a, b) +end module vars + +program test + use vars + use openacc + implicit none + integer :: i + + interface + subroutine sub1 + !$acc routine gang + end subroutine sub1 + + subroutine sub2 + end subroutine sub2 + + real*8 function fun1 (ix) + integer ix + !$acc routine seq + end function fun1 + + real*8 function fun2 (ix) + integer ix + !$acc routine seq + end function fun2 + end interface + + if (allocated (a)) call abort + if (allocated (b)) call abort + + ! Test local usage of an allocated declared array. + + allocate (a) + + if (.not.allocated (a)) call abort + if (acc_is_present (a) .neqv. .true.) call abort + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + a = 2.0 + !$acc update device(a) + + !$acc parallel loop + do i = 1, n + b(i) = i * a + end do + + if (.not.acc_is_present (b)) call abort + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i*a) call abort + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside an acc + ! routine subroutine. + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + !$acc parallel + call sub1 + !$acc end parallel + + if (.not.acc_is_present (b)) call abort + + !$acc update host(b) + + do i = 1, n + if (b(i) /= a+i*2) call abort + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside a host + ! subroutine. + + call sub2 + + if (.not.acc_is_present (b)) call abort + + !$acc update host(b) + + do i = 1, n + if (b(i) /= 1.0) call abort + end do + + deallocate (b) + + if (allocated (b)) call abort + + ! Test the usage of an allocated declared array inside an acc + ! routine function. + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + !$acc parallel loop + do i = 1, n + b(i) = 1.0 + end do + + !$acc parallel loop + do i = 1, n + b(i) = fun1 (i) + end do + + if (.not.acc_is_present (b)) call abort + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i) call abort + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside a host + ! function. + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + !$acc parallel loop + do i = 1, n + b(i) = 1.0 + end do + + !$acc update host(b) + + do i = 1, n + b(i) = fun2 (i) + end do + + if (.not.acc_is_present (b)) call abort + + do i = 1, n + if (b(i) /= i*a) call abort + end do + + deallocate (a) + deallocate (b) +end program test + +! Set each element in array 'b' at index i to a+i*2. + +subroutine sub1 ! { dg-warning "region is worker partitioned" } + use vars + implicit none + integer i + !$acc routine gang + + !$acc loop + do i = 1, n + b(i) = a+i*2 + end do +end subroutine sub1 + +! Allocate array 'b', and set it to all 1.0. + +subroutine sub2 + use vars + use openacc + implicit none + integer i + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + !$acc parallel loop + do i = 1, n + b(i) = 1.0 + end do +end subroutine sub2 + +! Return b(i) * i; + +real*8 function fun1 (i) + use vars + implicit none + integer i + !$acc routine seq + + fun1 = b(i) * i +end function fun1 + +! Return b(i) * i * a; + +real*8 function fun2 (i) + use vars + implicit none + integer i + + fun2 = b(i) * i * a +end function fun2 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90 new file mode 100644 index 0000000..b4cf26e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90 @@ -0,0 +1,66 @@ +! Test declare create with allocatable arrays and scalars. The unused +! declared array 'b' caused an ICE in the past. + +! { dg-do run } + +module vars + implicit none + integer, parameter :: n = 100 + real*8, allocatable :: a, b(:) + !$acc declare create (a, b) +end module vars + +program test + use vars + implicit none + integer :: i + + interface + subroutine sub1 + end subroutine sub1 + + subroutine sub2 + end subroutine sub2 + + real*8 function fun1 (ix) + integer ix + !$acc routine seq + end function fun1 + + real*8 function fun2 (ix) + integer ix + !$acc routine seq + end function fun2 + end interface + + if (allocated (a)) call abort + if (allocated (b)) call abort + + ! Test the usage of an allocated declared array inside an acc + ! routine subroutine. + + allocate (a) + allocate (b(n)) + + if (.not.allocated (b)) call abort + + call sub1 + + !$acc update self(a) + if (a /= 50) call abort + + deallocate (a) + deallocate (b) + +end program test + +! Set 'a' to 50. + +subroutine sub1 + use vars + implicit none + integer i + + a = 50 + !$acc update device(a) +end subroutine sub1