In both OpenACC and OpenMP, each subarray has at least two data mappings
associated with it, one for the pointer and another for the data in
the array section (Fortran also has a pset mapping). One problem I
observed in Fortran is that array section data is cast to char *.
Consequently, when lower_omp_target assigns alignment for the subarray
data, it does so incorrectly. This is a problem on nvptx if you have a
data clause such as

  integer foo
  real*8 bar (100)

  !$acc data copy (foo, bar(1:100))

Here, the data associated with bar could get aligned on a 4-byte
boundary instead of an 8-byte one. That causes problems on nvptx targets.

My fix for this is to prevent the Fortran front end from casting the
data pointers to char *. I only removed the cast in the code that
handles OMP_CLAUSE_MAP. The subarrays associated with OMP_CLAUSE_SHARED
also get cast to char *, but I left those as-is because I'm not that
familiar with how non-OpenMP target regions get lowered.

Is this patch OK for trunk?

Thanks,
Cesar
2015-09-22  Cesar Philippidis  <ce...@codesourcery.com>

	gcc/
	* fortran/trans-openmp.c (gfc_omp_finish_clause): Don't cast ptr
	into a character pointer.
	(gfc_trans_omp_clauses_1): Likewise.

	libgomp/
	* testsuite/libgomp.oacc-fortran/data-alignment.f90: New test.

diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c
index cd76f2a..8c1e897 100644
--- a/gcc/fortran/trans-openmp.c
+++ b/gcc/fortran/trans-openmp.c
@@ -1065,7 +1065,6 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p)
       gfc_start_block (&block);
       tree type = TREE_TYPE (decl);
       tree ptr = gfc_conv_descriptor_data_get (decl);
-      ptr = fold_convert (build_pointer_type (char_type_node), ptr);
       ptr = build_fold_indirect_ref (ptr);
       OMP_CLAUSE_DECL (c) = ptr;
       c2 = build_omp_clause (input_location, OMP_CLAUSE_MAP);
@@ -1972,8 +1971,6 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses,
 		    {
 		      tree type = TREE_TYPE (decl);
 		      tree ptr = gfc_conv_descriptor_data_get (decl);
-		      ptr = fold_convert (build_pointer_type (char_type_node),
-					  ptr);
 		      ptr = build_fold_indirect_ref (ptr);
 		      OMP_CLAUSE_DECL (node) = ptr;
 		      node2 = build_omp_clause (input_location,
@@ -2066,8 +2063,6 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses,
 				       OMP_CLAUSE_SIZE (node), elemsz);
 		    }
 		  gfc_add_block_to_block (block, &se.post);
-		  ptr = fold_convert (build_pointer_type (char_type_node),
-				      ptr);
 		  OMP_CLAUSE_DECL (node) = build_fold_indirect_ref (ptr);
 
 		  if (POINTER_TYPE_P (TREE_TYPE (decl))
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-alignment.f90 b/libgomp/testsuite/libgomp.oacc-fortran/data-alignment.f90
new file mode 100644
index 0000000..3c309c0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/data-alignment.f90
@@ -0,0 +1,35 @@
+! Test if the array data associated with c is properly aligned
+! on the accelerator.  If it is not, this program will crash.
+
+! { dg-do run }
+
+integer function routine_align()
+  implicit none
+  integer, parameter :: n = 10000
+  real*8, dimension(:), allocatable :: c
+  integer :: i, idx
+
+  allocate (c(n))
+  routine_align = 0
+  c = 0.0
+
+  !$acc data copyin(idx) copy(c(1:n))
+
+  !$acc parallel vector_length(32)
+  !$acc loop vector
+  do i=1, n
+     c(i) = i
+  enddo
+  !$acc end parallel
+
+  !$acc end data
+end function routine_align
+
+
+! main driver
+program routine_align_main
+  implicit none
+  integer :: success
+  integer routine_align
+  success = routine_align()
+end program routine_align_main

Reply via email to