On 2016/5/31 05:51 PM, Chung-Lin Tang wrote:
> On 2016/5/31 3:28 PM, Thomas Schwinge wrote:
>> > Hi!
>> > 
>> > On Mon, 30 May 2016 18:53:41 +0200, Jakub Jelinek <ja...@redhat.com> wrote:
>>> >> On Mon, May 30, 2016 at 10:38:59PM +0800, Chung-Lin Tang wrote:
>>>> >>> Hi, a previous patch of Cesar's has made the middle-end omp-lowering
>>>> >>> automatically create and insert a tofrom (i.e. present_or_copy) map for
>>>> >>> parallel reductions.  This allowed the user to not need explicit
>>>> >>> clauses to copy out the reduction result, but because reduction 
>>>> >>> arguments
>>>> >>> are not marked addressable, async does not work as expected,
>>>> >>> i.e. the asynchronous copy-out results are not used in the compiler 
>>>> >>> generated code.
>>> >>
>>> >> If you need it only for async parallel/kernels? regions, can't you do 
>>> >> that
>>> >> only for those and not for others?
> That is achievable, but not in line with how we currently treat all other
> data clause OMP_CLAUSE_MAPs, which are all marked addressable. Is this special
> case handling really better here?
> 

Hi Jakub, here's a version of the patch with the addressable marking restricted
to when there's an async clause. Tests re-ran to ensure no regressions.  Please 
inform
if this way seems better.  Also attached are some new testcases.

Thanks,
Chung-Lin

2016-06-01  Chung-Lin Tang  <clt...@codesourcery.com>

        c/
        * c-typeck.c (c_finish_omp_clauses): Mark OpenACC reduction
        arguments as addressable when async clause exists.

        cp/
        * semantics.c (finish_omp_clauses): Mark OpenACC reduction
        arguments as addressable when async clause exists.

        fortran/
        * trans-openmp.c (gfc_trans_oacc_construct): Mark OpenACC reduction
        arguments as addressable. when async clause exists.
        (gfc_trans_oacc_combined_directive): Likewise.

        libgomp/testsuite/
        * libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-8.c: New test.
        * libgomp/testsuite/libgomp.oacc-fortran/reduction-8.f90: New test.

Index: c/c-typeck.c
===================================================================
--- c/c-typeck.c	(revision 236845)
+++ c/c-typeck.c	(working copy)
@@ -12529,6 +12529,7 @@ c_finish_omp_clauses (tree clauses, enum c_omp_reg
   tree *nowait_clause = NULL;
   bool ordered_seen = false;
   tree schedule_clause = NULL_TREE;
+  bool oacc_async = false;
 
   bitmap_obstack_initialize (NULL);
   bitmap_initialize (&generic_head, &bitmap_default_obstack);
@@ -12539,6 +12540,14 @@ c_finish_omp_clauses (tree clauses, enum c_omp_reg
   bitmap_initialize (&map_field_head, &bitmap_default_obstack);
   bitmap_initialize (&oacc_reduction_head, &bitmap_default_obstack);
 
+  if (ort & C_ORT_ACC)
+    for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_ASYNC)
+	{
+	  oacc_async = true;
+	  break;
+	}
+	  
   for (pc = &clauses, c = clauses; c ; c = *pc)
     {
       bool remove = false;
@@ -12575,6 +12584,8 @@ c_finish_omp_clauses (tree clauses, enum c_omp_reg
 	      remove = true;
 	      break;
 	    }
+	  if (oacc_async)
+	    c_mark_addressable (t);
 	  type = TREE_TYPE (t);
 	  if (TREE_CODE (t) == MEM_REF)
 	    type = TREE_TYPE (type);
Index: cp/semantics.c
===================================================================
--- cp/semantics.c	(revision 236845)
+++ cp/semantics.c	(working copy)
@@ -5774,6 +5774,7 @@ finish_omp_clauses (tree clauses, enum c_omp_regio
   bool branch_seen = false;
   bool copyprivate_seen = false;
   bool ordered_seen = false;
+  bool oacc_async = false;
 
   bitmap_obstack_initialize (NULL);
   bitmap_initialize (&generic_head, &bitmap_default_obstack);
@@ -5784,6 +5785,14 @@ finish_omp_clauses (tree clauses, enum c_omp_regio
   bitmap_initialize (&map_field_head, &bitmap_default_obstack);
   bitmap_initialize (&oacc_reduction_head, &bitmap_default_obstack);
 
+  if (ort & C_ORT_ACC)
+    for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_ASYNC)
+	{
+	  oacc_async = true;
+	  break;
+	}
+
   for (pc = &clauses, c = clauses; c ; c = *pc)
     {
       bool remove = false;
@@ -5827,6 +5836,8 @@ finish_omp_clauses (tree clauses, enum c_omp_regio
 		t = n;
 	      goto check_dup_generic_t;
 	    }
+	  if (oacc_async)
+	    cxx_mark_addressable (t);
 	  goto check_dup_generic;
 	case OMP_CLAUSE_COPYPRIVATE:
 	  copyprivate_seen = true;
Index: fortran/trans-openmp.c
===================================================================
--- fortran/trans-openmp.c	(revision 236845)
+++ fortran/trans-openmp.c	(working copy)
@@ -2704,6 +2704,15 @@ gfc_trans_oacc_construct (gfc_code *code)
   gfc_start_block (&block);
   oacc_clauses = gfc_trans_omp_clauses (&block, code->ext.omp_clauses,
 					code->loc);
+  for (tree c = oacc_clauses; c; c = OMP_CLAUSE_CHAIN (c))
+    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_ASYNC)
+      {
+	for (c = oacc_clauses; c; c = OMP_CLAUSE_CHAIN (c))
+	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
+	      && DECL_P (OMP_CLAUSE_DECL (c)))
+	    TREE_ADDRESSABLE (OMP_CLAUSE_DECL (c)) = 1;
+	break;
+      }
   stmt = gfc_trans_omp_code (code->block->next, true);
   stmt = build2_loc (input_location, construct_code, void_type_node, stmt,
 		     oacc_clauses);
@@ -3501,6 +3510,15 @@ gfc_trans_oacc_combined_directive (gfc_code *code)
 	construct_clauses.lists[OMP_LIST_REDUCTION] = NULL;
       oacc_clauses = gfc_trans_omp_clauses (&block, &construct_clauses,
 					    code->loc);
+      for (tree c = oacc_clauses; c; c = OMP_CLAUSE_CHAIN (c))
+	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_ASYNC)
+	  {
+	    for (c = oacc_clauses; c; c = OMP_CLAUSE_CHAIN (c))
+	      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
+		  && DECL_P (OMP_CLAUSE_DECL (c)))
+		TREE_ADDRESSABLE (OMP_CLAUSE_DECL (c)) = 1;
+	    break;
+	  }
     }
   if (!loop_clauses.seq)
     pblock = &block;
Index: libgomp/testsuite/libgomp.oacc-fortran/reduction-8.f90
===================================================================
--- libgomp/testsuite/libgomp.oacc-fortran/reduction-8.f90	(revision 0)
+++ libgomp/testsuite/libgomp.oacc-fortran/reduction-8.f90	(revision 0)
@@ -0,0 +1,41 @@
+! { dg-do run }
+
+program reduction
+  implicit none
+  integer, parameter    :: n = 100
+  integer               :: i, h1, h2, s1, s2, a1, a2
+
+  h1 = 0
+  h2 = 0
+  do i = 1, n
+     h1 = h1 + 1
+     h2 = h2 + 2
+  end do
+
+  s1 = 0
+  s2 = 0
+  !$acc parallel loop reduction(+:s1, s2)
+  do i = 1, n
+     s1 = s1 + 1
+     s2 = s2 + 2
+  end do
+  !$acc end parallel loop
+
+  a1 = 0
+  a2 = 0
+  !$acc parallel loop reduction(+:a1, a2) async(1)
+  do i = 1, n
+     a1 = a1 + 1
+     a2 = a2 + 2
+  end do
+  !$acc end parallel loop
+
+  if (h1 .ne. s1) call abort ()
+  if (h2 .ne. s2) call abort ()
+
+  !$acc wait(1)
+
+  if (h1 .ne. a1) call abort ()
+  if (h2 .ne. a2) call abort ()
+
+end program reduction
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-8.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-8.c	(revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-8.c	(revision 0)
@@ -0,0 +1,30 @@
+const int n = 100;
+    
+// Check async over parallel construct with reduction
+    
+int
+async_sum (int c)
+{
+  int s = 0;
+    
+#pragma acc parallel loop num_gangs (10) gang reduction (+:s) async
+  for (int i = 0; i < n; i++)
+    s += i+c;
+    
+#pragma acc wait
+  return s;
+}
+    
+int
+main()
+{
+  int result = 0;
+    
+  for (int i = 0; i < n; i++)
+    result += i+1;
+    
+  if (async_sum (1) != result)
+    __builtin_abort ();
+    
+  return 0;
+}

Reply via email to