Hi Tobias,

I'm just back from holiday, which it took me a bit longer to reply.

Actually, the test case is *not* OK.

If one compiles the original test case of the PR (or your
workshare2.f90) with "-O" and looks at "-fdump-tree-original", one finds:

     #pragma omp parallel default(shared)
       {
         {
           real(kind=4) __var_1;
           {
             #pragma omp single
               {
                 __var_1 = __builtin_cosf (b[0])
               }
...
                 #pragma omp for schedule(static) nowait
                 for (S.1 = 1; S.1 <= 5; S.1 = S.1 + 1)
                   {
                     a[S.1 + -1] = a[S.1 + -1] * D.1730 + a[S.1 + -1] *
D.1731;

Thus, __var_1 is a thread-local variable; however, COS() is not executed
in all threads but only in one due to the omp single: "The single
construct specifies that the associated structured block is executed by
only one of the threads in the team" (2.5.3 single Construct, OpenMP 3.1).

Jakub remarks that omp single is what we expand to omp workshare if it
is not simple enough for us.

I modified the test case as below

! { dg-do run }
! { dg-options "-ffrontend-optimize" }
! PR 50690 - this used to ICE because workshare could not handle
! BLOCKs.
program foo
  implicit none
  integer, parameter :: n = 100000000
  real, parameter :: eps = 3e-7
  integer :: i
  real :: A(n), B(5), C(n)
  B(1) = 3.344
  do i=1,10
  call random_number(a)
  c = a
  !$omp parallel default(shared)
  !$omp workshare
   A(:) = A(:)*cos(B(1))+A(:)*cos(B(1))
  !$omp end workshare nowait
  !$omp end parallel ! sync is implied here
  end do
  c = c*cos(b(1)) + c*cos(b(1))
  if (any(abs(a-c) > eps)) call abort
end program foo

and did indeed see an abort.

With the patch below (based on an earlier patch, fiddling with the OMP
clauses), the test case above passes, although the tree dump shows the
same issue that you referred to.

What would be the best strategy now?  Jakub, could you check the patch
for correctness? Should I combine the workshare-6.diff approach (modifying the BLOCKs) with this one? This will certainly make the
patch more compilcated, but is doable.

        Thomas
Index: frontend-passes.c
===================================================================
--- frontend-passes.c	(Revision 180394)
+++ frontend-passes.c	(Arbeitskopie)
@@ -66,6 +66,13 @@
 
 static int forall_level;
 
+/* Keep track of the OMP blocks, so we can mark variables introduced
+   by optimizations as private.  */
+
+static int omp_level;
+static int omp_size;
+static gfc_code **omp_block;
+
 /* Entry point - run all passes for a namespace.  So far, only an
    optimization pass is run.  */
 
@@ -76,12 +83,15 @@
     {
       expr_size = 20;
       expr_array = XNEWVEC(gfc_expr **, expr_size);
+      omp_size = 20;
+      omp_block = XCNEWVEC(gfc_code *, omp_size);
 
       optimize_namespace (ns);
       if (gfc_option.dump_fortran_optimized)
 	gfc_dump_parse_tree (ns, stdout);
 
       XDELETEVEC (expr_array);
+      XDELETEVEC (omp_block);
     }
 }
 
@@ -245,9 +255,17 @@
   gfc_namespace *ns;
   int i;
 
-  /* If the block hasn't already been created, do so.  */
-  if (inserted_block == NULL)
+  /* If the block hasn't already been created, do so.  If we are within
+     an OMP construct, create the temporary variable in the current block.
+     This is to avoid problems with OMP workshare.  */
+
+  if (omp_level > 0)
     {
+      ns = current_ns;
+      changed_statement = current_code;
+    }
+  else if (inserted_block == NULL)
+    {
       inserted_block = XCNEW (gfc_code);
       inserted_block->op = EXEC_BLOCK;
       inserted_block->loc = (*current_code)->loc;
@@ -309,6 +327,20 @@
   symbol->attr.flavor = FL_VARIABLE;
   symbol->attr.referenced = 1;
   symbol->attr.dimension = e->rank > 0;
+
+  if (omp_level > 0)
+    {
+      /* Insert an OMP PRIVATE clause for the new variable.  */
+      gfc_omp_clauses *clauses;
+      gfc_namelist *nn;
+
+      clauses = omp_block[omp_level-1]->ext.omp_clauses;
+      nn = gfc_get_namelist ();
+      nn->sym = symbol;
+      nn->next = clauses->lists[OMP_LIST_PRIVATE];
+      clauses->lists[OMP_LIST_PRIVATE] = nn;
+    }
+
   gfc_commit_symbol (symbol);
 
   result = gfc_get_expr ();
@@ -505,6 +537,7 @@
 
   current_ns = ns;
   forall_level = 0;
+  omp_level = 0;
 
   gfc_code_walker (&ns->code, convert_do_while, dummy_expr_callback, NULL);
   gfc_code_walker (&ns->code, cfe_code, cfe_expr_0, NULL);
@@ -1149,11 +1182,13 @@
 	  gfc_actual_arglist *a;
 	  gfc_code *co;
 	  gfc_association_list *alist;
+	  bool in_omp;
 
 	  /* There might be statement insertions before the current code,
 	     which must not affect the expression walker.  */
 
 	  co = *c;
+	  in_omp = false;
 
 	  switch (co->op)
 	    {
@@ -1339,6 +1374,18 @@
 	    case EXEC_OMP_WORKSHARE:
 	    case EXEC_OMP_END_SINGLE:
 	    case EXEC_OMP_TASK:
+
+	      in_omp = 1;
+
+	      if (omp_level >= omp_size)
+		{
+		  omp_size += omp_size;
+		  omp_block = XRESIZEVEC(gfc_code *, omp_block, omp_size);
+		}
+
+	      omp_block[omp_level] = co;
+	      omp_level ++;
+
 	      if (co->ext.omp_clauses)
 		{
 		  WALK_SUBEXPR (co->ext.omp_clauses->if_expr);
@@ -1365,6 +1412,9 @@
 	  if (co->op == EXEC_FORALL)
 	    forall_level --;
 
+	  if (in_omp)
+	    omp_level --;
+
 	}
     }
   return 0;

Reply via email to