On 06/04/2014 06:53 AM, Ilmir Usmanov wrote:

>> This patch, which is derived from Ilmir Usmanov's work posted here
>> <https://gcc.gnu.org/ml/gcc-patches/2014-04/msg00027.html>, implements
>> the loop directive in openacc. The original patch is mostly intact,
> Thank you!
> 
> I looked through the patch and found that you also added middle-end
> part. I don't know a lot about middle-end, so, probably, Thomas could
> review the part. However, there is a regression in middle-end.
> 
> About front-ends, especially fortran front-end:
>> I did disable support for do concurrent loops since openacc
>> 2.0a supports fortran up to fortran 2003.
> As I can see, you didn't remove helper code for DO CONCURRENT loops
> transformation (see below).
> 
>> @@ -12217,8 +12221,8 @@ c_parser_omp_for_loop (location_t loc,
>> c_parser *parser, enum tree_code code,
>>     for (cl = clauses; cl; cl = OMP_CLAUSE_CHAIN (cl))
>>       if (OMP_CLAUSE_CODE (cl) == OMP_CLAUSE_COLLAPSE)
>>         {
>> -    gcc_assert (code != OACC_LOOP);
>> -      collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (cl));
>> +    //gcc_assert (code != OACC_LOOP);
> I suppose you forgot to remove this comment.
> 
>> +++ b/gcc/testsuite/gfortran.dg/goacc/loop-4.f95
>> @@ -0,0 +1,16 @@
>> +! { dg-do compile }
>> +! { dg-additional-options "-fdump-tree-original -std=f2008" }
>> +
>> +PROGRAM test
>> +  IMPLICIT NONE
>> +  INTEGER :: a(64), b(64), c(64), i, j, k
>> +  ! Must be replaced by three loops.
>> +  !$acc loop
>> +  DO CONCURRENT (i=1:64, j=1:64, k=1:64, i==j .and. j==k)
> This test is obsolete. I think you should remove this testcase since you
> are not supporting DO CONCURRENT loops.

I removed that test.

>> +++ b/gcc/testsuite/gfortran.dg/goacc/loop-tree.f95
> For this test you should update tree-pretty-print.c (I forgot this):
> @@ -675,13 +675,13 @@ dump_omp_clause (pretty_printer *buffer, tree
> clause, int spc, int flags)
> 
>      case OMP_CLAUSE_WORKER:
>        pp_string (buffer, "worker(");
> -      dump_generic_node (buffer, OMP_CLAUSE_DECL (clause), spc, flags,
> false);
> +      dump_generic_node (buffer, OMP_CLAUSE_WORKER_EXPR (clause), spc,
> flags, false);
>        pp_character(buffer, ')');
>        break;
> 
>      case OMP_CLAUSE_VECTOR:
>        pp_string (buffer, "vector(");
> -      dump_generic_node (buffer, OMP_CLAUSE_DECL (clause), spc, flags,
> false);
> +      dump_generic_node (buffer, OMP_CLAUSE_VECTOR_EXPR (clause), spc,
> flags, false);
>        pp_character(buffer, ')');
>        break;
> 
> 
>> +/* Recursively generate conditional expressions.  */
>> +static tree
>> +gfc_trans_oacc_loop_generate_mask_conds (gfc_code *code, int collapse)
>> +{
>> +  if (collapse > 1)
>> +    return gfc_trans_oacc_loop_generate_mask_conds (code->block->next,
>> +                            collapse - 1);
>> +  else
>> +    return gfc_trans_omp_code (code->block->next, true);
>> +}
>> +static tree
>> +gfc_trans_oacc_loop (gfc_code *code, stmtblock_t *pblock,
>> +             gfc_omp_clauses *loop_clauses)
>> +{
>> +  /* DO CONCURRENT specific vars.  */
>> +  int nforloops = 0;
>> +  int current_for = 0;
>> +
>> +  if (collapse <= 0)
>> +    collapse = 1;
>> +
>> +  code = code->block->next;
>> +
>> +  if (code->op == EXEC_DO_CONCURRENT)
>> +    gfc_error ("!$ACC LOOP directive is unsupported on DO CONCURRENT
>> %L",
>> +           &code->loc);
>> +
>> +  gcc_assert (code->op == EXEC_DO);
>> +
>> +  if (pblock == NULL)
>> +    {
>> +      gfc_start_block (&block);
>> +      pblock = &block;
>> +    }
>> +
>> +  /* Calculate number of required for loops.  */
>> +  old_code = code;
>> +  for (i = 0; i < collapse; i++)
>> +    {
>> +      if (code->op == EXEC_DO)
>> +    nforloops++;
>> +      else
>> +    gcc_unreachable ();
>> +      code = code->block->next;
>> +    }
>> +  code = old_code;
>> +
>> +  /* Set the number of required for loops for collapse.  */
>> +  /* FIXME: this is probably correct, but OMP_CLAUSE_COLLAPSE isn't
>> supported
>> +     yet.  */
>> +  loop_clauses->collapse = nforloops;
>> +
>> +  omp_clauses = gfc_trans_omp_clauses (pblock, loop_clauses, code->loc);
>> +
>> +  init = make_tree_vec (nforloops);
>> +  cond = make_tree_vec (nforloops);
>> +  incr = make_tree_vec (nforloops);
>> +
>> +  for (i = 0; i < collapse; i++)
>> +    {
>> +      if (code->op == EXEC_DO)
>> +    gfc_trans_oacc_loop_generate_for (pblock, &se,
>> code->ext.iterator->var,
>> +                      code->ext.iterator->start,
>> +                      code->ext.iterator->end,
>> +                      code->ext.iterator->step,
>> +                      current_for++, &init, &cond, &incr,
>> +                      &inits);
>> +      else
>> +    gcc_unreachable ();
>> +      if (i + 1 < collapse)
>> +    code = code->block->next;
>> +    }
>> +
>> +  if (pblock != &block)
>> +    {
>> +      pushlevel ();
>> +      gfc_start_block (&block);
>> +    }
> This is complicated for simple DO loops. I think the following will be
> enough (see gfc_trans_omp_do).

I didn't see a whole lot different from your gfc_trans_oacc_loop and the
existing gfc_trans_omp_do, so I augmented the latter to handle the
openacc loop clause. It looks like the only important change is ensure
that gfc_trans_omp_do creates an OACC_LOOP stmt for the for openacc. I
know that gfc_trans_omp_do also handles the lastprivate clause, but that
shouldn't matter for the loop directive since it can't get set. Likewise
for the SIMD stuff.

Is there any specific reason why you created a new function to handle
openacc loops?

>>   code = code->block->next;
>> +  if (code->op == EXEC_DO_CONCURRENT)
>> +    gfc_error ("!$ACC LOOP directive is unsupported on DO CONCURRENT
>> %L",
>> +           &code->loc);
>>   gcc_assert (code->op == EXEC_DO);
>>
>>   init = make_tree_vec (collapse);
>>   cond = make_tree_vec (collapse);
>>   incr = make_tree_vec (collapse);
>>
>>   if (pblock == NULL)
>>     {
>>       gfc_start_block (&block);
>>       pblock = &block;
>>     }
> 
>> @@ -1817,13 +1818,9 @@ scan_sharing_clauses (tree clauses, omp_context
>> *ctx)
>>       case OMP_CLAUSE_PRIVATE:
>>       case OMP_CLAUSE_FIRSTPRIVATE:
>>       case OMP_CLAUSE_REDUCTION:
>> -      if (is_gimple_omp_oacc_specifically (ctx->stmt))
>> -        {
>> -          sorry ("clause not supported yet");
>> -          break;
>> -        }
> This change produces regression on parallel-tree.f95 testcase: ICE.

I've replaced the asserts with sorry messages in my other patch, so it
shouldn't ICE anymore.

If this patch is OK with you, please commit it.

Thanks,
Cesar
2014-06-04  Ilmir Usmanov  <i.usma...@samsung.com>
	    Cesar Philippidis  <ce...@codesourcery.com>	

	gcc/
	* gcc/tree-pretty-print.c (dump_omp_clause): Don't use
	OMP_CLAUSE_DECL with OMP_CLAUSE_WORKER and OMP_CLAUSE_VECTOR.

	gcc/fortran/
	*trans-openmp.c (gfc_trans_oacc_combined_directive): Move under
	gfc_trans_omp_do.	
	(gfc_trans_omp_do): Handle EXEC_OACC_LOOP.

	gcc/testsuite/
	* gfortran.dg/goacc/loop-tree.f95: New file.


diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c
index 08f6faa..721dcb1 100644
--- a/gcc/fortran/trans-openmp.c
+++ b/gcc/fortran/trans-openmp.c
@@ -1856,58 +1856,6 @@ typedef struct dovar_init_d {
   tree init;
 } dovar_init;
 
-/* parallel loop and kernels loop. */
-static tree
-gfc_trans_oacc_combined_directive (gfc_code *code)
-{
-  stmtblock_t block;
-  gfc_omp_clauses construct_clauses, loop_clauses;
-  tree stmt, oacc_clauses = NULL_TREE;
-  enum tree_code construct_code;
-
-  switch (code->op)
-    {
-      case EXEC_OACC_PARALLEL_LOOP:
-	construct_code = OACC_PARALLEL;
-	break;
-      case EXEC_OACC_KERNELS_LOOP:
-	construct_code = OACC_KERNELS;
-	break;
-      default:
-	gcc_unreachable ();
-    }
-
-  gfc_start_block (&block);
-
-  memset (&loop_clauses, 0, sizeof (loop_clauses));
-  if (code->ext.omp_clauses != NULL)
-    {
-      memcpy (&construct_clauses, code->ext.omp_clauses,
-	      sizeof (construct_clauses));
-      loop_clauses.collapse = construct_clauses.collapse;
-      loop_clauses.gang = construct_clauses.gang;
-      loop_clauses.vector = construct_clauses.vector;
-      loop_clauses.worker = construct_clauses.worker;
-      loop_clauses.seq = construct_clauses.seq;
-      loop_clauses.independent = construct_clauses.independent;
-      construct_clauses.collapse = 0;
-      construct_clauses.gang = false;
-      construct_clauses.vector = false;
-      construct_clauses.worker = false;
-      construct_clauses.seq = false;
-      construct_clauses.independent = false;
-      oacc_clauses = gfc_trans_omp_clauses (&block, &construct_clauses,
-					    code->loc);
-    }
-    
-  gfc_error ("!$ACC LOOP directive not implemented yet %L", &code->loc);
-  stmt = gfc_trans_omp_code (code->block->next, true);
-  stmt = build2_loc (input_location, construct_code, void_type_node, stmt,
-		     oacc_clauses);
-  gfc_add_expr_to_block (&block, stmt);
-  return gfc_finish_block (&block);
-}
-
 static tree
 gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
 		  gfc_omp_clauses *do_clauses, tree par_clauses)
@@ -1915,6 +1863,7 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
   gfc_se se;
   tree dovar, stmt, from, to, step, type, init, cond, incr;
   tree count = NULL_TREE, cycle_label, tmp, omp_clauses;
+  tree_code stmt_code;
   stmtblock_t block;
   stmtblock_t body;
   gfc_omp_clauses *clauses = code->ext.omp_clauses;
@@ -2174,7 +2123,19 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
     }
 
   /* End of loop body.  */
-  stmt = make_node (op == EXEC_OMP_SIMD ? OMP_SIMD : OMP_FOR);
+  switch (op)
+    {
+    case EXEC_OMP_SIMD:
+      stmt_code = OMP_SIMD;
+      break;
+    case EXEC_OACC_LOOP:
+      stmt_code = OACC_LOOP;
+      break;
+    default:
+      stmt_code = OMP_FOR;
+    }
+ 
+  stmt = make_node (stmt_code);
 
   TREE_TYPE (stmt) = void_type_node;
   OMP_FOR_BODY (stmt) = gfc_finish_block (&body);
@@ -2187,6 +2148,68 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
   return gfc_finish_block (&block);
 }
 
+/* parallel loop and kernels loop. */
+static tree
+gfc_trans_oacc_combined_directive (gfc_code *code)
+{
+  stmtblock_t block, *pblock = NULL;
+  gfc_omp_clauses construct_clauses, loop_clauses;
+  tree stmt, oacc_clauses = NULL_TREE;
+  enum tree_code construct_code;
+
+  switch (code->op)
+    {
+      case EXEC_OACC_PARALLEL_LOOP:
+	construct_code = OACC_PARALLEL;
+	break;
+      case EXEC_OACC_KERNELS_LOOP:
+	construct_code = OACC_KERNELS;
+	break;
+      default:
+	gcc_unreachable ();
+    }
+
+  gfc_start_block (&block);
+
+  memset (&loop_clauses, 0, sizeof (loop_clauses));
+  if (code->ext.omp_clauses != NULL)
+    {
+      memcpy (&construct_clauses, code->ext.omp_clauses,
+	      sizeof (construct_clauses));
+      loop_clauses.collapse = construct_clauses.collapse;
+      loop_clauses.gang = construct_clauses.gang;
+      loop_clauses.vector = construct_clauses.vector;
+      loop_clauses.worker = construct_clauses.worker;
+      loop_clauses.seq = construct_clauses.seq;
+      loop_clauses.independent = construct_clauses.independent;
+      construct_clauses.collapse = 0;
+      construct_clauses.gang = false;
+      construct_clauses.vector = false;
+      construct_clauses.worker = false;
+      construct_clauses.seq = false;
+      construct_clauses.independent = false;
+      oacc_clauses = gfc_trans_omp_clauses (&block, &construct_clauses,
+					    code->loc);
+    }
+  if (!loop_clauses.seq)
+    pblock = &block;
+  else
+    pushlevel ();
+  stmt = gfc_trans_omp_do (code, code->op, pblock, &loop_clauses, NULL);
+  if (TREE_CODE (stmt) != BIND_EXPR)
+    stmt = build3_v (BIND_EXPR, NULL, stmt, poplevel (1, 0));
+  else
+    poplevel (0, 0);
+  stmt = build2_loc (input_location, construct_code, void_type_node, stmt,
+		     oacc_clauses);
+  if (code->op == EXEC_OACC_KERNELS_LOOP)
+    OACC_KERNELS_COMBINED (stmt) = 1;
+  else
+    OACC_PARALLEL_COMBINED (stmt) = 1;
+  gfc_add_expr_to_block (&block, stmt);
+  return gfc_finish_block (&block);
+}
+
 static tree
 gfc_trans_omp_flush (void)
 {
@@ -2763,8 +2786,8 @@ gfc_trans_oacc_directive (gfc_code *code)
     case EXEC_OACC_HOST_DATA:
       return gfc_trans_oacc_construct (code);
     case EXEC_OACC_LOOP:
-      gfc_error ("!$ACC LOOP directive not implemented yet %L", &code->loc);
-      return NULL_TREE;
+      return gfc_trans_omp_do (code, code->op, NULL, code->ext.omp_clauses,
+			       NULL);
     case EXEC_OACC_UPDATE:
     case EXEC_OACC_WAIT:
     case EXEC_OACC_CACHE:
diff --git a/gcc/testsuite/gfortran.dg/goacc/loop-tree-1.f90 b/gcc/testsuite/gfortran.dg/goacc/loop-tree-1.f90
new file mode 100644
index 0000000..14779b6
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/loop-tree-1.f90
@@ -0,0 +1,49 @@
+! { dg-do compile } 
+! { dg-additional-options "-fdump-tree-original -std=f2008" } 
+
+! test for tree-dump-original and spaces-commas
+
+program test
+  implicit none
+  integer :: i, j, k, m, sum
+  REAL :: a(64), b(64), c(64)
+
+  !$acc kernels 
+  !$acc loop collapse(2)
+  DO i = 1,10
+    DO j = 1,10
+    ENDDO
+  ENDDO
+
+  !$acc loop independent gang (3)
+  DO i = 1,10
+    !$acc loop worker(3) ! { dg-error "work-sharing region may not be closely nested inside of work-sharing, critical, ordered, master or explicit task region" }
+    DO j = 1,10
+      !$acc loop vector(5)
+      DO k = 1,10
+      ENDDO
+    ENDDO
+  ENDDO
+  !$acc end kernels
+
+  sum = 0
+  !$acc parallel
+  !$acc loop private(m) reduction(+:sum)
+  DO i = 1,10
+    sum = sum + 1
+  ENDDO
+  !$acc end parallel
+
+end program test
+! { dg-prune-output "sorry" }
+! { dg-final { scan-tree-dump-times "pragma acc loop" 5 "original" } } 
+
+! { dg-final { scan-tree-dump-times "collapse\\(2\\)" 1 "original" } } 
+! { dg-final { scan-tree-dump-times "independent" 1 "original" } } 
+! { dg-final { scan-tree-dump-times "gang\\(3\\)" 1 "original" } } 
+! { dg-final { scan-tree-dump-times "worker\\(3\\)" 1 "original" } } 
+! { dg-final { scan-tree-dump-times "vector\\(5\\)" 1 "original" } } 
+
+! { dg-final { scan-tree-dump-times "private\\(m\\)" 1 "original" } } 
+! { dg-final { scan-tree-dump-times "reduction\\(\\+:sum\\)" 1 "original" } } 
+! { dg-final { cleanup-tree-dump "original" } } 
diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c
index 22b82fe..a4a9bb8 100644
--- a/gcc/tree-pretty-print.c
+++ b/gcc/tree-pretty-print.c
@@ -675,13 +675,15 @@ dump_omp_clause (pretty_printer *buffer, tree clause, int spc, int flags)
 
     case OMP_CLAUSE_WORKER:
       pp_string (buffer, "worker(");
-      dump_generic_node (buffer, OMP_CLAUSE_DECL (clause), spc, flags, false);
+      dump_generic_node (buffer, OMP_CLAUSE_WORKER_EXPR (clause), spc, flags, 
+			 false);
       pp_character(buffer, ')');
       break;
 
     case OMP_CLAUSE_VECTOR:
       pp_string (buffer, "vector(");
-      dump_generic_node (buffer, OMP_CLAUSE_DECL (clause), spc, flags, false);
+      dump_generic_node (buffer, OMP_CLAUSE_VECTOR_EXPR (clause), spc, flags,
+			 false);
       pp_character(buffer, ')');
       break;
 

Reply via email to