Nathan spotted a bug where the Fortran FE wasn't lowering the tile clause properly in combined parallel/kernels loops. The problem was that gfc_trans_omp_do has two sources of clauses: code->ext.omp_clauses and do_clauses. Because of the way that the Fortran FE splits combined loops, the tile clause is only associated with do_clauses. This patch teaches gfc_trans_omp_do to check for the tile clause in both code and do_clauses.
I've applied this patch to gomp-4_0-branch. Cesar
2016-10-05 Cesar Philippidis <ce...@codesourcery.com> gcc/fortran/ * trans-openmp.c (gfc_trans_omp_do): Check do_clauses for the tile clause if it's not present in clauses. gcc/testsuite/ * gfortran.dg/goacc/tile-lowering.f95: Add more coverage. diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c index e0e1c8b..37744e1 100644 --- a/gcc/fortran/trans-openmp.c +++ b/gcc/fortran/trans-openmp.c @@ -3353,14 +3353,15 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock, vec<dovar_init> inits = vNULL; dovar_init *di; unsigned ix; + gfc_expr_list *tile = do_clauses ? do_clauses->tile_list : clauses->tile_list; /* Both collapsed and tiled loops are lowered the same way. In OpenACC, those clauses are not compatible, so prioritize the tile clause, if present. */ - if (clauses->tile_list) + if (tile) { collapse = 0; - for (gfc_expr_list *el = clauses->tile_list; el; el = el->next) + for (gfc_expr_list *el = tile; el; el = el->next) collapse++; } diff --git a/gcc/testsuite/gfortran.dg/goacc/tile-lowering.f95 b/gcc/testsuite/gfortran.dg/goacc/tile-lowering.f95 index 3774b38..1cb8b9c 100644 --- a/gcc/testsuite/gfortran.dg/goacc/tile-lowering.f95 +++ b/gcc/testsuite/gfortran.dg/goacc/tile-lowering.f95 @@ -1,7 +1,7 @@ ! { dg-do compile } ! { dg-additional-options "-fdump-tree-original" } -subroutine test +subroutine par integer i, j, k !$acc parallel @@ -12,7 +12,7 @@ subroutine test !$acc loop tile (*) do i = 1, 10 end do - + !$acc loop tile (1,2) do i = 1, 10 do j = 1, 10 @@ -37,7 +37,6 @@ subroutine test end do end do - !$acc loop tile (1,2,3) do i = 1, 10 do j = 1, 10 @@ -70,17 +69,224 @@ subroutine test end do end do !$acc end parallel -end subroutine test - -! { dg-final { scan-tree-dump-times "tile\\(1\\)" 1 "original" } } -! { dg-final { scan-tree-dump-times "tile\\(0\\)" 1 "original" } } -! { dg-final { scan-tree-dump-times "tile\\(1, 2\\)" 1 "original" } } -! { dg-final { scan-tree-dump-times "tile\\(0, 2\\)" 1 "original" } } -! 
{ dg-final { scan-tree-dump-times "tile\\(1, 0\\)" 1 "original" } } -! { dg-final { scan-tree-dump-times "tile\\(0, 0\\)" 1 "original" } } -! { dg-final { scan-tree-dump-times "tile\\(1, 2, 3\\)" 1 "original" } } -! { dg-final { scan-tree-dump-times "tile\\(0, 2, 3\\)" 1 "original" } } -! { dg-final { scan-tree-dump-times "tile\\(1, 0, 3\\)" 1 "original" } } -! { dg-final { scan-tree-dump-times "tile\\(1, 2, 0\\)" 1 "original" } } -! { dg-final { scan-tree-dump-times "for \\(" 22 "original" } } +end subroutine par + +subroutine kerns + integer i, j, k + + !$acc kernels + !$acc loop tile (1) + do i = 1, 10 + end do + + !$acc loop tile (*) + do i = 1, 10 + end do + + !$acc loop tile (1,2) + do i = 1, 10 + do j = 1, 10 + end do + end do + + !$acc loop tile (*,2) + do i = 1, 10 + do j = 1, 10 + end do + end do + + !$acc loop tile (1,*) + do i = 1, 10 + do j = 1, 10 + end do + end do + + !$acc loop tile (*,*) + do i = 1, 10 + do j = 1, 10 + end do + end do + + !$acc loop tile (1,2,3) + do i = 1, 10 + do j = 1, 10 + do k = 1, 10 + end do + end do + end do + + !$acc loop tile (*,2,3) + do i = 1, 10 + do j = 1, 10 + do k = 1, 10 + end do + end do + end do + + !$acc loop tile (1,*,3) + do i = 1, 10 + do j = 1, 10 + do k = 1, 10 + end do + end do + end do + + !$acc loop tile (1,2,*) + do i = 1, 10 + do j = 1, 10 + do k = 1, 10 + end do + end do + end do + !$acc end kernels +end subroutine kerns + +subroutine parloop + integer i, j, k + + !$acc parallel loop tile (1) + do i = 1, 10 + end do + + !$acc parallel loop tile (*) + do i = 1, 10 + end do + + !$acc parallel loop tile (1,2) + do i = 1, 10 + do j = 1, 10 + end do + end do + + !$acc parallel loop tile (*,2) + do i = 1, 10 + do j = 1, 10 + end do + end do + + !$acc parallel loop tile (1,*) + do i = 1, 10 + do j = 1, 10 + end do + end do + + !$acc parallel loop tile (*,*) + do i = 1, 10 + do j = 1, 10 + end do + end do + + !$acc parallel loop tile (1,2,3) + do i = 1, 10 + do j = 1, 10 + do k = 1, 10 + end do + end do + end 
do + + !$acc parallel loop tile (*,2,3) + do i = 1, 10 + do j = 1, 10 + do k = 1, 10 + end do + end do + end do + + !$acc parallel loop tile (1,*,3) + do i = 1, 10 + do j = 1, 10 + do k = 1, 10 + end do + end do + end do + + !$acc parallel loop tile (1,2,*) + do i = 1, 10 + do j = 1, 10 + do k = 1, 10 + end do + end do + end do +end subroutine parloop + +subroutine kernloop + integer i, j, k + + !$acc kernels loop tile (1) + do i = 1, 10 + end do + + !$acc kernels loop tile (*) + do i = 1, 10 + end do + + !$acc kernels loop tile (1,2) + do i = 1, 10 + do j = 1, 10 + end do + end do + + !$acc kernels loop tile (*,2) + do i = 1, 10 + do j = 1, 10 + end do + end do + + !$acc kernels loop tile (1,*) + do i = 1, 10 + do j = 1, 10 + end do + end do + + !$acc kernels loop tile (*,*) + do i = 1, 10 + do j = 1, 10 + end do + end do + + !$acc kernels loop tile (1,2,3) + do i = 1, 10 + do j = 1, 10 + do k = 1, 10 + end do + end do + end do + + !$acc kernels loop tile (*,2,3) + do i = 1, 10 + do j = 1, 10 + do k = 1, 10 + end do + end do + end do + + !$acc kernels loop tile (1,*,3) + do i = 1, 10 + do j = 1, 10 + do k = 1, 10 + end do + end do + end do + + !$acc kernels loop tile (1,2,*) + do i = 1, 10 + do j = 1, 10 + do k = 1, 10 + end do + end do + end do +end subroutine kernloop + +! { dg-final { scan-tree-dump-times "tile\\(1\\)" 4 "original" } } +! { dg-final { scan-tree-dump-times "tile\\(0\\)" 4 "original" } } +! { dg-final { scan-tree-dump-times "tile\\(1, 2\\)" 4 "original" } } +! { dg-final { scan-tree-dump-times "tile\\(0, 2\\)" 4 "original" } } +! { dg-final { scan-tree-dump-times "tile\\(1, 0\\)" 4 "original" } } +! { dg-final { scan-tree-dump-times "tile\\(0, 0\\)" 4 "original" } } +! { dg-final { scan-tree-dump-times "tile\\(1, 2, 3\\)" 4 "original" } } +! { dg-final { scan-tree-dump-times "tile\\(0, 2, 3\\)" 4 "original" } } +! { dg-final { scan-tree-dump-times "tile\\(1, 0, 3\\)" 4 "original" } } +! 
{ dg-final { scan-tree-dump-times "tile\\(1, 2, 0\\)" 4 "original" } } +! { dg-final { scan-tree-dump-times "for \\(" 88 "original" } } +! { dg-final { scan-tree-dump-times "while \\(" 0 "original" } }