Hello world, this is what I hope is the final round of the OMP front-end optimization patch. This one ignores outer workshares when doing function elimination within omp do and similar blocks.
Regression-tested.  OK for trunk?

	Thomas

2011-12-02  Thomas Koenig  <tkoe...@gcc.gnu.org>

	PR fortran/50690
	* frontend-passes.c (omp_level):  New variable.
	(omp_size):  New variable.
	(omp_block):  New variable.
	(gfc_run_passes):  Allocate and deallocate omp_block, set omp_size.
	(cfe_expr_0):  Don't eliminate common function if it would put
	the variable immediately into a WORKSHARE construct.
	(optimize_namespace):  Set omp_level.
	(gfc_code_walker):  Keep track of OMP PARALLEL and
	OMP WORKSHARE constructs.

2011-12-02  Thomas Koenig  <tkoe...@gcc.gnu.org>

	PR fortran/50690
	* gfortran.dg/gomp/workshare2.f90:  New test.
	* gfortran.dg/gomp/workshare3.f90:  New test.
Index: frontend-passes.c =================================================================== --- frontend-passes.c (Revision 181809) +++ frontend-passes.c (Arbeitskopie) @@ -66,6 +66,13 @@ static gfc_namespace *current_ns; static int forall_level; +/* Keep track of the OMP blocks, so common function elimination + can avoid introducing variables inside a WORKSHARE. */ + +static int omp_level; +static int omp_size; +static gfc_code **omp_block; + /* Entry point - run all passes for a namespace. So far, only an optimization pass is run. */ @@ -76,12 +83,15 @@ gfc_run_passes (gfc_namespace *ns) { expr_size = 20; expr_array = XNEWVEC(gfc_expr **, expr_size); + omp_size = 20; + omp_block = XCNEWVEC(gfc_code *, omp_size); optimize_namespace (ns); if (gfc_option.dump_fortran_optimized) gfc_dump_parse_tree (ns, stdout); XDELETEVEC (expr_array); + XDELETEVEC (omp_block); } } @@ -367,6 +377,23 @@ cfe_expr_0 (gfc_expr **e, int *walk_subtrees, int i,j; gfc_expr *newvar; + /* If we are within an OMP WORKSHARE or OMP PARALLEL WORKSHARE + construct, don't do this optimization. Only look at the + innermost level because an EXEC_OMP_PARALLEL{,_DO,_SECTIONS} + nested in an EXEC_OMP_WORKSHARE/EXEC_OMP_PARALLEL_WORKSHARE + is OK. */ + if (omp_level > 0) + { + gfc_exec_op op; + op = omp_block[omp_level - 1]->op; + + if (op == EXEC_OMP_WORKSHARE || op == EXEC_OMP_PARALLEL_WORKSHARE) + { + *walk_subtrees = 0; + return 0; + } + } + expr_count = 0; gfc_expr_walker (e, cfe_register_funcs, NULL); @@ -505,6 +532,7 @@ optimize_namespace (gfc_namespace *ns) current_ns = ns; forall_level = 0; + omp_level = 0; gfc_code_walker (&ns->code, convert_do_while, dummy_expr_callback, NULL); gfc_code_walker (&ns->code, cfe_code, cfe_expr_0, NULL); @@ -1150,11 +1178,13 @@ gfc_code_walker (gfc_code **c, walk_code_fn_t code gfc_actual_arglist *a; gfc_code *co; gfc_association_list *alist; + bool in_omp; /* There might be statement insertions before the current code, which must not affect the expression walker. 
*/ co = *c; + in_omp = false; switch (co->op) { @@ -1330,14 +1360,32 @@ gfc_code_walker (gfc_code **c, walk_code_fn_t code WALK_SUBEXPR (co->ext.dt->extra_comma); break; - case EXEC_OMP_DO: case EXEC_OMP_PARALLEL: case EXEC_OMP_PARALLEL_DO: case EXEC_OMP_PARALLEL_SECTIONS: case EXEC_OMP_PARALLEL_WORKSHARE: + case EXEC_OMP_WORKSHARE: + + /* Register all OMP PARALLEL and WORKSHARE constructs + on a stack so they can be handled separately for + common function elimination. */ + + in_omp = true; + + if (omp_level >= omp_size) + { + omp_size += omp_size; + omp_block = XRESIZEVEC(gfc_code *, omp_block, omp_size); + } + + omp_block[omp_level] = co; + omp_level ++; + + /* Fall through. */ + case EXEC_OMP_SECTIONS: + case EXEC_OMP_DO: case EXEC_OMP_SINGLE: - case EXEC_OMP_WORKSHARE: case EXEC_OMP_END_SINGLE: case EXEC_OMP_TASK: if (co->ext.omp_clauses) @@ -1366,6 +1414,9 @@ gfc_code_walker (gfc_code **c, walk_code_fn_t code if (co->op == EXEC_FORALL) forall_level --; + if (in_omp) + omp_level --; + } } return 0;
! { dg-do compile }
! { dg-options "-ffrontend-optimize -fdump-tree-original" }
! Test that common function elimination is done within the OMP parallel
! blocks even if there is a workshare around it.
program foo
  implicit none
  integer, parameter :: n = 100000000
  real, parameter :: eps = 3e-7
  integer :: i,j
  real :: A(n), B(5), C(n)
  real :: tmp
  B(1) = 3.344
  tmp = B(1)
  do i=1,10
     call random_number(a)
     c = a
     ! The innermost OMP construct enclosing the assignment below is
     ! PARALLEL/DO, not the outer WORKSHARE, so the front end is allowed
     ! to eliminate the duplicated cos(B(1)) call here.
!$omp parallel workshare
!$omp parallel default(shared)
!$omp do
     do j=1,n
        A(j) = A(j)*cos(B(1))+A(j)*cos(B(1))
     end do
!$omp end do
!$omp end parallel
!$omp end parallel workshare
  end do
  ! Outside any OMP construct; this duplicated cos is eliminated as well.
  c = c*cos(b(1))+ c*cos(b(1))
  do j=1,n
     if (abs(a(j)-c(j)) > eps) then
        print *,1,j,a(j), c(j)
        call abort
     end if
  end do
end program foo
! After elimination, exactly one cosine call remains for each of the two
! duplicated pairs above, hence the expected count of 2.
! { dg-final { scan-tree-dump-times "__builtin_cosf" 2 "original" } }
! { dg-final { cleanup-tree-dump "original" } }
! { dg-do compile }
! { dg-options "-ffrontend-optimize -fdump-tree-original" }
! PR 50690 - this used to ICE because workshare could not handle
! BLOCKs.
! To test for correct execution, run this program (but don't forget
! to unset the stack limit).
! NOTE(review): if actually executed, the comparisons below scale c by
! tmp (= B(1)) while a is scaled by cos(B(1)) - confirm these are meant
! to match before relying on the runtime check.
program foo
  implicit none
  integer, parameter :: n = 100000000
  real, parameter :: eps = 3e-7
  integer :: i,j
  real :: A(n), B(5), C(n)
  real :: tmp
  B(1) = 3.344
  tmp = B(1)
  ! First variant: WORKSHARE nested inside PARALLEL.  Common function
  ! elimination must not introduce a temporary for the duplicated
  ! cos(B(1)) call, because the assignment sits directly inside a
  ! WORKSHARE construct.
  do i=1,10
     call random_number(a)
     c = a
!$omp parallel default(shared)
!$omp workshare
     A(:) = A(:)*cos(B(1))+A(:)*cos(B(1))
!$omp end workshare nowait
!$omp end parallel
! sync is implied here
  end do
  c = c*tmp + c*tmp
  do j=1,n
     if (abs(a(j)-c(j)) > eps) then
        print *,1,j,a(j), c(j)
        call abort
     end if
  end do
  ! Second variant: the combined PARALLEL WORKSHARE construct; the same
  ! restriction on introducing temporaries applies.
  do i=1,10
     call random_number(a)
     c = a
!$omp parallel workshare default(shared)
     A(:) = A(:)*cos(B(1))+A(:)*cos(B(1))
!$omp end parallel workshare
  end do
  c = c*tmp + c*tmp
  do j=1,n
     if (abs(a(j)-c(j)) > eps) then
        print *,2,j,a(j), c(j)
        call abort
     end if
  end do
end program foo
! No front-end-generated temporary ("__var...") may appear in the dump.
! { dg-final { scan-tree-dump-times "__var" 0 "original" } }
! { dg-final { cleanup-tree-dump "original" } }