Besides the subject line fix, it fixes a inconsistency with lastprivate between Fortran and C – see patch comment -, which caused it to fail with the current patch. (ie.: If lastprivate is on 'parallel' it is not marked tofrom; I wonder whether there is a valid C/Fortran testcase which triggers this.)
OK for mainline? (I think that it makes sense to wait for Stage 1 mainline.) Tobias ----------------- Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank Thürauf
OpenMP: Fix combined-target handling for lastprivate/reduction/linear [PR99928] OpenMP 5.0 implies in combined target constructs tofrom mapping. Additionally, make lastprivate handling in Fortran same as in C's c_parser_omp_for_loop, i.e. leave at SPLIT_PARALLEL except it is loop iteration variable - if so, remove from parallel (before: turn into shared) and add to DO (before: only when not SIMD). gcc/fortran/ChangeLog: PR fortran/99928 * trans-openmp.c (gfc_trans_omp_do): For iter vars, remove lastprivate from SPLIT_PARALLEL clauses; for 'do simd', add it to 'do'; take new 't_do_clauses' tree for the latter. (gfc_trans_omp_do_simd): Take pointer for parallel clauses tree; update calls. (gfc_trans_omp_parallel_do, gfc_trans_omp_parallel_do_simd, gfc_trans_omp_distribute, gfc_trans_omp_target, gfc_trans_omp_taskloop, gfc_trans_omp_directive): Update calls. gcc/ChangeLog: PR fortran/99928 * gimplify.c (omp_notice_variable): Add tofrom Boolean arg to force tofrom mapping also for scalars. (gimplify_scan_omp_clauses, gimplify_adjust_omp_clauses): For lastprivate/reduction/linear, map also scalar var as tofrom in combined target constructs. gcc/testsuite/ChangeLog: PR fortran/99928 * gfortran.dg/gomp/openmp-simd-6.f90: Update scan-tree-dump-times for lastprivate. * c-c++-common/gomp/target-reduction.c: New test. * gfortran.dg/gomp/target-reduction.f90: New test. gcc/fortran/trans-openmp.c | 48 ++++++++++++-------- gcc/gimplify.c | 29 ++++++++---- gcc/testsuite/c-c++-common/gomp/target-reduction.c | 41 +++++++++++++++++ gcc/testsuite/gfortran.dg/gomp/openmp-simd-6.f90 | 4 +- .../gfortran.dg/gomp/target-reduction.f90 | 51 ++++++++++++++++++++++ 5 files changed, 144 insertions(+), 29 deletions(-) diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c index 349df1cc346..a1cb7f0d8bf 100644 --- a/gcc/fortran/trans-openmp.c +++ b/gcc/fortran/trans-openmp.c @@ -4444,7 +4444,7 @@ typedef struct dovar_init_d { static tree gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock, - gfc_omp_clauses *do_clauses, tree par_clauses) + gfc_omp_clauses *do_clauses, tree *par_clauses, tree *t_do_clauses = NULL) { gfc_se se; tree dovar, stmt, from, to, step, type, init, cond, incr, orig_decls; @@ -4703,25 +4703,37 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock, break; } } - if (c == NULL && op == EXEC_OMP_DO && par_clauses != NULL) + if (c == NULL && par_clauses != NULL) { - for (c = par_clauses; c ; c = OMP_CLAUSE_CHAIN (c)) - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE - && OMP_CLAUSE_DECL (c) == dovar_decl) + for (tree *pc = par_clauses; *pc ; pc = &OMP_CLAUSE_CHAIN (*pc)) + if (OMP_CLAUSE_CODE (*pc) == OMP_CLAUSE_LASTPRIVATE + && OMP_CLAUSE_DECL (*pc) == dovar_decl) { + /* Move lastprivate (decl) clause from PARALLEL to DO. */ tree l = build_omp_clause (input_location, OMP_CLAUSE_LASTPRIVATE); - if (OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c)) + if (OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (*pc)) OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (l) = 1; OMP_CLAUSE_DECL (l) = dovar_decl; - OMP_CLAUSE_CHAIN (l) = omp_clauses; OMP_CLAUSE_LASTPRIVATE_STMT (l) = tmp; - omp_clauses = l; - OMP_CLAUSE_SET_CODE (c, OMP_CLAUSE_SHARED); + if (t_do_clauses) + { + gcc_assert (op == EXEC_OMP_SIMD); + OMP_CLAUSE_CHAIN (l) = *t_do_clauses; + *t_do_clauses = l; + } + else + { + gcc_assert (op == EXEC_OMP_DO); + OMP_CLAUSE_CHAIN (l) = omp_clauses; + omp_clauses = l; + } + c = *pc; /* For the gcc_assert. */ + *pc = OMP_CLAUSE_CHAIN (*pc); break; } } - gcc_assert (simple || c != NULL); + //gcc_assert (simple || c != NULL); } if (!simple) { @@ -5353,7 +5365,7 @@ gfc_split_omp_clauses (gfc_code *code, static tree gfc_trans_omp_do_simd (gfc_code *code, stmtblock_t *pblock, - gfc_omp_clauses *clausesa, tree omp_clauses) + gfc_omp_clauses *clausesa, tree *omp_clauses) { stmtblock_t block; gfc_omp_clauses clausesa_buf[GFC_OMP_SPLIT_NUM]; @@ -5373,7 +5385,7 @@ gfc_trans_omp_do_simd (gfc_code *code, stmtblock_t *pblock, omp_do_clauses = gfc_trans_omp_clauses (&block, &clausesa[GFC_OMP_SPLIT_DO], code->loc); body = gfc_trans_omp_do (code, EXEC_OMP_SIMD, pblock ? pblock : &block, - &clausesa[GFC_OMP_SPLIT_SIMD], omp_clauses); + &clausesa[GFC_OMP_SPLIT_SIMD], omp_clauses, &omp_do_clauses); if (pblock == NULL) { if (TREE_CODE (body) != BIND_EXPR) @@ -5426,7 +5438,7 @@ gfc_trans_omp_parallel_do (gfc_code *code, stmtblock_t *pblock, pushlevel (); } stmt = gfc_trans_omp_do (code, EXEC_OMP_DO, new_pblock, - &clausesa[GFC_OMP_SPLIT_DO], omp_clauses); + &clausesa[GFC_OMP_SPLIT_DO], &omp_clauses); if (pblock == NULL) { if (TREE_CODE (stmt) != BIND_EXPR) @@ -5467,7 +5479,7 @@ gfc_trans_omp_parallel_do_simd (gfc_code *code, stmtblock_t *pblock, code->loc); if (pblock == NULL) pushlevel (); - stmt = gfc_trans_omp_do_simd (code, pblock, clausesa, omp_clauses); + stmt = gfc_trans_omp_do_simd (code, pblock, clausesa, &omp_clauses); if (pblock == NULL) { if (TREE_CODE (stmt) != BIND_EXPR) @@ -5669,7 +5681,7 @@ gfc_trans_omp_distribute (gfc_code *code, gfc_omp_clauses *clausesa) case EXEC_OMP_TARGET_TEAMS_DISTRIBUTE_SIMD: case EXEC_OMP_TEAMS_DISTRIBUTE_SIMD: stmt = gfc_trans_omp_do (code, EXEC_OMP_SIMD, &block, - &clausesa[GFC_OMP_SPLIT_SIMD], NULL_TREE); + &clausesa[GFC_OMP_SPLIT_SIMD], NULL); if (TREE_CODE (stmt) != BIND_EXPR) stmt = build3_v (BIND_EXPR, NULL, stmt, poplevel (1, 0)); else @@ -5799,7 +5811,7 @@ gfc_trans_omp_target (gfc_code *code) break; case EXEC_OMP_TARGET_SIMD: stmt = gfc_trans_omp_do (code, EXEC_OMP_SIMD, &block, - &clausesa[GFC_OMP_SPLIT_SIMD], NULL_TREE); + &clausesa[GFC_OMP_SPLIT_SIMD], NULL); if (TREE_CODE (stmt) != BIND_EXPR) stmt = build3_v (BIND_EXPR, NULL, stmt, poplevel (1, 0)); else @@ -5869,7 +5881,7 @@ gfc_trans_omp_taskloop (gfc_code *code) break; case EXEC_OMP_TASKLOOP_SIMD: stmt = gfc_trans_omp_do (code, EXEC_OMP_SIMD, &block, - &clausesa[GFC_OMP_SPLIT_SIMD], NULL_TREE); + &clausesa[GFC_OMP_SPLIT_SIMD], NULL); if (TREE_CODE (stmt) != BIND_EXPR) stmt = build3_v (BIND_EXPR, NULL, stmt, poplevel (1, 0)); else @@ -6192,7 +6204,7 @@ gfc_trans_omp_directive (gfc_code *code) case EXEC_OMP_DISTRIBUTE_SIMD: return gfc_trans_omp_distribute (code, NULL); case EXEC_OMP_DO_SIMD: - return gfc_trans_omp_do_simd (code, NULL, NULL, NULL_TREE); + return gfc_trans_omp_do_simd (code, NULL, NULL, NULL); case EXEC_OMP_FLUSH: return gfc_trans_omp_flush (code); case EXEC_OMP_MASTER: diff --git a/gcc/gimplify.c b/gcc/gimplify.c index 1f417a52702..e4a7ccfbc8d 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -477,7 +477,8 @@ delete_omp_context (struct gimplify_omp_ctx *c) } static void omp_add_variable (struct gimplify_omp_ctx *, tree, unsigned int); -static bool omp_notice_variable (struct gimplify_omp_ctx *, tree, bool); +static bool omp_notice_variable (struct gimplify_omp_ctx *, tree, bool, + bool tofrom=false); /* Both gimplify the statement T and append it to *SEQ_P. This function behaves exactly as gimplify_stmt, but you don't have to pass T as a @@ -7419,10 +7420,11 @@ oacc_default_clause (struct gimplify_omp_ctx *ctx, tree decl, unsigned flags) IN_CODE is true when real code uses DECL, and false when we should merely emit default(none) errors. Return true if DECL is going to be remapped and thus DECL shouldn't be gimplified into its - DECL_VALUE_EXPR (if any). */ + DECL_VALUE_EXPR (if any). If ORT_TARGET and tofrom use GOVD_MAP. */ static bool -omp_notice_variable (struct gimplify_omp_ctx *ctx, tree decl, bool in_code) +omp_notice_variable (struct gimplify_omp_ctx *ctx, tree decl, bool in_code, + bool tofrom /* = false */) { splay_tree_node n; unsigned flags = in_code ? GOVD_SEEN : 0; @@ -7527,7 +7529,9 @@ omp_notice_variable (struct gimplify_omp_ctx *ctx, tree decl, bool in_code) else gdmk = GDMK_AGGREGATE; kind = lang_hooks.decls.omp_predetermined_mapping (decl); - if (kind != OMP_CLAUSE_DEFAULTMAP_CATEGORY_UNSPECIFIED) + if (tofrom) + nflags |= GOVD_MAP; + else if (kind != OMP_CLAUSE_DEFAULTMAP_CATEGORY_UNSPECIFIED) { if (kind == OMP_CLAUSE_DEFAULTMAP_FIRSTPRIVATE) nflags |= GOVD_FIRSTPRIVATE; @@ -7559,7 +7563,7 @@ omp_notice_variable (struct gimplify_omp_ctx *ctx, tree decl, bool in_code) { /* Look in outer OpenACC contexts, to see if there's a data attribute for this variable. */ - omp_notice_variable (octx, decl, in_code); + omp_notice_variable (octx, decl, in_code, tofrom); for (; octx; octx = octx->outer_context) { @@ -7609,6 +7613,8 @@ omp_notice_variable (struct gimplify_omp_ctx *ctx, tree decl, bool in_code) } else { + if (tofrom && (ctx->region_type & ORT_TARGET) != 0) + flags |= GOVD_MAP; /* If nothing changed, there's nothing left to do. */ if ((n->value & flags) == flags) return ret; @@ -7702,7 +7708,7 @@ omp_notice_variable (struct gimplify_omp_ctx *ctx, tree decl, bool in_code) == (GOVD_LASTPRIVATE | GOVD_LINEAR_LASTPRIVATE_NO_OUTER)) return ret; if (ctx->outer_context - && omp_notice_variable (ctx->outer_context, decl, in_code)) + && omp_notice_variable (ctx->outer_context, decl, in_code, tofrom)) return true; return ret; } @@ -8651,7 +8657,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, { omp_add_variable (outer_ctx, decl, GOVD_SHARED | GOVD_SEEN); if (outer_ctx->outer_context) - omp_notice_variable (outer_ctx->outer_context, decl, true); + omp_notice_variable (outer_ctx->outer_context, decl, true, true); } else if (outer_ctx && (outer_ctx->region_type & ORT_TASK) != 0 @@ -8912,8 +8918,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, && octx->region_type == ORT_COMBINED_TARGET) { flags &= ~GOVD_LASTPRIVATE; - if (flags == GOVD_SEEN) - break; + flags |= GOVD_MAP; } else break; @@ -10837,6 +10842,12 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p, case OMP_CLAUSE_IN_REDUCTION: case OMP_CLAUSE_TASK_REDUCTION: decl = OMP_CLAUSE_DECL (c); + /* OpenMP reduction implies tofrom mapping. */ + if (VAR_P (decl)) + for (struct gimplify_omp_ctx *octx = ctx->outer_context; + octx; octx = octx->outer_context) + if (octx->region_type == ORT_COMBINED_TARGET) + omp_notice_variable (octx, decl, true, true); /* OpenACC reductions need a present_or_copy data clause. Add one if necessary. Emit error when the reduction is private. */ if (ctx->region_type == ORT_ACC_PARALLEL diff --git a/gcc/testsuite/c-c++-common/gomp/target-reduction.c b/gcc/testsuite/c-c++-common/gomp/target-reduction.c new file mode 100644 index 00000000000..0faba6688cb --- /dev/null +++ b/gcc/testsuite/c-c++-common/gomp/target-reduction.c @@ -0,0 +1,41 @@ +/* { dg-additional-options "-fdump-tree-omplower" } */ +/* PR fortran/99928 */ + +int a[10]; + +int s2() +{ + int s = 0, t = 0, i, j, k; + #pragma omp target parallel for lastprivate(i) reduction(+:s) + for (i=0; i < 10; i++) + s += a[i]; + + #pragma omp target parallel for simd lastprivate(i) reduction(+:s) + for (i=0; i < 10; i++) + s += a[i]; + + #pragma omp target parallel loop lastprivate(j) reduction(+:t) + for (j=0; j < 10; j++) + t += a[j]; + + #pragma omp target parallel for simd linear(k) + for (k=0; k < 10; k++) + a[k] = k; + return s + t; +} + +/* { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(1\\) thread_limit\\(0\\) map\\(tofrom:i \\\[len: 4\\\]\\) map\\(tofrom:s \\\[len: 4\\\]\\)" 2 "omplower" } } */ +/* { dg-final { scan-tree-dump-times "#pragma omp parallel reduction\\(\\+:s\\) shared\\(i\\) shared\\(a\\)" 2 "omplower" } } */ +/* { dg-final { scan-tree-dump-times "#pragma omp for lastprivate\\(i\\) nowait" 2 "omplower" } } */ + +/* { dg-final { scan-tree-dump-times "#pragma omp simd.* lastprivate\\(i\\) reduction\\(\\+:s\\)" 1 "omplower" } } */ + +/* { dg-final { scan-tree-dump "#pragma omp target num_teams\\(1\\) thread_limit\\(0\\) map\\(tofrom:j \\\[len: 4\\\]\\) map\\(tofrom:t \\\[len: 4\\\]\\) map\\(tofrom:a \\\[len: 40\\\]\\)" "omplower" } } */ +/* { dg-final { scan-tree-dump "#pragma omp parallel shared\\(j\\) shared\\(t\\) shared\\(a\\)" "omplower" } } */ +/* { dg-final { scan-tree-dump "#pragma omp for .*lastprivate\\(j\\) reduction\\(\\+:t\\) private\\(j" "omplower" } } */ +/* { dg-final { scan-tree-dump "#pragma omp simd.* lastprivate\\(j\\) reduction\\(\\+:t\\)" "omplower" } } */ + +/* { dg-final { scan-tree-dump "#pragma omp target num_teams\\(1\\) thread_limit\\(0\\) map\\(tofrom:k \\\[len: 4\\\]\\)" "omplower" } } */ +/* { dg-final { scan-tree-dump "#pragma omp parallel lastprivate\\(k\\) shared\\(a\\)" "omplower" } } */ +/* { dg-final { scan-tree-dump "#pragma omp for nowait private\\(k" "omplower" } } */ +/* { dg-final { scan-tree-dump "#pragma omp simd.* linear\\(k:1\\)" "omplower" } } */ diff --git a/gcc/testsuite/gfortran.dg/gomp/openmp-simd-6.f90 b/gcc/testsuite/gfortran.dg/gomp/openmp-simd-6.f90 index 361e0dad343..35cd86fb205 100644 --- a/gcc/testsuite/gfortran.dg/gomp/openmp-simd-6.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/openmp-simd-6.f90 @@ -51,8 +51,8 @@ end ! { dg-final { scan-tree-dump-times "#pragma omp teams firstprivate\\(a1\\) firstprivate\\(b1\\) shared\\(u\\) default\\(none\\)" 1 "original" } } ! { dg-final { scan-tree-dump-times "#pragma omp distribute lastprivate\\(d1\\)" 1 "original" } } -! { dg-final { scan-tree-dump-times "#pragma omp parallel firstprivate\\(a1\\) firstprivate\\(b1\\) lastprivate\\(d1\\) shared\\(u\\) default\\(none\\)" 1 "original" } } -! { dg-final { scan-tree-dump-times "#pragma omp for nowait" 1 "original" } } +! { dg-final { scan-tree-dump-times "#pragma omp parallel firstprivate\\(a1\\) firstprivate\\(b1\\) shared\\(u\\) default\\(none\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "#pragma omp for lastprivate\\(d1\\) nowait" 1 "original" } } ! { dg-final { scan-tree-dump-times "#pragma omp simd lastprivate\\(d1\\)" 1 "original" } } ! { dg-final { scan-tree-dump-times "#pragma omp simd private\\(ii\\)" 1 "original" } } diff --git a/gcc/testsuite/gfortran.dg/gomp/target-reduction.f90 b/gcc/testsuite/gfortran.dg/gomp/target-reduction.f90 new file mode 100644 index 00000000000..5b7eff6b734 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/gomp/target-reduction.f90 @@ -0,0 +1,51 @@ +! { dg-additional-options "-fdump-tree-omplower" } */ +! PR fortran/99928 + +module m + integer :: a(10) +contains +integer function s2() + integer :: s, t, i, j, k, n + s = 0 + !$omp target parallel do lastprivate(i) reduction(+:s) + do i = 1, 10 + s = s + a(i) + end do + !$omp end target parallel do + + s = 0 + !$omp target parallel do simd lastprivate(i) reduction(+:s) + do i = 1, 10 + s = s + a(i) + end do + !$omp end target parallel do simd + + t = 0 + !$omp target parallel do lastprivate(j) reduction(+:t) + do j = 1, 10, n + t = t + a(j) + end do + !$omp end target parallel do + + !$omp target parallel do simd linear(k) + do k = 1, 10 + a(k) = k + end do + !$omp end target parallel do simd +end +end module + +! { dg-final { scan-tree-dump-times "pragma omp target num_teams\\(1\\) thread_limit\\(0\\) map\\(tofrom:s \\\[len: 4\\\]\\) map\\(tofrom:i \\\[len: 4\\\]\\) map\\(tofrom:a \\\[len: 40\\\]\\)" 2 "omplower" } } +! { dg-final { scan-tree-dump-times "#pragma omp parallel reduction\\(\\+:s\\) shared\\(i\\) shared\\(a\\)" 2 "omplower" } } +! { dg-final { scan-tree-dump-times "#pragma omp for lastprivate\\(i\\) nowait" 2 "omplower" } } + +! { dg-final { scan-tree-dump-times "#pragma omp simd.* lastprivate\\(i\\) reduction\\(\\+:s\\)" 1 "omplower" } } + +! { dg-final { scan-tree-dump "#pragma omp target .*map\\(tofrom:t \\\[len: 4\\\]\\) map\\(tofrom:j \\\[len: 4\\\]\\) map\\(tofrom:a \\\[len: 40\\\]\\)" "omplower" } } +! { dg-final { scan-tree-dump "#pragma omp parallel reduction\\(\\+:t\\).* shared\\(j\\) shared\\(a\\)" "omplower" } } +! { dg-final { scan-tree-dump "#pragma omp for .*lastprivate\\(j\\) nowait" "omplower" } } + +! { dg-final { scan-tree-dump "#pragma omp target num_teams\\(1\\) thread_limit\\(0\\) map\\(tofrom:k \\\[len: 4\\\]\\)" "omplower" } } +! { dg-final { scan-tree-dump "#pragma omp parallel lastprivate\\(k\\) shared\\(a\\)" "omplower" } } +! { dg-final { scan-tree-dump "#pragma omp for nowait private\\(k" "omplower" } } +! { dg-final { scan-tree-dump "#pragma omp simd.* linear\\(k:1\\)" "omplower" } }