On 04/06/2016 07:23 AM, Jakub Jelinek wrote: > On Tue, Apr 05, 2016 at 06:53:47PM -0700, Cesar Philippidis wrote: >> --- a/gcc/omp-low.c >> +++ b/gcc/omp-low.c >> @@ -309,6 +309,25 @@ is_oacc_kernels (omp_context *ctx) >> == GF_OMP_TARGET_KIND_OACC_KERNELS)); >> } >> >> +/* Return true if CTX corresponds to an oacc parallel region and if >> + VAR is used in a reduction. */ >> + >> +static bool >> +is_oacc_parallel_reduction (tree var, omp_context *ctx) >> +{ >> + if (!is_oacc_parallel (ctx)) >> + return false; >> + >> + tree clauses = gimple_omp_target_clauses (ctx->stmt); >> + >> + for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) >> + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION >> + && OMP_CLAUSE_DECL (c) == var) >> + return true; >> + >> + return false; >> +} >> + >> /* If DECL is the artificial dummy VAR_DECL created for non-static >> data member privatization, return the underlying "this" parameter, >> otherwise return NULL. */ >> @@ -2122,7 +2141,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, >> else >> install_var_field (decl, true, 3, ctx, >> base_pointers_restrict); >> - if (is_gimple_omp_offloaded (ctx->stmt)) >> + if (is_gimple_omp_offloaded (ctx->stmt) >> + && !is_oacc_parallel_reduction (decl, ctx)) >> install_var_local (decl, ctx); >> } >> } > > The above is O(n^2) in number of clauses on the construct. > Perhaps better define some OMP_CLAUSE_MAP_IN_REDUCTION macro (e.g. > TREE_PRIVATE bit is unused on OMP_CLAUSE_MAP right now), make sure to set it > e.g. during gimplification where you can see all GOVD_* flags for a > particular decl), and then use this flag here?
That's a good idea. I went ahead and combined this patch with the data map reduction fix for PR70289 that I posted on Monday, <https://gcc.gnu.org/ml/gcc-patches/2016-04/msg00202.html>, because that fix already scans for parallel reduction data clauses. As you suggested, I introduced an OMP_CLAUSE_MAP_IN_REDUCTION macro and set it on the data map clauses associated with acc parallel reductions. Is this patch OK for trunk? It fixes PR70289, PR70348, PR70373, PR70533, PR70535 and PR70537. Cesar
pr70533-20160406.diff.gz
Description: application/gzip
2016-04-06 Cesar Philippidis <ce...@codesourcery.com> PR lto/70289 gcc/ * gimplify.c (gimplify_adjust_acc_parallel_reductions): New function. (gimplify_omp_workshare): Call it. Add new data clauses for acc parallel reductions as needed. * omp-low.c (is_oacc_parallel_reduction): New function. (scan_sharing_clauses): Use it to prevent installing local variables for those used in acc parallel reductions. (lower_rec_input_clauses): Remove dead code. (lower_oacc_reductions): Add support for reference reductions. (lower_reduction_clauses): Remove dead code. (lower_omp_target): Don't remap variables appearing in acc parallel reductions. * gcc/tree.h (OMP_CLAUSE_MAP_IN_REDUCTION): New macro. gcc/testsuite/ * c-c++-common/goacc/reduction-5.c: New test. * c-c++-common/goacc/reduction-promotions.c: New test. * gfortran.dg/goacc/reduction-3.f95: New test. * gfortran.dg/goacc/reduction-promotions.f90: New test. libgomp/ * testsuite/libgomp.oacc-c-c++-common/loop-reduction-gang-np-1.c: New test. * testsuite/libgomp.oacc-c-c++-common/loop-reduction-gw-np-1.c: New test. * testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-1.c: New test. * testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-2.c: New test. * testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-3.c: New test. * testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-4.c: New test. * testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-1.c: New test. * testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-2.c: New test. * testsuite/libgomp.oacc-c-c++-common/loop-reduction-worker-p-1.c: New test. * testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-1.c: New test. * testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-2.c: New test. * testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-3.c: New test. * testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-1.c: New test. * testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c: New test. 
* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-3.c: New test. * testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c: New test. * testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c: Increase test coverage. * testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c: New test. * testsuite/libgomp.oacc-c-c++-common/pr70289.c: New test. * testsuite/libgomp.oacc-c-c++-common/pr70373.c: New test. * testsuite/libgomp.oacc-c-c++-common/reduction-1.c: Increase test coverage. * testsuite/libgomp.oacc-c-c++-common/reduction-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-3.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-4.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-5.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-6.c: New test. * testsuite/libgomp.oacc-c-c++-common/reduction.h: New test. * testsuite/libgomp.oacc-fortran/parallel-reduction.f90: New test. * testsuite/libgomp.oacc-fortran/pr70289.f90: New test. * testsuite/libgomp.oacc-fortran/reduction-1.f90: Increase test coverage. * testsuite/libgomp.oacc-fortran/reduction-2.f90: Likewise. * testsuite/libgomp.oacc-fortran/reduction-3.f90: Likewise. * testsuite/libgomp.oacc-fortran/reduction-4.f90: Likewise. * testsuite/libgomp.oacc-fortran/reduction-5.f90: Likewise. * testsuite/libgomp.oacc-fortran/reduction-6.f90: Likewise. * testsuite/libgomp.oacc-fortran/reduction-7.f90: New test. diff --git a/gcc/gimplify.c b/gcc/gimplify.c index b9757db..056e88c 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -9484,6 +9484,123 @@ optimize_target_teams (tree target, gimple_seq *pre_p) OMP_TARGET_CLAUSES (target) = c; } +/* OpenACC parallel reductions need a present_or_copy clause to ensure + that the original variable used in the reduction gets updated on + the host. 
This function scans CLAUSES for reductions and adds or + adjusts the data clauses as necessary. Any incompatible data clause + will be reported as a warning and promoted to present_or_copy. Any + private reduction will be treated as an error. This function + returns a list of new present_or_copy data clauses. */ + +static tree +gimplify_adjust_acc_parallel_reductions (tree *clauses) +{ + tree c, list = NULL_TREE; + hash_set<tree> *reduction_decls, *pointer_decls; + reduction_decls = new hash_set<tree>; + pointer_decls = new hash_set<tree>; + + /* Scan 1: Construct a hash set with all of the reduction decls. */ + for (c = *clauses; c; c = OMP_CLAUSE_CHAIN (c)) + { + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION) + { + reduction_decls->add (OMP_CLAUSE_DECL (c)); + if (POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))) + pointer_decls->add (OMP_CLAUSE_DECL (c)); + } + } + + if (reduction_decls->elements () == 0) + goto cleanup; + + /* Scan 2: Adjust the data clause for each reduction. */ + for (c = *clauses; c; c = OMP_CLAUSE_CHAIN (c)) + { + int kind = -1; + tree decl; + + switch (OMP_CLAUSE_CODE (c)) + { + case OMP_CLAUSE_MAP: + kind = OMP_CLAUSE_MAP_KIND (c); + case OMP_CLAUSE_PRIVATE: + case OMP_CLAUSE_FIRSTPRIVATE: + decl = OMP_CLAUSE_DECL (c); + + if (!DECL_P (decl)) + decl = TREE_OPERAND (decl, 0); + gcc_assert (DECL_P (decl)); + + /* Reference variables always have a GOMP_MAP_POINTER. Mark + that clause as IN_REDUCTION, and ignore it. 
*/ + if (POINTER_TYPE_P (TREE_TYPE (decl)) + && kind == GOMP_MAP_POINTER + && pointer_decls->contains (decl)) + { + OMP_CLAUSE_MAP_IN_REDUCTION (c) = 1; + break; + } + + if (!reduction_decls->contains (decl)) + break; + + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP) + { + if (!pointer_decls->contains (decl)) + OMP_CLAUSE_MAP_IN_REDUCTION(c) = 1; + + if (!((kind & GOMP_MAP_TOFROM) == GOMP_MAP_TOFROM + || kind == GOMP_MAP_FORCE_PRESENT)) + { + warning_at (OMP_CLAUSE_LOCATION (c), 0, "incompatible data " + "clause with reduction on %qE; promoting to " + "present_or_copy", DECL_NAME (decl)); + + OMP_CLAUSE_CODE (c) = OMP_CLAUSE_MAP; + OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_TOFROM); + } + reduction_decls->remove (decl); + break; + } + + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_PRIVATE + || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE) + { + error_at (OMP_CLAUSE_LOCATION (c), "invalid private reduction " + "on %qE", DECL_NAME (decl)); + reduction_decls->remove (decl); + } + default:; + } + } + + if (reduction_decls->elements () == 0) + goto cleanup; + + /* Scan 3: Add a present_or_copy clause for any reduction variable which + doesn't have a data clause already. */ + for (hash_set<tree>::iterator iter = reduction_decls->begin (); + iter != reduction_decls->end (); ++iter) + { + tree decl = *iter; + + tree nc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_MAP); + OMP_CLAUSE_SET_MAP_KIND (nc, GOMP_MAP_TOFROM); + OMP_CLAUSE_DECL (nc) = decl; + if (!POINTER_TYPE_P (TREE_TYPE (decl))) + OMP_CLAUSE_MAP_IN_REDUCTION (nc) = 1; + TREE_CHAIN (nc) = list; + list = nc; + } + + cleanup: + delete reduction_decls; + delete pointer_decls; + + return list; +} + /* Gimplify the gross structure of several OMP constructs. 
*/ static void @@ -9491,6 +9608,7 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p) { tree expr = *expr_p; gimple *stmt; + tree acc_reductions = NULL_TREE; gimple_seq body = NULL; enum omp_region_type ort; @@ -9508,6 +9626,8 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p) break; case OACC_PARALLEL: ort = ORT_ACC_PARALLEL; + acc_reductions + = gimplify_adjust_acc_parallel_reductions (&OMP_CLAUSES (expr)); break; case OACC_DATA: ort = ORT_ACC_DATA; @@ -9606,6 +9726,48 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p) gimplify_seq_add_stmt (pre_p, stmt); *expr_p = NULL_TREE; + + /* Finalize any parallel acc reductions. */ + if (acc_reductions) + { + tree c, nc, t; + tree clauses = NULL_TREE; + + c = nc = acc_reductions; + + while (c) + { + nc = OMP_CLAUSE_CHAIN (c); + OMP_CLAUSE_CHAIN (c) = NULL_TREE; + lang_hooks.decls.omp_finish_clause (c, pre_p); + + /* Find the last data clause introduced by omp_finish_decls, + marking any pointer data maps as IN_REDUCTION. */ + for (t = c; t; t = TREE_CHAIN (t)) + { + if (POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (t)))) + OMP_CLAUSE_MAP_IN_REDUCTION (t) = 1; + + if (TREE_CHAIN (t) == NULL_TREE) + break; + } + + /* Update the chain of clauses. */ + TREE_CHAIN (t) = clauses; + clauses = c; + + c = nc; + } + + /* Update the list of clauses in the gimple stmt. 
*/ + for (t = gimple_omp_target_clauses (stmt); OMP_CLAUSE_CHAIN (t); + t = OMP_CLAUSE_CHAIN (t)) + ; + + OMP_CLAUSE_CHAIN (t) = clauses; + } + + return; } /* Gimplify the gross structure of OpenACC enter/exit data, update, and OpenMP diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 3fd6eb3..5a6f9d3 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -2122,7 +2122,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, else install_var_field (decl, true, 3, ctx, base_pointers_restrict); - if (is_gimple_omp_offloaded (ctx->stmt)) + if (is_gimple_omp_offloaded (ctx->stmt) + && !OMP_CLAUSE_MAP_IN_REDUCTION (c)) install_var_local (decl, ctx); } } @@ -4837,7 +4838,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, gimplify_assign (ptr, x, ilist); } } - else if (is_reference (var) && !is_oacc_parallel (ctx)) + else if (is_reference (var)) { /* For references that are being privatized for Fortran, allocate new backing storage for the new pointer @@ -5573,7 +5574,8 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, tree orig = OMP_CLAUSE_DECL (c); tree var = maybe_lookup_decl (orig, ctx); tree ref_to_res = NULL_TREE; - tree incoming, outgoing; + tree incoming, outgoing, v1, v2, v3; + bool is_private = false; enum tree_code rcode = OMP_CLAUSE_REDUCTION_CODE (c); if (rcode == MINUS_EXPR) @@ -5586,7 +5588,6 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, if (!var) var = orig; - gcc_assert (!is_reference (var)); incoming = outgoing = var; @@ -5622,22 +5623,38 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, for (; cls; cls = OMP_CLAUSE_CHAIN (cls)) if (OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_REDUCTION && orig == OMP_CLAUSE_DECL (cls)) - goto has_outer_reduction; + { + incoming = outgoing = lookup_decl (orig, probe); + goto has_outer_reduction; + } + else if ((OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_FIRSTPRIVATE + || OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_PRIVATE) + 
&& orig == OMP_CLAUSE_DECL (cls)) + { + is_private = true; + goto do_lookup; + } } do_lookup: /* This is the outermost construct with this reduction, see if there's a mapping for it. */ if (gimple_code (outer->stmt) == GIMPLE_OMP_TARGET - && maybe_lookup_field (orig, outer)) + && maybe_lookup_field (orig, outer) && !is_private) { ref_to_res = build_receiver_ref (orig, false, outer); if (is_reference (orig)) ref_to_res = build_simple_mem_ref (ref_to_res); + tree type = TREE_TYPE (var); + if (POINTER_TYPE_P (type)) + type = TREE_TYPE (type); + outgoing = var; - incoming = omp_reduction_init_op (loc, rcode, TREE_TYPE (var)); + incoming = omp_reduction_init_op (loc, rcode, type); } + else if (ctx->outer) + incoming = outgoing = lookup_decl (orig, ctx->outer); else incoming = outgoing = orig; @@ -5647,6 +5664,37 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, if (!ref_to_res) ref_to_res = integer_zero_node; + if (is_reference (orig)) + { + tree type = TREE_TYPE (var); + const char *id = IDENTIFIER_POINTER (DECL_NAME (var)); + + if (!inner) + { + tree x = create_tmp_var (TREE_TYPE (type), id); + gimplify_assign (var, build_fold_addr_expr (x), fork_seq); + } + + v1 = create_tmp_var (type, id); + v2 = create_tmp_var (type, id); + v3 = create_tmp_var (type, id); + + gimplify_assign (v1, var, fork_seq); + gimplify_assign (v2, var, fork_seq); + gimplify_assign (v3, var, fork_seq); + + var = build_simple_mem_ref (var); + v1 = build_simple_mem_ref (v1); + v2 = build_simple_mem_ref (v2); + v3 = build_simple_mem_ref (v3); + outgoing = build_simple_mem_ref (outgoing); + + if (TREE_CODE (incoming) != INTEGER_CST) + incoming = build_simple_mem_ref (incoming); + } + else + v1 = v2 = v3 = var; + /* Determine position in reduction buffer, which may be used by target. 
*/ enum machine_mode mode = TYPE_MODE (TREE_TYPE (var)); @@ -5676,20 +5724,20 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION, TREE_TYPE (var), 6, init_code, unshare_expr (ref_to_res), - var, level, op, off); + v1, level, op, off); tree fini_call = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION, TREE_TYPE (var), 6, fini_code, unshare_expr (ref_to_res), - var, level, op, off); + v2, level, op, off); tree teardown_call = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION, TREE_TYPE (var), 6, teardown_code, - ref_to_res, var, level, op, off); + ref_to_res, v3, level, op, off); - gimplify_assign (var, setup_call, &before_fork); - gimplify_assign (var, init_call, &after_fork); - gimplify_assign (var, fini_call, &before_join); + gimplify_assign (v1, setup_call, &before_fork); + gimplify_assign (v2, init_call, &after_fork); + gimplify_assign (v3, fini_call, &before_join); gimplify_assign (outgoing, teardown_call, &after_join); } @@ -5931,9 +5979,6 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx) } } - if (is_gimple_omp_oacc (ctx->stmt)) - return; - stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START), 0); gimple_seq_add_stmt (stmt_seqp, stmt); @@ -15820,7 +15865,10 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) if (!maybe_lookup_field (var, ctx)) continue; - if (offloaded) + /* Don't remap oacc parallel reduction variables, because the + intermediate result must be local to each gang. */ + if (offloaded && !(OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && OMP_CLAUSE_MAP_IN_REDUCTION(c))) { x = build_receiver_ref (var, true, ctx); tree new_var = lookup_decl (var, ctx); diff --git a/gcc/tree.h b/gcc/tree.h index 544a6a1..945e7e4 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -1532,6 +1532,9 @@ extern void protected_set_expr_location (tree, location_t); treatment if OMP_CLAUSE_SIZE is zero. 
*/ #define OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION(NODE) \ TREE_PROTECTED (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP)) +/* Nonzero if this map clause is for an ACC parallel reduction variable. */ +#define OMP_CLAUSE_MAP_IN_REDUCTION(NODE) \ + TREE_PRIVATE (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP)) #define OMP_CLAUSE_PROC_BIND_KIND(NODE) \ (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_PROC_BIND)->omp_clause.subcode.proc_bind_kind)