From: Julian Brown <jul...@codesourcery.com> gcc/ * gimplify.c (privatize_reduction): New struct. (localize_reductions_r, localize_reductions): New functions. (gimplify_omp_for): Call localize_reductions. (gimplify_omp_workshare): Likewise. * omp-low.c (lower_oacc_reductions): Handle localized reductions. Create fewer temp vars. * tree-core.h (omp_clause_code): Add OMP_CLAUSE_REDUCTION_PRIVATE_DECL documentation. * tree.c (omp_clause_num_ops): Bump number of ops for OMP_CLAUSE_REDUCTION to 6. (walk_tree_1): Adjust accordingly. * tree.h (OMP_CLAUSE_REDUCTION_PRIVATE_DECL): Add macro. --- gcc/gimplify.c | 102 +++++++++++++++++++++++++++++++++++ gcc/omp-low.c | 45 +++++----------- gcc/tree-core.h | 4 +- gcc/tree.c | 137 +++++++++++++++++++++++++++++++++++++++++++++--- gcc/tree.h | 2 + 5 files changed, 250 insertions(+), 40 deletions(-)
diff --git a/gcc/gimplify.c b/gcc/gimplify.c index c2ab96e7e182..9a4331c70d6e 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -240,6 +240,11 @@ struct gimplify_omp_ctx int defaultmap[5]; }; +struct privatize_reduction +{ + tree ref_var, local_var; +}; + static struct gimplify_ctx *gimplify_ctxp; static struct gimplify_omp_ctx *gimplify_omp_ctxp; static bool in_omp_construct; @@ -11900,6 +11905,80 @@ gimplify_omp_taskloop_expr (tree type, tree *tp, gimple_seq *pre_p, OMP_FOR_CLAUSES (orig_for_stmt) = c; } +/* Helper function for localize_reductions. Replace all uses of REF_VAR with + LOCAL_VAR. */ + +static tree +localize_reductions_r (tree *tp, int *walk_subtrees, void *data) +{ + enum tree_code tc = TREE_CODE (*tp); + struct privatize_reduction *pr = (struct privatize_reduction *) data; + + if (TYPE_P (*tp)) + *walk_subtrees = 0; + + switch (tc) + { + case INDIRECT_REF: + case MEM_REF: + if (TREE_OPERAND (*tp, 0) == pr->ref_var) + *tp = pr->local_var; + + *walk_subtrees = 0; + break; + + case VAR_DECL: + case PARM_DECL: + case RESULT_DECL: + if (*tp == pr->ref_var) + *tp = pr->local_var; + + *walk_subtrees = 0; + break; + + default: + break; + } + + return NULL_TREE; +} + +/* OpenACC worker and vector loop state propagation requires reductions + to be inside local variables. This function replaces all reference-type + reduction variables associated with the loop with a local copy. It is + also used to create private copies of reduction variables for those + which are not associated with acc loops. 
*/ + +static void +localize_reductions (tree clauses, tree body) +{ + tree c, var, type, new_var; + struct privatize_reduction pr; + + for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION) + { + var = OMP_CLAUSE_DECL (c); + + if (!lang_hooks.decls.omp_privatize_by_reference (var)) + { + OMP_CLAUSE_REDUCTION_PRIVATE_DECL (c) = NULL; + continue; + } + + type = TREE_TYPE (TREE_TYPE (var)); + new_var = create_tmp_var (type, IDENTIFIER_POINTER (DECL_NAME (var))); + + pr.ref_var = var; + pr.local_var = new_var; + + walk_tree (&body, localize_reductions_r, &pr, NULL); + + OMP_CLAUSE_REDUCTION_PRIVATE_DECL (c) = new_var; + } +} + + /* Gimplify the gross structure of an OMP_FOR statement. */ static enum gimplify_status @@ -12126,6 +12205,23 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p) gcc_unreachable (); } + if (ort == ORT_ACC) + { + gimplify_omp_ctx *outer = gimplify_omp_ctxp; + + while (outer + && outer->region_type != ORT_ACC_PARALLEL + && outer->region_type != ORT_ACC_KERNELS) + outer = outer->outer_context; + + /* FIXME: Reductions only work in parallel regions at present. We avoid + doing the reduction localization transformation in kernels regions + here, because the code to remove reductions in kernels regions cannot + handle that. */ + if (outer && outer->region_type == ORT_ACC_PARALLEL) + localize_reductions (OMP_FOR_CLAUSES (*expr_p), OMP_FOR_BODY (*expr_p)); + } + /* Set OMP_CLAUSE_LINEAR_NO_COPYIN flag on explicit linear clause for the IV. */ if (ort == ORT_SIMD && TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)) == 1) @@ -13654,6 +13750,12 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p) || (ort & ORT_HOST_TEAMS) == ORT_HOST_TEAMS) { push_gimplify_context (); + + /* FIXME: Reductions are not supported in kernels regions yet. 
*/ + if (/*ort == ORT_ACC_KERNELS ||*/ ort == ORT_ACC_PARALLEL) + localize_reductions (OMP_TARGET_CLAUSES (*expr_p), + OMP_TARGET_BODY (*expr_p)); + gimple *g = gimplify_and_return_first (OMP_BODY (expr), &body); if (gimple_code (g) == GIMPLE_BIND) pop_gimplify_context (g); diff --git a/gcc/omp-low.c b/gcc/omp-low.c index afd6061ae1e9..ae5cdfc5e260 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -7530,9 +7530,9 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, || is_oacc_kernels_decomposed_graphite_part (ctx)); tree orig = OMP_CLAUSE_DECL (c); - tree var = maybe_lookup_decl (orig, ctx); + tree var; tree ref_to_res = NULL_TREE; - tree incoming, outgoing, v1, v2, v3; + tree incoming, outgoing; bool is_private = false; enum tree_code rcode = OMP_CLAUSE_REDUCTION_CODE (c); @@ -7544,6 +7544,9 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, rcode = BIT_IOR_EXPR; tree op = build_int_cst (unsigned_type_node, rcode); + var = OMP_CLAUSE_REDUCTION_PRIVATE_DECL (c); + if (!var) + var = maybe_lookup_decl (orig, ctx); if (!var) var = orig; @@ -7636,34 +7639,11 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, if (omp_privatize_by_reference (orig)) { - tree type = TREE_TYPE (var); - const char *id = IDENTIFIER_POINTER (DECL_NAME (var)); - - if (!inner) - { - tree x = create_tmp_var (TREE_TYPE (type), id); - gimplify_assign (var, build_fold_addr_expr (x), fork_seq); - } - - v1 = create_tmp_var (type, id); - v2 = create_tmp_var (type, id); - v3 = create_tmp_var (type, id); - - gimplify_assign (v1, var, fork_seq); - gimplify_assign (v2, var, fork_seq); - gimplify_assign (v3, var, fork_seq); - - var = build_simple_mem_ref (var); - v1 = build_simple_mem_ref (v1); - v2 = build_simple_mem_ref (v2); - v3 = build_simple_mem_ref (v3); outgoing = build_simple_mem_ref (outgoing); if (!TREE_CONSTANT (incoming)) incoming = build_simple_mem_ref (incoming); } - else - v1 = v2 = v3 = var; /* Determine 
position in reduction buffer, which may be used by target. The parser has ensured that this is not a @@ -7696,20 +7676,21 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION, TREE_TYPE (var), 6, init_code, unshare_expr (ref_to_res), - v1, level, op, off); + var, level, op, off); tree fini_call = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION, TREE_TYPE (var), 6, fini_code, unshare_expr (ref_to_res), - v2, level, op, off); + var, level, op, off); tree teardown_call = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION, - TREE_TYPE (var), 6, teardown_code, - ref_to_res, v3, level, op, off); + TREE_TYPE (var), 6, + teardown_code, ref_to_res, var, + level, op, off); - gimplify_assign (v1, setup_call, &before_fork); - gimplify_assign (v2, init_call, &after_fork); - gimplify_assign (v3, fini_call, &before_join); + gimplify_assign (var, setup_call, &before_fork); + gimplify_assign (var, init_call, &after_fork); + gimplify_assign (var, fini_call, &before_join); gimplify_assign (outgoing, teardown_call, &after_join); } diff --git a/gcc/tree-core.h b/gcc/tree-core.h index f0c65a25f070..980bdee6c285 100644 --- a/gcc/tree-core.h +++ b/gcc/tree-core.h @@ -269,7 +269,9 @@ enum omp_clause_code { placeholder used in OMP_CLAUSE_REDUCTION_{INIT,MERGE}. Operand 4: OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER: Another dummy VAR_DECL placeholder, used like the above for C/C++ array - reductions. */ + reductions. + Operand 5: OMP_CLAUSE_REDUCTION_PRIVATE_DECL: A private VAR_DECL of + the original DECL associated with the reduction clause. */ OMP_CLAUSE_REDUCTION, /* OpenMP clause: task_reduction (operator:variable_list). 
*/ diff --git a/gcc/tree.c b/gcc/tree.c index 7bfd64160f4e..08f5a3e884bf 100644 --- a/gcc/tree.c +++ b/gcc/tree.c @@ -283,7 +283,7 @@ unsigned const char omp_clause_num_ops[] = 1, /* OMP_CLAUSE_SHARED */ 1, /* OMP_CLAUSE_FIRSTPRIVATE */ 2, /* OMP_CLAUSE_LASTPRIVATE */ - 5, /* OMP_CLAUSE_REDUCTION */ + 6, /* OMP_CLAUSE_REDUCTION */ 5, /* OMP_CLAUSE_TASK_REDUCTION */ 5, /* OMP_CLAUSE_IN_REDUCTION */ 1, /* OMP_CLAUSE_COPYIN */ @@ -11134,12 +11134,135 @@ walk_tree_1 (tree *tp, walk_tree_fn func, void *data, break; case OMP_CLAUSE: - { - int len = omp_clause_num_ops[OMP_CLAUSE_CODE (*tp)]; - for (int i = 0; i < len; i++) - WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, i)); - WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp)); - } + switch (OMP_CLAUSE_CODE (*tp)) + { + case OMP_CLAUSE_GANG: + WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, 1)); + /* FALLTHRU */ + + case OMP_CLAUSE_ASYNC: + case OMP_CLAUSE_WAIT: + case OMP_CLAUSE_WORKER: + case OMP_CLAUSE_VECTOR: + case OMP_CLAUSE_NUM_GANGS: + case OMP_CLAUSE_NUM_WORKERS: + case OMP_CLAUSE_VECTOR_LENGTH: + case OMP_CLAUSE_PRIVATE: + case OMP_CLAUSE_SHARED: + case OMP_CLAUSE_FIRSTPRIVATE: + case OMP_CLAUSE_COPYIN: + case OMP_CLAUSE_COPYPRIVATE: + case OMP_CLAUSE_FILTER: + case OMP_CLAUSE_FINAL: + case OMP_CLAUSE_IF: + case OMP_CLAUSE_NUM_THREADS: + case OMP_CLAUSE_SCHEDULE: + case OMP_CLAUSE_UNIFORM: + case OMP_CLAUSE_DEPEND: + case OMP_CLAUSE_NONTEMPORAL: + case OMP_CLAUSE_NUM_TEAMS: + case OMP_CLAUSE_THREAD_LIMIT: + case OMP_CLAUSE_DEVICE: + case OMP_CLAUSE_DIST_SCHEDULE: + case OMP_CLAUSE_SAFELEN: + case OMP_CLAUSE_SIMDLEN: + case OMP_CLAUSE_ORDERED: + case OMP_CLAUSE_PRIORITY: + case OMP_CLAUSE_GRAINSIZE: + case OMP_CLAUSE_NUM_TASKS: + case OMP_CLAUSE_HINT: + case OMP_CLAUSE_TO_DECLARE: + case OMP_CLAUSE_LINK: + case OMP_CLAUSE_DETACH: + case OMP_CLAUSE_USE_DEVICE_PTR: + case OMP_CLAUSE_USE_DEVICE_ADDR: + case OMP_CLAUSE_IS_DEVICE_PTR: + case OMP_CLAUSE_INCLUSIVE: + case OMP_CLAUSE_EXCLUSIVE: + case OMP_CLAUSE__LOOPTEMP_: + case 
OMP_CLAUSE__REDUCTEMP_: + case OMP_CLAUSE__CONDTEMP_: + case OMP_CLAUSE__SCANTEMP_: + case OMP_CLAUSE__SIMDUID_: + case OMP_CLAUSE_AFFINITY: + WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, 0)); + /* FALLTHRU */ + + case OMP_CLAUSE_INDEPENDENT: + case OMP_CLAUSE_NOWAIT: + case OMP_CLAUSE_DEFAULT: + case OMP_CLAUSE_UNTIED: + case OMP_CLAUSE_MERGEABLE: + case OMP_CLAUSE_PROC_BIND: + case OMP_CLAUSE_DEVICE_TYPE: + case OMP_CLAUSE_INBRANCH: + case OMP_CLAUSE_NOTINBRANCH: + case OMP_CLAUSE_FOR: + case OMP_CLAUSE_PARALLEL: + case OMP_CLAUSE_SECTIONS: + case OMP_CLAUSE_TASKGROUP: + case OMP_CLAUSE_NOGROUP: + case OMP_CLAUSE_THREADS: + case OMP_CLAUSE_SIMD: + case OMP_CLAUSE_DEFAULTMAP: + case OMP_CLAUSE_ORDER: + case OMP_CLAUSE_BIND: + case OMP_CLAUSE_AUTO: + case OMP_CLAUSE_SEQ: + case OMP_CLAUSE_NOHOST: + case OMP_CLAUSE_TILE: + case OMP_CLAUSE__SIMT_: + case OMP_CLAUSE_IF_PRESENT: + case OMP_CLAUSE_FINALIZE: + WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp)); + + case OMP_CLAUSE_LASTPRIVATE: + WALK_SUBTREE (OMP_CLAUSE_DECL (*tp)); + WALK_SUBTREE (OMP_CLAUSE_LASTPRIVATE_STMT (*tp)); + WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp)); + + case OMP_CLAUSE_COLLAPSE: + { + int i; + for (i = 0; i < 3; i++) + WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, i)); + WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp)); + } + + case OMP_CLAUSE_LINEAR: + WALK_SUBTREE (OMP_CLAUSE_DECL (*tp)); + WALK_SUBTREE (OMP_CLAUSE_LINEAR_STEP (*tp)); + WALK_SUBTREE (OMP_CLAUSE_LINEAR_STMT (*tp)); + WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp)); + + case OMP_CLAUSE_ALIGNED: + case OMP_CLAUSE_ALLOCATE: + case OMP_CLAUSE_FROM: + case OMP_CLAUSE_TO: + case OMP_CLAUSE_MAP: + case OMP_CLAUSE__CACHE_: + WALK_SUBTREE (OMP_CLAUSE_DECL (*tp)); + WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, 1)); + WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp)); + + case OMP_CLAUSE_REDUCTION: + { + for (int i = 0; i < 6; i++) + WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, i)); + WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp)); + } + + case OMP_CLAUSE_TASK_REDUCTION: + case 
OMP_CLAUSE_IN_REDUCTION: + { + for (int i = 0; i < 5; i++) + WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, i)); + WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp)); + } + + default: + gcc_unreachable (); + } break; case TARGET_EXPR: diff --git a/gcc/tree.h b/gcc/tree.h index 15e5147f40b0..5ee1c33f4e15 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -1746,6 +1746,8 @@ class auto_suppress_location_wrappers #define OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER(NODE) \ OMP_CLAUSE_OPERAND (OMP_CLAUSE_RANGE_CHECK (NODE, OMP_CLAUSE_REDUCTION, \ OMP_CLAUSE_IN_REDUCTION), 4) +#define OMP_CLAUSE_REDUCTION_PRIVATE_DECL(NODE) \ + OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_REDUCTION), 5) /* True if a REDUCTION clause may reference the original list item (omp_orig) in its OMP_CLAUSE_REDUCTION_{,GIMPLE_}INIT. */ -- 2.33.0 ----------------- Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955