Due to the different levels of parallelism available in OpenACC, it is useful to mark certain variables as GOMP_MAP_PRIVATE so that they can be used in reductions. This patch was introduced in openacc-gcc-7-branch here <https://gcc.gnu.org/ml/gcc-patches/2017-09/msg00274.html>.
I bootstrapped and regtested on x86_64/nvptx. Is it OK for trunk? Thanks, Cesar
>From b0e7fb09bf3a3f853e77c2712b6f85ad21472e72 Mon Sep 17 00:00:00 2001 From: Chung-Lin Tang <clt...@codesourcery.com> Date: Tue, 5 Sep 2017 22:09:34 +0800 Subject: [PATCH 2/5] [OpenACC] Add support for making maps 'private' inside offloaded regions 2018-XX-YY Chung-Lin Tang <clt...@codesourcery.com> Cesar Philippidis <ce...@codesourcery.com> gcc/ * tree.h (OMP_CLAUSE_MAP_PRIVATE): Define macro. * gimplify.c (enum gimplify_omp_var_data): Add GOVD_MAP_PRIVATE enum value. (omp_add_variable): Add GOVD_MAP_PRIVATE to reduction clause flags if not a gang-partitioned loop directive. (gimplify_adjust_omp_clauses_1): Set OMP_CLAUSE_MAP_PRIVATE of new map clause to 1 if GOVD_MAP_PRIVATE flag is present. * omp-low.c (lower_oacc_reductions): Handle map clauses with OMP_CLAUSE_MAP_PRIVATE set in same matter as firstprivate/private. (lower_omp_target): Likewise. Add copy back code for map clauses with OMP_CLAUSE_MAP_PRIVATE set. libgomp/ * testsuite/libgomp.oacc-c-c++-common/reduction-9.c: New test. (cherry picked from openacc-gcc-7-branch commit 2dc21f336368889c1ebf031801a7613f65899ef1, e17bb2068f9) --- gcc/gimplify.c | 34 ++++++++++++++- gcc/omp-low.c | 28 +++++++++++-- gcc/tree.h | 3 ++ .../libgomp.oacc-c-c++-common/reduction-9.c | 41 +++++++++++++++++++ 4 files changed, 101 insertions(+), 5 deletions(-) create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-9.c diff --git a/gcc/gimplify.c b/gcc/gimplify.c index cf8977c8508..7dadf69b758 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -105,6 +105,9 @@ enum gimplify_omp_var_data /* Flag for GOVD_MAP: must be present already. */ GOVD_MAP_FORCE_PRESENT = 524288, + /* Flag for GOVD_MAP, copy to/from private storage inside offloaded region. */ + GOVD_MAP_PRIVATE = 1048576, + GOVD_DATA_SHARE_CLASS = (GOVD_SHARED | GOVD_PRIVATE | GOVD_FIRSTPRIVATE | GOVD_LASTPRIVATE | GOVD_REDUCTION | GOVD_LINEAR | GOVD_LOCAL) @@ -6835,6 +6838,21 @@ omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags) if (ctx->region_type == ORT_ACC && (flags & GOVD_REDUCTION)) { struct gimplify_omp_ctx *outer_ctx = ctx->outer_context; + + bool gang = false, worker = false, vector = false; + for (tree c = ctx->clauses; c; c = OMP_CLAUSE_CHAIN (c)) + { + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_GANG) + gang = true; + else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WORKER) + worker = true; + else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_VECTOR) + vector = true; + } + + /* Set new copy map as 'private' if sure we're not gang-partitioning. */ + bool map_private = !gang && (worker || vector); + while (outer_ctx) { n = splay_tree_lookup (outer_ctx->variables, (splay_tree_key)decl); @@ -6856,12 +6874,21 @@ omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags) /* Remove firstprivate and make it a copy map. */ n->value &= ~GOVD_FIRSTPRIVATE; n->value |= GOVD_MAP; + + /* If not gang-partitioned, add MAP_PRIVATE on the map + clause. */ + if (map_private) + n->value |= GOVD_MAP_PRIVATE; } } else if (outer_ctx->region_type == ORT_ACC_PARALLEL) { - splay_tree_insert (outer_ctx->variables, (splay_tree_key)decl, - GOVD_MAP | GOVD_SEEN); + unsigned f = GOVD_MAP | GOVD_SEEN; + + /* If not gang-partitioned, add MAP_PRIVATE on the map clause. */ + if (map_private) + f |= GOVD_MAP_PRIVATE; + splay_tree_insert (outer_ctx->variables, (splay_tree_key)decl, f); break; } outer_ctx = outer_ctx->outer_context; @@ -8904,6 +8931,9 @@ gimplify_adjust_omp_clauses_1 (splay_tree_node n, void *data) gcc_unreachable (); } OMP_CLAUSE_SET_MAP_KIND (clause, kind); + if ((flags & GOVD_MAP_PRIVATE) + && TREE_CODE (OMP_CLAUSE_DECL (clause)) == VAR_DECL) + OMP_CLAUSE_MAP_PRIVATE (clause) = 1; tree c2 = gomp_needs_data_present (decl); /* Handle OpenACC pointers that were declared inside acc data regions. */ diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 714490d6921..ef3c7651c74 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -4907,7 +4907,9 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, goto has_outer_reduction; } else if ((OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_FIRSTPRIVATE - || OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_PRIVATE) + || OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_PRIVATE + || (OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_MAP + && OMP_CLAUSE_MAP_PRIVATE (cls))) && orig == OMP_CLAUSE_DECL (cls)) { is_private = true; @@ -7637,7 +7639,10 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) && !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c) && TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE) x = build_simple_mem_ref (x); - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE + || (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && (OMP_CLAUSE_MAP_KIND (c) & GOMP_MAP_TO) + && OMP_CLAUSE_MAP_PRIVATE (c))) { gcc_assert (is_gimple_omp_oacc (ctx->stmt)); if (omp_is_reference (new_var) @@ -8505,7 +8510,24 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) gimple_seq_add_seq (&new_body, join_seq); if (offloaded) - new_body = maybe_catch_exception (new_body); + { + /* For OMP_CLAUSE_MAP_PRIVATE maps, add a copy back from private + storage to receiver ref, for copying back to host. */ + for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && (OMP_CLAUSE_MAP_KIND (c) & GOMP_MAP_FROM) + && OMP_CLAUSE_MAP_PRIVATE (c)) + { + tree var = OMP_CLAUSE_DECL (c); + tree new_var = lookup_decl (var, ctx); + tree x = build_receiver_ref (var, true, ctx); + gimple_seq seq = NULL; + gimplify_assign (x, new_var, &seq); + gimple_seq_add_seq (&new_body, seq); + } + + new_body = maybe_catch_exception (new_body); + } gimple_seq_add_stmt (&new_body, gimple_build_omp_return (false)); gimple_omp_set_body (stmt, new_body); diff --git a/gcc/tree.h b/gcc/tree.h index 79b675025d9..8bdbe3341bb 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -1564,6 +1564,9 @@ extern tree maybe_wrap_with_location (tree, location_t); /* Nonzero if this map clause is for an ACC parallel reduction variable. */ #define OMP_CLAUSE_MAP_IN_REDUCTION(NODE) \ TREE_PRIVATE (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP)) +/* Nozero if this map is loaded to private storage inside offloaded region. */ +#define OMP_CLAUSE_MAP_PRIVATE(NODE) \ + TREE_NOTHROW (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP)) #define OMP_CLAUSE_PROC_BIND_KIND(NODE) \ (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_PROC_BIND)->omp_clause.subcode.proc_bind_kind) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-9.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-9.c new file mode 100644 index 00000000000..d6e02fc6d7e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-9.c @@ -0,0 +1,41 @@ +#include <stdio.h> +#include <stdlib.h> + +int +main (int argc, char *argv[]) +{ +#define N 100 + int n = N; + int i, j, tmp; + int input[N*N], output[N], houtput[N]; + + for (i = 0; i < n * n; i++) + input[i] = i; + + for (i = 0; i < n; i++) + { + tmp = 0; + for (j = 0; j < n; j++) + tmp += input[i * n + j]; + houtput[i] = tmp; + } + + #pragma acc parallel loop gang + for (i = 0; i < n; i++) + { + tmp = 0; + + #pragma acc loop worker reduction(+:tmp) + for (j = 0; j < n; j++) + tmp += input[i * n + j]; + + output[i] = tmp; + } + + /* Test if every worker-level reduction had correct private result. */ + for (i = 0; i < n; i++) + if (houtput[i] != output[i]) + abort (); + + return 0; +} -- 2.17.1