This patch teaches the gimplifier how to pass certain OpenACC reduction variables as firstprivate, and not with an implicit copy directive. This is matches the default behavior for the implicit data mappings of scalar variables inside OpenACC parallel regions. It should be noted that the gimplifier will still implicitly map reduction variables on loops immediately enclosed inside a parallel regions, like so
#pragma acc parallel #pragma acc loop reduction(+:sum) as copy. This change only impacts reductions variables inside nested acc loops like #pragma acc parallel #pragma acc loop for (...) { #pragma acc loop reduction(+:s2) Here s2 will be transferred into the accelerator as firstprivate instead of copy. Is this OK for trunk? I regtested and bootstrapped for x86_64 with nvptx offloading. Cesar
[OpenACC] Enable firstprivate OpenACC reductions 2018-XX-YY Cesar Philippidis <ce...@codesourcery.com> Chung-Lin Tang <clt...@codesourcery.com> gcc/ * gimplify.c (omp_add_variable): Enable firstprivate reduction variables. gcc/testsuite/ * c-c++-common/goacc/reduction-8.c: New test. libgomp/ * testsuite/libgomp.oacc-c-c++-common/privatize-reduction-1.c: New test. * testsuite/libgomp.oacc-c-c++-common/privatize-reduction-2.c: New test. diff --git a/gcc/gimplify.c b/gcc/gimplify.c index dbd0f0ebd0c..4d954e20788 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -6823,20 +6823,27 @@ omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags) else splay_tree_insert (ctx->variables, (splay_tree_key)decl, flags); - /* For reductions clauses in OpenACC loop directives, by default create a - copy clause on the enclosing parallel construct for carrying back the - results. */ + /* For OpenACC loop directives, when a reduction clause is placed on + the outermost acc loop within an acc parallel or kernels + construct, it must have an implied copy data mapping. E.g. + + #pragma acc parallel + { + #pragma acc loop reduction (+:sum) + + a copy clause for sum should be added on the enclosing parallel + construct for carrying back the results. */ if (ctx->region_type == ORT_ACC && (flags & GOVD_REDUCTION)) { struct gimplify_omp_ctx *outer_ctx = ctx->outer_context; - while (outer_ctx) + if (outer_ctx) { n = splay_tree_lookup (outer_ctx->variables, (splay_tree_key)decl); if (n != NULL) { /* Ignore local variables and explicitly declared clauses. */ if (n->value & (GOVD_LOCAL | GOVD_EXPLICIT)) - break; + ; else if (outer_ctx->region_type == ORT_ACC_KERNELS) { /* According to the OpenACC spec, such a reduction variable @@ -6856,9 +6863,7 @@ omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags) { splay_tree_insert (outer_ctx->variables, (splay_tree_key)decl, GOVD_MAP | GOVD_SEEN); - break; } - outer_ctx = outer_ctx->outer_context; } } } diff --git a/gcc/testsuite/c-c++-common/goacc/reduction-8.c b/gcc/testsuite/c-c++-common/goacc/reduction-8.c new file mode 100644 index 00000000000..8a0283f4ac3 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/reduction-8.c @@ -0,0 +1,94 @@ +/* { dg-additional-options "-fdump-tree-gimple" } */ + +#define n 1000 + +int +main(void) +{ + int i, j; + int result, array[n]; + +#pragma acc parallel loop reduction (+:result) + for (i = 0; i < n; i++) + result ++; + +#pragma acc parallel +#pragma acc loop reduction (+:result) + for (i = 0; i < n; i++) + result ++; + +#pragma acc parallel +#pragma acc loop + for (i = 0; i < n; i++) + { + result = i; + +#pragma acc loop reduction(+:result) + for (j = 0; j < n; j++) + result ++; + + array[i] = result; + } + +#pragma acc parallel +#pragma acc loop + for (i = 0; i < n; i++) + { + result = i; + +#pragma acc loop worker vector reduction(+:result) + for (j = 0; j < n; j++) + result ++; + + array[i] = result; + } + +#pragma acc parallel +#pragma acc loop // { dg-warning "insufficient partitioning" } + for (i = 0; i < n; i++) + { + result = i; + +#pragma acc loop gang reduction(+:result) + for (j = 0; j < n; j++) + result ++; + + array[i] = result; + } + +#pragma acc parallel copy(result) +#pragma acc loop // { dg-warning "insufficient partitioning" } + for (i = 0; i < n; i++) + { + result = i; + +#pragma acc loop gang reduction(+:result) + for (j = 0; j < n; j++) + result ++; + + array[i] = result; + } + +#pragma acc kernels +#pragma acc loop + for (i = 0; i < n; i++) + { + result = i; + +#pragma acc loop reduction(+:result) + for (j = 0; j < n; j++) + result ++; + + array[i] = result; + } + + return 0; +} + +/* Check that default copy maps are generated for loop reductions. */ +/* { dg-final { scan-tree-dump-times "reduction..:result. map.tofrom:result .len: 4.." 1 "gimple" } } */ +/* { dg-final { scan-tree-dump-times "oacc_parallel map.tofrom:result .len: 4.." 2 "gimple" } } */ +/* { dg-final { scan-tree-dump-times "map.tofrom:array .len: 4000.. firstprivate.result." 3 "gimple" } } */ +/* { dg-final { scan-tree-dump-times "map.tofrom:result .len: 4.. map.tofrom:array .len: 4000.." 1 "gimple" } } */ +/* { dg-final { scan-tree-dump-times "map.tofrom:array .len: 4000.. map.force_tofrom:result .len: 4.." 1 "gimple" } } */ + diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/privatize-reduction-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/privatize-reduction-1.c new file mode 100644 index 00000000000..206e66fec79 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/privatize-reduction-1.c @@ -0,0 +1,41 @@ +#include <stdio.h> +#include <stdlib.h> + +int +main (int argc, char *argv[]) +{ +#define N 100 + int n = N; + int i, j, tmp; + int input[N*N], output[N], houtput[N]; + + for (i = 0; i < n * n; i++) + input[i] = i; + + for (i = 0; i < n; i++) + { + tmp = 0; + for (j = 0; j < n; j++) + tmp += input[i * n + j]; + houtput[i] = tmp; + } + + #pragma acc parallel loop gang + for (i = 0; i < n; i++) + { + tmp = 0; + + #pragma acc loop worker reduction(+:tmp) + for (j = 0; j < n; j++) + tmp += input[i * n + j]; + + output[i] = tmp; + } + + /* Test if every worker-level reduction had correct private result. */ + for (i = 0; i < n; i++) + if (houtput[i] != output[i]) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/privatize-reduction-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/privatize-reduction-2.c new file mode 100644 index 00000000000..0c317dcf8a6 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/privatize-reduction-2.c @@ -0,0 +1,23 @@ +#include <assert.h> + +int +main () +{ + const int n = 1000; + int i, j, temp, a[n]; + +#pragma acc parallel loop + for (i = 0; i < n; i++) + { + temp = i; +#pragma acc loop reduction (+:temp) + for (j = 0; j < n; j++) + temp ++; + a[i] = temp; + } + + for (i = 0; i < n; i++) + assert (a[i] == i+n); + + return 0; +} -- 2.17.1