Hi! On 2018-08-13T21:41:50+0100, Julian Brown <jul...@codesourcery.com> wrote: > On Mon, 13 Aug 2018 11:42:26 -0700 Cesar Philippidis <ce...@codesourcery.com> > wrote: >> On 08/13/2018 09:21 AM, Julian Brown wrote: >> > diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c >> > b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c >> > new file mode 100644 >> > index 0000000..2fa708a >> > --- /dev/null >> > +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c >> > @@ -0,0 +1,106 @@ >> > +/* { dg-xfail-run-if "gangprivate failure" { >> > openacc_nvidia_accel_selected } { "-O0" } { "" } } */
>> is the above xfail still necessary? It seems to xpass >> for me on nvptx. However, I see this regression on the host: >> >> FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/loop-gwv-2.c >> -DACC_DEVICE_TYPE_host=1 -DACC_MEM_SHARED=1 -O2 execution test > Oops, this was the version of the patch I meant to post (and the one I > tested). The XFAIL on loop-gwv-2.c isn't necessary, plus that test > needed some other fixes to make it pass for NVPTX (it was written for > GCN to start with). As I found out later, this testcase actually does work without the code changes (OpenACC privatization levels) that it's accompanying -- and I don't actually see anything in the testcase that would exercise those code changes. Maybe it was for some earlier revision of these code changes? Anyway, as it's all-PASS for all systems that I've tested on, I've now pushed "Add 'libgomp.oacc-c-c++-common/loop-gwv-2.c'" to the master branch in commit 5a16fb19e7c4274f8dd9bbdd30d7d06fe2eff8af, see attached. Grüße Thomas ----------------- Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank Thürauf
>From 5a16fb19e7c4274f8dd9bbdd30d7d06fe2eff8af Mon Sep 17 00:00:00 2001 From: Julian Brown <jul...@codesourcery.com> Date: Mon, 13 Aug 2018 21:41:50 +0100 Subject: [PATCH] Add 'libgomp.oacc-c-c++-common/loop-gwv-2.c' libgomp/ * testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c: New. --- .../libgomp.oacc-c-c++-common/loop-gwv-2.c | 95 +++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c new file mode 100644 index 00000000000..a4f81a39e24 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c @@ -0,0 +1,95 @@ +#include <stdio.h> +#include <openacc.h> +#include <alloca.h> +#include <string.h> +#include <gomp-constants.h> +#include <stdlib.h> + +#if 0 +#define DEBUG(DIM, IDX, VAL) \ + fprintf (stderr, "%sdist[%d] = %d\n", (DIM), (IDX), (VAL)) +#else +#define DEBUG(DIM, IDX, VAL) +#endif + +#define N (32*32*32) + +int +check (const char *dim, int *dist, int dimsize) +{ + int ix; + int exit = 0; + + for (ix = 0; ix < dimsize; ix++) + { + DEBUG(dim, ix, dist[ix]); + if (dist[ix] < (N) / (dimsize + 0.5) + || dist[ix] > (N) / (dimsize - 0.5)) + { + fprintf (stderr, "did not distribute to %ss (%d not between %d " + "and %d)\n", dim, dist[ix], (int) ((N) / (dimsize + 0.5)), + (int) ((N) / (dimsize - 0.5))); + exit |= 1; + } + } + + return exit; +} + +int main () +{ + int ary[N]; + int ix; + int exit = 0; + int gangsize = 0, workersize = 0, vectorsize = 0; + int *gangdist, *workerdist, *vectordist; + + for (ix = 0; ix < N;ix++) + ary[ix] = -1; + +#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ + copy(ary) copyout(gangsize, workersize, vectorsize) + { +#pragma acc loop gang worker vector + for (unsigned ix = 0; ix < N; ix++) + { + int g, w, v; + + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id 
(GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); + + ary[ix] = (g << 16) | (w << 8) | v; + } + + gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG); + workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); + vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); + } + + gangdist = (int *) alloca (gangsize * sizeof (int)); + workerdist = (int *) alloca (workersize * sizeof (int)); + vectordist = (int *) alloca (vectorsize * sizeof (int)); + memset (gangdist, 0, gangsize * sizeof (int)); + memset (workerdist, 0, workersize * sizeof (int)); + memset (vectordist, 0, vectorsize * sizeof (int)); + + /* Test that work is shared approximately equally amongst each active + gang/worker/vector. */ + for (ix = 0; ix < N; ix++) + { + int g = (ary[ix] >> 16) & 255; + int w = (ary[ix] >> 8) & 255; + int v = ary[ix] & 255; + + gangdist[g]++; + workerdist[w]++; + vectordist[v]++; + } + + exit = check ("gang", gangdist, gangsize); + exit |= check ("worker", workerdist, workersize); + exit |= check ("vector", vectordist, vectorsize); + + return exit; +} -- 2.30.2