I've applied this patch to gomp-4_0-branch to fix an issue with the propagation of variables used in conditional expressions to loops partitioned across both the worker and vector axes. More details about this patch can be found here: <https://gcc.gnu.org/ml/gcc-patches/2016-10/msg02187.html>.
Cesar
2016-10-26 Cesar Philippidis <ce...@codesourcery.com>

    gcc/
    * config/nvptx/nvptx.c (nvptx_single): Use a single predicate for
    loops partitioned across both worker and vector axes.

    libgomp/
    * testsuite/libgomp.oacc-c-c++-common/broadcast-1.c: New test.

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 7bf5987..4e6ed60 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -3507,11 +3507,38 @@ nvptx_single (unsigned mask, basic_block from, basic_block to)
       /* Insert the vector test inside the worker test. */
       unsigned mode;
       rtx_insn *before = tail;
+      rtx wvpred = NULL_RTX;
+      bool skip_vector = false;
+
+      /* Create a single predicate for loops containing both worker and
+         vectors. */
+      if (cond_branch
+          && (GOMP_DIM_MASK (GOMP_DIM_WORKER) & mask)
+          && (GOMP_DIM_MASK (GOMP_DIM_VECTOR) & mask))
+        {
+          rtx regx = gen_reg_rtx (SImode);
+          rtx regy = gen_reg_rtx (SImode);
+          rtx tmp = gen_reg_rtx (SImode);
+          wvpred = gen_reg_rtx (BImode);
+
+          emit_insn_before (gen_oacc_dim_pos (regx, const1_rtx), head);
+          emit_insn_before (gen_oacc_dim_pos (regy, const2_rtx), head);
+          emit_insn_before (gen_rtx_SET (tmp, gen_rtx_IOR (SImode, regx, regy)),
+                            head);
+          emit_insn_before (gen_rtx_SET (wvpred, gen_rtx_NE (BImode, tmp,
+                                                             const0_rtx)),
+                            head);
+
+          skip_mask &= ~(GOMP_DIM_MASK (GOMP_DIM_VECTOR));
+          skip_vector = true;
+        }
+
       for (mode = GOMP_DIM_WORKER; mode <= GOMP_DIM_VECTOR; mode++)
         if (GOMP_DIM_MASK (mode) & skip_mask)
           {
             rtx_code_label *label = gen_label_rtx ();
-            rtx pred = cfun->machine->axis_predicate[mode - GOMP_DIM_WORKER];
+            rtx pred = skip_vector ? wvpred
+              : cfun->machine->axis_predicate[mode - GOMP_DIM_WORKER];
 
             if (!pred)
               {
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/broadcast-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/broadcast-1.c
new file mode 100644
index 0000000..4dcb60d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/broadcast-1.c
@@ -0,0 +1,49 @@
+/* Ensure that worker-vector state conditional expressions are
+   properly handled by the nvptx backend. */
+
+#include <assert.h>
+#include <math.h>
+
+
+#define N 1024
+
+int A[N][N] ;
+
+void test(int x)
+{
+#pragma acc parallel num_gangs(16) num_workers(4) vector_length(32) copyout(A)
+  {
+#pragma acc loop gang
+    for(int j=0;j<N;j++)
+      {
+        if (x==1)
+          {
+#pragma acc loop worker vector
+            for(int i=0;i<N;i++)
+              A[i][j] = 1;
+          }
+        else
+          {
+#pragma acc loop worker vector
+            for(int i=0;i<N;i++)
+              A[i][j] = -1;
+          }
+      }
+  }
+}
+
+
+int main(void)
+{
+  test (0);
+  for (int i = 0; i < N; i++)
+    for (int j = 0; j < N; j++)
+      assert (A[i][j] == -1);
+
+  test (1);
+  for (int i = 0; i < N; i++)
+    for (int j = 0; j < N; j++)
+      assert (A[i][j] == 1);
+
+  return 0;
+}