On 14-12-18 20:58, Tom de Vries wrote: > 0003-openacc-Add-target-hook-TARGET_GOACC_ADJUST_PARALLEL.patch
> 0017-nvptx-Enable-large-vectors.patch 1. If I neutralize nvptx_adjust_parallelism so that it only defers to the default hook, like this: ... static unsigned nvptx_adjust_parallelism (unsigned inner_mask, unsigned outer_mask) { return default_goacc_adjust_parallelism (inner_mask, outer_mask); } ... I don't run into any failing tests. From what I can tell, the only test-case that the proposed implementation of the hook has an effect on is the worker vector loop in vred2d-128.c, but that one is passing. Can you confirm that this hook is in fact needed? Does this test fail on a specific card? Or is there another test-case that exercises this? 2. If you have a test-case where this is indeed failing without the proposed hook implementation, then please try to remove the hardcoding of vector_length > 32 from the test-source and instead set it using -fopenacc-dim. AFAIU, the proposed hook does not handle that case, so you should be able to make it fail. If so, can you test whether the attached implementation fixes it? Thanks, - Tom
[nvptx] Add nvptx_adjust_parallelism 2018-12-17 Tom de Vries <tdevr...@suse.de> * config/nvptx/nvptx.c (nvptx_adjust_parallelism): New function. (TARGET_GOACC_ADJUST_PARALLELISM): Define. --- gcc/config/nvptx/nvptx.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++ gcc/omp-offload.c | 7 ++++++ gcc/omp-offload.h | 1 + 3 files changed, 63 insertions(+) diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index f4095ff5f55..90bbc5b251e 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -5314,6 +5314,58 @@ nvptx_dim_limit (int axis) return 0; } +/* This is a copy of oacc_validate_dims from omp-offload.c that does not update + the function attributes. */ + +static void +oacc_validate_dims_no_update (tree fn, tree attrs, int *dims, int level, + unsigned used) +{ + tree purpose[GOMP_DIM_MAX]; + unsigned ix; + tree pos = TREE_VALUE (attrs); + + gcc_assert (pos); + + for (ix = 0; ix != GOMP_DIM_MAX; ix++) + { + purpose[ix] = TREE_PURPOSE (pos); + tree val = TREE_VALUE (pos); + dims[ix] = val ? TREE_INT_CST_LOW (val) : -1; + pos = TREE_CHAIN (pos); + } + + targetm.goacc.validate_dims (fn, dims, level); + + for (ix = 0; ix != GOMP_DIM_MAX; ix++) + if (dims[ix] < 0) + dims[ix] = (used & GOMP_DIM_MASK (ix) + ? oacc_get_default_dim (ix) : oacc_get_min_dim (ix)); +} + +/* Adjust the parallelism available to a loop given vector_length + associated with the offloaded function. 
*/ + +static unsigned +nvptx_adjust_parallelism (unsigned inner_mask, unsigned outer_mask) +{ + bool wv = ((inner_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) + && (inner_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR))); + if (!wv) + return default_goacc_adjust_parallelism (inner_mask, outer_mask); + + int dims[GOMP_DIM_MAX]; + tree attrs = oacc_get_fn_attrib (current_function_decl); + int fn_level = oacc_fn_attrib_level (attrs); + oacc_validate_dims_no_update (current_function_decl, attrs, dims, fn_level, + inner_mask); + + if (dims[GOMP_DIM_VECTOR] > PTX_WARP_SIZE) + inner_mask &= ~GOMP_DIM_MASK (GOMP_DIM_WORKER); + + return default_goacc_adjust_parallelism (inner_mask, outer_mask); +} + /* Determine whether fork & joins are needed. */ static bool @@ -6109,6 +6161,9 @@ nvptx_set_current_function (tree fndecl) #undef TARGET_GOACC_DIM_LIMIT #define TARGET_GOACC_DIM_LIMIT nvptx_dim_limit +#undef TARGET_GOACC_ADJUST_PARALLELISM +#define TARGET_GOACC_ADJUST_PARALLELISM nvptx_adjust_parallelism + #undef TARGET_GOACC_FORK_JOIN #define TARGET_GOACC_FORK_JOIN nvptx_goacc_fork_join diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c index 3338e0633a1..80ecda82d24 100644 --- a/gcc/omp-offload.c +++ b/gcc/omp-offload.c @@ -580,6 +580,13 @@ oacc_get_default_dim (int dim) return oacc_default_dims[dim]; } +int +oacc_get_min_dim (int dim) +{ + gcc_assert (0 <= dim && dim < GOMP_DIM_MAX); + return oacc_min_dims[dim]; +} + /* Parse the default dimension parameter. This is a set of :-separated optional compute dimensions. Each specified dimension is a positive integer. When device type support is added, it is diff --git a/gcc/omp-offload.h b/gcc/omp-offload.h index 176c4da7e88..08e994abdb9 100644 --- a/gcc/omp-offload.h +++ b/gcc/omp-offload.h @@ -23,6 +23,7 @@ along with GCC; see the file COPYING3. 
If not see #define GCC_OMP_DEVICE_H extern int oacc_get_default_dim (int dim); +extern int oacc_get_min_dim (int dim); extern int oacc_fn_attrib_level (tree attr); extern GTY(()) vec<tree, va_gc> *offload_funcs;