[ was: Re: [nvptx] vector length patch series ] On 14-12-18 20:58, Tom de Vries wrote: > 0023-nvptx-Force-vl32-if-calling-vector-partitionable-rou.patch
> @@ -73,6 +73,7 @@ > #include "cfgloop.h" > #include "fold-const.h" > #include "intl.h" > +#include "tree-hash-traits.h" > #include "omp-offload.h" > > /* This file should be included last. */ I dropped that include; it's not necessary. > @@ -5557,19 +5637,6 @@ nvptx_adjust_parallelism (unsigned inner_mask, > unsigned outer_mask) > if (wv) > return inner_mask & ~GOMP_DIM_MASK (GOMP_DIM_WORKER); > > - /* It's difficult to guarantee that warps in large vector_lengths > - will remain convergent when a vector loop is nested inside a > - worker loop. Therefore, fallback to setting vector_length to > - PTX_WARP_SIZE. Hopefully this condition may be relaxed for > - sm_70+ targets. */ > - if ((inner_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR)) > - && (outer_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))) > - { > - tree attr = tree_cons (get_identifier (NVPTX_GOACC_VL_WARP), NULL_TREE, > - DECL_ATTRIBUTES (current_function_decl)); > - DECL_ATTRIBUTES (current_function_decl) = attr; > - } > - > return inner_mask; > } > Here the patch removes some code related to a workaround that was added earlier in the patch series (0017-nvptx-Enable-large-vectors.patch), which means that the submitted version of that patch should not have contained that code in the first place. Committed (without test-cases) as attached. Thanks, - Tom
[nvptx] Force vl32 if calling vector-partitionable routines With PTX_MAX_VECTOR_LENGTH set to larger than PTX_WARP_SIZE, routines can be called from offloading regions with vector-size set to larger than warp size. OTOH, vector-partitionable routines assume warp-sized vector length. Detect if we're calling a vector-partitionable routine from an offloading region, and if so, fall back to warp-sized vector length in that region. 2018-12-17 Tom de Vries <tdevr...@suse.de> PR target/85486 * config/nvptx/nvptx.c (has_vector_partitionable_routine_calls_p): New function. (nvptx_goacc_validate_dims): Force vl32 if calling vector-partitionable routines. --- gcc/config/nvptx/nvptx.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index 5a4b38de522..7fdc285b6f8 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -59,6 +59,7 @@ #include "builtins.h" #include "omp-general.h" #include "omp-low.h" +#include "omp-offload.h" #include "gomp-constants.h" #include "dumpfile.h" #include "internal-fn.h" @@ -5496,6 +5497,40 @@ nvptx_apply_dim_limits (int dims[]) dims[GOMP_DIM_VECTOR] = PTX_WARP_SIZE; } +/* Return true if FNDECL contains calls to vector-partitionable routines. 
*/ + +static bool +has_vector_partitionable_routine_calls_p (tree fndecl) +{ + if (!fndecl) + return false; + + basic_block bb; + FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fndecl)) + for (gimple_stmt_iterator i = gsi_start_bb (bb); !gsi_end_p (i); + gsi_next_nondebug (&i)) + { + gimple *stmt = gsi_stmt (i); + if (gimple_code (stmt) != GIMPLE_CALL) + continue; + + tree callee = gimple_call_fndecl (stmt); + if (!callee) + continue; + + tree attrs = oacc_get_fn_attrib (callee); + if (attrs == NULL_TREE) + return false; + + int partition_level = oacc_fn_attrib_level (attrs); + bool seq_routine_p = partition_level == GOMP_DIM_MAX; + if (!seq_routine_p) + return true; + } + + return false; +} + /* As nvptx_goacc_validate_dims, but does not return bool to indicate whether DIMS has changed. */ @@ -5611,6 +5646,16 @@ nvptx_goacc_validate_dims_1 (tree decl, int dims[], int fn_level) old_dims[i] = dims[i]; const char *vector_reason = NULL; + if (offload_region_p && has_vector_partitionable_routine_calls_p (decl)) + { + if (dims[GOMP_DIM_VECTOR] > PTX_WARP_SIZE) + { + vector_reason = G_("using vector_length (%d) due to call to" + " vector-partitionable routine, ignoring %d"); + dims[GOMP_DIM_VECTOR] = PTX_WARP_SIZE; + } + } + if (dims[GOMP_DIM_VECTOR] == 0) { vector_reason = G_("using vector_length (%d), ignoring runtime setting");