On 14-12-18 20:58, Tom de Vries wrote:
> 0003-openacc-Add-target-hook-TARGET_GOACC_ADJUST_PARALLEL.patch

> 0017-nvptx-Enable-large-vectors.patch

1.

If I reduce nvptx_adjust_parallelism to the default behaviour like this:
...
static unsigned
nvptx_adjust_parallelism (unsigned inner_mask, unsigned outer_mask)
{
  return default_goacc_adjust_parallelism (inner_mask, outer_mask);
}
...
I don't run into any failing tests. From what I can tell, the only
test-case that the proposed implementation of the hook has an effect on
is the worker vector loop in vred2d-128.c, but that one is passing.

Can you confirm that this hook is in fact needed? Does this test fail on
a specific card? Or is there another test-case that exercises this?

2.

If you have a test-case where this is indeed failing without the
proposed hook implementation, then please try to remove the hardcoding
of vector_length > 32 from the test-source and instead set it using
-fopenacc-dim. AFAIU, the proposed hook does not handle that case, so
you should be able to make it fail.
If so, can you test whether the attached implementation fixes it?

Thanks,
- Tom
[nvptx] Add nvptx_adjust_parallelism

2018-12-17  Tom de Vries  <tdevr...@suse.de>

	* config/nvptx/nvptx.c (nvptx_adjust_parallelism): New function.
	(TARGET_GOACC_ADJUST_PARALLELISM): Define.

---
 gcc/config/nvptx/nvptx.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++
 gcc/omp-offload.c        |  7 ++++++
 gcc/omp-offload.h        |  1 +
 3 files changed, 63 insertions(+)

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index f4095ff5f55..90bbc5b251e 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -5314,6 +5314,58 @@ nvptx_dim_limit (int axis)
   return 0;
 }
 
+/* Like oacc_validate_dims from omp-offload.c, but leave the attributes of FN
+   alone: just store in DIMS the dimensions from ATTRS, as validated by the
+   target for LEVEL and with defaults applied according to USED.  */
+
+static void
+oacc_validate_dims_no_update (tree fn, tree attrs, int *dims, int level,
+			      unsigned used)
+{
+  unsigned ix;
+  tree pos = TREE_VALUE (attrs);
+
+  gcc_assert (pos);
+
+  for (ix = 0; ix != GOMP_DIM_MAX; ix++)
+    {
+      tree val = TREE_VALUE (pos);
+      dims[ix] = val ? TREE_INT_CST_LOW (val) : -1;
+      pos = TREE_CHAIN (pos);
+    }
+
+  targetm.goacc.validate_dims (fn, dims, level);
+
+  /* Anything still negative was left unspecified: pick a default.  */
+  for (ix = 0; ix != GOMP_DIM_MAX; ix++)
+    if (dims[ix] < 0)
+      dims[ix] = (used & GOMP_DIM_MASK (ix)
+		  ? oacc_get_default_dim (ix) : oacc_get_min_dim (ix));
+}
+
+/* Implement TARGET_GOACC_ADJUST_PARALLELISM: a worker-vector loop loses its
+   worker partitioning if the vector length exceeds PTX_WARP_SIZE.  */
+
+static unsigned
+nvptx_adjust_parallelism (unsigned inner_mask, unsigned outer_mask)
+{
+  const unsigned worker_bit = GOMP_DIM_MASK (GOMP_DIM_WORKER);
+  const unsigned vector_bit = GOMP_DIM_MASK (GOMP_DIM_VECTOR);
+
+  if ((inner_mask & worker_bit) && (inner_mask & vector_bit))
+    {
+      int dims[GOMP_DIM_MAX];
+      tree fn = current_function_decl;
+      tree attrs = oacc_get_fn_attrib (fn);
+      int level = oacc_fn_attrib_level (attrs);
+      oacc_validate_dims_no_update (fn, attrs, dims, level, inner_mask);
+      if (dims[GOMP_DIM_VECTOR] > PTX_WARP_SIZE)
+	inner_mask &= ~worker_bit;
+    }
+
+  return default_goacc_adjust_parallelism (inner_mask, outer_mask);
+}
+
 /* Determine whether fork & joins are needed.  */
 
 static bool
@@ -6109,6 +6161,9 @@ nvptx_set_current_function (tree fndecl)
 #undef TARGET_GOACC_DIM_LIMIT
 #define TARGET_GOACC_DIM_LIMIT nvptx_dim_limit
 
+#undef TARGET_GOACC_ADJUST_PARALLELISM
+#define TARGET_GOACC_ADJUST_PARALLELISM nvptx_adjust_parallelism
+
 #undef TARGET_GOACC_FORK_JOIN
 #define TARGET_GOACC_FORK_JOIN nvptx_goacc_fork_join
 
diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c
index 3338e0633a1..80ecda82d24 100644
--- a/gcc/omp-offload.c
+++ b/gcc/omp-offload.c
@@ -580,6 +580,13 @@ oacc_get_default_dim (int dim)
   return oacc_default_dims[dim];
 }
 
+int
+oacc_get_min_dim (int dim)
+{
+  gcc_assert (0 <= dim && dim < GOMP_DIM_MAX);  /* Reject a bad DIM.  */
+  return oacc_min_dims[dim];  /* Cf. oacc_get_default_dim above.  */
+}
+
 /* Parse the default dimension parameter.  This is a set of
    :-separated optional compute dimensions.  Each specified dimension
    is a positive integer.  When device type support is added, it is
diff --git a/gcc/omp-offload.h b/gcc/omp-offload.h
index 176c4da7e88..08e994abdb9 100644
--- a/gcc/omp-offload.h
+++ b/gcc/omp-offload.h
@@ -23,6 +23,7 @@ along with GCC; see the file COPYING3.  If not see
 #define GCC_OMP_DEVICE_H
 
 extern int oacc_get_default_dim (int dim);
+extern int oacc_get_min_dim (int dim);
 extern int oacc_fn_attrib_level (tree attr);
 
 extern GTY(()) vec<tree, va_gc> *offload_funcs;

Reply via email to