On 14-12-18 20:58, Tom de Vries wrote:
> 0007-nvptx-consolidate-offloaded-function-attributes-into.patch


> diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
> index a3169febbb4..dcfa57d10ca 100644
> --- a/gcc/config/nvptx/nvptx.c
> +++ b/gcc/config/nvptx/nvptx.c
> @@ -2872,6 +2872,17 @@ nvptx_reorg_uniform_simt ()
>      }
>  }
>  
> +/* Offloading function attributes.  */
> +
> +struct offload_attrs
> +{
> +  unsigned mask;
> +  int num_gangs;
> +  int num_workers;
> +  int vector_length;
> +  int max_workers;
> +};
> +

I like the idea of factoring out the extraction of information from the
function attributes.  But max_workers is something derived from that
information, so it doesn't seem proper to add it here.

>  /* Loop structure of the function.  The entire function is described as
>     a NULL loop.  */
>  
> @@ -4569,6 +4580,56 @@ nvptx_neuter_pars (parallel *par, unsigned modes, 
> unsigned outer)
>      nvptx_neuter_pars (par->next, modes, outer);
>  }
>  
> +static void
> +populate_offload_attrs (offload_attrs *oa)
> +{
> +  tree attr = oacc_get_fn_attrib (current_function_decl);
> +  tree dims = TREE_VALUE (attr);
> +  unsigned ix;
> +
> +  oa->mask = 0;
> +
> +  for (ix = 0; ix != GOMP_DIM_MAX; ix++, dims = TREE_CHAIN (dims))
> +    {
> +      tree t = TREE_VALUE (dims);
> +      int size = (t == NULL_TREE) ? 0 : TREE_INT_CST_LOW (t);

This is very strange.  Why do we represent 'TREE_VALUE (dims) ==
NULL_TREE' with '0' (which means determined at runtime)? In
oacc_validate_dims we use -1 for this, which means 'not set'.

> +      tree allowed = TREE_PURPOSE (dims);
> +
> +      if (size != 1 && !(allowed && integer_zerop (allowed)))
> +       oa->mask |= GOMP_DIM_MASK (ix);
> +
> +      switch (ix)
> +       {
> +       case GOMP_DIM_GANG:
> +         oa->num_gangs = size;
> +         break;
> +
> +       case GOMP_DIM_WORKER:
> +         oa->num_workers = size;
> +         break;
> +
> +       case GOMP_DIM_VECTOR:
> +         oa->vector_length = size;
> +         break;
> +       }
> +    }
> +

> +  if (oa->vector_length == 0)
> +    {
> +      /* FIXME: Need a more graceful way to handle large vector
> +        lengths in OpenACC routines.  */
> +      if (!lookup_attribute ("omp target entrypoint",
> +                            DECL_ATTRIBUTES (current_function_decl)))
> +       oa->vector_length = PTX_WARP_SIZE;
> +      else
> +       oa->vector_length = PTX_VECTOR_LENGTH;
> +    }

The case that 'oa->vector_length == 0' is triggered by calling
populate_offload_attrs from nvptx_adjust_parallelism, which is called
before oacc_validate_dims has updated the function attributes.

This kludge is trying to resolve a circular dependency: after calling
oacc_validate_dims and updating the function attributes, we know the
chosen vector length, which is necessary for nvptx_adjust_parallelism,
which influences the used_mask given as parameter to ... oacc_validate_dims.

The way the kludge tries to cut this circular dependency is by
replicating setting of default dimensions (designed to be done in
nvptx_goacc_validate_dims) here in populate_offload_attrs.

In the counter-proposed nvptx_adjust_parallelism (
https://gcc.gnu.org/ml/gcc-patches/2018-12/msg01619.html ), we cut the
circular dependency in a different way: by reusing
nvptx_goacc_validate_dims in nvptx_adjust_parallelism.

So, we can assume oa->vector_length > 0 here, and declare this dead code.

> +  if (oa->num_workers == 0)
> +    oa->max_workers = PTX_CTA_SIZE / oa->vector_length;
> +  else
> +    oa->max_workers = oa->num_workers;
> +}
> +

I moved this bit to the patch introducing nvptx_mach_max_workers.

>  #if WORKAROUND_PTXJIT_BUG_2
>  /* Variant of pc_set that only requires JUMP_P (INSN) if STRICT.  This 
> variant
>     is needed in the nvptx target because the branches generated for
> @@ -4750,27 +4811,19 @@ nvptx_reorg (void)
>      {
>        /* If we determined this mask before RTL expansion, we could
>          elide emission of some levels of forks and joins.  */
> -      unsigned mask = 0;
> -      tree dims = TREE_VALUE (attr);
> -      unsigned ix;
> +      offload_attrs oa;
>  
> -      for (ix = 0; ix != GOMP_DIM_MAX; ix++, dims = TREE_CHAIN (dims))
> -       {
> -         int size = TREE_INT_CST_LOW (TREE_VALUE (dims));
> -         tree allowed = TREE_PURPOSE (dims);
> +      populate_offload_attrs (&oa);
>  
> -         if (size != 1 && !(allowed && integer_zerop (allowed)))
> -           mask |= GOMP_DIM_MASK (ix);
> -       }
>        /* If there is worker neutering, there must be vector
>          neutering.  Otherwise the hardware will fail.  */
> -      gcc_assert (!(mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
> -                 || (mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR)));
> +      gcc_assert (!(oa.mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
> +                 || (oa.mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR)));
>  
>        /* Discover & process partitioned regions.  */
>        parallel *pars = nvptx_discover_pars (&bb_insn_map);
>        nvptx_process_pars (pars);
> -      nvptx_neuter_pars (pars, mask, 0);
> +      nvptx_neuter_pars (pars, oa.mask, 0);
>        delete pars;
>      }
>  

Committed as attached.

Thanks,
- Tom

[nvptx] Factor out populate_offload_attrs

Factor out populate_offload_attrs from nvptx_reorg.

2018-12-17  Tom de Vries  <tdevr...@suse.de>

	* config/nvptx/nvptx.c (struct offload_attrs): New.
	(populate_offload_attrs): New function.  Factor mask extraction out of
	nvptx_reorg.  Add extraction of dimensions.
	(nvptx_reorg): Use populate_offload_attrs.

---
 gcc/config/nvptx/nvptx.c | 63 ++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 50 insertions(+), 13 deletions(-)

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 8cb58341c23..f527429ce2d 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -2873,6 +2873,16 @@ nvptx_reorg_uniform_simt ()
     }
 }
 
+/* Offloading function attributes.  */
+
+struct offload_attrs
+{
+  unsigned mask;
+  int num_gangs;
+  int num_workers;
+  int vector_length;
+};
+
 /* Loop structure of the function.  The entire function is described as
    a NULL loop.  */
 
@@ -4576,6 +4586,41 @@ nvptx_neuter_pars (parallel *par, unsigned modes, unsigned outer)
     nvptx_neuter_pars (par->next, modes, outer);
 }
 
+static void
+populate_offload_attrs (offload_attrs *oa)
+{
+  tree attr = oacc_get_fn_attrib (current_function_decl);
+  tree dims = TREE_VALUE (attr);
+  unsigned ix;
+
+  oa->mask = 0;
+
+  for (ix = 0; ix != GOMP_DIM_MAX; ix++, dims = TREE_CHAIN (dims))
+    {
+      tree t = TREE_VALUE (dims);
+      int size = (t == NULL_TREE) ? -1 : TREE_INT_CST_LOW (t);
+      tree allowed = TREE_PURPOSE (dims);
+
+      if (size != 1 && !(allowed && integer_zerop (allowed)))
+	oa->mask |= GOMP_DIM_MASK (ix);
+
+      switch (ix)
+	{
+	case GOMP_DIM_GANG:
+	  oa->num_gangs = size;
+	  break;
+
+	case GOMP_DIM_WORKER:
+	  oa->num_workers = size;
+	  break;
+
+	case GOMP_DIM_VECTOR:
+	  oa->vector_length = size;
+	  break;
+	}
+    }
+}
+
 #if WORKAROUND_PTXJIT_BUG_2
 /* Variant of pc_set that only requires JUMP_P (INSN) if STRICT.  This variant
    is needed in the nvptx target because the branches generated for
@@ -4757,27 +4802,19 @@ nvptx_reorg (void)
     {
       /* If we determined this mask before RTL expansion, we could
 	 elide emission of some levels of forks and joins.  */
-      unsigned mask = 0;
-      tree dims = TREE_VALUE (attr);
-      unsigned ix;
+      offload_attrs oa;
 
-      for (ix = 0; ix != GOMP_DIM_MAX; ix++, dims = TREE_CHAIN (dims))
-	{
-	  int size = TREE_INT_CST_LOW (TREE_VALUE (dims));
-	  tree allowed = TREE_PURPOSE (dims);
+      populate_offload_attrs (&oa);
 
-	  if (size != 1 && !(allowed && integer_zerop (allowed)))
-	    mask |= GOMP_DIM_MASK (ix);
-	}
       /* If there is worker neutering, there must be vector
 	 neutering.  Otherwise the hardware will fail.  */
-      gcc_assert (!(mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
-		  || (mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR)));
+      gcc_assert (!(oa.mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
+		  || (oa.mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR)));
 
       /* Discover & process partitioned regions.  */
       parallel *pars = nvptx_discover_pars (&bb_insn_map);
       nvptx_process_pars (pars);
-      nvptx_neuter_pars (pars, mask, 0);
+      nvptx_neuter_pars (pars, oa.mask, 0);
       delete pars;
     }
 

Reply via email to