On 14-12-18 20:58, Tom de Vries wrote:
> 0012-nvptx-Add-axis_dim.patch

> diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
> index 74a0d4b04d9..02ecf12bd84 100644
> --- a/gcc/config/nvptx/nvptx.c
> +++ b/gcc/config/nvptx/nvptx.c
> @@ -2885,6 +2885,23 @@ struct offload_attrs
>    int max_workers;
>  };
>  
> +/* Define entries for cfun->machine->axis_dim.  */
> +
> +#define MACH_VECTOR_LENGTH 0
> +#define MACH_MAX_WORKERS 1
> +
> +static int ATTRIBUTE_UNUSED
> +nvptx_mach_max_workers ()
> +{
> +  return cfun->machine->axis_dim[MACH_MAX_WORKERS];
> +}
> +
> +static int ATTRIBUTE_UNUSED
> +nvptx_mach_vector_length ()
> +{
> +  return cfun->machine->axis_dim[MACH_VECTOR_LENGTH];
> +}
> +
>  /* Loop structure of the function.  The entire function is described as
>     a NULL loop.  */
>  
> @@ -4832,6 +4849,9 @@ nvptx_reorg (void)
>  
>        populate_offload_attrs (&oa);
>  
> +      cfun->machine->axis_dim[MACH_VECTOR_LENGTH] = oa.vector_length;
> +      cfun->machine->axis_dim[MACH_MAX_WORKERS] = oa.max_workers;
> +

This initialization is done during pass_machine_reorg, but the data is
needed earlier, which makes it necessary to call populate_offload_attrs
again at those earlier points instead of using
nvptx_mach_vector_length/nvptx_mach_max_workers.

I've made the initialization lazy, which fixes that problem.

>        /* If there is worker neutering, there must be vector
>          neutering.  Otherwise the hardware will fail.  */
>        gcc_assert (!(oa.mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
> diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h
> index a2fe8b68b22..4059691a609 100644
> --- a/gcc/config/nvptx/nvptx.h
> +++ b/gcc/config/nvptx/nvptx.h
> @@ -218,6 +218,8 @@ struct GTY(()) machine_function
>    int return_mode; /* Return mode of current fn.
>                       (machine_mode not defined yet.) */
>    rtx axis_predicate[2]; /* Neutering predicates.  */
> +  int axis_dim[2]; /* Maximum number of threads on each axis, dim[0] is
> +                     vector_length, dim[1] is num_workers.  */
>    rtx unisimt_master; /* 'Master lane index' for -muniform-simt.  */
>    rtx unisimt_predicate; /* Predicate for -muniform-simt.  */
>    rtx unisimt_location; /* Mask location for -muniform-simt.  */
> -- 
> 2.17.2

Committed as attached.

Thanks,
- Tom
[nvptx] Add nvptx_mach_vector_length, nvptx_mach_max_workers

The vector length and maximum number of workers are known at compile time.
Make these easily available during code generation via new functions.

2019-01-03  Tom de Vries  <tdevr...@suse.de>

	* config/nvptx/nvptx.c (MACH_VECTOR_LENGTH, MACH_MAX_WORKERS): Define.
	(init_axis_dim, nvptx_mach_max_workers, nvptx_mach_vector_length): New
	functions.
	* config/nvptx/nvptx.h (struct machine_function): Add axis_dim and
	axis_dim_init_p.

---
 gcc/config/nvptx/nvptx.c | 41 +++++++++++++++++++++++++++++++++++++++++
 gcc/config/nvptx/nvptx.h |  3 +++
 2 files changed, 44 insertions(+)

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index f527429ce2d..52cbac957ce 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -2883,6 +2883,47 @@ struct offload_attrs
   int vector_length;
 };
 
+/* Define entries for cfun->machine->axis_dim.  */
+
+#define MACH_VECTOR_LENGTH 0
+#define MACH_MAX_WORKERS 1
+
+static void populate_offload_attrs (offload_attrs *oa);
+
+static void
+init_axis_dim (void)
+{
+  offload_attrs oa;
+  int max_workers;
+
+  populate_offload_attrs (&oa);
+
+  if (oa.num_workers == 0)
+    max_workers = PTX_CTA_SIZE / oa.vector_length;
+  else
+    max_workers = oa.num_workers;
+
+  cfun->machine->axis_dim[MACH_VECTOR_LENGTH] = oa.vector_length;
+  cfun->machine->axis_dim[MACH_MAX_WORKERS] = max_workers;
+  cfun->machine->axis_dim_init_p = true;
+}
+
+static int ATTRIBUTE_UNUSED
+nvptx_mach_max_workers ()
+{
+  if (!cfun->machine->axis_dim_init_p)
+    init_axis_dim ();
+  return cfun->machine->axis_dim[MACH_MAX_WORKERS];
+}
+
+static int ATTRIBUTE_UNUSED
+nvptx_mach_vector_length ()
+{
+  if (!cfun->machine->axis_dim_init_p)
+    init_axis_dim ();
+  return cfun->machine->axis_dim[MACH_VECTOR_LENGTH];
+}
+
 /* Loop structure of the function.  The entire function is described as
    a NULL loop.  */
 
diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h
index a2fe8b68b22..cb4404504c5 100644
--- a/gcc/config/nvptx/nvptx.h
+++ b/gcc/config/nvptx/nvptx.h
@@ -218,6 +218,9 @@ struct GTY(()) machine_function
   int return_mode; /* Return mode of current fn.
 		      (machine_mode not defined yet.) */
   rtx axis_predicate[2]; /* Neutering predicates.  */
+  int axis_dim[2]; /* Maximum number of threads on each axis, dim[0] is
+		      vector_length, dim[1] is max_workers.  */
+  bool axis_dim_init_p;
   rtx unisimt_master; /* 'Master lane index' for -muniform-simt.  */
   rtx unisimt_predicate; /* Predicate for -muniform-simt.  */
   rtx unisimt_location; /* Mask location for -muniform-simt.  */

Reply via email to