4 consuming two locations

Juan A. Suarez Romero Mon, 05 Dec 2016 01:25:58 -0800

On Thu, 2016-12-01 at 22:22 -0800, Jason Ekstrand wrote:
> +Ken
> 
> On Thu, Dec 1, 2016 at 10:17 PM, Jason Ekstrand <ja...@jlekstrand.net
> > wrote:
> > I'm not sure how I feel about this one.  It seems like it would
> > almost be easier to just pick one convention or the other for NIR
> > and adjust one of the drivers accordingly.  I don't know that I
> > have a huge preference which convention we choose.  I guess the
> > Vulkan convention matches our hardware a bit better.  In either
> > case, converting from one to the other should be a simple matter of
> > building a remap table or a creative use of popcount.
> >


As you said in another email, I also think it would be more easy to
just mark 2 bits per dvec3/dvec4 in inputs_read and change the GL
driver accordingly, than the other way around, as it is done in TGSI.

> > On Fri, Nov 25, 2016 at 12:52 AM, Juan A. Suarez Romero <jasuarez@i
> > galia.com> wrote:
> > > One difference between OpenGL and Vulkan regarding 64-bit vertex
> > > 
> > > attribute types is that dvec3 and dvec4 consumes just one
> > > location in
> > > 
> > > OpenGL, while in Vulkan it consumes two locations.
> > > 
> > > 
> > > 
> > > Thus, in OpenGL for each dvec3/dvec4 vertex attrib we mark just
> > > one bit
> > > 
> > > in our internal inputs_read bitmap (and also the corresponding
> > > bit in
> > > 
> > > double_inputs_read bitmap) while in Vulkan we mark two
> > > consecutive bits
> > > 
> > > in both bitmaps.
> > > 
> > > 
> > > 
> > > This is handled with a nir option called
> > > "dvec3_consumes_two_locations",
> > > 
> > > which is set to True for Vulkan code. And all the computation
> > > regarding
> > > 
> > > emitting vertices as well as the mapping between attributes and
> > > physical
> > > 
> > > registers use this option to correctly do the work.
> > > 
> > > ---
> > > 
> > >  src/amd/vulkan/radv_pipeline.c               |  1 +
> > > 
> > >  src/compiler/nir/nir.h                       |  5 +++
> > > 
> > >  src/compiler/nir/nir_gather_info.c           |  6 +--
> > > 
> > >  src/gallium/drivers/freedreno/ir3/ir3_nir.c  |  1 +
> > > 
> > >  src/intel/vulkan/anv_device.c                |  2 +-
> > > 
> > >  src/intel/vulkan/genX_pipeline.c             | 62
> > > +++++++++++++++++-----------
> > > 
> > >  src/mesa/drivers/dri/i965/brw_compiler.c     | 23 ++++++++++-
> > > 
> > >  src/mesa/drivers/dri/i965/brw_compiler.h     |  2 +-
> > > 
> > >  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 14 +++++--
> > > 
> > >  src/mesa/drivers/dri/i965/brw_nir.c          | 18 +++++---
> > > 
> > >  src/mesa/drivers/dri/i965/brw_vec4.cpp       | 13 ++++--
> > > 
> > >  src/mesa/drivers/dri/i965/intel_screen.c     |  3 +-
> > > 
> > >  12 files changed, 105 insertions(+), 45 deletions(-)
> > > 
> > > 
> > > 
> > > diff --git a/src/amd/vulkan/radv_pipeline.c
> > > b/src/amd/vulkan/radv_pipeline.c
> > > 
> > > index ee5d812..90d4650 100644
> > > 
> > > --- a/src/amd/vulkan/radv_pipeline.c
> > > 
> > > +++ b/src/amd/vulkan/radv_pipeline.c
> > > 
> > > @@ -59,6 +59,7 @@ static const struct nir_shader_compiler_options
> > > nir_options = {
> > > 
> > >         .lower_unpack_unorm_4x8 = true,
> > > 
> > >         .lower_extract_byte = true,
> > > 
> > >         .lower_extract_word = true,
> > > 
> > > +       .dvec3_consumes_two_locations = true,
> > > 
> > >  };
> > > 
> > > 
> > > 
> > >  VkResult radv_CreateShaderModule(
> > > 
> > > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> > > 
> > > index 1679d89..0fc8f39 100644
> > > 
> > > --- a/src/compiler/nir/nir.h
> > > 
> > > +++ b/src/compiler/nir/nir.h
> > > 
> > > @@ -1794,6 +1794,11 @@ typedef struct nir_shader_compiler_options
> > > {
> > > 
> > >      * information must be inferred from the list of input
> > > nir_variables.
> > > 
> > >      */
> > > 
> > >     bool use_interpolated_input_intrinsics;
> > > 
> > > +
> > > 
> > > +   /**
> > > 
> > > +    * In Vulkan, a dvec3/dvec4 consumes two locations instead
> > > just one.
> > > 
> > > +    */
> > > 
> > > +   bool dvec3_consumes_two_locations;
> > > 
> > >  } nir_shader_compiler_options;
> > > 
> > > 
> > > 
> > >  typedef struct nir_shader {
> > > 
> > > diff --git a/src/compiler/nir/nir_gather_info.c
> > > b/src/compiler/nir/nir_gather_info.c
> > > 
> > > index 07c9949..8c80671 100644
> > > 
> > > --- a/src/compiler/nir/nir_gather_info.c
> > > 
> > > +++ b/src/compiler/nir/nir_gather_info.c
> > > 
> > > @@ -96,7 +96,7 @@ mark_whole_variable(nir_shader *shader,
> > > nir_variable *var)
> > > 
> > > 
> > > 
> > >     const unsigned slots =
> > > 
> > >        var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
> > > 
> > > -                        : glsl_count_attribute_slots(type,
> > > is_vertex_input);
> > > 
> > > +                        : glsl_count_attribute_slots(type,
> > > is_vertex_input && !shader->options-
> > > >dvec3_consumes_two_locations);
> > 
> > This makes no sense, why are we passing is_vertex_input &&
> > !dvec3_consumes_two_locations to an argument labled
> > is_vertex_input?

We are using glsl_count_attribute_slots() to check how many locations
the input consumes.
is_vertex_input only affects when the type is a dvec3, dvec4, dmatNx3,
or dmatNx4. In this case, when is_vertex_input == False then it
consumes 2 location,but if is_vertex_input == True then it consumes 1
location. 
This is because in GLSL spec, a dvec3/dvec4 input vertex attribute
consumes just one location.
But in Vulkan, they consume two locations, hence we mark the condition
as FALSE.
Probably is_vertex_input is not a very good name, and we could rename
it to dvec3_consumes_two_locations, or similar.


> >  
> > > 
> > >     set_io_mask(shader, var, 0, slots);
> > > 
> > >  }
> > > 
> > > @@ -168,7 +168,7 @@ try_mask_partial_io(nir_shader *shader,
> > > nir_deref_var *deref)
> > > 
> > >         var->data.mode == nir_var_shader_in)
> > > 
> > >        is_vertex_input = true;
> > > 
> > > 
> > > 
> > > -   unsigned offset = get_io_offset(deref, is_vertex_input);
> > > 
> > > +   unsigned offset = get_io_offset(deref, is_vertex_input &&
> > > !shader->options->dvec3_consumes_two_locations);
> > 
> > Same here

Same explanation as above. the io_offset is calculated based on how
many locations we are consuming.
> >  
> > >     if (offset == -1)
> > > 
> > >        return false;
> > > 
> > > 
> > > 
> > > @@ -184,7 +184,7 @@ try_mask_partial_io(nir_shader *shader,
> > > nir_deref_var *deref)
> > > 
> > >     }
> > > 
> > > 
> > > 
> > >     /* double element width for double types that takes two slots
> > > */
> > > 
> > > -   if (!is_vertex_input &&
> > > 
> > > +   if ((!is_vertex_input || shader->options-
> > > >dvec3_consumes_two_locations) &&
> > > 
> > >         glsl_type_is_dual_slot(glsl_without_array(type))) {
> > 
> > This makes a bit more sense but I'm still not quite seeing it.

Same explanation as above.
> >  
> > >        elem_width *= 2;
> > > 
> > >     }
> > > 
> > > diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.c
> > > b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
> > > 
> > > index 2d86a52..5c5c9ad 100644
> > > 
> > > --- a/src/gallium/drivers/freedreno/ir3/ir3_nir.c
> > > 
> > > +++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
> > > 
> > > @@ -50,6 +50,7 @@ static const nir_shader_compiler_options
> > > options = {
> > > 
> > >                 .vertex_id_zero_based = true,
> > > 
> > >                 .lower_extract_byte = true,
> > > 
> > >                 .lower_extract_word = true,
> > > 
> > > +               .dvec3_consumes_two_locations = false,
> > > 
> > >  };
> > > 
> > > 
> > > 
> > >  struct nir_shader *
> > > 
> > > diff --git a/src/intel/vulkan/anv_device.c
> > > b/src/intel/vulkan/anv_device.c
> > > 
> > > index 2c8ac49..725848f 100644
> > > 
> > > --- a/src/intel/vulkan/anv_device.c
> > > 
> > > +++ b/src/intel/vulkan/anv_device.c
> > > 
> > > @@ -167,7 +167,7 @@ anv_physical_device_init(struct
> > > anv_physical_device *device,
> > > 
> > > 
> > > 
> > >     brw_process_intel_debug_variable();
> > > 
> > > 
> > > 
> > > -   device->compiler = brw_compiler_create(NULL, &device->info);
> > > 
> > > +   device->compiler = brw_compiler_create(NULL, &device->info,
> > > true);
> > > 
> > >     if (device->compiler == NULL) {
> > > 
> > >        result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
> > > 
> > >        goto fail;
> > > 
> > > diff --git a/src/intel/vulkan/genX_pipeline.c
> > > b/src/intel/vulkan/genX_pipeline.c
> > > 
> > > index cb164ad..97c40b8 100644
> > > 
> > > --- a/src/intel/vulkan/genX_pipeline.c
> > > 
> > > +++ b/src/intel/vulkan/genX_pipeline.c
> > > 
> > > @@ -33,26 +33,33 @@
> > > 
> > >  static uint32_t
> > > 
> > >  vertex_element_comp_control(enum isl_format format, unsigned
> > > comp)
> > > 
> > >  {
> > > 
> > > -   uint8_t bits;
> > > 
> > >     switch (comp) {
> > > 
> > > -   case 0: bits = isl_format_layouts[format].channels.r.bits;
> > > break;
> > > 
> > > -   case 1: bits = isl_format_layouts[format].channels.g.bits;
> > > break;
> > > 
> > > -   case 2: bits = isl_format_layouts[format].channels.b.bits;
> > > break;
> > > 
> > > -   case 3: bits = isl_format_layouts[format].channels.a.bits;
> > > break;
> > > 
> > > -   default: unreachable("Invalid component");
> > > 
> > > -   }
> > > 
> > > -
> > > 
> > > -   if (bits) {
> > > 
> > > -      return VFCOMP_STORE_SRC;
> > > 
> > > -   } else if (comp < 3) {
> > > 
> > > -      return VFCOMP_STORE_0;
> > > 
> > > -   } else if (isl_format_layouts[format].channels.r.type ==
> > > ISL_UINT ||
> > > 
> > > -            isl_format_layouts[format].channels.r.type ==
> > > ISL_SINT) {
> > > 
> > > -      assert(comp == 3);
> > > 
> > > -      return VFCOMP_STORE_1_INT;
> > > 
> > > -   } else {
> > > 
> > > -      assert(comp == 3);
> > > 
> > > -      return VFCOMP_STORE_1_FP;
> > > 
> > > +   case 0:
> > > 
> > > +      return isl_format_layouts[format].channels.r.bits ?
> > > 
> > > +         VFCOMP_STORE_SRC : VFCOMP_STORE_0;
> > > 
> > > +   case 1:
> > > 
> > > +      return isl_format_layouts[format].channels.g.bits ?
> > > 
> > > +         VFCOMP_STORE_SRC : VFCOMP_STORE_0;
> > > 
> > > +   case 2:
> > > 
> > > +      return isl_format_layouts[format].channels.b.bits ?
> > > 
> > > +         VFCOMP_STORE_SRC :
> > > ((isl_format_layouts[format].channels.r.type == ISL_RAW) ?
> > > 
> > > +                             VFCOMP_NOSTORE : VFCOMP_STORE_0);
> > > 
> > > +   case 3:
> > > 
> > > +      if (isl_format_layouts[format].channels.a.bits)
> > > 
> > > +         return VFCOMP_STORE_SRC;
> > > 
> > > +      else
> > > 
> > > +         switch (isl_format_layouts[format].channels.r.type) {
> > > 
> > > +         case ISL_RAW:
> > > 
> > > +            return isl_format_layouts[format].channels.b.bits ?
> > > 
> > > +               VFCOMP_STORE_0 : VFCOMP_NOSTORE;
> > > 
> > > +         case ISL_UINT:
> > > 
> > > +         case ISL_SINT:
> > > 
> > > +            return VFCOMP_STORE_1_INT;
> > > 
> > > +         default:
> > > 
> > > +            return VFCOMP_STORE_1_FP;
> > > 
> > > +         }
> > > 
> > > +   default:
> > > 
> > > +      unreachable("Invalid component");
> > > 
> > >     }
> > > 
> > >  }
> > > 
> > > 
> > > 
> > > @@ -64,8 +71,10 @@ emit_vertex_input(struct anv_pipeline
> > > *pipeline,
> > > 
> > > 
> > > 
> > >     /* Pull inputs_read out of the VS prog data */
> > > 
> > >     const uint64_t inputs_read = vs_prog_data->inputs_read;
> > > 
> > > +   const uint64_t double_inputs_read = vs_prog_data-
> > > >double_inputs_read;
> > > 
> > >     assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) ==
> > > 0);
> > > 
> > >     const uint32_t elements = inputs_read >>
> > > VERT_ATTRIB_GENERIC0;
> > > 
> > > +   const uint32_t elements_double = double_inputs_read >>
> > > VERT_ATTRIB_GENERIC0;
> > > 
> > > 
> > > 
> > >  #if GEN_GEN >= 8
> > > 
> > >     /* On BDW+, we only need to allocate space for base ids. 
> > > Setting up
> > > 
> > > @@ -83,13 +92,16 @@ emit_vertex_input(struct anv_pipeline
> > > *pipeline,
> > > 
> > >                                  vs_prog_data->uses_baseinstance;
> > > 
> > >  #endif
> > > 
> > > 
> > > 
> > > -   uint32_t elem_count = __builtin_popcount(elements) +
> > > needs_svgs_elem;
> > > 
> > > -   if (elem_count == 0)
> > > 
> > > +   uint32_t elem_count =
> > > 
> > > +      __builtin_popcount(elements) -
> > > DIV_ROUND_UP(__builtin_popcount(elements_double), 2);
> > > 
> > > +
> > > 
> > > +   uint32_t total_elems = elem_count + needs_svgs_elem;
> > > 
> > > +   if (total_elems == 0)
> > > 
> > >        return;
> > > 
> > > 
> > > 
> > >     uint32_t *p;
> > > 
> > > 
> > > 
> > > -   const uint32_t num_dwords = 1 + elem_count * 2;
> > > 
> > > +   const uint32_t num_dwords = 1 + total_elems * 2;
> > > 
> > >     p = anv_batch_emitn(&pipeline->batch, num_dwords,
> > > 
> > >                         GENX(3DSTATE_VERTEX_ELEMENTS));
> > > 
> > >     memset(p + 1, 0, (num_dwords - 1) * 4);
> > > 
> > > @@ -107,7 +119,9 @@ emit_vertex_input(struct anv_pipeline
> > > *pipeline,
> > > 
> > >        if ((elements & (1 << desc->location)) == 0)
> > > 
> > >           continue; /* Binding unused */
> > > 
> > > 
> > > 
> > > -      uint32_t slot = __builtin_popcount(elements & ((1 << desc-
> > > >location) - 1));
> > > 
> > > +      uint32_t slot =
> > > 
> > > +         __builtin_popcount(elements & ((1 << desc->location) -
> > > 1)) -
> > > 
> > > +         DIV_ROUND_UP(__builtin_popcount(elements_double & ((1
> > > << desc->location) - 1)), 2);
> > > 
> > > 
> > > 
> > >        struct GENX(VERTEX_ELEMENT_STATE) element = {
> > > 
> > >           .VertexBufferIndex = desc->binding,
> > > 
> > > @@ -137,7 +151,7 @@ emit_vertex_input(struct anv_pipeline
> > > *pipeline,
> > > 
> > >  #endif
> > > 
> > >     }
> > > 
> > > 
> > > 
> > > -   const uint32_t id_slot = __builtin_popcount(elements);
> > > 
> > > +   const uint32_t id_slot = elem_count;
> > > 
> > >     if (needs_svgs_elem) {
> > > 
> > >        /* From the Broadwell PRM for the
> > > 3D_Vertex_Component_Control enum:
> > > 
> > >         *    "Within a VERTEX_ELEMENT_STATE structure, if a
> > > Component
> > > 
> > > diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c
> > > b/src/mesa/drivers/dri/i965/brw_compiler.c
> > > 
> > > index 1aa72bc..b9eceeb 100644
> > > 
> > > --- a/src/mesa/drivers/dri/i965/brw_compiler.c
> > > 
> > > +++ b/src/mesa/drivers/dri/i965/brw_compiler.c
> > > 
> > > @@ -55,6 +55,22 @@ static const struct
> > > nir_shader_compiler_options scalar_nir_options = {
> > > 
> > >     .lower_unpack_snorm_4x8 = true,
> > > 
> > >     .lower_unpack_unorm_2x16 = true,
> > > 
> > >     .lower_unpack_unorm_4x8 = true,
> > > 
> > > +   .dvec3_consumes_two_locations = false,
> > > 
> > > +};
> > > 
> > > +
> > > 
> > > +static const struct nir_shader_compiler_options
> > > vulkan_scalar_nir_options = {
> > > 
> > > +   COMMON_OPTIONS,
> > > 
> > > +   .lower_pack_half_2x16 = true,
> > > 
> > > +   .lower_pack_snorm_2x16 = true,
> > > 
> > > +   .lower_pack_snorm_4x8 = true,
> > > 
> > > +   .lower_pack_unorm_2x16 = true,
> > > 
> > > +   .lower_pack_unorm_4x8 = true,
> > > 
> > > +   .lower_unpack_half_2x16 = true,
> > > 
> > > +   .lower_unpack_snorm_2x16 = true,
> > > 
> > > +   .lower_unpack_snorm_4x8 = true,
> > > 
> > > +   .lower_unpack_unorm_2x16 = true,
> > > 
> > > +   .lower_unpack_unorm_4x8 = true,
> > > 
> > > +   .dvec3_consumes_two_locations = true,
> > > 
> > >  };
> > > 
> > > 
> > > 
> > >  static const struct nir_shader_compiler_options
> > > vector_nir_options = {
> > > 
> > > @@ -75,6 +91,7 @@ static const struct nir_shader_compiler_options
> > > vector_nir_options = {
> > > 
> > >     .lower_unpack_unorm_2x16 = true,
> > > 
> > >     .lower_extract_byte = true,
> > > 
> > >     .lower_extract_word = true,
> > > 
> > > +   .dvec3_consumes_two_locations = false,
> > > 
> > >  };
> > > 
> > > 
> > > 
> > >  static const struct nir_shader_compiler_options
> > > vector_nir_options_gen6 = {
> > > 
> > > @@ -92,10 +109,11 @@ static const struct
> > > nir_shader_compiler_options vector_nir_options_gen6 = {
> > > 
> > >     .lower_unpack_unorm_2x16 = true,
> > > 
> > >     .lower_extract_byte = true,
> > > 
> > >     .lower_extract_word = true,
> > > 
> > > +   .dvec3_consumes_two_locations = false,
> > > 
> > >  };
> > > 
> > > 
> > > 
> > >  struct brw_compiler *
> > > 
> > > -brw_compiler_create(void *mem_ctx, const struct gen_device_info
> > > *devinfo)
> > > 
> > > +brw_compiler_create(void *mem_ctx, const struct gen_device_info
> > > *devinfo, bool is_vulkan)
> > > 
> > >  {
> > > 
> > >     struct brw_compiler *compiler = rzalloc(mem_ctx, struct
> > > brw_compiler);
> > > 
> > > 
> > > 
> > > @@ -138,7 +156,8 @@ brw_compiler_create(void *mem_ctx, const
> > > struct gen_device_info *devinfo)
> > > 
> > >           compiler-
> > > >glsl_compiler_options[i].EmitNoIndirectSampler = true;
> > > 
> > > 
> > > 
> > >        if (is_scalar) {
> > > 
> > > -         compiler->glsl_compiler_options[i].NirOptions =
> > > &scalar_nir_options;
> > > 
> > > +         compiler->glsl_compiler_options[i].NirOptions =
> > > is_vulkan ? &vulkan_scalar_nir_options
> > > 
> > > +                                                               
> > >    : &scalar_nir_options;
> > > 
> > >        } else {
> > > 
> > >           compiler->glsl_compiler_options[i].NirOptions =
> > > 
> > >              devinfo->gen < 6 ? &vector_nir_options :
> > > &vector_nir_options_gen6;
> > > 
> > > diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h
> > > b/src/mesa/drivers/dri/i965/brw_compiler.h
> > > 
> > > index 65a7478..34df343 100644
> > > 
> > > --- a/src/mesa/drivers/dri/i965/brw_compiler.h
> > > 
> > > +++ b/src/mesa/drivers/dri/i965/brw_compiler.h
> > > 
> > > @@ -760,7 +760,7 @@ DEFINE_PROG_DATA_DOWNCAST(sf)
> > > 
> > >  /** @} */
> > > 
> > > 
> > > 
> > >  struct brw_compiler *
> > > 
> > > -brw_compiler_create(void *mem_ctx, const struct gen_device_info
> > > *devinfo);
> > > 
> > > +brw_compiler_create(void *mem_ctx, const struct gen_device_info
> > > *devinfo, bool is_vulkan);
> > > 
> > > 
> > > 
> > >  /**
> > > 
> > >   * Compile a vertex shader.
> > > 
> > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> > > b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> > > 
> > > index 14415bd..00c5bb4 100644
> > > 
> > > --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> > > 
> > > +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> > > 
> > > @@ -35,10 +35,16 @@ using namespace brw;
> > > 
> > >  fs_reg *
> > > 
> > >  fs_visitor::emit_vs_system_value(int location)
> > > 
> > >  {
> > > 
> > > -   fs_reg *reg = new(this->mem_ctx)
> > > 
> > > -      fs_reg(ATTR, 4 * (_mesa_bitcount_64(nir->info-
> > > >inputs_read) +
> > > 
> > > -                        _mesa_bitcount_64(nir->info-
> > > >double_inputs_read)),
> > > 
> > > -             BRW_REGISTER_TYPE_D);
> > > 
> > > +   fs_reg *reg;
> > > 
> > > +   if (nir->options->dvec3_consumes_two_locations)
> > > 
> > > +      reg = new(this->mem_ctx)
> > > 
> > > +         fs_reg(ATTR, 4 * _mesa_bitcount_64(nir->info-
> > > >inputs_read),
> > > 
> > > +                BRW_REGISTER_TYPE_D);
> > > 
> > > +   else
> > > 
> > > +      reg = new(this->mem_ctx)
> > > 
> > > +         fs_reg(ATTR, 4 * (_mesa_bitcount_64(nir->info-
> > > >inputs_read) +
> > > 
> > > +                           _mesa_bitcount_64(nir->info-
> > > >double_inputs_read)),
> > > 
> > > +                BRW_REGISTER_TYPE_D);
> > > 
> > >     struct brw_vs_prog_data *vs_prog_data =
> > > brw_vs_prog_data(prog_data);
> > > 
> > > 
> > > 
> > >     switch (location) {
> > > 
> > > diff --git a/src/mesa/drivers/dri/i965/brw_nir.c
> > > b/src/mesa/drivers/dri/i965/brw_nir.c
> > > 
> > > index 763e3ec..daa22c6 100644
> > > 
> > > --- a/src/mesa/drivers/dri/i965/brw_nir.c
> > > 
> > > +++ b/src/mesa/drivers/dri/i965/brw_nir.c
> > > 
> > > @@ -96,7 +96,7 @@ add_const_offset_to_base(nir_shader *nir,
> > > nir_variable_mode mode)
> > > 
> > >  }
> > > 
> > > 
> > > 
> > >  static bool
> > > 
> > > -remap_vs_attrs(nir_block *block, shader_info *nir_info)
> > > 
> > > +remap_vs_attrs(nir_block *block, nir_shader *nir)
> > > 
> > >  {
> > > 
> > >     nir_foreach_instr(instr, block) {
> > > 
> > >        if (instr->type != nir_instr_type_intrinsic)
> > > 
> > > @@ -111,10 +111,13 @@ remap_vs_attrs(nir_block *block,
> > > shader_info *nir_info)
> > > 
> > >            * before it and counting the bits.
> > > 
> > >            */
> > > 
> > >           int attr = intrin->const_index[0];
> > > 
> > > -         int slot = _mesa_bitcount_64(nir_info->inputs_read &
> > > 
> > > +         int slot = _mesa_bitcount_64(nir->info->inputs_read &
> > > 
> > > +                                      BITFIELD64_MASK(attr));
> > > 
> > > +
> > > 
> > > +         int dslot = 0;
> > > 
> > > +         if (!nir->options->dvec3_consumes_two_locations)
> > > 
> > > +            dslot = _mesa_bitcount_64(nir->info-
> > > >double_inputs_read &
> > > 
> > >                                        BITFIELD64_MASK(attr));
> > > 
> > > -         int dslot = _mesa_bitcount_64(nir_info-
> > > >double_inputs_read &
> > > 
> > > -                                       BITFIELD64_MASK(attr));
> > > 
> > >           intrin->const_index[0] = 4 * (slot + dslot);
> > > 
> > >        }
> > > 
> > >     }
> > > 
> > > @@ -204,7 +207,10 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
> > > 
> > >      * loaded as one vec4 or dvec4 per element (or matrix
> > > column), depending on
> > > 
> > >      * whether it is a double-precision type or not.
> > > 
> > >      */
> > > 
> > > -   nir_lower_io(nir, nir_var_shader_in, type_size_vs_input, 0);
> > > 
> > > +   nir_lower_io(nir,
> > > 
> > > +                nir_var_shader_in,
> > > 
> > > +                nir->options->dvec3_consumes_two_locations ?
> > > type_size_vec4 : type_size_vs_input,
> > > 
> > > +                0);
> > > 
> > > 
> > > 
> > >     /* This pass needs actual constants */
> > > 
> > >     nir_opt_constant_folding(nir);
> > > 
> > > @@ -220,7 +226,7 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
> > > 
> > >        nir_foreach_function(function, nir) {
> > > 
> > >           if (function->impl) {
> > > 
> > >              nir_foreach_block(block, function->impl) {
> > > 
> > > -               remap_vs_attrs(block, nir->info);
> > > 
> > > +               remap_vs_attrs(block, nir);
> > > 
> > >              }
> > > 
> > >           }
> > > 
> > >        }
> > > 
> > > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> > > b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> > > 
> > > index b9e592f..0aab97f 100644
> > > 
> > > --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> > > 
> > > +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> > > 
> > > @@ -2129,6 +2129,9 @@ brw_compile_vs(const struct brw_compiler
> > > *compiler, void *log_data,
> > > 
> > > 
> > > 
> > >     unsigned nr_attributes = _mesa_bitcount_64(prog_data-
> > > >inputs_read);
> > > 
> > > 
> > > 
> > > +   if (shader->options->dvec3_consumes_two_locations)
> > > 
> > > +      nr_attributes -= DIV_ROUND_UP(_mesa_bitcount_64(prog_data-
> > > >double_inputs_read), 2);
> > > 
> > > +
> > > 
> > >     /* gl_VertexID and gl_InstanceID are system values, but
> > > arrive via an
> > > 
> > >      * incoming vertex attribute.  So, add an extra slot.
> > > 
> > >      */
> > > 
> > > @@ -2146,9 +2149,13 @@ brw_compile_vs(const struct brw_compiler
> > > *compiler, void *log_data,
> > > 
> > >        nr_attributes++;
> > > 
> > >     }
> > > 
> > > 
> > > 
> > > -   unsigned nr_attribute_slots =
> > > 
> > > -      nr_attributes +
> > > 
> > > -      _mesa_bitcount_64(shader->info->double_inputs_read);
> > > 
> > > +   unsigned nr_attribute_slots = nr_attributes;
> > > 
> > > +   if (shader->options->dvec3_consumes_two_locations)
> > > 
> > > +      nr_attribute_slots +=
> > > 
> > > +         DIV_ROUND_UP(_mesa_bitcount_64(shader->info-
> > > >double_inputs_read), 2);
> > > 
> > > +   else
> > > 
> > > +      nr_attribute_slots +=
> > > 
> > > +         _mesa_bitcount_64(shader->info->double_inputs_read);
> > > 
> > > 
> > > 
> > >     /* The 3DSTATE_VS documentation lists the lower bound on
> > > "Vertex URB Entry
> > > 
> > >      * Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode. 
> > > Empirically, in
> > > 
> > > diff --git a/src/mesa/drivers/dri/i965/intel_screen.c
> > > b/src/mesa/drivers/dri/i965/intel_screen.c
> > > 
> > > index e1c3c19..c050f86 100644
> > > 
> > > --- a/src/mesa/drivers/dri/i965/intel_screen.c
> > > 
> > > +++ b/src/mesa/drivers/dri/i965/intel_screen.c
> > > 
> > > @@ -1688,7 +1688,8 @@ __DRIconfig **intelInitScreen2(__DRIscreen
> > > *dri_screen)
> > > 
> > >        ? screenExtensions : intelRobustScreenExtensions;
> > > 
> > > 
> > > 
> > >     screen->compiler = brw_compiler_create(screen,
> > > 
> > > -                                          &screen->devinfo);
> > > 
> > > +                                          &screen->devinfo,
> > > 
> > > +                                          false);
> > > 
> > >     screen->compiler->shader_debug_log = shader_debug_log_mesa;
> > > 
> > >     screen->compiler->shader_perf_log = shader_perf_log_mesa;
> > > 
> > >     screen->program_id = 1;
> > > 
> > > --
> > > 
> > > 2.7.4
> > > 
> > > 
> > > 
> > > _______________________________________________
> > > 
> > > mesa-dev mailing list
> > > 
> > > mesa-dev@lists.freedesktop.org
> > > 
> > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> > > 
> > > 
> > 
> >

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 19/22] anv/nir: add support for dvec3/4 consuming two locations

Reply via email to