constant offset combining from remapping.

Jordan Justen Sat, 12 Dec 2015 14:51:58 -0800

On 2015-12-11 13:23:50, Kenneth Graunke wrote:
> My tessellation branch has two additional remap functions.  I don't want
> to replicate this logic there.
> 
> v2: Handle inputs/outputs separately (suggested by Jason Ekstrand).
> 
> Signed-off-by: Kenneth Graunke <kenn...@whitecape.org>
> ---
>  src/mesa/drivers/dri/i965/brw_nir.c | 93 
> +++++++++++++++++++++++++++----------
>  1 file changed, 68 insertions(+), 25 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_nir.c 
> b/src/mesa/drivers/dri/i965/brw_nir.c
> index 14ad172..afb1f6d 100644
> --- a/src/mesa/drivers/dri/i965/brw_nir.c
> +++ b/src/mesa/drivers/dri/i965/brw_nir.c
> @@ -27,15 +27,41 @@
>  #include "glsl/nir/nir_builder.h"
>  #include "program/prog_to_nir.h"
>  
> -struct remap_vs_attrs_state {
> +static bool
> +is_input(nir_intrinsic_instr *intrin)
> +{
> +   return intrin->intrinsic == nir_intrinsic_load_input ||
> +          intrin->intrinsic == nir_intrinsic_load_per_vertex_input;
> +}
> +
> +static bool
> +is_output(nir_intrinsic_instr *intrin)
> +{
> +   return intrin->intrinsic == nir_intrinsic_load_output ||
> +          intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
> +          intrin->intrinsic == nir_intrinsic_store_output ||
> +          intrin->intrinsic == nir_intrinsic_store_per_vertex_output;
> +}
> +
> +/**
> + * In many cases, we just add the base and offset together, so there's no
> + * reason to keep them separate.  Sometimes, combining them is essential:
> + * if a shader only accesses part of a compound variable (such as a matrix
> + * or array), the variable's base may not actually exist in the VUE map.
> + *
> + * This pass adds constant offsets to instr->const_index[0], and resets
> + * the offset source to 0.  Non-constant offsets remain unchanged.
> + */
> +struct add_const_offset_to_base_params {
>     nir_builder b;
> -   uint64_t inputs_read;
> +   nir_variable_mode mode;
>  };
>  
>  static bool
> -remap_vs_attrs(nir_block *block, void *void_state)
> +add_const_offset_to_base(nir_block *block, void *closure)


void_state is not a great name, but closure doesn't seem right either.
What about state, context or opaque instead?

-Jordan

>  {
> -   struct remap_vs_attrs_state *state = void_state;
> +   struct add_const_offset_to_base_params *params = closure;
> +   nir_builder *b = &params->b;
>  
>     nir_foreach_instr_safe(block, instr) {
>        if (instr->type != nir_instr_type_intrinsic)
> @@ -43,30 +69,44 @@ remap_vs_attrs(nir_block *block, void *void_state)
>  
>        nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
>  
> +      if ((params->mode == nir_var_shader_in && is_input(intrin)) ||
> +          (params->mode == nir_var_shader_out && is_output(intrin))) {
> +         nir_src *offset = nir_get_io_offset_src(intrin);
> +         nir_const_value *const_offset = nir_src_as_const_value(*offset);
> +
> +         if (const_offset) {
> +            intrin->const_index[0] += const_offset->u[0];
> +            b->cursor = nir_before_instr(&intrin->instr);
> +            nir_instr_rewrite_src(&intrin->instr, offset,
> +                                  nir_src_for_ssa(nir_imm_int(b, 0)));
> +         }
> +      }
> +   }
> +   return true;
> +
> +}
> +
> +static bool
> +remap_vs_attrs(nir_block *block, void *closure)
> +{
> +   GLbitfield64 inputs_read = *((GLbitfield64 *) closure);
> +
> +   nir_foreach_instr(block, instr) {
> +      if (instr->type != nir_instr_type_intrinsic)
> +         continue;
> +
> +      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
> +
>        if (intrin->intrinsic == nir_intrinsic_load_input) {
>           /* Attributes come in a contiguous block, ordered by their
>            * gl_vert_attrib value.  That means we can compute the slot
>            * number for an attribute by masking out the enabled attributes
>            * before it and counting the bits.
>            */
> -         nir_const_value *const_offset = 
> nir_src_as_const_value(intrin->src[0]);
> -
> -         /* We set EmitNoIndirect for VS inputs, so there are no indirects. 
> */
> -         assert(const_offset);
> -
> -         int attr = intrin->const_index[0] + const_offset->u[0];
> -         int slot = _mesa_bitcount_64(state->inputs_read &
> -                                      BITFIELD64_MASK(attr));
> +         int attr = intrin->const_index[0];
> +         int slot = _mesa_bitcount_64(inputs_read & BITFIELD64_MASK(attr));
>  
> -         /* The NIR -> FS pass will just add the base and offset together, so
> -          * there's no reason to keep them separate.  Just put it all in
> -          * const_index[0] and set the offset src[0] to load_const(0).
> -          */
>           intrin->const_index[0] = 4 * slot;
> -
> -         state->b.cursor = nir_before_instr(&intrin->instr);
> -         nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
> -                               nir_src_for_ssa(nir_imm_int(&state->b, 0)));
>        }
>     }
>     return true;
> @@ -77,6 +117,10 @@ brw_nir_lower_inputs(nir_shader *nir,
>                       const struct brw_device_info *devinfo,
>                       bool is_scalar)
>  {
> +   struct add_const_offset_to_base_params params = {
> +      .mode = nir_var_shader_in
> +   };
> +
>     switch (nir->stage) {
>     case MESA_SHADER_VERTEX:
>        /* Start with the location of the variable's base. */
> @@ -97,17 +141,16 @@ brw_nir_lower_inputs(nir_shader *nir,
>            * key->inputs_read since the two are identical aside from Gen4-5
>            * edge flag differences.
>            */
> -         struct remap_vs_attrs_state remap_state = {
> -            .inputs_read = nir->info.inputs_read,
> -         };
> +         GLbitfield64 inputs_read = nir->info.inputs_read;
>  
>           /* This pass needs actual constants */
>           nir_opt_constant_folding(nir);
>  
>           nir_foreach_overload(nir, overload) {
>              if (overload->impl) {
> -               nir_builder_init(&remap_state.b, overload->impl);
> -               nir_foreach_block(overload->impl, remap_vs_attrs, 
> &remap_state);
> +               nir_builder_init(&params.b, overload->impl);
> +               nir_foreach_block(overload->impl, add_const_offset_to_base, 
> &params);
> +               nir_foreach_block(overload->impl, remap_vs_attrs, 
> &inputs_read);
>              }
>           }
>        }
> -- 
> 2.6.3
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 01/13] i965: Separate base offset/constant offset combining from remapping.

Reply via email to