On Thu, 2016-07-07 at 20:12 -0700, Jason Ekstrand wrote: > > On Jul 6, 2016 6:59 PM, "Timothy Arceri" <timothy.arc...@collabora.co > m> wrote: > > > > --- > > src/mesa/drivers/dri/i965/brw_fs.cpp | 20 ++++++++++++-------- > > src/mesa/drivers/dri/i965/brw_fs.h | 5 +++-- > > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 29 > ++++++++++++++++++++--------- > > 3 files changed, 35 insertions(+), 19 deletions(-) > > > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > > index 2f473cc..9e7223e 100644 > > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > > @@ -1109,7 +1109,8 @@ fs_visitor::emit_general_interpolation(fs_reg > *attr, const char *name, > > const glsl_type *type, > > glsl_interp_qualifier > interpolation_mode, > > int *location, bool > mod_centroid, > > - bool mod_sample) > > + bool mod_sample, > > + unsigned > num_packed_components) > > { > > assert(stage == MESA_SHADER_FRAGMENT); > > brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this- > >prog_data; > > @@ -1131,22 +1132,26 @@ > fs_visitor::emit_general_interpolation(fs_reg *attr, const char > *name, > > > > for (unsigned i = 0; i < length; i++) { > > emit_general_interpolation(attr, name, elem_type, > interpolation_mode, > > - location, mod_centroid, > mod_sample); > > + location, mod_centroid, > mod_sample, > > + num_packed_components); > > } > > } else if (type->is_record()) { > > for (unsigned i = 0; i < type->length; i++) { > > const glsl_type *field_type = type- > >fields.structure[i].type; > > emit_general_interpolation(attr, name, field_type, > interpolation_mode, > > - location, mod_centroid, > mod_sample); > > + location, mod_centroid, > mod_sample, > > + num_packed_components); > > } > > } else { > > assert(type->is_scalar() || type->is_vector()); > > + unsigned num_components = num_packed_components ? > > + num_packed_components : type->vector_elements; > > > > if (prog_data->urb_setup[*location] == -1) { > > /* If there's no incoming setup data for this slot, don't > > * emit interpolation for it. > > */ > > - *attr = offset(*attr, bld, type->vector_elements); > > + *attr = offset(*attr, bld, num_components);
> This appears to be the only interesting use of num_components. > Pardon my while I ask a really stupid question: why can't we just > make it always 4 and call it a day? The size of *attr ultimately comes from nir_assign_var_locations() so this might work fine for the vec4 backend but for the scalar backend we would need to count the size differently meaning some kind of hack. However, as I pointed out in my other reply patch 4 also makes use of num_packed_components for calculating the location of arrays elements so there is still that to deal with also. > > (*location)++; > > return; > > } > > @@ -1158,7 +1163,6 @@ fs_visitor::emit_general_interpolation(fs_reg > *attr, const char *name, > > * handed us defined values in only the constant offset > > * field of the setup reg. > > */ > > - unsigned vector_elements = type->vector_elements; > > > > /* Data starts at suboffet 3 in 32-bit units (12 bytes), > so it is not > > * 64-bit aligned and the current implementation fails to > read the > > @@ -1166,10 +1170,10 @@ > fs_visitor::emit_general_interpolation(fs_reg *attr, const char > *name, > > * read it as vector of floats with twice the number of > components. > > */ > > if (attr->type == BRW_REGISTER_TYPE_DF) { > > - vector_elements *= 2; > > + num_components *= 2; > > attr->type = BRW_REGISTER_TYPE_F; > > } > > - for (unsigned int i = 0; i < vector_elements; i++) { > > + for (unsigned int i = 0; i < num_components; i++) { > > struct brw_reg interp = interp_reg(*location, i); > > interp = suboffset(interp, 3); > > interp.type = attr->type; > > @@ -1178,7 +1182,7 @@ fs_visitor::emit_general_interpolation(fs_reg > *attr, const char *name, > > } > > } else { > > /* Smooth/noperspective interpolation case. */ > > - for (unsigned int i = 0; i < type->vector_elements; i++) > { > > + for (unsigned int i = 0; i < num_components; i++) { > > struct brw_reg interp = interp_reg(*location, i); > > if (devinfo->needs_unlit_centroid_workaround && > mod_centroid) { > > /* Get the pixel/sample mask into f0 so that we > know > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h > b/src/mesa/drivers/dri/i965/brw_fs.h > > index 1f88f8f..0c72802 100644 > > --- a/src/mesa/drivers/dri/i965/brw_fs.h > > +++ b/src/mesa/drivers/dri/i965/brw_fs.h > > @@ -181,7 +181,7 @@ public: > > const glsl_type *type, > > glsl_interp_qualifier > interpolation_mode, > > int *location, bool > mod_centroid, > > - bool mod_sample); > > + bool mod_sample, unsigned > num_components); > > fs_reg *emit_vs_system_value(int location); > > void emit_interpolation_setup_gen4(); > > void emit_interpolation_setup_gen6(); > > @@ -200,7 +200,8 @@ public: > > void emit_nir_code(); > > void nir_setup_inputs(); > > void nir_setup_single_output_varying(fs_reg *reg, const > glsl_type *type, > > - unsigned *location); > > + unsigned *location, > > + unsigned num_components); > > void nir_setup_outputs(); > > void nir_setup_uniforms(); > > void nir_emit_system_values(); > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > > index 04ed42e..a08297e 100644 > > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > > @@ -78,7 +78,8 @@ fs_visitor::nir_setup_inputs() > > emit_general_interpolation(&input, var->name, var->type, > > (glsl_interp_qualifier) var- > >data.interpolation, > > &location, var->data.centroid, > > - var->data.sample); > > + var->data.sample, > > + var- > >data.num_packed_components); > > } > > } > > } > > @@ -86,23 +87,27 @@ fs_visitor::nir_setup_inputs() > > void > > fs_visitor::nir_setup_single_output_varying(fs_reg *reg, > > const glsl_type *type, > > - unsigned *location) > > + unsigned *location, > > + unsigned > num_packed_components) > > { > > if (type->is_array() || type->is_matrix()) { > > const struct glsl_type *elem_type = > glsl_get_array_element(type); > > const unsigned length = glsl_get_length(type); > > > > for (unsigned i = 0; i < length; i++) { > > - nir_setup_single_output_varying(reg, elem_type, > location); > > + nir_setup_single_output_varying(reg, elem_type, location, > > + num_packed_components); > > } > > } else if (type->is_record()) { > > for (unsigned i = 0; i < type->length; i++) { > > const struct glsl_type *field_type = type- > >fields.structure[i].type; > > - nir_setup_single_output_varying(reg, field_type, > location); > > + nir_setup_single_output_varying(reg, field_type, > location, > > + num_packed_components); > > } > > } else { > > assert(type->is_scalar() || type->is_vector()); > > - unsigned num_elements = type->vector_elements; > > + unsigned num_elements = num_packed_components ? > num_packed_components : > > + type->vector_elements; > > if (type->is_double()) > > num_elements *= 2; > > for (unsigned count = 0; count < num_elements; count += 4) { > > @@ -132,7 +137,8 @@ fs_visitor::nir_setup_outputs() > > case MESA_SHADER_TESS_EVAL: > > case MESA_SHADER_GEOMETRY: { > > unsigned location = var->data.location; > > - nir_setup_single_output_varying(®, var->type, > &location); > > + nir_setup_single_output_varying(®, var->type, > &location, > > + var- > >data.num_packed_components); > > break; > > } > > case MESA_SHADER_FRAGMENT: > > @@ -158,7 +164,9 @@ fs_visitor::nir_setup_outputs() > > } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) > { > > this->sample_mask = reg; > > } else { > > - int vector_elements = var->type->without_array()- > >vector_elements; > > + int vector_elements = var->data.num_packed_components > ? > > + var->data.num_packed_components : > > + var->type->without_array()->vector_elements; > > > > /* gl_FragData or a user-defined FS output */ > > assert(var->data.location >= FRAG_RESULT_DATA0 && > > @@ -3835,6 +3843,7 @@ fs_visitor::nir_emit_intrinsic(const > fs_builder &bld, nir_intrinsic_instr *instr > > > > case nir_intrinsic_load_input: { > > fs_reg src; > > + unsigned first_component = nir_intrinsic_component(instr); > > unsigned num_components = instr->num_components; > > enum brw_reg_type type = dest.type; > > > > @@ -3859,7 +3868,7 @@ fs_visitor::nir_emit_intrinsic(const > fs_builder &bld, nir_intrinsic_instr *instr > > src = offset(src, bld, const_offset->u32[0]); > > > > for (unsigned j = 0; j < num_components; j++) { > > - bld.MOV(offset(dest, bld, j), offset(src, bld, j)); > > + bld.MOV(offset(dest, bld, j), offset(src, bld, j + > first_component)); > > } > > > > if (type == BRW_REGISTER_TYPE_DF) { > > @@ -3985,6 +3994,7 @@ fs_visitor::nir_emit_intrinsic(const > fs_builder &bld, nir_intrinsic_instr *instr > > new_dest = offset(new_dest, bld, const_offset->u32[0]); > > > > unsigned num_components = instr->num_components; > > + unsigned first_component = nir_intrinsic_component(instr); > > unsigned bit_size = instr->src[0].is_ssa ? > > instr->src[0].ssa->bit_size : instr->src[0].reg.reg- > >bit_size; > > if (bit_size == 64) { > > @@ -3998,7 +4008,8 @@ fs_visitor::nir_emit_intrinsic(const > fs_builder &bld, nir_intrinsic_instr *instr > > } > > > > for (unsigned j = 0; j < num_components; j++) { > > - bld.MOV(offset(new_dest, bld, j), offset(src, bld, j)); > > + bld.MOV(offset(new_dest, bld, j + first_component), > > + offset(src, bld, j)); > > } > > break; > > } > > -- > > 2.7.4 > > > > _______________________________________________ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev