On Fri, 2016-01-29 at 13:14 -0800, Nanley Chery wrote: > On Fri, Jan 29, 2016 at 04:52:54PM +1100, Timothy Arceri wrote: > > In GLES 3.1+ and GL 4.4+ there is no guarantee that interpolation > > qualifiers will match between stages so we cannot safely pack > > varyings using the current packing pass in Mesa. > > > > We also disable packing on outward facing SSO as these too are > > outside > > the rule that guarantees the interpolation qualifiers match. > > > > We do however enable packing on individual arrays, structs, and > > matrices as these are required by the transform feedback code and > > it > > is still safe to do so. > > > > Finally we also enable packing when a varying is only used for > > transform feedback and its not a SSO. > > > > This fixes all remaining rendering issues with the dEQP SSO tests, > > the only issues remaining with thoses tests are to do with > > validation. > > > > Note: There is still one remaining SSO bug that this patch doesn't > > fix. > > Their is a chance that VS -> TCS will have mismatching interfaces > > because we pack VS output in case its used by transform feedback > > but > > don't pack TCS input for performance reasons. This patch will make > > the > > situation better but doesn't fix it. > > > > Cc: Samuel Iglesias Gonsálvez <sigles...@igalia.com> > > Cc: Tapani Pälli <tapani.pa...@intel.com> > > --- > > > > This patch applies on top of my ARB_enhanced_layouts component > > packing > > series [1]. > > > > The GLES 3.1 and GL 4.4 changes were tested by changing the code > > to > > always disable packing and running the changes in Intel CI system. > > > > This patch would also make it possible for a backend to do less > > packing > > in GLSL IR if it wished to do so now that the transform feedback > > requirements > > have been isolated. > > > > [1] http://patchwork.freedesktop.org/series/2101/ > > > > src/compiler/glsl/ir.h | 7 + > > src/compiler/glsl/ir_optimization.h | 2 +- > > src/compiler/glsl/link_varyings.cpp | 199 > > +++++++++++++++++++++------- > > src/compiler/glsl/lower_packed_varyings.cpp | 21 ++- > > 4 files changed, 174 insertions(+), 55 deletions(-) > > > > diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h > > index 287c0cb..8cadf34 100644 > > --- a/src/compiler/glsl/ir.h > > +++ b/src/compiler/glsl/ir.h > > @@ -736,6 +736,13 @@ public: > > unsigned is_unmatched_generic_inout:1; > > > > /** > > + * Is this varying used only by transform feedback? > > + * > > + * This is used by the linker to decide if its safe to pack > > the varying. > > + */ > > + unsigned is_xfb_only:1; > > + > > + /** > > * If non-zero, then this variable may be packed along with > > other variables > > * into a single varying slot, so this offset should be > > applied when > > * accessing components. For example, an offset of 1 means > > that the x > > diff --git a/src/compiler/glsl/ir_optimization.h > > b/src/compiler/glsl/ir_optimization.h > > index 94658bb..1ca26cd 100644 > > --- a/src/compiler/glsl/ir_optimization.h > > +++ b/src/compiler/glsl/ir_optimization.h > > @@ -129,7 +129,7 @@ void lower_packed_varyings(void *mem_ctx, > > unsigned locations_used, > > ir_variable_mode mode, > > gl_shader *shader, unsigned > > base_location, > > bool disable_varying_packing, > > - bool has_enhanced_layouts); > > + bool xfb_enabled, bool > > has_enhanced_layouts); > > bool lower_vector_insert(exec_list *instructions, bool > > lower_nonconstant_index); > > bool lower_vector_derefs(gl_shader *shader); > > void lower_named_interface_blocks(void *mem_ctx, gl_shader > > *shader); > > diff --git a/src/compiler/glsl/link_varyings.cpp > > b/src/compiler/glsl/link_varyings.cpp > > index 78eb2b6..2716723 100644 > > --- a/src/compiler/glsl/link_varyings.cpp > > +++ b/src/compiler/glsl/link_varyings.cpp > > @@ -41,6 +41,23 @@ > > > > > > /** > > + * Packing is always safe on individual arrays, structure and > > matices. It is > > + * also safe if the varying is only used for transform feedback. > > + */ > > +static bool > > +is_varying_packing_safe(const ir_variable *var, > > + const gl_shader_stage consumer_stage, > > + const gl_shader_stage producer_stage) { > > + if ((consumer_stage == MESA_SHADER_TESS_EVAL) || > > + (consumer_stage == MESA_SHADER_TESS_CTRL) || > > + (producer_stage == MESA_SHADER_TESS_CTRL)) { > > + return false; > > +} > > + return var->type->is_array() || var->type->is_record() || > > + var->type->is_matrix() || var->data.is_xfb_only; > > +} > > + > > +/** > > * Get the varying type stripped of the outermost array if we're > > processing > > * a stage whose varyings are arrays indexed by a vertex number > > (such as > > * geometry shader inputs). > > @@ -878,7 +895,7 @@ namespace { > > class varying_matches > > { > > public: > > - varying_matches(bool disable_varying_packing, > > + varying_matches(bool disable_varying_packing, bool xfb_enabled, > > gl_shader_stage producer_stage, > > gl_shader_stage consumer_stage); > > ~varying_matches(); > > @@ -892,10 +909,22 @@ private: > > * If true, this driver disables varying packing, so all > > varyings need to > > * be aligned on slot boundaries, and take up a number of slots > > equal to > > * their number of matrix columns times their array size. > > + * > > + * Packing may also be disabled because our current packing > > method is not > > + * safe in SSO or versions of OpenGL where interpolation > > qualifiers are not > > + * guaranteed to match across stages. > > */ > > const bool disable_varying_packing; > > > > /** > > + * If true, this driver has transform feedback enabled. The > > transform > > + * feedback code requires at least some packing be done even > > when varying > > + * packing is disabled, fortunately the cases its required for > > safe its > > + * still to do so. See is_varying_packing_safe(). > > + */ > > + const bool xfb_enabled; > > + > > + /** > > * Enum representing the order in which varyings are packed > > within a > > * packing class. > > * > > @@ -909,11 +938,13 @@ private: > > PACKING_ORDER_VEC2, > > PACKING_ORDER_SCALAR, > > PACKING_ORDER_VEC3, > > + PACKING_ORDER_LAST, > > }; > > > > static unsigned compute_packing_class(const ir_variable *var); > > static packing_order_enum compute_packing_order(const > > ir_variable *var); > > static int match_comparator(const void *x_generic, const void > > *y_generic); > > + static int xfb_comparator(const void *x_generic, const void > > *y_generic); > > > > /** > > * Structure recording the relationship between a single > > producer output > > @@ -969,9 +1000,11 @@ private: > > } /* anonymous namespace */ > > > > varying_matches::varying_matches(bool disable_varying_packing, > > + bool xfb_enabled, > > gl_shader_stage producer_stage, > > gl_shader_stage consumer_stage) > > : disable_varying_packing(disable_varying_packing), > > + xfb_enabled(xfb_enabled), > > producer_stage(producer_stage), > > consumer_stage(consumer_stage) > > { > > @@ -1067,7 +1100,9 @@ varying_matches::record(ir_variable > > *producer_var, ir_variable *consumer_var) > > = this->compute_packing_class(var); > > this->matches[this->num_matches].packing_order > > = this->compute_packing_order(var); > > - if (this->disable_varying_packing) { > > + if (this->disable_varying_packing && > > + !(xfb_enabled && > > + is_varying_packing_safe(var, consumer_stage, > > producer_stage))) { > > unsigned slots = type->count_attribute_slots(false); > > this->matches[this->num_matches].num_components = slots * 4; > > } else { > > @@ -1093,37 +1128,24 @@ varying_matches::assign_locations(struct > > gl_shader_program *prog, > > uint64_t reserved_slots, > > bool separate_shader) > > { > > - /* We disable varying sorting for separate shader programs for > > the > > - * following reasons: > > - * > > - * 1/ All programs must sort the code in the same order to > > guarantee the > > - * interface matching. However varying_matches::record() > > will change the > > - * interpolation qualifier of some stages. > > - * > > - * 2/ GLSL version 4.50 removes the matching constrain on the > > interpolation > > - * qualifier. > > - * > > - * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.40 > > spec: > > - * > > - * "The type and presence of interpolation qualifiers of > > variables with > > - * the same name declared in all linked shaders for the same > > cross-stage > > - * interface must match, otherwise the link command will > > fail. > > - * > > - * When comparing an output from one stage to an input of a > > subsequent > > - * stage, the input and output don't match if their > > interpolation > > - * qualifiers (or lack thereof) are not the same." > > - * > > - * "It is a link-time error if, within the same stage, the > > interpolation > > - * qualifiers of variables of the same name do not match." > > + /* If packing has been disbaled then we cannot safely sort the > > varyings as > > + * it may mean we are using a version of OpenGL where > > interpolation > > + * qualifiers are not guaranteed to be matching across shaders, > > sorting > > + * in this case could result in mismatching shader interfaces. > > */ > > - if (!separate_shader) { > > + if (!this->disable_varying_packing) { > > /* Sort varying matches into an order that makes them easy > > to pack. */ > > qsort(this->matches, this->num_matches, sizeof(*this- > > >matches), > > &varying_matches::match_comparator); > > + } else if (xfb_enabled) { > > + /* Only sort varyings that are only used by transform > > feedback. */ > > + qsort(this->matches, this->num_matches, sizeof(*this- > > >matches), > > + &varying_matches::xfb_comparator); > > } > > > > unsigned generic_location = 0; > > unsigned generic_patch_location = MAX_VARYING*4; > > + bool previous_var_xfb_only = false; > > > > for (unsigned i = 0; i < this->num_matches; i++) { > > unsigned *location = &generic_location; > > @@ -1147,16 +1169,30 @@ varying_matches::assign_locations(struct > > gl_shader_program *prog, > > /* Advance to the next slot if this varying has a different > > packing > > * class than the previous one, and we're not already on a > > slot > > * boundary. > > + * > > + * Also advance to the next slot if packing disabled, this > > makes sure we > > + * don't assign varyings the same locations, this is > > possible because > > + * we still pack individual arrays, records and matrices > > even when > > + * packing is disabled. Note we don't advance to the next > > slot if we can > > + * pack varyings together that are only used for transform > > feedback. > > */ > > - if (i > 0 && > > - this->matches[i - 1].packing_class > > - != this->matches[i].packing_class) { > > + if ((this->disable_varying_packing && > > + !(previous_var_xfb_only && var->data.is_xfb_only)) || > > + (i > 0 && this->matches[i - 1].packing_class > > + != this->matches[i].packing_class )) { > > *location = ALIGN(*location, 4); > > } > > > > + previous_var_xfb_only = var->data.is_xfb_only; > > + > > unsigned num_elements = type- > > >count_attribute_slots(is_vertex_input); > > - unsigned slot_end = this->disable_varying_packing ? 4 : > > - type->without_array()->vector_elements; > > + unsigned slot_end; > > + if (this->disable_varying_packing && > > + !(xfb_enabled && > > + is_varying_packing_safe(var, consumer_stage, > > producer_stage))) > > + slot_end = 4; > > + else > > + slot_end = type->without_array()->vector_elements; > > slot_end += *location - 1; > > > > /* FIXME: We could be smarter in the below code and loop > > back over > > @@ -1180,7 +1216,9 @@ varying_matches::assign_locations(struct > > gl_shader_program *prog, > > /* Increase the slot to make sure there is enough room > > for next > > * array element. > > */ > > - if (this->disable_varying_packing) > > + if (this->disable_varying_packing && > > + !(xfb_enabled && > > + is_varying_packing_safe(var, consumer_stage, > > producer_stage))) > > slot_end += 4; > > else > > slot_end += type->without_array()->vector_elements; > > @@ -1306,6 +1344,22 @@ varying_matches::match_comparator(const void > > *x_generic, const void *y_generic) > > > > > > /** > > + * Comparison function passed to qsort() to sort varyings used > > only by > > + * transform feedback when packing of other varyings is disabled. > > + */ > > +int > > +varying_matches::xfb_comparator(const void *x_generic, const void > > *y_generic) > > +{ > > + const match *x = (const match *) x_generic; > > + > > + if (x->producer_var != NULL && x->producer_var- > > >data.is_xfb_only) > > + return match_comparator(x_generic, y_generic); > > + > > + return PACKING_ORDER_LAST; > > +} > > + > > + > > +/** > > * Is the given variable a varying variable to be counted against > > the > > * limit in ctx->Const.MaxVarying? > > * This includes variables such as texcoords, colors and generic > > @@ -1623,26 +1677,67 @@ assign_varying_locations(struct gl_context > > *ctx, > > unsigned num_tfeedback_decls, > > tfeedback_decl *tfeedback_decls) > > { > > - if (ctx->Const.DisableVaryingPacking) { > > - /* Transform feedback code assumes varyings are packed, so > > if the driver > > - * has disabled varying packing, make sure it does not > > support transform > > - * feedback. > > - */ > > - assert(!ctx->Extensions.EXT_transform_feedback); > > - } > > + /* Transform feedback code assumes varying arrays are packed, > > so if the > > + * driver has disabled varying packing, make sure to at least > > enable > > + * packing required by transform feedback. > > + */ > > + bool xfb_enabled = ctx->Extensions.EXT_transform_feedback; > > Since this functionality is not available in OpenGL ES 1-2, shouldn't > the variable be assigned to the following? > > (_mesa_has_EXT_transform_feedback() || _mesa_is_gles3())
All the different requirements around packing make this a little confusing but the simple answer is no it doesn't need to have that restriction applied. The packing done when xfb_enabled is true is safe for SSO, and required for transform feedback, there is nothing wrong with enabling it for OpenGL ES 1-2. In fact it will already be enabled anyway for Intel as ctx->Const.DisableVaryingPacking is always true and none of the new rules apply to OpenGL ES 1-2 to disable it. In practice I believe all but some of the very old hardware that use gallium set ctx- >Const.DisableVaryingPacking to true, and in the cases where it is false its because they don't support EXT_transform_feedback. > > Regards, > Nanley > > > + > > + /* Disable varying packing for GLES 3.1+ and GL 4.4+, there is > > no guarantee > > + * that interpolation qualifiers will match between shaders in > > these > > + * versions. We also disable packing for the outer facing > > interfaces for > > + * SSO as they also do not have the same restriction during > > linking. > > + * > > + * Packing is still enabled on individual arrays, structs, and > > matrices as > > + * these are required by the transform feedback code and it is > > still safe > > + * to do so. We also enable packing when a varying is only used > > for > > + * transform feedback and its not a SSO. > > + * > > + * Varying packing currently only packs together varyings with > > matching > > + * interpolation qualifiers as the backends assume all packed > > components > > + * are to be processed in the same way. Therefore we cannot do > > packing in > > + * these versions of GL without the risk of mismatching > > interfaces. > > + * > > + * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 > > spec: > > + * > > + * "The type and presence of interpolation qualifiers of > > variables with > > + * the same name declared in all linked shaders for the same > > cross-stage > > + * interface must match, otherwise the link command will > > fail. > > + * > > + * When comparing an output from one stage to an input of a > > subsequent > > + * stage, the input and output don't match if their > > interpolation > > + * qualifiers (or lack thereof) are not the same." > > + * > > + * This text was also in at least revison 7 of the 4.40 spec > > but is no > > + * longer in revision 9 and not in the 4.50 spec. There is also > > no text in > > + * the GLSL ES spec that says these qualifiers must match > > across stages. > > + * > > + * From Section 4.3.9 (Interpolation) of the GLSL ES 3.0 spec: > > + * > > + * "The type and presence of the interpolation qualifiers > > and storage > > + * qualifiers and invariant qualifiers of variables with the > > same name > > + * declared in all linked shaders must match, otherwise the > > link command > > + * will fail." > > + * > > + * This text is no longer in the 3.1 or 3.2 specs. > > + */ > > + bool disable_varying_packing = ctx- > > >Const.DisableVaryingPacking; > > + if ((ctx->API == API_OPENGL_CORE && ctx->Version >= 44) || > > + (ctx->API == API_OPENGLES2 && ctx->Version >= 31) || > > + (prog->SeparateShader && (producer == NULL || consumer == > > NULL))) > > + disable_varying_packing = true; > > > > /* Tessellation shaders treat inputs and outputs as shared > > memory and can > > * access inputs and outputs of other invocations. > > * Therefore, they can't be lowered to temps easily (and > > definitely not > > * efficiently). > > */ > > - bool disable_varying_packing = > > - ctx->Const.DisableVaryingPacking || > > + disable_varying_packing = disable_varying_packing || > > (consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) || > > (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) || > > (producer && producer->Stage == MESA_SHADER_TESS_CTRL); > > > > - varying_matches matches(disable_varying_packing, > > + varying_matches matches(disable_varying_packing, xfb_enabled, > > producer ? producer->Stage : > > (gl_shader_stage)-1, > > consumer ? consumer->Stage : > > (gl_shader_stage)-1); > > hash_table *tfeedback_candidates > > @@ -1762,8 +1857,10 @@ assign_varying_locations(struct gl_context > > *ctx, > > return false; > > } > > > > - if (matched_candidate->toplevel_var- > > >data.is_unmatched_generic_inout) > > + if (matched_candidate->toplevel_var- > > >data.is_unmatched_generic_inout) { > > + matched_candidate->toplevel_var->data.is_xfb_only = 1; > > matches.record(matched_candidate->toplevel_var, NULL); > > + } > > } > > > > const uint64_t reserved_slots = > > @@ -1863,13 +1960,19 @@ assign_varying_locations(struct gl_context > > *ctx, > > if (vertex_attributes > 0) > > lower_packed_varyings(mem_ctx, vertex_attributes, > > ir_var_shader_in, producer, > > - VERT_ATTRIB_GENERIC0, true, > > + VERT_ATTRIB_GENERIC0, true, > > false, > > ctx- > > >Extensions.ARB_enhanced_layouts); > > } > > > > + /* TCS inputs are not packed so don't enable packing > > required for > > + * transform feedback if the the consumer is a TCS. > > + */ > > + if (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) > > + xfb_enabled = false; > > + > > lower_packed_varyings(mem_ctx, slots_used, > > ir_var_shader_out, producer, > > - VARYING_SLOT_VAR0, > > - disable_varying_packing, > > + VARYING_SLOT_VAR0, > > disable_varying_packing, > > + xfb_enabled, > > ctx->Extensions.ARB_enhanced_layouts); > > } > > > > @@ -1886,13 +1989,13 @@ assign_varying_locations(struct gl_context > > *ctx, > > unsigned frag_outs = _mesa_bitcount_64(reserved_slots); > > if (frag_outs > 0) > > lower_packed_varyings(mem_ctx, frag_outs, > > ir_var_shader_out, > > - consumer, FRAG_RESULT_DATA0, > > true, > > + consumer, FRAG_RESULT_DATA0, > > true, false, > > ctx- > > >Extensions.ARB_enhanced_layouts); > > } > > > > lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in, > > consumer, > > - VARYING_SLOT_VAR0, > > - disable_varying_packing, > > + VARYING_SLOT_VAR0, > > disable_varying_packing, > > + xfb_enabled, > > ctx->Extensions.ARB_enhanced_layouts); > > } > > > > diff --git a/src/compiler/glsl/lower_packed_varyings.cpp > > b/src/compiler/glsl/lower_packed_varyings.cpp > > index d6d68fc..f570167 100644 > > --- a/src/compiler/glsl/lower_packed_varyings.cpp > > +++ b/src/compiler/glsl/lower_packed_varyings.cpp > > @@ -172,7 +172,7 @@ update_packed_array_type(const glsl_type *type, > > const glsl_type *packed_type) > > > > static bool > > needs_lowering(ir_variable *var, bool has_enhanced_layouts, > > - bool disable_varying_packing) > > + bool disable_varying_packing, bool xfb_enabled) > > { > > /* Don't lower varying with explicit location unless > > ARB_enhanced_layouts > > * is enabled, also don't try to pack structs with explicit > > location as > > @@ -186,7 +186,10 @@ needs_lowering(ir_variable *var, bool > > has_enhanced_layouts, > > /* Don't disable packing for explicit locations when > > ARB_enhanced_layouts > > * is supported. > > */ > > - if (disable_varying_packing && !var->data.explicit_location) > > + if (disable_varying_packing && !var->data.is_xfb_only && > > + !var->data.explicit_location && > > + !((var->type->is_array() || var->type->is_record() || > > + var->type->is_matrix()) && xfb_enabled)) > > return false; > > > > /* Things composed of vec4's don't need lowering everything > > else does. */ > > @@ -285,6 +288,7 @@ public: > > exec_list *out_variables, > > unsigned base_location, > > bool disable_varying_packing, > > + bool xfb_enabled, > > bool has_enhanced_layouts); > > > > void run(struct gl_shader *shader); > > @@ -353,6 +357,7 @@ private: > > exec_list *out_variables; > > > > bool disable_varying_packing; > > + bool xfb_enabled; > > bool has_enhanced_layouts; > > }; > > > > @@ -362,7 +367,8 @@ > > lower_packed_varyings_visitor::lower_packed_varyings_visitor( > > void *mem_ctx, unsigned locations_used, ir_variable_mode > > mode, > > bool is_outer_array_vert_idx, exec_list *out_instructions, > > exec_list *out_variables, unsigned base_location, > > - bool disable_varying_packing, bool has_enhanced_layouts) > > + bool disable_varying_packing, bool xfb_enabled, > > + bool has_enhanced_layouts) > > : mem_ctx(mem_ctx), > > base_location(base_location), > > locations_used(locations_used), > > @@ -374,6 +380,7 @@ > > lower_packed_varyings_visitor::lower_packed_varyings_visitor( > > out_instructions(out_instructions), > > out_variables(out_variables), > > disable_varying_packing(disable_varying_packing), > > + xfb_enabled(xfb_enabled), > > has_enhanced_layouts(has_enhanced_layouts) > > { > > } > > @@ -388,7 +395,8 @@ lower_packed_varyings_visitor::run(struct > > gl_shader *shader) > > > > if (var->data.mode != this->mode || > > var->data.location < (int) this->base_location || > > - !needs_lowering(var, has_enhanced_layouts, > > disable_varying_packing)) > > + !needs_lowering(var, has_enhanced_layouts, > > disable_varying_packing, > > + xfb_enabled)) > > continue; > > > > /* This lowering pass is only capable of packing floats and > > ints > > @@ -1118,7 +1126,7 @@ void > > lower_packed_varyings(void *mem_ctx, unsigned locations_used, > > ir_variable_mode mode, gl_shader *shader, > > unsigned base_location, bool > > disable_varying_packing, > > - bool has_enhanced_layouts) > > + bool xfb_enabled, bool has_enhanced_layouts) > > { > > ir_function *main_func = shader->symbols->get_function("main"); > > exec_list void_parameters; > > @@ -1143,6 +1151,7 @@ lower_packed_varyings(void *mem_ctx, unsigned > > locations_used, > > &new_variables, > > base_location, > > disable_varying_packin > > g, > > + xfb_enabled, > > has_enhanced_layouts); > > visitor.run(shader); > > if (mode == ir_var_shader_out) { > > @@ -1190,7 +1199,7 @@ lower_packed_varyings(void *mem_ctx, unsigned > > locations_used, > > > > if (var->data.mode != mode || > > var->data.location < (int) base_location || > > - !needs_lowering(var, has_enhanced_layouts, true)) > > + !needs_lowering(var, has_enhanced_layouts, true, > > false)) > > continue; > > > > /* Clone the variable for program resource list before _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev