On Fri, Jan 29, 2016 at 04:52:54PM +1100, Timothy Arceri wrote: > In GLES 3.1+ and GL 4.4+ there is no guarantee that interpolation > qualifiers will match between stages so we cannot safely pack > varyings using the current packing pass in Mesa. > > We also disable packing on outward facing SSO as these too are outside > the rule that guarantees the interpolation qualifiers match. > > We do however enable packing on individual arrays, structs, and > matrices as these are required by the transform feedback code and it > is still safe to do so. > > Finally we also enable packing when a varying is only used for > transform feedback and its not a SSO. > > This fixes all remaining rendering issues with the dEQP SSO tests, > the only issues remaining with thoses tests are to do with validation. > > Note: There is still one remaining SSO bug that this patch doesn't fix. > Their is a chance that VS -> TCS will have mismatching interfaces > because we pack VS output in case its used by transform feedback but > don't pack TCS input for performance reasons. This patch will make the > situation better but doesn't fix it. > > Cc: Samuel Iglesias Gonsálvez <sigles...@igalia.com> > Cc: Tapani Pälli <tapani.pa...@intel.com> > --- > > This patch applies on top of my ARB_enhanced_layouts component packing > series [1]. > > The GLES 3.1 and GL 4.4 changes were tested by changing the code to > always disable packing and running the changes in Intel CI system. > > This patch would also make it possible for a backend to do less packing > in GLSL IR if it wished to do so now that the transform feedback requirements > have been isolated. > > [1] http://patchwork.freedesktop.org/series/2101/ > > src/compiler/glsl/ir.h | 7 + > src/compiler/glsl/ir_optimization.h | 2 +- > src/compiler/glsl/link_varyings.cpp | 199 > +++++++++++++++++++++------- > src/compiler/glsl/lower_packed_varyings.cpp | 21 ++- > 4 files changed, 174 insertions(+), 55 deletions(-) > > diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h > index 287c0cb..8cadf34 100644 > --- a/src/compiler/glsl/ir.h > +++ b/src/compiler/glsl/ir.h > @@ -736,6 +736,13 @@ public: > unsigned is_unmatched_generic_inout:1; > > /** > + * Is this varying used only by transform feedback? > + * > + * This is used by the linker to decide if its safe to pack the > varying. > + */ > + unsigned is_xfb_only:1; > + > + /** > * If non-zero, then this variable may be packed along with other > variables > * into a single varying slot, so this offset should be applied when > * accessing components. For example, an offset of 1 means that the x > diff --git a/src/compiler/glsl/ir_optimization.h > b/src/compiler/glsl/ir_optimization.h > index 94658bb..1ca26cd 100644 > --- a/src/compiler/glsl/ir_optimization.h > +++ b/src/compiler/glsl/ir_optimization.h > @@ -129,7 +129,7 @@ void lower_packed_varyings(void *mem_ctx, > unsigned locations_used, ir_variable_mode mode, > gl_shader *shader, unsigned base_location, > bool disable_varying_packing, > - bool has_enhanced_layouts); > + bool xfb_enabled, bool has_enhanced_layouts); > bool lower_vector_insert(exec_list *instructions, bool > lower_nonconstant_index); > bool lower_vector_derefs(gl_shader *shader); > void lower_named_interface_blocks(void *mem_ctx, gl_shader *shader); > diff --git a/src/compiler/glsl/link_varyings.cpp > b/src/compiler/glsl/link_varyings.cpp > index 78eb2b6..2716723 100644 > --- a/src/compiler/glsl/link_varyings.cpp > +++ b/src/compiler/glsl/link_varyings.cpp > @@ -41,6 +41,23 @@ > > > /** > + * Packing is always safe on individual arrays, structure and matices. It is > + * also safe if the varying is only used for transform feedback. > + */ > +static bool > +is_varying_packing_safe(const ir_variable *var, > + const gl_shader_stage consumer_stage, > + const gl_shader_stage producer_stage) { > + if ((consumer_stage == MESA_SHADER_TESS_EVAL) || > + (consumer_stage == MESA_SHADER_TESS_CTRL) || > + (producer_stage == MESA_SHADER_TESS_CTRL)) { > + return false; > +} > + return var->type->is_array() || var->type->is_record() || > + var->type->is_matrix() || var->data.is_xfb_only; > +} > + > +/** > * Get the varying type stripped of the outermost array if we're processing > * a stage whose varyings are arrays indexed by a vertex number (such as > * geometry shader inputs). > @@ -878,7 +895,7 @@ namespace { > class varying_matches > { > public: > - varying_matches(bool disable_varying_packing, > + varying_matches(bool disable_varying_packing, bool xfb_enabled, > gl_shader_stage producer_stage, > gl_shader_stage consumer_stage); > ~varying_matches(); > @@ -892,10 +909,22 @@ private: > * If true, this driver disables varying packing, so all varyings need to > * be aligned on slot boundaries, and take up a number of slots equal to > * their number of matrix columns times their array size. > + * > + * Packing may also be disabled because our current packing method is not > + * safe in SSO or versions of OpenGL where interpolation qualifiers are > not > + * guaranteed to match across stages. > */ > const bool disable_varying_packing; > > /** > + * If true, this driver has transform feedback enabled. The transform > + * feedback code requires at least some packing be done even when varying > + * packing is disabled, fortunately the cases its required for safe its > + * still to do so. See is_varying_packing_safe(). > + */ > + const bool xfb_enabled; > + > + /** > * Enum representing the order in which varyings are packed within a > * packing class. > * > @@ -909,11 +938,13 @@ private: > PACKING_ORDER_VEC2, > PACKING_ORDER_SCALAR, > PACKING_ORDER_VEC3, > + PACKING_ORDER_LAST, > }; > > static unsigned compute_packing_class(const ir_variable *var); > static packing_order_enum compute_packing_order(const ir_variable *var); > static int match_comparator(const void *x_generic, const void *y_generic); > + static int xfb_comparator(const void *x_generic, const void *y_generic); > > /** > * Structure recording the relationship between a single producer output > @@ -969,9 +1000,11 @@ private: > } /* anonymous namespace */ > > varying_matches::varying_matches(bool disable_varying_packing, > + bool xfb_enabled, > gl_shader_stage producer_stage, > gl_shader_stage consumer_stage) > : disable_varying_packing(disable_varying_packing), > + xfb_enabled(xfb_enabled), > producer_stage(producer_stage), > consumer_stage(consumer_stage) > { > @@ -1067,7 +1100,9 @@ varying_matches::record(ir_variable *producer_var, > ir_variable *consumer_var) > = this->compute_packing_class(var); > this->matches[this->num_matches].packing_order > = this->compute_packing_order(var); > - if (this->disable_varying_packing) { > + if (this->disable_varying_packing && > + !(xfb_enabled && > + is_varying_packing_safe(var, consumer_stage, producer_stage))) { > unsigned slots = type->count_attribute_slots(false); > this->matches[this->num_matches].num_components = slots * 4; > } else { > @@ -1093,37 +1128,24 @@ varying_matches::assign_locations(struct > gl_shader_program *prog, > uint64_t reserved_slots, > bool separate_shader) > { > - /* We disable varying sorting for separate shader programs for the > - * following reasons: > - * > - * 1/ All programs must sort the code in the same order to guarantee the > - * interface matching. However varying_matches::record() will change > the > - * interpolation qualifier of some stages. > - * > - * 2/ GLSL version 4.50 removes the matching constrain on the > interpolation > - * qualifier. > - * > - * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.40 spec: > - * > - * "The type and presence of interpolation qualifiers of variables with > - * the same name declared in all linked shaders for the same > cross-stage > - * interface must match, otherwise the link command will fail. > - * > - * When comparing an output from one stage to an input of a subsequent > - * stage, the input and output don't match if their interpolation > - * qualifiers (or lack thereof) are not the same." > - * > - * "It is a link-time error if, within the same stage, the > interpolation > - * qualifiers of variables of the same name do not match." > + /* If packing has been disbaled then we cannot safely sort the varyings as > + * it may mean we are using a version of OpenGL where interpolation > + * qualifiers are not guaranteed to be matching across shaders, sorting > + * in this case could result in mismatching shader interfaces. > */ > - if (!separate_shader) { > + if (!this->disable_varying_packing) { > /* Sort varying matches into an order that makes them easy to pack. */ > qsort(this->matches, this->num_matches, sizeof(*this->matches), > &varying_matches::match_comparator); > + } else if (xfb_enabled) { > + /* Only sort varyings that are only used by transform feedback. */ > + qsort(this->matches, this->num_matches, sizeof(*this->matches), > + &varying_matches::xfb_comparator); > } > > unsigned generic_location = 0; > unsigned generic_patch_location = MAX_VARYING*4; > + bool previous_var_xfb_only = false; > > for (unsigned i = 0; i < this->num_matches; i++) { > unsigned *location = &generic_location; > @@ -1147,16 +1169,30 @@ varying_matches::assign_locations(struct > gl_shader_program *prog, > /* Advance to the next slot if this varying has a different packing > * class than the previous one, and we're not already on a slot > * boundary. > + * > + * Also advance to the next slot if packing disabled, this makes sure > we > + * don't assign varyings the same locations, this is possible because > + * we still pack individual arrays, records and matrices even when > + * packing is disabled. Note we don't advance to the next slot if we > can > + * pack varyings together that are only used for transform feedback. > */ > - if (i > 0 && > - this->matches[i - 1].packing_class > - != this->matches[i].packing_class) { > + if ((this->disable_varying_packing && > + !(previous_var_xfb_only && var->data.is_xfb_only)) || > + (i > 0 && this->matches[i - 1].packing_class > + != this->matches[i].packing_class )) { > *location = ALIGN(*location, 4); > } > > + previous_var_xfb_only = var->data.is_xfb_only; > + > unsigned num_elements = type->count_attribute_slots(is_vertex_input); > - unsigned slot_end = this->disable_varying_packing ? 4 : > - type->without_array()->vector_elements; > + unsigned slot_end; > + if (this->disable_varying_packing && > + !(xfb_enabled && > + is_varying_packing_safe(var, consumer_stage, producer_stage))) > + slot_end = 4; > + else > + slot_end = type->without_array()->vector_elements; > slot_end += *location - 1; > > /* FIXME: We could be smarter in the below code and loop back over > @@ -1180,7 +1216,9 @@ varying_matches::assign_locations(struct > gl_shader_program *prog, > /* Increase the slot to make sure there is enough room for next > * array element. > */ > - if (this->disable_varying_packing) > + if (this->disable_varying_packing && > + !(xfb_enabled && > + is_varying_packing_safe(var, consumer_stage, producer_stage))) > slot_end += 4; > else > slot_end += type->without_array()->vector_elements; > @@ -1306,6 +1344,22 @@ varying_matches::match_comparator(const void > *x_generic, const void *y_generic) > > > /** > + * Comparison function passed to qsort() to sort varyings used only by > + * transform feedback when packing of other varyings is disabled. > + */ > +int > +varying_matches::xfb_comparator(const void *x_generic, const void *y_generic) > +{ > + const match *x = (const match *) x_generic; > + > + if (x->producer_var != NULL && x->producer_var->data.is_xfb_only) > + return match_comparator(x_generic, y_generic); > + > + return PACKING_ORDER_LAST; > +} > + > + > +/** > * Is the given variable a varying variable to be counted against the > * limit in ctx->Const.MaxVarying? > * This includes variables such as texcoords, colors and generic > @@ -1623,26 +1677,67 @@ assign_varying_locations(struct gl_context *ctx, > unsigned num_tfeedback_decls, > tfeedback_decl *tfeedback_decls) > { > - if (ctx->Const.DisableVaryingPacking) { > - /* Transform feedback code assumes varyings are packed, so if the > driver > - * has disabled varying packing, make sure it does not support > transform > - * feedback. > - */ > - assert(!ctx->Extensions.EXT_transform_feedback); > - } > + /* Transform feedback code assumes varying arrays are packed, so if the > + * driver has disabled varying packing, make sure to at least enable > + * packing required by transform feedback. > + */ > + bool xfb_enabled = ctx->Extensions.EXT_transform_feedback;
Since this functionality is not available in OpenGL ES 1-2, shouldn't the variable be assigned to the following? (_mesa_has_EXT_transform_feedback() || _mesa_is_gles3()) Regards, Nanley > + > + /* Disable varying packing for GLES 3.1+ and GL 4.4+, there is no > guarantee > + * that interpolation qualifiers will match between shaders in these > + * versions. We also disable packing for the outer facing interfaces for > + * SSO as they also do not have the same restriction during linking. > + * > + * Packing is still enabled on individual arrays, structs, and matrices as > + * these are required by the transform feedback code and it is still safe > + * to do so. We also enable packing when a varying is only used for > + * transform feedback and its not a SSO. > + * > + * Varying packing currently only packs together varyings with matching > + * interpolation qualifiers as the backends assume all packed components > + * are to be processed in the same way. Therefore we cannot do packing in > + * these versions of GL without the risk of mismatching interfaces. > + * > + * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec: > + * > + * "The type and presence of interpolation qualifiers of variables with > + * the same name declared in all linked shaders for the same > cross-stage > + * interface must match, otherwise the link command will fail. > + * > + * When comparing an output from one stage to an input of a subsequent > + * stage, the input and output don't match if their interpolation > + * qualifiers (or lack thereof) are not the same." > + * > + * This text was also in at least revison 7 of the 4.40 spec but is no > + * longer in revision 9 and not in the 4.50 spec. There is also no text in > + * the GLSL ES spec that says these qualifiers must match across stages. > + * > + * From Section 4.3.9 (Interpolation) of the GLSL ES 3.0 spec: > + * > + * "The type and presence of the interpolation qualifiers and storage > + * qualifiers and invariant qualifiers of variables with the same name > + * declared in all linked shaders must match, otherwise the link > command > + * will fail." > + * > + * This text is no longer in the 3.1 or 3.2 specs. > + */ > + bool disable_varying_packing = ctx->Const.DisableVaryingPacking; > + if ((ctx->API == API_OPENGL_CORE && ctx->Version >= 44) || > + (ctx->API == API_OPENGLES2 && ctx->Version >= 31) || > + (prog->SeparateShader && (producer == NULL || consumer == NULL))) > + disable_varying_packing = true; > > /* Tessellation shaders treat inputs and outputs as shared memory and can > * access inputs and outputs of other invocations. > * Therefore, they can't be lowered to temps easily (and definitely not > * efficiently). > */ > - bool disable_varying_packing = > - ctx->Const.DisableVaryingPacking || > + disable_varying_packing = disable_varying_packing || > (consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) || > (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) || > (producer && producer->Stage == MESA_SHADER_TESS_CTRL); > > - varying_matches matches(disable_varying_packing, > + varying_matches matches(disable_varying_packing, xfb_enabled, > producer ? producer->Stage : (gl_shader_stage)-1, > consumer ? consumer->Stage : (gl_shader_stage)-1); > hash_table *tfeedback_candidates > @@ -1762,8 +1857,10 @@ assign_varying_locations(struct gl_context *ctx, > return false; > } > > - if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout) > + if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout) { > + matched_candidate->toplevel_var->data.is_xfb_only = 1; > matches.record(matched_candidate->toplevel_var, NULL); > + } > } > > const uint64_t reserved_slots = > @@ -1863,13 +1960,19 @@ assign_varying_locations(struct gl_context *ctx, > if (vertex_attributes > 0) > lower_packed_varyings(mem_ctx, vertex_attributes, > ir_var_shader_in, producer, > - VERT_ATTRIB_GENERIC0, true, > + VERT_ATTRIB_GENERIC0, true, false, > ctx->Extensions.ARB_enhanced_layouts); > } > > + /* TCS inputs are not packed so don't enable packing required for > + * transform feedback if the the consumer is a TCS. > + */ > + if (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) > + xfb_enabled = false; > + > lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_out, producer, > - VARYING_SLOT_VAR0, > - disable_varying_packing, > + VARYING_SLOT_VAR0, disable_varying_packing, > + xfb_enabled, > ctx->Extensions.ARB_enhanced_layouts); > } > > @@ -1886,13 +1989,13 @@ assign_varying_locations(struct gl_context *ctx, > unsigned frag_outs = _mesa_bitcount_64(reserved_slots); > if (frag_outs > 0) > lower_packed_varyings(mem_ctx, frag_outs, ir_var_shader_out, > - consumer, FRAG_RESULT_DATA0, true, > + consumer, FRAG_RESULT_DATA0, true, false, > ctx->Extensions.ARB_enhanced_layouts); > } > > lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in, consumer, > - VARYING_SLOT_VAR0, > - disable_varying_packing, > + VARYING_SLOT_VAR0, disable_varying_packing, > + xfb_enabled, > ctx->Extensions.ARB_enhanced_layouts); > } > > diff --git a/src/compiler/glsl/lower_packed_varyings.cpp > b/src/compiler/glsl/lower_packed_varyings.cpp > index d6d68fc..f570167 100644 > --- a/src/compiler/glsl/lower_packed_varyings.cpp > +++ b/src/compiler/glsl/lower_packed_varyings.cpp > @@ -172,7 +172,7 @@ update_packed_array_type(const glsl_type *type, const > glsl_type *packed_type) > > static bool > needs_lowering(ir_variable *var, bool has_enhanced_layouts, > - bool disable_varying_packing) > + bool disable_varying_packing, bool xfb_enabled) > { > /* Don't lower varying with explicit location unless ARB_enhanced_layouts > * is enabled, also don't try to pack structs with explicit location as > @@ -186,7 +186,10 @@ needs_lowering(ir_variable *var, bool > has_enhanced_layouts, > /* Don't disable packing for explicit locations when ARB_enhanced_layouts > * is supported. > */ > - if (disable_varying_packing && !var->data.explicit_location) > + if (disable_varying_packing && !var->data.is_xfb_only && > + !var->data.explicit_location && > + !((var->type->is_array() || var->type->is_record() || > + var->type->is_matrix()) && xfb_enabled)) > return false; > > /* Things composed of vec4's don't need lowering everything else does. */ > @@ -285,6 +288,7 @@ public: > exec_list *out_variables, > unsigned base_location, > bool disable_varying_packing, > + bool xfb_enabled, > bool has_enhanced_layouts); > > void run(struct gl_shader *shader); > @@ -353,6 +357,7 @@ private: > exec_list *out_variables; > > bool disable_varying_packing; > + bool xfb_enabled; > bool has_enhanced_layouts; > }; > > @@ -362,7 +367,8 @@ > lower_packed_varyings_visitor::lower_packed_varyings_visitor( > void *mem_ctx, unsigned locations_used, ir_variable_mode mode, > bool is_outer_array_vert_idx, exec_list *out_instructions, > exec_list *out_variables, unsigned base_location, > - bool disable_varying_packing, bool has_enhanced_layouts) > + bool disable_varying_packing, bool xfb_enabled, > + bool has_enhanced_layouts) > : mem_ctx(mem_ctx), > base_location(base_location), > locations_used(locations_used), > @@ -374,6 +380,7 @@ > lower_packed_varyings_visitor::lower_packed_varyings_visitor( > out_instructions(out_instructions), > out_variables(out_variables), > disable_varying_packing(disable_varying_packing), > + xfb_enabled(xfb_enabled), > has_enhanced_layouts(has_enhanced_layouts) > { > } > @@ -388,7 +395,8 @@ lower_packed_varyings_visitor::run(struct gl_shader > *shader) > > if (var->data.mode != this->mode || > var->data.location < (int) this->base_location || > - !needs_lowering(var, has_enhanced_layouts, > disable_varying_packing)) > + !needs_lowering(var, has_enhanced_layouts, disable_varying_packing, > + xfb_enabled)) > continue; > > /* This lowering pass is only capable of packing floats and ints > @@ -1118,7 +1126,7 @@ void > lower_packed_varyings(void *mem_ctx, unsigned locations_used, > ir_variable_mode mode, gl_shader *shader, > unsigned base_location, bool disable_varying_packing, > - bool has_enhanced_layouts) > + bool xfb_enabled, bool has_enhanced_layouts) > { > ir_function *main_func = shader->symbols->get_function("main"); > exec_list void_parameters; > @@ -1143,6 +1151,7 @@ lower_packed_varyings(void *mem_ctx, unsigned > locations_used, > &new_variables, > base_location, > disable_varying_packing, > + xfb_enabled, > has_enhanced_layouts); > visitor.run(shader); > if (mode == ir_var_shader_out) { > @@ -1190,7 +1199,7 @@ lower_packed_varyings(void *mem_ctx, unsigned > locations_used, > > if (var->data.mode != mode || > var->data.location < (int) base_location || > - !needs_lowering(var, has_enhanced_layouts, true)) > + !needs_lowering(var, has_enhanced_layouts, true, false)) > continue; > > /* Clone the variable for program resource list before > -- > 2.5.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev