On 20 October 2013 09:20, Marek Olšák <mar...@gmail.com> wrote: > From: Marek Olšák <marek.ol...@amd.com> > > This avoids a defect in lower_output_reads. > > The problem is lower_output_reads treats the gl_FragData array as a single > variable. It first redirects all output writes to a temporary variable > (array) > and then writes the whole temporary variable to the output, generating > assignments to all elements of gl_FragData. >
Can you go into more detail about why this is a problem? At first glance it seems like it should be fine, because failing to assign to an element of gl_FragData is supposed to result in undefined data going to that fragment output. So lowering to a shader that does assign to that element of gl_FragData seems like it should be harmless. What am I missing here? > > BTW this pass can be modified to lower all arrays, not just inputs and > outputs. > The question is whether it is worth it. > --- > src/glsl/opt_dead_builtin_varyings.cpp | 163 > ++++++++++++++++++++++++++------- > 1 file changed, 132 insertions(+), 31 deletions(-) > > diff --git a/src/glsl/opt_dead_builtin_varyings.cpp > b/src/glsl/opt_dead_builtin_varyings.cpp > index 7e8cd43..72ff9df 100644 > --- a/src/glsl/opt_dead_builtin_varyings.cpp > +++ b/src/glsl/opt_dead_builtin_varyings.cpp > @@ -42,9 +42,11 @@ > * If any texture coordinate slots can be eliminated, the gl_TexCoord > array is > * broken down into separate vec4 variables with locations equal to > * VARYING_SLOT_TEX0 + i. > + * > + * The same is done for the gl_FragData fragment shader output. > */ > > -#include "main/imports.h" /* for snprintf */ > +#include "main/core.h" /* for snprintf and ARRAY_SIZE */ > #include "ir.h" > #include "ir_rvalue_visitor.h" > #include "ir_optimization.h" > @@ -60,10 +62,14 @@ namespace { > class varying_info_visitor : public ir_hierarchical_visitor { > public: > /* "mode" can be either ir_var_shader_in or ir_var_shader_out */ > - varying_info_visitor(ir_variable_mode mode) > + varying_info_visitor(ir_variable_mode mode, bool find_frag_outputs = > false) > : lower_texcoord_array(true), > texcoord_array(NULL), > texcoord_usage(0), > + find_frag_outputs(find_frag_outputs), > + lower_fragdata_array(true), > + fragdata_array(NULL), > + fragdata_usage(0), > color_usage(0), > tfeedback_color_usage(0), > fog(NULL), > @@ -79,8 +85,27 @@ public: > { > ir_variable *var = ir->variable_referenced(); > > - if (var && var->mode == this->mode && > - var->location == VARYING_SLOT_TEX0) { > + if (!var || var->mode != this->mode) > + return visit_continue; > + > + if (this->find_frag_outputs && var->location == FRAG_RESULT_DATA0) { > + this->fragdata_array = var; > + > + ir_constant *index = ir->array_index->as_constant(); > + if (index == NULL) { > + /* This is variable indexing. */ > + this->fragdata_usage |= (1 << var->type->array_size()) - 1; > + this->lower_fragdata_array = false; > + } > + else { > + this->fragdata_usage |= 1 << index->get_uint_component(0); > + } > + > + /* Don't visit the leaves of ir_dereference_array. */ > + return visit_continue_with_parent; > + } > + > + if (!this->find_frag_outputs && var->location == VARYING_SLOT_TEX0) > { > this->texcoord_array = var; > > ir_constant *index = ir->array_index->as_constant(); > @@ -105,8 +130,17 @@ public: > { > ir_variable *var = ir->variable_referenced(); > > - if (var->mode == this->mode && var->type->is_array() && > - var->location == VARYING_SLOT_TEX0) { > + if (var->mode != this->mode || !var->type->is_array()) > + return visit_continue; > + > + if (this->find_frag_outputs && var->location == FRAG_RESULT_DATA0) { > + /* This is a whole array dereference. */ > + this->fragdata_usage |= (1 << var->type->array_size()) - 1; > + this->lower_fragdata_array = false; > + return visit_continue; > + } > + > + if (!this->find_frag_outputs && var->location == VARYING_SLOT_TEX0) > { > /* This is a whole array dereference like "gl_TexCoord = x;", > * there's probably no point in lowering that. > */ > @@ -121,6 +155,10 @@ public: > if (var->mode != this->mode) > return visit_continue; > > + /* Nothing to do here for fragment outputs. */ > + if (this->find_frag_outputs) > + return visit_continue; > + > /* Handle colors and fog. */ > switch (var->location) { > case VARYING_SLOT_COL0: > @@ -185,12 +223,20 @@ public: > if (!this->texcoord_array) { > this->lower_texcoord_array = false; > } > + if (!this->fragdata_array) { > + this->lower_fragdata_array = false; > + } > } > > bool lower_texcoord_array; > ir_variable *texcoord_array; > unsigned texcoord_usage; /* bitmask */ > > + bool find_frag_outputs; /* false if it's looking for varyings */ > + bool lower_fragdata_array; > + ir_variable *fragdata_array; > + unsigned fragdata_usage; /* bitmask */ > + > ir_variable *color[2]; > ir_variable *backcolor[2]; > unsigned color_usage; /* bitmask */ > @@ -222,6 +268,7 @@ public: > { > void *const ctx = ir; > > + memset(this->new_fragdata, 0, sizeof(this->new_fragdata)); > memset(this->new_texcoord, 0, sizeof(this->new_texcoord)); > memset(this->new_color, 0, sizeof(this->new_color)); > memset(this->new_backcolor, 0, sizeof(this->new_backcolor)); > @@ -236,31 +283,16 @@ public: > * occurences of gl_TexCoord will be replaced with. > */ > if (info->lower_texcoord_array) { > - for (int i = MAX_TEXTURE_COORD_UNITS-1; i >= 0; i--) { > - if (info->texcoord_usage & (1 << i)) { > - char name[32]; > - > - if (!(external_texcoord_usage & (1 << i))) { > - /* This varying is unused in the next stage. Declare > - * a temporary instead of an output. */ > - snprintf(name, 32, "gl_%s_TexCoord%i_dummy", mode_str, > i); > - this->new_texcoord[i] = > - new (ctx) ir_variable(glsl_type::vec4_type, name, > - ir_var_temporary); > - } > - else { > - snprintf(name, 32, "gl_%s_TexCoord%i", mode_str, i); > - this->new_texcoord[i] = > - new(ctx) ir_variable(glsl_type::vec4_type, name, > - info->mode); > - this->new_texcoord[i]->location = VARYING_SLOT_TEX0 + i; > - this->new_texcoord[i]->explicit_location = true; > - this->new_texcoord[i]->explicit_index = 0; > - } > - > - ir->head->insert_before(new_texcoord[i]); > - } > - } > + prepare_array(ir, this->new_texcoord, > ARRAY_SIZE(this->new_texcoord), > + VARYING_SLOT_TEX0, "TexCoord", mode_str, > + info->texcoord_usage, external_texcoord_usage); > + } > + > + /* Handle gl_FragData in the same way like gl_TexCoord. */ > + if (info->lower_fragdata_array) { > + prepare_array(ir, this->new_fragdata, > ARRAY_SIZE(this->new_fragdata), > + FRAG_RESULT_DATA0, "FragData", mode_str, > + info->fragdata_usage, (1 << MAX_DRAW_BUFFERS) - 1); > } > > /* Create dummy variables which will replace set-but-unused color > and > @@ -301,6 +333,41 @@ public: > visit_list_elements(this, ir); > } > > + void prepare_array(exec_list *ir, > + struct ir_variable **new_var, > + int max_elements, unsigned start_location, > + const char *var_name, const char *mode_str, > + unsigned usage, unsigned external_usage) > + { > + void *const ctx = ir; > + > + for (int i = max_elements-1; i >= 0; i--) { > + if (usage & (1 << i)) { > + char name[32]; > + > + if (!(external_usage & (1 << i))) { > + /* This varying is unused in the next stage. Declare > + * a temporary instead of an output. */ > + snprintf(name, 32, "gl_%s_%s%i_dummy", mode_str, var_name, > i); > + new_var[i] = > + new (ctx) ir_variable(glsl_type::vec4_type, name, > + ir_var_temporary); > + } > + else { > + snprintf(name, 32, "gl_%s_%s%i", mode_str, var_name, i); > + new_var[i] = > + new(ctx) ir_variable(glsl_type::vec4_type, name, > + this->info->mode); > + new_var[i]->location = start_location + i; > + new_var[i]->explicit_location = true; > + new_var[i]->explicit_index = 0; > + } > + > + ir->head->insert_before(new_var[i]); > + } > + } > + } > + > virtual ir_visitor_status visit(ir_variable *var) > { > /* Remove the gl_TexCoord array. */ > @@ -309,6 +376,12 @@ public: > var->remove(); > } > > + /* Remove the gl_FragData array. */ > + if (this->info->lower_fragdata_array && > + var == this->info->fragdata_array) { > + var->remove(); > + } > + > /* Replace set-but-unused color and fog outputs with dummy > variables. */ > for (int i = 0; i < 2; i++) { > if (var == this->info->color[i] && this->new_color[i]) { > @@ -350,6 +423,19 @@ public: > } > } > > + /* Same for gl_FragData. */ > + if (this->info->lower_fragdata_array) { > + /* gl_TexCoord[i] occurence */ > This should say "gl_FragData[i] occurrence" > + ir_dereference_array *const da = > (*rvalue)->as_dereference_array(); > + > + if (da && da->variable_referenced() == > this->info->fragdata_array) { > + unsigned i = > da->array_index->as_constant()->get_uint_component(0); > + > + *rvalue = new(ctx) > ir_dereference_variable(this->new_fragdata[i]); > + return; > + } > + } > + > /* Replace set-but-unused color and fog outputs with dummy > variables. */ > ir_dereference_variable *const dv = > (*rvalue)->as_dereference_variable(); > if (!dv) > @@ -392,6 +478,7 @@ public: > > private: > const varying_info_visitor *info; > + ir_variable *new_fragdata[MAX_DRAW_BUFFERS]; > ir_variable *new_texcoord[MAX_TEXTURE_COORD_UNITS]; > ir_variable *new_color[2]; > ir_variable *new_backcolor[2]; > @@ -408,6 +495,15 @@ lower_texcoord_array(exec_list *ir, const > varying_info_visitor *info) > 1 | 2, true); > } > > +static void > +lower_fragdata_array(exec_list *ir) > +{ > + varying_info_visitor info(ir_var_shader_out, true); > + info.get(ir, 0, NULL); > + > + replace_varyings_visitor(ir, &info, 0, 0, 0); > +} > + > > void > do_dead_builtin_varyings(struct gl_context *ctx, > @@ -487,4 +583,9 @@ do_dead_builtin_varyings(struct gl_context *ctx, > producer_info.color_usage, > producer_info.has_fog); > } > At the top of the do_dead_builtin_varyings() function there's this logic: /* This optimization has no effect with the core context and GLES2, because * the built-in varyings we're eliminating here are not available there. * * EXT_separate_shader_objects doesn't allow this optimization, * because a program object can be bound partially (e.g. only one * stage of a program object can be bound). */ if (ctx->API == API_OPENGL_CORE || ctx->API == API_OPENGLES2 || ctx->Extensions.EXT_separate_shader_objects) { return; } Does this need to be changed so that when EXT_separate_shader_objects is present, we still go ahead and lower gl_FragData? > + > + /* Finally, lower the gl_FragData array to separate variables. */ > + if (consumer->Type == GL_FRAGMENT_SHADER) { > + lower_fragdata_array(consumer->ir); > + } > } > -- > 1.8.1.2 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev