On 27 October 2013 12:58, Marek Olšák <mar...@gmail.com> wrote: > On Sun, Oct 27, 2013 at 4:19 PM, Paul Berry <stereotype...@gmail.com> > wrote: > > On 20 October 2013 09:20, Marek Olšák <mar...@gmail.com> wrote: > >> > >> From: Marek Olšák <marek.ol...@amd.com> > >> > >> This avoids a defect in lower_output_reads. > >> > >> The problem is lower_output_reads treats the gl_FragData array as a > single > >> variable. It first redirects all output writes to a temporary variable > >> (array) > >> and then writes the whole temporary variable to the output, generating > >> assignments to all elements of gl_FragData. > > > > > > Can you go into more detail about why this is a problem? At first > glance it > > seems like it should be fine, because failing to assign to an element of > > gl_FragData is supposed to result in undefined data going to that > fragment > > output. So lowering to a shader that does assign to that element of > > gl_FragData seems like it should be harmless. What am I missing here? > > Thanks for the review. The problem is drivers cannot eliminate useless > writes to gl_FragData, and each enabled gl_FragData output decreases > performance. The GLSL compiler cannot eliminate the writes either, > because gl_FragData is an array. > > Maybe the GLSL compiler should resize arrays based on which elements > are written, so that unused elements are not declared, but this is not > enough for gl_FragData, where the i-th output can be written, but > (i-1)-th output doesn't have to be. >
Ah, ok. When I saw the word "defect", I misunderstood you to be fixing a correctness-of-rendering bug. As a performance optimization, I get it now. For driver back-ends that don't need lower_output_reads (such as i915 and i965), this optimization isn't needed. Would you mind adding a flag to ShaderCompilerOptions so that tgsi-based drivers can opt in to this new optimization? I want to make sure that the code generation of i965 and i915 isn't affected. With that change (and the other fixes we talked about), this patch is: Reviewed-by: Paul Berry <stereotype...@gmail.com> > > > > >> > >> > >> BTW this pass can be modified to lower all arrays, not just inputs and > >> outputs. > >> The question is whether it is worth it. > >> --- > >> src/glsl/opt_dead_builtin_varyings.cpp | 163 > >> ++++++++++++++++++++++++++------- > >> 1 file changed, 132 insertions(+), 31 deletions(-) > >> > >> diff --git a/src/glsl/opt_dead_builtin_varyings.cpp > >> b/src/glsl/opt_dead_builtin_varyings.cpp > >> index 7e8cd43..72ff9df 100644 > >> --- a/src/glsl/opt_dead_builtin_varyings.cpp > >> +++ b/src/glsl/opt_dead_builtin_varyings.cpp > >> @@ -42,9 +42,11 @@ > >> * If any texture coordinate slots can be eliminated, the gl_TexCoord > >> array is > >> * broken down into separate vec4 variables with locations equal to > >> * VARYING_SLOT_TEX0 + i. > >> + * > >> + * The same is done for the gl_FragData fragment shader output. > >> */ > >> > >> -#include "main/imports.h" /* for snprintf */ > >> +#include "main/core.h" /* for snprintf and ARRAY_SIZE */ > >> #include "ir.h" > >> #include "ir_rvalue_visitor.h" > >> #include "ir_optimization.h" > >> @@ -60,10 +62,14 @@ namespace { > >> class varying_info_visitor : public ir_hierarchical_visitor { > >> public: > >> /* "mode" can be either ir_var_shader_in or ir_var_shader_out */ > >> - varying_info_visitor(ir_variable_mode mode) > >> + varying_info_visitor(ir_variable_mode mode, bool find_frag_outputs = > >> false) > >> : lower_texcoord_array(true), > >> texcoord_array(NULL), > >> texcoord_usage(0), > >> + find_frag_outputs(find_frag_outputs), > >> + lower_fragdata_array(true), > >> + fragdata_array(NULL), > >> + fragdata_usage(0), > >> color_usage(0), > >> tfeedback_color_usage(0), > >> fog(NULL), > >> @@ -79,8 +85,27 @@ public: > >> { > >> ir_variable *var = ir->variable_referenced(); > >> > >> - if (var && var->mode == this->mode && > >> - var->location == VARYING_SLOT_TEX0) { > >> + if (!var || var->mode != this->mode) > >> + return visit_continue; > >> + > >> + if (this->find_frag_outputs && var->location == > FRAG_RESULT_DATA0) > >> { > >> + this->fragdata_array = var; > >> + > >> + ir_constant *index = ir->array_index->as_constant(); > >> + if (index == NULL) { > >> + /* This is variable indexing. */ > >> + this->fragdata_usage |= (1 << var->type->array_size()) - 1; > >> + this->lower_fragdata_array = false; > >> + } > >> + else { > >> + this->fragdata_usage |= 1 << index->get_uint_component(0); > >> + } > >> + > >> + /* Don't visit the leaves of ir_dereference_array. */ > >> + return visit_continue_with_parent; > >> + } > >> + > >> + if (!this->find_frag_outputs && var->location == > VARYING_SLOT_TEX0) > >> { > >> this->texcoord_array = var; > >> > >> ir_constant *index = ir->array_index->as_constant(); > >> @@ -105,8 +130,17 @@ public: > >> { > >> ir_variable *var = ir->variable_referenced(); > >> > >> - if (var->mode == this->mode && var->type->is_array() && > >> - var->location == VARYING_SLOT_TEX0) { > >> + if (var->mode != this->mode || !var->type->is_array()) > >> + return visit_continue; > >> + > >> + if (this->find_frag_outputs && var->location == > FRAG_RESULT_DATA0) > >> { > >> + /* This is a whole array dereference. */ > >> + this->fragdata_usage |= (1 << var->type->array_size()) - 1; > >> + this->lower_fragdata_array = false; > >> + return visit_continue; > >> + } > >> + > >> + if (!this->find_frag_outputs && var->location == > VARYING_SLOT_TEX0) > >> { > >> /* This is a whole array dereference like "gl_TexCoord = x;", > >> * there's probably no point in lowering that. > >> */ > >> @@ -121,6 +155,10 @@ public: > >> if (var->mode != this->mode) > >> return visit_continue; > >> > >> + /* Nothing to do here for fragment outputs. */ > >> + if (this->find_frag_outputs) > >> + return visit_continue; > >> + > >> /* Handle colors and fog. */ > >> switch (var->location) { > >> case VARYING_SLOT_COL0: > >> @@ -185,12 +223,20 @@ public: > >> if (!this->texcoord_array) { > >> this->lower_texcoord_array = false; > >> } > >> + if (!this->fragdata_array) { > >> + this->lower_fragdata_array = false; > >> + } > >> } > >> > >> bool lower_texcoord_array; > >> ir_variable *texcoord_array; > >> unsigned texcoord_usage; /* bitmask */ > >> > >> + bool find_frag_outputs; /* false if it's looking for varyings */ > >> + bool lower_fragdata_array; > >> + ir_variable *fragdata_array; > >> + unsigned fragdata_usage; /* bitmask */ > >> + > >> ir_variable *color[2]; > >> ir_variable *backcolor[2]; > >> unsigned color_usage; /* bitmask */ > >> @@ -222,6 +268,7 @@ public: > >> { > >> void *const ctx = ir; > >> > >> + memset(this->new_fragdata, 0, sizeof(this->new_fragdata)); > >> memset(this->new_texcoord, 0, sizeof(this->new_texcoord)); > >> memset(this->new_color, 0, sizeof(this->new_color)); > >> memset(this->new_backcolor, 0, sizeof(this->new_backcolor)); > >> @@ -236,31 +283,16 @@ public: > >> * occurences of gl_TexCoord will be replaced with. > >> */ > >> if (info->lower_texcoord_array) { > >> - for (int i = MAX_TEXTURE_COORD_UNITS-1; i >= 0; i--) { > >> - if (info->texcoord_usage & (1 << i)) { > >> - char name[32]; > >> - > >> - if (!(external_texcoord_usage & (1 << i))) { > >> - /* This varying is unused in the next stage. Declare > >> - * a temporary instead of an output. */ > >> - snprintf(name, 32, "gl_%s_TexCoord%i_dummy", > mode_str, > >> i); > >> - this->new_texcoord[i] = > >> - new (ctx) ir_variable(glsl_type::vec4_type, name, > >> - ir_var_temporary); > >> - } > >> - else { > >> - snprintf(name, 32, "gl_%s_TexCoord%i", mode_str, i); > >> - this->new_texcoord[i] = > >> - new(ctx) ir_variable(glsl_type::vec4_type, name, > >> - info->mode); > >> - this->new_texcoord[i]->location = VARYING_SLOT_TEX0 + > >> i; > >> - this->new_texcoord[i]->explicit_location = true; > >> - this->new_texcoord[i]->explicit_index = 0; > >> - } > >> - > >> - ir->head->insert_before(new_texcoord[i]); > >> - } > >> - } > >> + prepare_array(ir, this->new_texcoord, > >> ARRAY_SIZE(this->new_texcoord), > >> + VARYING_SLOT_TEX0, "TexCoord", mode_str, > >> + info->texcoord_usage, external_texcoord_usage); > >> + } > >> + > >> + /* Handle gl_FragData in the same way like gl_TexCoord. */ > >> + if (info->lower_fragdata_array) { > >> + prepare_array(ir, this->new_fragdata, > >> ARRAY_SIZE(this->new_fragdata), > >> + FRAG_RESULT_DATA0, "FragData", mode_str, > >> + info->fragdata_usage, (1 << MAX_DRAW_BUFFERS) - > >> 1); > >> } > >> > >> /* Create dummy variables which will replace set-but-unused color > >> and > >> @@ -301,6 +333,41 @@ public: > >> visit_list_elements(this, ir); > >> } > >> > >> + void prepare_array(exec_list *ir, > >> + struct ir_variable **new_var, > >> + int max_elements, unsigned start_location, > >> + const char *var_name, const char *mode_str, > >> + unsigned usage, unsigned external_usage) > >> + { > >> + void *const ctx = ir; > >> + > >> + for (int i = max_elements-1; i >= 0; i--) { > >> + if (usage & (1 << i)) { > >> + char name[32]; > >> + > >> + if (!(external_usage & (1 << i))) { > >> + /* This varying is unused in the next stage. Declare > >> + * a temporary instead of an output. */ > >> + snprintf(name, 32, "gl_%s_%s%i_dummy", mode_str, > var_name, > >> i); > >> + new_var[i] = > >> + new (ctx) ir_variable(glsl_type::vec4_type, name, > >> + ir_var_temporary); > >> + } > >> + else { > >> + snprintf(name, 32, "gl_%s_%s%i", mode_str, var_name, i); > >> + new_var[i] = > >> + new(ctx) ir_variable(glsl_type::vec4_type, name, > >> + this->info->mode); > >> + new_var[i]->location = start_location + i; > >> + new_var[i]->explicit_location = true; > >> + new_var[i]->explicit_index = 0; > >> + } > >> + > >> + ir->head->insert_before(new_var[i]); > >> + } > >> + } > >> + } > >> + > >> virtual ir_visitor_status visit(ir_variable *var) > >> { > >> /* Remove the gl_TexCoord array. */ > >> @@ -309,6 +376,12 @@ public: > >> var->remove(); > >> } > >> > >> + /* Remove the gl_FragData array. */ > >> + if (this->info->lower_fragdata_array && > >> + var == this->info->fragdata_array) { > >> + var->remove(); > >> + } > >> + > >> /* Replace set-but-unused color and fog outputs with dummy > >> variables. */ > >> for (int i = 0; i < 2; i++) { > >> if (var == this->info->color[i] && this->new_color[i]) { > >> @@ -350,6 +423,19 @@ public: > >> } > >> } > >> > >> + /* Same for gl_FragData. */ > >> + if (this->info->lower_fragdata_array) { > >> + /* gl_TexCoord[i] occurence */ > > > > > > This should say "gl_FragData[i] occurrence" > > Will fix. > > > > >> > >> + ir_dereference_array *const da = > >> (*rvalue)->as_dereference_array(); > >> + > >> + if (da && da->variable_referenced() == > >> this->info->fragdata_array) { > >> + unsigned i = > >> da->array_index->as_constant()->get_uint_component(0); > >> + > >> + *rvalue = new(ctx) > >> ir_dereference_variable(this->new_fragdata[i]); > >> + return; > >> + } > >> + } > >> + > >> /* Replace set-but-unused color and fog outputs with dummy > >> variables. */ > >> ir_dereference_variable *const dv = > >> (*rvalue)->as_dereference_variable(); > >> if (!dv) > >> @@ -392,6 +478,7 @@ public: > >> > >> private: > >> const varying_info_visitor *info; > >> + ir_variable *new_fragdata[MAX_DRAW_BUFFERS]; > >> ir_variable *new_texcoord[MAX_TEXTURE_COORD_UNITS]; > >> ir_variable *new_color[2]; > >> ir_variable *new_backcolor[2]; > >> @@ -408,6 +495,15 @@ lower_texcoord_array(exec_list *ir, const > >> varying_info_visitor *info) > >> 1 | 2, true); > >> } > >> > >> +static void > >> +lower_fragdata_array(exec_list *ir) > >> +{ > >> + varying_info_visitor info(ir_var_shader_out, true); > >> + info.get(ir, 0, NULL); > >> + > >> + replace_varyings_visitor(ir, &info, 0, 0, 0); > >> +} > >> + > >> > >> void > >> do_dead_builtin_varyings(struct gl_context *ctx, > >> @@ -487,4 +583,9 @@ do_dead_builtin_varyings(struct gl_context *ctx, > >> producer_info.color_usage, > >> producer_info.has_fog); > >> } > > > > > > At the top of the do_dead_builtin_varyings() function there's this logic: > > > > /* This optimization has no effect with the core context and GLES2, > > because > > * the built-in varyings we're eliminating here are not available > there. > > * > > * EXT_separate_shader_objects doesn't allow this optimization, > > * because a program object can be bound partially (e.g. only one > > * stage of a program object can be bound). > > */ > > if (ctx->API == API_OPENGL_CORE || > > ctx->API == API_OPENGLES2 || > > ctx->Extensions.EXT_separate_shader_objects) { > > return; > > } > > > > Does this need to be changed so that when EXT_separate_shader_objects is > > present, we still go ahead and lower gl_FragData? > > Good point. Yes, we want to always lower gl_FragData. > > Marek >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev