This optimisation pass will look for and pack together float, vec2, vec3 varyings in fragment shaders and transform the vertex shader accordingly. It might improve performance depending on the hardware. --- src/glsl/Makefile | 1 + src/glsl/linker.cpp | 3 + src/glsl/pack_varyings.cpp | 491 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 495 insertions(+), 0 deletions(-) create mode 100644 src/glsl/pack_varyings.cpp
diff --git a/src/glsl/Makefile b/src/glsl/Makefile index e2d29bd..8ce06bd 100644 --- a/src/glsl/Makefile +++ b/src/glsl/Makefile @@ -84,6 +84,7 @@ CXX_SOURCES = \ opt_structure_splitting.cpp \ opt_swizzle_swizzle.cpp \ opt_tree_grafting.cpp \ + pack_varyings.cpp \ s_expression.cpp LIBS = \ diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 255edc6..97b25ff 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -1404,6 +1404,7 @@ demote_shader_inputs_and_outputs(gl_shader *sh, enum ir_variable_mode mode) } } +extern void pack_varyings(gl_shader*& vs,gl_shader*& fs); void assign_varying_locations(struct gl_shader_program *prog, @@ -1413,6 +1414,8 @@ assign_varying_locations(struct gl_shader_program *prog, unsigned output_index = VERT_RESULT_VAR0; unsigned input_index = FRAG_ATTRIB_VAR0; + pack_varyings(producer,consumer); + /* Operate in a total of three passes. * * 1. Assign locations for any matching inputs and outputs. diff --git a/src/glsl/pack_varyings.cpp b/src/glsl/pack_varyings.cpp new file mode 100644 index 0000000..caf41aa --- /dev/null +++ b/src/glsl/pack_varyings.cpp @@ -0,0 +1,491 @@ +/* + * Copyright © 2011 Intel Corporation + * Copyright © 2011 Vincent Lejeune + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file pack_varyings.cpp
+ *
+ * Try to find packable varyings and pack them.
+ * Currently the code only handles float, vec2 and vec3 varyings.
+ *
+ * This is a two-pass algorithm:
+ * - In a first pass, all varyings from shaders are collected. A packing
+ * strategy is then built by looking only at the number of varyings of each
+ * type (float, vec2, vec3); in particular, the number of occurrences of each
+ * varying is ignored, as swizzles are assumed to be mostly free. This
+ * strategy is stored as a hash_table of packing_remap structures. If a shader
+ * object is reused by several programs, storing this strategy might avoid
+ * unnecessary recomputations; this is not done yet.
+ * - In a second pass, the packing variables are declared in the shaders and the
+ * packed variables are removed. Instructions are then parsed and each occurrence
+ * of a packed variable is replaced by the corresponding swizzled packing variable.
+ */
+
+#include "main/core.h"
+#include "glsl_symbol_table.h"
+#include "ir.h"
+#include "program.h"
+#include "program/hash_table.h"
+#include "linker.h"
+#include "ir_rvalue_visitor.h"
+#include "ir_optimization.h"
+#include "list.h"
+
+extern "C" {
+#include "main/shaderobj.h"
+}
+
+/*
+ * Boxed List definition
+ * Such a list holds pointers instead of exec_nodes; in addition it can contain
+ * items stored in another list (an exec_node can belong to only one exec_list).
+ * This container is needed to store ir_variable* in the first pass without
+ * removing them from instruction flow.
+ *
+ */
+
+// BOXED LIST BEGIN
+
+/**
+ * List node wrapping an untyped pointer.
+ *
+ * The node only stores the pointer in \c content; nothing here takes
+ * ownership of the pointed-to object.
+ */
+class box : public exec_node
+{
+public:
+   void* content;
+   box(void* c):content(c)
+   {
+   }
+};
+
+/**
+ * exec_list whose nodes are \c box wrappers around caller-supplied pointers.
+ */
+class boxed_exec_list : public exec_list
+{
+public:
+   /** Wrap \c n in a new box and append it to the list. */
+   void push_tail(void *n)
+   {
+      exec_list::push_tail(new (this) box(n));
+   }
+
+   /** Wrap \c n in a new box and prepend it to the list. */
+   void push_head(void *n)
+   {
+      exec_list::push_head(new (this) box(n));
+   }
+
+   /** Placement-style allocator: storage comes from ralloc_size(ctx, size). */
+   static void* operator new(size_t size, void *ctx)
+   {
+      void* const storage = ralloc_size(ctx, size);
+      assert(storage != NULL);
+
+      return storage;
+   }
+
+   /**
+    * Linear search: true when some box in the list holds exactly \c pointer.
+    */
+   bool has(const void* pointer) const
+   {
+      foreach_list_const(node,this)
+      {
+         const box* item = static_cast<const box*>(node);
+         if(item->content == pointer)
+            return true;
+      }
+      return false;
+   }
+
+};
+
+/* Unbox a list node: cast the node to box and its content to \c type. */
+#define list_item(type,pointer) reinterpret_cast<type>(reinterpret_cast<box*>(pointer)->content)
+#define list_item_const(type,pointer) reinterpret_cast<type>(reinterpret_cast<box*>(const_cast<exec_node*>(pointer))->content)
+
+
+// BOXED LIST END
+
+
+/**
+ * From Ian Romanick
+ *
+ * This structure holds information on the mapping of packed variables into
+ * a packing variable.
+ *
+ * The packing variable is not declared there because a new variable occurs in 2
+ * shaders (a vertex shader and a fragment shader). Only name and type of this
+ * variable is present.
+ * The packed variables are not declared here, they are however stored as keys
+ * in the hash_table that defines the packing strategy.
+ */
+struct packing_remap {
+   /**
+    * Swizzle to access packed variable from packing variable.
+    */
+   ir_swizzle_mask read_swiz;
+
+   /**
+    * Shift to apply to write-mask when writing to the values of
+    * packed variable.
+    */
+   unsigned write_mask_shift;
+
+   /**
+    * Name of the packing variable.
+    */
+   char* new_variable_name;
+
+   /**
+    * type of the packing variable.
+    */
+   const glsl_type* gltype;
+
+};
+
+#define DBG_MSG(msg) printf("%s\n",msg);
+
+/**
+ * Collects the packable input variables of one instruction list and derives
+ * a packing strategy from them.
+ */
+class varying_packer
+{
+protected:
+   boxed_exec_list *list_float,*list_vec2,*list_vec3;
+   size_t list_float_size,list_vec2_size,list_vec3_size;
+   const exec_list* shader_ir;
+public:
+   /**
+    * Walk \c shader_ir and sort every \c ir_var_in variable of type float,
+    * vec2 or vec3 into \c list_float, \c list_vec2 or \c list_vec3.
+    *
+    * The three lists and their element counters are reset first, so the
+    * function can be called repeatedly.
+    */
+   void collect_varyings(){
+
+      list_float_size = 0;
+      list_vec2_size = 0;
+      list_vec3_size = 0;
+      list_float->make_empty();
+      list_vec2->make_empty();
+      list_vec3->make_empty();
+
+      foreach_list_const(node,shader_ir)
+      {
+         ir_instruction* ir = (ir_instruction*) node;
+         ir_variable* varying = ir->as_variable();
+
+         /* Only input variable declarations are of interest. */
+         if(!varying || varying->mode != ir_var_in)
+            continue;
+
+         /* Select the bucket matching the variable type, if any. */
+         boxed_exec_list* bucket = NULL;
+         size_t* bucket_size = NULL;
+         switch(varying->type->gl_type)
+         {
+         case GL_FLOAT: // varying float case
+            bucket = list_float;
+            bucket_size = &list_float_size;
+            break;
+         case GL_FLOAT_VEC2: // varying vec2 case
+            bucket = list_vec2;
+            bucket_size = &list_vec2_size;
+            break;
+         case GL_FLOAT_VEC3: // varying vec3 case
+            bucket = list_vec3;
+            bucket_size = &list_vec3_size;
+            break;
+         default: // vec4 cannot be packed, other types are not handled
+            break;
+         }
+         if(!bucket)
+            continue;
+
+         assert(!bucket->has(varying));
+         bucket->push_head(varying);
+         ++*bucket_size;
+      }
+   }
+
+   /**
+    * This function tries to gather packable varyings and stores them in a
+    * \c packing_remap hash_table.
+    *
+    * The result is a hash_table whose keys are names of variables (and not
+    * pointers) because varyings in different shaders can be matched by their
+    * name, not by their pointer value.
+    * The data of the result are packing_remap*. Every key has its own
+    * packing_remap; keys packed together carry the same packing variable name.
+    *
+    * The result has to be freed by function caller.
+    */
+   hash_table* define_pack_mapping(){
+      hash_table* result = hash_table_ctor(32, hash_table_string_hash,
+                                           hash_table_string_compare);
+
+      /* Index appended to the packing variable names. Every pack must get
+       * its own name: add_prelude() looks packing variables up by name in
+       * the symbol table, so two packs sharing one name would be aliased
+       * onto the same vec4.
+       */
+      unsigned pack_index = 0;
+
+      /* Pack one vec3 (.xyz) with one float (.w) while both are available. */
+      while(list_vec3_size >= 1 && list_float_size >= 1)
+      {
+         ir_variable* vec3_var =
+               list_item(ir_variable*,list_vec3->pop_head());
+         ir_variable* float_var =
+               list_item(ir_variable*,list_float->pop_head());
+         list_vec3_size--;
+         list_float_size--;
+
+         char* pack_name = ralloc_asprintf(shader_ir,
+                                           "packed_vec4_varying_%u",
+                                           pack_index++);
+         assert(pack_name != NULL);
+
+         ir_swizzle_mask vec3_swizzle = {0,1,2,-1,3,false};
+         packing_remap pr_vec3 =
+               {vec3_swizzle,0,pack_name,glsl_type::vec4_type};
+
+         ir_swizzle_mask float_swizzle = {3,-1,-1,-1,1,false};
+         packing_remap pr_float =
+               {float_swizzle,3,pack_name,glsl_type::vec4_type};
+
+         insert_remap(result,pr_vec3,vec3_var->name);
+         insert_remap(result,pr_float,float_var->name);
+      }
+
+      /* Then pack the vec2 varyings two by two (.xy and .zw). */
+      while(list_vec2_size >= 2)
+      {
+         ir_variable* var1 =
+               list_item(ir_variable*,list_vec2->pop_head());
+         ir_variable* var2 =
+               list_item(ir_variable*,list_vec2->pop_head());
+         list_vec2_size--;
+         list_vec2_size--;
+
+         char* pack_name = ralloc_asprintf(shader_ir,
+                                           "packed_vec2_vec2_varying_%u",
+                                           pack_index++);
+         assert(pack_name != NULL);
+
+         ir_swizzle_mask var1_swizzle = {0,1,-1,-1,2,false};
+         packing_remap pr_var1 =
+               {var1_swizzle,0,pack_name,glsl_type::vec4_type};
+
+         ir_swizzle_mask var2_swizzle = {2,3,-1,-1,2,false};
+         packing_remap pr_var2 =
+               {var2_swizzle,2,pack_name,glsl_type::vec4_type};
+
+         insert_remap(result,pr_var1,var1->name);
+         insert_remap(result,pr_var2,var2->name);
+      }
+
+      return result;
+   }
+
+   /**
+    * \param instructions instruction list to scan; it is also used as the
+    *        ralloc context of the three internal lists.
+    */
+   varying_packer(exec_list* instructions):shader_ir(instructions){
+      list_float = new (instructions) boxed_exec_list();
+      list_vec2 = new (instructions) boxed_exec_list();
+      list_vec3 = new (instructions) boxed_exec_list();
+      list_vec3_size = list_vec2_size = list_float_size = 0;
+   }
+
+   ~varying_packer(){
+      delete list_float;
+      delete list_vec2;
+      delete list_vec3;
+   }
+
+protected:
+   /**
+    * Copy \c remap into storage parented to \c shader_ir and register the
+    * copy in \c table under \c key.
+    *
+    * The copy is ralloc'ed rather than allocated with plain \c new, which
+    * nothing ever deleted: destroying the table does not release its data
+    * pointers, so the entries have to be owned by something else.
+    */
+   void insert_remap(hash_table* table,const packing_remap& remap,
+                     const char* key) const
+   {
+      packing_remap* stored = static_cast<packing_remap*>(
+            ralloc_size(shader_ir,sizeof(packing_remap)));
+      assert(stored != NULL);
+      *stored = remap;
+      hash_table_insert(table,stored,key);
+   }
+
+};
+
+/**
+ * This utility function shifts \c
mask bits by \c step.
+ *
+ * It is used for instance when a packed variable pv is
+ * stored in a packing variable pack is written at position i :
+ * - In original ir the assignment is pv.i = ...
+ * - In new ir the assignement is pack.new_i = ...
+ * where new_i is the output of the function, and depends only on \c step
+ *
+ * The mask is treated as a 4-bit value (one bit per vector component):
+ * bits are rotated towards the higher components and wrap around from bit 3
+ * to bit 0. Bits of \c mask above bit 3 are ignored, \c step is taken modulo
+ * 4 and the result never has a bit set above bit 3.
+ */
+inline
+unsigned cyclic_right_shift(unsigned mask,unsigned step)
+{
+   const unsigned width = 4;
+   const unsigned component_bits = (1u << width) - 1u;
+
+   /* Without these two reductions the rotation leaks bits above bit 3 and
+    * "width - step" wraps around for step > width, which makes the shift
+    * undefined.
+    */
+   mask &= component_bits;
+   step %= width;
+
+   return ((mask << step) | (mask >> (width - step))) & component_bits;
+}
+
+
+
+/**
+ * This visitor replaces every occurrence of a packed variable in a rhs
+ * expression by the packing variable and the corresponding swizzle.
+ */
+class ir_rvalue_substituter : public ir_rvalue_visitor
+{
+protected:
+   /** Maps packed variable names to their packing_remap. */
+   hash_table* replacement_table;
+   /** Maps packing_remap pointers to the declared packing ir_variable. */
+   hash_table* introduced_variables;
+
+public:
+   /**
+    * If \c *rvalue is a dereference of a packed variable, replace it by a
+    * swizzle of the matching packing variable; otherwise leave it alone.
+    */
+   void handle_rvalue(ir_rvalue **rvalue)
+   {
+      if(!*rvalue)
+         return;
+      ir_rvalue* tmp_rvalue = *rvalue;
+      if(ir_dereference_variable* dref = tmp_rvalue->as_dereference_variable())
+      {
+         packing_remap* pr = static_cast<packing_remap*>(
+               hash_table_find(replacement_table,dref->var->name));
+         if(!pr)
+            return;
+         ir_variable* newvar = static_cast<ir_variable*>(
+               hash_table_find(introduced_variables,pr));
+         /* A remap without a declared packing variable means the prelude
+          * was not generated for this shader: fail loudly instead of
+          * building a dereference of a NULL variable.
+          */
+         assert(newvar != NULL);
+         ir_dereference_variable* ndref =
+               new (dref->var) ir_dereference_variable(newvar);
+         ir_swizzle* swz = new (dref) ir_swizzle(ndref,pr->read_swiz);
+         *rvalue = swz;
+      }
+   }
+
+   ir_rvalue_substituter(hash_table* htb1, hash_table* htb2):
+      replacement_table(htb1),introduced_variables(htb2)
+   {
+
+   }
+
+};
+
+
+/**
+ * This visitor replaces every lhs occurence of a packed variable by packing
+ * variable and corresponding swizzle, and call a \c ir_rvalue_visitor
+ * to parse rhs.
+ */
+
+class ir_variable_substituter : public ir_hierarchical_visitor
+{
+protected:
+   /** Maps packed variable names to their packing_remap. */
+   hash_table* replacement_table;
+   /** Maps packing_remap pointers to the declared packing ir_variable. */
+   hash_table* introduced_variables;
+   glsl_symbol_table* symbols;
+
+   /**
+    * Build the declaration of the packing variable described by \c pr,
+    * allocated with \c ctx as allocation context.
+    */
+   virtual
+   ir_variable* generate_new_varying(void* ctx,packing_remap* pr) const=0;
+
+public:
+   /**
+    * Rewrite one assignment: packed variables read by the rhs are replaced
+    * by swizzles of their packing variable, and a packed variable written
+    * by the lhs is replaced by its packing variable with the write mask
+    * moved to the components it occupies there.
+    *
+    * NOTE(review): only the rhs and a plain variable lhs are handled here;
+    * the assignment condition and uses of packed variables outside of
+    * assignments are not rewritten by this visitor - confirm that this is
+    * sufficient for the IR seen at link time.
+    */
+   ir_visitor_status visit_enter(ir_assignment *assign)
+   {
+      ir_rvalue_substituter rs(replacement_table,introduced_variables);
+      assign->rhs->accept(&rs);
+      /* accept() lets the visitor rewrite rvalues nested inside the rhs,
+       * but the visitor has no slot through which to replace the root
+       * node itself. Handle an rhs that is directly a dereference of a
+       * packed variable ("tmp = v;") explicitly, otherwise it would keep
+       * pointing at a declaration that add_prelude() removed.
+       */
+      rs.handle_rvalue(&assign->rhs);
+
+      if(ir_dereference_variable* dref = assign->lhs->as_dereference_variable())
+      {
+         packing_remap* pr = static_cast<packing_remap*>(
+               hash_table_find(replacement_table,dref->var->name));
+         if(!pr)
+            return visit_continue;
+         ir_variable* newvar = static_cast<ir_variable*>(hash_table_find(
+               introduced_variables,pr));
+         /* The packing variable must have been declared by add_prelude(). */
+         assert(newvar != NULL);
+         dref->var = newvar;
+         assign->write_mask =
+               cyclic_right_shift(assign->write_mask,pr->write_mask_shift);
+      }
+      return visit_continue;
+   }
+
+   /**
+    * This method generates the ir_variable* packing variable and add them
+    * at the beginning of \c instructions and removes declaration of packed
+    * variables.
+    * The new variable pointers are stored in \c introduced_variables hash_table
+    * where they can be reused when replacement of packed variables in ir takes
+    * place.
+    */
+   void add_prelude(exec_list* instructions)
+   {
+      foreach_list_safe(tmp,instructions)
+      {
+         ir_instruction* inst = (ir_instruction*) tmp;
+         ir_variable* var = inst->as_variable();
+         /* Skip anything that is not a declaration but keep scanning:
+          * collect_varyings() looks at the whole list, so a declaration
+          * that follows a non-variable instruction must be handled too.
+          */
+         if(!var)
+            continue;
+         packing_remap* pr = static_cast<packing_remap*>(
+               hash_table_find(replacement_table,var->name));
+         if(!pr)
+            continue;
+         /* Declare the packing variable only once per shader; the other
+          * variables of the same pack find it through the symbol table.
+          */
+         ir_variable* newvar = symbols->get_variable(pr->new_variable_name);
+         if(!newvar)
+         {
+            newvar = generate_new_varying(var,pr);
+            inst->insert_before(newvar);
+            symbols->add_variable(newvar);
+         }
+         hash_table_insert(introduced_variables,newvar,pr);
+         inst->remove();
+      }
+   }
+
+   /**
+    * \param htb packing strategy: packed variable name -> packing_remap*.
+    * \param s   symbol table of the shader being rewritten.
+    */
+   ir_variable_substituter(hash_table* htb,glsl_symbol_table* s):
+      ir_hierarchical_visitor(),replacement_table(htb),symbols(s)
+   {
+      introduced_variables = hash_table_ctor(0, hash_table_pointer_hash,
+                                             hash_table_pointer_compare);
+   }
+
+   virtual ~ironly_placeholder_never_emitted();
vsreplacer(htb,vs->symbols);
+   vsreplacer.add_prelude(vs->ir);
+   visit_list_elements(&vsreplacer,vs->ir);
+
+   /* define_pack_mapping() hands ownership of the table to its caller and
+    * both shaders have been rewritten, so release it here. The visitors
+    * above only keep a pointer to it and do not use it when destroyed.
+    */
+   hash_table_dtor(htb);
+}
+
+
-- 
1.7.6

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev