Thanks for your reviews. I'm taking note of your comments and I will have a new patch soon. (btw, I must apologize because I forgot to add a commit when rebasing, so this new patch didn't have function comments to explain their role, and "break;" where not pushed t)
----- Mail original ----- > De : Paul Berry <stereotype...@gmail.com> > À : Vincent Lejeune <v...@ovi.com> > Cc : mesa-dev@lists.freedesktop.org > Envoyé le : Mercredi 10 Août 2011 22h04 > Objet : Re: [Mesa-dev] [PATCH] Var packer optimisation. > > On 9 August 2011 14:19, Vincent Lejeune <v...@ovi.com> wrote: >> From: vlj <v...@ovi.com> >> >> This optimisation pass will look for and pack together vector (up to vec3) > and scalar in function body. It only concerns local variable, not temporary > which should however be packed as a side effect. It should reduce register > pressure. >> --- >> src/glsl/Makefile | 1 + >> src/glsl/glsl_parser_extras.cpp | 1 + >> src/glsl/ir_optimization.h | 1 + >> src/glsl/opt_var_packer.cpp | 309 > +++++++++++++++++++++++++++++++++++++++ >> 4 files changed, 312 insertions(+), 0 deletions(-) >> create mode 100644 src/glsl/opt_var_packer.cpp >> >> diff --git a/src/glsl/Makefile b/src/glsl/Makefile >> index 4100414..fff0dc7 100644 >> --- a/src/glsl/Makefile >> +++ b/src/glsl/Makefile >> @@ -83,6 +83,7 @@ CXX_SOURCES = \ >> opt_structure_splitting.cpp \ >> opt_swizzle_swizzle.cpp \ >> opt_tree_grafting.cpp \ >> + opt_var_packer.cpp \ >> s_expression.cpp >> >> LIBS = \ >> diff --git a/src/glsl/glsl_parser_extras.cpp > b/src/glsl/glsl_parser_extras.cpp >> index d9aa300..3b0e84a 100644 >> --- a/src/glsl/glsl_parser_extras.cpp >> +++ b/src/glsl/glsl_parser_extras.cpp >> @@ -776,6 +776,7 @@ do_common_optimization(exec_list *ir, bool linked, > unsigned max_unroll_iteration >> { >> GLboolean progress = GL_FALSE; >> >> + progress = do_var_packing(ir) || progress; >> progress = lower_instructions(ir, SUB_TO_ADD_NEG) || progress; >> >> if (linked) { > > This optimization pass shouldn't come at the top of > do_common_optimization, since it has the potential to decrease the > effectiveness of optimizations that follow. For example, if it packs > a dead variable with a live one, then dead code elimination won't be > able to eliminate the dead variable any more. I think it may > interfere with constant propagation and algebraic simplification too, > but I'm less certain of that. > >> diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h >> index dd26567..83d124f 100644 >> --- a/src/glsl/ir_optimization.h >> +++ b/src/glsl/ir_optimization.h >> @@ -71,3 +71,4 @@ bool lower_variable_index_to_cond_assign(exec_list > *instructions, >> bool lower_input, bool lower_output, bool lower_temp, bool > lower_uniform); >> bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz); >> bool optimize_redundant_jumps(exec_list *instructions); >> +bool do_var_packing(exec_list *instructions); >> diff --git a/src/glsl/opt_var_packer.cpp b/src/glsl/opt_var_packer.cpp >> new file mode 100644 >> index 0000000..e7d0638 >> --- /dev/null >> +++ b/src/glsl/opt_var_packer.cpp >> @@ -0,0 +1,309 @@ >> +/* >> + * Copyright © 2011 Vincent Lejeune >> + * >> + * Permission is hereby granted, free of charge, to any person obtaining a >> + * copy of this software and associated documentation files (the > "Software"), >> + * to deal in the Software without restriction, including without > limitation >> + * the rights to use, copy, modify, merge, publish, distribute, > sublicense, >> + * and/or sell copies of the Software, and to permit persons to whom the >> + * Software is furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice (including the > next >> + * paragraph) shall be included in all copies or substantial portions of > the >> + * Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY > KIND, EXPRESS OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > MERCHANTABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT > SHALL >> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR > OTHER >> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING >> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER >> + * DEALINGS IN THE SOFTWARE. >> + */ >> + >> +#include "ir.h" >> +#include "ir_hierarchical_visitor.h" >> +#include "ir_rvalue_visitor.h" >> +#include <cstring> >> + >> +class box : public exec_node >> +{ >> +public: >> + void* content; >> + box(void* c):content(c) >> + { >> + >> + } >> +}; >> + >> + >> + >> +class boxed_exec_list : public exec_list >> +{ >> +public: >> + void push_tail(void *n) >> + { >> + box* b = new (this) box(n); >> + exec_list::push_tail(b); >> + } >> + >> + void push_head(void *n) >> + { >> + box* b = new (this) box(n); >> + exec_list::push_head(b); >> + } >> + >> + static void* operator new(size_t size, void *ctx) >> + { >> + void *node; >> + >> + node = ralloc_size(ctx, size); >> + assert(node != NULL); >> + >> + return node; >> + } >> + >> + bool has(const void* pointer) const >> + { >> + foreach_list_const(tmp,this) >> + { >> + box* tmpb = > reinterpret_cast<box*>(const_cast<exec_node*>(tmp)); >> + if(tmpb->content == pointer) >> + return true; >> + } >> + return false; >> + } >> + >> + int size() >> + { >> + int result=0; >> + foreach_list_const(tmp,this) >> + { >> + result++; >> + } >> + return result; >> + } >> +}; >> + >> +#define list_item(type,pointer) > reinterpret_cast<type>(reinterpret_cast<box*>(pointer)->content) >> +#define list_item_const(type,pointer) > reinterpret_cast<type>(reinterpret_cast<box*>(const_cast<exec_node*>(pointer))->content) >> + >> + >> +#include <iostream> >> +using namespace std; >> + >> +class ir_variable_lister : public ir_hierarchical_visitor >> +{ >> + friend class ir_packer; >> +protected: >> + boxed_exec_list* available_vec3; >> + boxed_exec_list* available_vec2; >> + boxed_exec_list* available_float; >> + >> + void store_variable(ir_variable* var) >> + { >> + if(strcmp(var->name,"_ret_val") == 0) >> + return ; > > I assume this check is trying to avoid applying the optimization to > the temporary variables introduced by function inlining for return > values, yes? It's not obvious to me why we would want to make an > exception for them. And even if we do, this is not the best way to do > it, because it means that if the user happens to make a variable with > this name, it will not get optimized, regardless of how it is used. > >> + switch(var->type->gl_type) >> + { >> + case GL_FLOAT_VEC3: >> + if(available_vec3->has(var)) break; >> + available_vec3->push_tail(var); >> + break; >> + case GL_FLOAT_VEC2: >> + if(available_vec2->has(var)) break; >> + available_vec2->push_tail(var); >> + break; >> + case GL_FLOAT: >> + if(available_float->has(var)) break; >> + available_float->push_tail(var); >> + break; >> + default: >> + break; >> + } >> + } >> + >> + bool find_candidates(ir_variable*& var1,ir_swizzle_mask& mask1, > ir_variable*& var2, ir_swizzle_mask& mask2) >> + { >> + if(available_vec3->size() > 1 && > available_float->size() >1) >> + { >> + var1 = list_item(ir_variable*,available_vec3->pop_head()); >> + var2 = list_item(ir_variable*,available_float->pop_head()); >> + mask1.x = 0;mask1.y = 1;mask1.z = 2;mask1.num_components = > 3;mask1.has_duplicates=false; >> + mask2.x = 3;mask2.num_components = 1;mask1.has_duplicates=false; >> + return true; >> + } >> + if(available_vec2->size() > 2) >> + { >> + var1 = > list_item_const(ir_variable*,available_vec2->pop_head()); >> + var2 = > list_item_const(ir_variable*,available_vec2->pop_head()); >> + mask1.x = 0;mask1.y = 1;mask1.num_components = 2; >> + mask2.x = 2; mask2.y = 3;mask2.num_components = 2; >> + return true; >> + } >> + if(available_vec2->size() > 1 && > available_float->size() > 1) >> + { >> + var1 = > list_item_const(ir_variable*,available_vec2->pop_head()); >> + var2 = > list_item_const(ir_variable*,available_float->pop_head()); >> + mask1.x = 0;mask1.y = 1;mask1.num_components = 2; >> + mask2.x = 2; mask2.num_components = 1; >> + return true; >> + } >> + if(available_float->size() > 2) >> + { >> + var1 = > list_item_const(ir_variable*,available_float->pop_head()); >> + var2 = > list_item_const(ir_variable*,available_float->pop_head()); >> + mask1.x = 0;mask1.num_components = 1; >> + mask2.x = 1;mask2.num_components = 1; >> + return true; >> + } >> + return false; >> + } > > I realize this code may wind up getting rewritten as a result of Ian > and Eric's comments, but I wanted to point out something that I think > is a bug: all the ">"s in the above function look to me like they > should be ">=". For example, we want to combine vec3's and > floats > whenever there is at least one of each present, not when there are at > least two of each present. > >> + >> +public: >> + ir_visitor_status visit_enter(ir_assignment * assign) >> + { >> + if(ir_dereference_variable* dref = > assign->lhs->as_dereference_variable()) >> + { >> + if(dref->var->mode != ir_var_auto) >> + return visit_continue; >> + store_variable(dref->var); >> + } >> + return visit_continue; >> + } >> + >> + ir_variable_lister(void* ctx) >> + { >> + available_float = new (ctx) boxed_exec_list(); >> + available_vec2 = new (ctx) boxed_exec_list(); >> + available_vec3 = new (ctx) boxed_exec_list(); >> + } >> + >> +}; >> + >> + >> +class ir_variable_replacer : public ir_rvalue_visitor >> +{ >> +protected: >> + ir_variable* var1; >> + ir_swizzle_mask mask_for_var1; >> + ir_variable* var2; >> + ir_swizzle_mask mask_for_var2; >> + ir_variable* packed_var; >> + >> + inline >> + unsigned cyclic_right_shift(unsigned mask,unsigned step) const >> + { >> + unsigned result = 0; >> + result |= mask << step; >> + result |= mask >> (4 - step); >> + return result; >> + } >> + >> + >> +public: >> + void handle_rvalue(ir_rvalue **rvalue) >> + { >> + if(!*rvalue) >> + return; >> + ir_rvalue* tmp_rvalue = *rvalue; >> + if(ir_dereference_variable* dref = > tmp_rvalue->as_dereference_variable()) >> + { >> + if(dref->var == var1) >> + { >> + ir_dereference_variable* ndref = new (packed_var) > ir_dereference_variable(packed_var); >> + ir_swizzle* swz = new (dref) ir_swizzle(ndref,mask_for_var1); >> + *rvalue = swz; >> + } >> + if(dref->var == var2) >> + { >> + ir_dereference_variable* ndref = new (packed_var) > ir_dereference_variable(packed_var); >> + ir_swizzle* swz = new (dref) ir_swizzle(ndref,mask_for_var2); >> + *rvalue = swz; >> + } >> + } >> + } >> + >> + ir_visitor_status visit_enter(ir_assignment * assign) >> + { >> + assign->rhs->accept(this); >> + if(ir_dereference_variable* dref = > assign->lhs->as_dereference_variable()) >> + { >> + if(dref->var == var1) >> + { >> + dref->var = packed_var; >> + } >> + if(dref->var == var2) >> + { >> + dref->var = packed_var; >> + assign->write_mask = > cyclic_right_shift(assign->write_mask,mask_for_var1.num_components); >> + } >> + } >> + return visit_continue; >> + } >> + >> + ir_variable_replacer(ir_variable* v1,ir_swizzle_mask mask1, > ir_variable* v2, ir_swizzle_mask mask2, ir_variable* > newvar):var1(v1),mask_for_var1(mask1), var2(v2),mask_for_var2(mask2), > packed_var(newvar) >> + { >> + } >> +}; >> + >> + >> +class ir_packer : public ir_hierarchical_visitor >> +{ >> +public: >> + ir_visitor_status visit_enter(ir_function_signature * fonc) >> + { >> + int body_size = 0; >> + foreach_list_const(tmp,&(fonc->body)) >> + { >> + body_size++; >> + } >> + if(!body_size) >> + return visit_continue; >> + //cout << "ENTERING " << > fonc->function_name() << ":"<< endl; >> + ir_variable_lister v(fonc); >> + foreach_list(tmp,&(fonc->body)) >> + { >> + ir_instruction* inst = (ir_instruction*) tmp; >> + inst->accept(&v); >> + } >> + ir_variable *v1=0,*v2=0; >> + ir_swizzle_mask m1,m2; >> + if(!v.find_candidates(v1,m1,v2,m2)) >> + return visit_continue; >> + cout << "PACKING " << v1->name<< > " WITH " << v2->name << endl; >> + ir_variable* newvar=0; >> + switch(m1.num_components + m2.num_components) >> + { >> + case 4: >> + newvar = new (fonc) > ir_variable(glsl_type::vec4_type,"vec4_tmp",ir_var_temporary); >> + break; >> + case 3: >> + newvar = new (fonc) > ir_variable(glsl_type::vec3_type,"vec3_tmp",ir_var_temporary); >> + break; >> + case 2: >> + newvar = new (fonc) > ir_variable(glsl_type::vec2_type,"vec2_tmp",ir_var_temporary); >> + break; >> + } >> + fonc->body.push_head(newvar); >> + ir_variable_replacer vis2(v1,m1,v2,m2,newvar); >> + foreach_list(tmp,&(fonc->body)) >> + { >> + ir_instruction* inst = (ir_instruction*) tmp; >> + inst->accept(&vis2); >> + } >> + return visit_continue; >> + } >> +}; >> + >> + >> +bool >> +do_var_packing(exec_list *instructions) >> +{ >> + ir_packer v; >> + for(int i=0;i<5;i++) >> + visit_list_elements(&v, instructions); >> + >> + >> + return false; >> +} >> -- >> 1.7.3.4 >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/mesa-dev >> > > I'm also concerned that this optimization pass doesn't pay any > attention to variable lifetimes. Because of Mesa's aggressive > function inlining, it's likely that a shader may have a large number > of variables whose lifetimes don't overlap. If this optimization pass > combines two variables whose lifetimes don't overlap, it will produce > a variable with a much longer lifetime, and therefore reduce the > opportunities for the individual back-ends to re-use registers. So if > we're not lucky, it might actually cause _increased_ register pressure > rather than decreased register pressure. > > Have you tested this optimization on any typical shaders and measured > the difference in register pressure? > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev