From: vlj <v...@ovi.com> --- src/glsl/Makefile | 1 + src/glsl/glsl_parser_extras.cpp | 7 +- src/glsl/ir_optimization.h | 1 + src/glsl/opt_var_packer.cpp | 332 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 338 insertions(+), 3 deletions(-) create mode 100644 src/glsl/opt_var_packer.cpp
diff --git a/src/glsl/Makefile b/src/glsl/Makefile index 68b98b2..226acc4 100644 --- a/src/glsl/Makefile +++ b/src/glsl/Makefile @@ -84,6 +84,7 @@ CXX_SOURCES = \ opt_swizzle_swizzle.cpp \ opt_tree_grafting.cpp \ opt_common_subexpression_elimination.cpp \ + opt_var_packer.cpp \ s_expression.cpp LIBS = \ diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index 0a57386..1aa7d88 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -777,8 +777,9 @@ do_common_optimization(exec_list *ir, bool linked, unsigned max_unroll_iteration GLboolean progress = GL_FALSE; - progress = do_common_subexpression_elimination(ir) || progress; - /*progress = lower_instructions(ir, SUB_TO_ADD_NEG) || progress; + progress = do_var_packing (ir) || progress; + //progress = do_common_subexpression_elimination(ir) || progress; + progress = lower_instructions(ir, SUB_TO_ADD_NEG) || progress; if (linked) { progress = do_function_inlining(ir) || progress; @@ -815,7 +816,7 @@ do_common_optimization(exec_list *ir, bool linked, unsigned max_unroll_iteration progress = set_loop_controls(ir, ls) || progress; progress = unroll_loops(ir, ls, max_unroll_iterations) || progress; } - delete ls;*/ + delete ls; return progress; } diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index 3604e4e..5b1de66 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -72,3 +72,4 @@ bool lower_variable_index_to_cond_assign(exec_list *instructions, bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz); bool optimize_redundant_jumps(exec_list *instructions); bool do_common_subexpression_elimination(exec_list *instructions); +bool do_var_packing(exec_list *instructions); diff --git a/src/glsl/opt_var_packer.cpp b/src/glsl/opt_var_packer.cpp new file mode 100644 index 0000000..fe579f9 --- /dev/null +++ b/src/glsl/opt_var_packer.cpp @@ -0,0 +1,332 @@ +/* + * Copyright © 2011 Vincent Lejeune + * + * Permission is 
hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+/**
+ * \file opt_var_packer.cpp
+ *
+ * Packs pairs of small float-based local variables (float, vec2, vec3) of a
+ * function into a single wider temporary, rewriting every read as a swizzle
+ * of the packed variable and every direct write as a write-masked assignment
+ * to it.
+ *
+ * NOTE(review): candidates are collected from every ir_var_auto dereference
+ * seen inside a function body.  If global variables also use ir_var_auto,
+ * they would be packed into a function-local temporary -- confirm and
+ * filter them out if so.
+ */
+
+#include "ir.h"
+#include "ir_hierarchical_visitor.h"
+#include "ir_rvalue_visitor.h"
+#include <cstdio>
+#include <cstring>
+
+/** Set to true to print every packing decision on stdout. */
+static bool debug = false;
+
+/**
+ * List node carrying an untyped pointer.
+ *
+ * Lets an object be referenced from a boxed_exec_list without unlinking it
+ * from the exec_list it already belongs to.
+ */
+class box : public exec_node
+{
+public:
+   void *content;
+
+   explicit box(void *c) : content(c)
+   {
+   }
+};
+
+
+/**
+ * exec_list whose elements are box nodes wrapping arbitrary pointers.
+ *
+ * Boxes are allocated with the list itself as allocation context.
+ */
+class boxed_exec_list : public exec_list
+{
+public:
+   static void *operator new(size_t size, void *ctx)
+   {
+      void *node = ralloc_size(ctx, size);
+      assert(node != NULL);
+
+      return node;
+   }
+
+   /** Appends a box wrapping \c n. */
+   void push_tail(void *n)
+   {
+      exec_list::push_tail(new(this) box(n));
+   }
+
+   /** Prepends a box wrapping \c n. */
+   void push_head(void *n)
+   {
+      exec_list::push_head(new(this) box(n));
+   }
+
+   /** Returns true if some box in the list wraps \c pointer. */
+   bool has(const void *pointer) const
+   {
+      foreach_list_const(node, this) {
+         if (static_cast<const box *>(node)->content == pointer)
+            return true;
+      }
+      return false;
+   }
+
+   /** Number of boxes in the list (linear walk). */
+   int size() const
+   {
+      int result = 0;
+      foreach_list_const(node, this) {
+         result++;
+      }
+      return result;
+   }
+
+   /**
+    * Unlinks the first box and returns the pointer it wraps, or NULL when
+    * pop_head() yields no node.
+    */
+   void *pop_content()
+   {
+      exec_node *node = pop_head();
+      return node ? static_cast<box *>(node)->content : NULL;
+   }
+};
+
+
+/**
+ * Collects the ir_var_auto variables of type float, vec2 and vec3 that are
+ * dereferenced in the visited instructions, and proposes pairs to pack.
+ */
+class ir_variable_lister : public ir_hierarchical_visitor
+{
+   friend class ir_packer;
+protected:
+   boxed_exec_list *available_vec3;
+   boxed_exec_list *available_vec2;
+   boxed_exec_list *available_float;
+
+   /** Adds \c var to \c list unless it is already there. */
+   static void remember(boxed_exec_list *list, ir_variable *var)
+   {
+      if (!list->has(var))
+         list->push_tail(var);
+   }
+
+   /** Removes and returns the first variable of \c list. */
+   static ir_variable *take(boxed_exec_list *list)
+   {
+      return static_cast<ir_variable *>(list->pop_content());
+   }
+
+   /**
+    * Builds a fully initialised swizzle mask selecting \c count consecutive
+    * components starting at component \c first.
+    */
+   static ir_swizzle_mask contiguous_mask(unsigned first, unsigned count)
+   {
+      ir_swizzle_mask mask;
+
+      /* Zero everything so that unused components and has_duplicates never
+       * hold indeterminate values.
+       */
+      memset(&mask, 0, sizeof(mask));
+      mask.num_components = count;
+      switch (count) {
+      case 4:
+         mask.w = first + 3;
+         /* fallthrough */
+      case 3:
+         mask.z = first + 2;
+         /* fallthrough */
+      case 2:
+         mask.y = first + 1;
+         /* fallthrough */
+      case 1:
+         mask.x = first;
+         break;
+      default:
+         break;
+      }
+      return mask;
+   }
+
+   /** Files \c var in the list matching its type; other types are ignored. */
+   void store_variable(ir_variable *var)
+   {
+      /* Variables named "_ret_val" are never packed. */
+      if (strcmp(var->name, "_ret_val") == 0)
+         return;
+
+      switch (var->type->gl_type) {
+      case GL_FLOAT_VEC3:
+         remember(available_vec3, var);
+         break;
+      case GL_FLOAT_VEC2:
+         remember(available_vec2, var);
+         break;
+      case GL_FLOAT:
+         remember(available_float, var);
+         break;
+      default:
+         break;
+      }
+   }
+
+   /**
+    * Picks two collected variables that fit together in one vector.
+    *
+    * Combinations are tried in this order: vec3 + float, vec2 + vec2,
+    * vec2 + float, float + float.  The chosen variables are removed from
+    * the candidate lists.
+    *
+    * \param var1   receives the variable stored in the low components
+    * \param mask1  receives the components of the packed variable that
+    *               hold \c var1
+    * \param var2   receives the variable stored right after \c var1
+    * \param mask2  receives the components of the packed variable that
+    *               hold \c var2
+    * \return true if a pair was found, false otherwise (outputs untouched)
+    */
+   bool find_candidates(ir_variable *&var1, ir_swizzle_mask &mask1,
+                        ir_variable *&var2, ir_swizzle_mask &mask2)
+   {
+      const int vec3_count = available_vec3->size();
+      const int vec2_count = available_vec2->size();
+      const int float_count = available_float->size();
+      unsigned size1;
+      unsigned size2;
+
+      /* Each branch only needs as many candidates as it pops. */
+      if (vec3_count >= 1 && float_count >= 1) {
+         var1 = take(available_vec3);
+         var2 = take(available_float);
+         size1 = 3;
+         size2 = 1;
+      } else if (vec2_count >= 2) {
+         var1 = take(available_vec2);
+         var2 = take(available_vec2);
+         size1 = 2;
+         size2 = 2;
+      } else if (vec2_count >= 1 && float_count >= 1) {
+         var1 = take(available_vec2);
+         var2 = take(available_float);
+         size1 = 2;
+         size2 = 1;
+      } else if (float_count >= 2) {
+         var1 = take(available_float);
+         var2 = take(available_float);
+         size1 = 1;
+         size2 = 1;
+      } else {
+         return false;
+      }
+
+      mask1 = contiguous_mask(0, size1);
+      mask2 = contiguous_mask(size1, size2);
+      return true;
+   }
+
+public:
+   ir_visitor_status visit(ir_dereference_variable *dref)
+   {
+      if (dref->var->mode != ir_var_auto)
+         return visit_continue;
+      store_variable(dref->var);
+      return visit_continue;
+   }
+
+   /** \param ctx  allocation context for the candidate lists */
+   ir_variable_lister(void *ctx)
+   {
+      available_float = new(ctx) boxed_exec_list();
+      available_vec2 = new(ctx) boxed_exec_list();
+      available_vec3 = new(ctx) boxed_exec_list();
+   }
+};
+
+
+/**
+ * Rewrites every use of two variables into a use of the packed variable
+ * that replaces them.
+ */
+class ir_variable_replacer : public ir_rvalue_visitor
+{
+protected:
+   ir_variable *var1;
+   ir_swizzle_mask mask_for_var1;
+   ir_variable *var2;
+   ir_swizzle_mask mask_for_var2;
+   ir_variable *packed_var;
+
+   /**
+    * Returns the components of packed_var that hold \c var, or NULL if
+    * \c var is not one of the two replaced variables.
+    */
+   const ir_swizzle_mask *mask_for(const ir_variable *var) const
+   {
+      if (var == var1)
+         return &mask_for_var1;
+      if (var == var2)
+         return &mask_for_var2;
+      return NULL;
+   }
+
+   /**
+    * Translates a write mask expressed on the original variable into the
+    * matching write mask on the packed variable.
+    *
+    * Only channels enabled in \c old_mask are enabled in the result, so a
+    * partial write stays partial.  Assumes the RHS of an assignment
+    * supplies one component per enabled write-mask bit, in ascending
+    * channel order -- TODO confirm against ir_assignment.
+    */
+   static unsigned remap_write_mask(unsigned old_mask,
+                                    const ir_swizzle_mask &mask)
+   {
+      const unsigned component[4] = { mask.x, mask.y, mask.z, mask.w };
+      unsigned result = 0;
+
+      for (unsigned i = 0; i < mask.num_components; i++) {
+         if (old_mask & (1u << i))
+            result |= 1u << component[i];
+      }
+      return result;
+   }
+
+public:
+   /** Replaces a read of var1/var2 by a swizzle of packed_var. */
+   void handle_rvalue(ir_rvalue **rvalue)
+   {
+      if (!*rvalue)
+         return;
+
+      ir_dereference_variable *dref = (*rvalue)->as_dereference_variable();
+      if (!dref)
+         return;
+
+      const ir_swizzle_mask *mask = mask_for(dref->var);
+      if (!mask)
+         return;
+
+      ir_dereference_variable *packed =
+         new(packed_var) ir_dereference_variable(packed_var);
+      *rvalue = new(dref) ir_swizzle(packed, *mask);
+   }
+
+   /** Redirects a direct write to var1/var2 to packed_var. */
+   ir_visitor_status visit_enter(ir_assignment *assign)
+   {
+      assign->rhs->accept(this);
+
+      ir_dereference_variable *dref = assign->lhs->as_dereference_variable();
+      if (dref) {
+         const ir_swizzle_mask *mask = mask_for(dref->var);
+         if (mask) {
+            dref->var = packed_var;
+            /* Keep the dereference's type in sync with its new variable. */
+            dref->type = packed_var->type;
+            assign->write_mask = remap_write_mask(assign->write_mask, *mask);
+         }
+      }
+      return visit_continue;
+   }
+
+   ir_variable_replacer(ir_variable *v1, ir_swizzle_mask mask1,
+                        ir_variable *v2, ir_swizzle_mask mask2,
+                        ir_variable *newvar)
+      : var1(v1), mask_for_var1(mask1), var2(v2), mask_for_var2(mask2),
+        packed_var(newvar)
+   {
+   }
+};
+
+
+/**
+ * Packs at most one pair of variables per visited function signature.
+ */
+class ir_packer : public ir_hierarchical_visitor
+{
+public:
+   /** True once at least one pair of variables has been packed. */
+   bool progress;
+
+   ir_packer() : progress(false)
+   {
+   }
+
+   ir_visitor_status visit_enter(ir_function_signature *fonc)
+   {
+      if (fonc->body.is_empty())
+         return visit_continue;
+
+      /* Collect the candidates dereferenced in this function. */
+      ir_variable_lister lister(fonc);
+      foreach_list(node, &fonc->body) {
+         static_cast<ir_instruction *>(node)->accept(&lister);
+      }
+
+      ir_variable *v1 = NULL;
+      ir_variable *v2 = NULL;
+      ir_swizzle_mask m1;
+      ir_swizzle_mask m2;
+      if (!lister.find_candidates(v1, m1, v2, m2))
+         return visit_continue;
+
+      if (debug)
+         printf("PACKING %s WITH %s\n", v1->name, v2->name);
+
+      ir_variable *newvar = NULL;
+      switch (m1.num_components + m2.num_components) {
+      case 4:
+         newvar = new(fonc) ir_variable(glsl_type::vec4_type, "vec4_tmp",
+                                        ir_var_temporary);
+         break;
+      case 3:
+         newvar = new(fonc) ir_variable(glsl_type::vec3_type, "vec3_tmp",
+                                        ir_var_temporary);
+         break;
+      case 2:
+         newvar = new(fonc) ir_variable(glsl_type::vec2_type, "vec2_tmp",
+                                        ir_var_temporary);
+         break;
+      default:
+         /* No vector type holds this pair; leave the function alone
+          * rather than inserting a NULL declaration.
+          */
+         assert(!"unexpected packed variable size");
+         return visit_continue;
+      }
+
+      /* Declare the packed variable at the top of the function, then
+       * rewrite every use of the two originals.
+       */
+      fonc->body.push_head(newvar);
+      ir_variable_replacer replacer(v1, m1, v2, m2, newvar);
+      foreach_list(node, &fonc->body) {
+         static_cast<ir_instruction *>(node)->accept(&replacer);
+      }
+
+      progress = true;
+      return visit_continue;
+   }
+};
+
+
+/**
+ * Runs the variable packing pass over \c instructions.
+ *
+ * Each pass packs at most one pair per function signature, so the pass is
+ * repeated until nothing changes, up to a fixed number of passes.
+ *
+ * \return true if at least one pair of variables was packed
+ */
+bool
+do_var_packing(exec_list *instructions)
+{
+   static const int max_passes = 50;
+   bool progress = false;
+
+   for (int i = 0; i < max_passes; i++) {
+      ir_packer v;
+
+      visit_list_elements(&v, instructions);
+      if (!v.progress)
+         break;
+      progress = true;
+   }
+
+   return progress;
+}
-- 
1.7.3.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev