On Sat, Oct 3, 2015 at 1:57 PM, Matt Turner <matts...@gmail.com> wrote: > With NIR, it actually hurts things.
Buuutt.... not everything uses NIR. Of course I'm sure it has a similarly negative effect with nouveau/codegen, but I'm not so sure about things like nv30 which don't have an optimizing compiler backend. IMHO there should be a bit somewhere in the compiler options that indicates whether the driver implements an optimizing compiler, and if so, then all these idiotic optimizations should be removed. [I realize some are needed for correctness, obviously keep those.] -ilia > > total instructions in shared programs: 6529329 -> 6528888 (-0.01%) > instructions in affected programs: 14833 -> 14392 (-2.97%) > helped: 299 > HURT: 1 > > In all affected programs I inspected (including the single hurt one) the > pass CSE'd some multiplies and caused some reassociation (e.g., caused > (A * B) * C to be A * (B * C)) when the original intermediate result was > reused elsewhere. > --- > src/glsl/Makefile.sources | 1 - > src/glsl/glsl_parser_extras.cpp | 1 - > src/glsl/ir_optimization.h | 1 - > src/glsl/opt_cse.cpp | 472 > ---------------------------------------- > 4 files changed, 475 deletions(-) > delete mode 100644 src/glsl/opt_cse.cpp > > diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources > index 32b6dba..7083246 100644 > --- a/src/glsl/Makefile.sources > +++ b/src/glsl/Makefile.sources > @@ -184,7 +184,6 @@ LIBGLSL_FILES = \ > opt_constant_variable.cpp \ > opt_copy_propagation.cpp \ > opt_copy_propagation_elements.cpp \ > - opt_cse.cpp \ > opt_dead_builtin_variables.cpp \ > opt_dead_builtin_varyings.cpp \ > opt_dead_code.cpp \ > diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp > index f554241..b521c5f 100644 > --- a/src/glsl/glsl_parser_extras.cpp > +++ b/src/glsl/glsl_parser_extras.cpp > @@ -1899,7 +1899,6 @@ do_common_optimization(exec_list *ir, bool linked, > progress = do_constant_variable_unlinked(ir) || progress; > progress = do_constant_folding(ir) || progress; > progress = do_minmax_prune(ir) || progress; > - progress = do_cse(ir) || progress; > progress = do_rebalance_tree(ir) || progress; > progress = do_algebraic(ir, native_integers, options) || progress; > progress = do_lower_jumps(ir) || progress; > diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h > index 265b223..ce5c492 100644 > --- a/src/glsl/ir_optimization.h > +++ b/src/glsl/ir_optimization.h > @@ -87,7 +87,6 @@ bool do_constant_variable_unlinked(exec_list *instructions); > bool do_copy_propagation(exec_list *instructions); > bool do_copy_propagation_elements(exec_list *instructions); > bool do_constant_propagation(exec_list *instructions); > -bool do_cse(exec_list *instructions); > void do_dead_builtin_varyings(struct gl_context *ctx, > gl_shader *producer, gl_shader *consumer, > unsigned num_tfeedback_decls, > diff --git a/src/glsl/opt_cse.cpp b/src/glsl/opt_cse.cpp > deleted file mode 100644 > index 4b8e9a0..0000000 > --- a/src/glsl/opt_cse.cpp > +++ /dev/null > @@ -1,472 +0,0 @@ > -/* > - * Copyright © 2013 Intel Corporation > - * > - * Permission is hereby granted, free of charge, to any person obtaining a > - * copy of this software and associated documentation files (the "Software"), > - * to deal in the Software without restriction, including without limitation > - * the rights to use, copy, modify, merge, publish, distribute, sublicense, > - * and/or sell copies of the Software, and to permit persons to whom the > - * Software is furnished to do so, subject to the following conditions: > - * > - * The above copyright notice and this permission notice (including the next > - * paragraph) shall be included in all copies or substantial portions of the > - * Software. > - * > - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > - * DEALINGS IN THE SOFTWARE. > - */ > - > -/** > - * \file opt_cse.cpp > - * > - * constant subexpression elimination at the GLSL IR level. > - * > - * Compare to brw_fs_cse.cpp for a more complete CSE implementation. This > one > - * is generic and handles texture operations, but it's rather simple > currently > - * and doesn't support modification of variables in the available expressions > - * list, so it can't do variables other than uniforms or shader inputs. > - */ > - > -#include "ir.h" > -#include "ir_visitor.h" > -#include "ir_rvalue_visitor.h" > -#include "ir_basic_block.h" > -#include "ir_optimization.h" > -#include "ir_builder.h" > -#include "glsl_types.h" > - > -using namespace ir_builder; > - > -static bool debug = false; > - > -namespace { > - > -/** > - * This is the record of an available expression for common subexpression > - * elimination. > - */ > -class ae_entry : public exec_node > -{ > -public: > - ae_entry(ir_instruction *base_ir, ir_rvalue **val) > - : val(val), base_ir(base_ir) > - { > - assert(val); > - assert(*val); > - assert(base_ir); > - > - var = NULL; > - } > - > - void init(ir_instruction *base_ir, ir_rvalue **val) > - { > - this->val = val; > - this->base_ir = base_ir; > - this->var = NULL; > - > - assert(val); > - assert(*val); > - assert(base_ir); > - } > - > - /** > - * The pointer to the expression that we might be able to reuse > - * > - * Note the double pointer -- this is the place in the base_ir expression > - * tree that we would rewrite to move the expression out to a new variable > - * assignment. > - */ > - ir_rvalue **val; > - > - /** > - * Root instruction in the basic block where the expression appeared. > - * > - * This is used so that we can insert the new variable declaration into > the > - * instruction stream (since *val is just somewhere in base_ir's > expression > - * tree). > - */ > - ir_instruction *base_ir; > - > - /** > - * The variable that the expression has been stored in, if it's been CSEd > - * once already. > - */ > - ir_variable *var; > -}; > - > -class cse_visitor : public ir_rvalue_visitor { > -public: > - cse_visitor(exec_list *validate_instructions) > - : validate_instructions(validate_instructions) > - { > - progress = false; > - mem_ctx = ralloc_context(NULL); > - this->ae = new(mem_ctx) exec_list; > - } > - ~cse_visitor() > - { > - ralloc_free(mem_ctx); > - } > - > - virtual ir_visitor_status visit_enter(ir_function_signature *ir); > - virtual ir_visitor_status visit_enter(ir_loop *ir); > - virtual ir_visitor_status visit_enter(ir_if *ir); > - virtual ir_visitor_status visit_enter(ir_call *ir); > - virtual void handle_rvalue(ir_rvalue **rvalue); > - > - bool progress; > - > -private: > - void *mem_ctx; > - > - ir_rvalue *try_cse(ir_rvalue *rvalue); > - void add_to_ae(ir_rvalue **rvalue); > - > - /** > - * Move all nodes from the ae list to the free list > - */ > - void empty_ae_list(); > - > - /** > - * Get and initialize a new ae_entry > - * > - * This will either come from the free list or be freshly allocated. > - */ > - ae_entry *get_ae_entry(ir_rvalue **rvalue); > - > - /** List of ae_entry: The available expressions to reuse */ > - exec_list *ae; > - > - /** > - * The whole shader, so that we can validate_ir_tree in debug mode. > - * > - * This proved quite useful when trying to get the tree manipulation > - * right. > - */ > - exec_list *validate_instructions; > - > - /** > - * List of available-for-use ae_entry objects. > - */ > - exec_list free_ae_entries; > -}; > - > -/** > - * Visitor to walk an expression tree to check that all variables referenced > - * are constants. > - */ > -class is_cse_candidate_visitor : public ir_hierarchical_visitor > -{ > -public: > - > - is_cse_candidate_visitor() > - : ok(true) > - { > - } > - > - virtual ir_visitor_status visit(ir_dereference_variable *ir); > - > - bool ok; > -}; > - > - > -class contains_rvalue_visitor : public ir_rvalue_visitor > -{ > -public: > - > - contains_rvalue_visitor(ir_rvalue *val) > - : val(val) > - { > - found = false; > - } > - > - virtual void handle_rvalue(ir_rvalue **rvalue); > - > - bool found; > - > -private: > - ir_rvalue *val; > -}; > - > -} /* unnamed namespace */ > - > -static void > -dump_ae(exec_list *ae) > -{ > - int i = 0; > - > - printf("CSE: AE contents:\n"); > - foreach_in_list(ae_entry, entry, ae) { > - printf("CSE: AE %2d (%p): ", i, entry); > - (*entry->val)->print(); > - printf("\n"); > - > - if (entry->var) > - printf("CSE: in var %p:\n", entry->var); > - > - i++; > - } > -} > - > -ir_visitor_status > -is_cse_candidate_visitor::visit(ir_dereference_variable *ir) > -{ > - /* Currently, since we don't handle kills of the ae based on variables > - * getting assigned, we can only handle constant variables. > - */ > - if (ir->var->data.read_only) { > - return visit_continue; > - } else { > - if (debug) > - printf("CSE: non-candidate: var %s is not read only\n", > ir->var->name); > - ok = false; > - return visit_stop; > - } > -} > - > -void > -contains_rvalue_visitor::handle_rvalue(ir_rvalue **rvalue) > -{ > - if (*rvalue == val) > - found = true; > -} > - > -static bool > -contains_rvalue(ir_rvalue *haystack, ir_rvalue *needle) > -{ > - contains_rvalue_visitor v(needle); > - haystack->accept(&v); > - return v.found; > -} > - > -static bool > -is_cse_candidate(ir_rvalue *ir) > -{ > - /* Our temporary variable assignment generation isn't ready to handle > - * anything bigger than a vector. > - */ > - if (!ir->type->is_vector() && !ir->type->is_scalar()) { > - if (debug) > - printf("CSE: non-candidate: not a vector/scalar\n"); > - return false; > - } > - > - /* Only handle expressions and textures currently. We may want to extend > - * to variable-index array dereferences at some point. > - */ > - switch (ir->ir_type) { > - case ir_type_expression: > - case ir_type_texture: > - break; > - default: > - if (debug) > - printf("CSE: non-candidate: not an expression/texture\n"); > - return false; > - } > - > - is_cse_candidate_visitor v; > - > - ir->accept(&v); > - > - return v.ok; > -} > - > -/** > - * Tries to find and return a reference to a previous computation of a given > - * expression. > - * > - * Walk the list of available expressions checking if any of them match the > - * rvalue, and if so, move the previous copy of the expression to a temporary > - * and return a reference of the temporary. > - */ > -ir_rvalue * > -cse_visitor::try_cse(ir_rvalue *rvalue) > -{ > - foreach_in_list(ae_entry, entry, ae) { > - if (debug) { > - printf("Comparing to AE %p: ", entry); > - (*entry->val)->print(); > - printf("\n"); > - } > - > - if (!rvalue->equals(*entry->val)) > - continue; > - > - if (debug) { > - printf("CSE: Replacing: "); > - (*entry->val)->print(); > - printf("\n"); > - printf("CSE: with: "); > - rvalue->print(); > - printf("\n"); > - } > - > - if (!entry->var) { > - ir_instruction *base_ir = entry->base_ir; > - > - ir_variable *var = new(rvalue) ir_variable(rvalue->type, > - "cse", > - ir_var_temporary); > - > - /* Write the previous expression result into a new variable. */ > - base_ir->insert_before(var); > - ir_assignment *assignment = assign(var, *entry->val); > - base_ir->insert_before(assignment); > - > - /* Replace the expression in the original tree with a deref of the > - * variable, but keep tracking the expression for further reuse. > - */ > - *entry->val = new(rvalue) ir_dereference_variable(var); > - entry->val = &assignment->rhs; > - > - entry->var = var; > - > - /* Update the base_irs in the AE list. We have to be sure that > - * they're correct -- expressions from our base_ir that weren't > moved > - * need to stay in this base_ir (so that later consumption of them > - * puts new variables between our new variable and our base_ir), but > - * expressions from our base_ir that we *did* move need base_ir > - * updated so that any further elimination from inside gets its new > - * assignments put before our new assignment. > - */ > - foreach_in_list(ae_entry, fixup_entry, ae) { > - if (contains_rvalue(assignment->rhs, *fixup_entry->val)) > - fixup_entry->base_ir = assignment; > - } > - > - if (debug) > - dump_ae(ae); > - } > - > - /* Replace the expression in our current tree with the variable. */ > - return new(rvalue) ir_dereference_variable(entry->var); > - } > - > - return NULL; > -} > - > -void > -cse_visitor::empty_ae_list() > -{ > - free_ae_entries.append_list(ae); > -} > - > -ae_entry * > -cse_visitor::get_ae_entry(ir_rvalue **rvalue) > -{ > - ae_entry *entry = (ae_entry *) free_ae_entries.pop_head(); > - if (entry) { > - entry->init(base_ir, rvalue); > - } else { > - entry = new(mem_ctx) ae_entry(base_ir, rvalue); > - } > - > - return entry; > -} > - > -/** Add the rvalue to the list of available expressions for CSE. */ > -void > -cse_visitor::add_to_ae(ir_rvalue **rvalue) > -{ > - if (debug) { > - printf("CSE: Add to AE: "); > - (*rvalue)->print(); > - printf("\n"); > - } > - > - ae->push_tail(get_ae_entry(rvalue)); > - > - if (debug) > - dump_ae(ae); > -} > - > -void > -cse_visitor::handle_rvalue(ir_rvalue **rvalue) > -{ > - if (!*rvalue) > - return; > - > - if (debug) { > - printf("CSE: handle_rvalue "); > - (*rvalue)->print(); > - printf("\n"); > - } > - > - if (!is_cse_candidate(*rvalue)) > - return; > - > - ir_rvalue *new_rvalue = try_cse(*rvalue); > - if (new_rvalue) { > - *rvalue = new_rvalue; > - progress = true; > - > - if (debug) > - validate_ir_tree(validate_instructions); > - } else { > - add_to_ae(rvalue); > - } > -} > - > -ir_visitor_status > -cse_visitor::visit_enter(ir_if *ir) > -{ > - handle_rvalue(&ir->condition); > - > - empty_ae_list(); > - visit_list_elements(this, &ir->then_instructions); > - > - empty_ae_list(); > - visit_list_elements(this, &ir->else_instructions); > - > - empty_ae_list(); > - return visit_continue_with_parent; > -} > - > -ir_visitor_status > -cse_visitor::visit_enter(ir_function_signature *ir) > -{ > - empty_ae_list(); > - visit_list_elements(this, &ir->body); > - > - empty_ae_list(); > - return visit_continue_with_parent; > -} > - > -ir_visitor_status > -cse_visitor::visit_enter(ir_loop *ir) > -{ > - empty_ae_list(); > - visit_list_elements(this, &ir->body_instructions); > - > - empty_ae_list(); > - return visit_continue_with_parent; > -} > - > -ir_visitor_status > -cse_visitor::visit_enter(ir_call *) > -{ > - /* Because call is an exec_list of ir_rvalues, handle_rvalue gets passed a > - * pointer to the (ir_rvalue *) on the stack. Since we save those > pointers > - * in the AE list, we can't let handle_rvalue get called. > - */ > - return visit_continue_with_parent; > -} > - > -/** > - * Does a (uniform-value) constant subexpression elimination pass on the code > - * present in the instruction stream. > - */ > -bool > -do_cse(exec_list *instructions) > -{ > - cse_visitor v(instructions); > - > - visit_list_elements(&v, instructions); > - > - return v.progress; > -} > -- > 2.4.9 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev