On Monday, November 03, 2014 01:34:49 PM Matt Turner wrote: > Improves 359 shaders by >=10% > 114 shaders by >=20% > 91 shaders by >=30% > 82 shaders by >=40% > 22 shaders by >=50% > 4 shaders by >=60% > 2 shaders by >=80% > > total instructions in shared programs: 5505182 -> 5482260 (-0.42%) > instructions in affected programs: 364629 -> 341707 (-6.29%) > --- > src/mesa/drivers/dri/i965/Makefile.sources | 1 + > src/mesa/drivers/dri/i965/brw_vec4.cpp | 155 ------------------- > .../dri/i965/brw_vec4_dead_code_eliminate.cpp | 169 > +++++++++++++++++++++ > 3 files changed, 170 insertions(+), 155 deletions(-) > create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp > > diff --git a/src/mesa/drivers/dri/i965/Makefile.sources > b/src/mesa/drivers/dri/i965/Makefile.sources > index 711aabe..10be4f1 100644 > --- a/src/mesa/drivers/dri/i965/Makefile.sources > +++ b/src/mesa/drivers/dri/i965/Makefile.sources > @@ -102,6 +102,7 @@ i965_FILES = \ > brw_vec4.cpp \ > brw_vec4_copy_propagation.cpp \ > brw_vec4_cse.cpp \ > + brw_vec4_dead_code_eliminate.cpp \ > brw_vec4_generator.cpp \ > brw_vec4_gs_visitor.cpp \ > brw_vec4_live_variables.cpp \ > diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp > b/src/mesa/drivers/dri/i965/brw_vec4.cpp > index df589b8..6560351 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp > @@ -411,161 +411,6 @@ vec4_visitor::opt_reduce_swizzle() > return progress; > } > > -static bool > -try_eliminate_instruction(vec4_instruction *inst, int new_writemask, > - const struct brw_context *brw) > -{ > - if (inst->has_side_effects()) > - return false; > - > - if (new_writemask == 0) { > - /* Don't dead code eliminate instructions that write to the > - * accumulator as a side-effect. Instead just set the destination > - * to the null register to free it. > - */ > - if (inst->writes_accumulator || inst->writes_flag()) { > - inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type)); > - } else { > - inst->opcode = BRW_OPCODE_NOP; > - } > - > - return true; > - } else if (inst->dst.writemask != new_writemask) { > - switch (inst->opcode) { > - case SHADER_OPCODE_TXF_CMS: > - case SHADER_OPCODE_GEN4_SCRATCH_READ: > - case VS_OPCODE_PULL_CONSTANT_LOAD: > - case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: > - break; > - default: > - /* Do not set a writemask on Gen6 for math instructions, those are > - * executed using align1 mode that does not support a destination > mask. > - */ > - if (!(brw->gen == 6 && inst->is_math()) && !inst->is_tex()) { > - inst->dst.writemask = new_writemask; > - return true; > - } > - } > - } > - > - return false; > -} > - > -/** > - * Must be called after calculate_live_intervals() to remove unused > - * writes to registers -- register allocation will fail otherwise > - * because something deffed but not used won't be considered to > - * interfere with other regs. > - */ > -bool > -vec4_visitor::dead_code_eliminate() > -{ > - bool progress = false; > - int pc = -1; > - > - calculate_live_intervals(); > - > - foreach_block_and_inst(block, vec4_instruction, inst, cfg) { > - pc++; > - > - bool inst_writes_flag = false; > - if (inst->dst.file != GRF) { > - if (inst->dst.is_null() && inst->writes_flag()) { > - inst_writes_flag = true; > - } else { > - continue; > - } > - } > - > - if (inst->dst.file == GRF) { > - int write_mask = inst->dst.writemask; > - > - for (int c = 0; c < 4; c++) { > - if (write_mask & (1 << c)) { > - assert(this->virtual_grf_end[inst->dst.reg * 4 + c] >= pc); > - if (this->virtual_grf_end[inst->dst.reg * 4 + c] == pc) { > - write_mask &= ~(1 << c); > - } > - } > - } > - > - progress = try_eliminate_instruction(inst, write_mask, brw) || > - progress; > - } > - > - if (inst->predicate || inst->prev == NULL) > - continue; > - > - int dead_channels; > - if (inst_writes_flag) { > -/* Arbitrarily chosen, other than not being an xyzw writemask. */ > -#define FLAG_WRITEMASK (1 << 5) > - dead_channels = inst->reads_flag() ? 0 : FLAG_WRITEMASK; > - } else { > - dead_channels = inst->dst.writemask; > - > - for (int i = 0; i < 3; i++) { > - if (inst->src[i].file != GRF || > - inst->src[i].reg != inst->dst.reg) > - continue; > - > - for (int j = 0; j < 4; j++) { > - int swiz = BRW_GET_SWZ(inst->src[i].swizzle, j); > - dead_channels &= ~(1 << swiz); > - } > - } > - } > - > - foreach_inst_in_block_reverse_starting_from(vec4_instruction, > scan_inst, > - inst, block) { > - if (dead_channels == 0) > - break; > - > - if (inst_writes_flag) { > - if (scan_inst->dst.is_null() && scan_inst->writes_flag()) { > - scan_inst->opcode = BRW_OPCODE_NOP; > - progress = true; > - continue; > - } else if (scan_inst->reads_flag()) { > - break; > - } > - } > - > - if (inst->dst.file == scan_inst->dst.file && > - inst->dst.reg == scan_inst->dst.reg && > - inst->dst.reg_offset == scan_inst->dst.reg_offset) { > - int new_writemask = scan_inst->dst.writemask & ~dead_channels; > - > - progress = try_eliminate_instruction(scan_inst, new_writemask, > brw) || > - progress; > - } > - > - for (int i = 0; i < 3; i++) { > - if (scan_inst->src[i].file != inst->dst.file || > - scan_inst->src[i].reg != inst->dst.reg) > - continue; > - > - for (int j = 0; j < 4; j++) { > - int swiz = BRW_GET_SWZ(scan_inst->src[i].swizzle, j); > - dead_channels &= ~(1 << swiz); > - } > - } > - } > - } > - > - if (progress) { > - foreach_block_and_inst_safe (block, backend_instruction, inst, cfg) { > - if (inst->opcode == BRW_OPCODE_NOP) { > - inst->remove(block); > - } > - } > - > - invalidate_live_intervals(); > - } > - > - return progress; > -} > - > void > vec4_visitor::split_uniform_registers() > { > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp > new file mode 100644 > index 0000000..b8370ba > --- /dev/null > +++ b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp > @@ -0,0 +1,169 @@ > +/* > + * Copyright © 2014 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > DEALINGS > + * IN THE SOFTWARE. > + */ > + > +#include "brw_vec4.h" > +#include "brw_vec4_live_variables.h" > +#include "brw_cfg.h" > + > +/** @file brw_vec4_dead_code_eliminate.cpp > + * > + * Dataflow-aware dead code elimination. > + * > + * Walks the instruction list from the bottom, removing instructions that > + * have results that both aren't used in later blocks and haven't been read > + * yet in the tail end of this block. > + */ > + > +using namespace brw; > + > +static bool > +can_do_writemask(const struct brw_context *brw, > + const vec4_instruction *inst) > +{ > + switch (inst->opcode) { > + case SHADER_OPCODE_GEN4_SCRATCH_READ: > + case VS_OPCODE_PULL_CONSTANT_LOAD: > + case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: > + return false; > + default: > + /* The MATH instruction on Gen6 only executes in align1 mode, which > does > + * not support writemasking. > + */ > + if (brw->gen == 6 && inst->is_math()) > + return false; > + > + if (inst->is_tex()) > + return false;
I'd feel a lot more confident in this function if it were: { /* The MATH instruction on Gen6 only executes in align1 mode, which does * not support writemasking. */ if (brw->gen == 6 && inst->is_math()) return false; return inst->mlen == 0; } > + > + return true; > + } > +} > + > +bool > +vec4_visitor::dead_code_eliminate() > +{ > + bool progress = false; > + > + calculate_live_intervals(); > + > + int num_vars = live_intervals->num_vars; > + BITSET_WORD *live = ralloc_array(NULL, BITSET_WORD, > BITSET_WORDS(num_vars)); > + BITSET_WORD *flag_live = ralloc_array(NULL, BITSET_WORD, 1); > + > + foreach_block(block, cfg) { > + memcpy(live, live_intervals->block_data[block->num].liveout, > + sizeof(BITSET_WORD) * BITSET_WORDS(num_vars)); > + memcpy(flag_live, live_intervals->block_data[block->num].flag_liveout, > + sizeof(BITSET_WORD)); > + > + foreach_inst_in_block_reverse(vec4_instruction, inst, block) { > + if (inst->dst.file == GRF && !inst->has_side_effects()) { > + bool result_live[4] = { false }; > + > + for (int c = 0; c < 4; c++) { > + int var = inst->dst.reg * 4 + c; > + result_live[c] = BITSET_TEST(live, var); > + } > + > + /* If the instruction can't do writemasking, then it's all or > + * nothing. > + */ > + if (!can_do_writemask(brw, inst)) { > + bool result = result_live[0] | result_live[1] | > + result_live[2] | result_live[3]; A little strange to be doing bitwise-or on bools, but...it's not wrong... > + result_live[0] = result; > + result_live[1] = result; > + result_live[2] = result; > + result_live[3] = result; > + } > + > + for (int c = 0; c < 4; c++) { > + if (!result_live[c] && inst->dst.writemask & (1 << c)) { > + inst->dst.writemask &= ~(1 << c); > + progress = true; > + > + if (inst->dst.writemask == 0) { > + if (inst->writes_accumulator) { Same comment I had about the FS pass. I think you want: if (inst->writes_accumulator || inst->writes_flag()) Otherwise, we stop generating the flag, which may still be necessary (we haven't checked). With can_do_writemask changed, and this flag bug fixed, the patch would be: Reviewed-by: Kenneth Graunke <kenn...@whitecape.org> Great work as always. > + inst->dst = dst_reg(retype(brw_null_reg(), > inst->dst.type)); > + } else { > + inst->opcode = BRW_OPCODE_NOP; > + continue; > + } > + } > + } > + } > + } > + > + if (inst->dst.is_null() && inst->writes_flag()) { > + if (!BITSET_TEST(flag_live, 0)) { > + inst->opcode = BRW_OPCODE_NOP; > + progress = true; > + continue; > + } > + } > + > + if (inst->dst.file == GRF && !inst->predicate) { > + for (int c = 0; c < 4; c++) { > + if (inst->dst.writemask & (1 << c)) { > + int var = inst->dst.reg * 4 + c; > + BITSET_CLEAR(live, var); > + } > + } > + } > + > + if (inst->writes_flag()) { > + BITSET_CLEAR(flag_live, 0); > + } > + > + for (int i = 0; i < 3; i++) { > + if (inst->src[i].file == GRF) { > + for (int c = 0; c < 4; c++) { > + int swiz = BRW_GET_SWZ(inst->src[i].swizzle, c); > + int var = inst->src[i].reg * 4 + swiz; > + > + BITSET_SET(live, var); > + } > + } > + } > + > + if (inst->reads_flag()) { > + BITSET_SET(flag_live, 0); > + } > + } > + } > + > + ralloc_free(live); > + ralloc_free(flag_live); > + > + if (progress) { > + foreach_block_and_inst_safe(block, backend_instruction, inst, cfg) { > + if (inst->opcode == BRW_OPCODE_NOP) { > + inst->remove(block); > + } > + } > + > + invalidate_live_intervals(); > + } > + > + return progress; > +} >
signature.asc
Description: This is a digitally signed message part.
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev