On Mon, Jun 30, 2014 at 11:49 AM, Ian Romanick <i...@freedesktop.org> wrote: > On 06/25/2014 02:12 PM, Matt Turner wrote: >> From: Kenneth Graunke <kenn...@whitecape.org> >> >> [mattst88]: Modified to perform CSE on instructions with >> the same writemask. Offered no improvement before. >> >> total instructions in shared programs: 1995633 -> 1995185 (-0.02%) >> instructions in affected programs: 14410 -> 13962 (-3.11%) >> >> Reviewed-by: Matt Turner <matts...@gmail.com> >> Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> >> --- >> src/mesa/drivers/dri/i965/Makefile.sources | 1 + >> src/mesa/drivers/dri/i965/brw_vec4.cpp | 1 + >> src/mesa/drivers/dri/i965/brw_vec4.h | 2 + >> src/mesa/drivers/dri/i965/brw_vec4_cse.cpp | 237 >> +++++++++++++++++++++++++++++ >> 4 files changed, 241 insertions(+) >> create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_cse.cpp >> >> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources >> b/src/mesa/drivers/dri/i965/Makefile.sources >> index 2570059..8f1d272 100644 >> --- a/src/mesa/drivers/dri/i965/Makefile.sources >> +++ b/src/mesa/drivers/dri/i965/Makefile.sources >> @@ -102,6 +102,7 @@ i965_FILES = \ >> brw_util.c \ >> brw_vec4.cpp \ >> brw_vec4_copy_propagation.cpp \ >> + brw_vec4_cse.cpp \ >> brw_vec4_generator.cpp \ >> brw_vec4_gs.c \ >> brw_vec4_gs_visitor.cpp \ >> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp >> b/src/mesa/drivers/dri/i965/brw_vec4.cpp >> index 24903f9..0d57399 100644 >> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp >> @@ -1747,6 +1747,7 @@ vec4_visitor::run() >> progress = dead_control_flow_eliminate(this) || progress; >> progress = opt_copy_propagation() || progress; >> progress = opt_algebraic() || progress; >> + progress = opt_cse() || progress; >> progress = opt_register_coalesce() || progress; >> } while (progress); >> >> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h >> b/src/mesa/drivers/dri/i965/brw_vec4.h >> index 366198c..4a8eabb 100644 >> --- a/src/mesa/drivers/dri/i965/brw_vec4.h >> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h >> @@ -426,6 +426,8 @@ public: >> bool dead_code_eliminate(); >> bool virtual_grf_interferes(int a, int b); >> bool opt_copy_propagation(); >> + bool opt_cse_local(bblock_t *, exec_list *); >> + bool opt_cse(); >> bool opt_algebraic(); >> bool opt_register_coalesce(); >> void opt_set_dependency_control(); >> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp >> b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp >> new file mode 100644 >> index 0000000..33c7430 >> --- /dev/null >> +++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp >> @@ -0,0 +1,237 @@ >> +/* >> + * Copyright © 2012, 2013, 2014 Intel Corporation >> + * >> + * Permission is hereby granted, free of charge, to any person obtaining a >> + * copy of this software and associated documentation files (the >> "Software"), >> + * to deal in the Software without restriction, including without limitation >> + * the rights to use, copy, modify, merge, publish, distribute, sublicense, >> + * and/or sell copies of the Software, and to permit persons to whom the >> + * Software is furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice (including the next >> + * paragraph) shall be included in all copies or substantial portions of the >> + * Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS >> OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR >> OTHER >> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING >> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER >> DEALINGS >> + * IN THE SOFTWARE. >> + */ >> + >> +#include "brw_vec4.h" >> +#include "brw_cfg.h" >> + >> +using namespace brw; >> + >> +/** @file brw_vec4_cse.cpp >> + * >> + * Support for local common subexpression elimination. >> + * >> + * See Muchnick's Advanced Compiler Design and Implementation, section >> + * 13.1 (p378). >> + */ >> + >> +namespace { >> +struct aeb_entry : public exec_node { >> + /** The instruction that generates the expression value. */ >> + vec4_instruction *generator; >> + >> + /** The temporary where the value is stored. */ >> + src_reg tmp; >> +}; >> +} >> + >> +static bool >> +is_expression(const vec4_instruction *const inst) > > is_expression seems like a weird name for this. It's not obvious to a > noob what this is doing.
Yeah, sort of is. I'd like to change this one and the fs CSE implementation in sync. Not sure what else to call it. can_cse_inst, something else? >> +{ >> + switch (inst->opcode) { >> + case BRW_OPCODE_SEL: >> + case BRW_OPCODE_NOT: >> + case BRW_OPCODE_AND: >> + case BRW_OPCODE_OR: >> + case BRW_OPCODE_XOR: >> + case BRW_OPCODE_SHR: >> + case BRW_OPCODE_SHL: >> + case BRW_OPCODE_ASR: >> + case BRW_OPCODE_ADD: >> + case BRW_OPCODE_MUL: >> + case BRW_OPCODE_FRC: >> + case BRW_OPCODE_RNDU: >> + case BRW_OPCODE_RNDD: >> + case BRW_OPCODE_RNDE: >> + case BRW_OPCODE_RNDZ: >> + case BRW_OPCODE_LINE: >> + case BRW_OPCODE_PLN: >> + case BRW_OPCODE_MAD: >> + case BRW_OPCODE_LRP: >> + return true; >> + case SHADER_OPCODE_RCP: >> + case SHADER_OPCODE_RSQ: >> + case SHADER_OPCODE_SQRT: >> + case SHADER_OPCODE_EXP2: >> + case SHADER_OPCODE_LOG2: >> + case SHADER_OPCODE_POW: >> + case SHADER_OPCODE_INT_QUOTIENT: >> + case SHADER_OPCODE_INT_REMAINDER: >> + case SHADER_OPCODE_SIN: >> + case SHADER_OPCODE_COS: >> + return inst->mlen == 0; >> + default: >> + return false; >> + } >> +} >> + >> +static bool >> +is_expression_commutative(enum opcode op) >> +{ >> + switch (op) { >> + case BRW_OPCODE_AND: >> + case BRW_OPCODE_OR: >> + case BRW_OPCODE_XOR: >> + case BRW_OPCODE_ADD: >> + case BRW_OPCODE_MUL: >> + return true; >> + default: >> + return false; >> + } >> +} >> + >> +static bool >> +operands_match(enum opcode op, src_reg *xs, src_reg *ys) >> +{ >> + if (!is_expression_commutative(op)) { >> + return xs[0].equals(ys[0]) && xs[1].equals(ys[1]) && >> xs[2].equals(ys[2]); >> + } else { >> + return (xs[0].equals(ys[0]) && xs[1].equals(ys[1])) || >> + (xs[1].equals(ys[0]) && xs[0].equals(ys[1])); >> + } >> +} >> + >> +static bool >> +instructions_match(vec4_instruction *a, vec4_instruction *b) >> +{ >> + return a->opcode == b->opcode && >> + a->saturate == b->saturate && >> + a->conditional_mod == b->conditional_mod && >> + a->dst.type == b->dst.type && >> + a->dst.writemask == b->dst.writemask && >> + operands_match(a->opcode, a->src, b->src); >> +} >> + >> +bool >> +vec4_visitor::opt_cse_local(bblock_t *block, exec_list *aeb) >> +{ >> + bool progress = false; >> + >> + void *cse_ctx = ralloc_context(NULL); >> + >> + for (vec4_instruction *inst = (vec4_instruction *)block->start; >> + inst != block->end->next; >> + inst = (vec4_instruction *) inst->next) { > > Are you planning to rebase this on your foreach_inst_in_block patch? Yes. >> + >> + /* Skip some cases. */ >> + if (is_expression(inst) && !inst->predicate && inst->mlen == 0 && > > Doesn't is_expression already check inst->mlen == 0 for the cases that > count? Yeah, looks like it. I'll remove the check here. >> + !inst->conditional_mod) >> + { > > { should go on previous line. Okay. _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev