We generate silly code for array access, and it's easier to generally support the cleanup than to specifically avoid the bad code in each place we might generate it.
Removes 4.6% of instructions from 41.6% of shaders in shader-db, particularly savage2/hon and unigine.
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp      |   97 +++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_vec4.h        |    1 +
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp |    1 +
 3 files changed, 99 insertions(+), 0 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 436de2f..5fd4756 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -306,6 +306,103 @@ vec4_visitor::pack_uniform_registers()
    }
 }
 
+/** Returns true if \c reg is an immediate operand equal to zero. */
+static bool
+src_reg_is_zero(src_reg *reg)
+{
+   if (reg->file != IMM)
+      return false;
+
+   if (reg->type == BRW_REGISTER_TYPE_F) {
+      return reg->imm.f == 0.0;
+   } else {
+      return reg->imm.i == 0;
+   }
+}
+
+/** Returns true if \c reg is an immediate operand equal to one. */
+static bool
+src_reg_is_one(src_reg *reg)
+{
+   if (reg->file != IMM)
+      return false;
+
+   if (reg->type == BRW_REGISTER_TYPE_F) {
+      return reg->imm.f == 1.0;
+   } else {
+      return reg->imm.i == 1;
+   }
+}
+
+/**
+ * Does algebraic optimizations (0 * a = 0, 1 * a = a, a + 0 = a).
+ *
+ * While GLSL IR also performs this optimization, we end up with it in
+ * our instruction stream for a couple of reasons. One is that we
+ * sometimes generate silly instructions, for example in array access
+ * where we'll generate "ADD offset, index, base" even if base is 0.
+ * The other is that GLSL IR's constant propagation doesn't track the
+ * components of aggregates, so some VS patterns (initialize matrix to
+ * 0, accumulate in vertex blending factors) end up breaking down to
+ * instructions involving 0.
+ *
+ * Returns true if any instruction was rewritten, in which case
+ * live_intervals_valid is also cleared.
+ */
+bool
+vec4_visitor::opt_algebraic()
+{
+   bool progress = false;
+
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      switch (inst->opcode) {
+      case BRW_OPCODE_ADD:
+         if (src_reg_is_zero(&inst->src[1])) {
+            inst->opcode = BRW_OPCODE_MOV;
+            inst->src[1] = src_reg();
+            progress = true;
+         }
+         break;
+
+      case BRW_OPCODE_MUL:
+         if (src_reg_is_zero(&inst->src[1])) {
+            inst->opcode = BRW_OPCODE_MOV;
+            switch (inst->src[0].type) {
+            case BRW_REGISTER_TYPE_F:
+               inst->src[0] = src_reg(0.0f);
+               break;
+            case BRW_REGISTER_TYPE_D:
+               inst->src[0] = src_reg(0);
+               break;
+            case BRW_REGISTER_TYPE_UD:
+               inst->src[0] = src_reg(0u);
+               break;
+            default:
+               assert(!"not reached");
+               inst->src[0] = src_reg(0.0f);
+               break;
+            }
+            inst->src[1] = src_reg();
+            progress = true;
+         } else if (src_reg_is_one(&inst->src[1])) {
+            inst->opcode = BRW_OPCODE_MOV;
+            inst->src[1] = src_reg();
+            progress = true;
+         }
+         break;
+      default:
+         break;
+      }
+   }
+
+   if (progress)
+      this->live_intervals_valid = false;
+
+   return progress;
+}
+
 /**
  * Only a limited number of hardware registers may be used for push
  * constants, so this turns access to the overflowed constants into
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 7739a15..3f116ee 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -401,6 +401,7 @@ public:
    bool dead_code_eliminate();
    bool virtual_grf_interferes(int a, int b);
    bool opt_copy_propagation();
+   bool opt_algebraic();
 
    vec4_instruction *emit(vec4_instruction *inst);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index c40c41f..7031d2a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -615,6 +615,7 @@ vec4_visitor::run()
       progress = false;
       progress = dead_code_eliminate() || progress;
       progress = opt_copy_propagation() || progress;
+      progress = opt_algebraic() || progress;
    } while (progress);
 
 
-- 
1.7.5.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev