vec4_live_variables tracks now each flag channel independently, so vec4_dead_code_eliminate can update the writemask of null registers, based on which component are alive at the moment. This would allow vec4_cmod_propagation to optimize out several movs involving null registers.
v2: added support to track each flag channel independently at vec4 live_variables, as v1 assumed that it was already doing it, as pointed by Francisco Jerez --- I was tempted to split this commit in two, one for tracking each channel independently at vec4_live_variables, and other for using it at vec4_dead_code_eliminate, but in the end I concluded that made more sense as one commit, as are changes tightly tied. FWIW, the shader-db numbers for the optimization patch ("i965/vec4: adding vec4_cmod_propagation optimization", 2/5) changed, being now the following: total instructions in shared programs: 6240413 -> 6235841 (-0.07%) instructions in affected programs: 401933 -> 397361 (-1.14%) total loops in shared programs: 1979 -> 1979 (0.00%) helped: 2265 HURT: 0 That are slightly worse that the numbers for the first version of the patch, before adding that extra check. I will use this numbers if this patch is approved. FWIW, without this change on live_variables and dead_code eliminate, the numbers on the optimization would be the following ones: total instructions in shared programs: 6240413 -> 6240253 (-0.00%) instructions in affected programs: 18965 -> 18805 (-0.84%) total loops in shared programs: 1979 -> 1979 (0.00%) helped: 160 HURT: src/mesa/drivers/dri/i965/brw_ir_vec4.h | 26 ++++++++++++++++++ .../dri/i965/brw_vec4_dead_code_eliminate.cpp | 31 +++++++++++++++------- .../drivers/dri/i965/brw_vec4_live_variables.cpp | 14 ++++++---- 3 files changed, 57 insertions(+), 14 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h index 96dd633..934d7b1 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h @@ -185,6 +185,32 @@ public: return predicate || opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2; } + bool reads_flag(unsigned c) + { + if (!reads_flag()) + return false; + + switch(predicate) { + case BRW_PREDICATE_NONE: + return false; + break; + case BRW_PREDICATE_ALIGN16_REPLICATE_X: + return (c == 0 ? true : false); + break; + case BRW_PREDICATE_ALIGN16_REPLICATE_Y: + return (c == 1 ? true : false); + break; + case BRW_PREDICATE_ALIGN16_REPLICATE_Z: + return (c == 2 ? true : false); + break; + case BRW_PREDICATE_ALIGN16_REPLICATE_W: + return (c == 3 ? true : false); + break; + default: + return true; + } + } + bool writes_flag() { return (conditional_mod && (opcode != BRW_OPCODE_SEL && diff --git a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp index 8fc7a36..d4216fd 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp @@ -78,13 +78,19 @@ vec4_visitor::dead_code_eliminate() sizeof(BITSET_WORD)); foreach_inst_in_block_reverse(vec4_instruction, inst, block) { - if (inst->dst.file == GRF && !inst->has_side_effects()) { + if ((inst->dst.file == GRF && !inst->has_side_effects()) || + (inst->dst.is_null() && inst->writes_flag())){ bool result_live[4] = { false }; - for (unsigned i = 0; i < inst->regs_written; i++) { - for (int c = 0; c < 4; c++) - result_live[c] |= BITSET_TEST( - live, var_from_reg(alloc, offset(inst->dst, i), c)); + if (inst->dst.file == GRF) { + for (unsigned i = 0; i < inst->regs_written; i++) { + for (int c = 0; c < 4; c++) + result_live[c] |= BITSET_TEST( + live, var_from_reg(alloc, offset(inst->dst, i), c)); + } + } else { + for (unsigned c = 0; c < 4; c++) + result_live[c] = BITSET_TEST(flag_live, c); } /* If the instruction can't do writemasking, then it's all or @@ -117,7 +123,11 @@ vec4_visitor::dead_code_eliminate() } if (inst->dst.is_null() && inst->writes_flag()) { - if (!BITSET_TEST(flag_live, 0)) { + bool combined_live = false; + for (unsigned c = 0; c < 4; c++) + combined_live |= BITSET_TEST(flag_live,c); + + if (!combined_live) { inst->opcode = BRW_OPCODE_NOP; progress = true; continue; @@ -136,7 +146,8 @@ vec4_visitor::dead_code_eliminate() } if (inst->writes_flag()) { - BITSET_CLEAR(flag_live, 0); + for (unsigned c = 0; c < 4; c++) + BITSET_CLEAR(flag_live, c); } for (int i = 0; i < 3; i++) { @@ -150,8 +161,10 @@ vec4_visitor::dead_code_eliminate() } } - if (inst->reads_flag()) { - BITSET_SET(flag_live, 0); + for (unsigned c = 0; c < 4; c++) { + if (inst->reads_flag(c)) { + BITSET_SET(flag_live, c); + } } } } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp index 6782379..aa9a657 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp @@ -86,9 +86,10 @@ vec4_live_variables::setup_def_use() } } } - if (inst->reads_flag()) { - if (!BITSET_TEST(bd->flag_def, 0)) { - BITSET_SET(bd->flag_use, 0); + for (unsigned c = 0; c < 4; c++) { + if (inst->reads_flag(c) && + !BITSET_TEST(bd->flag_def, c)) { + BITSET_SET(bd->flag_use, c); } } @@ -110,8 +111,11 @@ vec4_live_variables::setup_def_use() } } if (inst->writes_flag()) { - if (!BITSET_TEST(bd->flag_use, 0)) { - BITSET_SET(bd->flag_def, 0); + for (unsigned c = 0; c < 4; c++) { + if ((inst->dst.writemask & (1 << c)) && + !BITSET_TEST(bd->flag_use, c)) { + BITSET_SET(bd->flag_def, c); + } } } -- 2.1.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev