When we have code such as this: mov vgrf1.0.x:F, vgrf2.xxxx:F mov vgrf3.0.x:F, vgrf1.xxxx:F ... mov vgrf3.0.x:F, vgrf1.xxxx:F
And vgrf1 is chosen for spilling, we can emit this: mov vgrf1.0.x:F, vgrf2.xxxx:F gen4_scratch_write hw_reg0:F, vgrf1.xxxx:D, 22D mov vgrf3.0.x:F, vgrf1.xxxx:F ... gen4_scratch_read vgrf4.0.x:F, 22D mov vgrf3.0.x:F, vgrf4.xxxx:F Instead of this: mov vgrf1.0.x:F, vgrf2.xxxx:F gen4_scratch_write hw_reg0:F, vgrf1.xxxx:D, 22D gen4_scratch_read vgrf4.0.x:F, 22D mov vgrf3.0.x:F, vgrf4.xxxx:F ... gen4_scratch_read vgrf5.0.x:F, 22D mov vgrf3.0.x:F, vgrf5.xxxx:F And save one scratch read while still preserving the benefits of spilling the register. --- .../drivers/dri/i965/brw_vec4_reg_allocate.cpp | 39 +++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp index 80ab813..5fed2f9 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -334,6 +334,18 @@ vec4_visitor::choose_spill_reg(struct ra_graph *g) return ra_get_best_spill_node(g); } +static bool +writemask_matches_swizzle(unsigned writemask, unsigned swizzle) +{ + for (int i = 0; i < 4; i++) { + unsigned channel = 1 << BRW_GET_SWZ(swizzle, i); + if (!(writemask & channel)) + return false; + } + + return true; +} + void vec4_visitor::spill_reg(int spill_reg_nr) { @@ -341,11 +353,33 @@ vec4_visitor::spill_reg(int spill_reg_nr) unsigned int spill_offset = last_scratch++; /* Generate spill/unspill instructions for the objects being spilled. */ + vec4_instruction *spill_write_inst = NULL; foreach_block_and_inst(block, vec4_instruction, inst, cfg) { + /* We don't spill registers used for scratch */ + if (inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_READ || + inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_WRITE) + continue; + int scratch_reg = -1; for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == GRF && inst->src[i].reg == spill_reg_nr) { - if (scratch_reg == -1) { + /* If we are reading the spilled register right after writing + * to it we can skip the scratch read and use directly the + * register we used as source for the scratch write. For this + * to work we must check that: + * + * 1) The write is inconditional, that is, it is not predicated or + it is a SEL. + * 2) All the channels that we read have been written in that + * last write instruction. + */ + if (spill_write_inst && + (!spill_write_inst->predicate || + spill_write_inst->opcode == BRW_OPCODE_SEL) && + writemask_matches_swizzle(spill_write_inst->dst.writemask, + inst->src[i].swizzle)) { + scratch_reg = spill_write_inst->dst.reg; + } else if (scratch_reg == -1) { scratch_reg = alloc.allocate(1); src_reg temp = inst->src[i]; temp.reg = scratch_reg; @@ -358,6 +392,9 @@ vec4_visitor::spill_reg(int spill_reg_nr) if (inst->dst.file == GRF && inst->dst.reg == spill_reg_nr) { emit_scratch_write(block, inst, spill_offset); + spill_write_inst = inst; + } else { + spill_write_inst = NULL; } } -- 1.9.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev