We were not invalidating ACP entries whose src reads more than one register when an instruction overwrote any register read by entry->src other than the first. This leads to incorrect copy propagation because we re-use entries from the ACP that have been partially invalidated. --- .../drivers/dri/i965/brw_fs_copy_propagation.cpp | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index 3e89a0d..89ba8c5 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -44,6 +44,7 @@ struct acp_entry : public exec_node { fs_reg dst; fs_reg src; uint8_t regs_written; + uint8_t regs_read; enum opcode opcode; bool saturate; }; @@ -775,8 +776,17 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block, */ for (int i = 0; i < ACP_HASH_SIZE; i++) { foreach_in_list_safe(acp_entry, entry, &acp[i]) { - if (inst->overwrites_reg(entry->src)) - entry->remove(); + /* Make sure we kill the entry if this instruction overwrites + * _any_ of the registers that it reads + */ + fs_reg tmp = entry->src; + for (int n = 0; n < entry->regs_read; n++) { + if (inst->overwrites_reg(tmp)) { + entry->remove(); + break; + } + tmp.reg_offset++; + } } } } @@ -785,10 +795,11 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block, * operand of another instruction, add it to the ACP. 
*/ if (can_propagate_from(inst)) { - acp_entry *entry = ralloc(copy_prop_ctx, acp_entry); - entry->dst = inst->dst; - entry->src = inst->src[0]; + acp_entry *entry = ralloc(copy_prop_ctx, acp_entry); + entry->dst = inst->dst; + entry->src = inst->src[0]; entry->regs_written = inst->regs_written; + entry->regs_read = inst->regs_read(0); entry->opcode = inst->opcode; entry->saturate = inst->saturate; acp[entry->dst.nr % ACP_HASH_SIZE].push_tail(entry); @@ -804,6 +815,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block, entry->dst.reg_offset = offset; entry->src = inst->src[i]; entry->regs_written = regs_written; + entry->regs_read = inst->regs_read(i); entry->opcode = inst->opcode; if (!entry->dst.equals(inst->src[i])) { acp[entry->dst.nr % ACP_HASH_SIZE].push_tail(entry); -- 2.1.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev