Kenneth Graunke <kenn...@whitecape.org> writes: > On 09/22/2012 02:04 PM, Eric Anholt wrote: >> Use a simple chaining hash table for the ACP. This is not really very good, >> because we still do a full walk of the tree per destination write, but it >> still reduces fp-long-alu runtime from 5.3 to 3.9s. >> --- >> src/mesa/drivers/dri/i965/brw_fs.h | 2 +- >> .../drivers/dri/i965/brw_fs_copy_propagation.cpp | 46 >> +++++++++++++------- >> 2 files changed, 32 insertions(+), 16 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h >> b/src/mesa/drivers/dri/i965/brw_fs.h >> index 59a0e50..3ea4e00 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs.h >> +++ b/src/mesa/drivers/dri/i965/brw_fs.h >> @@ -247,7 +247,7 @@ public: >> bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry); >> bool try_constant_propagate(fs_inst *inst, acp_entry *entry); >> bool opt_copy_propagate_local(void *mem_ctx, fs_bblock *block, >> - exec_list *acp); >> + exec_list *acp, int acp_count); >> bool register_coalesce(); >> bool register_coalesce_2(); >> bool compute_to_mrf(); >> diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp >> b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp >> index ad34657..0f6dbd4 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp >> @@ -196,7 +196,8 @@ fs_visitor::try_constant_propagate(fs_inst *inst, >> acp_entry *entry) >> */ >> bool >> fs_visitor::opt_copy_propagate_local(void *mem_ctx, >> - fs_bblock *block, exec_list *acp) >> + fs_bblock *block, exec_list *acp, >> + int acp_count) >> { >> bool progress = false; >> >> @@ -205,28 +206,41 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx, >> inst = (fs_inst *)inst->next) { >> >> /* Try propagating into this instruction. */ >> - foreach_list(entry_node, acp) { >> - acp_entry *entry = (acp_entry *)entry_node; >> + for (int i = 0; i < 3; i++) { >> + if (inst->src[i].file != GRF) >> + continue; >> >> - if (try_constant_propagate(inst, entry)) >> - progress = true; >> + foreach_list(entry_node, &acp[inst->src[i].reg % acp_count]) { >> + acp_entry *entry = (acp_entry *)entry_node; >> >> - for (int i = 0; i < 3; i++) { >> - if (try_copy_propagate(inst, i, entry)) >> - progress = true; >> - } >> + if (try_constant_propagate(inst, entry)) >> + progress = true; >> + >> + if (try_copy_propagate(inst, i, entry)) >> + progress = true; >> + } >> } >> >> /* kill the destination from the ACP */ >> if (inst->dst.file == GRF) { >> - foreach_list_safe(entry_node, acp) { >> + foreach_list_safe(entry_node, &acp[inst->dst.reg % acp_count]) { >> acp_entry *entry = (acp_entry *)entry_node; >> >> - if (inst->overwrites_reg(entry->dst) || >> - inst->overwrites_reg(entry->src)) { >> + if (inst->overwrites_reg(entry->dst)) { >> entry->remove(); >> } >> } >> + >> + /* Oops, we only have the chaining hash based on the destination, >> not >> + * the source, so walk across the entire table. >> + */ >> + for (int i = 0; i < acp_count; i++) { >> + foreach_list_safe(entry_node, &acp[i]) { >> + acp_entry *entry = (acp_entry *)entry_node; >> + if (inst->overwrites_reg(entry->src)) >> + entry->remove(); >> + } >> + } >> } >> >> /* If this instruction is a raw copy, add it to the ACP. */ >> @@ -246,7 +260,7 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx, >> acp_entry *entry = ralloc(mem_ctx, acp_entry); >> entry->dst = inst->dst; >> entry->src = inst->src[0]; >> - acp->push_tail(entry); >> + acp[entry->dst.reg % acp_count].push_tail(entry); >> } >> } >> >> @@ -263,9 +277,11 @@ fs_visitor::opt_copy_propagate() >> >> for (int b = 0; b < cfg.num_blocks; b++) { >> fs_bblock *block = cfg.blocks[b]; >> - exec_list acp; >> + int acp_count = 16; >> + exec_list acp[acp_count]; > > I still think it might be worth moving these declarations into > opt_copy_propagate_local() rather than in the loop here.
Yeah, that was prep for doing propagation across block boundaries, but it's the wrong interface for that anyway, so I've moved them in. > I'd also make acp_count const. For one, it better not change. Also, > making it const may help the compiler determine the size of the array > statically, at compile time. It should anyway, but. Never hurts. Const doesn't let the compiler do anything different. I wish that meme would die.
pgpKr1AqnmQLq.pgp
Description: PGP signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev