On 31 August 2012 11:32, Eric Anholt <e...@anholt.net> wrote: > --- > src/mesa/drivers/dri/i965/brw_eu_compact.c | 111 > ++++++++++++++++++++++------ > 1 file changed, 87 insertions(+), 24 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c > b/src/mesa/drivers/dri/i965/brw_eu_compact.c > index 2c15c85..dd661f5 100644 > --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c > +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c > @@ -324,6 +324,19 @@ brw_try_compact_instruction(struct brw_compile *c, > return false; /* FINISHME: What else needs handling? */ > } > > + > + if (src->header.opcode == BRW_OPCODE_IF || > + src->header.opcode == BRW_OPCODE_ELSE || > + src->header.opcode == BRW_OPCODE_ENDIF || > + src->header.opcode == BRW_OPCODE_DO || > + src->header.opcode == BRW_OPCODE_HALT || > + src->header.opcode == BRW_OPCODE_WHILE) { > + /* FINISHME: The fixup code below, and brw_set_uip_jip and friends, > needs > + * to be able to handle flow control. > + */ > + return false; > + } > + > /* FINISHME: immediates */ > if (src->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE || > src->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE) > @@ -476,12 +489,40 @@ void brw_debug_compact_uncompact(struct > intel_context *intel, > } > } > > +static int > +compressed_between(int old_ip, int old_target_ip, int *compressed_counts) > +{ > + int this_compressed_count = compressed_counts[old_ip]; > + int target_compressed_count = compressed_counts[old_target_ip]; > + return target_compressed_count - this_compressed_count; > +} > + > +static void > +update_uip_jip(struct brw_instruction *insn, int this_old_ip, > + int *compressed_counts) > +{ > + int target_old_ip; > + > + target_old_ip = this_old_ip + insn->bits3.break_cont.jip; > + insn->bits3.break_cont.jip -= compressed_between(this_old_ip, > + target_old_ip, > + compressed_counts); > + > + target_old_ip = this_old_ip + insn->bits3.break_cont.uip; > + insn->bits3.break_cont.uip -= compressed_between(this_old_ip, > + target_old_ip, > + compressed_counts); > +} > + > void > brw_compact_instructions(struct brw_compile *p) > { > struct brw_context *brw = p->brw; > struct intel_context *intel = &brw->intel; > void *store = p->store; > + /* Control flow fixup information */ > + int compressed_counts[p->next_insn_offset / 8]; >
A comment would have saved me some time in understanding what information is stored in compressed_counts. I think this would be a correct description but I'm not certain: "If an instruction was located at offset 8*i before compaction, compressed_counts[i] is the number of instructions that preceded it which were not previously compressed, but are now." An alternative way of representing the same information, possibly easier to follow, would be: /* If an instruction was located at offset 8*i before compaction, 8*new_ip[i] is the location of the instruction after compaction */ int new_ip[p->next_insn_offset / 8]; (Of course, update_uip_jip() would need to be reworked) > + int old_ip[p->next_insn_offset / 8]; > And, assuming I'm understanding the code correctly, I believe old_ip could be commented: /* If an instruction is located at offset 8*i after compaction, 8*old_ip[i] was the location of the instruction before compaction. In the case of NOPs inserted automatically for alignment, 8*old_ip[i] was the old location of the instruction that needed to be aligned. */ > assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - > 1] != 0); > assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0); > @@ -491,36 +532,21 @@ brw_compact_instructions(struct brw_compile *p) > if (intel->gen != 6) > return; > > - /* FINISHME: If we are going to compress instructions between flow > control, > - * we have to do fixups to flow control offsets to represent the new > - * distances, since flow control uses (virtual address distance)/2, > not a > - * logical instruction count. > - */ > - bool continue_compressing = true; > - for (int i = 0; i < p->nr_insn; i++) { > - if (p->store[i].header.opcode == BRW_OPCODE_WHILE) > - return; > - } > - > int src_offset; > int offset = 0; > + int compressed_count = 0; > for (src_offset = 0; src_offset < p->nr_insn * 16;) { > struct brw_instruction *src = store + src_offset; > void *dst = store + offset; > > - switch (src->header.opcode) { > - case BRW_OPCODE_IF: > - case BRW_OPCODE_HALT: > - case BRW_OPCODE_JMPI: > - continue_compressing = false; > - break; > - } > + old_ip[offset / 8] = src_offset / 8; > + compressed_counts[src_offset / 8] = compressed_count; > > struct brw_instruction saved = *src; > > - if (continue_compressing && > - !src->header.cmpt_control && > + if (!src->header.cmpt_control && > brw_try_compact_instruction(p, dst, src)) { > + compressed_count++; > > /* debug */ > struct brw_instruction uncompacted; > @@ -544,6 +570,7 @@ brw_compact_instructions(struct brw_compile *p) > align->dw0.opcode = BRW_OPCODE_NOP; > align->dw0.cmpt_ctrl = 1; > offset += 8; > + old_ip[offset / 8] = src_offset / 8; > dst = store + offset; > } > > @@ -558,20 +585,56 @@ brw_compact_instructions(struct brw_compile *p) > } > } > I'm running out of time to look at this today, so I had to stop reviewing here. I think the stuff that follows is likely correct (because if it weren't, it would probably fail spectacularly :)). I'll try to return to it tomorrow just to make sure. > > + /* Fix up control flow offsets. */ > + p->next_insn_offset = offset; > + for (offset = 0; offset < p->next_insn_offset;) { > + struct brw_instruction *insn = store + offset; > + int this_old_ip = old_ip[offset / 8]; > + int this_compressed_count = compressed_counts[this_old_ip]; > + int target_old_ip, target_compressed_count; > + > + switch (insn->header.opcode) { > + case BRW_OPCODE_BREAK: > + case BRW_OPCODE_CONTINUE: > + case BRW_OPCODE_HALT: > + update_uip_jip(insn, this_old_ip, compressed_counts); > + break; > + > + case BRW_OPCODE_IF: > + case BRW_OPCODE_ELSE: > + case BRW_OPCODE_ENDIF: > + case BRW_OPCODE_WHILE: > + if (intel->gen == 6) { > + target_old_ip = this_old_ip + > insn->bits1.branch_gen6.jump_count; > + target_compressed_count = compressed_counts[target_old_ip]; > + insn->bits1.branch_gen6.jump_count -= (target_compressed_count > - > + this_compressed_count); > + } else { > + update_uip_jip(insn, this_old_ip, compressed_counts); > + } > + break; > + } > + > + if (insn->header.cmpt_control) { > + offset += 8; > + } else { > + offset += 16; > + } > + } > + > /* p->nr_insn is counting the number of uncompacted instructions > still, so > * divide. We do want to be sure there's a valid instruction in any > * alignment padding, so that the next compression pass (for the FS > 8/16 > * compile passes) parses correctly. > */ > - if (offset & 8) { > + if (p->next_insn_offset & 8) { > struct brw_compact_instruction *align = store + offset; > memset(align, 0, sizeof(*align)); > align->dw0.opcode = BRW_OPCODE_NOP; > align->dw0.cmpt_ctrl = 1; > - offset += 8; > + p->next_insn_offset += 8; > } > - p->next_insn_offset = offset; > - p->nr_insn = offset / 16; > + p->nr_insn = p->next_insn_offset / 16; > > if (0) { > fprintf(stdout, "dumping compacted program\n"); > -- > 1.7.10.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev