On 08/28/2014 08:10 PM, Matt Turner wrote: > --- > src/mesa/drivers/dri/i965/brw_eu_compact.c | 39 > ++++++++++++++++++------------ > 1 file changed, 24 insertions(+), 15 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c > b/src/mesa/drivers/dri/i965/brw_eu_compact.c > index 5008ba6..9c23d55 100644 > --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c > +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c > @@ -23,12 +23,12 @@ > > /** @file brw_eu_compact.c > * > - * Instruction compaction is a feature of gm45 and newer hardware that allows > + * Instruction compaction is a feature of G45 and newer hardware that allows > * for a smaller instruction encoding. > * > * The instruction cache is on the order of 32KB, and many programs generate > * far more instructions than that. The instruction cache is built to barely > - * keep up with instruction dispatch abaility in cache hit cases -- L1 > + * keep up with instruction dispatch ability in cache hit cases -- L1 > * instruction cache misses that still hit in the next level could limit > * throughput by around 50%. > * > @@ -1207,14 +1207,15 @@ static void > update_gen4_jump_count(struct brw_context *brw, brw_inst *insn, > int this_old_ip, int *compacted_counts) > { > - assert(brw->gen == 5); > + assert(brw->gen == 5 || brw->is_g4x); > > /* Jump Count is in units of: > + * - uncompacted instructions on G45; and > * - compacted instructions on Gen5. > */ > int jump_count = brw_inst_gen4_jump_count(brw, insn); > - int jump_count_compacted = jump_count; > - int jump_count_uncompacted = jump_count / 2; > + int jump_count_compacted = jump_count * (brw->is_g4x ? 2 : 1); > + int jump_count_uncompacted = jump_count / (brw->is_g4x ? 1 : 2); > > int target_old_ip = this_old_ip + jump_count_uncompacted; > > @@ -1222,7 +1223,8 @@ update_gen4_jump_count(struct brw_context *brw, > brw_inst *insn, > int target_compacted_count = compacted_counts[target_old_ip]; > > jump_count_compacted -= (target_compacted_count - this_compacted_count); > - brw_inst_set_gen4_jump_count(brw, insn, jump_count_compacted); > + brw_inst_set_gen4_jump_count(brw, insn, jump_count_compacted / > + (brw->is_g4x ? 2 : 1)); > } > > void > @@ -1265,13 +1267,14 @@ brw_init_compaction_tables(struct brw_context *brw) > src_index_table = gen6_src_index_table; > break; > case 5: > + case 4: > control_index_table = g45_control_index_table; > datatype_table = g45_datatype_table; > subreg_table = g45_subreg_table; > src_index_table = g45_src_index_table; > break;
The check a couple hunks below prevents us from getting here on GEN4? > default: > - return; > + unreachable("unknown generation"); > } > } > > @@ -1282,7 +1285,8 @@ brw_compact_instructions(struct brw_compile *p, int > start_offset, > struct brw_context *brw = p->brw; > void *store = p->store + start_offset / 16; > /* For an instruction at byte offset 16*i before compaction, this is the > - * number of compacted instructions that preceded it. > + * number of compacted instructions minus the number of padding NOP/NENOPs > + * that preceded it. > */ > int compacted_counts[(p->next_insn_offset - start_offset) / > sizeof(brw_inst)]; > /* For an instruction at byte offset 8*i after compaction, this was its IP > @@ -1290,7 +1294,7 @@ brw_compact_instructions(struct brw_compile *p, int > start_offset, > */ > int old_ip[(p->next_insn_offset - start_offset) / > sizeof(brw_compact_inst)]; > > - if (brw->gen == 4) > + if (brw->gen == 4 && !brw->is_g4x) > return; > > int offset = 0; > @@ -1319,17 +1323,22 @@ brw_compact_instructions(struct brw_compile *p, int > start_offset, > offset += sizeof(brw_compact_inst); > } else { > /* It appears that the end of thread SEND instruction needs to be > - * aligned, or the GPU hangs. > + * aligned, or the GPU hangs. All uncompacted instructions need to > be > + * aligned on G45. > */ > - if ((brw_inst_opcode(brw, src) == BRW_OPCODE_SEND || > - brw_inst_opcode(brw, src) == BRW_OPCODE_SENDC) && > - brw_inst_eot(brw, src) && > - (offset & sizeof(brw_compact_inst)) != 0) { > + if ((offset & sizeof(brw_compact_inst)) != 0 && > + (((brw_inst_opcode(brw, src) == BRW_OPCODE_SEND || > + brw_inst_opcode(brw, src) == BRW_OPCODE_SENDC) && > + brw_inst_eot(brw, src)) || > + brw->is_g4x)) { > brw_compact_inst *align = store + offset; > memset(align, 0, sizeof(*align)); > - brw_compact_inst_set_opcode(align, BRW_OPCODE_NOP); > + brw_compact_inst_set_opcode(align, brw->is_g4x ? > BRW_OPCODE_NENOP : > + BRW_OPCODE_NOP); > brw_compact_inst_set_cmpt_control(align, true); > offset += sizeof(brw_compact_inst); > + compacted_count--; > + compacted_counts[src_offset / sizeof(brw_inst)] = > compacted_count; > old_ip[offset / sizeof(brw_compact_inst)] = src_offset / > sizeof(brw_inst); > > dst = store + offset; > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev