On 31 August 2012 11:32, Eric Anholt <e...@anholt.net> wrote: > From: Kenneth Graunke <kenn...@whitecape.org> > > Reduces l4d2 program size from 1195kb to 919kb. > > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> > --- > src/mesa/drivers/dri/i965/brw_eu.c | 2 + > src/mesa/drivers/dri/i965/brw_eu.h | 1 + > src/mesa/drivers/dri/i965/brw_eu_compact.c | 208 > +++++++++++++++++++++++++--- > 3 files changed, 192 insertions(+), 19 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_eu.c > b/src/mesa/drivers/dri/i965/brw_eu.c > index c2515eb..a59b83f 100644 > --- a/src/mesa/drivers/dri/i965/brw_eu.c > +++ b/src/mesa/drivers/dri/i965/brw_eu.c > @@ -206,6 +206,8 @@ brw_init_compile(struct brw_context *brw, struct > brw_compile *p, void *mem_ctx) > p->loop_stack_array_size = 16; > p->loop_stack = rzalloc_array(mem_ctx, int, p->loop_stack_array_size); > p->if_depth_in_loop = rzalloc_array(mem_ctx, int, > p->loop_stack_array_size); > + > + brw_init_compaction_tables(&brw->intel); > } > > > diff --git a/src/mesa/drivers/dri/i965/brw_eu.h > b/src/mesa/drivers/dri/i965/brw_eu.h > index 01b8d08..b64611e 100644 > --- a/src/mesa/drivers/dri/i965/brw_eu.h > +++ b/src/mesa/drivers/dri/i965/brw_eu.h > @@ -1108,6 +1108,7 @@ void brw_set_uip_jip(struct brw_compile *p); > uint32_t brw_swap_cmod(uint32_t cmod); > > /* brw_eu_compact.c */ > +void brw_init_compaction_tables(struct intel_context *intel); > void brw_compact_instructions(struct brw_compile *c); > void brw_uncompact_instruction(struct intel_context *intel, > struct brw_instruction *dst, > diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c > b/src/mesa/drivers/dri/i965/brw_eu_compact.c > index dd661f5..009c961 100644 > --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c > +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c > @@ -180,6 +180,151 @@ static const uint32_t gen6_src_index_table[32] = { > 0b001101010000, > }; > > +static const uint32_t gen7_control_index_table[32] = { > + 0b0000000000000000010, > + 0b0000100000000000000, > + 0b0000100000000000001, > + 0b0000100000000000010, > + 0b0000100000000000011, > + 0b0000100000000000100, > + 0b0000100000000000101, > + 0b0000100000000000111, > + 0b0000100000000001000, > + 0b0000100000000001001, > + 0b0000100000000001101, > + 0b0000110000000000000, > + 0b0000110000000000001, > + 0b0000110000000000010, > + 0b0000110000000000011, > + 0b0000110000000000100, > + 0b0000110000000000101, > + 0b0000110000000000111, > + 0b0000110000000001001, > + 0b0000110000000001101, > + 0b0000110000000010000, > + 0b0000110000100000000, > + 0b0001000000000000000, > + 0b0001000000000000010, > + 0b0001000000000000100, > + 0b0001000000100000000, > + 0b0010110000000000000, > + 0b0010110000000010000, > + 0b0011000000000000000, > + 0b0011000000100000000, > + 0b0101000000000000000, > + 0b0101000000100000000 > +}; > + > +static const uint32_t gen7_datatype_table[32] = { > + 0b001000000000000001, > + 0b001000000000100000, > + 0b001000000000100001, > + 0b001000000001100001, > + 0b001000000010111101, > + 0b001000001011111101, > + 0b001000001110100001, > + 0b001000001110100101, > + 0b001000001110111101, > + 0b001000010000100001, > + 0b001000110000100000, > + 0b001000110000100001, > + 0b001001010010100101, > + 0b001001110010100100, > + 0b001001110010100101, > + 0b001111001110111101, > + 0b001111011110011101, > + 0b001111011110111100, > + 0b001111011110111101, > + 0b001111111110111100, > + 0b000000001000001100, > + 0b001000000000111101, > + 0b001000000010100101, > + 0b001000010000100000, > + 0b001001010010100100, > + 0b001001110010000100, > + 0b001010010100001001, > + 0b001101111110111101, > + 0b001111111110111101, > + 0b001011110110101100, > + 0b001010010100101000, > + 0b001010110100101000 > +}; > + > +static const uint32_t gen7_subreg_table[32] = { > + 0b000000000000000, > + 0b000000000000001, > + 0b000000000001000, > + 0b000000000001111, > + 0b000000000010000, > + 0b000000010000000, > + 0b000000100000000, > + 0b000000110000000, > + 0b000001000000000, > + 0b000001000010000, > + 0b000010100000000, > + 0b001000000000000, > + 0b001000000000001, > + 0b001000010000001, > + 0b001000010000010, > + 0b001000010000011, > + 0b001000010000100, > + 0b001000010000111, > + 0b001000010001000, > + 0b001000010001110, > + 0b001000010001111, > + 0b001000110000000, > + 0b001000111101000, > + 0b010000000000000, > + 0b010000110000000, > + 0b011000000000000, > + 0b011110010000111, > + 0b100000000000000, > + 0b101000000000000, > + 0b110000000000000, > + 0b111000000000000, > + 0b111000000011100 > +}; > + > +static const uint32_t gen7_src_index_table[32] = { > + 0b000000000000, > + 0b000000000010, > + 0b000000010000, > + 0b000000010010, > + 0b000000011000, > + 0b000000100000, > + 0b000000101000, > + 0b000001001000, > + 0b000001010000, > + 0b000001110000, > + 0b000001111000, > + 0b001100000000, > + 0b001100000010, > + 0b001100001000, > + 0b001100010000, > + 0b001100010010, > + 0b001100100000, > + 0b001100101000, > + 0b001100111000, > + 0b001101000000, > + 0b001101000010, > + 0b001101001000, > + 0b001101010000, > + 0b001101100000, > + 0b001101101000, > + 0b001101110000, > + 0b001101110001, > + 0b001101111000, > + 0b010001101000, > + 0b010001101001, > + 0b010001101010, > + 0b010110001000 > +}; > + > +static const uint32_t *control_index_table; > +static const uint32_t *datatype_table; > +static const uint32_t *subreg_table; > +static const uint32_t *src_index_table; > + > static bool > set_control_index(struct brw_compact_instruction *dst, > struct brw_instruction *src) > @@ -197,8 +342,8 @@ set_control_index(struct brw_compact_instruction *dst, > uncompressed |= src->header.execution_size << 13; > uncompressed |= src->header.saturate << 16; >
For gen7, we need to include FlagSubRegNum and FlagRegNum in "uncompressed". > > - for (int i = 0; i < ARRAY_SIZE(gen6_control_index_table); i++) { > - if (gen6_control_index_table[i] == uncompressed) { > + for (int i = 0; i < 32; i++) { > + if (control_index_table[i] == uncompressed) { > dst->dw0.control_index = i; > return true; > } > @@ -216,8 +361,8 @@ set_datatype_index(struct brw_compact_instruction *dst, > uncompressed |= src->bits1.ud & 0x7fff; > uncompressed |= (src->bits1.ud >> 29) << 15; > > - for (int i = 0; i < ARRAY_SIZE(gen6_datatype_table); i++) { > - if (gen6_datatype_table[i] == uncompressed) { > + for (int i = 0; i < 32; i++) { > + if (datatype_table[i] == uncompressed) { > dst->dw0.data_type_index = i; > return true; > } > @@ -236,8 +381,8 @@ set_subreg_index(struct brw_compact_instruction *dst, > uncompressed |= src->bits2.da1.src0_subreg_nr << 5; > uncompressed |= src->bits3.da1.src1_subreg_nr << 10; > > - for (int i = 0; i < ARRAY_SIZE(gen6_subreg_table); i++) { > - if (gen6_subreg_table[i] == uncompressed) { > + for (int i = 0; i < 32; i++) { > + if (subreg_table[i] == uncompressed) { > dst->dw0.sub_reg_index = i; > return true; > } > @@ -250,8 +395,8 @@ static bool > get_src_index(uint32_t uncompressed, > uint32_t *compressed) > { > - for (int i = 0; i < ARRAY_SIZE(gen6_src_index_table); i++) { > - if (gen6_src_index_table[i] == uncompressed) { > + for (int i = 0; i < 32; i++) { > + if (src_index_table[i] == uncompressed) { > *compressed = i; > return true; > } > @@ -373,7 +518,7 @@ static void > set_uncompressed_control(struct brw_instruction *dst, > struct brw_compact_instruction *src) > { > - uint32_t compressed = gen6_control_index_table[src->dw0.control_index]; > + uint32_t compressed = control_index_table[src->dw0.control_index]; > As with set_control_index, this function needs to be modified for Gen7 to reflect the fact that the data in the table includes values for FlagSubRegNum and FlagRegNum. > > dst->header.access_mode = compressed >> 0; > dst->header.mask_control = compressed >> 1; > @@ -391,7 +536,7 @@ static void > set_uncompressed_datatype(struct brw_instruction *dst, > struct brw_compact_instruction *src) > { > - uint32_t uncompressed = gen6_datatype_table[src->dw0.data_type_index]; > + uint32_t uncompressed = datatype_table[src->dw0.data_type_index]; > > dst->bits1.ud &= ~(0x7 << 29); > dst->bits1.ud |= ((uncompressed >> 15) & 0x7) << 29; > @@ -403,7 +548,7 @@ static void > set_uncompressed_subreg(struct brw_instruction *dst, > struct brw_compact_instruction *src) > { > - uint32_t uncompressed = gen6_subreg_table[src->dw0.sub_reg_index]; > + uint32_t uncompressed = subreg_table[src->dw0.sub_reg_index]; > > dst->bits1.da1.dest_subreg_nr = (uncompressed >> 0) & 0x1f; > dst->bits2.da1.src0_subreg_nr = (uncompressed >> 5) & 0x1f; > @@ -415,7 +560,7 @@ set_uncompressed_src0(struct brw_instruction *dst, > struct brw_compact_instruction *src) > { > uint32_t compressed = src->dw0.src0_index | src->dw1.src0_index << 2; > - uint32_t uncompressed = gen6_src_index_table[compressed]; > + uint32_t uncompressed = src_index_table[compressed]; > > dst->bits2.da1.src0_abs = uncompressed >> 0; > dst->bits2.da1.src0_negate = uncompressed >> 1; > @@ -429,7 +574,7 @@ static void > set_uncompressed_src1(struct brw_instruction *dst, > struct brw_compact_instruction *src) > { > - uint32_t uncompressed = gen6_src_index_table[src->dw1.src1_index]; > + uint32_t uncompressed = src_index_table[src->dw1.src1_index]; > > dst->bits3.da1.src1_abs = uncompressed >> 0; > dst->bits3.da1.src1_negate = uncompressed >> 1; > @@ -515,6 +660,36 @@ update_uip_jip(struct brw_instruction *insn, int > this_old_ip, > } > > void > +brw_init_compaction_tables(struct intel_context *intel) > +{ > + assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - > 1] != 0); > + assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0); > + assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0); > + assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != > 0); > + assert(gen7_control_index_table[ARRAY_SIZE(gen6_control_index_table) - > 1] != 0); > + assert(gen7_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0); > + assert(gen7_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0); > + assert(gen7_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != > 0); > + > + switch (intel->gen) { > + case 7: > + control_index_table = gen7_control_index_table; > + datatype_table = gen7_datatype_table; > + subreg_table = gen7_subreg_table; > + src_index_table = gen7_src_index_table; > + break; > + case 6: > + control_index_table = gen6_control_index_table; > + datatype_table = gen6_datatype_table; > + subreg_table = gen6_subreg_table; > + src_index_table = gen6_src_index_table; > + break; > + default: > + return; > + } > +} > + > +void > brw_compact_instructions(struct brw_compile *p) > { > struct brw_context *brw = p->brw; > @@ -524,12 +699,7 @@ brw_compact_instructions(struct brw_compile *p) > int compressed_counts[p->next_insn_offset / 8]; > int old_ip[p->next_insn_offset / 8]; > > - assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - > 1] != 0); > - assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0); > - assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0); > - assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != > 0); > - > - if (intel->gen != 6) > + if (intel->gen < 6) > return; > > int src_offset; > -- > 1.7.10.4 > Other changes need to be made for Gen7: 1. In brw_try_compact_instruction(), for Gen7, temp.dw0.flag_reg_nr must be set to zero. 2. In brw_uncompact_instruction(), for Gen7, we need to skip the statement "dst->bits2.da1.flag_reg_nr = src->dw0.flag_reg_nr;" to avoid overwriting the value set by set_uncompressed_control(). > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > My comments on patches 5-7 are largely cosmetic (and a few minor performance concerns), and patches 1 and 4 look good, so regardless of whether you decide to go ahead with my suggested changes, consider them: Reviewed-by: Paul Berry <stereotype...@gmail.com> The comments on this patch (8) I believe are genuine bugs in Gen7. I don't really consider myself qualified to comment on patches 2-3 because I'm not too familiar with the build system, so consider them Acked-by: Paul Berry <stereotype...@gmail.com>
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev