On Fri, Oct 16, 2015 at 8:03 PM, Connor Abbott <cwabbo...@gmail.com> wrote: > The heuristic we're using is rather lame, since it assumes everything is > non-uniform and loops execute 10 times, but it should be enough for > measuring improvements in the scheduler that don't result in a change in > the number of instructions. > > v2: > - Switch loops and cycle counts to be compatible with older shader-db. > - Make loop heuristic 10x to match with spilling code. > > Signed-off-by: Connor Abbott <cwabbo...@gmail.com> > --- > src/mesa/drivers/dri/i965/brw_cfg.h | 4 ++++ > src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 11 ++++++----- > .../drivers/dri/i965/brw_schedule_instructions.cpp | 20 > ++++++++++++++++++++ > src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 9 +++++---- > 4 files changed, 35 insertions(+), 9 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h > b/src/mesa/drivers/dri/i965/brw_cfg.h > index a094917..d0bdb00 100644 > --- a/src/mesa/drivers/dri/i965/brw_cfg.h > +++ b/src/mesa/drivers/dri/i965/brw_cfg.h > @@ -90,6 +90,8 @@ struct bblock_t { > struct exec_list parents; > struct exec_list children; > int num; > + > + unsigned cycle_count; > }; > > static inline struct backend_instruction * > @@ -285,6 +287,8 @@ struct cfg_t { > int num_blocks; > > bool idom_dirty; > + > + unsigned cycle_count; > }; > > /* Note that this is implemented with a double for loop -- break will > diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > index 17e19cf..3bb0e7d 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > @@ -2180,9 +2180,9 @@ fs_generator::generate_code(const cfg_t *cfg, int > dispatch_width) > > if (unlikely(debug_flag)) { > fprintf(stderr, "Native code for %s\n" > - "SIMD%d shader: %d instructions. %d loops. %d:%d spills:fills. > Promoted %u constants. Compacted %d to %d" > + "SIMD%d shader: %d instructions. 
%d loops. %u cycles. %d:%d > spills:fills. Promoted %u constants. Compacted %d to %d" > " bytes (%.0f%%)\n", > - shader_name, dispatch_width, before_size / 16, loop_count, > + shader_name, dispatch_width, before_size / 16, loop_count, > cfg->cycle_count, > spill_count, fill_count, promoted_constants, before_size, > after_size, > 100.0f * (before_size - after_size) / before_size); > > @@ -2192,12 +2192,13 @@ fs_generator::generate_code(const cfg_t *cfg, int > dispatch_width) > } > > compiler->shader_debug_log(log_data, > - "%s SIMD%d shader: %d inst, %d loops, " > + "%s SIMD%d shader: %d inst, %d loops, %u > cycles, " > "%d:%d spills:fills, Promoted %u constants, " > "compacted %d to %d bytes.\n", > stage_abbrev, dispatch_width, before_size / 16, > - loop_count, spill_count, fill_count, > - promoted_constants, before_size, after_size); > + loop_count, cfg->cycle_count, spill_count, > + fill_count, promoted_constants, before_size, > + after_size); > > return start_offset; > } > diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp > b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp > index 1652261..e14d041 100644 > --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp > +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp > @@ -1467,6 +1467,24 @@ instruction_scheduler::schedule_instructions(bblock_t > *block) > if (block->end()->opcode == BRW_OPCODE_NOP) > block->end()->remove(block); > assert(instructions_to_schedule == 0); > + > + block->cycle_count = time; > +} > + > +static unsigned get_cycle_count(cfg_t *cfg) > +{ > + unsigned count = 0, multiplier = 1; > + foreach_block(block, cfg) { > + if (block->start()->opcode == BRW_OPCODE_DO) > + multiplier *= 10; /* assume that loops execute ~10 times */ > + > + count += block->cycle_count * multiplier;
Unfortunately, I don't think this properly handles "if (...) { tex } else { tex };" and similar things where the latency isn't necessarily additive. However, it's a good first-order approximation. Reviewed-by: Jason Ekstrand <jason.ekstr...@intel.com> > + > + if (block->end()->opcode == BRW_OPCODE_WHILE) > + multiplier /= 10; > + } > + > + return count; > } > > void > @@ -1507,6 +1525,8 @@ instruction_scheduler::run(cfg_t *cfg) > post_reg_alloc); > bs->dump_instructions(); > } > + > + cfg->cycle_count = get_cycle_count(cfg); > } > > void > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > index dcacc90..8c926ec 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp > @@ -1656,10 +1656,10 @@ vec4_generator::generate_code(const cfg_t *cfg) > fprintf(stderr, "Native code for %s program %d:\n", stage_name, > prog->Id); > } > - fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. Compacted > %d to %d" > + fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. %u cycles. 
> Compacted %d to %d" > " bytes (%.0f%%)\n", > stage_abbrev, > - before_size / 16, loop_count, before_size, after_size, > + before_size / 16, loop_count, cfg->cycle_count, before_size, > after_size, > 100.0f * (before_size - after_size) / before_size); > > dump_assembly(p->store, annotation.ann_count, annotation.ann, > @@ -1668,9 +1668,10 @@ vec4_generator::generate_code(const cfg_t *cfg) > } > > compiler->shader_debug_log(log_data, > - "%s vec4 shader: %d inst, %d loops, " > + "%s vec4 shader: %d inst, %d loops, %u cycles, > " > "compacted %d to %d bytes.\n", > - stage_abbrev, before_size / 16, loop_count, > + stage_abbrev, before_size / 16, > + loop_count, cfg->cycle_count, > before_size, after_size); > } > > -- > 2.4.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev