Re: [Mesa-dev] [PATCH v2 4/6] i965: dump scheduling cycle estimates

Jason Ekstrand Wed, 21 Oct 2015 15:59:45 -0700

On Fri, Oct 16, 2015 at 8:03 PM, Connor Abbott <cwabbo...@gmail.com> wrote:
> The heuristic we're using is rather lame, since it assumes everything is
> non-uniform and loops execute 10 times, but it should be enough for
> measuring improvements in the scheduler that don't result in a change in
> the number of instructions.
>
> v2:
> - Switch loops and cycle counts to be compatible with older shader-db.
> - Make loop heuristic 10x to match with spilling code.
>
> Signed-off-by: Connor Abbott <cwabbo...@gmail.com>
> ---
>  src/mesa/drivers/dri/i965/brw_cfg.h                  |  4 ++++
>  src/mesa/drivers/dri/i965/brw_fs_generator.cpp       | 11 ++++++-----
>  .../drivers/dri/i965/brw_schedule_instructions.cpp   | 20 
> ++++++++++++++++++++
>  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp     |  9 +++++----
>  4 files changed, 35 insertions(+), 9 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h 
> b/src/mesa/drivers/dri/i965/brw_cfg.h
> index a094917..d0bdb00 100644
> --- a/src/mesa/drivers/dri/i965/brw_cfg.h
> +++ b/src/mesa/drivers/dri/i965/brw_cfg.h
> @@ -90,6 +90,8 @@ struct bblock_t {
>     struct exec_list parents;
>     struct exec_list children;
>     int num;
> +
> +   unsigned cycle_count;
>  };
>
>  static inline struct backend_instruction *
> @@ -285,6 +287,8 @@ struct cfg_t {
>     int num_blocks;
>
>     bool idom_dirty;
> +
> +   unsigned cycle_count;
>  };
>
>  /* Note that this is implemented with a double for loop -- break will
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> index 17e19cf..3bb0e7d 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> @@ -2180,9 +2180,9 @@ fs_generator::generate_code(const cfg_t *cfg, int 
> dispatch_width)
>
>     if (unlikely(debug_flag)) {
>        fprintf(stderr, "Native code for %s\n"
> -              "SIMD%d shader: %d instructions. %d loops. %d:%d spills:fills. 
> Promoted %u constants. Compacted %d to %d"
> +              "SIMD%d shader: %d instructions. %d loops. %u cycles. %d:%d 
> spills:fills. Promoted %u constants. Compacted %d to %d"
>                " bytes (%.0f%%)\n",
> -              shader_name, dispatch_width, before_size / 16, loop_count,
> +              shader_name, dispatch_width, before_size / 16, loop_count, 
> cfg->cycle_count,
>                spill_count, fill_count, promoted_constants, before_size, 
> after_size,
>                100.0f * (before_size - after_size) / before_size);
>
> @@ -2192,12 +2192,13 @@ fs_generator::generate_code(const cfg_t *cfg, int 
> dispatch_width)
>     }
>
>     compiler->shader_debug_log(log_data,
> -                              "%s SIMD%d shader: %d inst, %d loops, "
> +                              "%s SIMD%d shader: %d inst, %d loops, %u 
> cycles, "
>                                "%d:%d spills:fills, Promoted %u constants, "
>                                "compacted %d to %d bytes.\n",
>                                stage_abbrev, dispatch_width, before_size / 16,
> -                              loop_count, spill_count, fill_count,
> -                              promoted_constants, before_size, after_size);
> +                              loop_count, cfg->cycle_count, spill_count,
> +                              fill_count, promoted_constants, before_size,
> +                              after_size);
>
>     return start_offset;
>  }
> diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp 
> b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
> index 1652261..e14d041 100644
> --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
> @@ -1467,6 +1467,24 @@ instruction_scheduler::schedule_instructions(bblock_t 
> *block)
>     if (block->end()->opcode == BRW_OPCODE_NOP)
>        block->end()->remove(block);
>     assert(instructions_to_schedule == 0);
> +
> +   block->cycle_count = time;
> +}
> +
> +static unsigned get_cycle_count(cfg_t *cfg)
> +{
> +   unsigned count = 0, multiplier = 1;
> +   foreach_block(block, cfg) {
> +      if (block->start()->opcode == BRW_OPCODE_DO)
> +         multiplier *= 10; /* assume that loops execute ~10 times */
> +
> +      count += block->cycle_count * multiplier;


Unfortunately, I don't think this properly handles "if (...) { tex }
else { tex };" and similar cases where the latency isn't necessarily
additive.  However, it's a good first-order approximation.

Reviewed-by: Jason Ekstrand <jason.ekstr...@intel.com>

> +
> +      if (block->end()->opcode == BRW_OPCODE_WHILE)
> +         multiplier /= 10;
> +   }
> +
> +   return count;
>  }
>
>  void
> @@ -1507,6 +1525,8 @@ instruction_scheduler::run(cfg_t *cfg)
>                post_reg_alloc);
>        bs->dump_instructions();
>     }
> +
> +   cfg->cycle_count = get_cycle_count(cfg);
>  }
>
>  void
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> index dcacc90..8c926ec 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> @@ -1656,10 +1656,10 @@ vec4_generator::generate_code(const cfg_t *cfg)
>           fprintf(stderr, "Native code for %s program %d:\n", stage_name,
>                   prog->Id);
>        }
> -      fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. Compacted 
> %d to %d"
> +      fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. %u cycles. 
> Compacted %d to %d"
>                        " bytes (%.0f%%)\n",
>                stage_abbrev,
> -              before_size / 16, loop_count, before_size, after_size,
> +              before_size / 16, loop_count, cfg->cycle_count, before_size, 
> after_size,
>                100.0f * (before_size - after_size) / before_size);
>
>        dump_assembly(p->store, annotation.ann_count, annotation.ann,
> @@ -1668,9 +1668,10 @@ vec4_generator::generate_code(const cfg_t *cfg)
>     }
>
>     compiler->shader_debug_log(log_data,
> -                              "%s vec4 shader: %d inst, %d loops, "
> +                              "%s vec4 shader: %d inst, %d loops, %u cycles, 
> "
>                                "compacted %d to %d bytes.\n",
> -                              stage_abbrev, before_size / 16, loop_count,
> +                              stage_abbrev, before_size / 16,
> +                              loop_count, cfg->cycle_count,
>                                before_size, after_size);
>  }
>
> --
> 2.4.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 4/6] i965: dump scheduling cycle estimates

Reply via email to