On 11/30/2012 01:24 PM, Eric Anholt wrote:
This can be used for two purposes: using hand-coded shaders to determine
per-instruction timings, or figuring out which shader to optimize in a
whole application.  On the Psychonauts trace, we see:

type   ID      cycles spent                   % of total
...
fs16  543:     339343898369 ( 339.34 Gcycles)       9.2%
fs16  521:     532215110990 ( 532.22 Gcycles)      14.4%
fs16  524:    1036231987390 (1036.23 Gcycles)      28.0%

confirming our previous understanding that fragment shaders are where
it's all at.  But on GLBenchmark 2.7, we get:

fs16   69:     205928219888 ( 205.93 Gcycles)       7.5%
fs16   75:     364066413095 ( 364.07 Gcycles)      13.2%
vs     87:    1107217698878 (1107.22 Gcycles)      40.3%

That's interesting.  I should look into that.

Note that this doesn't cover the instructions that set up the message to
the URB/FB write -- we'd need to convert the MRF usage in these
instructions to GRFs so that our offsets/times don't overwrite our
shader outputs.
---
  src/mesa/drivers/dri/i965/brw_context.c          |    3 +
  src/mesa/drivers/dri/i965/brw_context.h          |   28 ++++-
  src/mesa/drivers/dri/i965/brw_defines.h          |   20 +++-
  src/mesa/drivers/dri/i965/brw_eu.h               |    6 +-
  src/mesa/drivers/dri/i965/brw_eu_emit.c          |   55 +++++++++-
  src/mesa/drivers/dri/i965/brw_fs.cpp             |  101 +++++++++++++++++
  src/mesa/drivers/dri/i965/brw_fs.h               |    7 ++
  src/mesa/drivers/dri/i965/brw_fs_emit.cpp        |    4 +
  src/mesa/drivers/dri/i965/brw_program.c          |  128 ++++++++++++++++++++++
  src/mesa/drivers/dri/i965/brw_vec4.cpp           |   81 ++++++++++++++
  src/mesa/drivers/dri/i965/brw_vec4.h             |    7 ++
  src/mesa/drivers/dri/i965/brw_vec4_emit.cpp      |    4 +
  src/mesa/drivers/dri/i965/brw_vs_surface_state.c |   10 ++
  src/mesa/drivers/dri/i965/brw_vtbl.c             |   14 +++
  src/mesa/drivers/dri/i965/brw_wm_surface_state.c |    7 ++
  src/mesa/drivers/dri/intel/intel_context.c       |    6 +
  src/mesa/drivers/dri/intel/intel_context.h       |    1 +
  17 files changed, 475 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 4b1b247..5665a3a 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -383,6 +383,9 @@ brwCreateContext(int api,

     brw_fs_alloc_reg_sets(brw);

+   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+      brw_init_shader_time(brw);
+
     return true;
  }

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 1abaee3..dc25cab 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -559,14 +559,15 @@ struct brw_vs_prog_data {
  #define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 1)
  #define SURF_INDEX_TEXTURE(t)        (BRW_MAX_DRAW_BUFFERS + 2 + (t))
  #define SURF_INDEX_WM_UBO(u)         (SURF_INDEX_TEXTURE(BRW_MAX_TEX_UNIT) + u)
-
+#define SURF_INDEX_WM_SHADER_TIME    (SURF_INDEX_WM_UBO(12))
  /** Maximum size of the binding table. */
-#define BRW_MAX_WM_SURFACES          SURF_INDEX_WM_UBO(BRW_MAX_WM_UBOS)
+#define BRW_MAX_WM_SURFACES          (SURF_INDEX_WM_SHADER_TIME + 1)

  #define SURF_INDEX_VERT_CONST_BUFFER (0)
  #define SURF_INDEX_VS_TEXTURE(t)     (SURF_INDEX_VERT_CONST_BUFFER + 1 + (t))
  #define SURF_INDEX_VS_UBO(u)         (SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT) + u)
-#define BRW_MAX_VS_SURFACES          SURF_INDEX_VS_UBO(BRW_MAX_VS_UBOS)
+#define SURF_INDEX_VS_SHADER_TIME    (SURF_INDEX_VS_UBO(12))
+#define BRW_MAX_VS_SURFACES          (SURF_INDEX_VS_SHADER_TIME + 1)

  #define SURF_INDEX_SOL_BINDING(t)    ((t))
  #define BRW_MAX_GS_SURFACES          SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
@@ -651,6 +652,13 @@ struct brw_tracked_state {
     void (*emit)( struct brw_context *brw );
  };

+enum shader_time_shader_type {
+   ST_NONE,
+   ST_VS,
+   ST_FS8,
+   ST_FS16,
+};
+
  /* Flags for brw->state.cache.
   */
  #define CACHE_NEW_BLEND_STATE            (1<<BRW_BLEND_STATE)
@@ -1089,6 +1097,16 @@ struct brw_context

     uint32_t num_instances;
     int basevertex;
+
+   struct {
+      drm_intel_bo *bo;
+      struct gl_shader_program **programs;
+      enum shader_time_shader_type *types;
+      uint64_t *cumulative;
+      int num_entries;
+      int max_entries;
+      double report_time;
+   } shader_time;
  };

  /*======================================================================
@@ -1144,7 +1162,9 @@ void brwInitFragProgFuncs( struct dd_function_table *functions );
  int brw_get_scratch_size(int size);
  void brw_get_scratch_bo(struct intel_context *intel,
                        drm_intel_bo **scratch_bo, int size);
-
+void brw_init_shader_time(struct brw_context *brw);
+void brw_collect_and_report_shader_time(struct brw_context *brw);
+void brw_destroy_shader_time(struct brw_context *brw);

  /* brw_urb.c
   */
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 6dc4707..b84d8f9 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -665,6 +665,8 @@ enum opcode {
     SHADER_OPCODE_TXS,
     FS_OPCODE_TXB,

+   SHADER_OPCODE_SHADER_TIME_ADD,
+
     FS_OPCODE_DDX,
     FS_OPCODE_DDY,
     FS_OPCODE_PIXEL_X,
@@ -729,6 +731,8 @@ enum opcode {
  #define BRW_ARF_CONTROL               0x80
  #define BRW_ARF_NOTIFICATION_COUNT    0x90
  #define BRW_ARF_IP                    0xA0
+#define BRW_ARF_TDR                   0xB0
+#define BRW_ARF_TIMESTAMP             0xC0

  #define BRW_MRF_COMPR4                        (1 << 7)

@@ -956,7 +960,21 @@ enum brw_message_target {
  #define BRW_SCRATCH_SPACE_SIZE_1M     10
  #define BRW_SCRATCH_SPACE_SIZE_2M     11


Maybe add an /** URB Atomic Operations */ comment here?

-
+#define BRW_AOP_AND                   1
+#define BRW_AOP_OR                    2
+#define BRW_AOP_XOR                   3
+#define BRW_AOP_MOV                   4
+#define BRW_AOP_INC                   5
+#define BRW_AOP_DEC                   6
+#define BRW_AOP_ADD                   7
+#define BRW_AOP_SUB                   8
+#define BRW_AOP_REVSUB                9
+#define BRW_AOP_IMAX                  10
+#define BRW_AOP_IMIN                  11
+#define BRW_AOP_UMAX                  12
+#define BRW_AOP_UMIN                  13
+#define BRW_AOP_CMPWR                 14
+#define BRW_AOP_PREDEC                15

This looks awesome.  Series is:
Reviewed-by: Kenneth Graunke <kenn...@whitecape.org>

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to