On 11/30/2012 01:24 PM, Eric Anholt wrote:
This can be used for two purposes: Using hand-coded shaders to determine
per-instruction timings, or figuring out which shader to optimize in a
whole application. On the psychonauts trace, we see:
type ID cycles spent % of total
...
fs16 543: 339343898369 ( 339.34 Gcycles) 9.2%
fs16 521: 532215110990 ( 532.22 Gcycles) 14.4%
fs16 524: 1036231987390 (1036.23 Gcycles) 28.0%
confirming our previous understanding that fragment shaders are where
it's all at. But on GLBenchmark 2.7, we get:
fs16 69: 205928219888 ( 205.93 Gcycles) 7.5%
fs16 75: 364066413095 ( 364.07 Gcycles) 13.2%
vs 87: 1107217698878 (1107.22 Gcycles) 40.3%
That's interesting. I should look into that.
Note that this doesn't cover the instructions that set up the message to
the URB/FB write -- we'd need to convert the MRF usage in these
instructions to GRFs so that our offsets/times don't overwrite our
shader outputs.
---
src/mesa/drivers/dri/i965/brw_context.c | 3 +
src/mesa/drivers/dri/i965/brw_context.h | 28 ++++-
src/mesa/drivers/dri/i965/brw_defines.h | 20 +++-
src/mesa/drivers/dri/i965/brw_eu.h | 6 +-
src/mesa/drivers/dri/i965/brw_eu_emit.c | 55 +++++++++-
src/mesa/drivers/dri/i965/brw_fs.cpp | 101 +++++++++++++++++
src/mesa/drivers/dri/i965/brw_fs.h | 7 ++
src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 4 +
src/mesa/drivers/dri/i965/brw_program.c | 128 ++++++++++++++++++++++
src/mesa/drivers/dri/i965/brw_vec4.cpp | 81 ++++++++++++++
src/mesa/drivers/dri/i965/brw_vec4.h | 7 ++
src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 4 +
src/mesa/drivers/dri/i965/brw_vs_surface_state.c | 10 ++
src/mesa/drivers/dri/i965/brw_vtbl.c | 14 +++
src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 7 ++
src/mesa/drivers/dri/intel/intel_context.c | 6 +
src/mesa/drivers/dri/intel/intel_context.h | 1 +
17 files changed, 475 insertions(+), 7 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_context.c
b/src/mesa/drivers/dri/i965/brw_context.c
index 4b1b247..5665a3a 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -383,6 +383,9 @@ brwCreateContext(int api,
brw_fs_alloc_reg_sets(brw);
+ if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+ brw_init_shader_time(brw);
+
return true;
}
diff --git a/src/mesa/drivers/dri/i965/brw_context.h
b/src/mesa/drivers/dri/i965/brw_context.h
index 1abaee3..dc25cab 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -559,14 +559,15 @@ struct brw_vs_prog_data {
#define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 1)
#define SURF_INDEX_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 2 + (t))
#define SURF_INDEX_WM_UBO(u) (SURF_INDEX_TEXTURE(BRW_MAX_TEX_UNIT) +
u)
-
+#define SURF_INDEX_WM_SHADER_TIME (SURF_INDEX_WM_UBO(12))
/** Maximum size of the binding table. */
-#define BRW_MAX_WM_SURFACES SURF_INDEX_WM_UBO(BRW_MAX_WM_UBOS)
+#define BRW_MAX_WM_SURFACES (SURF_INDEX_WM_SHADER_TIME + 1)
#define SURF_INDEX_VERT_CONST_BUFFER (0)
#define SURF_INDEX_VS_TEXTURE(t) (SURF_INDEX_VERT_CONST_BUFFER + 1 + (t))
#define SURF_INDEX_VS_UBO(u) (SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT)
+ u)
-#define BRW_MAX_VS_SURFACES SURF_INDEX_VS_UBO(BRW_MAX_VS_UBOS)
+#define SURF_INDEX_VS_SHADER_TIME (SURF_INDEX_VS_UBO(12))
+#define BRW_MAX_VS_SURFACES (SURF_INDEX_VS_SHADER_TIME + 1)
#define SURF_INDEX_SOL_BINDING(t) ((t))
#define BRW_MAX_GS_SURFACES
SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
@@ -651,6 +652,13 @@ struct brw_tracked_state {
void (*emit)( struct brw_context *brw );
};
+enum shader_time_shader_type {
+ ST_NONE,
+ ST_VS,
+ ST_FS8,
+ ST_FS16,
+};
+
/* Flags for brw->state.cache.
*/
#define CACHE_NEW_BLEND_STATE (1<<BRW_BLEND_STATE)
@@ -1089,6 +1097,16 @@ struct brw_context
uint32_t num_instances;
int basevertex;
+
+ struct {
+ drm_intel_bo *bo;
+ struct gl_shader_program **programs;
+ enum shader_time_shader_type *types;
+ uint64_t *cumulative;
+ int num_entries;
+ int max_entries;
+ double report_time;
+ } shader_time;
};
/*======================================================================
@@ -1144,7 +1162,9 @@ void brwInitFragProgFuncs( struct dd_function_table
*functions );
int brw_get_scratch_size(int size);
void brw_get_scratch_bo(struct intel_context *intel,
drm_intel_bo **scratch_bo, int size);
-
+void brw_init_shader_time(struct brw_context *brw);
+void brw_collect_and_report_shader_time(struct brw_context *brw);
+void brw_destroy_shader_time(struct brw_context *brw);
/* brw_urb.c
*/
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h
b/src/mesa/drivers/dri/i965/brw_defines.h
index 6dc4707..b84d8f9 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -665,6 +665,8 @@ enum opcode {
SHADER_OPCODE_TXS,
FS_OPCODE_TXB,
+ SHADER_OPCODE_SHADER_TIME_ADD,
+
FS_OPCODE_DDX,
FS_OPCODE_DDY,
FS_OPCODE_PIXEL_X,
@@ -729,6 +731,8 @@ enum opcode {
#define BRW_ARF_CONTROL 0x80
#define BRW_ARF_NOTIFICATION_COUNT 0x90
#define BRW_ARF_IP 0xA0
+#define BRW_ARF_TDR 0xB0
+#define BRW_ARF_TIMESTAMP 0xC0
#define BRW_MRF_COMPR4 (1 << 7)
@@ -956,7 +960,21 @@ enum brw_message_target {
#define BRW_SCRATCH_SPACE_SIZE_1M 10
#define BRW_SCRATCH_SPACE_SIZE_2M 11
Maybe add an /** URB Atomic Operations */ comment here?
-
+#define BRW_AOP_AND 1
+#define BRW_AOP_OR 2
+#define BRW_AOP_XOR 3
+#define BRW_AOP_MOV 4
+#define BRW_AOP_INC 5
+#define BRW_AOP_DEC 6
+#define BRW_AOP_ADD 7
+#define BRW_AOP_SUB 8
+#define BRW_AOP_REVSUB 9
+#define BRW_AOP_IMAX 10
+#define BRW_AOP_IMIN 11
+#define BRW_AOP_UMAX 12
+#define BRW_AOP_UMIN 13
+#define BRW_AOP_CMPWR 14
+#define BRW_AOP_PREDEC 15
This looks awesome. Series is:
Reviewed-by: Kenneth Graunke <kenn...@whitecape.org>
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev