From: Marek Olšák <marek.ol...@amd.com>

---
 src/gallium/drivers/radeon/r600_pipe_common.h |  3 +++
 src/gallium/drivers/radeon/r600_query.c       | 21 +++++++++++++++++++++
 src/gallium/drivers/radeon/r600_query.h       |  3 +++
 src/gallium/drivers/radeonsi/si_state_draw.c  |  5 +++++
 4 files changed, 32 insertions(+)
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 25d40da..8ebaed7 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -567,20 +567,23 @@ struct r600_common_context { /* Misc stats. */ unsigned num_draw_calls; unsigned num_spill_draw_calls; unsigned num_compute_calls; unsigned num_spill_compute_calls; unsigned num_dma_calls; unsigned num_cp_dma_calls; unsigned num_vs_flushes; unsigned num_ps_flushes; unsigned num_cs_flushes; + unsigned num_fb_cache_flushes; + unsigned num_L2_invalidates; + unsigned num_L2_writebacks; uint64_t num_alloc_tex_transfer_bytes; unsigned last_tex_ps_draw_ratio; /* for query */ /* Render condition. */ struct r600_atom render_cond_atom; struct pipe_query *render_cond; unsigned render_cond_mode; bool render_cond_invert; bool render_cond_force_off; /* for u_blitter */ diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c index 0e4270a..6b93329 100644 --- a/src/gallium/drivers/radeon/r600_query.c +++ b/src/gallium/drivers/radeon/r600_query.c @@ -106,20 +106,29 @@ static bool r600_query_sw_begin(struct r600_common_context *rctx, break; case R600_QUERY_NUM_VS_FLUSHES: query->begin_result = rctx->num_vs_flushes; break; case R600_QUERY_NUM_PS_FLUSHES: query->begin_result = rctx->num_ps_flushes; break; case R600_QUERY_NUM_CS_FLUSHES: query->begin_result = rctx->num_cs_flushes; break; + case R600_QUERY_NUM_FB_CACHE_FLUSHES: + query->begin_result = rctx->num_fb_cache_flushes; + break; + case R600_QUERY_NUM_L2_INVALIDATES: + query->begin_result = rctx->num_L2_invalidates; + break; + case R600_QUERY_NUM_L2_WRITEBACKS: + query->begin_result = rctx->num_L2_writebacks; + break; case R600_QUERY_REQUESTED_VRAM: case R600_QUERY_REQUESTED_GTT: case R600_QUERY_MAPPED_VRAM: case R600_QUERY_MAPPED_GTT: case R600_QUERY_VRAM_USAGE: case R600_QUERY_GTT_USAGE: case R600_QUERY_GPU_TEMPERATURE: case 
R600_QUERY_CURRENT_GPU_SCLK: case R600_QUERY_CURRENT_GPU_MCLK: case R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO: @@ -190,20 +199,29 @@ static bool r600_query_sw_end(struct r600_common_context *rctx, break; case R600_QUERY_NUM_VS_FLUSHES: query->end_result = rctx->num_vs_flushes; break; case R600_QUERY_NUM_PS_FLUSHES: query->end_result = rctx->num_ps_flushes; break; case R600_QUERY_NUM_CS_FLUSHES: query->end_result = rctx->num_cs_flushes; break; + case R600_QUERY_NUM_FB_CACHE_FLUSHES: + query->end_result = rctx->num_fb_cache_flushes; + break; + case R600_QUERY_NUM_L2_INVALIDATES: + query->end_result = rctx->num_L2_invalidates; + break; + case R600_QUERY_NUM_L2_WRITEBACKS: + query->end_result = rctx->num_L2_writebacks; + break; case R600_QUERY_REQUESTED_VRAM: case R600_QUERY_REQUESTED_GTT: case R600_QUERY_MAPPED_VRAM: case R600_QUERY_MAPPED_GTT: case R600_QUERY_VRAM_USAGE: case R600_QUERY_GTT_USAGE: case R600_QUERY_GPU_TEMPERATURE: case R600_QUERY_CURRENT_GPU_SCLK: case R600_QUERY_CURRENT_GPU_MCLK: case R600_QUERY_BUFFER_WAIT_TIME: @@ -1658,20 +1676,23 @@ static struct pipe_driver_query_info r600_driver_query_list[] = { X("num-shader-cache-hits", NUM_SHADER_CACHE_HITS, UINT64, CUMULATIVE), X("draw-calls", DRAW_CALLS, UINT64, AVERAGE), X("spill-draw-calls", SPILL_DRAW_CALLS, UINT64, AVERAGE), X("compute-calls", COMPUTE_CALLS, UINT64, AVERAGE), X("spill-compute-calls", SPILL_COMPUTE_CALLS, UINT64, AVERAGE), X("dma-calls", DMA_CALLS, UINT64, AVERAGE), X("cp-dma-calls", CP_DMA_CALLS, UINT64, AVERAGE), X("num-vs-flushes", NUM_VS_FLUSHES, UINT64, AVERAGE), X("num-ps-flushes", NUM_PS_FLUSHES, UINT64, AVERAGE), X("num-cs-flushes", NUM_CS_FLUSHES, UINT64, AVERAGE), + X("num-fb-cache-flushes", NUM_FB_CACHE_FLUSHES, UINT64, AVERAGE), + X("num-L2-invalidates", NUM_L2_INVALIDATES, UINT64, AVERAGE), + X("num-L2-writebacks", NUM_L2_WRITEBACKS, UINT64, AVERAGE), X("requested-VRAM", REQUESTED_VRAM, BYTES, AVERAGE), X("requested-GTT", REQUESTED_GTT, BYTES, AVERAGE), X("mapped-VRAM", 
MAPPED_VRAM, BYTES, AVERAGE), X("mapped-GTT", MAPPED_GTT, BYTES, AVERAGE), X("buffer-wait-time", BUFFER_WAIT_TIME, MICROSECONDS, CUMULATIVE), X("num-ctx-flushes", NUM_CTX_FLUSHES, UINT64, AVERAGE), X("num-bytes-moved", NUM_BYTES_MOVED, BYTES, CUMULATIVE), X("num-evictions", NUM_EVICTIONS, UINT64, CUMULATIVE), X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE), X("GTT-usage", GTT_USAGE, BYTES, AVERAGE), diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h index 2ff586a..af434fa 100644 --- a/src/gallium/drivers/radeon/r600_query.h +++ b/src/gallium/drivers/radeon/r600_query.h @@ -45,20 +45,23 @@ struct r600_resource; enum { R600_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC, R600_QUERY_SPILL_DRAW_CALLS, R600_QUERY_COMPUTE_CALLS, R600_QUERY_SPILL_COMPUTE_CALLS, R600_QUERY_DMA_CALLS, R600_QUERY_CP_DMA_CALLS, R600_QUERY_NUM_VS_FLUSHES, R600_QUERY_NUM_PS_FLUSHES, R600_QUERY_NUM_CS_FLUSHES, + R600_QUERY_NUM_FB_CACHE_FLUSHES, + R600_QUERY_NUM_L2_INVALIDATES, + R600_QUERY_NUM_L2_WRITEBACKS, R600_QUERY_REQUESTED_VRAM, R600_QUERY_REQUESTED_GTT, R600_QUERY_MAPPED_VRAM, R600_QUERY_MAPPED_GTT, R600_QUERY_BUFFER_WAIT_TIME, R600_QUERY_NUM_CTX_FLUSHES, R600_QUERY_NUM_BYTES_MOVED, R600_QUERY_NUM_EVICTIONS, R600_QUERY_VRAM_USAGE, R600_QUERY_GTT_USAGE, diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index cae19dc..b3f664e 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -728,20 +728,23 @@ static void si_emit_surface_sync(struct r600_common_context *rctx, radeon_emit(cs, 0); /* CP_COHER_BASE */ radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */ } void si_emit_cache_flush(struct si_context *sctx) { struct r600_common_context *rctx = &sctx->b; struct radeon_winsys_cs *cs = rctx->gfx.cs; uint32_t cp_coher_cntl = 0; + if (rctx->flags & SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER) + sctx->b.num_fb_cache_flushes++; + /* SI has a bug that it always 
flushes ICACHE and KCACHE if either * bit is set. An alternative way is to write SQC_CACHES, but that * doesn't seem to work reliably. Since the bug doesn't affect * correctness (it only does more work than necessary) and * the performance impact is likely negligible, there is no plan * to add a workaround for it. */ if (rctx->flags & SI_CONTEXT_INV_ICACHE) cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1); @@ -845,35 +848,37 @@ void si_emit_cache_flush(struct si_context *sctx) if (rctx->flags & SI_CONTEXT_INV_GLOBAL_L2 || (rctx->chip_class <= CIK && (rctx->flags & SI_CONTEXT_WRITEBACK_GLOBAL_L2))) { /* Invalidate L1 & L2. (L1 is always invalidated) * WB must be set on VI+ when TC_ACTION is set. */ si_emit_surface_sync(rctx, cp_coher_cntl | S_0085F0_TC_ACTION_ENA(1) | S_0301F0_TC_WB_ACTION_ENA(rctx->chip_class >= VI)); cp_coher_cntl = 0; + sctx->b.num_L2_invalidates++; } else { /* L1 invalidation and L2 writeback must be done separately, * because both operations can't be done together. */ if (rctx->flags & SI_CONTEXT_WRITEBACK_GLOBAL_L2) { /* WB = write-back * NC = apply to non-coherent MTYPEs * (i.e. MTYPE <= 1, which is what we use everywhere) * * WB doesn't work without NC. */ si_emit_surface_sync(rctx, cp_coher_cntl | S_0301F0_TC_WB_ACTION_ENA(1) | S_0301F0_TC_NC_ACTION_ENA(1)); cp_coher_cntl = 0; + sctx->b.num_L2_writebacks++; } if (rctx->flags & SI_CONTEXT_INV_VMEM_L1) { /* Invalidate per-CU VMEM L1. */ si_emit_surface_sync(rctx, cp_coher_cntl | S_0085F0_TCL1_ACTION_ENA(1)); cp_coher_cntl = 0; } } /* If TC flushes haven't cleared this... */ -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev