From: Marek Olšák <marek.ol...@amd.com>

---
 src/gallium/drivers/radeon/r600_pipe_common.h |  3 +++
 src/gallium/drivers/radeon/r600_query.c       | 21 +++++++++++++++++++++
 src/gallium/drivers/radeon/r600_query.h       |  3 +++
 src/gallium/drivers/radeonsi/si_state_draw.c  |  5 +++++
 4 files changed, 32 insertions(+)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 25d40da..8ebaed7 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -567,20 +567,23 @@ struct r600_common_context {
        /* Misc stats. */
        unsigned                        num_draw_calls;
        unsigned                        num_spill_draw_calls;
        unsigned                        num_compute_calls;
        unsigned                        num_spill_compute_calls;
        unsigned                        num_dma_calls;
        unsigned                        num_cp_dma_calls;
        unsigned                        num_vs_flushes;
        unsigned                        num_ps_flushes;
        unsigned                        num_cs_flushes;
+       unsigned                        num_fb_cache_flushes;
+       unsigned                        num_L2_invalidates;
+       unsigned                        num_L2_writebacks;
        uint64_t                        num_alloc_tex_transfer_bytes;
        unsigned                        last_tex_ps_draw_ratio; /* for query */
 
        /* Render condition. */
        struct r600_atom                render_cond_atom;
        struct pipe_query               *render_cond;
        unsigned                        render_cond_mode;
        bool                            render_cond_invert;
        bool                            render_cond_force_off; /* for u_blitter */
 
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 0e4270a..6b93329 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -106,20 +106,29 @@ static bool r600_query_sw_begin(struct r600_common_context *rctx,
                break;
        case R600_QUERY_NUM_VS_FLUSHES:
                query->begin_result = rctx->num_vs_flushes;
                break;
        case R600_QUERY_NUM_PS_FLUSHES:
                query->begin_result = rctx->num_ps_flushes;
                break;
        case R600_QUERY_NUM_CS_FLUSHES:
                query->begin_result = rctx->num_cs_flushes;
                break;
+       case R600_QUERY_NUM_FB_CACHE_FLUSHES:
+               query->begin_result = rctx->num_fb_cache_flushes;
+               break;
+       case R600_QUERY_NUM_L2_INVALIDATES:
+               query->begin_result = rctx->num_L2_invalidates;
+               break;
+       case R600_QUERY_NUM_L2_WRITEBACKS:
+               query->begin_result = rctx->num_L2_writebacks;
+               break;
        case R600_QUERY_REQUESTED_VRAM:
        case R600_QUERY_REQUESTED_GTT:
        case R600_QUERY_MAPPED_VRAM:
        case R600_QUERY_MAPPED_GTT:
        case R600_QUERY_VRAM_USAGE:
        case R600_QUERY_GTT_USAGE:
        case R600_QUERY_GPU_TEMPERATURE:
        case R600_QUERY_CURRENT_GPU_SCLK:
        case R600_QUERY_CURRENT_GPU_MCLK:
        case R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO:
@@ -190,20 +199,29 @@ static bool r600_query_sw_end(struct r600_common_context *rctx,
                break;
        case R600_QUERY_NUM_VS_FLUSHES:
                query->end_result = rctx->num_vs_flushes;
                break;
        case R600_QUERY_NUM_PS_FLUSHES:
                query->end_result = rctx->num_ps_flushes;
                break;
        case R600_QUERY_NUM_CS_FLUSHES:
                query->end_result = rctx->num_cs_flushes;
                break;
+       case R600_QUERY_NUM_FB_CACHE_FLUSHES:
+               query->end_result = rctx->num_fb_cache_flushes;
+               break;
+       case R600_QUERY_NUM_L2_INVALIDATES:
+               query->end_result = rctx->num_L2_invalidates;
+               break;
+       case R600_QUERY_NUM_L2_WRITEBACKS:
+               query->end_result = rctx->num_L2_writebacks;
+               break;
        case R600_QUERY_REQUESTED_VRAM:
        case R600_QUERY_REQUESTED_GTT:
        case R600_QUERY_MAPPED_VRAM:
        case R600_QUERY_MAPPED_GTT:
        case R600_QUERY_VRAM_USAGE:
        case R600_QUERY_GTT_USAGE:
        case R600_QUERY_GPU_TEMPERATURE:
        case R600_QUERY_CURRENT_GPU_SCLK:
        case R600_QUERY_CURRENT_GPU_MCLK:
        case R600_QUERY_BUFFER_WAIT_TIME:
@@ -1658,20 +1676,23 @@ static struct pipe_driver_query_info r600_driver_query_list[] = {
        X("num-shader-cache-hits",      NUM_SHADER_CACHE_HITS,  UINT64, CUMULATIVE),
        X("draw-calls",                 DRAW_CALLS,             UINT64, AVERAGE),
        X("spill-draw-calls",           SPILL_DRAW_CALLS,       UINT64, AVERAGE),
        X("compute-calls",              COMPUTE_CALLS,          UINT64, AVERAGE),
        X("spill-compute-calls",        SPILL_COMPUTE_CALLS,    UINT64, AVERAGE),
        X("dma-calls",                  DMA_CALLS,              UINT64, AVERAGE),
        X("cp-dma-calls",               CP_DMA_CALLS,           UINT64, AVERAGE),
        X("num-vs-flushes",             NUM_VS_FLUSHES,         UINT64, AVERAGE),
        X("num-ps-flushes",             NUM_PS_FLUSHES,         UINT64, AVERAGE),
        X("num-cs-flushes",             NUM_CS_FLUSHES,         UINT64, AVERAGE),
+       X("num-fb-cache-flushes",       NUM_FB_CACHE_FLUSHES,   UINT64, AVERAGE),
+       X("num-L2-invalidates",         NUM_L2_INVALIDATES,     UINT64, AVERAGE),
+       X("num-L2-writebacks",          NUM_L2_WRITEBACKS,      UINT64, AVERAGE),
        X("requested-VRAM",             REQUESTED_VRAM,         BYTES, AVERAGE),
        X("requested-GTT",              REQUESTED_GTT,          BYTES, AVERAGE),
        X("mapped-VRAM",                MAPPED_VRAM,            BYTES, AVERAGE),
        X("mapped-GTT",                 MAPPED_GTT,             BYTES, AVERAGE),
        X("buffer-wait-time",           BUFFER_WAIT_TIME,       MICROSECONDS, CUMULATIVE),
        X("num-ctx-flushes",            NUM_CTX_FLUSHES,        UINT64, AVERAGE),
        X("num-bytes-moved",            NUM_BYTES_MOVED,        BYTES, CUMULATIVE),
        X("num-evictions",              NUM_EVICTIONS,          UINT64, CUMULATIVE),
        X("VRAM-usage",                 VRAM_USAGE,             BYTES, AVERAGE),
        X("GTT-usage",                  GTT_USAGE,              BYTES, AVERAGE),
diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h
index 2ff586a..af434fa 100644
--- a/src/gallium/drivers/radeon/r600_query.h
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -45,20 +45,23 @@ struct r600_resource;
 enum {
        R600_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC,
        R600_QUERY_SPILL_DRAW_CALLS,
        R600_QUERY_COMPUTE_CALLS,
        R600_QUERY_SPILL_COMPUTE_CALLS,
        R600_QUERY_DMA_CALLS,
        R600_QUERY_CP_DMA_CALLS,
        R600_QUERY_NUM_VS_FLUSHES,
        R600_QUERY_NUM_PS_FLUSHES,
        R600_QUERY_NUM_CS_FLUSHES,
+       R600_QUERY_NUM_FB_CACHE_FLUSHES,
+       R600_QUERY_NUM_L2_INVALIDATES,
+       R600_QUERY_NUM_L2_WRITEBACKS,
        R600_QUERY_REQUESTED_VRAM,
        R600_QUERY_REQUESTED_GTT,
        R600_QUERY_MAPPED_VRAM,
        R600_QUERY_MAPPED_GTT,
        R600_QUERY_BUFFER_WAIT_TIME,
        R600_QUERY_NUM_CTX_FLUSHES,
        R600_QUERY_NUM_BYTES_MOVED,
        R600_QUERY_NUM_EVICTIONS,
        R600_QUERY_VRAM_USAGE,
        R600_QUERY_GTT_USAGE,
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index cae19dc..b3f664e 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -728,20 +728,23 @@ static void si_emit_surface_sync(struct r600_common_context *rctx,
        radeon_emit(cs, 0);               /* CP_COHER_BASE */
        radeon_emit(cs, 0x0000000A);      /* POLL_INTERVAL */
 }
 
 void si_emit_cache_flush(struct si_context *sctx)
 {
        struct r600_common_context *rctx = &sctx->b;
        struct radeon_winsys_cs *cs = rctx->gfx.cs;
        uint32_t cp_coher_cntl = 0;
 
+       if (rctx->flags & SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER)
+               sctx->b.num_fb_cache_flushes++;
+
        /* SI has a bug that it always flushes ICACHE and KCACHE if either
         * bit is set. An alternative way is to write SQC_CACHES, but that
         * doesn't seem to work reliably. Since the bug doesn't affect
         * correctness (it only does more work than necessary) and
         * the performance impact is likely negligible, there is no plan
         * to add a workaround for it.
         */
 
        if (rctx->flags & SI_CONTEXT_INV_ICACHE)
                cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
@@ -845,35 +848,37 @@ void si_emit_cache_flush(struct si_context *sctx)
        if (rctx->flags & SI_CONTEXT_INV_GLOBAL_L2 ||
            (rctx->chip_class <= CIK &&
             (rctx->flags & SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {
                /* Invalidate L1 & L2. (L1 is always invalidated)
                 * WB must be set on VI+ when TC_ACTION is set.
                 */
                si_emit_surface_sync(rctx, cp_coher_cntl |
                                     S_0085F0_TC_ACTION_ENA(1) |
                                     S_0301F0_TC_WB_ACTION_ENA(rctx->chip_class >= VI));
                cp_coher_cntl = 0;
+               sctx->b.num_L2_invalidates++;
        } else {
                /* L1 invalidation and L2 writeback must be done separately,
                 * because both operations can't be done together.
                 */
                if (rctx->flags & SI_CONTEXT_WRITEBACK_GLOBAL_L2) {
                        /* WB = write-back
                         * NC = apply to non-coherent MTYPEs
                         *      (i.e. MTYPE <= 1, which is what we use everywhere)
                         *
                         * WB doesn't work without NC.
                         */
                        si_emit_surface_sync(rctx, cp_coher_cntl |
                                             S_0301F0_TC_WB_ACTION_ENA(1) |
                                             S_0301F0_TC_NC_ACTION_ENA(1));
                        cp_coher_cntl = 0;
+                       sctx->b.num_L2_writebacks++;
                }
                if (rctx->flags & SI_CONTEXT_INV_VMEM_L1) {
                        /* Invalidate per-CU VMEM L1. */
                        si_emit_surface_sync(rctx, cp_coher_cntl |
                                             S_0085F0_TCL1_ACTION_ENA(1));
                        cp_coher_cntl = 0;
                }
        }
 
        /* If TC flushes haven't cleared this... */
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to