From: Marek Olšák <marek.ol...@amd.com>

The next patch will add SPI_BUSY monitoring.
---
 src/gallium/drivers/radeon/r600_gpu_load.c    | 64 ++++++++++++++++-----------
 src/gallium/drivers/radeon/r600_pipe_common.h | 15 +++++--
 src/gallium/drivers/radeon/r600_query.c       |  6 +--
 3 files changed, 53 insertions(+), 32 deletions(-)
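Note for reviewers (illustration only, not part of the commit): the refactor
overlays named busy/idle pairs on a flat array so that one helper can service
any counter pair through a single "busy" index, with the idle slot at
index + 1. The standalone sketch below shows that layout trick under that
assumption; demo_counters, DEMO_BUSY_INDEX and the spi_busy/spi_idle pair are
invented names standing in for what a follow-up pair could look like, not
code taken from this patch.

#include <stdio.h>

/* Mirrors the idea behind union r600_grbm_counters + BUSY_INDEX:
 * each named busy/idle pair aliases two consecutive array slots. */
union demo_counters {
        struct {
                unsigned gui_busy;
                unsigned gui_idle;
                unsigned spi_busy;      /* hypothetical second pair */
                unsigned spi_idle;
        } named;
        unsigned array[4];
};

/* Index of the "busy" slot of a pair; the "idle" slot is index + 1. */
#define DEMO_BUSY_INDEX(c, field) \
        ((unsigned)(&(c)->named.field##_busy - (c)->array))

int main(void)
{
        union demo_counters c = {{0, 0, 0, 0}};

        c.named.gui_busy = 7;
        c.named.spi_idle = 3;

        unsigned gui = DEMO_BUSY_INDEX(&c, gui);        /* 0 */
        unsigned spi = DEMO_BUSY_INDEX(&c, spi);        /* 2 */

        printf("gui: busy=%u idle=%u\n", c.array[gui], c.array[gui + 1]);
        printf("spi: busy=%u idle=%u\n", c.array[spi], c.array[spi + 1]);
        return 0;
}

Built with gcc -std=gnu99, this prints "gui: busy=7 idle=0" and
"spi: busy=0 idle=3", which is the busy_index / busy_index + 1 contract that
r600_read_counter relies on.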
diff --git a/src/gallium/drivers/radeon/r600_gpu_load.c b/src/gallium/drivers/radeon/r600_gpu_load.c
index a653834..764d9b5 100644
--- a/src/gallium/drivers/radeon/r600_gpu_load.c
+++ b/src/gallium/drivers/radeon/r600_gpu_load.c
@@ -37,26 +37,31 @@
 #include "r600_pipe_common.h"
 #include "os/os_time.h"
 
 /* For good accuracy at 1000 fps or lower. This will be inaccurate for higher
  * fps (there are too few samples per frame). */
 #define SAMPLES_PER_SEC 10000
 
 #define GRBM_STATUS    0x8010
 #define GUI_ACTIVE(x)  (((x) >> 31) & 0x1)
 
-static bool r600_is_gpu_busy(struct r600_common_screen *rscreen)
+static void r600_update_grbm_counters(struct r600_common_screen *rscreen,
+                                      union r600_grbm_counters *counters)
 {
        uint32_t value = 0;
 
        rscreen->ws->read_registers(rscreen->ws, GRBM_STATUS, 1, &value);
-       return GUI_ACTIVE(value);
+
+       if (GUI_ACTIVE(value))
+               p_atomic_inc(&counters->named.gui_busy);
+       else
+               p_atomic_inc(&counters->named.gui_idle);
 }
 
 static PIPE_THREAD_ROUTINE(r600_gpu_load_thread, param)
 {
        struct r600_common_screen *rscreen = (struct r600_common_screen*)param;
        const int period_us = 1000000 / SAMPLES_PER_SEC;
        int sleep_us = period_us;
        int64_t cur_time, last_time = os_time_get();
 
        while (!p_atomic_read(&rscreen->gpu_load_stop_thread)) {
@@ -70,72 +75,81 @@ static PIPE_THREAD_ROUTINE(r600_gpu_load_thread, param)
                if (os_time_timeout(last_time, last_time + period_us, cur_time))
                        sleep_us = MAX2(sleep_us - 1, 1);
                else
                        sleep_us += 1;
 
                /*printf("Hz: %.1f\n", 1000000.0 / (cur_time - last_time));*/
                last_time = cur_time;
 
                /* Update the counters. */
-               if (r600_is_gpu_busy(rscreen))
-                       p_atomic_inc(&rscreen->gpu_load_counter_busy);
-               else
-                       p_atomic_inc(&rscreen->gpu_load_counter_idle);
+               r600_update_grbm_counters(rscreen, &rscreen->grbm_counters);
        }
        p_atomic_dec(&rscreen->gpu_load_stop_thread);
        return 0;
 }
 
 void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen)
 {
        if (!rscreen->gpu_load_thread)
                return;
 
        p_atomic_inc(&rscreen->gpu_load_stop_thread);
        pipe_thread_wait(rscreen->gpu_load_thread);
        rscreen->gpu_load_thread = 0;
 }
 
-static uint64_t r600_gpu_load_read_counter(struct r600_common_screen *rscreen)
+static uint64_t r600_read_counter(struct r600_common_screen *rscreen,
+                                  unsigned busy_index)
 {
        /* Start the thread if needed. */
        if (!rscreen->gpu_load_thread) {
                pipe_mutex_lock(rscreen->gpu_load_mutex);
                /* Check again inside the mutex. */
                if (!rscreen->gpu_load_thread)
                        rscreen->gpu_load_thread =
                                pipe_thread_create(r600_gpu_load_thread, rscreen);
                pipe_mutex_unlock(rscreen->gpu_load_mutex);
        }
 
-       /* The busy counter is in the lower 32 bits.
-        * The idle counter is in the upper 32 bits. */
-       return p_atomic_read(&rscreen->gpu_load_counter_busy) |
-              ((uint64_t)p_atomic_read(&rscreen->gpu_load_counter_idle) << 32);
-}
+       unsigned busy = p_atomic_read(&rscreen->grbm_counters.array[busy_index]);
+       unsigned idle = p_atomic_read(&rscreen->grbm_counters.array[busy_index + 1]);
 
-/**
- * Just return the counters.
- */
-uint64_t r600_gpu_load_begin(struct r600_common_screen *rscreen)
-{
-       return r600_gpu_load_read_counter(rscreen);
+       return busy | ((uint64_t)idle << 32);
 }
 
-unsigned r600_gpu_load_end(struct r600_common_screen *rscreen, uint64_t begin)
+static unsigned r600_end_counter(struct r600_common_screen *rscreen,
+                                 uint64_t begin, unsigned busy_index)
 {
-       uint64_t end = r600_gpu_load_read_counter(rscreen);
+       uint64_t end = r600_read_counter(rscreen, busy_index);
        unsigned busy = (end & 0xffffffff) - (begin & 0xffffffff);
        unsigned idle = (end >> 32) - (begin >> 32);
 
-       /* Calculate the GPU load.
+       /* Calculate the % of time the busy counter was being incremented.
        *
-        * If no counters have been incremented, return the current load.
+        * If no counters were incremented, return the current counter status.
        * It's for the case when the load is queried faster than
        * the counters are updated.
        */
-       if (idle || busy)
+       if (idle || busy) {
                return busy*100 / (busy + idle);
-       else
-               return r600_is_gpu_busy(rscreen) ? 100 : 0;
+       } else {
+               union r600_grbm_counters counters;
+
+               memset(&counters, 0, sizeof(counters));
+               r600_update_grbm_counters(rscreen, &counters);
+               return counters.array[busy_index] ? 100 : 0;
+       }
+}
+
+#define BUSY_INDEX(rscreen, field) (&rscreen->grbm_counters.named.field##_busy - \
+                                    rscreen->grbm_counters.array)
+
+uint64_t r600_begin_counter_gui(struct r600_common_screen *rscreen)
+{
+       return r600_read_counter(rscreen, BUSY_INDEX(rscreen, gui));
+}
+
+unsigned r600_end_counter_gui(struct r600_common_screen *rscreen, uint64_t begin)
+{
+       return r600_end_counter(rscreen, begin, BUSY_INDEX(rscreen, gui));
 }
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 74f86dc..9f69298 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -345,20 +345,28 @@ struct r600_surface {
        unsigned db_depth_size;
        unsigned db_depth_slice;        /* EG and later */
        unsigned db_stencil_base;       /* EG and later */
        unsigned db_stencil_info;       /* EG and later */
        unsigned db_prefetch_limit;     /* R600 only */
        unsigned db_htile_surface;
        unsigned db_htile_data_base;
        unsigned db_preload_control;    /* EG and later */
 };
 
+union r600_grbm_counters {
+       struct {
+               unsigned gui_busy;
+               unsigned gui_idle;
+       } named;
+       unsigned array[0];
+};
+
 struct r600_common_screen {
        struct pipe_screen              b;
        struct radeon_winsys            *ws;
        enum radeon_family              family;
        enum chip_class                 chip_class;
        struct radeon_info              info;
        uint64_t                        debug_flags;
        bool                            has_cp_dma;
        bool                            has_streamout;
 
@@ -378,22 +386,21 @@ struct r600_common_screen {
        unsigned                        num_compilations;
        /* Along with ST_DEBUG=precompile, this should show if applications
        * are loading shaders on demand. This is a monotonic counter. */
        unsigned                        num_shaders_created;
        unsigned                        num_shader_cache_hits;
 
        /* GPU load thread. */
        pipe_mutex                      gpu_load_mutex;
        pipe_thread                     gpu_load_thread;
-       unsigned                        gpu_load_counter_busy;
-       unsigned                        gpu_load_counter_idle;
+       union r600_grbm_counters        grbm_counters;
        volatile unsigned               gpu_load_stop_thread; /* bool */
 
        char                            renderer_string[100];
 
        /* Performance counters. */
        struct r600_perfcounters        *perfcounters;
 
        /* If pipe_screen wants to re-emit the framebuffer state of all
        * contexts, it should atomically increment this. Each context will
        * compare this with its own last known value of the counter before
@@ -732,22 +739,22 @@ struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
 const char *r600_get_llvm_processor_name(enum radeon_family family);
 void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
                          struct r600_resource *dst, struct r600_resource *src);
 void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
                     struct radeon_saved_cs *saved);
 void radeon_clear_saved_cs(struct radeon_saved_cs *saved);
 bool r600_check_device_reset(struct r600_common_context *rctx);
 
 /* r600_gpu_load.c */
 void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen);
-uint64_t r600_gpu_load_begin(struct r600_common_screen *rscreen);
-unsigned r600_gpu_load_end(struct r600_common_screen *rscreen, uint64_t begin);
+uint64_t r600_begin_counter_gui(struct r600_common_screen *rscreen);
+unsigned r600_end_counter_gui(struct r600_common_screen *rscreen, uint64_t begin);
 
 /* r600_perfcounters.c */
 void r600_perfcounters_destroy(struct r600_common_screen *rscreen);
 
 /* r600_query.c */
 void r600_init_screen_query_functions(struct r600_common_screen *rscreen);
 void r600_query_init(struct r600_common_context *rctx);
 void r600_suspend_queries(struct r600_common_context *ctx);
 void r600_resume_queries(struct r600_common_context *ctx);
 void r600_query_init_backend_mask(struct r600_common_context *ctx);
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 3c72f27..b7fbd37 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -138,21 +138,21 @@ static bool r600_query_sw_begin(struct r600_common_context *rctx,
        case R600_QUERY_BUFFER_WAIT_TIME:
        case R600_QUERY_NUM_GFX_IBS:
        case R600_QUERY_NUM_SDMA_IBS:
        case R600_QUERY_NUM_BYTES_MOVED:
        case R600_QUERY_NUM_EVICTIONS: {
                enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
                query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
                break;
        }
        case R600_QUERY_GPU_LOAD:
-               query->begin_result = r600_gpu_load_begin(rctx->screen);
+               query->begin_result = r600_begin_counter_gui(rctx->screen);
                break;
        case R600_QUERY_NUM_COMPILATIONS:
                query->begin_result = p_atomic_read(&rctx->screen->num_compilations);
                break;
        case R600_QUERY_NUM_SHADERS_CREATED:
                query->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
                break;
        case R600_QUERY_NUM_SHADER_CACHE_HITS:
                query->begin_result =
                        p_atomic_read(&rctx->screen->num_shader_cache_hits);
@@ -229,22 +229,22 @@ static bool r600_query_sw_end(struct r600_common_context *rctx,
        case R600_QUERY_BUFFER_WAIT_TIME:
        case R600_QUERY_NUM_GFX_IBS:
        case R600_QUERY_NUM_SDMA_IBS:
        case R600_QUERY_NUM_BYTES_MOVED:
        case R600_QUERY_NUM_EVICTIONS: {
                enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
                query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
                break;
        }
        case R600_QUERY_GPU_LOAD:
-               query->end_result = r600_gpu_load_end(rctx->screen,
-                                                     query->begin_result);
+               query->end_result = r600_end_counter_gui(rctx->screen,
+                                                        query->begin_result);
                query->begin_result = 0;
                break;
        case R600_QUERY_NUM_COMPILATIONS:
                query->end_result = p_atomic_read(&rctx->screen->num_compilations);
                break;
        case R600_QUERY_NUM_SHADERS_CREATED:
                query->end_result = p_atomic_read(&rctx->screen->num_shaders_created);
                break;
        case R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO:
                query->end_result = rctx->last_tex_ps_draw_ratio;
--
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
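A second standalone sketch (again illustrative only, not part of the patch),
for anyone checking the arithmetic in r600_end_counter: a snapshot packs the
busy count in the low 32 bits and the idle count in the high 32 bits, and the
unsigned 32-bit subtraction keeps the deltas correct even if a counter wraps
between the begin and end snapshots. pack_snapshot, busy_percentage and the
sample values are invented for this demo; the real code also falls back to
sampling the register once when both deltas are zero, which is omitted here.

#include <stdint.h>
#include <stdio.h>

/* Pack one busy/idle sample pair the same way r600_read_counter does. */
static uint64_t pack_snapshot(unsigned busy, unsigned idle)
{
        return busy | ((uint64_t)idle << 32);
}

/* Busy percentage between two snapshots, mirroring r600_end_counter. */
static unsigned busy_percentage(uint64_t begin, uint64_t end)
{
        unsigned busy = (end & 0xffffffff) - (begin & 0xffffffff);
        unsigned idle = (end >> 32) - (begin >> 32);

        return (busy || idle) ? busy * 100 / (busy + idle) : 0;
}

int main(void)
{
        /* Hypothetical samples: the busy counter wraps from 0xfffffff0
         * to 0x00000010, i.e. 32 busy ticks, while idle advances by 16. */
        uint64_t begin = pack_snapshot(0xfffffff0u, 1000);
        uint64_t end   = pack_snapshot(0x00000010u, 1016);

        printf("load: %u%%\n", busy_percentage(begin, end)); /* prints 66% */
        return 0;
}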