From: Nicolai Hähnle <nicolai.haeh...@amd.com>
---
 src/gallium/drivers/radeonsi/si_perfcounter.c | 13 ++--
 src/gallium/drivers/radeonsi/si_query.c       | 75 ++++++++++---------
 src/gallium/drivers/radeonsi/si_query.h       | 18 +++--
 3 files changed, 62 insertions(+), 44 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index 69e149c76b6..0b3d8f89273 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -861,21 +861,24 @@ static void si_pc_query_add_result(struct si_screen *screen, uint32_t value = results[counter->base + j * counter->stride]; result->batch[i].u64 += value; } } } static struct si_query_ops batch_query_ops = { .destroy = si_pc_query_destroy, .begin = si_query_hw_begin, .end = si_query_hw_end, - .get_result = si_query_hw_get_result + .get_result = si_query_hw_get_result, + + .suspend = si_query_hw_suspend, + .resume = si_query_hw_resume, }; static struct si_query_hw_ops batch_query_hw_ops = { .prepare_buffer = si_pc_query_prepare_buffer, .emit_start = si_pc_query_emit_start, .emit_stop = si_pc_query_emit_stop, .clear_result = si_pc_query_clear_result, .add_result = si_pc_query_add_result, }; @@ -994,41 +997,41 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx, fprintf(stderr, "perfcounter group %s: too many selected\n", block->b->b->name); goto error; } group->selectors[group->num_counters] = sub_index; ++group->num_counters; } /* Compute result bases and CS size per group */ - query->b.num_cs_dw_end = pc->num_stop_cs_dwords; - query->b.num_cs_dw_end += pc->num_instance_cs_dwords; + query->b.b.num_cs_dw_suspend = pc->num_stop_cs_dwords; + query->b.b.num_cs_dw_suspend += pc->num_instance_cs_dwords; i = 0; for (group = query->groups; group; group = group->next) { struct si_pc_block *block = group->block; unsigned read_dw; unsigned instances = 1; if ((block->b->b->flags & SI_PC_BLOCK_SE) && group->se < 0) instances = screen->info.max_se; if (group->instance < 0) instances *= block->num_instances; group->result_base = i; query->b.result_size += sizeof(uint64_t) * instances * group->num_counters; i += instances * group->num_counters; read_dw = 6 * group->num_counters; - 
query->b.num_cs_dw_end += instances * read_dw; - query->b.num_cs_dw_end += instances * pc->num_instance_cs_dwords; + query->b.b.num_cs_dw_suspend += instances * read_dw; + query->b.b.num_cs_dw_suspend += instances * pc->num_instance_cs_dwords; } if (query->shaders) { if (query->shaders == SI_PC_SHADERS_WINDOWING) query->shaders = 0xffffffff; } /* Map user-supplied query array to result indices */ query->counters = CALLOC(num_queries, sizeof(*query->counters)); for (i = 0; i < num_queries; ++i) { diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c index aed3e1e80c1..479a1bbf2c4 100644 --- a/src/gallium/drivers/radeonsi/si_query.c +++ b/src/gallium/drivers/radeonsi/si_query.c @@ -27,20 +27,22 @@ #include "si_pipe.h" #include "si_query.h" #include "util/u_memory.h" #include "util/u_upload_mgr.h" #include "util/os_time.h" #include "util/u_suballoc.h" #include "amd/common/sid.h" #define SI_MAX_STREAMS 4 +static struct si_query_ops query_hw_ops; + struct si_hw_query_params { unsigned start_offset; unsigned end_offset; unsigned fence_offset; unsigned pair_stride; unsigned pair_count; }; /* Queries without buffer handling or suspend/resume. 
*/ struct si_query_sw { @@ -600,28 +602,20 @@ static bool si_query_hw_prepare_buffer(struct si_screen *sscreen, } static void si_query_hw_get_result_resource(struct si_context *sctx, struct si_query *rquery, bool wait, enum pipe_query_value_type result_type, int index, struct pipe_resource *resource, unsigned offset); -static struct si_query_ops query_hw_ops = { - .destroy = si_query_hw_destroy, - .begin = si_query_hw_begin, - .end = si_query_hw_end, - .get_result = si_query_hw_get_result, - .get_result_resource = si_query_hw_get_result_resource, -}; - static void si_query_hw_do_emit_start(struct si_context *sctx, struct si_query_hw *query, struct r600_resource *buffer, uint64_t va); static void si_query_hw_do_emit_stop(struct si_context *sctx, struct si_query_hw *query, struct r600_resource *buffer, uint64_t va); static void si_query_hw_add_result(struct si_screen *sscreen, struct si_query_hw *, void *buffer, @@ -658,55 +652,54 @@ static struct pipe_query *si_query_hw_create(struct si_screen *sscreen, query->b.type = query_type; query->b.ops = &query_hw_ops; query->ops = &query_hw_default_hw_ops; switch (query_type) { case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: query->result_size = 16 * sscreen->info.num_render_backends; query->result_size += 16; /* for the fence + alignment */ - query->num_cs_dw_end = 6 + si_cp_write_fence_dwords(sscreen); + query->b.num_cs_dw_suspend = 6 + si_cp_write_fence_dwords(sscreen); break; case SI_QUERY_TIME_ELAPSED_SDMA: /* GET_GLOBAL_TIMESTAMP only works if the offset is a multiple of 32. 
*/ query->result_size = 64; - query->num_cs_dw_end = 0; break; case PIPE_QUERY_TIME_ELAPSED: query->result_size = 24; - query->num_cs_dw_end = 8 + si_cp_write_fence_dwords(sscreen); + query->b.num_cs_dw_suspend = 8 + si_cp_write_fence_dwords(sscreen); break; case PIPE_QUERY_TIMESTAMP: query->result_size = 16; - query->num_cs_dw_end = 8 + si_cp_write_fence_dwords(sscreen); + query->b.num_cs_dw_suspend = 8 + si_cp_write_fence_dwords(sscreen); query->flags = SI_QUERY_HW_FLAG_NO_START; break; case PIPE_QUERY_PRIMITIVES_EMITTED: case PIPE_QUERY_PRIMITIVES_GENERATED: case PIPE_QUERY_SO_STATISTICS: case PIPE_QUERY_SO_OVERFLOW_PREDICATE: /* NumPrimitivesWritten, PrimitiveStorageNeeded. */ query->result_size = 32; - query->num_cs_dw_end = 6; + query->b.num_cs_dw_suspend = 6; query->stream = index; break; case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: /* NumPrimitivesWritten, PrimitiveStorageNeeded. */ query->result_size = 32 * SI_MAX_STREAMS; - query->num_cs_dw_end = 6 * SI_MAX_STREAMS; + query->b.num_cs_dw_suspend = 6 * SI_MAX_STREAMS; break; case PIPE_QUERY_PIPELINE_STATISTICS: /* 11 values on GCN. 
*/ query->result_size = 11 * 16; query->result_size += 8; /* for the fence + alignment */ - query->num_cs_dw_end = 6 + si_cp_write_fence_dwords(sscreen); + query->b.num_cs_dw_suspend = 6 + si_cp_write_fence_dwords(sscreen); break; default: assert(0); FREE(query); return NULL; } if (!si_query_hw_init(sscreen, query)) { FREE(query); return NULL; @@ -833,22 +826,20 @@ static void si_query_hw_emit_start(struct si_context *sctx, query->buffer.previous = qbuf; query->buffer.buf = si_new_query_buffer(sctx->screen, query); if (!query->buffer.buf) return; } /* emit begin query */ va = query->buffer.buf->gpu_address + query->buffer.results_end; query->ops->emit_start(sctx, query, query->buffer.buf, va); - - sctx->num_cs_dw_queries_suspend += query->num_cs_dw_end; } static void si_query_hw_do_emit_stop(struct si_context *sctx, struct si_query_hw *query, struct r600_resource *buffer, uint64_t va) { struct radeon_cmdbuf *cs = sctx->gfx_cs; uint64_t fence_va = 0; @@ -928,23 +919,20 @@ static void si_query_hw_emit_stop(struct si_context *sctx, if (query->flags & SI_QUERY_HW_FLAG_NO_START) si_need_gfx_cs_space(sctx); /* emit end query */ va = query->buffer.buf->gpu_address + query->buffer.results_end; query->ops->emit_stop(sctx, query, query->buffer.buf, va); query->buffer.results_end += query->result_size; - if (!(query->flags & SI_QUERY_HW_FLAG_NO_START)) - sctx->num_cs_dw_queries_suspend -= query->num_cs_dw_end; - si_update_occlusion_query_state(sctx, query->b.type, -1); si_update_prims_generated_query_state(sctx, query->b.type, -1); } static void emit_set_predicate(struct si_context *ctx, struct r600_resource *buf, uint64_t va, uint32_t op) { struct radeon_cmdbuf *cs = ctx->gfx_cs; @@ -1112,21 +1100,22 @@ bool si_query_hw_begin(struct si_context *sctx, if (!(query->flags & SI_QUERY_HW_FLAG_BEGIN_RESUMES)) si_query_hw_reset_buffers(sctx, query); r600_resource_reference(&query->workaround_buf, NULL); si_query_hw_emit_start(sctx, query); if (!query->buffer.buf) return false; - 
LIST_ADDTAIL(&query->list, &sctx->active_queries); + LIST_ADDTAIL(&query->b.active_list, &sctx->active_queries); + sctx->num_cs_dw_queries_suspend += query->b.num_cs_dw_suspend; return true; } static bool si_end_query(struct pipe_context *ctx, struct pipe_query *query) { struct si_context *sctx = (struct si_context *)ctx; struct si_query *rquery = (struct si_query *)query; return rquery->ops->end(sctx, rquery); } @@ -1134,22 +1123,24 @@ static bool si_end_query(struct pipe_context *ctx, struct pipe_query *query) bool si_query_hw_end(struct si_context *sctx, struct si_query *rquery) { struct si_query_hw *query = (struct si_query_hw *)rquery; if (query->flags & SI_QUERY_HW_FLAG_NO_START) si_query_hw_reset_buffers(sctx, query); si_query_hw_emit_stop(sctx, query); - if (!(query->flags & SI_QUERY_HW_FLAG_NO_START)) - LIST_DELINIT(&query->list); + if (!(query->flags & SI_QUERY_HW_FLAG_NO_START)) { + LIST_DELINIT(&query->b.active_list); + sctx->num_cs_dw_queries_suspend -= query->b.num_cs_dw_suspend; + } if (!query->buffer.buf) return false; return true; } static void si_get_hw_query_params(struct si_context *sctx, struct si_query_hw *rquery, int index, struct si_hw_query_params *params) @@ -1342,20 +1333,41 @@ static void si_query_hw_add_result(struct si_screen *sscreen, result->pipeline_statistics.c_primitives, result->pipeline_statistics.ps_invocations, result->pipeline_statistics.cs_invocations); #endif break; default: assert(0); } } +void si_query_hw_suspend(struct si_context *sctx, struct si_query *query) +{ + si_query_hw_emit_stop(sctx, (struct si_query_hw *)query); +} + +void si_query_hw_resume(struct si_context *sctx, struct si_query *query) +{ + si_query_hw_emit_start(sctx, (struct si_query_hw *)query); +} + +static struct si_query_ops query_hw_ops = { + .destroy = si_query_hw_destroy, + .begin = si_query_hw_begin, + .end = si_query_hw_end, + .get_result = si_query_hw_get_result, + .get_result_resource = si_query_hw_get_result_resource, + + .suspend = 
si_query_hw_suspend, + .resume = si_query_hw_resume, +}; + static boolean si_get_query_result(struct pipe_context *ctx, struct pipe_query *query, boolean wait, union pipe_query_result *result) { struct si_context *sctx = (struct si_context *)ctx; struct si_query *rquery = (struct si_query *)query; return rquery->ops->get_result(sctx, rquery, wait, result); } @@ -1637,40 +1649,35 @@ static void si_render_condition(struct pipe_context *ctx, sctx->render_cond = query; sctx->render_cond_invert = condition; sctx->render_cond_mode = mode; si_set_atom_dirty(sctx, atom, query != NULL); } void si_suspend_queries(struct si_context *sctx) { - struct si_query_hw *query; + struct si_query *query; - LIST_FOR_EACH_ENTRY(query, &sctx->active_queries, list) { - si_query_hw_emit_stop(sctx, query); - } - assert(sctx->num_cs_dw_queries_suspend == 0); + LIST_FOR_EACH_ENTRY(query, &sctx->active_queries, active_list) + query->ops->suspend(sctx, query); } void si_resume_queries(struct si_context *sctx) { - struct si_query_hw *query; - - assert(sctx->num_cs_dw_queries_suspend == 0); + struct si_query *query; /* Check CS space here. Resuming must not be interrupted by flushes. 
*/ si_need_gfx_cs_space(sctx); - LIST_FOR_EACH_ENTRY(query, &sctx->active_queries, list) { - si_query_hw_emit_start(sctx, query); - } + LIST_FOR_EACH_ENTRY(query, &sctx->active_queries, active_list) + query->ops->resume(sctx, query); } #define XFULL(name_, query_type_, type_, result_type_, group_id_) \ { \ .name = name_, \ .query_type = SI_QUERY_##query_type_, \ .type = PIPE_DRIVER_QUERY_TYPE_##type_, \ .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \ .group_id = group_id_ \ } diff --git a/src/gallium/drivers/radeonsi/si_query.h b/src/gallium/drivers/radeonsi/si_query.h index 032946edf4d..ebd965a004f 100644 --- a/src/gallium/drivers/radeonsi/si_query.h +++ b/src/gallium/drivers/radeonsi/si_query.h @@ -126,28 +126,37 @@ struct si_query_ops { bool (*end)(struct si_context *, struct si_query *); bool (*get_result)(struct si_context *, struct si_query *, bool wait, union pipe_query_result *result); void (*get_result_resource)(struct si_context *, struct si_query *, bool wait, enum pipe_query_value_type result_type, int index, struct pipe_resource *resource, unsigned offset); + + void (*suspend)(struct si_context *, struct si_query *); + void (*resume)(struct si_context *, struct si_query *); }; struct si_query { struct threaded_query b; struct si_query_ops *ops; - /* The type of query */ + /* The PIPE_QUERY_xxx type of query */ unsigned type; + + /* The number of dwords for suspend. */ + unsigned num_cs_dw_suspend; + + /* Linked list of queries that must be suspended at end of CS. */ + struct list_head active_list; }; enum { SI_QUERY_HW_FLAG_NO_START = (1 << 0), /* gap */ /* whether begin_query doesn't clear the result */ SI_QUERY_HW_FLAG_BEGIN_RESUMES = (1 << 2), }; struct si_query_hw_ops { @@ -180,44 +189,43 @@ struct si_query_buffer { struct si_query_hw { struct si_query b; struct si_query_hw_ops *ops; unsigned flags; /* The query buffer and how many results are in it. 
*/ struct si_query_buffer buffer; /* Size of the result in memory for both begin_query and end_query, * this can be one or two numbers, or it could even be a size of a structure. */ unsigned result_size; - /* The number of dwords for end_query. */ - unsigned num_cs_dw_end; - /* Linked list of queries */ - struct list_head list; /* For transform feedback: which stream the query is for */ unsigned stream; /* Workaround via compute shader */ struct r600_resource *workaround_buf; unsigned workaround_offset; }; bool si_query_hw_init(struct si_screen *sscreen, struct si_query_hw *query); void si_query_hw_destroy(struct si_screen *sscreen, struct si_query *rquery); bool si_query_hw_begin(struct si_context *sctx, struct si_query *rquery); bool si_query_hw_end(struct si_context *sctx, struct si_query *rquery); bool si_query_hw_get_result(struct si_context *sctx, struct si_query *rquery, bool wait, union pipe_query_result *result); +void si_query_hw_suspend(struct si_context *sctx, struct si_query *query); +void si_query_hw_resume(struct si_context *sctx, struct si_query *query); + /* Performance counters */ struct si_perfcounters { unsigned num_groups; unsigned num_blocks; struct si_pc_block *blocks; unsigned num_stop_cs_dwords; unsigned num_instance_cs_dwords; -- 2.19.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev