Thanks!

On 06.02.19 01:27, Timothy Arceri wrote:
> On 4/2/19 11:26 pm, Nicolai Hähnle wrote:
>> On 01.02.19 05:25, Timothy Arceri wrote:
>>> On 26/1/19 11:56 am, Marek Olšák wrote:
>>>> Timothy, can you please test the attached fix?
>>>
>>> I'm having trouble compiling 32bit mesa on my machine at the moment
>>> so haven't been able to test Batman. But this commit also causes No
>>> Mans Sky to lock up my machine and the attached patch does not fix it.
>>
>> Is there a trace or something else to easily reproduce it?
>
> I've filed a bug report with all the info you should need to reproduce:
>
> https://bugs.freedesktop.org/show_bug.cgi?id=109561
>
>>
>> Cheers,
>> Nicolai
>>
>>>
>>>> Thanks,
>>>> Marek
>>>>
>>>> On Wed, Jan 2, 2019 at 10:58 PM Timothy Arceri <tarc...@itsqueeze.com> wrote:
>>>>
>>>> This commit seems to cause bad stuttering in the Batman Arkham City
>>>> benchmark.
>>>>
>>>> On 7/12/18 1:00 am, Nicolai Hähnle wrote:
>>>> > From: Nicolai Hähnle <nicolai.haeh...@amd.com>
>>>> >
>>>> > This is a move towards using composition instead of inheritance for
>>>> > different query types.
>>>> >
>>>> > This change weakens out-of-memory error reporting somewhat, though this
>>>> > should be acceptable since we didn't consistently report such errors in
>>>> > the first place.
>>>> > ---
>>>> >  src/gallium/drivers/radeonsi/si_perfcounter.c |   8 +-
>>>> >  src/gallium/drivers/radeonsi/si_query.c       | 177 +++++++++---------
>>>> >  src/gallium/drivers/radeonsi/si_query.h       |  17 +-
>>>> >  src/gallium/drivers/radeonsi/si_texture.c     |   7 +-
>>>> >  4 files changed, 99 insertions(+), 110 deletions(-)
>>>> >
>>>> > diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c
>>>> > index 0b3d8f89273..f0d10c054c4 100644
>>>> > --- a/src/gallium/drivers/radeonsi/si_perfcounter.c
>>>> > +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
>>>> > @@ -761,23 +761,22 @@ static void si_pc_query_destroy(struct si_screen *sscreen,
>>>> >  		struct si_query_group *group = query->groups;
>>>> >  		query->groups = group->next;
>>>> >  		FREE(group);
>>>> >  	}
>>>> >
>>>> >  	FREE(query->counters);
>>>> >
>>>> >  	si_query_hw_destroy(sscreen, rquery);
>>>> >  }
>>>> >
>>>> > -static bool si_pc_query_prepare_buffer(struct si_screen *screen,
>>>> > -				       struct si_query_hw *hwquery,
>>>> > -				       struct r600_resource *buffer)
>>>> > +static bool si_pc_query_prepare_buffer(struct si_context *ctx,
>>>> > +				       struct si_query_buffer *qbuf)
>>>> >  {
>>>> >  	/* no-op */
>>>> >  	return true;
>>>> >  }
>>>> >
>>>> >  static void si_pc_query_emit_start(struct si_context *sctx,
>>>> >  				   struct si_query_hw *hwquery,
>>>> >  				   struct r600_resource *buffer, uint64_t va)
>>>> >  {
>>>> >  	struct si_query_pc *query = (struct si_query_pc *)hwquery;
>>>> > @@ -1055,23 +1054,20 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx,
>>>> >  			counter->base = group->result_base + j;
>>>> >  			counter->stride = group->num_counters;
>>>> >
>>>> >  			counter->qwords = 1;
>>>> >  			if ((block->b->b->flags & SI_PC_BLOCK_SE) && group->se < 0)
>>>> >  				counter->qwords = screen->info.max_se;
>>>> >  			if (group->instance < 0)
>>>> >  				counter->qwords *= block->num_instances;
>>>> >  		}
>>>> >
>>>> > -	if (!si_query_hw_init(screen, &query->b))
>>>> > -		goto error;
>>>> > -
>>>> >  	return (struct pipe_query *)query;
>>>> >
>>>> >  error:
>>>> >  	si_pc_query_destroy(screen, &query->b.b);
>>>> >  	return NULL;
>>>> >  }
>>>> >
>>>> >  static bool si_init_block_names(struct si_screen *screen,
>>>> >  				struct si_pc_block *block)
>>>> >  {
>>>> > diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c
>>>> > index 479a1bbf2c4..5b0fba0ed92 100644
>>>> > --- a/src/gallium/drivers/radeonsi/si_query.c
>>>> > +++ b/src/gallium/drivers/radeonsi/si_query.c
>>>> > @@ -514,86 +514,129 @@ static struct pipe_query *si_query_sw_create(unsigned query_type)
>>>> >  	query = CALLOC_STRUCT(si_query_sw);
>>>> >  	if (!query)
>>>> >  		return NULL;
>>>> >
>>>> >  	query->b.type = query_type;
>>>> >  	query->b.ops = &sw_query_ops;
>>>> >
>>>> >  	return (struct pipe_query *)query;
>>>> >  }
>>>> >
>>>> > -void si_query_hw_destroy(struct si_screen *sscreen,
>>>> > -			 struct si_query *rquery)
>>>> > +void si_query_buffer_destroy(struct si_screen *sscreen, struct si_query_buffer *buffer)
>>>> >  {
>>>> > -	struct si_query_hw *query = (struct si_query_hw *)rquery;
>>>> > -	struct si_query_buffer *prev = query->buffer.previous;
>>>> > +	struct si_query_buffer *prev = buffer->previous;
>>>> >
>>>> >  	/* Release all query buffers. */
>>>> >  	while (prev) {
>>>> >  		struct si_query_buffer *qbuf = prev;
>>>> >  		prev = prev->previous;
>>>> >  		r600_resource_reference(&qbuf->buf, NULL);
>>>> >  		FREE(qbuf);
>>>> >  	}
>>>> >
>>>> > -	r600_resource_reference(&query->buffer.buf, NULL);
>>>> > -	r600_resource_reference(&query->workaround_buf, NULL);
>>>> > -	FREE(rquery);
>>>> > +	r600_resource_reference(&buffer->buf, NULL);
>>>> > +}
>>>> > +
>>>> > +void si_query_buffer_reset(struct si_context *sctx, struct si_query_buffer *buffer)
>>>> > +{
>>>> > +	/* Discard all query buffers except for the oldest. */
>>>> > +	while (buffer->previous) {
>>>> > +		struct si_query_buffer *qbuf = buffer->previous;
>>>> > +		buffer->previous = qbuf->previous;
>>>> > +
>>>> > +		r600_resource_reference(&buffer->buf, NULL);
>>>> > +		buffer->buf = qbuf->buf; /* move ownership */
>>>> > +		FREE(qbuf);
>>>> > +	}
>>>> > +	buffer->results_end = 0;
>>>> > +
>>>> > +	/* Discard even the oldest buffer if it can't be mapped without a stall. */
>>>> > +	if (buffer->buf &&
>>>> > +	    (si_rings_is_buffer_referenced(sctx, buffer->buf->buf, RADEON_USAGE_READWRITE) ||
>>>> > +	     !sctx->ws->buffer_wait(buffer->buf->buf, 0, RADEON_USAGE_READWRITE))) {
>>>> > +		r600_resource_reference(&buffer->buf, NULL);
>>>> > +	}
>>>> >  }
>>>> >
>>>> > -static struct r600_resource *si_new_query_buffer(struct si_screen *sscreen,
>>>> > -						 struct si_query_hw *query)
>>>> > +bool si_query_buffer_alloc(struct si_context *sctx, struct si_query_buffer *buffer,
>>>> > +			   bool (*prepare_buffer)(struct si_context *, struct si_query_buffer*),
>>>> > +			   unsigned size)
>>>> >  {
>>>> > -	unsigned buf_size = MAX2(query->result_size,
>>>> > -				 sscreen->info.min_alloc_size);
>>>> > +	if (buffer->buf && buffer->results_end + size >= buffer->buf->b.b.width0)
>>>> > +		return true;
>>>> > +
>>>> > +	if (buffer->buf) {
>>>> > +		struct si_query_buffer *qbuf = MALLOC_STRUCT(si_query_buffer);
>>>> > +		memcpy(qbuf, buffer, sizeof(*qbuf));
>>>> > +		buffer->previous = qbuf;
>>>> > +	}
>>>> > +
>>>> > +	buffer->results_end = 0;
>>>> >
>>>> >  	/* Queries are normally read by the CPU after
>>>> >  	 * being written by the gpu, hence staging is probably a good
>>>> >  	 * usage pattern.
>>>> >  	 */
>>>> > -	struct r600_resource *buf = r600_resource(
>>>> > -		pipe_buffer_create(&sscreen->b, 0,
>>>> > -				   PIPE_USAGE_STAGING, buf_size));
>>>> > -	if (!buf)
>>>> > -		return NULL;
>>>> > +	struct si_screen *screen = sctx->screen;
>>>> > +	unsigned buf_size = MAX2(size, screen->info.min_alloc_size);
>>>> > +	buffer->buf = r600_resource(
>>>> > +		pipe_buffer_create(&screen->b, 0, PIPE_USAGE_STAGING, buf_size));
>>>> > +	if (unlikely(!buffer->buf))
>>>> > +		return false;
>>>> >
>>>> > -	if (!query->ops->prepare_buffer(sscreen, query, buf)) {
>>>> > -		r600_resource_reference(&buf, NULL);
>>>> > -		return NULL;
>>>> > +	if (prepare_buffer) {
>>>> > +		if (unlikely(!prepare_buffer(sctx, buffer))) {
>>>> > +			r600_resource_reference(&buffer->buf, NULL);
>>>> > +			return false;
>>>> > +		}
>>>> >  	}
>>>> >
>>>> > -	return buf;
>>>> > +	return true;
>>>> >  }
>>>> >
>>>> > -static bool si_query_hw_prepare_buffer(struct si_screen *sscreen,
>>>> > -				       struct si_query_hw *query,
>>>> > -				       struct r600_resource *buffer)
>>>> > +
>>>> > +void si_query_hw_destroy(struct si_screen *sscreen,
>>>> > +			 struct si_query *rquery)
>>>> > +{
>>>> > +	struct si_query_hw *query = (struct si_query_hw *)rquery;
>>>> > +
>>>> > +	si_query_buffer_destroy(sscreen, &query->buffer);
>>>> > +	r600_resource_reference(&query->workaround_buf, NULL);
>>>> > +	FREE(rquery);
>>>> > +}
>>>> > +
>>>> > +static bool si_query_hw_prepare_buffer(struct si_context *sctx,
>>>> > +				       struct si_query_buffer *qbuf)
>>>> >  {
>>>> > -	/* Callers ensure that the buffer is currently unused by the GPU. */
>>>> > -	uint32_t *results = sscreen->ws->buffer_map(buffer->buf, NULL,
>>>> > +	static const struct si_query_hw si_query_hw_s;
>>>> > +	struct si_query_hw *query = container_of(qbuf, &si_query_hw_s, buffer);
>>>> > +	struct si_screen *screen = sctx->screen;
>>>> > +
>>>> > +	/* The caller ensures that the buffer is currently unused by the GPU. */
>>>> > +	uint32_t *results = screen->ws->buffer_map(qbuf->buf->buf, NULL,
>>>> >  						   PIPE_TRANSFER_WRITE |
>>>> >  						   PIPE_TRANSFER_UNSYNCHRONIZED);
>>>> >  	if (!results)
>>>> >  		return false;
>>>> >
>>>> > -	memset(results, 0, buffer->b.b.width0);
>>>> > +	memset(results, 0, qbuf->buf->b.b.width0);
>>>> >
>>>> >  	if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER ||
>>>> >  	    query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE ||
>>>> >  	    query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
>>>> > -		unsigned max_rbs = sscreen->info.num_render_backends;
>>>> > -		unsigned enabled_rb_mask = sscreen->info.enabled_rb_mask;
>>>> > +		unsigned max_rbs = screen->info.num_render_backends;
>>>> > +		unsigned enabled_rb_mask = screen->info.enabled_rb_mask;
>>>> >  		unsigned num_results;
>>>> >  		unsigned i, j;
>>>> >
>>>> >  		/* Set top bits for unused backends. */
>>>> > -		num_results = buffer->b.b.width0 / query->result_size;
>>>> > +		num_results = qbuf->buf->b.b.width0 / query->result_size;
>>>> >  		for (j = 0; j < num_results; j++) {
>>>> >  			for (i = 0; i < max_rbs; i++) {
>>>> >  				if (!(enabled_rb_mask & (1<<i))) {
>>>> >  					results[(i * 4)+1] = 0x80000000;
>>>> >  					results[(i * 4)+3] = 0x80000000;
>>>> >  				}
>>>> >  			}
>>>> >  			results += 4 * max_rbs;
>>>> >  		}
>>>> >  	}
>>>> > @@ -624,30 +667,20 @@ static void si_query_hw_clear_result(struct si_query_hw *,
>>>> >  					     union pipe_query_result *);
>>>> >
>>>> >  static struct si_query_hw_ops query_hw_default_hw_ops = {
>>>> >  	.prepare_buffer = si_query_hw_prepare_buffer,
>>>> >  	.emit_start = si_query_hw_do_emit_start,
>>>> >  	.emit_stop = si_query_hw_do_emit_stop,
>>>> >  	.clear_result = si_query_hw_clear_result,
>>>> >  	.add_result = si_query_hw_add_result,
>>>> >  };
>>>> >
>>>> > -bool si_query_hw_init(struct si_screen *sscreen,
>>>> > -		      struct si_query_hw *query)
>>>> > -{
>>>> > -	query->buffer.buf = si_new_query_buffer(sscreen, query);
>>>> > -	if (!query->buffer.buf)
>>>> > -		return false;
>>>> > -
>>>> > -	return true;
>>>> > -}
>>>> > -
>>>> >  static struct pipe_query *si_query_hw_create(struct si_screen *sscreen,
>>>> >  					     unsigned query_type,
>>>> >  					     unsigned index)
>>>> >  {
>>>> >  	struct si_query_hw *query = CALLOC_STRUCT(si_query_hw);
>>>> >  	if (!query)
>>>> >  		return NULL;
>>>> >
>>>> >  	query->b.type = query_type;
>>>> >  	query->b.ops = &query_hw_ops;
>>>> > @@ -693,25 +726,20 @@ static struct pipe_query *si_query_hw_create(struct si_screen *sscreen,
>>>> >  		query->result_size = 11 * 16;
>>>> >  		query->result_size += 8; /* for the fence + alignment */
>>>> >  		query->b.num_cs_dw_suspend = 6 + si_cp_write_fence_dwords(sscreen);
>>>> >  		break;
>>>> >  	default:
>>>> >  		assert(0);
>>>> >  		FREE(query);
>>>> >  		return NULL;
>>>> >  	}
>>>> >
>>>> > -	if (!si_query_hw_init(sscreen, query)) {
>>>> > -		FREE(query);
>>>> > -		return NULL;
>>>> > -	}
>>>> > -
>>>> >  	return (struct pipe_query *)query;
>>>> >  }
>>>> >
>>>> >  static void si_update_occlusion_query_state(struct si_context *sctx,
>>>> >  					    unsigned type, int diff)
>>>> >  {
>>>> >  	if (type == PIPE_QUERY_OCCLUSION_COUNTER ||
>>>> >  	    type == PIPE_QUERY_OCCLUSION_PREDICATE ||
>>>> >  	    type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
>>>> >  		bool old_enable = sctx->num_occlusion_queries != 0;
>>>> > @@ -802,43 +830,31 @@ static void si_query_hw_do_emit_start(struct si_context *sctx,
>>>> >  	}
>>>> >  	radeon_add_to_buffer_list(sctx, sctx->gfx_cs, query->buffer.buf, RADEON_USAGE_WRITE,
>>>> >  				  RADEON_PRIO_QUERY);
>>>> >  }
>>>> >
>>>> >  static void si_query_hw_emit_start(struct si_context *sctx,
>>>> >  				   struct si_query_hw *query)
>>>> >  {
>>>> >  	uint64_t va;
>>>> >
>>>> > -	if (!query->buffer.buf)
>>>> > -		return; // previous buffer allocation failure
>>>> > +	if (!si_query_buffer_alloc(sctx, &query->buffer, query->ops->prepare_buffer,
>>>> > +				   query->result_size))
>>>> > +		return;
>>>> >
>>>> >  	si_update_occlusion_query_state(sctx, query->b.type, 1);
>>>> >  	si_update_prims_generated_query_state(sctx, query->b.type, 1);
>>>> >
>>>> >  	if (query->b.type != SI_QUERY_TIME_ELAPSED_SDMA)
>>>> >  		si_need_gfx_cs_space(sctx);
>>>> >
>>>> > -	/* Get a new query buffer if needed. */
>>>> > -	if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) {
>>>> > -		struct si_query_buffer *qbuf = MALLOC_STRUCT(si_query_buffer);
>>>> > -		*qbuf = query->buffer;
>>>> > -		query->buffer.results_end = 0;
>>>> > -		query->buffer.previous = qbuf;
>>>> > -		query->buffer.buf = si_new_query_buffer(sctx->screen, query);
>>>> > -		if (!query->buffer.buf)
>>>> > -			return;
>>>> > -	}
>>>> > -
>>>> > -	/* emit begin query */
>>>> >  	va = query->buffer.buf->gpu_address + query->buffer.results_end;
>>>> > -
>>>> >  	query->ops->emit_start(sctx, query, query->buffer.buf, va);
>>>> >  }
>>>> >
>>>> >  static void si_query_hw_do_emit_stop(struct si_context *sctx,
>>>> >  				     struct si_query_hw *query,
>>>> >  				     struct r600_resource *buffer,
>>>> >  				     uint64_t va)
>>>> >  {
>>>> >  	struct radeon_cmdbuf *cs = sctx->gfx_cs;
>>>> >  	uint64_t fence_va = 0;
>>>> > @@ -905,26 +921,30 @@ static void si_query_hw_do_emit_stop(struct si_context *sctx,
>>>> >  						  query->buffer.buf, fence_va, 0x80000000,
>>>> >  						  query->b.type);
>>>> >  	}
>>>> >  }
>>>> >
>>>> >  static void si_query_hw_emit_stop(struct si_context *sctx,
>>>> >  				  struct si_query_hw *query)
>>>> >  {
>>>> >  	uint64_t va;
>>>> >
>>>> > -	if (!query->buffer.buf)
>>>> > -		return; // previous buffer allocation failure
>>>> > -
>>>> >  	/* The queries which need begin already called this in begin_query. */
>>>> > -	if (query->flags & SI_QUERY_HW_FLAG_NO_START)
>>>> > +	if (query->flags & SI_QUERY_HW_FLAG_NO_START) {
>>>> >  		si_need_gfx_cs_space(sctx);
>>>> > +		if (!si_query_buffer_alloc(sctx, &query->buffer, query->ops->prepare_buffer,
>>>> > +					   query->result_size))
>>>> > +			return;
>>>> > +	}
>>>> > +
>>>> > +	if (!query->buffer.buf)
>>>> > +		return; // previous buffer allocation failure
>>>> >
>>>> >  	/* emit end query */
>>>> >  	va = query->buffer.buf->gpu_address + query->buffer.results_end;
>>>> >
>>>> >  	query->ops->emit_stop(sctx, query, query->buffer.buf, va);
>>>> >
>>>> >  	query->buffer.results_end += query->result_size;
>>>> >
>>>> >  	si_update_occlusion_query_state(sctx, query->b.type, -1);
>>>> >  	si_update_prims_generated_query_state(sctx, query->b.type, -1);
>>>> > @@ -1054,59 +1074,32 @@ static void si_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
>>>> >
>>>> >  static boolean si_begin_query(struct pipe_context *ctx,
>>>> >  			      struct pipe_query *query)
>>>> >  {
>>>> >  	struct si_context *sctx = (struct si_context *)ctx;
>>>> >  	struct si_query *rquery = (struct si_query *)query;
>>>> >
>>>> >  	return rquery->ops->begin(sctx, rquery);
>>>> >  }
>>>> >
>>>> > -void si_query_hw_reset_buffers(struct si_context *sctx,
>>>> > -			       struct si_query_hw *query)
>>>> > -{
>>>> > -	struct si_query_buffer *prev = query->buffer.previous;
>>>> > -
>>>> > -	/* Discard the old query buffers. */
>>>> > -	while (prev) {
>>>> > -		struct si_query_buffer *qbuf = prev;
>>>> > -		prev = prev->previous;
>>>> > -		r600_resource_reference(&qbuf->buf, NULL);
>>>> > -		FREE(qbuf);
>>>> > -	}
>>>> > -
>>>> > -	query->buffer.results_end = 0;
>>>> > -	query->buffer.previous = NULL;
>>>> > -
>>>> > -	/* Obtain a new buffer if the current one can't be mapped without a stall. */
>>>> > -	if (si_rings_is_buffer_referenced(sctx, query->buffer.buf->buf, RADEON_USAGE_READWRITE) ||
>>>> > -	    !sctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
>>>> > -		r600_resource_reference(&query->buffer.buf, NULL);
>>>> > -		query->buffer.buf = si_new_query_buffer(sctx->screen, query);
>>>> > -	} else {
>>>> > -		if (!query->ops->prepare_buffer(sctx->screen, query, query->buffer.buf))
>>>> > -			r600_resource_reference(&query->buffer.buf, NULL);
>>>> > -	}
>>>> > -}
>>>> > -
>>>> >  bool si_query_hw_begin(struct si_context *sctx,
>>>> >  		       struct si_query *rquery)
>>>> >  {
>>>> >  	struct si_query_hw *query = (struct si_query_hw *)rquery;
>>>> >
>>>> >  	if (query->flags & SI_QUERY_HW_FLAG_NO_START) {
>>>> >  		assert(0);
>>>> >  		return false;
>>>> >  	}
>>>> >
>>>> >  	if (!(query->flags & SI_QUERY_HW_FLAG_BEGIN_RESUMES))
>>>> > -		si_query_hw_reset_buffers(sctx, query);
>>>> > +		si_query_buffer_reset(sctx, &query->buffer);
>>>> >
>>>> >  	r600_resource_reference(&query->workaround_buf, NULL);
>>>> >
>>>> >  	si_query_hw_emit_start(sctx, query);
>>>> >  	if (!query->buffer.buf)
>>>> >  		return false;
>>>> >
>>>> >  	LIST_ADDTAIL(&query->b.active_list, &sctx->active_queries);
>>>> >  	sctx->num_cs_dw_queries_suspend += query->b.num_cs_dw_suspend;
>>>> >  	return true;
>>>> > @@ -1119,21 +1112,21 @@ static bool si_end_query(struct pipe_context *ctx, struct pipe_query *query)
>>>> >
>>>> >  	return rquery->ops->end(sctx, rquery);
>>>> >  }
>>>> >
>>>> >  bool si_query_hw_end(struct si_context *sctx,
>>>> >  		     struct si_query *rquery)
>>>> >  {
>>>> >  	struct si_query_hw *query = (struct si_query_hw *)rquery;
>>>> >
>>>> >  	if (query->flags & SI_QUERY_HW_FLAG_NO_START)
>>>> > -		si_query_hw_reset_buffers(sctx, query);
>>>> > +		si_query_buffer_reset(sctx, &query->buffer);
>>>> >
>>>> >  	si_query_hw_emit_stop(sctx, query);
>>>> >
>>>> >  	if (!(query->flags & SI_QUERY_HW_FLAG_NO_START)) {
>>>> >  		LIST_DELINIT(&query->b.active_list);
>>>> >  		sctx->num_cs_dw_queries_suspend -= query->b.num_cs_dw_suspend;
>>>> >  	}
>>>> >
>>>> >  	if (!query->buffer.buf)
>>>> >  		return false;
>>>> > diff --git a/src/gallium/drivers/radeonsi/si_query.h b/src/gallium/drivers/radeonsi/si_query.h
>>>> > index ebd965a004f..63af760a271 100644
>>>> > --- a/src/gallium/drivers/radeonsi/si_query.h
>>>> > +++ b/src/gallium/drivers/radeonsi/si_query.h
>>>> > @@ -27,20 +27,21 @@
>>>> >
>>>> >  #include "util/u_threaded_context.h"
>>>> >
>>>> >  struct pipe_context;
>>>> >  struct pipe_query;
>>>> >  struct pipe_resource;
>>>> >
>>>> >  struct si_screen;
>>>> >  struct si_context;
>>>> >  struct si_query;
>>>> > +struct si_query_buffer;
>>>> >  struct si_query_hw;
>>>> >  struct r600_resource;
>>>> >
>>>> >  enum {
>>>> >  	SI_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC,
>>>> >  	SI_QUERY_DECOMPRESS_CALLS,
>>>> >  	SI_QUERY_MRT_DRAW_CALLS,
>>>> >  	SI_QUERY_PRIM_RESTART_CALLS,
>>>> >  	SI_QUERY_SPILL_DRAW_CALLS,
>>>> >  	SI_QUERY_COMPUTE_CALLS,
>>>> > @@ -153,23 +154,21 @@ struct si_query {
>>>> >  };
>>>> >
>>>> >  enum {
>>>> >  	SI_QUERY_HW_FLAG_NO_START = (1 << 0),
>>>> >  	/* gap */
>>>> >  	/* whether begin_query doesn't clear the result */
>>>> >  	SI_QUERY_HW_FLAG_BEGIN_RESUMES = (1 << 2),
>>>> >  };
>>>> >
>>>> >  struct si_query_hw_ops {
>>>> > -	bool (*prepare_buffer)(struct si_screen *,
>>>> > -			       struct si_query_hw *,
>>>> > -			       struct r600_resource *);
>>>> > +	bool (*prepare_buffer)(struct si_context *, struct si_query_buffer *);
>>>> >  	void (*emit_start)(struct si_context *,
>>>> >  			   struct si_query_hw *,
>>>> >  			   struct r600_resource *buffer, uint64_t va);
>>>> >  	void (*emit_stop)(struct si_context *,
>>>> >  			  struct si_query_hw *,
>>>> >  			  struct r600_resource *buffer, uint64_t va);
>>>> >  	void (*clear_result)(struct si_query_hw *, union pipe_query_result *);
>>>> >  	void (*add_result)(struct si_screen *screen,
>>>> >  			   struct si_query_hw *, void *buffer,
>>>> >  			   union pipe_query_result *result);
>>>> > @@ -179,40 +178,45 @@ struct si_query_buffer {
>>>> >  	/* The buffer where query results are stored. */
>>>> >  	struct r600_resource *buf;
>>>> >  	/* Offset of the next free result after current query data */
>>>> >  	unsigned results_end;
>>>> >  	/* If a query buffer is full, a new buffer is created and the old one
>>>> >  	 * is put in here. When we calculate the result, we sum up the samples
>>>> >  	 * from all buffers. */
>>>> >  	struct si_query_buffer *previous;
>>>> >  };
>>>> >
>>>> > +void si_query_buffer_destroy(struct si_screen *sctx, struct si_query_buffer *buffer);
>>>> > +void si_query_buffer_reset(struct si_context *sctx, struct si_query_buffer *buffer);
>>>> > +bool si_query_buffer_alloc(struct si_context *sctx, struct si_query_buffer *buffer,
>>>> > +			   bool (*prepare_buffer)(struct si_context *, struct si_query_buffer*),
>>>> > +			   unsigned size);
>>>> > +
>>>> > +
>>>> >  struct si_query_hw {
>>>> >  	struct si_query b;
>>>> >  	struct si_query_hw_ops *ops;
>>>> >  	unsigned flags;
>>>> >
>>>> >  	/* The query buffer and how many results are in it. */
>>>> >  	struct si_query_buffer buffer;
>>>> >  	/* Size of the result in memory for both begin_query and end_query,
>>>> >  	 * this can be one or two numbers, or it could even be a size of a structure. */
>>>> >  	unsigned result_size;
>>>> >  	/* For transform feedback: which stream the query is for */
>>>> >  	unsigned stream;
>>>> >
>>>> >  	/* Workaround via compute shader */
>>>> >  	struct r600_resource *workaround_buf;
>>>> >  	unsigned workaround_offset;
>>>> >  };
>>>> >
>>>> > -bool si_query_hw_init(struct si_screen *sscreen,
>>>> > -		      struct si_query_hw *query);
>>>> >  void si_query_hw_destroy(struct si_screen *sscreen,
>>>> >  			 struct si_query *rquery);
>>>> >  bool si_query_hw_begin(struct si_context *sctx,
>>>> >  		       struct si_query *rquery);
>>>> >  bool si_query_hw_end(struct si_context *sctx,
>>>> >  		     struct si_query *rquery);
>>>> >  bool si_query_hw_get_result(struct si_context *sctx,
>>>> >  			    struct si_query *rquery,
>>>> >  			    bool wait,
>>>> >  			    union pipe_query_result *result);
>>>> > @@ -237,20 +241,17 @@ struct pipe_query *si_create_batch_query(struct pipe_context *ctx,
>>>> >  					 unsigned num_queries,
>>>> >  					 unsigned *query_types);
>>>> >
>>>> >  int si_get_perfcounter_info(struct si_screen *,
>>>> >  			    unsigned index,
>>>> >  			    struct pipe_driver_query_info *info);
>>>> >  int si_get_perfcounter_group_info(struct si_screen *,
>>>> >  				  unsigned index,
>>>> >  				  struct pipe_driver_query_group_info *info);
>>>> >
>>>> > -void si_query_hw_reset_buffers(struct si_context *sctx,
>>>> > -			       struct si_query_hw *query);
>>>> > -
>>>> >  struct si_qbo_state {
>>>> >  	void *saved_compute;
>>>> >  	struct pipe_constant_buffer saved_const0;
>>>> >  	struct pipe_shader_buffer saved_ssbo[3];
>>>> >  };
>>>> >
>>>> >  #endif /* SI_QUERY_H */
>>>> > diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c
>>>> > index ac1a0aa6097..9df12e0f5bd 100644
>>>> > --- a/src/gallium/drivers/radeonsi/si_texture.c
>>>> > +++ b/src/gallium/drivers/radeonsi/si_texture.c
>>>> > @@ -2276,25 +2276,24 @@ void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx,
>>>> >  	struct si_context *sctx = (struct si_context*)ctx;
>>>> >  	struct pipe_query *tmp;
>>>> >  	unsigned i = vi_get_context_dcc_stats_index(sctx, tex);
>>>> >  	bool query_active = sctx->dcc_stats[i].query_active;
>>>> >  	bool disable = false;
>>>> >
>>>> >  	if (sctx->dcc_stats[i].ps_stats[2]) {
>>>> >  		union pipe_query_result result;
>>>> >
>>>> >  		/* Read the results. */
>>>> > -		ctx->get_query_result(ctx, sctx->dcc_stats[i].ps_stats[2],
>>>> > +		struct pipe_query *query = sctx->dcc_stats[i].ps_stats[2];
>>>> > +		ctx->get_query_result(ctx, query,
>>>> >  				      true, &result);
>>>> > -		si_query_hw_reset_buffers(sctx,
>>>> > -					  (struct si_query_hw*)
>>>> > -					  sctx->dcc_stats[i].ps_stats[2]);
>>>> > +		si_query_buffer_reset(sctx, &((struct si_query_hw*)query)->buffer);
>>>> >
>>>> >  		/* Compute the approximate number of fullscreen draws. */
>>>> >  		tex->ps_draw_ratio =
>>>> >  			result.pipeline_statistics.ps_invocations /
>>>> >  			(tex->buffer.b.b.width0 * tex->buffer.b.b.height0);
>>>> >  		sctx->last_tex_ps_draw_ratio = tex->ps_draw_ratio;
>>>> >
>>>> >  		disable = tex->dcc_separate_buffer &&
>>>> >  			  !vi_should_enable_separate_dcc(tex);
>>>> >  	}
>>>> >
>>>> _______________________________________________
>>>> mesa-dev mailing list
>>>> mesa-dev@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
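For readers skimming the quoted patch: the sketch below illustrates the pattern it factors out, namely a query-buffer struct that query objects embed by composition, with a lazy allocation helper that chains full buffers instead of allocating at query-creation time. Everything here is a simplified stand-in for illustration only, not the radeonsi API: gpu_buffer, query_buffer, the malloc-based backing and the 4096-byte minimum allocation are assumptions made for the example.

/* Illustrative sketch only -- hypothetical stand-in types, not driver code. */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct gpu_buffer {                     /* stand-in for r600_resource */
	unsigned size;
};

struct query_buffer {                   /* mirrors the role of si_query_buffer */
	struct gpu_buffer *buf;         /* current results buffer */
	unsigned results_end;           /* offset of the next free result */
	struct query_buffer *previous;  /* full buffers, kept for result summation */
};

/* Lazily make room for one more result of 'size' bytes, chaining the current
 * buffer when it is full -- the pattern the patch moves into a shared helper. */
static bool query_buffer_alloc(struct query_buffer *qb, unsigned size)
{
	if (qb->buf && qb->results_end + size <= qb->buf->size)
		return true;                        /* enough room left */

	if (qb->buf) {                              /* current buffer is full */
		struct query_buffer *full = malloc(sizeof(*full));
		if (!full)
			return false;
		*full = *qb;                        /* move ownership into the chain */
		qb->previous = full;
	}

	qb->results_end = 0;
	qb->buf = malloc(sizeof(*qb->buf));
	if (!qb->buf)
		return false;
	qb->buf->size = size < 4096 ? 4096 : size;  /* assumed minimum allocation */
	return true;
}

/* Release the whole chain, as the destroy helper in the patch does. */
static void query_buffer_destroy(struct query_buffer *qb)
{
	while (qb->previous) {
		struct query_buffer *full = qb->previous;
		qb->previous = full->previous;
		free(full->buf);
		free(full);
	}
	free(qb->buf);
	qb->buf = NULL;
	qb->results_end = 0;
}

/* A query type then *embeds* the buffer instead of inheriting its handling. */
struct hw_query {
	struct query_buffer buffer;
	unsigned result_size;
};

int main(void)
{
	struct hw_query q = { .result_size = 64 };

	for (int i = 0; i < 100; i++) {             /* emit 100 results */
		if (!query_buffer_alloc(&q.buffer, q.result_size))
			return 1;
		q.buffer.results_end += q.result_size;
	}
	printf("results_end in newest buffer: %u\n", q.buffer.results_end);

	query_buffer_destroy(&q.buffer);
	return 0;
}

The payoff of the composition approach is visible in the diff itself: the perf-counter queries, the regular hardware queries and the DCC statistics code in si_texture.c can all call the same alloc/reset/destroy helpers instead of each relying on si_query_hw's buffer handling.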
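Along the same lines, a minimal sketch of the reset policy that the new si_query_buffer_reset implements in the quoted patch: keep only the oldest buffer in the chain for reuse, and drop even that one if reusing it would stall the CPU on the GPU. The 'busy' flag below stands in for the si_rings_is_buffer_referenced()/buffer_wait() checks in the real code; the rest is hypothetical scaffolding, not the driver API.

/* Illustrative sketch only -- hypothetical stand-in types, not driver code. */
#include <stdbool.h>
#include <stdlib.h>

struct gpu_buffer {
	bool busy;                      /* stand-in for "still referenced by the GPU" */
};

struct query_buffer {
	struct gpu_buffer *buf;
	unsigned results_end;
	struct query_buffer *previous;
};

static void query_buffer_reset(struct query_buffer *qb)
{
	/* Discard all buffers except the oldest one in the chain. */
	while (qb->previous) {
		struct query_buffer *prev = qb->previous;
		qb->previous = prev->previous;

		free(qb->buf);
		qb->buf = prev->buf;    /* move ownership of the older buffer */
		free(prev);
	}
	qb->results_end = 0;

	/* Drop even the oldest buffer if reusing it now would stall;
	 * a later alloc call simply creates a fresh one. */
	if (qb->buf && qb->buf->busy) {
		free(qb->buf);
		qb->buf = NULL;
	}
}

int main(void)
{
	/* Build a chain of two buffers: 'qb' is the newest, 'link' the oldest. */
	struct query_buffer *link = calloc(1, sizeof(*link));
	link->buf = calloc(1, sizeof(struct gpu_buffer));

	struct query_buffer qb = {
		.buf = calloc(1, sizeof(struct gpu_buffer)),
		.results_end = 128,
		.previous = link,
	};

	query_buffer_reset(&qb);        /* keeps the idle oldest buffer, results_end = 0 */
	free(qb.buf);
	return 0;
}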