For now, reordering of draws/blits is not enabled by default, but it can be enabled (on a3xx/a4xx) with FD_MESA_DEBUG=reorder.
Signed-off-by: Rob Clark <robdcl...@gmail.com> --- src/gallium/drivers/freedreno/freedreno_batch.c | 168 ++++++++++++++++++--- src/gallium/drivers/freedreno/freedreno_batch.h | 1 + src/gallium/drivers/freedreno/freedreno_context.c | 38 ++--- src/gallium/drivers/freedreno/freedreno_context.h | 2 - src/gallium/drivers/freedreno/freedreno_query_hw.c | 2 +- src/gallium/drivers/freedreno/freedreno_resource.c | 6 +- src/gallium/drivers/freedreno/freedreno_resource.h | 1 + src/gallium/drivers/freedreno/freedreno_screen.c | 9 ++ src/gallium/drivers/freedreno/freedreno_screen.h | 2 + src/gallium/drivers/freedreno/freedreno_state.c | 15 +- src/gallium/drivers/freedreno/freedreno_util.h | 1 + 11 files changed, 188 insertions(+), 57 deletions(-) diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c b/src/gallium/drivers/freedreno/freedreno_batch.c index 5c6ae76..9d5bcf8 100644 --- a/src/gallium/drivers/freedreno/freedreno_batch.c +++ b/src/gallium/drivers/freedreno/freedreno_batch.c @@ -25,26 +25,20 @@ */ #include "util/list.h" +#include "util/set.h" +#include "util/hash_table.h" #include "util/u_string.h" #include "freedreno_batch.h" #include "freedreno_context.h" #include "freedreno_resource.h" -struct fd_batch * -fd_batch_create(struct fd_context *ctx) +static void +batch_init(struct fd_batch *batch) { - struct fd_batch *batch = CALLOC_STRUCT(fd_batch); - static unsigned seqno = 0; + struct fd_context *ctx = batch->ctx; unsigned size = 0; - if (!batch) - return NULL; - - pipe_reference_init(&batch->reference, 1); - batch->seqno = ++seqno; - batch->ctx = ctx; - /* if kernel is too old to support unlimited # of cmd buffers, we * have no option but to allocate large worst-case sizes so that * we don't need to grow the ringbuffer. 
Performance is likely to @@ -62,7 +56,11 @@ fd_batch_create(struct fd_context *ctx) fd_ringbuffer_set_parent(batch->draw, batch->gmem); fd_ringbuffer_set_parent(batch->binning, batch->gmem); - list_inithead(&batch->used_resources); + batch->cleared = batch->partial_cleared = 0; + batch->restore = batch->resolve = 0; + batch->needs_flush = false; + batch->gmem_reason = 0; + batch->num_draws = 0; /* reset maximal bounds: */ batch->max_scissor.minx = batch->max_scissor.miny = ~0; @@ -73,16 +71,37 @@ fd_batch_create(struct fd_context *ctx) if (is_a3xx(ctx->screen)) util_dynarray_init(&batch->rbrc_patches); - return batch; + assert(LIST_IS_EMPTY(&batch->used_resources)); } -void -__fd_batch_destroy(struct fd_batch *batch) +struct fd_batch * +fd_batch_create(struct fd_context *ctx) { - fd_bc_invalidate_batch(batch); + struct fd_batch *batch = CALLOC_STRUCT(fd_batch); + static unsigned seqno = 0; - util_copy_framebuffer_state(&batch->framebuffer, NULL); + if (!batch) + return NULL; + + DBG("%p", batch); + + pipe_reference_init(&batch->reference, 1); + batch->seqno = ++seqno; + batch->ctx = ctx; + + list_inithead(&batch->used_resources); + + batch_init(batch); + + batch->dependencies = _mesa_set_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + return batch; +} + +static void +batch_fini(struct fd_batch *batch) +{ fd_ringbuffer_del(batch->draw); fd_ringbuffer_del(batch->binning); fd_ringbuffer_del(batch->gmem); @@ -91,6 +110,51 @@ __fd_batch_destroy(struct fd_batch *batch) if (is_a3xx(batch->ctx->screen)) util_dynarray_fini(&batch->rbrc_patches); +} + +static void +batch_reset(struct fd_batch *batch) +{ + struct set_entry *entry; + + DBG("%p", batch); + + batch_fini(batch); + batch_init(batch); + + set_foreach(batch->dependencies, entry) { + struct fd_batch *dep = (struct fd_batch *)entry->key; + _mesa_set_remove(batch->dependencies, entry); + fd_batch_reference(&dep, NULL); + } +} + +void +fd_batch_reset(struct fd_batch *batch) +{ + if (batch->needs_flush) + 
batch_reset(batch); +} + +static void +unref_batch(struct set_entry *entry) +{ + struct fd_batch *batch = (struct fd_batch *)entry->key; + fd_batch_reference(&batch, NULL); +} + +void +__fd_batch_destroy(struct fd_batch *batch) +{ + fd_bc_invalidate_batch(batch); + + DBG("%p", batch); + + util_copy_framebuffer_state(&batch->framebuffer, NULL); + + batch_fini(batch); + + _mesa_set_destroy(batch->dependencies, unref_batch); free(batch); } @@ -101,16 +165,26 @@ __fd_batch_describe(char* buf, const struct fd_batch *batch) util_sprintf(buf, "fd_batch<%u>", batch->seqno); } -void -fd_batch_flush(struct fd_batch *batch) +static void +batch_flush(struct fd_batch *batch) { struct fd_resource *rsc, *rsc_tmp; + struct set_entry *entry; DBG("%p: needs_flush=%d", batch, batch->needs_flush); if (!batch->needs_flush) return; + batch->needs_flush = false; + + set_foreach(batch->dependencies, entry) { + struct fd_batch *dep = (struct fd_batch *)entry->key; + fd_batch_flush(dep); + _mesa_set_remove(batch->dependencies, entry); + fd_batch_reference(&dep, NULL); + } + fd_gmem_render_tiles(batch); /* go through all the used resources and clear their reading flag */ @@ -119,18 +193,67 @@ fd_batch_flush(struct fd_batch *batch) debug_assert(rsc->status != 0); rsc->status = 0; fd_batch_reference(&rsc->pending_batch, NULL); + fd_batch_reference(&rsc->write_batch, NULL); list_delinit(&rsc->list); } assert(LIST_IS_EMPTY(&batch->used_resources)); - batch->needs_flush = false; - fd_bc_invalidate_batch(batch); + + if (batch == batch->ctx->batch) { + batch_reset(batch); + } else { + fd_bc_invalidate_batch(batch); + } +} + +void +fd_batch_flush(struct fd_batch *batch) +{ + /* NOTE: we need to hold an extra ref across the body of flush, + * since the last ref to this batch could be dropped when cleaning + * up used_resources + */ + struct fd_batch *tmp = NULL; + fd_batch_reference(&tmp, batch); + batch_flush(tmp); + fd_batch_reference(&tmp, NULL); +} + +static void +batch_add_dep(struct fd_batch 
*batch, struct fd_batch *dep) +{ + if (!_mesa_set_search(batch->dependencies, dep)) { + struct fd_batch *other = NULL; + fd_batch_reference(&other, dep); + _mesa_set_add(batch->dependencies, other); + } +} + +static void +batch_update_dep(struct fd_batch *batch, struct fd_resource *rsc, + enum fd_resource_status status) +{ + switch (status) { + case FD_PENDING_WRITE: + DBG("%p: flush forced! (%p, %d)\n", rsc->pending_batch, rsc, rsc->status); + fd_batch_flush(rsc->pending_batch); + assert(rsc->pending_batch == NULL); + break; + case FD_PENDING_READ: + if (rsc->write_batch) + batch_add_dep(batch, rsc->write_batch); + batch_add_dep(batch, rsc->pending_batch); + break; + } } void fd_batch_resource_used(struct fd_batch *batch, struct fd_resource *rsc, enum fd_resource_status status) { + if (unlikely(rsc->pending_batch && (rsc->pending_batch != batch))) + batch_update_dep(batch, rsc, status); + rsc->status |= status; if (rsc->stencil) @@ -139,7 +262,6 @@ fd_batch_resource_used(struct fd_batch *batch, struct fd_resource *rsc, /* TODO resources can actually be shared across contexts, * so I'm not sure a single list-head will do the trick? 
*/ - debug_assert((rsc->pending_batch == batch) || !rsc->pending_batch); list_delinit(&rsc->list); list_addtail(&rsc->list, &batch->used_resources); fd_batch_reference(&rsc->pending_batch, batch); @@ -154,5 +276,5 @@ fd_batch_check_size(struct fd_batch *batch) struct fd_ringbuffer *ring = batch->draw; if (((ring->cur - ring->start) > (ring->size/4 - 0x1000)) || (fd_mesa_debug & FD_DBG_FLUSH)) - fd_context_render(&batch->ctx->base); + fd_batch_flush(batch); } diff --git a/src/gallium/drivers/freedreno/freedreno_batch.h b/src/gallium/drivers/freedreno/freedreno_batch.h index d500f95..44da3c4 100644 --- a/src/gallium/drivers/freedreno/freedreno_batch.h +++ b/src/gallium/drivers/freedreno/freedreno_batch.h @@ -129,6 +129,7 @@ struct fd_batch { struct fd_batch * fd_batch_create(struct fd_context *ctx); +void fd_batch_reset(struct fd_batch *batch); void fd_batch_flush(struct fd_batch *batch); void fd_batch_resource_used(struct fd_batch *batch, struct fd_resource *rsc, enum fd_resource_status status); diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c index 4359fb2..3a16a51 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.c +++ b/src/gallium/drivers/freedreno/freedreno_context.c @@ -38,39 +38,27 @@ #include "freedreno_query_hw.h" #include "freedreno_util.h" -/* emit accumulated render cmds, needed for example if render target has - * changed, or for flush() - */ -void -fd_context_render(struct pipe_context *pctx) -{ - struct fd_context *ctx = fd_context(pctx); - struct fd_batch *new_batch; - - fd_batch_flush(ctx->batch); - - new_batch = fd_batch_create(ctx); - util_copy_framebuffer_state(&new_batch->framebuffer, &ctx->batch->framebuffer); - fd_batch_reference(&ctx->batch, NULL); - ctx->batch = new_batch; -} - static void fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence, unsigned flags) { - struct fd_batch *batch = NULL; - - fd_batch_reference(&batch, 
fd_context(pctx)->batch); - - fd_context_render(pctx); + struct fd_context *ctx = fd_context(pctx); + uint32_t timestamp; + + if (!ctx->screen->reorder) { + struct fd_batch *batch = NULL; + fd_batch_reference(&batch, ctx->batch); + fd_batch_flush(batch); + timestamp = fd_ringbuffer_timestamp(batch->gmem); + fd_batch_reference(&batch, NULL); + } else { + timestamp = fd_bc_flush(&ctx->batch_cache); + } if (fence) { fd_screen_fence_ref(pctx->screen, fence, NULL); - *fence = fd_fence_create(pctx, fd_ringbuffer_timestamp(batch->gmem)); + *fence = fd_fence_create(pctx, timestamp); } - - fd_batch_reference(&batch, NULL); } /** diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index 6be7437..012f452 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -378,8 +378,6 @@ struct pipe_context * fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen, const uint8_t *primtypes, void *priv); -void fd_context_render(struct pipe_context *pctx); - void fd_context_destroy(struct pipe_context *pctx); #endif /* FREEDRENO_CONTEXT_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.c b/src/gallium/drivers/freedreno/freedreno_query_hw.c index ec8bf20..a55aee2 100644 --- a/src/gallium/drivers/freedreno/freedreno_query_hw.c +++ b/src/gallium/drivers/freedreno/freedreno_query_hw.c @@ -210,7 +210,7 @@ fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q, if (!ctx->batch->needs_flush) return true; DBG("reading query result forces flush!"); - fd_context_render(&ctx->base); + fd_batch_flush(ctx->batch); } util_query_clear_result(result, q->type); diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c index 4fd8559..d7603b2 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.c +++ b/src/gallium/drivers/freedreno/freedreno_resource.c @@ -111,6 +111,7 @@ 
realloc_bo(struct fd_resource *rsc, uint32_t size) rsc->timestamp = 0; rsc->status = 0; fd_batch_reference(&rsc->pending_batch, NULL); + fd_batch_reference(&rsc->write_batch, NULL); list_delinit(&rsc->list); util_range_set_empty(&rsc->valid_buffer_range); } @@ -327,7 +328,7 @@ fd_resource_transfer_map(struct pipe_context *pctx, if (((ptrans->usage & PIPE_TRANSFER_WRITE) && pending(rsc, FD_PENDING_READ | FD_PENDING_WRITE)) || pending(rsc, FD_PENDING_WRITE)) - fd_context_render(pctx); + fd_batch_flush(rsc->pending_batch); /* The GPU keeps track of how the various bo's are being used, and * will wait if necessary for the proper operation to have @@ -456,6 +457,7 @@ fd_resource_destroy(struct pipe_screen *pscreen, if (rsc->bo) fd_bo_del(rsc->bo); fd_batch_reference(&rsc->pending_batch, NULL); + fd_batch_reference(&rsc->write_batch, NULL); list_delinit(&rsc->list); util_range_destroy(&rsc->valid_buffer_range); FREE(rsc); @@ -849,7 +851,7 @@ fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc) struct fd_resource *rsc = fd_resource(prsc); if (pending(rsc, FD_PENDING_WRITE | FD_PENDING_READ)) - fd_context_render(pctx); + fd_batch_flush(rsc->pending_batch); } void diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h index 3b990a9..2615527 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.h +++ b/src/gallium/drivers/freedreno/freedreno_resource.h @@ -96,6 +96,7 @@ struct fd_resource { */ struct list_head list; struct fd_batch *pending_batch; + struct fd_batch *write_batch; /* set of batches whose batch-cache key references this resource: */ struct set *batches; diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 5255c10..a18df54 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -75,6 +75,7 @@ static const struct debug_named_value debug_options[] 
= { {"flush", FD_DBG_FLUSH, "Force flush after every draw"}, {"deqp", FD_DBG_DEQP, "Enable dEQP hacks"}, {"nir", FD_DBG_NIR, "Prefer NIR as native IR"}, + {"reorder", FD_DBG_REORDER,"Enable reordering for draws/blits"}, DEBUG_NAMED_VALUE_END }; @@ -649,6 +650,14 @@ fd_screen_create(struct fd_device *dev) goto fail; } + /* NOTE: don't enable reordering on a2xx, since completely untested. + * Also, don't enable if we have too old of a kernel to support + * growable cmdstream buffers, since memory requirement for cmdstream + * buffers would be too much otherwise. + */ + if ((screen->gpu_id >= 300) && (fd_device_version(dev) >= FD_VERSION_UNLIMITED_CMDS)) + screen->reorder = !!(fd_mesa_debug & FD_DBG_REORDER); + pscreen->destroy = fd_screen_destroy; pscreen->get_param = fd_screen_get_param; pscreen->get_paramf = fd_screen_get_paramf; diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h index a81c778..67fa689 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.h +++ b/src/gallium/drivers/freedreno/freedreno_screen.h @@ -65,6 +65,8 @@ struct fd_screen { struct fd_pipe *pipe; int64_t cpu_gpu_time_delta; + + bool reorder; }; static inline struct fd_screen * diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c index 98b56c7..63ffa0c 100644 --- a/src/gallium/drivers/freedreno/freedreno_state.c +++ b/src/gallium/drivers/freedreno/freedreno_state.c @@ -117,10 +117,17 @@ fd_set_framebuffer_state(struct pipe_context *pctx, struct fd_context *ctx = fd_context(pctx); struct pipe_framebuffer_state *cso; - DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->batch->needs_flush, - framebuffer->cbufs[0], framebuffer->zsbuf); - - fd_context_render(pctx); + if (ctx->screen->reorder) { + struct fd_batch *batch = + fd_batch_from_fb(&ctx->batch_cache, ctx, framebuffer); + fd_batch_reference(&ctx->batch, NULL); + ctx->batch = batch; + ctx->dirty = ~0; + } else { + DBG("%d: 
cbufs[0]=%p, zsbuf=%p", ctx->batch->needs_flush, + framebuffer->cbufs[0], framebuffer->zsbuf); + fd_batch_flush(ctx->batch); + } cso = &ctx->batch->framebuffer; diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h index 8f125d9..5cb958e 100644 --- a/src/gallium/drivers/freedreno/freedreno_util.h +++ b/src/gallium/drivers/freedreno/freedreno_util.h @@ -75,6 +75,7 @@ enum adreno_stencil_op fd_stencil_op(unsigned op); #define FD_DBG_FLUSH 0x1000 #define FD_DBG_DEQP 0x2000 #define FD_DBG_NIR 0x4000 +#define FD_DBG_REORDER 0x8000 extern int fd_mesa_debug; extern bool fd_binning_enabled; -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev