Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>
On 06.08.2016 17:32, Marek Olšák wrote:
From: Marek Olšák <marek.ol...@amd.com> +23% Bioshock Infinite performance. v2: - use the new fence_finish interface - allow deferred fences with multiple contexts - clear the ctx pointer after a deferred flush --- src/gallium/drivers/radeon/r600_pipe_common.c | 44 ++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 119fdf5..1c56e6e 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -40,20 +40,26 @@ #include <sys/utsname.h> #ifndef HAVE_LLVM #define HAVE_LLVM 0 #endif struct r600_multi_fence { struct pipe_reference reference; struct pipe_fence_handle *gfx; struct pipe_fence_handle *sdma; + + /* If the context wasn't flushed at fence creation, this is non-NULL. */ + struct { + struct r600_common_context *ctx; + unsigned ib_index; + } gfx_unflushed; }; /* * shader binary helpers. */ void radeon_shader_binary_init(struct radeon_shader_binary *b) { memset(b, 0, sizeof(*b)); } @@ -255,42 +261,59 @@ void r600_postflush_resume_features(struct r600_common_context *ctx) static void r600_flush_from_st(struct pipe_context *ctx, struct pipe_fence_handle **fence, unsigned flags) { struct pipe_screen *screen = ctx->screen; struct r600_common_context *rctx = (struct r600_common_context *)ctx; unsigned rflags = 0; struct pipe_fence_handle *gfx_fence = NULL; struct pipe_fence_handle *sdma_fence = NULL; + bool deferred_fence = false; if (flags & PIPE_FLUSH_END_OF_FRAME) rflags |= RADEON_FLUSH_END_OF_FRAME; if (flags & PIPE_FLUSH_DEFERRED) rflags |= RADEON_FLUSH_ASYNC; if (rctx->dma.cs) { rctx->dma.flush(rctx, rflags, fence ? &sdma_fence : NULL); } - rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL); + + /* Instead of flushing, create a deferred fence. Constraints: + * - The state tracker must allow a deferred flush. + * - The state tracker must request a fence. + * Thread safety in fence_finish must be ensured by the state tracker. + */ + if (flags & PIPE_FLUSH_DEFERRED && fence) { + gfx_fence = rctx->ws->cs_get_next_fence(rctx->gfx.cs); + deferred_fence = true; + } else { + rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL); + } /* Both engines can signal out of order, so we need to keep both fences. */ if (gfx_fence || sdma_fence) { struct r600_multi_fence *multi_fence = CALLOC_STRUCT(r600_multi_fence); if (!multi_fence) return; multi_fence->reference.count = 1; multi_fence->gfx = gfx_fence; multi_fence->sdma = sdma_fence; + if (deferred_fence) { + multi_fence->gfx_unflushed.ctx = rctx; + multi_fence->gfx_unflushed.ib_index = rctx->num_gfx_cs_flushes; + } + screen->fence_reference(screen, fence, NULL); *fence = (struct pipe_fence_handle*)multi_fence; } } static void r600_flush_dma_ring(void *ctx, unsigned flags, struct pipe_fence_handle **fence) { struct r600_common_context *rctx = (struct r600_common_context *)ctx; struct radeon_winsys_cs *cs = rctx->dma.cs; @@ -953,36 +976,55 @@ static void r600_fence_reference(struct pipe_screen *screen, *rdst = rsrc; } static boolean r600_fence_finish(struct pipe_screen *screen, struct pipe_context *ctx, struct pipe_fence_handle *fence, uint64_t timeout) { struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws; struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence; + struct r600_common_context *rctx = + ctx ? (struct r600_common_context*)ctx : NULL; int64_t abs_timeout = os_time_get_absolute_timeout(timeout); if (rfence->sdma) { if (!rws->fence_wait(rws, rfence->sdma, timeout)) return false; /* Recompute the timeout after waiting. */ if (timeout && timeout != PIPE_TIMEOUT_INFINITE) { int64_t time = os_time_get_nano(); timeout = abs_timeout > time ? abs_timeout - time : 0; } } if (!rfence->gfx) return true; + /* Flush the gfx IB if it hasn't been flushed yet. */ + if (rctx && + rfence->gfx_unflushed.ctx == rctx && + rfence->gfx_unflushed.ib_index == rctx->num_gfx_cs_flushes) { + rctx->gfx.flush(rctx, timeout ? 0 : RADEON_FLUSH_ASYNC, NULL); + rfence->gfx_unflushed.ctx = NULL; + + if (!timeout) + return false; + + /* Recompute the timeout after all that. */ + if (timeout && timeout != PIPE_TIMEOUT_INFINITE) { + int64_t time = os_time_get_nano(); + timeout = abs_timeout > time ? abs_timeout - time : 0; + } + } + return rws->fence_wait(rws, rfence->gfx, timeout); } static void r600_query_memory_info(struct pipe_screen *screen, struct pipe_memory_info *info) { struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; struct radeon_winsys *ws = rscreen->ws; unsigned vram_usage, gtt_usage;
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev