From: Nicolai Hähnle <nicolai.haeh...@amd.com> --- src/gallium/drivers/radeonsi/si_fence.c | 83 ++++++++++++++++++++++++++++++++- 1 file changed, 82 insertions(+), 1 deletion(-)
diff --git a/src/gallium/drivers/radeonsi/si_fence.c b/src/gallium/drivers/radeonsi/si_fence.c index b7b02b55831..bc0ae302945 100644 --- a/src/gallium/drivers/radeonsi/si_fence.c +++ b/src/gallium/drivers/radeonsi/si_fence.c @@ -22,33 +22,41 @@ * */ #include <libsync.h> #include "util/os_time.h" #include "util/u_memory.h" #include "util/u_queue.h" #include "si_pipe.h" +#include "radeon/r600_cs.h" + +struct si_fine_fence { /* Fine-grained fence: one 32-bit word in a GPU buffer. si_fine_fence_set makes the GPU write 0x80000000 into it and si_fine_fence_signaled reports it as signaled once the word reads non-zero. */ + struct r600_resource *buf; /* buffer holding the fence word; NULL means no fine-grained fence is attached (callers test fine.buf before polling) */ + unsigned offset; /* byte offset of the fence word inside buf */ +}; struct si_multi_fence { struct pipe_reference reference; struct pipe_fence_handle *gfx; struct pipe_fence_handle *sdma; struct tc_unflushed_batch_token *tc_token; struct util_queue_fence ready; /* If the context wasn't flushed at fence creation, this is non-NULL. */ struct { struct r600_common_context *ctx; unsigned ib_index; } gfx_unflushed; + + struct si_fine_fence fine; /* optional fine-grained fence; fine.buf is released together with gfx and sdma in si_fence_reference just before the fence is freed */ }; static void si_add_fence_dependency(struct r600_common_context *rctx, struct pipe_fence_handle *fence) { struct radeon_winsys *ws = rctx->ws; if (rctx->dma.cs) ws->cs_add_fence_dependency(rctx->dma.cs, fence); ws->cs_add_fence_dependency(rctx->gfx.cs, fence); @@ -59,20 +67,21 @@ static void si_fence_reference(struct pipe_screen *screen, struct pipe_fence_handle *src) { struct radeon_winsys *ws = ((struct r600_common_screen*)screen)->ws; struct si_multi_fence **rdst = (struct si_multi_fence **)dst; struct si_multi_fence *rsrc = (struct si_multi_fence *)src; if (pipe_reference(&(*rdst)->reference, &rsrc->reference)) { ws->fence_reference(&(*rdst)->gfx, NULL); ws->fence_reference(&(*rdst)->sdma, NULL); tc_unflushed_batch_token_reference(&(*rdst)->tc_token, NULL); + r600_resource_reference(&(*rdst)->fine.buf, NULL); FREE(*rdst); } *rdst = rsrc; } static struct si_multi_fence *si_create_multi_fence() { struct si_multi_fence *fence = CALLOC_STRUCT(si_multi_fence); if (!fence) return NULL; @@ -132,20 +141,66 @@ static void si_fence_server_sync(struct pipe_context *ctx, * this fence dependency is signalled. 
* * Should we flush the context to allow more GPU parallelism? */ if (rfence->sdma) si_add_fence_dependency(rctx, rfence->sdma); if (rfence->gfx) si_add_fence_dependency(rctx, rfence->gfx); } +static bool si_fine_fence_signaled(struct radeon_winsys *rws, /* CPU-side poll of a fine-grained fence: maps fine->buf for reading and returns true when the 32-bit word at fine->offset is non-zero. Returns false when the buffer cannot be mapped. Expects fine->buf to be non-NULL (it is dereferenced unconditionally). */ + const struct si_fine_fence *fine) +{ + char *map = rws->buffer_map(fine->buf->buf, NULL, PIPE_TRANSFER_READ | + PIPE_TRANSFER_UNSYNCHRONIZED); /* NOTE(review): UNSYNCHRONIZED presumably keeps the map from waiting on the GPU work that is being polled -- confirm against the winsys buffer_map contract */ + if (!map) + return false; /* a failed map is reported as not signaled */ + + uint32_t *fence = (uint32_t*)(map + fine->offset); /* map is a char pointer, so offset is applied in bytes */ + return *fence != 0; /* si_fine_fence_set has the GPU store 0x80000000 here; the word is suballocated from allocator_zeroed_memory, presumably zero-filled until then */ +} + +static void si_fine_fence_set(struct si_context *ctx, /* Suballocates a 4-byte, 4-byte-aligned fence word into fine->buf / fine->offset and emits commands on the gfx CS that make the GPU write 0x80000000 to it. flags must contain exactly one of PIPE_FLUSH_TOP_OF_PIPE and PIPE_FLUSH_BOTTOM_OF_PIPE (asserted). If fine->buf is NULL after the allocation call, returns without emitting anything. */ + struct si_fine_fence *fine, + unsigned flags) +{ + assert(util_bitcount(flags & (PIPE_FLUSH_TOP_OF_PIPE | PIPE_FLUSH_BOTTOM_OF_PIPE)) == 1); + + u_suballocator_alloc(ctx->b.allocator_zeroed_memory, 4, 4, + &fine->offset, (struct pipe_resource **)&fine->buf); + if (!fine->buf) + return; /* allocation failed: the caller is left with a fence that has no fine-grained part */ + + uint64_t fence_va = fine->buf->gpu_address + fine->offset; /* GPU virtual address of the fence word */ + + radeon_add_to_buffer_list(&ctx->b, &ctx->b.gfx, fine->buf, + RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); + if (flags & PIPE_FLUSH_TOP_OF_PIPE) { /* top of pipe: a PKT3_WRITE_DATA packet with ENGINE_SEL = PFP stores the value to memory */ + struct radeon_winsys_cs *cs = ctx->b.gfx.cs; + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); + radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | + S_370_WR_CONFIRM(1) | + S_370_ENGINE_SEL(V_370_PFP)); + radeon_emit(cs, fence_va); /* destination address, followed by its upper 32 bits */ + radeon_emit(cs, fence_va >> 32); + radeon_emit(cs, 0x80000000); /* the non-zero value that marks the fence as signaled */ + } else if (flags & PIPE_FLUSH_BOTTOM_OF_PIPE) { /* bottom of pipe: a BOTTOM_OF_PIPE_TS end-of-pipe event writes the same 32-bit value to fence_va */ + si_gfx_write_event_eop(&ctx->b, V_028A90_BOTTOM_OF_PIPE_TS, 0, + EOP_DATA_SEL_VALUE_32BIT, + NULL, fence_va, 0x80000000, + PIPE_QUERY_GPU_FINISHED); + } else { + assert(false); /* not reachable while the bitcount assertion at the top holds */ + } +} + static boolean si_fence_finish(struct pipe_screen *screen, struct pipe_context *ctx, struct pipe_fence_handle *fence, uint64_t timeout) { struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws; struct si_multi_fence *rfence = (struct si_multi_fence *)fence; struct r600_common_context *rctx; int64_t abs_timeout = os_time_get_absolute_timeout(timeout); @@ -185,20 +240,27 @@ static 
boolean si_fence_finish(struct pipe_screen *screen, /* Recompute the timeout after waiting. */ if (timeout && timeout != PIPE_TIMEOUT_INFINITE) { int64_t time = os_time_get_nano(); timeout = abs_timeout > time ? abs_timeout - time : 0; } } if (!rfence->gfx) return true; + if (rfence->fine.buf && + si_fine_fence_signaled(rws, &rfence->fine)) { + rws->fence_reference(&rfence->gfx, NULL); + r600_resource_reference(&rfence->fine.buf, NULL); + return true; + } + /* Flush the gfx IB if it hasn't been flushed yet. */ if (rctx && rfence->gfx_unflushed.ctx == rctx && rfence->gfx_unflushed.ib_index == rctx->num_gfx_cs_flushes) { /* Section 4.1.2 (Signaling) of the OpenGL 4.6 (Core profile) * spec says: * * "If the sync object being blocked upon will not be * signaled in finite time (for example, by an associated * fence command issued previously, but not yet flushed to @@ -224,21 +286,30 @@ static boolean si_fence_finish(struct pipe_screen *screen, if (!timeout) return false; /* Recompute the timeout after all that. */ if (timeout && timeout != PIPE_TIMEOUT_INFINITE) { int64_t time = os_time_get_nano(); timeout = abs_timeout > time ? abs_timeout - time : 0; } } - return rws->fence_wait(rws, rfence->gfx, timeout); + if (rws->fence_wait(rws, rfence->gfx, timeout)) + return true; + + /* Re-check in case the GPU is slow or hangs, but the commands before + * the fine-grained fence have completed. 
*/ + if (rfence->fine.buf && + si_fine_fence_signaled(rws, &rfence->fine)) + return true; + + return false; } static void si_create_fence_fd(struct pipe_context *ctx, struct pipe_fence_handle **pfence, int fd) { struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen; struct radeon_winsys *ws = rscreen->ws; struct si_multi_fence *rfence; *pfence = NULL; @@ -307,25 +378,33 @@ static int si_fence_get_fd(struct pipe_screen *screen, static void si_flush_from_st(struct pipe_context *ctx, struct pipe_fence_handle **fence, unsigned flags) { struct pipe_screen *screen = ctx->screen; struct r600_common_context *rctx = (struct r600_common_context *)ctx; struct radeon_winsys *ws = rctx->ws; struct pipe_fence_handle *gfx_fence = NULL; struct pipe_fence_handle *sdma_fence = NULL; bool deferred_fence = false; + struct si_fine_fence fine = {}; unsigned rflags = RADEON_FLUSH_ASYNC; if (flags & PIPE_FLUSH_END_OF_FRAME) rflags |= RADEON_FLUSH_END_OF_FRAME; + if (flags & (PIPE_FLUSH_TOP_OF_PIPE | PIPE_FLUSH_BOTTOM_OF_PIPE)) { + assert(flags & PIPE_FLUSH_DEFERRED); + assert(fence); + + si_fine_fence_set((struct si_context *)rctx, &fine, flags); + } + /* DMA IBs are preambles to gfx IBs, therefore must be flushed first. */ if (rctx->dma.cs) rctx->dma.flush(rctx, rflags, fence ? &sdma_fence : NULL); if (!radeon_emitted(rctx->gfx.cs, rctx->initial_gfx_cs_size)) { if (fence) ws->fence_reference(&gfx_fence, rctx->last_gfx_fence); if (!(flags & PIPE_FLUSH_DEFERRED)) ws->cs_sync_flush(rctx->gfx.cs); } else { @@ -366,20 +445,22 @@ static void si_flush_from_st(struct pipe_context *ctx, /* If both fences are NULL, fence_finish will always return true. 
*/ multi_fence->gfx = gfx_fence; multi_fence->sdma = sdma_fence; if (deferred_fence) { multi_fence->gfx_unflushed.ctx = rctx; multi_fence->gfx_unflushed.ib_index = rctx->num_gfx_cs_flushes; } + multi_fence->fine = fine; + if (flags & TC_FLUSH_ASYNC) { util_queue_fence_signal(&multi_fence->ready); tc_unflushed_batch_token_reference(&multi_fence->tc_token, NULL); } } finish: if (!(flags & PIPE_FLUSH_DEFERRED)) { if (rctx->dma.cs) ws->cs_sync_flush(rctx->dma.cs); ws->cs_sync_flush(rctx->gfx.cs); -- 2.11.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev