Since we use fences internally to track buffer busyness within brw_batch.c, we can expose those directly as GL/DRI2 sync objects.
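
For illustration, a minimal caller-side sketch of how the new entry points
compose (the wrapper function, its name and the 1ms timeout are hypothetical,
not part of the patch; the types and calls are the ones introduced below):

  /* Hypothetical usage sketch, assuming only the brw_batch.h declarations
   * added by this patch.
   */
  static bool example_wait_on_gpu(struct brw_batch *batch,
                                  struct brw_fence *fence)
  {
     /* Emit a seqno write behind the requested PIPE_CONTROL flushes and
      * couple the fence to the request that will signal it; returns false
      * if the ring is idle and there is nothing to wait upon.
      */
     if (!brw_batch_insert_fence(batch, fence,
                                 PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                 PIPE_CONTROL_DEPTH_CACHE_FLUSH))
        return true; /* already idle, treat as signalled */

     /* Non-blocking poll against the ring's seqno map */
     if (!brw_fence_busy(fence, NULL))
        return true; /* signalled; the fence decoupled itself */

     /* Block for at most 1ms (nanosecond timeout); 0 means completed */
     if (brw_fence_wait(fence, 1000000, NULL) == 0)
        return true;

     /* Still busy or errored: decouple the fence explicitly */
     brw_fence_finish(fence);
     return false;
  }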
Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
 src/mesa/drivers/dri/i965/brw_batch.c |  87 ++++++++++++++++--
 src/mesa/drivers/dri/i965/brw_batch.h |  22 ++++-
 src/mesa/drivers/dri/i965/brw_sync.c  | 167 +++++++++-------------------------
 3 files changed, 140 insertions(+), 136 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_batch.c b/src/mesa/drivers/dri/i965/brw_batch.c
index defa329e53..b257d000f8 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.c
+++ b/src/mesa/drivers/dri/i965/brw_batch.c
@@ -204,7 +204,7 @@ static void __brw_request_retire(struct brw_request * const rq)
       assert(RQ_BO(tmp)->exec == NULL);
       assert(RQ_RING(tmp) == ring);
 
-      list_for_each_entry_safe(struct __brw_fence, fence, &tmp->fences, link) {
+      list_for_each_entry_safe(struct brw_fence, fence, &tmp->fences, link) {
         int signal = brw_fence_get_signal(fence);
 
         assert(brw_fence_get_request(fence) == tmp);
@@ -755,7 +755,7 @@ static void __brw_batch_grow_exec(struct brw_batch *batch)
    if (new_exec != batch->exec) {
       struct list_head * const list = &batch->next_request->fences;
 
-      list_for_each_entry_rev(struct __brw_fence, fence, list, link) {
+      list_for_each_entry_rev(struct brw_fence, fence, list, link) {
         int signal = brw_fence_get_signal(fence);
         struct brw_bo *bo = NULL;
 
@@ -1321,7 +1321,7 @@ int brw_batch_flush(struct brw_batch *batch, struct perf_debug *perf)
        * At any rate, we have to decouple our fences so that we don't die
        * later on when trying to use them.
        */
-      list_for_each_entry_safe(struct __brw_fence, fence, &rq->fences, link) {
+      list_for_each_entry_safe(struct brw_fence, fence, &rq->fences, link) {
         int signal = brw_fence_get_signal(fence);
 
         list_inithead(&fence->link);
         fence->rq = NULL;
@@ -1360,7 +1360,7 @@ int brw_batch_flush(struct brw_batch *batch, struct perf_debug *perf)
    }
 
 skip:
-   list_for_each_entry_rev(struct __brw_fence, fence, &rq->fences, link) {
+   list_for_each_entry_rev(struct brw_fence, fence, &rq->fences, link) {
       int signal = brw_fence_get_signal(fence);
       struct brw_bo *bo = NULL;
 
@@ -1697,6 +1697,81 @@ struct brw_bo *brw_bo_create_from_name(struct brw_batch *batch,
    return bo;
 }
 
+bool brw_batch_insert_fence(struct brw_batch *batch,
+                            struct brw_fence *fence,
+                            unsigned flags)
+{
+   struct brw_request *rq;
+
+   if (!batch->bo->dirty) {
+      rq = batch->requests[batch->ring].mru;
+      if (rq == NULL)
+         return false;
+
+      fence->seqno = rq->seqno;
+   } else {
+      batch->inside_begin_count++;
+      fence->seqno = __brw_batch_emit_seqno(batch, flags);
+      rq = batch->next_request;
+      batch->emit.nbatch = batch->tail - batch->map;
+      batch->inside_begin_count--;
+   }
+
+   fence->rq = FENCE_MARK_SIGNAL(rq, NO_SIGNAL);
+   list_addtail(&fence->link, &rq->fences);
+   return true;
+}
+
+bool
+brw_fence_busy(struct brw_fence *fence, struct perf_debug *perf)
+{
+   struct brw_request *rq = brw_fence_get_request(fence);
+   struct brw_batch *batch;
+
+   if (rq == NULL)
+      return false;
+
+   batch = RQ_BO(rq)->batch;
+
+   if (rq->seqno == 0)
+      return brw_batch_flush(batch, perf) == 0;
+
+   if (seqno_busy(fence->seqno, batch->seqno_map[CACHELINE_DWORDS*RQ_RING(rq)]))
+      return true;
+
+   list_del(&fence->link);
+   fence->rq = NULL;
+   return false;
+}
+
+int brw_fence_wait(struct brw_fence *fence,
+                   int64_t timeout,
+                   struct perf_debug *perf)
+{
+   struct brw_request *rq = brw_fence_get_request(fence);
+   int err;
+
+   err = 0;
+   if (seqno_busy(fence->seqno,
+                  RQ_BO(rq)->batch->seqno_map[CACHELINE_DWORDS*RQ_RING(rq)]))
+      err = __brw_request_wait(rq, timeout, perf);
+   if (err == 0) {
+      list_del(&fence->link);
+      fence->rq = NULL;
+   }
+
+   return err;
+}
+
+void brw_fence_finish(struct brw_fence *fence)
+{
+   if (fence->rq == NULL)
+      return;
+
+   list_del(&fence->link);
+   fence->rq = NULL;
+}
+
 /*
  * Provide a WC mmapping of the buffer. Coherent everywhere, but
  * reads are very slow (as they are uncached) unless streamed using movntdqa.
@@ -1822,7 +1897,7 @@ void *brw_bo_map(struct brw_bo *bo, unsigned flags, struct perf_debug *perf)
                bo->handle, flags));
 
    if ((flags & MAP_ASYNC) == 0) {
-      struct __brw_fence *fences;
+      struct brw_fence *fences;
       int nfence;
 
       if (flags & MAP_WRITE) {
@@ -2273,7 +2348,7 @@ __brw_batch_fini__requests(struct brw_batch *batch)
       }
 
      /* Incomplete batch, decouple buffers from the request */
-      list_for_each_entry_rev(struct __brw_fence, fence, &rq->fences, link) {
+      list_for_each_entry_rev(struct brw_fence, fence, &rq->fences, link) {
        int signal = brw_fence_get_signal(fence);
        struct brw_bo *bo = NULL;
 
diff --git a/src/mesa/drivers/dri/i965/brw_batch.h b/src/mesa/drivers/dri/i965/brw_batch.h
index e6e2f801ad..264868f253 100644
--- a/src/mesa/drivers/dri/i965/brw_batch.h
+++ b/src/mesa/drivers/dri/i965/brw_batch.h
@@ -65,7 +65,7 @@ enum brw_bo_domain { DOMAIN_NONE, DOMAIN_CPU, DOMAIN_GTT };
  * the GPU passes that point, the fence will be signalled. Or you can wait
  * for a fence to complete.
  */
-struct __brw_fence {
+struct brw_fence {
    struct brw_request *rq;
    struct list_head link;
    uint32_t seqno;
@@ -74,7 +74,7 @@ struct __brw_fence {
 typedef struct brw_bo {
    struct brw_batch *batch;
    struct drm_i915_gem_exec_object2 *exec;
-   struct __brw_fence read[__BRW_NUM_RINGS], write;
+   struct brw_fence read[__BRW_NUM_RINGS], write;
 
    unsigned active : __BRW_NUM_RINGS;
    unsigned dirty : 1;
@@ -317,12 +317,12 @@ void brw_bo_read(struct brw_bo *bo, uint64_t offset,
                  unsigned flags,
                  struct perf_debug *perf);
 
-static inline struct brw_request *brw_fence_get_request(struct __brw_fence *f)
+static inline struct brw_request *brw_fence_get_request(struct brw_fence *f)
 {
    return (struct brw_request *)((uintptr_t)f->rq & ~3);
 }
 
-static inline int brw_fence_get_signal(struct __brw_fence *f)
+static inline int brw_fence_get_signal(struct brw_fence *f)
 {
    return (uintptr_t)f->rq & 3;
 }
@@ -338,7 +338,7 @@ static inline bool brw_bo_busy(struct brw_bo *bo,
 #define BUSY_WRITE 1
 #define BUSY_FLUSH 2
 {
-   struct __brw_fence *fences;
+   struct brw_fence *fences;
    int nfence;
 
    if (!bo)
@@ -389,6 +389,18 @@ static inline void brw_bo_put(struct brw_bo *bo)
       __brw_bo_free(bo);
 }
 
+bool
+brw_batch_insert_fence(struct brw_batch *batch,
+                       struct brw_fence *fence,
+                       unsigned flags);
+
+bool brw_fence_busy(struct brw_fence *fence, struct perf_debug *perf);
+
+int brw_fence_wait(struct brw_fence *fence,
+                   int64_t timeout,
+                   struct perf_debug *perf);
+void brw_fence_finish(struct brw_fence *fence);
+
 /* Control batch command insertion and submission to hw */
 MUST_CHECK int __brw_batch_begin(struct brw_batch *batch,
                                  uint32_t estimated_bytes,
diff --git a/src/mesa/drivers/dri/i965/brw_sync.c b/src/mesa/drivers/dri/i965/brw_sync.c
index 988b1bc38b..03a32bec69 100644
--- a/src/mesa/drivers/dri/i965/brw_sync.c
+++ b/src/mesa/drivers/dri/i965/brw_sync.c
@@ -41,122 +41,35 @@
 #include "main/imports.h"
 
 #include "brw_context.h"
-
-struct brw_fence {
-   struct brw_context *brw;
-   /** The fence waits for completion of this batch. */
-   brw_bo *batch_bo;
-
-   mtx_t mutex;
-   bool signalled;
-};
+#include "brw_defines.h"
 
 struct brw_gl_sync {
    struct gl_sync_object gl;
    struct brw_fence fence;
 };
 
-static void
-brw_fence_init(struct brw_context *brw, struct brw_fence *fence)
-{
-   fence->brw = brw;
-   fence->batch_bo = NULL;
-   mtx_init(&fence->mutex, mtx_plain);
-}
-
-static void
-brw_fence_finish(struct brw_fence *fence)
-{
-   brw_bo_put(fence->batch_bo);
-
-   mtx_destroy(&fence->mutex);
-}
-
-static void
-brw_fence_insert(struct brw_context *brw, struct brw_fence *fence)
-{
-   assert(!fence->batch_bo);
-   assert(!fence->signalled);
-
-   brw_mi_flush(brw, brw->batch.ring);
-   fence->batch_bo = brw_bo_get(brw->batch.bo);
-   brw_batch_flush(&brw->batch, PERF_DEBUG(brw, "SyncFence"));
-}
-
-static bool
-brw_fence_has_completed_locked(struct brw_fence *fence)
-{
-   if (fence->signalled)
-      return true;
-
-   if (brw_bo_busy(fence->batch_bo, BUSY_WRITE | BUSY_FLUSH, NULL)) {
-      brw_bo_put(fence->batch_bo);
-      fence->batch_bo = NULL;
-      fence->signalled = true;
-      return true;
-   }
-
-   return false;
-}
-
-static bool
-brw_fence_has_completed(struct brw_fence *fence)
-{
-   bool ret;
-
-   mtx_lock(&fence->mutex);
-   ret = brw_fence_has_completed_locked(fence);
-   mtx_unlock(&fence->mutex);
-
-   return ret;
-}
-
-static bool
-brw_fence_client_wait_locked(struct brw_context *brw, struct brw_fence *fence,
-                             uint64_t timeout)
-{
-   if (fence->signalled)
-      return true;
-
-   assert(fence->batch_bo);
-
-   /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and returns
-    * immediately for timeouts <= 0. The best we can do is to clamp the
-    * timeout to INT64_MAX. This limits the maximum timeout from 584 years to
-    * 292 years - likely not a big deal.
-    */
-   if (timeout > INT64_MAX)
-      timeout = INT64_MAX;
-
-   if (drm_intel_gem_bo_wait(fence->batch_bo->base, timeout) != 0)
-      return false;
-
-   fence->signalled = true;
-   brw_bo_put(fence->batch_bo);
-   fence->batch_bo = NULL;
-
-   return true;
-}
-
 /**
  * Return true if the function successfully signals or has already signalled.
  * (This matches the behavior expected from __DRI2fence::client_wait_sync).
  */
 static bool
-brw_fence_client_wait(struct brw_context *brw, struct brw_fence *fence,
-                      uint64_t timeout)
+brw_fence_client_wait(struct brw_fence *fence,
+                      uint64_t timeout,
+                      struct perf_debug *perf)
 {
-   bool ret;
-
-   mtx_lock(&fence->mutex);
-   ret = brw_fence_client_wait_locked(brw, fence, timeout);
-   mtx_unlock(&fence->mutex);
+   /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and returns
+    * immediately for timeout == 0, and indefinitely if timeout is negative.
+    * The best we can do is to clamp the timeout to INT64_MAX. This limits
+    * the maximum timeout from 584 years to 292 years - likely not a big deal.
+    */
+   if (timeout > INT64_MAX)
+      timeout = INT64_MAX;
 
-   return ret;
+   return brw_fence_wait(fence, timeout, perf) == 0;
 }
 
 static void
-brw_fence_server_wait(struct brw_context *brw, struct brw_fence *fence)
+brw_fence_server_wait(struct brw_fence *fence)
 {
    /* We have nothing to do for WaitSync. Our GL command stream is sequential,
     * so given that the sync object has already flushed the batchbuffer, any
@@ -178,53 +91,55 @@ brw_gl_new_sync(struct gl_context *ctx, GLuint id)
 }
 
 static void
-brw_gl_delete_sync(struct gl_context *ctx, struct gl_sync_object *_sync)
+brw_gl_delete_sync(struct gl_context *ctx, struct gl_sync_object *s)
 {
-   struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
+   struct brw_gl_sync *sync = (struct brw_gl_sync *)s;
 
    brw_fence_finish(&sync->fence);
    free(sync);
 }
 
 static void
-brw_gl_fence_sync(struct gl_context *ctx, struct gl_sync_object *_sync,
+brw_gl_fence_sync(struct gl_context *ctx, struct gl_sync_object *s,
                   GLenum condition, GLbitfield flags)
 {
    struct brw_context *brw = brw_context(ctx);
-   struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
+   struct brw_gl_sync *sync = (struct brw_gl_sync *)s;
 
-   brw_fence_init(brw, &sync->fence);
-   brw_fence_insert(brw, &sync->fence);
+   s->StatusFlag =
+      !brw_batch_insert_fence(&brw->batch,
+                              &sync->fence,
+                              PIPE_CONTROL_RENDER_TARGET_FLUSH |
+                              PIPE_CONTROL_DEPTH_CACHE_FLUSH);
 }
 
 static void
-brw_gl_client_wait_sync(struct gl_context *ctx, struct gl_sync_object *_sync,
+brw_gl_client_wait_sync(struct gl_context *ctx, struct gl_sync_object *s,
                         GLbitfield flags, GLuint64 timeout)
 {
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
+   struct brw_gl_sync *sync = (struct brw_gl_sync *)s;
 
-   if (brw_fence_client_wait(brw, &sync->fence, timeout))
-      sync->gl.StatusFlag = 1;
+   s->StatusFlag =
+      brw_fence_client_wait(&sync->fence, timeout,
+                            PERF_DEBUG(brw_context(ctx), "ClientWaitSync"));
 }
 
 static void
-brw_gl_server_wait_sync(struct gl_context *ctx, struct gl_sync_object *_sync,
+brw_gl_server_wait_sync(struct gl_context *ctx, struct gl_sync_object *s,
                         GLbitfield flags, GLuint64 timeout)
 {
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
+   struct brw_gl_sync *sync = (struct brw_gl_sync *)s;
 
-   brw_fence_server_wait(brw, &sync->fence);
+   brw_fence_server_wait(&sync->fence);
 }
 
 static void
-brw_gl_check_sync(struct gl_context *ctx, struct gl_sync_object *_sync)
+brw_gl_check_sync(struct gl_context *ctx, struct gl_sync_object *s)
 {
-   struct brw_gl_sync *sync = (struct brw_gl_sync *) _sync;
+   struct brw_gl_sync *sync = (struct brw_gl_sync *)s;
 
-   if (brw_fence_has_completed(&sync->fence))
-      sync->gl.StatusFlag = 1;
+   s->StatusFlag =
+      !brw_fence_busy(&sync->fence, PERF_DEBUG(brw_context(ctx), "CheckSync"));
 }
 
 void
@@ -248,8 +163,9 @@ brw_dri_create_fence(__DRIcontext *ctx)
    if (!fence)
       return NULL;
 
-   brw_fence_init(brw, fence);
-   brw_fence_insert(brw, fence);
+   brw_batch_insert_fence(&brw->batch, fence,
+                          PIPE_CONTROL_RENDER_TARGET_FLUSH |
+                          PIPE_CONTROL_DEPTH_CACHE_FLUSH);
 
    return fence;
 }
@@ -264,12 +180,13 @@ brw_dri_destroy_fence(__DRIscreen *dri_screen, void *_fence)
 }
 
 static GLboolean
-brw_dri_client_wait_sync(__DRIcontext *ctx, void *_fence, unsigned flags,
+brw_dri_client_wait_sync(__DRIcontext *ctx, void *fence, unsigned flags,
                          uint64_t timeout)
 {
-   struct brw_fence *fence = _fence;
+   struct brw_context *brw = ctx->driverPrivate;
 
-   return brw_fence_client_wait(fence->brw, fence, timeout);
+   return brw_fence_client_wait(fence, timeout,
+                                PERF_DEBUG(brw, "DRI2ClientFenceWait"));
 }
 
 static void
@@ -283,7 +200,7 @@ brw_dri_server_wait_sync(__DRIcontext *ctx, void *_fence, unsigned flags)
    if (!fence)
      return;
 
-   brw_fence_server_wait(fence->brw, fence);
+   brw_fence_server_wait(fence);
 }
 
 const __DRI2fenceExtension intelFenceExtension = {
-- 
2.11.0