From: Marek Olšák <marek.ol...@amd.com>

syncobj is used internally for interactions with command submission.
---
 src/gallium/drivers/radeon/radeon_winsys.h |  12 +++
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c  | 115 +++++++++++++++++++++++++++--
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.h  |  18 ++++-
 3 files changed, 138 insertions(+), 7 deletions(-)
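Not part of the patch: a rough usage sketch of the two new winsys hooks, for
reviewers. fence_import_sync_file/fence_export_sync_file are the callbacks
added below; cs_add_fence_dependency and fence_reference already exist in
radeon_winsys. rws, rcs, last_fence and in_fd are placeholder names for the
driver's own winsys, command stream and fence state, so read this as an
illustration of the intended flow rather than final driver code.

   /* Make the next submission wait on a sync_file FD received from
    * another driver or process. */
   struct pipe_fence_handle *in = rws->fence_import_sync_file(rws, in_fd);
   if (in) {
      rws->cs_add_fence_dependency(rcs, in);
      /* The CS keeps its own reference; release ours. */
      rws->fence_reference(&in, NULL);
   }

   /* Hand the fence of the last submission to someone else as a
    * sync_file FD. The caller owns the returned FD (-1 on failure)
    * and must close() it. */
   int out_fd = rws->fence_export_sync_file(rws, last_fence);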
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index 99e22e0..2438ec2 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -590,20 +590,32 @@ struct radeon_winsys {
                        struct pipe_fence_handle *fence,
                        uint64_t timeout);
 
     /**
      * Reference counting for fences.
      */
     void (*fence_reference)(struct pipe_fence_handle **dst,
                             struct pipe_fence_handle *src);
 
     /**
+     * Create a new fence object corresponding to the given sync_file.
+     */
+    struct pipe_fence_handle *(*fence_import_sync_file)(struct radeon_winsys *ws,
+                                                         int fd);
+
+    /**
+     * Return a sync_file FD corresponding to the given fence object.
+     */
+    int (*fence_export_sync_file)(struct radeon_winsys *ws,
+                                  struct pipe_fence_handle *fence);
+
+    /**
      * Initialize surface
      *
      * \param ws The winsys this function is called from.
      * \param tex Input texture description
      * \param flags Bitmask of RADEON_SURF_* flags
      * \param bpe Bytes per pixel, it can be different for Z buffers.
      * \param mode Preferred tile mode. (linear, 1D, or 2D)
      * \param surf Output structure
      */
     int (*surface_init)(struct radeon_winsys *ws,
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 768a164..d9d2a8b 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -40,30 +40,86 @@ DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", false)
 
 /* FENCES */
 
 static struct pipe_fence_handle *
 amdgpu_fence_create(struct amdgpu_ctx *ctx, unsigned ip_type,
                     unsigned ip_instance, unsigned ring)
 {
    struct amdgpu_fence *fence = CALLOC_STRUCT(amdgpu_fence);
 
    fence->reference.count = 1;
+   fence->ws = ctx->ws;
    fence->ctx = ctx;
    fence->fence.context = ctx->ctx;
    fence->fence.ip_type = ip_type;
    fence->fence.ip_instance = ip_instance;
    fence->fence.ring = ring;
    fence->submission_in_progress = true;
    p_atomic_inc(&ctx->refcount);
    return (struct pipe_fence_handle *)fence;
 }
 
+static struct pipe_fence_handle *
+amdgpu_fence_import_sync_file(struct radeon_winsys *rws, int fd)
+{
+   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
+   struct amdgpu_fence *fence = CALLOC_STRUCT(amdgpu_fence);
+
+   if (!fence)
+      return NULL;
+
+   pipe_reference_init(&fence->reference, 1);
+   fence->ws = ws;
+   /* fence->ctx == NULL means that the fence is syncobj-based. */
+
+   /* Convert sync_file into syncobj. */
+   int r = amdgpu_cs_create_syncobj(ws->dev, &fence->syncobj);
+   if (r) {
+      FREE(fence);
+      return NULL;
+   }
+
+   r = amdgpu_cs_syncobj_import_sync_file(ws->dev, fence->syncobj, fd);
+   if (r) {
+      amdgpu_cs_destroy_syncobj(ws->dev, fence->syncobj);
+      FREE(fence);
+      return NULL;
+   }
+   return (struct pipe_fence_handle*)fence;
+}
+
+static int amdgpu_fence_export_sync_file(struct radeon_winsys *rws,
+                                         struct pipe_fence_handle *pfence)
+{
+   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
+   struct amdgpu_fence *fence = (struct amdgpu_fence*)pfence;
+
+   if (amdgpu_fence_is_syncobj(fence)) {
+      int fd, r;
+
+      /* Convert syncobj into sync_file. */
+      r = amdgpu_cs_syncobj_export_sync_file(ws->dev, fence->syncobj, &fd);
+      return r ? -1 : fd;
+   }
+
+   os_wait_until_zero(&fence->submission_in_progress, PIPE_TIMEOUT_INFINITE);
+
+   /* Convert the amdgpu fence into a fence FD. */
+   int fd;
+   if (amdgpu_cs_fence_to_handle(ws->dev, &fence->fence,
+                                 AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD,
+                                 (uint32_t*)&fd))
+      return -1;
+
+   return fd;
+}
+
 static void amdgpu_fence_submitted(struct pipe_fence_handle *fence,
                                    uint64_t seq_no,
                                    uint64_t *user_fence_cpu_address)
 {
    struct amdgpu_fence *rfence = (struct amdgpu_fence*)fence;
 
    rfence->fence.fence = seq_no;
    rfence->user_fence_cpu_address = user_fence_cpu_address;
    rfence->submission_in_progress = false;
 }
@@ -81,20 +137,35 @@ bool amdgpu_fence_wait(struct pipe_fence_handle *fence, uint64_t timeout,
 {
    struct amdgpu_fence *rfence = (struct amdgpu_fence*)fence;
    uint32_t expired;
    int64_t abs_timeout;
    uint64_t *user_fence_cpu;
    int r;
 
    if (rfence->signalled)
       return true;
 
+   /* Handle syncobjs. */
+   if (amdgpu_fence_is_syncobj(rfence)) {
+      /* Absolute timeouts are only used by BO fences, which aren't
+       * backed by syncobjs.
+       */
+      assert(!absolute);
+
+      if (amdgpu_cs_syncobj_wait(rfence->ws->dev, &rfence->syncobj, 1,
+                                 timeout, 0, NULL))
+         return false;
+
+      rfence->signalled = true;
+      return true;
+   }
+
    if (absolute)
       abs_timeout = timeout;
    else
       abs_timeout = os_time_get_absolute_timeout(timeout);
 
    /* The fence might not have a number assigned if its IB is being
     * submitted in the other thread right now. Wait until the submission
     * is done. */
    if (!os_wait_until_zero_abs_timeout(&rfence->submission_in_progress,
                                        abs_timeout))
@@ -921,21 +992,22 @@ static unsigned add_fence_dependency_entry(struct amdgpu_cs_context *cs)
              increment * sizeof(cs->fence_dependencies[0]));
    }
 
    return idx;
 }
 
 static bool is_noop_fence_dependency(struct amdgpu_cs *acs,
                                      struct amdgpu_fence *fence)
 {
    struct amdgpu_cs_context *cs = acs->csc;
 
-   if (fence->ctx == acs->ctx &&
+   if (!amdgpu_fence_is_syncobj(fence) &&
+       fence->ctx == acs->ctx &&
        fence->fence.ip_type == cs->ib[IB_MAIN].ip_type &&
        fence->fence.ip_instance == cs->ib[IB_MAIN].ip_instance &&
        fence->fence.ring == cs->ib[IB_MAIN].ring)
       return true;
 
    return amdgpu_fence_wait((void *)fence, 0, false);
 }
 
 static void amdgpu_cs_add_fence_dependency(struct radeon_winsys_cs *rws,
                                            struct pipe_fence_handle *pfence)
@@ -1174,21 +1246,21 @@ bo_list_error:
 
    if (r) {
       fprintf(stderr, "amdgpu: buffer list creation failed (%d)\n", r);
       amdgpu_fence_signalled(cs->fence);
       cs->error_code = r;
       goto cleanup;
    }
 
    if (acs->ctx->num_rejected_cs) {
       r = -ECANCELED;
    } else {
-      struct drm_amdgpu_cs_chunk chunks[3];
+      struct drm_amdgpu_cs_chunk chunks[4];
       unsigned num_chunks = 0;
 
       /* Convert from dwords to bytes. */
       cs->ib[IB_MAIN].ib_bytes *= 4;
 
       /* IB */
       chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB;
       chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
       chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_MAIN];
       num_chunks++;
@@ -1196,38 +1268,69 @@ bo_list_error:
 
       /* Fence */
       if (has_user_fence) {
         chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_FENCE;
         chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4;
         chunks[num_chunks].chunk_data = (uintptr_t)&acs->fence_chunk;
         num_chunks++;
      }
 
      /* Dependencies */
      unsigned num_dependencies = cs->num_fence_dependencies;
+      unsigned num_syncobj_dependencies = 0;
+
      if (num_dependencies) {
         struct drm_amdgpu_cs_chunk_dep *dep_chunk =
            alloca(num_dependencies * sizeof(*dep_chunk));
+         unsigned num = 0;
 
         for (unsigned i = 0; i < num_dependencies; i++) {
            struct amdgpu_fence *fence =
               (struct amdgpu_fence*)cs->fence_dependencies[i];
 
+            if (amdgpu_fence_is_syncobj(fence)) {
+               num_syncobj_dependencies++;
+               continue;
+            }
+
            assert(!fence->submission_in_progress);
-            amdgpu_cs_chunk_fence_to_dep(&fence->fence, &dep_chunk[i]);
+            amdgpu_cs_chunk_fence_to_dep(&fence->fence, &dep_chunk[num++]);
         }
 
         chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
-         chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4 *
-                                        num_dependencies;
+         chunks[num_chunks].length_dw = sizeof(dep_chunk[0]) / 4 * num;
         chunks[num_chunks].chunk_data = (uintptr_t)dep_chunk;
         num_chunks++;
      }
+
+      /* Syncobj dependencies. */
+      if (num_syncobj_dependencies) {
+         struct drm_amdgpu_cs_chunk_sem *sem_chunk =
+            alloca(num_syncobj_dependencies * sizeof(sem_chunk[0]));
+         unsigned num = 0;
+
+         for (unsigned i = 0; i < num_dependencies; i++) {
+            struct amdgpu_fence *fence =
+               (struct amdgpu_fence*)cs->fence_dependencies[i];
+
+            if (!amdgpu_fence_is_syncobj(fence))
+               continue;
+
+            assert(!fence->submission_in_progress);
+            sem_chunk[num++].handle = fence->syncobj;
+         }
+
+         chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_IN;
+         chunks[num_chunks].length_dw = sizeof(sem_chunk[0]) / 4 * num;
+         chunks[num_chunks].chunk_data = (uintptr_t)sem_chunk;
+         num_chunks++;
+      }
+
      assert(num_chunks <= ARRAY_SIZE(chunks));
 
      r = amdgpu_cs_submit_raw(ws->dev, acs->ctx->ctx, bo_list,
                               num_chunks, chunks, &seq_no);
   }
 
   cs->error_code = r;
   if (r) {
      if (r == -ENOMEM)
         fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
@@ -1423,11 +1526,13 @@ void amdgpu_cs_init_functions(struct amdgpu_winsys *ws)
    ws->base.cs_validate = amdgpu_cs_validate;
    ws->base.cs_check_space = amdgpu_cs_check_space;
    ws->base.cs_get_buffer_list = amdgpu_cs_get_buffer_list;
    ws->base.cs_flush = amdgpu_cs_flush;
    ws->base.cs_get_next_fence = amdgpu_cs_get_next_fence;
    ws->base.cs_is_buffer_referenced = amdgpu_bo_is_referenced;
    ws->base.cs_sync_flush = amdgpu_cs_sync_flush;
    ws->base.cs_add_fence_dependency = amdgpu_cs_add_fence_dependency;
    ws->base.fence_wait = amdgpu_fence_wait_rel_timeout;
    ws->base.fence_reference = amdgpu_fence_reference;
+   ws->base.fence_import_sync_file = amdgpu_fence_import_sync_file;
+   ws->base.fence_export_sync_file = amdgpu_fence_export_sync_file;
 }
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
index de00912..21e1354 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
@@ -132,49 +132,63 @@ struct amdgpu_cs {
 
    /* Flush CS. */
    void (*flush_cs)(void *ctx, unsigned flags, struct pipe_fence_handle **fence);
    void *flush_data;
 
    struct util_queue_fence flush_completed;
    struct pipe_fence_handle *next_fence;
 };
 
 struct amdgpu_fence {
    struct pipe_reference reference;
+   /* If ctx == NULL, this fence is syncobj-based. */
+   uint32_t syncobj;
 
+   struct amdgpu_winsys *ws;
    struct amdgpu_ctx *ctx;  /* submission context */
    struct amdgpu_cs_fence fence;
    uint64_t *user_fence_cpu_address;
 
    /* If the fence is unknown due to an IB still being submitted
    * in the other thread. */
    volatile int submission_in_progress; /* bool (int for atomicity) */
    volatile int signalled;              /* bool (int for atomicity) */
 };
 
+static inline bool amdgpu_fence_is_syncobj(struct amdgpu_fence *fence)
+{
+   return fence->ctx == NULL;
+}
+
 static inline void amdgpu_ctx_unref(struct amdgpu_ctx *ctx)
 {
    if (p_atomic_dec_zero(&ctx->refcount)) {
       amdgpu_cs_ctx_free(ctx->ctx);
       amdgpu_bo_free(ctx->user_fence_bo);
       FREE(ctx);
    }
 }
 
 static inline void amdgpu_fence_reference(struct pipe_fence_handle **dst,
                                           struct pipe_fence_handle *src)
 {
    struct amdgpu_fence **rdst = (struct amdgpu_fence **)dst;
    struct amdgpu_fence *rsrc = (struct amdgpu_fence *)src;
 
    if (pipe_reference(&(*rdst)->reference, &rsrc->reference)) {
-      amdgpu_ctx_unref((*rdst)->ctx);
-      FREE(*rdst);
+      struct amdgpu_fence *fence = *rdst;
+
+      if (amdgpu_fence_is_syncobj(fence))
+         amdgpu_cs_destroy_syncobj(fence->ws->dev, fence->syncobj);
+      else
+         amdgpu_ctx_unref(fence->ctx);
+
+      FREE(fence);
    }
    *rdst = rsrc;
 }
 
 int amdgpu_lookup_buffer(struct amdgpu_cs_context *cs, struct amdgpu_winsys_bo *bo);
 
 static inline struct amdgpu_ib *
 amdgpu_ib(struct radeon_winsys_cs *base)
 {
    return (struct amdgpu_ib *)base;
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev