From: Marek Olšák <marek.ol...@amd.com>

---
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 86 +++++++++----------
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.h | 15 ++--
 2 files changed, 42 insertions(+), 59 deletions(-)
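Review notes (not part of the commit): add_fence_dependency_entry and
add_syncobj_to_signal_entry were copies of the same grow-by-8 array code,
differing only in which amdgpu_cs_context fields they touched; the patch
folds them into one amdgpu_fence_list plus the shared helpers
add_fence_to_list/cleanup_fence_list. Below is a minimal standalone sketch
of that pattern, with hypothetical names (fence_list, fence_list_add) and
void* standing in for pipe_fence_handle; error handling is omitted, as in
the original:

   #include <stdlib.h>
   #include <string.h>

   struct fence_list {
      void **list;   /* stands in for struct pipe_fence_handle ** */
      unsigned num;  /* entries in use */
      unsigned max;  /* entries allocated */
   };

   static void fence_list_add(struct fence_list *fences, void *fence)
   {
      unsigned idx = fences->num++;

      if (idx >= fences->max) {
         const unsigned increment = 8;

         /* Grow linearly by 8 entries, like the winsys helpers do. */
         fences->max = idx + increment;
         fences->list = realloc(fences->list,
                                fences->max * sizeof(fences->list[0]));
         /* Zero the new slots; the real code stores entries through
          * amdgpu_fence_reference, which unreferences the previous value,
          * so it must never read uninitialized memory. */
         memset(fences->list + idx, 0, increment * sizeof(fences->list[0]));
      }
      fences->list[idx] = fence; /* the real code takes a reference here */
   }

   static void fence_list_cleanup(struct fence_list *fences)
   {
      /* The real cleanup_fence_list drops one reference per entry;
       * the capacity (list/max) is kept for reuse. */
      fences->num = 0;
   }

The list intentionally keeps its allocation across submissions and is only
emptied (num = 0) on cleanup; the backing storage is freed once, in
amdgpu_destroy_cs_context.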
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 7b18f54cea9..b202ce21cf9 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -860,60 +860,63 @@ static bool amdgpu_init_cs_context(struct amdgpu_winsys *ws,
    default:
       assert(0);
    }
 
    memset(cs->buffer_indices_hashlist, -1,
           sizeof(cs->buffer_indices_hashlist));
    cs->last_added_bo = NULL;
    return true;
 }
 
+static void cleanup_fence_list(struct amdgpu_fence_list *fences)
+{
+   for (unsigned i = 0; i < fences->num; i++)
+      amdgpu_fence_reference(&fences->list[i], NULL);
+   fences->num = 0;
+}
+
 static void amdgpu_cs_context_cleanup(struct amdgpu_cs_context *cs)
 {
    unsigned i;
 
    for (i = 0; i < cs->num_real_buffers; i++) {
       p_atomic_dec(&cs->real_buffers[i].bo->num_cs_references);
       amdgpu_winsys_bo_reference(&cs->real_buffers[i].bo, NULL);
    }
    for (i = 0; i < cs->num_slab_buffers; i++) {
       p_atomic_dec(&cs->slab_buffers[i].bo->num_cs_references);
       amdgpu_winsys_bo_reference(&cs->slab_buffers[i].bo, NULL);
    }
    for (i = 0; i < cs->num_sparse_buffers; i++) {
       p_atomic_dec(&cs->sparse_buffers[i].bo->num_cs_references);
       amdgpu_winsys_bo_reference(&cs->sparse_buffers[i].bo, NULL);
    }
-   for (i = 0; i < cs->num_fence_dependencies; i++)
-      amdgpu_fence_reference(&cs->fence_dependencies[i], NULL);
-   for (i = 0; i < cs->num_syncobj_to_signal; i++)
-      amdgpu_fence_reference(&cs->syncobj_to_signal[i], NULL);
+   cleanup_fence_list(&cs->fence_dependencies);
+   cleanup_fence_list(&cs->syncobj_to_signal);
 
    cs->num_real_buffers = 0;
    cs->num_slab_buffers = 0;
    cs->num_sparse_buffers = 0;
-   cs->num_fence_dependencies = 0;
-   cs->num_syncobj_to_signal = 0;
    amdgpu_fence_reference(&cs->fence, NULL);
 
    memset(cs->buffer_indices_hashlist, -1,
           sizeof(cs->buffer_indices_hashlist));
    cs->last_added_bo = NULL;
 }
 
 static void amdgpu_destroy_cs_context(struct amdgpu_cs_context *cs)
 {
    amdgpu_cs_context_cleanup(cs);
    FREE(cs->real_buffers);
    FREE(cs->slab_buffers);
    FREE(cs->sparse_buffers);
-   FREE(cs->fence_dependencies);
-   FREE(cs->syncobj_to_signal);
+   FREE(cs->fence_dependencies.list);
+   FREE(cs->syncobj_to_signal.list);
 }
 
 static struct radeon_cmdbuf *
 amdgpu_cs_create(struct radeon_winsys_ctx *rwctx,
                  enum ring_type ring_type,
                  void (*flush)(void *ctx, unsigned flags,
                                struct pipe_fence_handle **fence),
                  void *flush_ctx,
                  bool stop_exec_on_failure)
@@ -1062,36 +1065,37 @@ static unsigned amdgpu_cs_get_buffer_list(struct radeon_cmdbuf *rcs,
    if (list) {
       for (i = 0; i < cs->num_real_buffers; i++) {
          list[i].bo_size = cs->real_buffers[i].bo->base.size;
          list[i].vm_address = cs->real_buffers[i].bo->va;
          list[i].priority_usage = cs->real_buffers[i].u.real.priority_usage;
       }
    }
    return cs->num_real_buffers;
 }
 
-static unsigned add_fence_dependency_entry(struct amdgpu_cs_context *cs)
+static void add_fence_to_list(struct amdgpu_fence_list *fences,
+                              struct amdgpu_fence *fence)
 {
-   unsigned idx = cs->num_fence_dependencies++;
+   unsigned idx = fences->num++;
 
-   if (idx >= cs->max_fence_dependencies) {
+   if (idx >= fences->max) {
       unsigned size;
       const unsigned increment = 8;
 
-      cs->max_fence_dependencies = idx + increment;
-      size = cs->max_fence_dependencies * sizeof(cs->fence_dependencies[0]);
-      cs->fence_dependencies = realloc(cs->fence_dependencies, size);
+      fences->max = idx + increment;
+      size = fences->max * sizeof(fences->list[0]);
+      fences->list = realloc(fences->list, size);
       /* Clear the newly-allocated elements. */
-      memset(cs->fence_dependencies + idx, 0,
-             increment * sizeof(cs->fence_dependencies[0]));
+      memset(fences->list + idx, 0,
+             increment * sizeof(fences->list[0]));
    }
-   return idx;
+   amdgpu_fence_reference(&fences->list[idx], (struct pipe_fence_handle*)fence);
 }
 
 static bool is_noop_fence_dependency(struct amdgpu_cs *acs,
                                      struct amdgpu_fence *fence)
 {
    struct amdgpu_cs_context *cs = acs->csc;
 
    if (!amdgpu_fence_is_syncobj(fence) &&
        fence->ctx == acs->ctx &&
        fence->fence.ip_type == cs->ib[IB_MAIN].ip_type &&
@@ -1107,23 +1111,21 @@ static void amdgpu_cs_add_fence_dependency(struct radeon_cmdbuf *rws,
 {
    struct amdgpu_cs *acs = amdgpu_cs(rws);
    struct amdgpu_cs_context *cs = acs->csc;
    struct amdgpu_fence *fence = (struct amdgpu_fence*)pfence;
 
    util_queue_fence_wait(&fence->submitted);
 
    if (is_noop_fence_dependency(acs, fence))
       return;
 
-   unsigned idx = add_fence_dependency_entry(cs);
-   amdgpu_fence_reference(&cs->fence_dependencies[idx],
-                          (struct pipe_fence_handle*)fence);
+   add_fence_to_list(&cs->fence_dependencies, fence);
 }
 
 static void amdgpu_add_bo_fence_dependencies(struct amdgpu_cs *acs,
                                              struct amdgpu_cs_buffer *buffer)
 {
    struct amdgpu_cs_context *cs = acs->csc;
    struct amdgpu_winsys_bo *bo = buffer->bo;
    unsigned new_num_fences = 0;
 
    for (unsigned j = 0; j < bo->num_fences; ++j) {
@@ -1131,23 +1133,21 @@ static void amdgpu_add_bo_fence_dependencies(struct amdgpu_cs *acs,
       if (is_noop_fence_dependency(acs, bo_fence))
          continue;
 
       amdgpu_fence_reference(&bo->fences[new_num_fences], bo->fences[j]);
       new_num_fences++;
 
       if (!(buffer->usage & RADEON_USAGE_SYNCHRONIZED))
          continue;
 
-      unsigned idx = add_fence_dependency_entry(cs);
-      amdgpu_fence_reference(&cs->fence_dependencies[idx],
-                             (struct pipe_fence_handle*)bo_fence);
+      add_fence_to_list(&cs->fence_dependencies, bo_fence);
    }
 
    for (unsigned j = new_num_fences; j < bo->num_fences; ++j)
       amdgpu_fence_reference(&bo->fences[j], NULL);
 
    bo->num_fences = new_num_fences;
 }
 
 /* Add the given list of fences to the buffer's fence list.
  *
@@ -1209,48 +1209,29 @@ static void amdgpu_add_fence_dependencies_bo_list(struct amdgpu_cs *acs,
  */
 static void amdgpu_add_fence_dependencies_bo_lists(struct amdgpu_cs *acs)
 {
    struct amdgpu_cs_context *cs = acs->csc;
 
    amdgpu_add_fence_dependencies_bo_list(acs, cs->fence, cs->num_real_buffers, cs->real_buffers);
    amdgpu_add_fence_dependencies_bo_list(acs, cs->fence, cs->num_slab_buffers, cs->slab_buffers);
    amdgpu_add_fence_dependencies_bo_list(acs, cs->fence, cs->num_sparse_buffers, cs->sparse_buffers);
 }
 
-static unsigned add_syncobj_to_signal_entry(struct amdgpu_cs_context *cs)
-{
-   unsigned idx = cs->num_syncobj_to_signal++;
-
-   if (idx >= cs->max_syncobj_to_signal) {
-      unsigned size;
-      const unsigned increment = 8;
-
-      cs->max_syncobj_to_signal = idx + increment;
-      size = cs->max_syncobj_to_signal * sizeof(cs->syncobj_to_signal[0]);
-      cs->syncobj_to_signal = realloc(cs->syncobj_to_signal, size);
-      /* Clear the newly-allocated elements. */
-      memset(cs->syncobj_to_signal + idx, 0,
-             increment * sizeof(cs->syncobj_to_signal[0]));
-   }
-   return idx;
-}
-
 static void amdgpu_cs_add_syncobj_signal(struct radeon_cmdbuf *rws,
                                          struct pipe_fence_handle *fence)
 {
    struct amdgpu_cs *acs = amdgpu_cs(rws);
    struct amdgpu_cs_context *cs = acs->csc;
 
    assert(amdgpu_fence_is_syncobj((struct amdgpu_fence *)fence));
 
-   unsigned idx = add_syncobj_to_signal_entry(cs);
-   amdgpu_fence_reference(&cs->syncobj_to_signal[idx], fence);
+   add_fence_to_list(&cs->syncobj_to_signal, (struct amdgpu_fence*)fence);
 }
 
 /* Add backing of sparse buffers to the buffer list.
  *
  * This is done late, during submission, to keep the buffer list short before
  * submit, and to avoid managing fences for the backing buffers.
  */
 static bool amdgpu_add_sparse_backing_buffers(struct amdgpu_cs_context *cs)
 {
    for (unsigned i = 0; i < cs->num_sparse_buffers; ++i) {
@@ -1380,31 +1361,31 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
 
    /* Fence */
    if (has_user_fence) {
       chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_FENCE;
       chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4;
       chunks[num_chunks].chunk_data = (uintptr_t)&acs->fence_chunk;
       num_chunks++;
    }
 
    /* Dependencies */
-   unsigned num_dependencies = cs->num_fence_dependencies;
+   unsigned num_dependencies = cs->fence_dependencies.num;
    unsigned num_syncobj_dependencies = 0;
 
    if (num_dependencies) {
       struct drm_amdgpu_cs_chunk_dep *dep_chunk =
          alloca(num_dependencies * sizeof(*dep_chunk));
       unsigned num = 0;
 
       for (unsigned i = 0; i < num_dependencies; i++) {
          struct amdgpu_fence *fence =
-            (struct amdgpu_fence*)cs->fence_dependencies[i];
+            (struct amdgpu_fence*)cs->fence_dependencies.list[i];
 
          if (amdgpu_fence_is_syncobj(fence)) {
             num_syncobj_dependencies++;
             continue;
          }
 
          assert(util_queue_fence_is_signalled(&fence->submitted));
          amdgpu_cs_chunk_fence_to_dep(&fence->fence, &dep_chunk[num++]);
       }
@@ -1415,51 +1396,52 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
    }
 
    /* Syncobj dependencies. */
    if (num_syncobj_dependencies) {
       struct drm_amdgpu_cs_chunk_sem *sem_chunk =
         alloca(num_syncobj_dependencies * sizeof(sem_chunk[0]));
       unsigned num = 0;
 
       for (unsigned i = 0; i < num_dependencies; i++) {
          struct amdgpu_fence *fence =
-            (struct amdgpu_fence*)cs->fence_dependencies[i];
+            (struct amdgpu_fence*)cs->fence_dependencies.list[i];
 
          if (!amdgpu_fence_is_syncobj(fence))
            continue;
 
          assert(util_queue_fence_is_signalled(&fence->submitted));
          sem_chunk[num++].handle = fence->syncobj;
       }
 
       chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_IN;
       chunks[num_chunks].length_dw = sizeof(sem_chunk[0]) / 4 * num;
       chunks[num_chunks].chunk_data = (uintptr_t)sem_chunk;
       num_chunks++;
    }
 
-   /* Syncobj sygnals. */
-   if (cs->num_syncobj_to_signal) {
+   /* Syncobj signals. */
+   unsigned num_syncobj_to_signal = cs->syncobj_to_signal.num;
+   if (num_syncobj_to_signal) {
       struct drm_amdgpu_cs_chunk_sem *sem_chunk =
-         alloca(cs->num_syncobj_to_signal * sizeof(sem_chunk[0]));
+         alloca(num_syncobj_to_signal * sizeof(sem_chunk[0]));
 
-      for (unsigned i = 0; i < cs->num_syncobj_to_signal; i++) {
+      for (unsigned i = 0; i < num_syncobj_to_signal; i++) {
          struct amdgpu_fence *fence =
-            (struct amdgpu_fence*)cs->syncobj_to_signal[i];
+            (struct amdgpu_fence*)cs->syncobj_to_signal.list[i];
 
          assert(amdgpu_fence_is_syncobj(fence));
          sem_chunk[i].handle = fence->syncobj;
       }
 
       chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_OUT;
       chunks[num_chunks].length_dw = sizeof(sem_chunk[0]) / 4
-                                     * cs->num_syncobj_to_signal;
+                                     * num_syncobj_to_signal;
       chunks[num_chunks].chunk_data = (uintptr_t)sem_chunk;
       num_chunks++;
    }
 
    /* BO list */
    if (!use_bo_list_create) {
       chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_BO_HANDLES;
       chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_bo_list_in) / 4;
       chunks[num_chunks].chunk_data = (uintptr_t)&bo_list_in;
       num_chunks++;
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
index 07b5d4b350c..60b05910c71 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
@@ -66,20 +66,26 @@ struct amdgpu_ib {
    /* A buffer out of which new IBs are allocated. */
    struct pb_buffer *big_ib_buffer;
    uint8_t *ib_mapped;
    unsigned used_ib_space;
    unsigned max_ib_size;
    uint32_t *ptr_ib_size;
    bool ptr_ib_size_inside_ib;
    enum ib_type ib_type;
 };
 
+struct amdgpu_fence_list {
+   struct pipe_fence_handle **list;
+   unsigned num;
+   unsigned max;
+};
+
 struct amdgpu_cs_context {
    struct drm_amdgpu_cs_chunk_ib ib[IB_NUM];
 
    /* Buffers. */
    unsigned max_real_buffers;
    unsigned num_real_buffers;
    struct amdgpu_cs_buffer *real_buffers;
 
    unsigned num_slab_buffers;
    unsigned max_slab_buffers;
@@ -89,27 +95,22 @@ struct amdgpu_cs_context {
    unsigned max_sparse_buffers;
    struct amdgpu_cs_buffer *sparse_buffers;
 
    int buffer_indices_hashlist[4096];
 
    struct amdgpu_winsys_bo *last_added_bo;
    unsigned last_added_bo_index;
    unsigned last_added_bo_usage;
    uint32_t last_added_bo_priority_usage;
 
-   struct pipe_fence_handle **fence_dependencies;
-   unsigned num_fence_dependencies;
-   unsigned max_fence_dependencies;
-
-   struct pipe_fence_handle **syncobj_to_signal;
-   unsigned num_syncobj_to_signal;
-   unsigned max_syncobj_to_signal;
+   struct amdgpu_fence_list fence_dependencies;
+   struct amdgpu_fence_list syncobj_to_signal;
 
    struct pipe_fence_handle *fence;
 
    /* the error returned from cs_flush for non-async submissions */
    int error_code;
 };
 
 struct amdgpu_cs {
    struct amdgpu_ib main; /* must be first because this is inherited */
    struct amdgpu_ctx *ctx;
-- 
2.17.1