FYI, I've replied to some of the radeonsi patches and skimmed through the rest without giving Rbs (Reviewed-by tags). I'll do another review once version 2 is posted.
Marek On Fri, May 19, 2017 at 6:52 PM, Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote: > For each texture/image handles, we need to allocate a new > buffer for the resident descriptor. But when the number of > buffers added to the current CS becomes high, the overhead > in the winsys (and in the kernel) is important. > > To reduce this bottleneck, the idea is to suballocate the > resident descriptors using a slab similar to the one used > in the winsys. > > Currently, a buffer can hold 1024 resident descriptors but > this limit is arbitrary and could be changed in the future > for some reasons. Once a slab is allocated the "base" buffer > is added to a per-context residency list. > > Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> > --- > src/gallium/drivers/radeonsi/si_descriptors.c | 150 > ++++++++++++++++++++++++++ > src/gallium/drivers/radeonsi/si_pipe.c | 10 ++ > src/gallium/drivers/radeonsi/si_pipe.h | 15 +++ > src/gallium/drivers/radeonsi/si_state.h | 8 ++ > 4 files changed, 183 insertions(+) > > diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c > b/src/gallium/drivers/radeonsi/si_descriptors.c > index 61eb2f10be..d337fc3f11 100644 > --- a/src/gallium/drivers/radeonsi/si_descriptors.c > +++ b/src/gallium/drivers/radeonsi/si_descriptors.c > @@ -2005,6 +2005,156 @@ void si_emit_compute_shader_userdata(struct > si_context *sctx) > sctx->shader_pointers_dirty &= ~compute_mask; > } > > +/* BINDLESS */ > + > +static int si_add_resident_descriptor(struct si_context *sctx, > + struct r600_resource *desc) > +{ > + int idx; > + > + /* New resident descriptor, check if the backing array is large > enough. 
*/ > + if (sctx->num_resident_descriptors >= sctx->max_resident_descriptors) > { > + unsigned new_max_descriptors = > + MAX2(1, sctx->max_resident_descriptors * 2); > + struct r600_resource **new_descriptors = > + REALLOC(sctx->resident_descriptors, > + sctx->num_resident_descriptors * > (sizeof(*new_descriptors)), > + new_max_descriptors * > sizeof(*new_descriptors)); > + > + if (new_descriptors) { > + sctx->resident_descriptors = new_descriptors; > + sctx->max_resident_descriptors = new_max_descriptors; > + } else { > + fprintf(stderr, "si_add_resident_descriptor: " > + "allocation failed\n"); > + return -1; > + } > + } > + > + idx = sctx->num_resident_descriptors; > + sctx->resident_descriptors[idx] = desc; > + sctx->num_resident_descriptors++; > + > + return 0; > +} > + > +static void si_del_resident_descriptor(struct si_context *sctx, > + struct r600_resource *desc) > +{ > + unsigned i; > + int size; > + > + for (i = 0; i < sctx->num_resident_descriptors; i++) { > + if (sctx->resident_descriptors[i] != desc) > + continue; > + > + if (i < sctx->num_resident_descriptors - 1) { > + size = sizeof(*sctx->resident_descriptors) * > + (sctx->num_resident_descriptors - 1 - i); > + > + memmove(&sctx->resident_descriptors[i], > + &sctx->resident_descriptors[i + 1], size); > + } > + > + sctx->num_resident_descriptors--; > + return; > + } > +} > + > +struct si_resident_descriptor_slab > +{ > + struct pb_slab base; > + struct r600_resource *buffer; > + struct si_resident_descriptor *entries; > +}; > + > +bool si_resident_descriptor_can_reclaim_slab(void *priv, > + struct pb_slab_entry *entry) > +{ > + struct si_context *sctx = priv; > + struct radeon_winsys *ws = sctx->b.ws; > + struct si_resident_descriptor *desc = NULL; /* fix container_of */ > + > + desc = container_of(entry, desc, entry); > + > + if (ws->cs_is_buffer_referenced(sctx->b.gfx.cs, desc->buffer->buf, > + RADEON_USAGE_READ)) { > + /* Do not allow to reclaim the buffer if the resident > + * descriptor is 
currently used. > + */ > + return false; > + } > + > + return true; > +} > + > +struct pb_slab *si_resident_descriptor_slab_alloc(void *priv, unsigned heap, > + unsigned entry_size, > + unsigned group_index) > +{ > + struct si_context *sctx = priv; > + struct si_screen *sscreen = sctx->screen; > + struct si_resident_descriptor_slab *slab; > + > + slab = CALLOC_STRUCT(si_resident_descriptor_slab); > + if (!slab) > + return NULL; > + > + /* Create a buffer in VRAM for 1024 resident descriptors. */ > + slab->buffer = (struct r600_resource *) > + pipe_buffer_create(&sscreen->b.b, 0, > + PIPE_USAGE_IMMUTABLE, 64 * 1024); > + if (!slab->buffer) > + goto fail; > + > + slab->base.num_entries = slab->buffer->bo_size / entry_size; > + slab->base.num_free = slab->base.num_entries; > + slab->entries = CALLOC(slab->base.num_entries, > sizeof(*slab->entries)); > + if (!slab->entries) > + goto fail_buffer; > + > + LIST_INITHEAD(&slab->base.free); > + > + for (unsigned i = 0; i < slab->base.num_entries; ++i) { > + struct si_resident_descriptor *desc = &slab->entries[i]; > + > + desc->entry.slab = &slab->base; > + desc->entry.group_index = group_index; > + desc->buffer = slab->buffer; > + desc->offset = i * entry_size; > + > + LIST_ADDTAIL(&desc->entry.head, &slab->base.free); > + } > + > + /* Add the descriptor to the per-context residency list. */ > + if (si_add_resident_descriptor(sctx, slab->buffer)) > + goto fail_desc; > + > + return &slab->base; > + > +fail_desc: > + FREE(slab->entries); > +fail_buffer: > + r600_resource_reference(&slab->buffer, NULL); > +fail: > + FREE(slab); > + return NULL; > +} > + > +void si_resident_descriptor_slab_free(void *priv, struct pb_slab *pslab) > +{ > + struct si_context *sctx = priv; > + struct si_resident_descriptor_slab *slab = > + (struct si_resident_descriptor_slab *)pslab; > + > + /* Remove the descriptor from the per-context residency list. 
*/ > + si_del_resident_descriptor(sctx, slab->buffer); > + > + r600_resource_reference(&slab->buffer, NULL); > + FREE(slab->entries); > + FREE(slab); > +} > + > /* INIT/DEINIT/UPLOAD */ > > /* GFX9 has only 4KB of CE, while previous chips had 32KB. In order > diff --git a/src/gallium/drivers/radeonsi/si_pipe.c > b/src/gallium/drivers/radeonsi/si_pipe.c > index 8e55b807ce..5b1ddda321 100644 > --- a/src/gallium/drivers/radeonsi/si_pipe.c > +++ b/src/gallium/drivers/radeonsi/si_pipe.c > @@ -96,6 +96,9 @@ static void si_destroy_context(struct pipe_context *context) > r600_resource_reference(&sctx->last_trace_buf, NULL); > radeon_clear_saved_cs(&sctx->last_gfx); > > + pb_slabs_deinit(&sctx->resident_descriptor_slabs); > + > + FREE(sctx->resident_descriptors); > FREE(sctx); > } > > @@ -314,6 +317,13 @@ static struct pipe_context *si_create_context(struct > pipe_screen *screen, > > sctx->tm = si_create_llvm_target_machine(sscreen); > > + /* Create a slab allocator for all resident descriptors. 
*/ > + if (!pb_slabs_init(&sctx->resident_descriptor_slabs, 6, 6, 1, sctx, > + si_resident_descriptor_can_reclaim_slab, > + si_resident_descriptor_slab_alloc, > + si_resident_descriptor_slab_free)) > + goto fail; > + > return &sctx->b.b; > fail: > fprintf(stderr, "radeonsi: Failed to create a context.\n"); > diff --git a/src/gallium/drivers/radeonsi/si_pipe.h > b/src/gallium/drivers/radeonsi/si_pipe.h > index 13ec0729b1..41b0a2a79f 100644 > --- a/src/gallium/drivers/radeonsi/si_pipe.h > +++ b/src/gallium/drivers/radeonsi/si_pipe.h > @@ -224,6 +224,13 @@ union si_vgt_param_key { > uint32_t index; > }; > > +struct si_resident_descriptor > +{ > + struct pb_slab_entry entry; > + struct r600_resource *buffer; > + unsigned offset; > +}; > + > struct si_context { > struct r600_common_context b; > struct blitter_context *blitter; > @@ -384,6 +391,14 @@ struct si_context { > /* Precomputed IA_MULTI_VGT_PARAM */ > union si_vgt_param_key ia_multi_vgt_param_key; > unsigned ia_multi_vgt_param[SI_NUM_VGT_PARAM_STATES]; > + > + /* Slab allocator for resident descriptors. */ > + struct pb_slabs resident_descriptor_slabs; > + > + /* Resident descriptors. 
*/ > + struct r600_resource **resident_descriptors; > + unsigned num_resident_descriptors; > + unsigned max_resident_descriptors; > }; > > /* cik_sdma.c */ > diff --git a/src/gallium/drivers/radeonsi/si_state.h > b/src/gallium/drivers/radeonsi/si_state.h > index 275f830613..3e9016c84a 100644 > --- a/src/gallium/drivers/radeonsi/si_state.h > +++ b/src/gallium/drivers/radeonsi/si_state.h > @@ -30,6 +30,8 @@ > #include "si_pm4.h" > #include "radeon/r600_pipe_common.h" > > +#include "pipebuffer/pb_slab.h" > + > #define SI_NUM_GRAPHICS_SHADERS (PIPE_SHADER_TESS_EVAL+1) > #define SI_NUM_SHADERS (PIPE_SHADER_COMPUTE+1) > > @@ -335,6 +337,12 @@ void si_set_active_descriptors(struct si_context *sctx, > unsigned desc_idx, > uint64_t new_active_mask); > void si_set_active_descriptors_for_shader(struct si_context *sctx, > struct si_shader_selector *sel); > +bool si_resident_descriptor_can_reclaim_slab(void *priv, > + struct pb_slab_entry *entry); > +struct pb_slab *si_resident_descriptor_slab_alloc(void *priv, unsigned heap, > + unsigned entry_size, > + unsigned group_index); > +void si_resident_descriptor_slab_free(void *priv, struct pb_slab *pslab); > > /* si_state.c */ > struct si_shader_selector; > -- > 2.13.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev