FYI, I've replied to some of the radeonsi patches and skimmed through the
rest without giving Rbs. I'll do another review once there is a version 2.

Marek

On Fri, May 19, 2017 at 6:52 PM, Samuel Pitoiset
<samuel.pitoi...@gmail.com> wrote:
> For each texture/image handle, we need to allocate a new
> buffer for the resident descriptor. But when the number of
> buffers added to the current CS becomes high, the overhead
> in the winsys (and in the kernel) is significant.
>
> To reduce this bottleneck, the idea is to suballocate the
> resident descriptors using a slab similar to the one used
> in the winsys.
>
> Currently, a buffer can hold 1024 resident descriptors, but
> this limit is arbitrary and could be changed in the future
> if needed. Once a slab is allocated, the "base" buffer
> is added to a per-context residency list.
>
> Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
> ---
>  src/gallium/drivers/radeonsi/si_descriptors.c | 150 
> ++++++++++++++++++++++++++
>  src/gallium/drivers/radeonsi/si_pipe.c        |  10 ++
>  src/gallium/drivers/radeonsi/si_pipe.h        |  15 +++
>  src/gallium/drivers/radeonsi/si_state.h       |   8 ++
>  4 files changed, 183 insertions(+)
>
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
> b/src/gallium/drivers/radeonsi/si_descriptors.c
> index 61eb2f10be..d337fc3f11 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -2005,6 +2005,156 @@ void si_emit_compute_shader_userdata(struct 
> si_context *sctx)
>         sctx->shader_pointers_dirty &= ~compute_mask;
>  }
>
> +/* BINDLESS */
> +
> +static int si_add_resident_descriptor(struct si_context *sctx,
> +                                     struct r600_resource *desc)
> +{
> +       int idx;
> +
> +       /* New resident descriptor, check if the backing array is large 
> enough. */
> +       if (sctx->num_resident_descriptors >= sctx->max_resident_descriptors) 
> {
> +               unsigned new_max_descriptors =
> +                       MAX2(1, sctx->max_resident_descriptors * 2);
> +               struct r600_resource **new_descriptors =
> +                       REALLOC(sctx->resident_descriptors,
> +                               sctx->num_resident_descriptors * 
> (sizeof(*new_descriptors)),
> +                               new_max_descriptors * 
> sizeof(*new_descriptors));
> +
> +               if (new_descriptors) {
> +                       sctx->resident_descriptors = new_descriptors;
> +                       sctx->max_resident_descriptors = new_max_descriptors;
> +               } else {
> +                       fprintf(stderr, "si_add_resident_descriptor: "
> +                               "allocation failed\n");
> +                       return -1;
> +               }
> +       }
> +
> +       idx = sctx->num_resident_descriptors;
> +       sctx->resident_descriptors[idx] = desc;
> +       sctx->num_resident_descriptors++;
> +
> +       return 0;
> +}
> +
> +static void si_del_resident_descriptor(struct si_context *sctx,
> +                                      struct r600_resource *desc)
> +{
> +       unsigned i;
> +       int size;
> +
> +       for (i = 0; i < sctx->num_resident_descriptors; i++) {
> +               if (sctx->resident_descriptors[i] != desc)
> +                       continue;
> +
> +               if (i < sctx->num_resident_descriptors - 1) {
> +                       size = sizeof(*sctx->resident_descriptors) *
> +                               (sctx->num_resident_descriptors - 1 - i);
> +
> +                       memmove(&sctx->resident_descriptors[i],
> +                               &sctx->resident_descriptors[i + 1], size);
> +               }
> +
> +               sctx->num_resident_descriptors--;
> +               return;
> +       }
> +}
> +
> +struct si_resident_descriptor_slab
> +{
> +       struct pb_slab base;
> +       struct r600_resource *buffer;
> +       struct si_resident_descriptor *entries;
> +};
> +
> +bool si_resident_descriptor_can_reclaim_slab(void *priv,
> +                                            struct pb_slab_entry *entry)
> +{
> +       struct si_context *sctx = priv;
> +       struct radeon_winsys *ws = sctx->b.ws;
> +       struct si_resident_descriptor *desc = NULL; /* fix container_of */
> +
> +       desc = container_of(entry, desc, entry);
> +
> +       if (ws->cs_is_buffer_referenced(sctx->b.gfx.cs, desc->buffer->buf,
> +                                       RADEON_USAGE_READ)) {
> +               /* Do not allow to reclaim the buffer if the resident
> +                * descriptor is currently used.
> +                */
> +               return false;
> +       }
> +
> +       return true;
> +}
> +
> +struct pb_slab *si_resident_descriptor_slab_alloc(void *priv, unsigned heap,
> +                                                 unsigned entry_size,
> +                                                 unsigned group_index)
> +{
> +       struct si_context *sctx = priv;
> +       struct si_screen *sscreen = sctx->screen;
> +       struct si_resident_descriptor_slab *slab;
> +
> +       slab = CALLOC_STRUCT(si_resident_descriptor_slab);
> +       if (!slab)
> +               return NULL;
> +
> +       /* Create a buffer in VRAM for 1024 resident descriptors. */
> +       slab->buffer = (struct r600_resource *)
> +               pipe_buffer_create(&sscreen->b.b, 0,
> +                                  PIPE_USAGE_IMMUTABLE, 64 * 1024);
> +       if (!slab->buffer)
> +               goto fail;
> +
> +       slab->base.num_entries = slab->buffer->bo_size / entry_size;
> +       slab->base.num_free = slab->base.num_entries;
> +       slab->entries = CALLOC(slab->base.num_entries, 
> sizeof(*slab->entries));
> +       if (!slab->entries)
> +               goto fail_buffer;
> +
> +       LIST_INITHEAD(&slab->base.free);
> +
> +       for (unsigned i = 0; i < slab->base.num_entries; ++i) {
> +               struct si_resident_descriptor *desc = &slab->entries[i];
> +
> +               desc->entry.slab = &slab->base;
> +               desc->entry.group_index = group_index;
> +               desc->buffer = slab->buffer;
> +               desc->offset = i * entry_size;
> +
> +               LIST_ADDTAIL(&desc->entry.head, &slab->base.free);
> +       }
> +
> +       /* Add the descriptor to the per-context residency list. */
> +       if (si_add_resident_descriptor(sctx, slab->buffer))
> +               goto fail_desc;
> +
> +       return &slab->base;
> +
> +fail_desc:
> +       FREE(slab->entries);
> +fail_buffer:
> +       r600_resource_reference(&slab->buffer, NULL);
> +fail:
> +       FREE(slab);
> +       return NULL;
> +}
> +
> +void si_resident_descriptor_slab_free(void *priv, struct pb_slab *pslab)
> +{
> +       struct si_context *sctx = priv;
> +       struct si_resident_descriptor_slab *slab =
> +               (struct si_resident_descriptor_slab *)pslab;
> +
> +       /* Remove the descriptor from the per-context residency list. */
> +       si_del_resident_descriptor(sctx, slab->buffer);
> +
> +       r600_resource_reference(&slab->buffer, NULL);
> +       FREE(slab->entries);
> +       FREE(slab);
> +}
> +
>  /* INIT/DEINIT/UPLOAD */
>
>  /* GFX9 has only 4KB of CE, while previous chips had 32KB. In order
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
> b/src/gallium/drivers/radeonsi/si_pipe.c
> index 8e55b807ce..5b1ddda321 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -96,6 +96,9 @@ static void si_destroy_context(struct pipe_context *context)
>         r600_resource_reference(&sctx->last_trace_buf, NULL);
>         radeon_clear_saved_cs(&sctx->last_gfx);
>
> +       pb_slabs_deinit(&sctx->resident_descriptor_slabs);
> +
> +       FREE(sctx->resident_descriptors);
>         FREE(sctx);
>  }
>
> @@ -314,6 +317,13 @@ static struct pipe_context *si_create_context(struct 
> pipe_screen *screen,
>
>         sctx->tm = si_create_llvm_target_machine(sscreen);
>
> +       /* Create a slab allocator for all resident descriptors. */
> +       if (!pb_slabs_init(&sctx->resident_descriptor_slabs, 6, 6, 1, sctx,
> +                          si_resident_descriptor_can_reclaim_slab,
> +                          si_resident_descriptor_slab_alloc,
> +                          si_resident_descriptor_slab_free))
> +               goto fail;
> +
>         return &sctx->b.b;
>  fail:
>         fprintf(stderr, "radeonsi: Failed to create a context.\n");
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
> b/src/gallium/drivers/radeonsi/si_pipe.h
> index 13ec0729b1..41b0a2a79f 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -224,6 +224,13 @@ union si_vgt_param_key {
>         uint32_t index;
>  };
>
> +struct si_resident_descriptor
> +{
> +       struct pb_slab_entry            entry;
> +       struct r600_resource            *buffer;
> +       unsigned                        offset;
> +};
> +
>  struct si_context {
>         struct r600_common_context      b;
>         struct blitter_context          *blitter;
> @@ -384,6 +391,14 @@ struct si_context {
>         /* Precomputed IA_MULTI_VGT_PARAM */
>         union si_vgt_param_key  ia_multi_vgt_param_key;
>         unsigned                ia_multi_vgt_param[SI_NUM_VGT_PARAM_STATES];
> +
> +       /* Slab allocator for resident descriptors. */
> +       struct pb_slabs         resident_descriptor_slabs;
> +
> +       /* Resident descriptors. */
> +       struct r600_resource    **resident_descriptors;
> +       unsigned                num_resident_descriptors;
> +       unsigned                max_resident_descriptors;
>  };
>
>  /* cik_sdma.c */
> diff --git a/src/gallium/drivers/radeonsi/si_state.h 
> b/src/gallium/drivers/radeonsi/si_state.h
> index 275f830613..3e9016c84a 100644
> --- a/src/gallium/drivers/radeonsi/si_state.h
> +++ b/src/gallium/drivers/radeonsi/si_state.h
> @@ -30,6 +30,8 @@
>  #include "si_pm4.h"
>  #include "radeon/r600_pipe_common.h"
>
> +#include "pipebuffer/pb_slab.h"
> +
>  #define SI_NUM_GRAPHICS_SHADERS (PIPE_SHADER_TESS_EVAL+1)
>  #define SI_NUM_SHADERS (PIPE_SHADER_COMPUTE+1)
>
> @@ -335,6 +337,12 @@ void si_set_active_descriptors(struct si_context *sctx, 
> unsigned desc_idx,
>                                uint64_t new_active_mask);
>  void si_set_active_descriptors_for_shader(struct si_context *sctx,
>                                           struct si_shader_selector *sel);
> +bool si_resident_descriptor_can_reclaim_slab(void *priv,
> +                                            struct pb_slab_entry *entry);
> +struct pb_slab *si_resident_descriptor_slab_alloc(void *priv, unsigned heap,
> +                                                 unsigned entry_size,
> +                                                 unsigned group_index);
> +void si_resident_descriptor_slab_free(void *priv, struct pb_slab *pslab);
>
>  /* si_state.c */
>  struct si_shader_selector;
> --
> 2.13.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to