On Tue, Sep 13, 2016 at 11:56 AM, Nicolai Hähnle <nhaeh...@gmail.com> wrote: > From: Nicolai Hähnle <nicolai.haeh...@amd.com> > > --- > src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 165 > ++++++++++++++++++++++++++ > src/gallium/winsys/amdgpu/drm/amdgpu_bo.h | 18 +++ > src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 13 ++ > src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h | 5 + > 4 files changed, 201 insertions(+) > > diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c > b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c > index f581d9b..6a61b30 100644 > --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c > +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c > @@ -31,20 +31,27 @@ > > #include "amdgpu_cs.h" > > #include "os/os_time.h" > #include "state_tracker/drm_driver.h" > #include <amdgpu_drm.h> > #include <xf86drm.h> > #include <stdio.h> > #include <inttypes.h> > > +static struct pb_buffer * > +amdgpu_bo_create(struct radeon_winsys *rws, > + uint64_t size, > + unsigned alignment, > + enum radeon_bo_domain domain, > + enum radeon_bo_flag flags); > + > static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout, > enum radeon_bo_usage usage) > { > struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf); > struct amdgpu_winsys *ws = bo->ws; > int64_t abs_timeout; > > if (timeout == 0) { > if (p_atomic_read(&bo->num_active_ioctls)) > return false; > @@ -436,20 +443,130 @@ bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf) > { > struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf); > > if (amdgpu_bo_is_referenced_by_any_cs(bo)) { > return false; > } > > return amdgpu_bo_wait(_buf, 0, RADEON_USAGE_READWRITE); > } > > +bool amdgpu_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry) > +{ > + struct amdgpu_winsys_bo *bo = NULL; /* fix container_of */ > + bo = container_of(entry, bo, u.slab.entry); > + > + return amdgpu_bo_can_reclaim(&bo->base); > +} > + > +static void amdgpu_bo_slab_destroy(struct pb_buffer *_buf) > +{ > + struct amdgpu_winsys_bo *bo = 
amdgpu_winsys_bo(_buf); > + > + assert(!bo->bo); > + > + pb_slab_free(&bo->ws->bo_slabs, &bo->u.slab.entry); > +} > + > +static const struct pb_vtbl amdgpu_winsys_bo_slab_vtbl = { > + amdgpu_bo_slab_destroy > + /* other functions are never called */ > +}; > + > +struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap, > + unsigned entry_size, > + unsigned group_index) > +{ > + struct amdgpu_winsys *ws = priv; > + struct amdgpu_slab *slab = CALLOC_STRUCT(amdgpu_slab); > + enum radeon_bo_domain domains; > + enum radeon_bo_flag flags = 0; > + uint32_t base_id; > + > + if (!slab) > + return NULL; > + > + if (heap & 1) > + flags |= RADEON_FLAG_GTT_WC; > + if (heap & 2) > + flags |= RADEON_FLAG_CPU_ACCESS; > + > + switch (heap >> 2) { > + case 0: > + domains = RADEON_DOMAIN_VRAM; > + break; > + default: > + case 1: > + domains = RADEON_DOMAIN_VRAM_GTT; > + break; > + case 2: > + domains = RADEON_DOMAIN_GTT; > + break; > + } > + > + slab->buffer = amdgpu_winsys_bo(amdgpu_bo_create(&ws->base, > + 64 * 1024, 64 * 1024, > + domains, flags)); > + if (!slab->buffer) > + goto fail; > + > + assert(slab->buffer->bo); > + > + slab->base.num_entries = slab->buffer->base.size / entry_size; > + slab->base.num_free = slab->base.num_entries; > + slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries)); > + if (!slab->entries) > + goto fail_buffer; > + > + LIST_INITHEAD(&slab->base.free); > + > + base_id = __sync_fetch_and_add(&ws->next_bo_unique_id, > slab->base.num_entries); > + > + for (unsigned i = 0; i < slab->base.num_entries; ++i) { > + struct amdgpu_winsys_bo *bo = &slab->entries[i]; > + > + bo->base.alignment = entry_size; > + bo->base.usage = slab->buffer->base.usage; > + bo->base.size = entry_size; > + bo->base.vtbl = &amdgpu_winsys_bo_slab_vtbl; > + bo->ws = ws; > + bo->va = slab->buffer->va + i * entry_size; > + bo->initial_domain = domains; > + bo->unique_id = base_id + i; > + bo->u.slab.entry.slab = &slab->base; > + bo->u.slab.entry.group_index = 
group_index; > + bo->u.slab.real = slab->buffer; > + > + LIST_ADDTAIL(&bo->u.slab.entry.head, &slab->base.free); > + } > + > + return &slab->base; > + > +fail_buffer: > + amdgpu_winsys_bo_reference(&slab->buffer, NULL); > +fail: > + FREE(slab); > + return NULL; > +} > + > +void amdgpu_bo_slab_free(void *priv, struct pb_slab *pslab) > +{ > + struct amdgpu_slab *slab = amdgpu_slab(pslab); > + > + for (unsigned i = 0; i < slab->base.num_entries; ++i) > + amdgpu_bo_remove_fences(&slab->entries[i]); > + > + FREE(slab->entries); > + amdgpu_winsys_bo_reference(&slab->buffer, NULL); > + FREE(slab); > +} > + > static unsigned eg_tile_split(unsigned tile_split) > { > switch (tile_split) { > case 0: tile_split = 64; break; > case 1: tile_split = 128; break; > case 2: tile_split = 256; break; > case 3: tile_split = 512; break; > default: > case 4: tile_split = 1024; break; > case 5: tile_split = 2048; break; > @@ -548,20 +665,67 @@ static struct pb_buffer * > amdgpu_bo_create(struct radeon_winsys *rws, > uint64_t size, > unsigned alignment, > enum radeon_bo_domain domain, > enum radeon_bo_flag flags) > { > struct amdgpu_winsys *ws = amdgpu_winsys(rws); > struct amdgpu_winsys_bo *bo; > unsigned usage = 0, pb_cache_bucket; > > + /* Sub-allocate small buffers from slabs. */ > + if (!(flags & RADEON_FLAG_HANDLE) && > + size <= (1 << AMDGPU_SLAB_MAX_SIZE_LOG2) && > + alignment <= MAX2(1 << AMDGPU_SLAB_MIN_SIZE_LOG2, > util_next_power_of_two(size))) { > + struct pb_slab_entry *entry; > + unsigned heap = 0; > +
A comment would be useful here to say that the following code filters out textures. (i.e. HANDLE = textures, INVISIBLE VRAM = tiled textures) Marek > + if (flags & RADEON_FLAG_GTT_WC) > + heap |= 1; > + if (flags & RADEON_FLAG_CPU_ACCESS) > + heap |= 2; > + if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS)) > + goto no_slab; > + > + switch (domain) { > + case RADEON_DOMAIN_VRAM: > + heap |= 0 * 4; > + break; > + case RADEON_DOMAIN_VRAM_GTT: > + heap |= 1 * 4; > + break; > + case RADEON_DOMAIN_GTT: > + heap |= 2 * 4; > + break; > + default: > + goto no_slab; > + } > + > + entry = pb_slab_alloc(&ws->bo_slabs, size, heap); > + if (!entry) { > + /* Clear the cache and try again. */ > + pb_cache_release_all_buffers(&ws->bo_cache); > + > + entry = pb_slab_alloc(&ws->bo_slabs, size, heap); > + } > + if (!entry) > + return NULL; > + > + bo = NULL; > + bo = container_of(entry, bo, u.slab.entry); > + > + pipe_reference_init(&bo->base.reference, 1); > + > + return &bo->base; > + } > +no_slab: > + > /* This flag is irrelevant for the cache. */ > flags &= ~RADEON_FLAG_HANDLE; > > /* Align size to page size. This is the minimum alignment for normal > * BOs. Aligning this here helps the cached bufmgr. Especially small BOs, > * like constant/uniform buffers, can benefit from better and more reuse. > */ > size = align64(size, ws->info.gart_page_size); > alignment = align(alignment, ws->info.gart_page_size); > > @@ -590,20 +754,21 @@ amdgpu_bo_create(struct radeon_winsys *rws, > pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, usage, > pb_cache_bucket); > if (bo) > return &bo->base; > > /* Create a new one. */ > bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags, > pb_cache_bucket); > if (!bo) { > /* Clear the cache and try again. 
*/ > + pb_slabs_reclaim(&ws->bo_slabs); > pb_cache_release_all_buffers(&ws->bo_cache); > bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags, > pb_cache_bucket); > if (!bo) > return NULL; > } > > bo->u.real.use_reusable_pool = true; > return &bo->base; > } > diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h > b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h > index e5b5cf5..1e25897 100644 > --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h > +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h > @@ -74,28 +74,46 @@ struct amdgpu_winsys_bo { > * it can only transition from false to true > */ > volatile int is_shared; /* bool (int for atomicity) */ > > /* Fences for buffer synchronization. */ > unsigned num_fences; > unsigned max_fences; > struct pipe_fence_handle **fences; > }; > > +struct amdgpu_slab { > + struct pb_slab base; > + struct amdgpu_winsys_bo *buffer; > + struct amdgpu_winsys_bo *entries; > +}; > + > bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf); > void amdgpu_bo_destroy(struct pb_buffer *_buf); > void amdgpu_bo_init_functions(struct amdgpu_winsys *ws); > > +bool amdgpu_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry); > +struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap, > + unsigned entry_size, > + unsigned group_index); > +void amdgpu_bo_slab_free(void *priv, struct pb_slab *slab); > + > static inline > struct amdgpu_winsys_bo *amdgpu_winsys_bo(struct pb_buffer *bo) > { > return (struct amdgpu_winsys_bo *)bo; > } > > static inline > +struct amdgpu_slab *amdgpu_slab(struct pb_slab *slab) > +{ > + return (struct amdgpu_slab *)slab; > +} > + > +static inline > void amdgpu_winsys_bo_reference(struct amdgpu_winsys_bo **dst, > struct amdgpu_winsys_bo *src) > { > pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src); > } > > #endif > diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c > b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c > index 3961ee3..c83489d 100644 > --- 
a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c > +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c > @@ -375,20 +375,21 @@ static void do_winsys_deinit(struct amdgpu_winsys *ws) > } > > static void amdgpu_winsys_destroy(struct radeon_winsys *rws) > { > struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws; > > if (util_queue_is_initialized(&ws->cs_queue)) > util_queue_destroy(&ws->cs_queue); > > pipe_mutex_destroy(ws->bo_fence_lock); > + pb_slabs_deinit(&ws->bo_slabs); > pb_cache_deinit(&ws->bo_cache); > pipe_mutex_destroy(ws->global_bo_list_lock); > do_winsys_deinit(ws); > FREE(rws); > } > > static void amdgpu_winsys_query_info(struct radeon_winsys *rws, > struct radeon_info *info) > { > *info = ((struct amdgpu_winsys *)rws)->info; > @@ -540,20 +541,29 @@ amdgpu_winsys_create(int fd, radeon_screen_create_t > screen_create) > ws->info.drm_minor = drm_minor; > > if (!do_winsys_init(ws, fd)) > goto fail_alloc; > > /* Create managers. */ > pb_cache_init(&ws->bo_cache, 500000, ws->check_vm ? 1.0f : 2.0f, 0, > (ws->info.vram_size + ws->info.gart_size) / 8, > amdgpu_bo_destroy, amdgpu_bo_can_reclaim); > > + if (!pb_slabs_init(&ws->bo_slabs, > + AMDGPU_SLAB_MIN_SIZE_LOG2, AMDGPU_SLAB_MAX_SIZE_LOG2, > + 12, /* number of heaps (domain/flags combinations) */ > + ws, > + amdgpu_bo_can_reclaim_slab, > + amdgpu_bo_slab_alloc, > + amdgpu_bo_slab_free)) > + goto fail_cache; > + > /* init reference */ > pipe_reference_init(&ws->reference, 1); > > /* Set functions. 
*/ > ws->base.unref = amdgpu_winsys_unref; > ws->base.destroy = amdgpu_winsys_destroy; > ws->base.query_info = amdgpu_winsys_query_info; > ws->base.cs_request_feature = amdgpu_cs_request_feature; > ws->base.query_value = amdgpu_query_value; > ws->base.read_registers = amdgpu_read_registers; > @@ -583,16 +593,19 @@ amdgpu_winsys_create(int fd, radeon_screen_create_t > screen_create) > > util_hash_table_set(dev_tab, dev, ws); > > /* We must unlock the mutex once the winsys is fully initialized, so that > * other threads attempting to create the winsys from the same fd will > * get a fully initialized winsys and not just half-way initialized. */ > pipe_mutex_unlock(dev_tab_mutex); > > return &ws->base; > > +fail_cache: > + pb_cache_deinit(&ws->bo_cache); > + do_winsys_deinit(ws); > fail_alloc: > FREE(ws); > fail: > pipe_mutex_unlock(dev_tab_mutex); > return NULL; > } > diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h > b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h > index 96d4e6d..69c6638 100644 > --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h > +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h > @@ -26,31 +26,36 @@ > */ > /* > * Authors: > * Marek Olšák <mar...@gmail.com> > */ > > #ifndef AMDGPU_WINSYS_H > #define AMDGPU_WINSYS_H > > #include "pipebuffer/pb_cache.h" > +#include "pipebuffer/pb_slab.h" > #include "gallium/drivers/radeon/radeon_winsys.h" > #include "addrlib/addrinterface.h" > #include "util/u_queue.h" > #include <amdgpu.h> > > struct amdgpu_cs; > > +#define AMDGPU_SLAB_MIN_SIZE_LOG2 9 > +#define AMDGPU_SLAB_MAX_SIZE_LOG2 14 > + > struct amdgpu_winsys { > struct radeon_winsys base; > struct pipe_reference reference; > struct pb_cache bo_cache; > + struct pb_slabs bo_slabs; > > amdgpu_device_handle dev; > > pipe_mutex bo_fence_lock; > > int num_cs; /* The number of command streams created. 
*/ > uint32_t next_bo_unique_id; > uint64_t allocated_vram; > uint64_t allocated_gtt; > uint64_t mapped_vram; > -- > 2.7.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev