Add a structure to hold anv_states. This table will initially be used to recycle anv_states, instead of relying on a linked list implemented in GPU memory. Later it could be used so that all anv_states simply point into this struct, instead of copies of anv_states being made everywhere.
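The intended call flow looks roughly like this (just a sketch: "device" is a
placeholder for a valid struct anv_device pointer, the sizes are arbitrary,
error handling is dropped, and the state-pool integration is not shown):

   struct anv_state_table table;
   union anv_free_list2 free_list = ANV_FREE_LIST2_EMPTY;

   /* Back the table with a growable memfd mapping.  "device" is assumed
    * to be a valid struct anv_device *.
    */
   anv_state_table_init(&table, device, 64);

   /* Reserve a slot, then fill the anv_state in place through its pointer
    * (anv_state_table_add() already stores the index in state->idx).
    */
   uint32_t idx = anv_state_table_add(&table, 1);
   struct anv_state *state = anv_state_table_get(&table, idx);
   state->offset = 0;        /* would come from the state pool */
   state->alloc_size = 64;

   /* Recycling: push the index onto a CPU-side free list and pop it back
    * later, instead of chaining free states through GPU memory.
    */
   anv_state_table_push(&free_list, &table, idx, 1);
   struct anv_state *reused = anv_state_table_pop(&free_list, &table);

   anv_state_table_finish(&table);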
TODO:
1) Refine the API, especially anv_state_table_add(). So far we have to add
   an item, get the pointer to the anv_state, and then fill in its contents
   (as in the sketch above). I tried a few different approaches but need to
   come back to this one.
2) There's a lot of common code between this table's backing-store memory
   and the anv_block_pool buffer, because of how we grow them. I think it's
   possible to refactor this and reuse the code in both places.
3) Add unit tests.
---
 src/intel/vulkan/anv_allocator.c | 246 ++++++++++++++++++++++++++++++-
 src/intel/vulkan/anv_private.h   |  44 ++++++
 2 files changed, 288 insertions(+), 2 deletions(-)

diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
index 67f2f73aa11..3590ede6050 100644
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -100,6 +100,9 @@
 /* Allocations are always at least 64 byte aligned, so 1 is an invalid value.
  * We use it to indicate the free list is empty. */
 #define EMPTY 1
+#define EMPTY2 UINT32_MAX
+
+#define PAGE_SIZE 4096
 struct anv_mmap_cleanup {
    void *map;
    size_t size;
@@ -130,6 +133,246 @@ round_to_power_of_two(uint32_t value)
    return 1 << ilog2_round_up(value);
 }

+struct anv_state_table_cleanup {
+   void *map;
+   size_t size;
+};
+
+#define ANV_STATE_TABLE_CLEANUP_INIT ((struct anv_state_table_cleanup){0})
+#define ANV_STATE_ENTRY_SIZE (sizeof(struct anv_free_entry))
+
+static VkResult
+anv_state_table_expand_range(struct anv_state_table *table, uint32_t size);
+
+VkResult
+anv_state_table_init(struct anv_state_table *table,
+                     struct anv_device *device,
+                     uint32_t initial_entries)
+{
+   VkResult result;
+
+   table->device = device;
+
+   table->fd = memfd_create("free table", MFD_CLOEXEC);
+   if (table->fd == -1)
+      return vk_error(VK_ERROR_INITIALIZATION_FAILED);
+
+   /* Just make it 2GB up-front. The Linux kernel won't actually back it
+    * with pages until we either map and fault on one of them or we use
+    * userptr and send a chunk of it off to the GPU.
+    */
+   if (ftruncate(table->fd, BLOCK_POOL_MEMFD_SIZE) == -1) {
+      result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
+      goto fail_fd;
+   }
+
+   if (!u_vector_init(&table->mmap_cleanups,
+                      round_to_power_of_two(sizeof(struct anv_state_table_cleanup)),
+                      128)) {
+      result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
+      goto fail_fd;
+   }
+
+   table->state.next = 0;
+   table->state.end = 0;
+   table->size = 0;
+
+   uint32_t initial_size = initial_entries * ANV_STATE_ENTRY_SIZE;
+   result = anv_state_table_expand_range(table, initial_size);
+   if (result != VK_SUCCESS)
+      goto fail_mmap_cleanups;
+
+   return VK_SUCCESS;
+
+ fail_mmap_cleanups:
+   u_vector_finish(&table->mmap_cleanups);
+ fail_fd:
+   close(table->fd);
+
+   return result;
+}
+
+static VkResult
+anv_state_table_expand_range(struct anv_state_table *table, uint32_t size)
+{
+   void *map;
+   struct anv_mmap_cleanup *cleanup;
+
+   /* Assert that we only ever grow the pool */
+   assert(size >= table->state.end);
+
+   /* Assert that we don't go outside the bounds of the memfd */
+   assert(size <= BLOCK_POOL_MEMFD_SIZE);
+
+   cleanup = u_vector_add(&table->mmap_cleanups);
+   if (!cleanup)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   *cleanup = ANV_MMAP_CLEANUP_INIT;
+
+   /* Just leak the old map until we destroy the pool. We can't munmap it
+    * without races or imposing locking on the block allocate fast path. On
+    * the whole, the leaked maps add up to less than the size of the
+    * current map. MAP_POPULATE seems like the right thing to do, but we
+    * should try to get some numbers.
+    */
+   map = mmap(NULL, size, PROT_READ | PROT_WRITE,
+              MAP_SHARED | MAP_POPULATE, table->fd, 0);
+   if (map == MAP_FAILED) {
+      exit(-1);
+      return vk_errorf(table->device->instance, table->device,
+                       VK_ERROR_MEMORY_MAP_FAILED, "mmap failed: %m");
+   }
+
+   cleanup->map = map;
+   cleanup->size = size;
+
+   table->map = map;
+   table->size = size;
+
+   return VK_SUCCESS;
+}
+
+static uint32_t
+anv_state_table_grow(struct anv_state_table *table)
+{
+   VkResult result = VK_SUCCESS;
+
+   pthread_mutex_lock(&table->device->mutex);
+
+   uint32_t used = align_u32(table->state.next * ANV_STATE_ENTRY_SIZE,
+                             PAGE_SIZE);
+   uint32_t old_size = table->size;
+
+   /* The table is always initialized to a nonzero size and this function
+    * is always called after initialization.
+    */
+   assert(old_size > 0);
+
+   uint32_t required = MAX2(used, old_size);
+   if (used * 2 <= required) {
+      /* If we're in this case then this isn't the first allocation and we
+       * already have enough space on both sides to hold double what we
+       * have allocated. There's nothing for us to do.
+       */
+      goto done;
+   }
+
+   uint32_t size = old_size * 2;
+   while (size < required)
+      size *= 2;
+
+   assert(size > table->size);
+
+   result = anv_state_table_expand_range(table, size);
+
+ done:
+   pthread_mutex_unlock(&table->device->mutex);
+
+   if (result == VK_SUCCESS) {
+      /* Return the appropriate new size. This function never actually
+       * updates state->next. Instead, we let the caller do that because it
+       * needs to do so in order to maintain its concurrency model.
+       */
+      return table->size / ANV_STATE_ENTRY_SIZE;
+   } else {
+      return 0;
+   }
+}
+
+void
+anv_state_table_finish(struct anv_state_table *table)
+{
+   struct anv_state_table_cleanup *cleanup;
+
+   u_vector_foreach(cleanup, &table->mmap_cleanups) {
+      if (cleanup->map)
+         munmap(cleanup->map, cleanup->size);
+   }
+
+   u_vector_finish(&table->mmap_cleanups);
+
+   close(table->fd);
+}
+
+uint32_t
+anv_state_table_add(struct anv_state_table *table, uint32_t count)
+{
+   struct anv_block_state state, old, new;
+
+   while (1) {
+      state.u64 = __sync_fetch_and_add(&table->state.u64, count);
+      if (state.next + count <= state.end) {
+         assert(table->map);
+         struct anv_free_entry *entry = &table->map[state.next];
+         for (int i = 0; i < count; i++) {
+            entry[i].state.idx = state.next + i;
+         }
+         return state.next;
+      } else if (state.next <= state.end) {
+         /* We allocated the first block outside the pool so we have to grow
+          * the pool. pool_state->next acts as a mutex: threads who try to
+          * allocate now will get block indexes above the current limit and
+          * hit futex_wait below.
+          */
+         new.next = state.next + count;
+         do {
+            new.end = anv_state_table_grow(table);
+         } while (new.end < new.next);
+
+         old.u64 = __sync_lock_test_and_set(&table->state.u64, new.u64);
+         if (old.next != state.next)
+            futex_wake(&table->state.end, INT_MAX);
+      } else {
+         futex_wait(&table->state.end, state.end, NULL);
+         continue;
+      }
+   }
+}
+
+void
+anv_state_table_push(union anv_free_list2 *list,
+                     struct anv_state_table *table,
+                     uint32_t idx, uint32_t count)
+{
+   union anv_free_list2 current, old, new;
+   uint32_t next = idx;
+
+   for (uint32_t i = 1; i < count; i++, next++)
+      table->map[next].list.offset = next + 1;
+
+   old = *list;
+   do {
+      current = old;
+      table->map[next].list.offset = current.offset;
+      new.offset = idx;
+      new.count = current.count + 1;
+      old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64);
+   } while (old.u64 != current.u64);
+}
+
+struct anv_state *
+anv_state_table_pop(union anv_free_list2 *list,
+                    struct anv_state_table *table)
+{
+   union anv_free_list2 current, new, old;
+
+   current.u64 = list->u64;
+   while (current.offset != EMPTY2) {
+      __sync_synchronize();
+      new.offset = table->map[current.offset].list.offset;
+      new.count = current.count;
+      old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64);
+      if (old.u64 == current.u64) {
+         struct anv_free_entry *entry = &table->map[current.offset];
+         return &entry->state;
+      }
+      current = old;
+   }
+
+   return NULL;
+}
+
 static bool
 anv_free_list_pop(union anv_free_list *list, void **map, int32_t *offset)
 {
@@ -311,8 +554,6 @@ anv_block_pool_finish(struct anv_block_pool *pool)
    close(pool->fd);
 }

-#define PAGE_SIZE 4096
-
 static VkResult
 anv_block_pool_expand_range(struct anv_block_pool *pool,
                             uint32_t center_bo_offset, uint32_t size)
@@ -782,6 +1023,7 @@ anv_state_pool_alloc_no_vg(struct anv_state_pool *pool,
                                               &pool->block_pool,
                                               state.alloc_size,
                                               pool->block_size);
+      /* state.idx = anv_state_table_add(pool->table, state); */

 done:
    state.map = pool->block_pool.map + state.offset;
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index aff076a55d9..3fe299d55f9 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -615,7 +615,18 @@ union anv_free_list {
    uint64_t u64;
 };

+union anv_free_list2 {
+   struct {
+      uint32_t offset;
+
+      /* A simple count that is incremented every time the head changes. */
+      uint32_t count;
+   };
+   uint64_t u64;
+};
+
 #define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { 1, 0 } })
+#define ANV_FREE_LIST2_EMPTY ((union anv_free_list2) { { UINT32_MAX, 0 } })

 struct anv_block_state {
    union {
@@ -687,6 +698,7 @@ struct anv_state {
    int32_t offset;
    uint32_t alloc_size;
    void *map;
+   uint32_t idx;
 };

 #define ANV_STATE_NULL ((struct anv_state) { .alloc_size = 0 })
@@ -701,6 +713,21 @@ struct anv_fixed_size_state_pool {
 #define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1)
+struct anv_free_entry {
+   union anv_free_list2 list;
+   struct anv_state state;
+};
+
+struct anv_state_table {
+   struct anv_device *device;
+   int fd;
+   /* void *map; */
+   struct anv_free_entry *map;
+   uint32_t size;
+   struct anv_block_state state;
+   struct u_vector mmap_cleanups;
+};
+
 struct anv_state_pool {
    struct anv_block_pool block_pool;
@@ -762,6 +789,23 @@ void anv_state_stream_finish(struct anv_state_stream *stream);
 struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream,
                                         uint32_t size, uint32_t alignment);

+VkResult anv_state_table_init(struct anv_state_table *table,
+                              struct anv_device *device,
+                              uint32_t initial_entries);
+void anv_state_table_finish(struct anv_state_table *table);
+uint32_t anv_state_table_add(struct anv_state_table *table, uint32_t count);
+void anv_state_table_push(union anv_free_list2 *list,
+                          struct anv_state_table *table,
+                          uint32_t idx, uint32_t count);
+struct anv_state* anv_state_table_pop(union anv_free_list2 *list,
+                                      struct anv_state_table *table);
+
+
+static inline struct anv_state *
+anv_state_table_get(struct anv_state_table *table, uint32_t idx)
+{
+   return &table->map[idx].state;
+}
 /**
  * Implements a pool of re-usable BOs. The interface is identical to that
  * of block_pool except that each block is its own BO.
-- 
2.17.1
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev