The VK_EXT_inline_uniform_block extension adds an implicitly allocated
block of uniforms to descriptor sets through a new descriptor type.
We implement this by having a single BO in the descriptor set pool
from which we source uniforms.
Signed-off-by: Lionel Landwerlin <lionel.g.landwer...@intel.com>
---
src/intel/vulkan/anv_cmd_buffer.c | 3 +
src/intel/vulkan/anv_descriptor_set.c | 238 +++++++++++++++++-
src/intel/vulkan/anv_device.c | 22 ++
src/intel/vulkan/anv_extensions.py | 1 +
.../vulkan/anv_nir_apply_pipeline_layout.c | 52 ++++
src/intel/vulkan/anv_private.h | 33 +++
src/intel/vulkan/genX_cmd_buffer.c | 32 ++-
7 files changed, 367 insertions(+), 14 deletions(-)
diff --git a/src/intel/vulkan/anv_cmd_buffer.c
b/src/intel/vulkan/anv_cmd_buffer.c
index 8ef71b0ed9c..b14be94f470 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -651,6 +651,7 @@ anv_isl_format_for_descriptor_type(VkDescriptorType type)
switch (type) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
return ISL_FORMAT_R32G32B32A32_FLOAT;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
@@ -1039,6 +1040,8 @@ void anv_CmdPushDescriptorSetKHR(
}
break;
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+ unreachable("Invalid descriptor type for push descriptors");
default:
break;
}
diff --git a/src/intel/vulkan/anv_descriptor_set.c
b/src/intel/vulkan/anv_descriptor_set.c
index 3439f828900..2e5f2a1f288 100644
--- a/src/intel/vulkan/anv_descriptor_set.c
+++ b/src/intel/vulkan/anv_descriptor_set.c
@@ -26,8 +26,10 @@
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
+#include <sys/mman.h>
#include "util/mesa-sha1.h"
+#include "vk_util.h"
#include "anv_private.h"
@@ -40,7 +42,8 @@ void anv_GetDescriptorSetLayoutSupport(
const VkDescriptorSetLayoutCreateInfo* pCreateInfo,
VkDescriptorSetLayoutSupport* pSupport)
{
- uint32_t surface_count[MESA_SHADER_STAGES] = { 0, };
+ int16_t surface_count[MESA_SHADER_STAGES] = { 0, };
+ int16_t inline_surface_indexes[MESA_SHADER_STAGES] = { -1, };
for (uint32_t b = 0; b < pCreateInfo->bindingCount; b++) {
const VkDescriptorSetLayoutBinding *binding =
&pCreateInfo->pBindings[b];
@@ -50,6 +53,15 @@ void anv_GetDescriptorSetLayoutSupport(
/* There is no real limit on samplers */
break;
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+ anv_foreach_stage(s, binding->stageFlags) {
+ if (inline_surface_indexes[s] < 0) {
+ inline_surface_indexes[s] = surface_count[s];
+ surface_count[s] += 1;
+ }
+ }
+ break;
+
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
if (binding->pImmutableSamplers) {
for (uint32_t i = 0; i < binding->descriptorCount; i++) {
@@ -118,6 +130,9 @@ VkResult anv_CreateDescriptorSetLayout(
memset(set_layout, 0, sizeof(*set_layout));
set_layout->ref_cnt = 1;
set_layout->binding_count = max_binding + 1;
+ set_layout->inline_blocks_descriptor_index = -1;
+ memset(set_layout->inline_blocks_surface_indexes,
+ -1, sizeof(set_layout->inline_blocks_surface_indexes));
for (uint32_t b = 0; b <= max_binding; b++) {
/* Initialize all binding_layout entries to -1 */
@@ -159,9 +174,24 @@ VkResult anv_CreateDescriptorSetLayout(
#ifndef NDEBUG
set_layout->binding[b].type = binding->descriptorType;
#endif
- set_layout->binding[b].array_size = binding->descriptorCount;
- set_layout->binding[b].descriptor_index = set_layout->size;
- set_layout->size += binding->descriptorCount;
+
+ if (binding->descriptorType ==
VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
+ /* We only need a single descriptor entry for all the inline uniforms. */
+ set_layout->binding[b].array_size = 1;
+ if (set_layout->inline_blocks_descriptor_index < 0) {
+ set_layout->binding[b].descriptor_index =
+ set_layout->inline_blocks_descriptor_index =
+ set_layout->size;
+ set_layout->size += 1;
+ } else {
+ set_layout->binding[b].descriptor_index =
+ set_layout->inline_blocks_descriptor_index;
+ }
+ } else {
+ set_layout->binding[b].array_size = binding->descriptorCount;
+ set_layout->binding[b].descriptor_index = set_layout->size;
+ set_layout->size += binding->descriptorCount;
+ }
switch (binding->descriptorType) {
case VK_DESCRIPTOR_TYPE_SAMPLER:
@@ -176,6 +206,24 @@ VkResult anv_CreateDescriptorSetLayout(
}
switch (binding->descriptorType) {
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+ set_layout->binding[b].inline_block_offset =
set_layout->inline_blocks_size;
+ set_layout->binding[b].inline_block_length = binding->descriptorCount;
+ set_layout->inline_blocks_size += binding->descriptorCount;
+
+ anv_foreach_stage(s, binding->stageFlags) {
+ if (set_layout->inline_blocks_surface_indexes[s] < 0) {
+ set_layout->binding[b].stage[s].surface_index =
+ set_layout->inline_blocks_surface_indexes[s] =
+ surface_count[s];
+ surface_count[s] += 1;
+ } else {
+ set_layout->binding[b].stage[s].surface_index =
+ set_layout->inline_blocks_surface_indexes[s];
+ }
+ }
+ break;
+
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
@@ -235,6 +283,12 @@ VkResult anv_CreateDescriptorSetLayout(
set_layout->shader_stages |= binding->stageFlags;
}
+ /* Align inline uniforms total size to 32 because we source the allocation
+ * from a single BO in the descriptor set pool and we want the alignment to
+ * match the push constant alignment constraint.
+ */
+ set_layout->inline_blocks_size = ALIGN(set_layout->inline_blocks_size, 32);
+
set_layout->buffer_count = buffer_count;
set_layout->dynamic_offset_count = dynamic_offset_count;
@@ -405,21 +459,45 @@ VkResult anv_CreateDescriptorPool(
ANV_FROM_HANDLE(anv_device, device, _device);
struct anv_descriptor_pool *pool;
+ vk_foreach_struct(ext, pCreateInfo->pNext) {
+ switch (ext->sType) {
+ case
VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO_EXT:
+ /* Our driver doesn't need to know about this as we use a single
+ * binding table entry per stage if one or more inline descriptor
+ * blocks are used.
+ */
+ break;
+
+ default:
+ anv_debug_ignored_stype(ext->sType);
+ break;
+ }
+ }
+
uint32_t descriptor_count = 0;
uint32_t buffer_count = 0;
+ uint32_t inline_blocks_size = 0;
for (uint32_t i = 0; i < pCreateInfo->poolSizeCount; i++) {
switch (pCreateInfo->pPoolSizes[i].type) {
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+ inline_blocks_size += pCreateInfo->pPoolSizes[i].descriptorCount;
+ break;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
buffer_count += pCreateInfo->pPoolSizes[i].descriptorCount;
+ /* Fallthrough */
default:
descriptor_count += pCreateInfo->pPoolSizes[i].descriptorCount;
break;
}
}
+ /* We'll need one more descriptor per set that uses inline uniforms. */
+ if (inline_blocks_size > 0)
+ descriptor_count += MIN2(pCreateInfo->maxSets, inline_blocks_size / 4);
+
const size_t pool_size =
pCreateInfo->maxSets * sizeof(struct anv_descriptor_set) +
descriptor_count * sizeof(struct anv_descriptor) +
@@ -435,6 +513,34 @@ VkResult anv_CreateDescriptorPool(
pool->next_set = 0;
pool->free_set_list = EMPTY;
+ pool->inline_blocks_size = inline_blocks_size;
+ pool->next_block = 0;
+ pool->free_block_list = EMPTY;
+
+ pool->inline_blocks_bo = NULL;
+ if (pool->inline_blocks_size > 0) {
+ struct anv_physical_device *pdevice = &device->instance->physicalDevice;
+ uint64_t bo_flags =
+ (pdevice->supports_48bit_addresses ? EXEC_OBJECT_SUPPORTS_48B_ADDRESS
: 0) |
+ (pdevice->has_exec_async ? EXEC_OBJECT_ASYNC : 0) |
+ (pdevice->use_softpin ? EXEC_OBJECT_PINNED : 0);
+
+ VkResult result = anv_bo_cache_alloc(device, &device->bo_cache,
+ pool->inline_blocks_size,
+ bo_flags,
+ &pool->inline_blocks_bo);
+ if (result != VK_SUCCESS)
+ goto fail_iubo_alloc;
+
+ uint32_t gem_flags = !device->info.has_llc ? I915_MMAP_WC : 0;
+ void *map = anv_gem_mmap(device, pool->inline_blocks_bo->gem_handle,
+ 0, pool->inline_blocks_bo->size, gem_flags);
+ if (map == MAP_FAILED)
+ goto fail_iubo_mmap;
+
+ pool->inline_blocks_bo->map = map;
+ }
+
anv_state_stream_init(&pool->surface_state_stream,
&device->surface_state_pool, 4096);
pool->surface_state_free_list = NULL;
@@ -442,6 +548,12 @@ VkResult anv_CreateDescriptorPool(
*pDescriptorPool = anv_descriptor_pool_to_handle(pool);
return VK_SUCCESS;
+
+fail_iubo_mmap:
+ anv_bo_cache_release(device, &device->bo_cache, pool->inline_blocks_bo);
+fail_iubo_alloc:
+ vk_free2(&device->alloc, pAllocator, pool);
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
void anv_DestroyDescriptorPool(
@@ -455,6 +567,8 @@ void anv_DestroyDescriptorPool(
if (!pool)
return;
+ if (pool->inline_blocks_bo)
+ anv_bo_cache_release(device, &device->bo_cache, pool->inline_blocks_bo);
anv_state_stream_finish(&pool->surface_state_stream);
vk_free2(&device->alloc, pAllocator, pool);
}
@@ -469,6 +583,8 @@ VkResult anv_ResetDescriptorPool(
pool->next_set = 0;
pool->free_set_list = EMPTY;
+ pool->next_block = 0;
+ pool->free_block_list = EMPTY;
anv_state_stream_finish(&pool->surface_state_stream);
anv_state_stream_init(&pool->surface_state_stream,
&device->surface_state_pool, 4096);
@@ -496,6 +612,36 @@ struct surface_state_free_list_entry {
struct anv_state state;
};
+/**
+ * Reserve inline uniform block storage for a descriptor set.
+ *
+ * If the layout declares no inline uniform data, set->inline_blocks is set to
+ * NULL and the set is returned as is. Otherwise the storage is taken from the
+ * mapping of the pool's inline uniform BO: first from the unused tail tracked
+ * by pool->next_block, then from the first free-list entry that is large
+ * enough. Free-list links are read as byte offsets from the start of that
+ * mapping.
+ *
+ * Returns set on success, or NULL when the pool cannot satisfy the request.
+ */
+static struct anv_descriptor_set *
+anv_descriptor_inline_block_alloc(struct anv_descriptor_pool *pool,
+                                  struct anv_descriptor_set_layout *layout,
+                                  struct anv_descriptor_set *set)
+{
+   if (layout->inline_blocks_size == 0) {
+      set->inline_blocks = NULL;
+      return set;
+   }
+
+   /* Fast path: bump-allocate from the unused tail of the BO. */
+   if (layout->inline_blocks_size <=
+       pool->inline_blocks_size - pool->next_block) {
+      set->inline_blocks = pool->inline_blocks_bo->map + pool->next_block;
+      pool->next_block += layout->inline_blocks_size;
+      return set;
+   }
+
+   /* Slow path: first fit in the free list. */
+   struct pool_free_list_entry *entry;
+   uint32_t *link = &pool->free_block_list;
+   for (uint32_t f = pool->free_block_list; f != EMPTY; f = entry->next) {
+      entry = (struct pool_free_list_entry *)
+         (pool->inline_blocks_bo->map + f);
+      if (layout->inline_blocks_size <= entry->size) {
+         /* Unlink through the predecessor's next field (or the list head). */
+         *link = entry->next;
+         set->inline_blocks = entry;
+         return set;
+      }
+      /* Track where the link to the following entry lives, so that a later
+       * match unlinks only itself instead of overwriting the list head and
+       * losing every entry that precedes it.
+       */
+      link = &entry->next;
+   }
+
+   return NULL;
+}
+
static struct anv_descriptor_set *
anv_descriptor_alloc(struct anv_descriptor_pool *pool,
struct anv_descriptor_set_layout *layout,
@@ -504,8 +650,10 @@ anv_descriptor_alloc(struct anv_descriptor_pool *pool,
struct anv_descriptor_set *set = NULL;
if (size <= pool->size - pool->next_set) {
- set = (struct anv_descriptor_set *) (pool->data + pool->next_set);
- pool->next_set += size;
+ set = anv_descriptor_inline_block_alloc(pool, layout,
+ (struct anv_descriptor_set *) (pool->data + pool->next_set));
+ if (set)
+ pool->next_set += size;
return set;
}
@@ -515,8 +663,10 @@ anv_descriptor_alloc(struct anv_descriptor_pool *pool,
entry = (struct pool_free_list_entry *) (pool->data + f);
if (size <= entry->size) {
uint32_t next = entry->next;
- set = (struct anv_descriptor_set *) entry;
- *link = next;
+ set = anv_descriptor_inline_block_alloc(pool, layout,
+ (struct anv_descriptor_set *) entry);
+ if (set)
+ *link = next;
return set;
}
link = &entry->next;
@@ -573,6 +723,18 @@ anv_descriptor_set_create(struct anv_device *device,
desc += layout->binding[b].array_size;
}
+ /* Also fill the inline uniforms blocks if needed. */
+ if (layout->inline_blocks_descriptor_index >= 0) {
+ set->descriptors[layout->inline_blocks_descriptor_index] = (struct
anv_descriptor) {
+ .type = VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT,
+ .inline_address = (struct anv_address) {
+ .bo = pool->inline_blocks_bo,
+ .offset = set->inline_blocks - pool->inline_blocks_bo->map,
+ },
+ .inline_range = layout->inline_blocks_size,
+ };
+ }
+
/* Allocate surface state for the buffer views. */
for (uint32_t b = 0; b < layout->buffer_count; b++) {
struct surface_state_free_list_entry *entry =
@@ -600,8 +762,6 @@ anv_descriptor_set_destroy(struct anv_device *device,
struct anv_descriptor_pool *pool,
struct anv_descriptor_set *set)
{
- anv_descriptor_set_layout_unref(device, set->layout);
-
/* Put the buffer view surface state back on the free list. */
for (uint32_t b = 0; b < set->buffer_count; b++) {
struct surface_state_free_list_entry *entry =
@@ -611,7 +771,22 @@ anv_descriptor_set_destroy(struct anv_device *device,
pool->surface_state_free_list = entry;
}
- /* Put the descriptor set allocation back on the free list. */
+ /* Put the inline uniform blocks back on the free list. */
+ if (set->inline_blocks) {
+ const uint32_t index = set->inline_blocks - pool->inline_blocks_bo->map;
+ if (index + set->layout->inline_blocks_size == pool->next_block) {
+ pool->next_block = index;
+ } else {
+ struct pool_free_list_entry *entry = (struct pool_free_list_entry *)
set->inline_blocks;
+ entry->next = pool->free_block_list;
+ entry->size = set->layout->inline_blocks_size;
+ pool->free_block_list = (char *) entry - pool->data;
+ }
+ }
+
+ anv_descriptor_set_layout_unref(device, set->layout);
+
+ /* Finally, put the descriptor set allocation back on the free list. */
const uint32_t index = (char *) set - pool->data;
if (index + set->size == pool->next_set) {
pool->next_set = index;
@@ -798,6 +973,23 @@ anv_descriptor_set_write_buffer(struct anv_descriptor_set
*set,
}
}
+/**
+ * Copy inline uniform data into a descriptor set's inline block storage.
+ *
+ * set          descriptor set whose inline_blocks storage is written
+ * type         descriptor type of the write; asserted to match the binding
+ * binding      index into set->layout->binding
+ * dst_offset   offset of the write inside the binding's inline block
+ * data         source of the copy
+ * data_length  number of bytes copied
+ *
+ * The destination is set->inline_blocks + the binding's inline_block_offset +
+ * dst_offset. Debug builds assert that the binding index is in range and that
+ * the written range stays inside the binding's inline_block_length, since an
+ * overrun would land in storage belonging to another binding or set.
+ */
+static void
+anv_descriptor_set_write_inline_uniforms(struct anv_descriptor_set *set,
+                                         VkDescriptorType type,
+                                         uint32_t binding,
+                                         uint32_t dst_offset,
+                                         const void *data,
+                                         uint32_t data_length)
+{
+   assert(binding < set->layout->binding_count);
+
+   const struct anv_descriptor_set_binding_layout *bind_layout =
+      &set->layout->binding[binding];
+
+   assert(type == bind_layout->type);
+   /* Written as two comparisons so the check itself cannot wrap around. */
+   assert(dst_offset <= bind_layout->inline_block_length &&
+          data_length <= bind_layout->inline_block_length - dst_offset);
+
+   memcpy(set->inline_blocks + bind_layout->inline_block_offset + dst_offset,
+          data, data_length);
+}
+
void anv_UpdateDescriptorSets(
VkDevice _device,
uint32_t descriptorWriteCount,
@@ -826,6 +1018,20 @@ void anv_UpdateDescriptorSets(
}
break;
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: {
+ const VkWriteDescriptorSetInlineUniformBlockEXT *inline_ub =
+ vk_find_struct_const(write->pNext,
+
WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK_EXT);
+ assert(inline_ub);
+ anv_descriptor_set_write_inline_uniforms(set,
+ write->descriptorType,
+ write->dstBinding,
+ write->dstArrayElement,
+ inline_ub->pData,
+ inline_ub->dataSize);
+ break;
+ }
+
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
for (uint32_t j = 0; j < write->descriptorCount; j++) {
@@ -954,6 +1160,16 @@ anv_descriptor_set_write_template(struct
anv_descriptor_set *set,
}
break;
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: {
+ anv_descriptor_set_write_inline_uniforms(set,
+ entry->type,
+ entry->binding,
+ entry->array_element,
+ data + entry->offset,
+ entry->array_count);