On Tue, Sep 11, 2018 at 10:23 PM Lionel Landwerlin <lionel.g.landwer...@intel.com> wrote: > > This new extension adds an implicitly allocated block of uniforms into > the descriptors sets through a new descriptor type. > > We implement this by having a single BO in the descriptor set pool > from which we source uniforms. > > Signed-off-by: Lionel Landwerlin <lionel.g.landwer...@intel.com> > --- > src/intel/vulkan/anv_cmd_buffer.c | 3 + > src/intel/vulkan/anv_descriptor_set.c | 238 +++++++++++++++++- > src/intel/vulkan/anv_device.c | 22 ++ > src/intel/vulkan/anv_extensions.py | 1 + > .../vulkan/anv_nir_apply_pipeline_layout.c | 52 ++++ > src/intel/vulkan/anv_private.h | 33 +++ > src/intel/vulkan/genX_cmd_buffer.c | 32 ++- > 7 files changed, 367 insertions(+), 14 deletions(-) > > diff --git a/src/intel/vulkan/anv_cmd_buffer.c > b/src/intel/vulkan/anv_cmd_buffer.c > index 8ef71b0ed9c..b14be94f470 100644 > --- a/src/intel/vulkan/anv_cmd_buffer.c > +++ b/src/intel/vulkan/anv_cmd_buffer.c > @@ -651,6 +651,7 @@ anv_isl_format_for_descriptor_type(VkDescriptorType type) > switch (type) { > case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: > case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: > + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: > return ISL_FORMAT_R32G32B32A32_FLOAT; > > case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: > @@ -1039,6 +1040,8 @@ void anv_CmdPushDescriptorSetKHR( > } > break; > > + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: > + unreachable("Invalid descriptor type for push descriptors"); > default: > break; > } > diff --git a/src/intel/vulkan/anv_descriptor_set.c > b/src/intel/vulkan/anv_descriptor_set.c > index 3439f828900..2e5f2a1f288 100644 > --- a/src/intel/vulkan/anv_descriptor_set.c > +++ b/src/intel/vulkan/anv_descriptor_set.c > @@ -26,8 +26,10 @@ > #include <string.h> > #include <unistd.h> > #include <fcntl.h> > +#include <sys/mman.h> > > #include "util/mesa-sha1.h" > +#include "vk_util.h" > > #include "anv_private.h" > > @@ -40,7 +42,8 @@ void 
anv_GetDescriptorSetLayoutSupport( > const VkDescriptorSetLayoutCreateInfo* pCreateInfo, > VkDescriptorSetLayoutSupport* pSupport) > { > - uint32_t surface_count[MESA_SHADER_STAGES] = { 0, }; > + int16_t surface_count[MESA_SHADER_STAGES] = { 0, }; > + int16_t inline_surface_indexes[MESA_SHADER_STAGES] = { -1, }; > > for (uint32_t b = 0; b < pCreateInfo->bindingCount; b++) { > const VkDescriptorSetLayoutBinding *binding = > &pCreateInfo->pBindings[b]; > @@ -50,6 +53,15 @@ void anv_GetDescriptorSetLayoutSupport( > /* There is no real limit on samplers */ > break; > > + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: > + anv_foreach_stage(s, binding->stageFlags) { > + if (inline_surface_indexes[s] < 0) { > + inline_surface_indexes[s] = surface_count[s]; > + surface_count[s] += 1; > + } > + } > + break; > + > case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: > if (binding->pImmutableSamplers) { > for (uint32_t i = 0; i < binding->descriptorCount; i++) { > @@ -118,6 +130,9 @@ VkResult anv_CreateDescriptorSetLayout( > memset(set_layout, 0, sizeof(*set_layout)); > set_layout->ref_cnt = 1; > set_layout->binding_count = max_binding + 1; > + set_layout->inline_blocks_descriptor_index = -1; > + memset(set_layout->inline_blocks_surface_indexes, > + -1, sizeof(set_layout->inline_blocks_surface_indexes)); > > for (uint32_t b = 0; b <= max_binding; b++) { > /* Initialize all binding_layout entries to -1 */ > @@ -159,9 +174,24 @@ VkResult anv_CreateDescriptorSetLayout( > #ifndef NDEBUG > set_layout->binding[b].type = binding->descriptorType; > #endif > - set_layout->binding[b].array_size = binding->descriptorCount; > - set_layout->binding[b].descriptor_index = set_layout->size; > - set_layout->size += binding->descriptorCount; > + > + if (binding->descriptorType == > VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) { > + /* We only a single descriptor entry for all the inline uniforms. 
*/ > + set_layout->binding[b].array_size = 1; > + if (set_layout->inline_blocks_descriptor_index < 0) { > + set_layout->binding[b].descriptor_index = > + set_layout->inline_blocks_descriptor_index = > + set_layout->size; > + set_layout->size += 1; > + } else { > + set_layout->binding[b].descriptor_index = > + set_layout->inline_blocks_descriptor_index; > + } > + } else { > + set_layout->binding[b].array_size = binding->descriptorCount; > + set_layout->binding[b].descriptor_index = set_layout->size; > + set_layout->size += binding->descriptorCount; > + } > > switch (binding->descriptorType) { > case VK_DESCRIPTOR_TYPE_SAMPLER: > @@ -176,6 +206,24 @@ VkResult anv_CreateDescriptorSetLayout( > } > > switch (binding->descriptorType) { > + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: > + set_layout->binding[b].inline_block_offset = > set_layout->inline_blocks_size; > + set_layout->binding[b].inline_block_length = > binding->descriptorCount; > + set_layout->inline_blocks_size += binding->descriptorCount; > + > + anv_foreach_stage(s, binding->stageFlags) { > + if (set_layout->inline_blocks_surface_indexes[s] < 0) { > + set_layout->binding[b].stage[s].surface_index = > + set_layout->inline_blocks_surface_indexes[s] = > + surface_count[s]; > + surface_count[s] += 1; > + } else { > + set_layout->binding[b].stage[s].surface_index = > + set_layout->inline_blocks_surface_indexes[s]; > + } > + } > + break; > + > case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: > case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: > case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: > @@ -235,6 +283,12 @@ VkResult anv_CreateDescriptorSetLayout( > set_layout->shader_stages |= binding->stageFlags; > } > > + /* Align inline uniforms total size to 32 because we source the allocation > + * from a single BO in the descriptor set pool and we want the alignment > to > + * match the push constant alignment constraint. 
> + */ > + set_layout->inline_blocks_size = ALIGN(set_layout->inline_blocks_size, > 32); > + > set_layout->buffer_count = buffer_count; > set_layout->dynamic_offset_count = dynamic_offset_count; > > @@ -405,21 +459,45 @@ VkResult anv_CreateDescriptorPool( > ANV_FROM_HANDLE(anv_device, device, _device); > struct anv_descriptor_pool *pool; > > + vk_foreach_struct(ext, pCreateInfo->pNext) { > + switch (ext->sType) { > + case > VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO_EXT: > + /* Our driver doesn't need to know about this as we use a single > + * binding table entry per stage if one or more inline descriptor > + * blocks are used. > + */ > + break; > + > + default: > + anv_debug_ignored_stype(ext->sType); > + break; > + } > + } > + > uint32_t descriptor_count = 0; > uint32_t buffer_count = 0; > + uint32_t inline_blocks_size = 0; > for (uint32_t i = 0; i < pCreateInfo->poolSizeCount; i++) { > switch (pCreateInfo->pPoolSizes[i].type) { > + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: > + inline_blocks_size += pCreateInfo->pPoolSizes[i].descriptorCount; > + break; > case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: > case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: > case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: > case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: > buffer_count += pCreateInfo->pPoolSizes[i].descriptorCount; > + /* Fallthrough */ > default: > descriptor_count += pCreateInfo->pPoolSizes[i].descriptorCount; > break; > } > } > > + /* We'll need one more descriptor for inline uniforms. 
*/ > + if (inline_blocks_size > 0) > + descriptor_count += MIN2(pCreateInfo->maxSets, inline_blocks_size / 4); > + > const size_t pool_size = > pCreateInfo->maxSets * sizeof(struct anv_descriptor_set) + > descriptor_count * sizeof(struct anv_descriptor) + > @@ -435,6 +513,34 @@ VkResult anv_CreateDescriptorPool( > pool->next_set = 0; > pool->free_set_list = EMPTY; > > + pool->inline_blocks_size = inline_blocks_size; > + pool->next_block = 0; > + pool->free_block_list = EMPTY; > + > + pool->inline_blocks_bo = NULL; > + if (pool->inline_blocks_size > 0) { > + struct anv_physical_device *pdevice = > &device->instance->physicalDevice; > + uint64_t bo_flags = > + (pdevice->supports_48bit_addresses ? > EXEC_OBJECT_SUPPORTS_48B_ADDRESS : 0) | > + (pdevice->has_exec_async ? EXEC_OBJECT_ASYNC : 0) | > + (pdevice->use_softpin ? EXEC_OBJECT_PINNED : 0); > + > + VkResult result = anv_bo_cache_alloc(device, &device->bo_cache, > + pool->inline_blocks_size, > + bo_flags, > + &pool->inline_blocks_bo); > + if (result != VK_SUCCESS) > + goto fail_iubo_alloc; > + > + uint32_t gem_flags = !device->info.has_llc ? 
I915_MMAP_WC : 0; > + void *map = anv_gem_mmap(device, pool->inline_blocks_bo->gem_handle, > + 0, pool->inline_blocks_bo->size, gem_flags); > + if (map == MAP_FAILED) > + goto fail_iubo_mmap; > + > + pool->inline_blocks_bo->map = map; > + } > + > anv_state_stream_init(&pool->surface_state_stream, > &device->surface_state_pool, 4096); > pool->surface_state_free_list = NULL; > @@ -442,6 +548,12 @@ VkResult anv_CreateDescriptorPool( > *pDescriptorPool = anv_descriptor_pool_to_handle(pool); > > return VK_SUCCESS; > + > +fail_iubo_mmap: > + anv_bo_cache_release(device, &device->bo_cache, pool->inline_blocks_bo); > +fail_iubo_alloc: > + vk_free2(&device->alloc, pAllocator, pool); > + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); > } > > void anv_DestroyDescriptorPool( > @@ -455,6 +567,8 @@ void anv_DestroyDescriptorPool( > if (!pool) > return; > > + if (pool->inline_blocks_bo) > + anv_bo_cache_release(device, &device->bo_cache, > pool->inline_blocks_bo); > anv_state_stream_finish(&pool->surface_state_stream); > vk_free2(&device->alloc, pAllocator, pool); > } > @@ -469,6 +583,8 @@ VkResult anv_ResetDescriptorPool( > > pool->next_set = 0; > pool->free_set_list = EMPTY; > + pool->next_block = 0; > + pool->free_block_list = EMPTY; > anv_state_stream_finish(&pool->surface_state_stream); > anv_state_stream_init(&pool->surface_state_stream, > &device->surface_state_pool, 4096); > @@ -496,6 +612,36 @@ struct surface_state_free_list_entry { > struct anv_state state; > }; > > +static struct anv_descriptor_set * > +anv_descriptor_inline_block_alloc(struct anv_descriptor_pool *pool, > + struct anv_descriptor_set_layout *layout, > + struct anv_descriptor_set *set) > +{ > + if (layout->inline_blocks_size == 0) { > + set->inline_blocks = NULL; > + return set; > + } > + > + if (layout->inline_blocks_size <= pool->inline_blocks_size - > pool->next_block) { > + set->inline_blocks = pool->inline_blocks_bo->map + pool->next_block; > + pool->next_block += layout->inline_blocks_size; > + 
return set; > + } > + > + struct pool_free_list_entry *entry; > + uint32_t *link = &pool->free_block_list; > + for (uint32_t f = pool->free_block_list; f != EMPTY; f = entry->next) { > + entry = (struct pool_free_list_entry *) (pool->inline_blocks_bo->map + > f); > + if (layout->inline_blocks_size <= entry->size) { > + *link = entry->next; > + set->inline_blocks = entry; > + return set; > + } > + } > + > + return NULL; > +} > + > static struct anv_descriptor_set * > anv_descriptor_alloc(struct anv_descriptor_pool *pool, > struct anv_descriptor_set_layout *layout, > @@ -504,8 +650,10 @@ anv_descriptor_alloc(struct anv_descriptor_pool *pool, > struct anv_descriptor_set *set = NULL; > > if (size <= pool->size - pool->next_set) { > - set = (struct anv_descriptor_set *) (pool->data + pool->next_set); > - pool->next_set += size; > + set = anv_descriptor_inline_block_alloc(pool, layout, > + (struct anv_descriptor_set *) (pool->data + pool->next_set)); > + if (set) > + pool->next_set += size; > return set; > } > > @@ -515,8 +663,10 @@ anv_descriptor_alloc(struct anv_descriptor_pool *pool, > entry = (struct pool_free_list_entry *) (pool->data + f); > if (size <= entry->size) { > uint32_t next = entry->next; > - set = (struct anv_descriptor_set *) entry; > - *link = next; > + set = anv_descriptor_inline_block_alloc(pool, layout, > + (struct anv_descriptor_set *) entry); > + if (set) > + *link = next; > return set; > } > link = &entry->next; > @@ -573,6 +723,18 @@ anv_descriptor_set_create(struct anv_device *device, > desc += layout->binding[b].array_size; > } > > + /* Also fill the inline uniforms blocks if needed. 
*/ > + if (layout->inline_blocks_descriptor_index >= 0) { > + set->descriptors[layout->inline_blocks_descriptor_index] = (struct > anv_descriptor) { > + .type = VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT, > + .inline_address = (struct anv_address) { > + .bo = pool->inline_blocks_bo, > + .offset = set->inline_blocks - pool->inline_blocks_bo->map, > + }, > + .inline_range = layout->inline_blocks_size, > + }; > + } > + > /* Allocate surface state for the buffer views. */ > for (uint32_t b = 0; b < layout->buffer_count; b++) { > struct surface_state_free_list_entry *entry = > @@ -600,8 +762,6 @@ anv_descriptor_set_destroy(struct anv_device *device, > struct anv_descriptor_pool *pool, > struct anv_descriptor_set *set) > { > - anv_descriptor_set_layout_unref(device, set->layout); > - > /* Put the buffer view surface state back on the free list. */ > for (uint32_t b = 0; b < set->buffer_count; b++) { > struct surface_state_free_list_entry *entry = > @@ -611,7 +771,22 @@ anv_descriptor_set_destroy(struct anv_device *device, > pool->surface_state_free_list = entry; > } > > - /* Put the descriptor set allocation back on the free list. */ > + /* Put the inline uniform blocks back on the free list. */ > + if (set->inline_blocks) { > + const uint32_t index = set->inline_blocks - > pool->inline_blocks_bo->map; > + if (index + set->layout->inline_blocks_size == pool->next_block) { > + pool->next_block = index; > + } else { > + struct pool_free_list_entry *entry = (struct pool_free_list_entry > *) set->inline_blocks; > + entry->next = pool->free_block_list; > + entry->size = set->layout->inline_blocks_size; > + pool->free_block_list = (char *) entry - pool->data; > + } > + } > + > + anv_descriptor_set_layout_unref(device, set->layout); > + > + /* Finally, put the descriptor set allocation back on the free list. 
*/ > const uint32_t index = (char *) set - pool->data; > if (index + set->size == pool->next_set) { > pool->next_set = index; > @@ -798,6 +973,23 @@ anv_descriptor_set_write_buffer(struct > anv_descriptor_set *set, > } > } > > +static void > +anv_descriptor_set_write_inline_uniforms(struct anv_descriptor_set *set, > + VkDescriptorType type, > + uint32_t binding, > + uint32_t dst_offset, > + const void *data, > + uint32_t data_length) > +{ > + const struct anv_descriptor_set_binding_layout *bind_layout = > + &set->layout->binding[binding]; > + > + assert(type == bind_layout->type); > + > + memcpy(set->inline_blocks + bind_layout->inline_block_offset + dst_offset, > + data, data_length); > +} > + > void anv_UpdateDescriptorSets( > VkDevice _device, > uint32_t descriptorWriteCount, > @@ -826,6 +1018,20 @@ void anv_UpdateDescriptorSets( > } > break; > > + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: { > + const VkWriteDescriptorSetInlineUniformBlockEXT *inline_ub = > + vk_find_struct_const(write->pNext, > + > WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK_EXT); > + assert(inline_ub); > + anv_descriptor_set_write_inline_uniforms(set, > + write->descriptorType, > + write->dstBinding, > + write->dstArrayElement, > + inline_ub->pData, > + inline_ub->dataSize); > + break; > + } > + > case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: > case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: > for (uint32_t j = 0; j < write->descriptorCount; j++) { > @@ -954,6 +1160,16 @@ anv_descriptor_set_write_template(struct > anv_descriptor_set *set, > } > break; > > + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: { > + anv_descriptor_set_write_inline_uniforms(set, > + entry->type, > + entry->binding, > + entry->array_element, > + data + entry->offset, > + entry->array_count);
Where in the spec did you find that the data is specified directly instead of a VkWriteDescriptorSetInlineUniformBlockEXT struct? > + break; > + } > + > default: > break; > } > diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c > index 1e37876eb43..5bc9f7e7e88 100644 > --- a/src/intel/vulkan/anv_device.c > +++ b/src/intel/vulkan/anv_device.c > @@ -942,6 +942,15 @@ void anv_GetPhysicalDeviceFeatures2( > break; > } > > + case > VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: { > + VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features = > + (VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext; > + > + features->inlineUniformBlock = true; > + features->descriptorBindingInlineUniformBlockUpdateAfterBind = > false; > + break; > + } > + > default: > anv_debug_ignored_stype(ext->sType); > break; > @@ -1198,6 +1207,19 @@ void anv_GetPhysicalDeviceProperties2( > break; > } > > + case > VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: { > + VkPhysicalDeviceInlineUniformBlockPropertiesEXT *props = > + (VkPhysicalDeviceInlineUniformBlockPropertiesEXT *)ext; > + > + /* All required minimum values. 
*/ > + props->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE; > + props->maxPerStageDescriptorInlineUniformBlocks = > MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS; > + props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 4; > + props->maxDescriptorSetInlineUniformBlocks = 4; > + props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 4; > + break; > + } > + > default: > anv_debug_ignored_stype(ext->sType); > break; > diff --git a/src/intel/vulkan/anv_extensions.py > b/src/intel/vulkan/anv_extensions.py > index 951505a854e..61803c9d7fa 100644 > --- a/src/intel/vulkan/anv_extensions.py > +++ b/src/intel/vulkan/anv_extensions.py > @@ -125,6 +125,7 @@ EXTENSIONS = [ > Extension('VK_EXT_vertex_attribute_divisor', 3, True), > Extension('VK_EXT_post_depth_coverage', 1, > 'device->info.gen >= 9'), > Extension('VK_EXT_sampler_filter_minmax', 1, > 'device->info.gen >= 9'), > + Extension('VK_EXT_inline_uniform_block', 1, True), > ] > > class VkVersion: > diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c > b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c > index 856101cc2ff..75bf33806f9 100644 > --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c > +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c > @@ -160,6 +160,22 @@ lower_res_index_intrinsic(nir_intrinsic_instr *intrin, > block_index = nir_iadd(b, nir_imm_int(b, surface_index), block_index); > } > > + /* We use a single binding table entry for all inline uniform blocks. That > + * means each block is at a different offset in that entry. We can get > this > + * offset from the layout using (set, binding) but we need to add that > + * offset into the actual load_ubo intrinsic. > + * > + * Here instead of just setting the block index we set a tuple > + * (block_index, inline_block_offset) which will be replace when run into > a > + * load_ubo intrinsic (see lower_inline_uniform_block function). 
> + */ > + uint32_t inline_block_offset = > + state->layout->set[set].layout->binding[binding].inline_block_offset; > + if (inline_block_offset != -1) { > + block_index = nir_vec2(b, block_index, > + nir_imm_int(b, inline_block_offset)); > + } > + > assert(intrin->dest.is_ssa); > nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(block_index)); > nir_instr_remove(&intrin->instr); > @@ -268,6 +284,39 @@ lower_load_constant(nir_intrinsic_instr *intrin, > nir_instr_remove(&intrin->instr); > } > > +static void > +lower_inline_uniform_block(nir_intrinsic_instr *intrin, > + struct apply_pipeline_layout_state *state) > +{ > + if (!intrin->src[0].ssa->parent_instr || > + intrin->src[0].ssa->parent_instr->type != nir_instr_type_alu) > + return; > + > + nir_alu_instr *alu = nir_instr_as_alu(intrin->src[0].ssa->parent_instr); > + if (alu->op != nir_op_vec2) > + return; > + > + nir_builder *b = &state->builder; > + b->cursor = nir_before_instr(&intrin->instr); > + > + nir_instr_rewrite_src(&intrin->instr, &intrin->src[0], alu->src[0].src); > + > + nir_const_value *const_inline_block_offset = > + nir_src_as_const_value(alu->src[1].src); > + assert(const_inline_block_offset); > + > + nir_const_value *const_offset = nir_src_as_const_value(intrin->src[1]); > + nir_ssa_def *offset; > + if (const_offset) { > + offset = nir_iadd(b, nir_imm_int(b, const_offset->u32[0]), > + nir_imm_int(b, const_inline_block_offset->u32[0])); > + } else { > + offset = nir_iadd(b, nir_ssa_for_src(b, intrin->src[1], 1), > + nir_imm_int(b, const_inline_block_offset->u32[0])); > + } > + nir_instr_rewrite_src(&intrin->instr, &intrin->src[1], > nir_src_for_ssa(offset)); > +} > + > static void > lower_tex_deref(nir_tex_instr *tex, nir_tex_src_type deref_src_type, > unsigned *base_index, > @@ -401,6 +450,9 @@ apply_pipeline_layout_block(nir_block *block, > case nir_intrinsic_load_constant: > lower_load_constant(intrin, state); > break; > + case nir_intrinsic_load_ubo: > + 
lower_inline_uniform_block(intrin, state); > + break; > default: > break; > } > diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h > index 372b7c69635..cea8e5786f5 100644 > --- a/src/intel/vulkan/anv_private.h > +++ b/src/intel/vulkan/anv_private.h > @@ -159,6 +159,8 @@ struct gen_l3_config; > #define MAX_DYNAMIC_BUFFERS 16 > #define MAX_IMAGES 8 > #define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */ > +#define MAX_INLINE_UNIFORM_BLOCK_SIZE 256 /* Minimum requirement */ > +#define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 4 /* Minimum requirement */ > > /* The kernel relocation API has a limitation of a 32-bit delta value > * applied to the address before it is written which, in spite of it being > @@ -1394,6 +1396,16 @@ struct anv_descriptor_set_binding_layout { > /* Index into the descriptor set buffer views */ > int16_t buffer_index; > > + /* TODO/question: should we union fields a bit? inline uniform blocks have > + * no use for array_size, buffer_index & dynamic_offset_index. > + */ > + > + /* Offset into the portion of data allocated for the inline uniforms. */ > + uint32_t inline_block_offset; > + > + /* Length of the portion of data allocated for inline uniforms */ > + uint32_t inline_block_length; > + > struct { > /* Index into the binding table for the associated surface */ > int16_t surface_index; > @@ -1428,6 +1440,15 @@ struct anv_descriptor_set_layout { > /* Number of dynamic offsets used by this descriptor set */ > uint16_t dynamic_offset_count; > > + /* Index into the flattened descriptor set (-1 if unused). 
*/ > + int16_t inline_blocks_descriptor_index; > + > + /* Data to allocate into the pool descriptor's inline uniforms BO */ > + uint32_t inline_blocks_size; > + > + /* Index into the binding table for the associated surface */ > + int16_t inline_blocks_surface_indexes[MESA_SHADER_STAGES]; > + > /* Bindings in this descriptor set */ > struct anv_descriptor_set_binding_layout binding[0]; > }; > @@ -1464,6 +1485,11 @@ struct anv_descriptor { > uint64_t range; > }; > > + struct { > + struct anv_address inline_address; > + uint64_t inline_range; > + }; > + > struct anv_buffer_view *buffer_view; > }; > }; > @@ -1472,6 +1498,7 @@ struct anv_descriptor_set { > struct anv_descriptor_set_layout *layout; > uint32_t size; > uint32_t buffer_count; > + void *inline_blocks; > struct anv_buffer_view *buffer_views; > struct anv_descriptor descriptors[0]; > }; > @@ -1507,6 +1534,12 @@ struct anv_descriptor_pool { > struct anv_state_stream surface_state_stream; > void *surface_state_free_list; > > + struct anv_bo *inline_blocks_bo; > + uint32_t inline_blocks_size; > + > + uint32_t free_block_list; > + uint32_t next_block; > + > char data[0]; > }; > > diff --git a/src/intel/vulkan/genX_cmd_buffer.c > b/src/intel/vulkan/genX_cmd_buffer.c > index 80bebf5a12c..5cb4c0f13af 100644 > --- a/src/intel/vulkan/genX_cmd_buffer.c > +++ b/src/intel/vulkan/genX_cmd_buffer.c > @@ -2146,6 +2146,19 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, > desc->buffer_view->address); > break; > > + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: { > + surface_state = > + anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, > 64); > + enum isl_format format = > + anv_isl_format_for_descriptor_type(desc->type); > + > + anv_fill_buffer_surface_state(cmd_buffer->device, surface_state, > + format, desc->inline_address, > + desc->inline_range, 1); > + add_surface_reloc(cmd_buffer, surface_state, desc->inline_address); > + break; > + } > + > case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: > case 
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: { > /* Compute the offset within the buffer */ > @@ -2445,14 +2458,15 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer > *cmd_buffer, > const struct anv_descriptor *desc = > anv_descriptor_for_binding(&gfx_state->base, binding); > > - if (desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) { > + switch (desc->type) { > + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: > read_len = MIN2(range->length, > DIV_ROUND_UP(desc->buffer_view->range, 32) - > range->start); > read_addr = anv_address_add(desc->buffer_view->address, > range->start * 32); > - } else { > - assert(desc->type == > VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC); > + break; > > + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: { > uint32_t dynamic_offset = > dynamic_offset_for_binding(&gfx_state->base, > binding); > uint32_t buf_offset = > @@ -2464,6 +2478,18 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer > *cmd_buffer, > DIV_ROUND_UP(buf_range, 32) - range->start); > read_addr = anv_address_add(desc->buffer->address, > buf_offset + range->start * > 32); > + break; > + } > + > + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: > + read_len = MIN2(range->length, > + DIV_ROUND_UP(desc->inline_range, 32) - range->start); > + read_addr = anv_address_add(desc->inline_address, > + range->start * 32); > + break; > + > + default: > + unreachable("Invalid descriptor"); > } > } > > -- > 2.19.0.rc1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev