In order to reduce a performance regression introduced by
4b13fe55a4 ("radv: Keep a global BO list for VkMemory."),
we are going to maintain two different paths.

One when VK_EXT_descriptor_indexing is enabled by the
application because we need to have a global BO list, and
one (the old one) when it's not enabled.

With Talos on Polaris, the global BO list reduces performance
by 10% which is too much for me.

This reverts commit ab6cadd3ecc7fbdd9079808b407674e0b19c52f0.
---
 src/amd/vulkan/radv_cmd_buffer.c     |  4 ++
 src/amd/vulkan/radv_debug.c          |  3 +
 src/amd/vulkan/radv_descriptor_set.c | 82 +++++++++++++++++++++++-----
 src/amd/vulkan/radv_descriptor_set.h |  4 ++
 src/amd/vulkan/radv_private.h        |  2 +
 5 files changed, 82 insertions(+), 13 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index f6b23f6e73..72fb6d6357 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -2206,6 +2206,10 @@ radv_bind_descriptor_set(struct radv_cmd_buffer 
*cmd_buffer,
 
        assert(!(set->layout->flags & 
VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
 
+       for (unsigned j = 0; j < set->layout->buffer_count; ++j)
+               if (set->descriptors[j])
+                       radv_cs_add_buffer(ws, cmd_buffer->cs, 
set->descriptors[j], 7);
+
        if(set->bo)
                radv_cs_add_buffer(ws, cmd_buffer->cs, set->bo, 8);
 }
diff --git a/src/amd/vulkan/radv_debug.c b/src/amd/vulkan/radv_debug.c
index 2e9e016523..368bc4b5d0 100644
--- a/src/amd/vulkan/radv_debug.c
+++ b/src/amd/vulkan/radv_debug.c
@@ -250,6 +250,7 @@ radv_dump_descriptor_set(enum chip_class chip_class,
        fprintf(f, "\tshader_stages: %x\n", layout->shader_stages);
        fprintf(f, "\tdynamic_shader_stages: %x\n",
                layout->dynamic_shader_stages);
+       fprintf(f, "\tbuffer_count: %d\n", layout->buffer_count);
        fprintf(f, "\tdynamic_offset_count: %d\n",
                layout->dynamic_offset_count);
        fprintf(f, "\n");
@@ -265,6 +266,8 @@ radv_dump_descriptor_set(enum chip_class chip_class,
                        layout->binding[i].array_size);
                fprintf(f, "\t\toffset: %d\n",
                        layout->binding[i].offset);
+               fprintf(f, "\t\tbuffer_offset: %d\n",
+                       layout->binding[i].buffer_offset);
                fprintf(f, "\t\tdynamic_offset_offset: %d\n",
                        layout->binding[i].dynamic_offset_offset);
                fprintf(f, "\t\tdynamic_offset_count: %d\n",
diff --git a/src/amd/vulkan/radv_descriptor_set.c 
b/src/amd/vulkan/radv_descriptor_set.c
index 55b4aaa388..4b08a1f0f8 100644
--- a/src/amd/vulkan/radv_descriptor_set.c
+++ b/src/amd/vulkan/radv_descriptor_set.c
@@ -117,12 +117,14 @@ VkResult radv_CreateDescriptorSetLayout(
 
        memset(set_layout->binding, 0, size - sizeof(struct 
radv_descriptor_set_layout));
 
+       uint32_t buffer_count = 0;
        uint32_t dynamic_offset_count = 0;
 
        for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
                const VkDescriptorSetLayoutBinding *binding = bindings + j;
                uint32_t b = binding->binding;
                uint32_t alignment;
+               unsigned binding_buffer_count = 0;
 
                switch (binding->descriptorType) {
                case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
@@ -131,6 +133,7 @@ VkResult radv_CreateDescriptorSetLayout(
                        set_layout->binding[b].dynamic_offset_count = 1;
                        set_layout->dynamic_shader_stages |= 
binding->stageFlags;
                        set_layout->binding[b].size = 0;
+                       binding_buffer_count = 1;
                        alignment = 1;
                        break;
                case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
@@ -138,6 +141,7 @@ VkResult radv_CreateDescriptorSetLayout(
                case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
                case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
                        set_layout->binding[b].size = 16;
+                       binding_buffer_count = 1;
                        alignment = 16;
                        break;
                case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
@@ -145,11 +149,13 @@ VkResult radv_CreateDescriptorSetLayout(
                case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
                        /* main descriptor + fmask descriptor */
                        set_layout->binding[b].size = 64;
+                       binding_buffer_count = 1;
                        alignment = 32;
                        break;
                case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
                        /* main descriptor + fmask descriptor + sampler */
                        set_layout->binding[b].size = 96;
+                       binding_buffer_count = 1;
                        alignment = 32;
                        break;
                case VK_DESCRIPTOR_TYPE_SAMPLER:
@@ -165,6 +171,7 @@ VkResult radv_CreateDescriptorSetLayout(
                set_layout->binding[b].type = binding->descriptorType;
                set_layout->binding[b].array_size = binding->descriptorCount;
                set_layout->binding[b].offset = set_layout->size;
+               set_layout->binding[b].buffer_offset = buffer_count;
                set_layout->binding[b].dynamic_offset_offset = 
dynamic_offset_count;
 
                if (variable_flags && binding->binding < 
variable_flags->bindingCount &&
@@ -197,6 +204,7 @@ VkResult radv_CreateDescriptorSetLayout(
                }
 
                set_layout->size += binding->descriptorCount * 
set_layout->binding[b].size;
+               buffer_count += binding->descriptorCount * binding_buffer_count;
                dynamic_offset_count += binding->descriptorCount *
                        set_layout->binding[b].dynamic_offset_count;
                set_layout->shader_stages |= binding->stageFlags;
@@ -204,6 +212,7 @@ VkResult radv_CreateDescriptorSetLayout(
 
        free(bindings);
 
+       set_layout->buffer_count = buffer_count;
        set_layout->dynamic_offset_count = dynamic_offset_count;
 
        *pSetLayout = radv_descriptor_set_layout_to_handle(set_layout);
@@ -396,7 +405,8 @@ radv_descriptor_set_create(struct radv_device *device,
                           struct radv_descriptor_set **out_set)
 {
        struct radv_descriptor_set *set;
-       unsigned range_offset = sizeof(struct radv_descriptor_set);
+       unsigned range_offset = sizeof(struct radv_descriptor_set) +
+               sizeof(struct radeon_winsys_bo *) * layout->buffer_count;
        unsigned mem_size = range_offset +
                sizeof(struct radv_descriptor_range) * 
layout->dynamic_offset_count;
 
@@ -700,16 +710,25 @@ VkResult radv_FreeDescriptorSets(
 }
 
 static void write_texel_buffer_descriptor(struct radv_device *device,
+                                         struct radv_cmd_buffer *cmd_buffer,
                                          unsigned *dst,
+                                         struct radeon_winsys_bo **buffer_list,
                                          const VkBufferView _buffer_view)
 {
        RADV_FROM_HANDLE(radv_buffer_view, buffer_view, _buffer_view);
 
        memcpy(dst, buffer_view->state, 4 * 4);
+
+       if (cmd_buffer)
+               radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer_view->bo, 
7);
+       else
+               *buffer_list = buffer_view->bo;
 }
 
 static void write_buffer_descriptor(struct radv_device *device,
+                                    struct radv_cmd_buffer *cmd_buffer,
                                     unsigned *dst,
+                                    struct radeon_winsys_bo **buffer_list,
                                     const VkDescriptorBufferInfo *buffer_info)
 {
        RADV_FROM_HANDLE(radv_buffer, buffer, buffer_info->buffer);
@@ -730,10 +749,15 @@ static void write_buffer_descriptor(struct radv_device 
*device,
                S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
 
+       if (cmd_buffer)
+               radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer->bo, 7);
+       else
+               *buffer_list = buffer->bo;
 }
 
 static void write_dynamic_buffer_descriptor(struct radv_device *device,
                                             struct radv_descriptor_range 
*range,
+                                            struct radeon_winsys_bo 
**buffer_list,
                                             const VkDescriptorBufferInfo 
*buffer_info)
 {
        RADV_FROM_HANDLE(radv_buffer, buffer, buffer_info->buffer);
@@ -746,11 +770,15 @@ static void write_dynamic_buffer_descriptor(struct 
radv_device *device,
        va += buffer_info->offset + buffer->offset;
        range->va = va;
        range->size = size;
+
+       *buffer_list = buffer->bo;
 }
 
 static void
 write_image_descriptor(struct radv_device *device,
+                      struct radv_cmd_buffer *cmd_buffer,
                       unsigned *dst,
+                      struct radeon_winsys_bo **buffer_list,
                       VkDescriptorType descriptor_type,
                       const VkDescriptorImageInfo *image_info)
 {
@@ -764,18 +792,25 @@ write_image_descriptor(struct radv_device *device,
        }
 
        memcpy(dst, descriptor, 16 * 4);
+
+       if (cmd_buffer)
+               radv_cs_add_buffer(device->ws, cmd_buffer->cs, iview->bo, 7);
+       else
+               *buffer_list = iview->bo;
 }
 
 static void
 write_combined_image_sampler_descriptor(struct radv_device *device,
+                                       struct radv_cmd_buffer *cmd_buffer,
                                        unsigned *dst,
+                                       struct radeon_winsys_bo **buffer_list,
                                        VkDescriptorType descriptor_type,
                                        const VkDescriptorImageInfo *image_info,
                                        bool has_sampler)
 {
        RADV_FROM_HANDLE(radv_sampler, sampler, image_info->sampler);
 
-       write_image_descriptor(device, dst, descriptor_type, image_info);
+       write_image_descriptor(device, cmd_buffer, dst, buffer_list, 
descriptor_type, image_info);
        /* copy over sampler state */
        if (has_sampler)
                memcpy(dst + 16, sampler->state, 16);
@@ -808,7 +843,7 @@ void radv_update_descriptor_sets(
                const struct radv_descriptor_set_binding_layout *binding_layout 
=
                        set->layout->binding + writeset->dstBinding;
                uint32_t *ptr = set->mapped_ptr;
-
+               struct radeon_winsys_bo **buffer_list =  set->descriptors;
                /* Immutable samplers are not copied into push descriptors when 
they are
                 * allocated, so if we are writing push descriptors we have to 
copy the
                 * immutable samplers into them now.
@@ -819,6 +854,8 @@ void radv_update_descriptor_sets(
 
                ptr += binding_layout->offset / 4;
                ptr += binding_layout->size * writeset->dstArrayElement / 4;
+               buffer_list += binding_layout->buffer_offset;
+               buffer_list += writeset->dstArrayElement;
                for (j = 0; j < writeset->descriptorCount; ++j) {
                        switch(writeset->descriptorType) {
                        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
@@ -827,25 +864,28 @@ void radv_update_descriptor_sets(
                                idx += binding_layout->dynamic_offset_offset;
                                assert(!(set->layout->flags & 
VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
                                write_dynamic_buffer_descriptor(device, 
set->dynamic_descriptors + idx,
-                                                               
writeset->pBufferInfo + j);
+                                                               buffer_list, 
writeset->pBufferInfo + j);
                                break;
                        }
                        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
                        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
-                               write_buffer_descriptor(device, ptr, 
writeset->pBufferInfo + j);
+                               write_buffer_descriptor(device, cmd_buffer, 
ptr, buffer_list,
+                                                       writeset->pBufferInfo + 
j);
                                break;
                        case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
                        case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
-                               write_texel_buffer_descriptor(device, ptr, 
writeset->pTexelBufferView[j]);
+                               write_texel_buffer_descriptor(device, 
cmd_buffer, ptr, buffer_list,
+                                                             
writeset->pTexelBufferView[j]);
                                break;
                        case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
                        case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
                        case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
-                               write_image_descriptor(device, ptr, 
writeset->descriptorType,
+                               write_image_descriptor(device, cmd_buffer, ptr, 
buffer_list,
+                                                      writeset->descriptorType,
                                                       writeset->pImageInfo + 
j);
                                break;
                        case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
-                               write_combined_image_sampler_descriptor(device, 
ptr,
+                               write_combined_image_sampler_descriptor(device, 
cmd_buffer, ptr, buffer_list,
                                                                        
writeset->descriptorType,
                                                                        
writeset->pImageInfo + j,
                                                                        
!binding_layout->immutable_samplers_offset);
@@ -868,6 +908,7 @@ void radv_update_descriptor_sets(
                                break;
                        }
                        ptr += binding_layout->size / 4;
+                       ++buffer_list;
                }
 
        }
@@ -884,6 +925,8 @@ void radv_update_descriptor_sets(
                        dst_set->layout->binding + copyset->dstBinding;
                uint32_t *src_ptr = src_set->mapped_ptr;
                uint32_t *dst_ptr = dst_set->mapped_ptr;
+               struct radeon_winsys_bo **src_buffer_list = 
src_set->descriptors;
+               struct radeon_winsys_bo **dst_buffer_list = 
dst_set->descriptors;
 
                src_ptr += src_binding_layout->offset / 4;
                dst_ptr += dst_binding_layout->offset / 4;
@@ -891,6 +934,12 @@ void radv_update_descriptor_sets(
                src_ptr += src_binding_layout->size * copyset->srcArrayElement 
/ 4;
                dst_ptr += dst_binding_layout->size * copyset->dstArrayElement 
/ 4;
 
+               src_buffer_list += src_binding_layout->buffer_offset;
+               src_buffer_list += copyset->srcArrayElement;
+
+               dst_buffer_list += dst_binding_layout->buffer_offset;
+               dst_buffer_list += copyset->dstArrayElement;
+
                for (j = 0; j < copyset->descriptorCount; ++j) {
                        switch (src_binding_layout->type) {
                        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
@@ -911,6 +960,9 @@ void radv_update_descriptor_sets(
                        }
                        src_ptr += src_binding_layout->size / 4;
                        dst_ptr += dst_binding_layout->size / 4;
+                       dst_buffer_list[j] = src_buffer_list[j];
+                       ++src_buffer_list;
+                       ++dst_buffer_list;
                }
        }
 }
@@ -952,6 +1004,7 @@ VkResult radv_CreateDescriptorUpdateTemplate(VkDevice 
_device,
                const VkDescriptorUpdateTemplateEntryKHR *entry = 
&pCreateInfo->pDescriptorUpdateEntries[i];
                const struct radv_descriptor_set_binding_layout *binding_layout 
=
                        set_layout->binding + entry->dstBinding;
+               const uint32_t buffer_offset = binding_layout->buffer_offset + 
entry->dstArrayElement;
                const uint32_t *immutable_samplers = NULL;
                uint32_t dst_offset;
                uint32_t dst_stride;
@@ -990,6 +1043,7 @@ VkResult radv_CreateDescriptorUpdateTemplate(VkDevice 
_device,
                        .src_stride = entry->stride,
                        .dst_offset = dst_offset,
                        .dst_stride = dst_stride,
+                       .buffer_offset = buffer_offset,
                        .has_sampler = 
!binding_layout->immutable_samplers_offset,
                        .immutable_samplers = immutable_samplers
                };
@@ -1022,6 +1076,7 @@ void radv_update_descriptor_set_with_template(struct 
radv_device *device,
        uint32_t i;
 
        for (i = 0; i < templ->entry_count; ++i) {
+               struct radeon_winsys_bo **buffer_list = set->descriptors + 
templ->entry[i].buffer_offset;
                uint32_t *pDst = set->mapped_ptr + templ->entry[i].dst_offset;
                const uint8_t *pSrc = ((const uint8_t *) pData) + 
templ->entry[i].src_offset;
                uint32_t j;
@@ -1033,28 +1088,28 @@ void radv_update_descriptor_set_with_template(struct 
radv_device *device,
                                const unsigned idx = templ->entry[i].dst_offset 
+ j;
                                assert(!(set->layout->flags & 
VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
                                write_dynamic_buffer_descriptor(device, 
set->dynamic_descriptors + idx,
-                                                               (struct 
VkDescriptorBufferInfo *) pSrc);
+                                                               buffer_list, 
(struct VkDescriptorBufferInfo *) pSrc);
                                break;
                        }
                        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
                        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
-                               write_buffer_descriptor(device, pDst,
+                               write_buffer_descriptor(device, cmd_buffer, 
pDst, buffer_list,
                                                        (struct 
VkDescriptorBufferInfo *) pSrc);
                                break;
                        case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
                        case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
-                               write_texel_buffer_descriptor(device, pDst,
+                               write_texel_buffer_descriptor(device, 
cmd_buffer, pDst, buffer_list,
                                                              *(VkBufferView *) 
pSrc);
                                break;
                        case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
                        case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
                        case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
-                               write_image_descriptor(device, pDst,
+                               write_image_descriptor(device, cmd_buffer, 
pDst, buffer_list,
                                                       
templ->entry[i].descriptor_type,
                                                       (struct 
VkDescriptorImageInfo *) pSrc);
                                break;
                        case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
-                               write_combined_image_sampler_descriptor(device, 
pDst,
+                               write_combined_image_sampler_descriptor(device, 
cmd_buffer, pDst, buffer_list,
                                                                        
templ->entry[i].descriptor_type,
                                                                        (struct 
VkDescriptorImageInfo *) pSrc,
                                                                        
templ->entry[i].has_sampler);
@@ -1074,6 +1129,7 @@ void radv_update_descriptor_set_with_template(struct 
radv_device *device,
                        }
                        pSrc += templ->entry[i].src_stride;
                        pDst += templ->entry[i].dst_stride;
+                       ++buffer_list;
                }
        }
 }
diff --git a/src/amd/vulkan/radv_descriptor_set.h 
b/src/amd/vulkan/radv_descriptor_set.h
index d1cba953f7..d8431241fd 100644
--- a/src/amd/vulkan/radv_descriptor_set.h
+++ b/src/amd/vulkan/radv_descriptor_set.h
@@ -35,6 +35,7 @@ struct radv_descriptor_set_binding_layout {
    uint32_t array_size;
 
    uint32_t offset;
+   uint32_t buffer_offset;
    uint16_t dynamic_offset_offset;
 
    uint16_t dynamic_offset_count;
@@ -61,6 +62,9 @@ struct radv_descriptor_set_layout {
    uint16_t shader_stages;
    uint16_t dynamic_shader_stages;
 
+   /* Number of buffers in this descriptor set */
+   uint32_t buffer_count;
+
    /* Number of dynamic offsets used by this descriptor set */
    uint16_t dynamic_offset_count;
 
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 4a860c595f..dfe4c5f942 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -698,6 +698,8 @@ struct radv_descriptor_set {
        uint64_t va;
        uint32_t *mapped_ptr;
        struct radv_descriptor_range *dynamic_descriptors;
+
+       struct radeon_winsys_bo *descriptors[0];
 };
 
 struct radv_push_descriptor_set
-- 
2.17.0

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to