On Wed, Feb 8, 2017, at 10:10, Nicolai Hähnle wrote:
> On 05.02.2017 12:43, Bas Nieuwenhuizen wrote:
> > Signed-off-by: Bas Nieuwenhuizen <ba...@google.com>
> > ---
> >  src/amd/vulkan/radv_radeon_winsys.h           |   5 +
> >  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c | 218 +++++++++++++++++++++++---
> >  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h |  35 ++++-
> >  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c |  98 +++++++++++-
> >  4 files changed, 330 insertions(+), 26 deletions(-)
> >
> > diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h
> > index 79c182007a6..20d6b1d60d2 100644
> > --- a/src/amd/vulkan/radv_radeon_winsys.h
> > +++ b/src/amd/vulkan/radv_radeon_winsys.h
> > @@ -47,6 +47,7 @@ enum radeon_bo_flag { /* bitfield */
> >  	RADEON_FLAG_GTT_WC = (1 << 0),
> >  	RADEON_FLAG_CPU_ACCESS = (1 << 1),
> >  	RADEON_FLAG_NO_CPU_ACCESS = (1 << 2),
> > +	RADEON_FLAG_VIRTUAL = (1 << 3)
> >  };
> >
> >  enum radeon_bo_usage { /* bitfield */
> >
> > @@ -284,6 +285,10 @@ struct radeon_winsys {
> >
> >  	void (*buffer_set_metadata)(struct radeon_winsys_bo *bo,
> >  	                            struct radeon_bo_metadata *md);
> > +
> > +	void (*buffer_virtual_bind)(struct radeon_winsys_bo *parent,
> > +	                            uint64_t offset, uint64_t size,
> > +	                            struct radeon_winsys_bo *bo, uint64_t bo_offset);
> >  	struct radeon_winsys_ctx *(*ctx_create)(struct radeon_winsys *ws);
> >  	void (*ctx_destroy)(struct radeon_winsys_ctx *ctx);
> >
> > diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
> > index 7319a988872..d5bce304510 100644
> > --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
> > +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
> > @@ -34,19 +34,182 @@
> >  #include <amdgpu_drm.h>
> >  #include <inttypes.h>
> >
> > +static void
> > +radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys_bo *bo,
> > +                               const struct radv_amdgpu_map_range *range)
> > +{
> > +	assert(range->size);
> > +
> > +	if (!range->bo)
> > +		return; /* TODO: PRT mapping */
> > +
> > +	int r = amdgpu_bo_va_op(range->bo->bo, range->bo_offset, range->size,
> > +	                        range->offset + bo->va, 0, AMDGPU_VA_OP_MAP);
> > +	if (r)
> > +		abort();
> > +}
> > +
> > +static void
> > +radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys_bo *bo,
> > +                                 const struct radv_amdgpu_map_range *range)
> > +{
> > +	assert(range->size);
> > +
> > +	if (!range->bo)
> > +		return; /* TODO: PRT mapping */
> > +
> > +	int r = amdgpu_bo_va_op(range->bo->bo, range->bo_offset, range->size,
> > +	                        range->offset + bo->va, 0, AMDGPU_VA_OP_UNMAP);
> > +	if (r)
> > +		abort();
> > +}
> > +
> > +static void
> > +radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
> > +{
> > +	bo->bo_count = 0;
> > +	for (uint32_t i = 0; i < bo->range_count; ++i) {
> > +		bool found = false;
> > +		if (!bo->ranges[i].bo)
> > +			continue;
> > +
> > +		for(uint32_t j = 0; j < bo->bo_count; ++j) {
> > +			if (bo->bos[j] == bo->ranges[i].bo) {
> > +				found = true;
> > +				break;
> > +			}
> > +		}
> > +
> > +		if (!found) {
> > +			if (bo->bo_capacity == bo->bo_count) {
> > +				bo->bos = realloc(bo->bos,
> > +				                  (bo->bo_capacity + 1) * sizeof(struct radv_amdgpu_bo *));
> > +				++bo->bo_capacity;
> > +			}
> > +			bo->bos[bo->bo_count++] = bo->ranges[i].bo;
> > +		}
> > +	}
> > +}
> > +
> > +static void
> > +radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys_bo *_parent,
> > +                                   uint64_t offset, uint64_t size,
> > +                                   struct radeon_winsys_bo *_bo, uint64_t bo_offset)
> > +{
> > +	struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
> > +	struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo*)_bo;
> > +	int range_count_delta, new_idx;
> > +	int first = 0, last;
> > +	struct radv_amdgpu_map_range new_first, new_last;
> > +
> > +	assert(parent->is_virtual);
> > +	assert(!bo || !bo->is_virtual);
> > +
> > +	if (!size)
> > +		return;
> > +
> > +	if (parent->range_capacity - parent->range_count < 2) {
> > +		parent->range_capacity += 2;
> > +		parent->ranges = realloc(parent->ranges, parent->range_capacity * sizeof(struct radv_amdgpu_map_range));
> > +	}
> > +
> > +	/*
> > +	 * [first, last] is exactly the range of ranges that either overlap the
> > +	 * new parent, or are adjacent to it. This corresponds to the bind parents
> > +	 * that may change.
> > +	 */
> > +	while(first + 1 < parent->range_count && parent->ranges[first].offset + parent->ranges[first].size < offset)
> > +		++first;
> > +
> > +	last = first;
> > +	while(last + 1 < parent->range_count && parent->ranges[last].offset <= offset + size)
> > +		++last;
> > +
> > +	bool remove_first = parent->ranges[first].offset == offset;
> > +	bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
> > +
> > +	assert(parent->ranges[first].offset <= offset);
> > +	assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);
> > +
> > +	if (parent->ranges[first].bo == bo && (!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
> > +		size += offset - parent->ranges[first].offset;
> > +		offset = parent->ranges[first].offset;
> > +		remove_first = true;
> > +	}
> > +
> > +	if (parent->ranges[last].bo == bo && (!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
> > +		size = parent->ranges[last].offset + parent->ranges[last].size - offset;
> > +		remove_last = true;
> > +	}
> > +
> > +	range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
> > +	new_idx = first + !remove_first;
> > +
> > +	new_first = parent->ranges[first];
> > +	new_last = parent->ranges[last];
> > +
> > +	if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
> > +		radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + first);
> > +
> > +		if (!remove_first) {
> > +			new_first.size = offset - new_first.offset;
> > +			radv_amdgpu_winsys_virtual_map(parent, &new_first);
> > +		}
> > +	}
> > +
> > +	if (parent->ranges[last].offset < offset + size || remove_last) {
> > +		if (first != last)
> > +			radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + last);
> > +
> > +		if (!remove_last) {
> > +			new_last.size -= offset + size - new_last.offset;
> > +			new_last.offset = offset + size;
> > +			radv_amdgpu_winsys_virtual_map(parent, &new_last);
> > +		}
> > +	}
>
> Not sure what the required semantics are for Vulkan, but I've looked
> into this on the radeonsi side for ARB_sparse_buffer, and there it's
> possible that you may have to unmap many pre-existing ranges, depending
> on what the new range overlaps; I don't see you calling
> radv_amdgpu_winsys_virtual_unmap in a loop.

Seems the loop got lost indeed. I will add it in v2.
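Something along these lines (untested sketch based on the code above, not
necessarily the exact v2 change), placed just before the remaining ranges
are shifted by range_count_delta:

    /* Every range strictly between first and last is completely covered
     * by the new bind, so it has to be unmapped before it is dropped
     * from the range list. */
    for (int i = first + 1; i < last; ++i)
            radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + i);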

>
> Cheers,
> Nicolai
> > +
> > +	for (int i = parent->range_count - 1; i > last; --i)
> > +		parent->ranges[i + range_count_delta] = parent->ranges[i];
> > +
> > +	if (!remove_first)
> > +		parent->ranges[first] = new_first;
> > +
> > +	if (!remove_last)
> > +		parent->ranges[new_idx + 1] = new_last;
> > +
> > +	parent->ranges[new_idx].offset = offset;
> > +	parent->ranges[new_idx].size = size;
> > +	parent->ranges[new_idx].bo = bo;
> > +	parent->ranges[new_idx].bo_offset = bo_offset;
> > +
> > +	radv_amdgpu_winsys_virtual_map(parent, parent->ranges + new_idx);
> > +
> > +	parent->range_count += range_count_delta;
> > +
> > +	radv_amdgpu_winsys_rebuild_bo_list(parent);
> > +}
> > +
> >  static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
> >  {
> >  	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
> >
> > -	if (bo->ws->debug_all_bos) {
> > -		pthread_mutex_lock(&bo->ws->global_bo_list_lock);
> > -		LIST_DEL(&bo->global_list_item);
> > -		bo->ws->num_buffers--;
> > -		pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
> > +	if (bo->is_virtual) {
> > +		for (uint32_t i = 0; i < bo->range_count; ++i) {
> > +			radv_amdgpu_winsys_virtual_unmap(bo, bo->ranges + i);
> > +		}
> > +		free(bo->bos);
> > +		free(bo->ranges);
> > +	} else {
> > +		if (bo->ws->debug_all_bos) {
> > +			pthread_mutex_lock(&bo->ws->global_bo_list_lock);
> > +			LIST_DEL(&bo->global_list_item);
> > +			bo->ws->num_buffers--;
> > +			pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
> > +		}
> > +		amdgpu_bo_va_op(bo->bo, 0, bo->size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
> > +		amdgpu_bo_free(bo->bo);
> >  	}
> > -	amdgpu_bo_va_op(bo->bo, 0, bo->size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
> >  	amdgpu_va_range_free(bo->va_handle);
> > -	amdgpu_bo_free(bo->bo);
> >  	FREE(bo);
> >  }
> >
> > @@ -81,6 +244,31 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
> >  		return NULL;
> >  	}
> >
> > +	r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
> > +	                          size, alignment, 0, &va, &va_handle, 0);
> > +	if (r)
> > +		goto error_va_alloc;
> > +
> > +	bo->va = va;
> > +	bo->va_handle = va_handle;
> > +	bo->size = size;
> > +	bo->ws = ws;
> > +	bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
> > +
> > +	if (flags & RADEON_FLAG_VIRTUAL) {
> > +		bo->ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
> > +		bo->range_count = 1;
> > +		bo->range_capacity = 1;
> > +
> > +		bo->ranges[0].offset = 0;
> > +		bo->ranges[0].size = size;
> > +		bo->ranges[0].bo = NULL;
> > +		bo->ranges[0].bo_offset = 0;
> > +
> > +		radv_amdgpu_winsys_virtual_map(bo, bo->ranges);
> > +		return (struct radeon_winsys_bo *)bo;
> > +	}
> > +
> >  	request.alloc_size = size;
> >  	request.phys_alignment = alignment;
> >
> > @@ -105,31 +293,22 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
> >  		goto error_bo_alloc;
> >  	}
> >
> > -	r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
> > -	                          size, alignment, 0, &va, &va_handle, 0);
> > -	if (r)
> > -		goto error_va_alloc;
> > -
> >  	r = amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP);
> >  	if (r)
> >  		goto error_va_map;
> >
> >  	bo->bo = buf_handle;
> > -	bo->va = va;
> > -	bo->va_handle = va_handle;
> >  	bo->initial_domain = initial_domain;
> > -	bo->size = size;
> >  	bo->is_shared = false;
> > -	bo->ws = ws;
> >  	radv_amdgpu_add_buffer_to_global_list(bo);
> >  	return (struct radeon_winsys_bo *)bo;
> >  error_va_map:
> > -	amdgpu_va_range_free(va_handle);
> > -
> > -error_va_alloc:
> >  	amdgpu_bo_free(buf_handle);
> >
> >  error_bo_alloc:
> > +	amdgpu_va_range_free(va_handle);
> > +
> > +error_va_alloc:
> >  	FREE(bo);
> >  	return NULL;
> >  }
> > @@ -294,4 +473,5 @@ void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
> >  	ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
> >  	ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
> >  	ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
> > +	ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
> >  }
> > diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h
> > index 499b063d56f..c7d484bc8dd 100644
> > --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h
> > +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h
> > @@ -31,17 +31,40 @@
> >
> >  #include "radv_amdgpu_winsys.h"
> >
> > +
> > +struct radv_amdgpu_map_range {
> > +	uint64_t offset;
> > +	uint64_t size;
> > +	struct radv_amdgpu_winsys_bo *bo;
> > +	uint64_t bo_offset;
> > +};
> > +
> >  struct radv_amdgpu_winsys_bo {
> > -	amdgpu_bo_handle bo;
> >  	amdgpu_va_handle va_handle;
> > -
> >  	uint64_t va;
> > -	enum radeon_bo_domain initial_domain;
> >  	uint64_t size;
> > -	bool is_shared;
> > -
> >  	struct radv_amdgpu_winsys *ws;
> > -	struct list_head global_list_item;
> > +	bool is_virtual;
> > +
> > +	union {
> > +		/* physical bo */
> > +		struct {
> > +			amdgpu_bo_handle bo;
> > +			enum radeon_bo_domain initial_domain;
> > +			bool is_shared;
> > +			struct list_head global_list_item;
> > +		};
> > +		/* virtual bo */
> > +		struct {
> > +			struct radv_amdgpu_map_range *ranges;
> > +			uint32_t range_count;
> > +			uint32_t range_capacity;
> > +
> > +			struct radv_amdgpu_winsys_bo **bos;
> > +			uint32_t bo_count;
> > +			uint32_t bo_capacity;
> > +		};
> > +	};
> >  };
> >
> >  static inline
> > diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> > index 2f2c3e1338f..c094abc672c 100644
> > --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> > +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> > @@ -34,6 +34,11 @@
> >  #include "radv_amdgpu_bo.h"
> >  #include "sid.h"
> >
> > +
> > +enum {
> > +	VIRTUAL_BUFFER_HASH_TABLE_SIZE = 1024
> > +};
> > +
> >  struct radv_amdgpu_cs {
> >  	struct radeon_winsys_cs base;
> >  	struct radv_amdgpu_winsys *ws;
> > @@ -56,6 +61,12 @@ struct radv_amdgpu_cs {
> >
> >  	int buffer_hash_table[1024];
> >  	unsigned hw_ip;
> > +
> > +	unsigned num_virtual_buffers;
> > +	unsigned max_num_virtual_buffers;
> > +	struct radeon_winsys_bo **virtual_buffers;
> > +	uint8_t *virtual_buffer_priorities;
> > +	int *virtual_buffer_hash_table;
> >  };
> >
> >  static inline struct radv_amdgpu_cs *
> > @@ -141,6 +152,9 @@ static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
> >  		cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);
> >
> >  	free(cs->old_ib_buffers);
> > +	free(cs->virtual_buffers);
> > +	free(cs->virtual_buffer_priorities);
> > +	free(cs->virtual_buffer_hash_table);
> >  	free(cs->handles);
> >  	free(cs->priorities);
> >  	free(cs);
> > @@ -315,7 +329,13 @@ static void radv_amdgpu_cs_reset(struct radeon_winsys_cs *_cs)
> >  		cs->buffer_hash_table[hash] = -1;
> >  	}
> >
> > +	for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
> > +		unsigned hash = ((uintptr_t)cs->virtual_buffers[i] >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
> > +		cs->virtual_buffer_hash_table[hash] = -1;
> > +	}
> > +
> >  	cs->num_buffers = 0;
> > +	cs->num_virtual_buffers = 0;
> >
> >  	if (cs->ws->use_ib_bos) {
> >  		cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8);
> > @@ -380,6 +400,49 @@ static void radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs *cs,
> >  	++cs->num_buffers;
> >  }
> >
> > +static void radv_amdgpu_cs_add_virtual_buffer(struct radeon_winsys_cs *_cs,
> > +                                              struct radeon_winsys_bo *bo,
> > +                                              uint8_t priority)
> > +{
> > +	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
> > +	unsigned hash = ((uintptr_t)bo >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
> > +
> > +
> > +	if (!cs->virtual_buffer_hash_table) {
> > +		cs->virtual_buffer_hash_table = malloc(VIRTUAL_BUFFER_HASH_TABLE_SIZE * sizeof(int));
> > +		for (int i = 0; i < VIRTUAL_BUFFER_HASH_TABLE_SIZE; ++i)
> > +			cs->virtual_buffer_hash_table[i] = -1;
> > +	}
> > +
> > +	if (cs->virtual_buffer_hash_table[hash] >= 0) {
> > +		int idx = cs->virtual_buffer_hash_table[hash];
> > +		if (cs->virtual_buffers[idx] == bo) {
> > +			cs->virtual_buffer_priorities[idx] = MAX2(cs->virtual_buffer_priorities[idx], priority);
> > +			return;
> > +		}
> > +		for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
> > +			if (cs->virtual_buffers[i] == bo) {
> > +				cs->virtual_buffer_priorities[i] = MAX2(cs->virtual_buffer_priorities[i], priority);
> > +				cs->virtual_buffer_hash_table[hash] = i;
> > +				return;
> > +			}
> > +		}
> > +	}
> > +
> > +	if(cs->max_num_virtual_buffers <= cs->num_virtual_buffers) {
> > +		cs->max_num_virtual_buffers = MAX2(2, cs->max_num_virtual_buffers * 2);
> > +		cs->virtual_buffers = realloc(cs->virtual_buffers, sizeof(struct radv_amdgpu_virtual_virtual_buffer*) * cs->max_num_virtual_buffers);
> > +		cs->virtual_buffer_priorities = realloc(cs->virtual_buffer_priorities, sizeof(uint8_t) * cs->max_num_virtual_buffers);
> > +	}
> > +
> > +	cs->virtual_buffers[cs->num_virtual_buffers] = bo;
> > +	cs->virtual_buffer_priorities[cs->num_virtual_buffers] = priority;
> > +
> > +	cs->virtual_buffer_hash_table[hash] = cs->num_virtual_buffers;
> > +	++cs->num_virtual_buffers;
> > +
> > +}
> > +
> >  static void radv_amdgpu_cs_add_buffer(struct radeon_winsys_cs *_cs,
> >                                        struct radeon_winsys_bo *_bo,
> >                                        uint8_t priority)
> > @@ -387,6 +450,11 @@ static void radv_amdgpu_cs_add_buffer(struct radeon_winsys_cs *_cs,
> >  	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
> >  	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
> >
> > +	if (bo->is_virtual) {
> > +		radv_amdgpu_cs_add_virtual_buffer(_cs, _bo, priority);
> > +		return;
> > +	}
> > +
> >  	radv_amdgpu_cs_add_buffer_internal(cs, bo->bo, priority);
> >  }
> >
> > @@ -401,6 +469,11 @@ static void radv_amdgpu_cs_execute_secondary(struct radeon_winsys_cs *_parent,
> >  		                          child->priorities[i]);
> >  	}
> >
> > +	for (unsigned i = 0; i < child->num_virtual_buffers; ++i) {
> > +		radv_amdgpu_cs_add_buffer(&parent->base, child->virtual_buffers[i],
> > +		                          child->virtual_buffer_priorities[i]);
> > +	}
> > +
> >  	if (parent->ws->use_ib_bos) {
> >  		if (parent->base.cdw + 4 > parent->base.max_dw)
> >  			radv_amdgpu_cs_grow(&parent->base, 4);
> > @@ -449,7 +522,8 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
> >  		                          bo_list);
> >  		free(handles);
> >  		pthread_mutex_unlock(&ws->global_bo_list_lock);
> > -	} else if (count == 1 && !extra_bo && !extra_cs) {
> > +	} else if (count == 1 && !extra_bo && !extra_cs &&
> > +	           !radv_amdgpu_cs(cs_array[0])->num_virtual_buffers) {
> >  		struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0];
> >  		r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles,
> >  		                          cs->priorities, bo_list);
> > @@ -459,6 +533,8 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
> >  		for (unsigned i = 0; i < count; ++i) {
> >  			struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i];
> >  			total_buffer_count += cs->num_buffers;
> > +			for (unsigned j = 0; j < cs->num_virtual_buffers; ++j)
> > +				total_buffer_count += radv_amdgpu_winsys_bo(cs->virtual_buffers[j])->bo_count;
> >  		}
> >
> >  		if (extra_cs) {
> > @@ -502,6 +578,26 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
> >  					++unique_bo_count;
> >  				}
> >  			}
> > +			for (unsigned j = 0; j < cs->num_virtual_buffers; ++j) {
> > +				struct radv_amdgpu_winsys_bo *virtual_bo = radv_amdgpu_winsys_bo(cs->virtual_buffers[j]);
> > +				for(unsigned k = 0; k < virtual_bo->bo_count; ++k) {
> > +					struct radv_amdgpu_winsys_bo *bo = virtual_bo->bos[k];
> > +					bool found = false;
> > +					for (unsigned m = 0; m < unique_bo_count; ++m) {
> > +						if (handles[m] == bo->bo) {
> > +							found = true;
> > +							priorities[m] = MAX2(priorities[m],
> > +							                     cs->virtual_buffer_priorities[j]);
> > +							break;
> > +						}
> > +					}
> > +					if (!found) {
> > +						handles[unique_bo_count] = bo->bo;
> > +						priorities[unique_bo_count] = cs->virtual_buffer_priorities[j];
> > +						++unique_bo_count;
> > +					}
> > +				}
> > +			}
> >  		}
> >  		r = amdgpu_bo_list_create(ws->dev, unique_bo_count, handles,
> >  		                          priorities, bo_list);
> >
>
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev