Reviewed-by: Jordan Justen <jordan.l.jus...@intel.com>
On 2018-05-07 17:30:46, Scott D Phillips wrote: > These will be used to assign virtual addresses to soft pinned > buffers in a later patch. > > Two allocators are added for separate 'low' and 'high' virtual > memory areas. Another alternative would have been to add a > double-sided allocator, which wasn't done here just because it > didn't appear to give any code complexity advantages. > > v2: - rename has_exec_softpin to use_softpin (Jason) > - Only remove bottom one page and top 4 GiB from virt (Jason) > - refer to comment in anv_allocator about state address + size > overflowing 48 bits (Jason) > - Mention hi/lo allocators vs double-sided allocator in > commit message (Chris) > - assign state pool memory ranges statically (Jason) > --- > src/intel/vulkan/anv_device.c | 77 > ++++++++++++++++++++++++++++++++++++++++++ > src/intel/vulkan/anv_private.h | 60 ++++++++++++++++++++++++++++++++ > 2 files changed, 137 insertions(+) > > diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c > index 374fc16c4c9..9e21818ead1 100644 > --- a/src/intel/vulkan/anv_device.c > +++ b/src/intel/vulkan/anv_device.c > @@ -374,6 +374,9 @@ anv_physical_device_init(struct anv_physical_device > *device, > anv_gem_supports_syncobj_wait(fd); > device->has_context_priority = anv_gem_has_context_priority(fd); > > + device->use_softpin = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN) > + && device->supports_48bit_addresses; > + > bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X); > > /* Starting with Gen10, the timestamp frequency of the command streamer > may > @@ -1527,6 +1530,27 @@ VkResult anv_CreateDevice( > goto fail_fd; > } > > + if (physical_device->use_softpin) { > + if (pthread_mutex_init(&device->vma_mutex, NULL) != 0) { > + result = vk_error(VK_ERROR_INITIALIZATION_FAILED); > + goto fail_fd; > + } > + > + /* keep the page with address zero out of the allocator */ > + util_vma_heap_init(&device->vma_lo, LOW_HEAP_MIN_ADDRESS, > LOW_HEAP_SIZE); > + device->vma_lo_available = > + physical_device->memory.heaps[physical_device->memory.heap_count - > 1].size; > + > + /* Leave the last 4GiB out of the high vma range, so that no state base > + * address + size can overflow 48 bits. For more information see the > + * comment about Wa32bitGeneralStateOffset in anv_allocator.c > + */ > + util_vma_heap_init(&device->vma_hi, HIGH_HEAP_MIN_ADDRESS, > + HIGH_HEAP_SIZE); > + device->vma_hi_available = physical_device->memory.heap_count == 1 ? 0 > : > + physical_device->memory.heaps[0].size; > + } > + > /* As per spec, the driver implementation may deny requests to acquire > * a priority above the default priority (MEDIUM) if the caller does not > * have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_EXT > @@ -1887,6 +1911,59 @@ VkResult anv_DeviceWaitIdle( > return anv_device_submit_simple_batch(device, &batch); > } > > +bool > +anv_vma_alloc(struct anv_device *device, struct anv_bo *bo) > +{ > + if (!(bo->flags & EXEC_OBJECT_PINNED)) > + return true; > + > + pthread_mutex_lock(&device->vma_mutex); > + > + bo->offset = 0; > + > + if (bo->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS && > + device->vma_hi_available >= bo->size) { > + uint64_t addr = util_vma_heap_alloc(&device->vma_hi, bo->size, 4096); > + if (addr) { > + bo->offset = canonical_address(addr); > + device->vma_hi_available -= bo->size; > + } > + } > + > + if (bo->offset == 0 && device->vma_lo_available >= bo->size) { > + uint64_t addr = util_vma_heap_alloc(&device->vma_lo, bo->size, 4096); > + if (addr) { > + bo->offset = canonical_address(addr); > + device->vma_lo_available -= bo->size; > + } > + } > + > + pthread_mutex_unlock(&device->vma_mutex); > + > + return bo->offset != 0; > +} > + > +void > +anv_vma_free(struct anv_device *device, struct anv_bo *bo) > +{ > + if (!(bo->flags & EXEC_OBJECT_PINNED)) > + return; > + > + pthread_mutex_lock(&device->vma_mutex); > + > + if (bo->offset >= 1ull << 32) { > + util_vma_heap_free(&device->vma_hi, bo->offset, bo->size); > + device->vma_hi_available += bo->size; > + } else { > + util_vma_heap_free(&device->vma_lo, bo->offset, bo->size); > + device->vma_lo_available += bo->size; > + } > + > + pthread_mutex_unlock(&device->vma_mutex); > + > + bo->offset = 0; > +} > + > VkResult > anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size) > { > diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h > index 761601d1e37..8807fe7e5fb 100644 > --- a/src/intel/vulkan/anv_private.h > +++ b/src/intel/vulkan/anv_private.h > @@ -49,6 +49,7 @@ > #include "util/list.h" > #include "util/u_atomic.h" > #include "util/u_vector.h" > +#include "util/vma.h" > #include "vk_alloc.h" > #include "vk_debug_report.h" > > @@ -79,6 +80,55 @@ struct gen_l3_config; > #include "common/intel_log.h" > #include "wsi_common.h" > > +/* anv Virtual Memory Layout > + * ========================= > + * > + * When the anv driver is determining the virtual graphics addresses of > memory > + * objects itself using the softpin mechanism, the following memory ranges > + * will be used. > + * > + * Three special considerations to notice: > + * > + * (1) the dynamic state pool is located within the same 4 GiB as the low > + * heap. This is to work around a VF cache issue described in a comment in > + * anv_physical_device_init_heaps. > + * > + * (2) the binding table pool is located at lower addresses than the surface > + * state pool, within a 4 GiB range. This allows surface state base addresses > + * to cover both binding tables (16 bit offsets) and surface states (32 bit > + * offsets). > + * > + * (3) the last 4 GiB of the address space is withheld from the high > + * heap. Various hardware units will read past the end of an object for > + * various reasons. This healthy margin prevents reads from wrapping around > + * 48-bit addresses. > + */ > +#define LOW_HEAP_MIN_ADDRESS 0x000000001000ULL /* 4 KiB */ > +#define LOW_HEAP_MAX_ADDRESS 0x0000bfffffffULL > +#define DYNAMIC_STATE_POOL_MIN_ADDRESS 0x0000c0000000ULL /* 3 GiB */ > +#define DYNAMIC_STATE_POOL_MAX_ADDRESS 0x0000ffffffffULL > +#define BINDING_TABLE_POOL_MIN_ADDRESS 0x000100000000ULL /* 4 GiB */ > +#define BINDING_TABLE_POOL_MAX_ADDRESS 0x00013fffffffULL > +#define SURFACE_STATE_POOL_MIN_ADDRESS 0x000140000000ULL /* 5 GiB */ > +#define SURFACE_STATE_POOL_MAX_ADDRESS 0x00017fffffffULL > +#define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000180000000ULL /* 6 GiB */ > +#define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL > +#define HIGH_HEAP_MIN_ADDRESS 0x0001c0000000ULL /* 7 GiB */ > +#define HIGH_HEAP_MAX_ADDRESS 0xfffeffffffffULL > + > +#define LOW_HEAP_SIZE \ > + (LOW_HEAP_MAX_ADDRESS - LOW_HEAP_MIN_ADDRESS + 1) > +#define HIGH_HEAP_SIZE \ > + (HIGH_HEAP_MAX_ADDRESS - HIGH_HEAP_MIN_ADDRESS + 1) > +#define DYNAMIC_STATE_POOL_SIZE \ > + (DYNAMIC_STATE_POOL_MAX_ADDRESS - DYNAMIC_STATE_POOL_MIN_ADDRESS + 1) > +#define BINDING_TABLE_POOL_SIZE \ > + (BINDING_TABLE_POOL_MAX_ADDRESS - BINDING_TABLE_POOL_MIN_ADDRESS + 1) > +#define SURFACE_STATE_POOL_SIZE \ > + (SURFACE_STATE_POOL_MAX_ADDRESS - SURFACE_STATE_POOL_MIN_ADDRESS + 1) > +#define INSTRUCTION_STATE_POOL_SIZE \ > + (INSTRUCTION_STATE_POOL_MAX_ADDRESS - INSTRUCTION_STATE_POOL_MIN_ADDRESS > + 1) > + > /* Allowing different clear colors requires us to perform a depth resolve at > * the end of certain render passes. This is because while slow clears store > * the clear color in the HiZ buffer, fast clears (without a resolve) don't. > @@ -805,6 +855,7 @@ struct anv_physical_device { > bool has_syncobj; > bool has_syncobj_wait; > bool has_context_priority; > + bool use_softpin; > > struct anv_device_extension_table supported_extensions; > > @@ -898,6 +949,12 @@ struct anv_device { > struct anv_device_extension_table enabled_extensions; > struct anv_dispatch_table dispatch; > > + pthread_mutex_t vma_mutex; > + struct util_vma_heap vma_lo; > + struct util_vma_heap vma_hi; > + uint64_t vma_lo_available; > + uint64_t vma_hi_available; > + > struct anv_bo_pool batch_bo_pool; > > struct anv_bo_cache bo_cache; > @@ -991,6 +1048,9 @@ int anv_gem_syncobj_wait(struct anv_device *device, > uint32_t *handles, uint32_t num_handles, > int64_t abs_timeout_ns, bool wait_all); > > +bool anv_vma_alloc(struct anv_device *device, struct anv_bo *bo); > +void anv_vma_free(struct anv_device *device, struct anv_bo *bo); > + > VkResult anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, > uint64_t size); > > struct anv_reloc_list { > -- > 2.14.3 > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev