On Mon, May 7, 2018 at 5:30 PM, Scott D Phillips <scott.d.phill...@intel.com > wrote:
> These will be used to assign virtual addresses to soft pinned > buffers in a later patch. > > Two allocators are added for separate 'low' and 'high' virtual > memory areas. Another alternative would have been to add a > double-sided allocator, which wasn't done here just because it > didn't appear to give any code complexity advantages. > > v2: - rename has_exec_softpin to use_softpin (Jason) > - Only remove bottom one page and top 4 GiB from virt (Jason) > - refer to comment in anv_allocator about state address + size > overflowing 48 bits (Jason) > - Mention hi/lo allocators vs double-sided allocator in > commit message (Chris) > - assign state pool memory ranges statically (Jason) > --- > src/intel/vulkan/anv_device.c | 77 ++++++++++++++++++++++++++++++ > ++++++++++++ > src/intel/vulkan/anv_private.h | 60 ++++++++++++++++++++++++++++++++ > 2 files changed, 137 insertions(+) > > diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c > index 374fc16c4c9..9e21818ead1 100644 > --- a/src/intel/vulkan/anv_device.c > +++ b/src/intel/vulkan/anv_device.c > @@ -374,6 +374,9 @@ anv_physical_device_init(struct anv_physical_device > *device, > anv_gem_supports_syncobj_wait(fd); > device->has_context_priority = anv_gem_has_context_priority(fd); > > + device->use_softpin = anv_gem_get_param(fd, > I915_PARAM_HAS_EXEC_SOFTPIN) > + && device->supports_48bit_addresses; > + > bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X); > > /* Starting with Gen10, the timestamp frequency of the command > streamer may > @@ -1527,6 +1530,27 @@ VkResult anv_CreateDevice( > goto fail_fd; > } > > + if (physical_device->use_softpin) { > + if (pthread_mutex_init(&device->vma_mutex, NULL) != 0) { > + result = vk_error(VK_ERROR_INITIALIZATION_FAILED); > + goto fail_fd; > + } > + > + /* keep the page with address zero out of the allocator */ > + util_vma_heap_init(&device->vma_lo, LOW_HEAP_MIN_ADDRESS, > LOW_HEAP_SIZE); > + device->vma_lo_available = > + 
physical_device->memory.heaps[physical_device->memory.heap_count > - 1].size; > + > + /* Leave the last 4GiB out of the high vma range, so that no state > base > + * address + size can overflow 48 bits. For more information see the > + * comment about Wa32bitGeneralStateOffset in anv_allocator.c > + */ > + util_vma_heap_init(&device->vma_hi, HIGH_HEAP_MIN_ADDRESS, > + HIGH_HEAP_SIZE); > As Jordan pointed out, this should probably depend on the aperture size in case that ever changes to be something other than 48 bits. That can be handled as part of future platform enabling though. > + device->vma_hi_available = physical_device->memory.heap_count == 1 > ? 0 : > + physical_device->memory.heaps[0].size; > + } > + > /* As per spec, the driver implementation may deny requests to acquire > * a priority above the default priority (MEDIUM) if the caller does > not > * have sufficient privileges. In this scenario > VK_ERROR_NOT_PERMITTED_EXT > @@ -1887,6 +1911,59 @@ VkResult anv_DeviceWaitIdle( > return anv_device_submit_simple_batch(device, &batch); > } > > +bool > +anv_vma_alloc(struct anv_device *device, struct anv_bo *bo) > +{ > + if (!(bo->flags & EXEC_OBJECT_PINNED)) > + return true; > + > + pthread_mutex_lock(&device->vma_mutex); > + > + bo->offset = 0; > + > + if (bo->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS && > + device->vma_hi_available >= bo->size) { > + uint64_t addr = util_vma_heap_alloc(&device->vma_hi, bo->size, > 4096); > + if (addr) { > + bo->offset = canonical_address(addr); > + device->vma_hi_available -= bo->size; > + } > + } > + > + if (bo->offset == 0 && device->vma_lo_available >= bo->size) { > + uint64_t addr = util_vma_heap_alloc(&device->vma_lo, bo->size, > 4096); > + if (addr) { > + bo->offset = canonical_address(addr); > + device->vma_lo_available -= bo->size; > + } > + } > + > + pthread_mutex_unlock(&device->vma_mutex); > + > + return bo->offset != 0; > +} > + > +void > +anv_vma_free(struct anv_device *device, struct anv_bo *bo) > +{ > + if 
(!(bo->flags & EXEC_OBJECT_PINNED)) > + return; > + > + pthread_mutex_lock(&device->vma_mutex); > + > + if (bo->offset >= 1ull << 32) { > + util_vma_heap_free(&device->vma_hi, bo->offset, bo->size); > + device->vma_hi_available += bo->size; > + } else { > + util_vma_heap_free(&device->vma_lo, bo->offset, bo->size); > + device->vma_lo_available += bo->size; > + } > + > + pthread_mutex_unlock(&device->vma_mutex); > + > + bo->offset = 0; > +} > + > VkResult > anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t > size) > { > diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_ > private.h > index 761601d1e37..8807fe7e5fb 100644 > --- a/src/intel/vulkan/anv_private.h > +++ b/src/intel/vulkan/anv_private.h > @@ -49,6 +49,7 @@ > #include "util/list.h" > #include "util/u_atomic.h" > #include "util/u_vector.h" > +#include "util/vma.h" > #include "vk_alloc.h" > #include "vk_debug_report.h" > > @@ -79,6 +80,55 @@ struct gen_l3_config; > #include "common/intel_log.h" > #include "wsi_common.h" > > +/* anv Virtual Memory Layout > + * ========================= > + * > + * When the anv driver is determining the virtual graphics addresses of > memory > + * objects itself using the softpin mechanism, the following memory ranges > + * will be used. > + * > + * Three special considerations to notice: > + * > + * (1) the dynamic state pool is located within the same 4 GiB as the low > + * heap. This is to work around a VF cache issue described in a comment in > + * anv_physical_device_init_heaps. > + * > + * (2) the binding table pool is located at lower addresses than the > surface > + * state pool, within a 4 GiB range. This allows surface state base > addresses > + * to cover both binding tables (16 bit offsets) and surface states (32 > bit > + * offsets). > + * > + * (3) the last 4 GiB of the address space is withheld from the high > + * heap. Various hardware units will read past the end of an object for > + * various reasons. 
This healthy margin prevents reads from wrapping > around > + * 48-bit addresses. > + */ > +#define LOW_HEAP_MIN_ADDRESS 0x000000001000ULL /* 4 KiB */ > +#define LOW_HEAP_MAX_ADDRESS 0x0000bfffffffULL > +#define DYNAMIC_STATE_POOL_MIN_ADDRESS 0x0000c0000000ULL /* 3 GiB */ > +#define DYNAMIC_STATE_POOL_MAX_ADDRESS 0x0000ffffffffULL > +#define BINDING_TABLE_POOL_MIN_ADDRESS 0x000100000000ULL /* 4 GiB */ > +#define BINDING_TABLE_POOL_MAX_ADDRESS 0x00013fffffffULL > +#define SURFACE_STATE_POOL_MIN_ADDRESS 0x000140000000ULL /* 5 GiB */ > +#define SURFACE_STATE_POOL_MAX_ADDRESS 0x00017fffffffULL > +#define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000180000000ULL /* 6 GiB */ > +#define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL > +#define HIGH_HEAP_MIN_ADDRESS 0x0001c0000000ULL /* 7 GiB */ > +#define HIGH_HEAP_MAX_ADDRESS 0xfffeffffffffULL > Thanks for making these fixed. It's way easier to see what's going on now. Reviewed-by: Jason Ekstrand <ja...@jlekstrand.net> > + > +#define LOW_HEAP_SIZE \ > + (LOW_HEAP_MAX_ADDRESS - LOW_HEAP_MIN_ADDRESS + 1) > +#define HIGH_HEAP_SIZE \ > + (HIGH_HEAP_MAX_ADDRESS - HIGH_HEAP_MIN_ADDRESS + 1) > +#define DYNAMIC_STATE_POOL_SIZE \ > + (DYNAMIC_STATE_POOL_MAX_ADDRESS - DYNAMIC_STATE_POOL_MIN_ADDRESS + 1) > +#define BINDING_TABLE_POOL_SIZE \ > + (BINDING_TABLE_POOL_MAX_ADDRESS - BINDING_TABLE_POOL_MIN_ADDRESS + 1) > +#define SURFACE_STATE_POOL_SIZE \ > + (SURFACE_STATE_POOL_MAX_ADDRESS - SURFACE_STATE_POOL_MIN_ADDRESS + 1) > +#define INSTRUCTION_STATE_POOL_SIZE \ > + (INSTRUCTION_STATE_POOL_MAX_ADDRESS - INSTRUCTION_STATE_POOL_MIN_ADDRESS > + 1) > + > /* Allowing different clear colors requires us to perform a depth resolve > at > * the end of certain render passes. This is because while slow clears > store > * the clear color in the HiZ buffer, fast clears (without a resolve) > don't. 
> @@ -805,6 +855,7 @@ struct anv_physical_device { > bool has_syncobj; > bool has_syncobj_wait; > bool has_context_priority; > + bool use_softpin; > > struct anv_device_extension_table supported_extensions; > > @@ -898,6 +949,12 @@ struct anv_device { > struct anv_device_extension_table enabled_extensions; > struct anv_dispatch_table dispatch; > > + pthread_mutex_t vma_mutex; > + struct util_vma_heap vma_lo; > + struct util_vma_heap vma_hi; > + uint64_t vma_lo_available; > + uint64_t vma_hi_available; > + > struct anv_bo_pool batch_bo_pool; > > struct anv_bo_cache bo_cache; > @@ -991,6 +1048,9 @@ int anv_gem_syncobj_wait(struct anv_device *device, > uint32_t *handles, uint32_t num_handles, > int64_t abs_timeout_ns, bool wait_all); > > +bool anv_vma_alloc(struct anv_device *device, struct anv_bo *bo); > +void anv_vma_free(struct anv_device *device, struct anv_bo *bo); > + > VkResult anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, > uint64_t size); > > struct anv_reloc_list { > -- > 2.14.3 > >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev