Reviewed-by: Jordan Justen <jordan.l.jus...@intel.com>

On 2018-05-07 17:30:46, Scott D Phillips wrote:
> These will be used to assign virtual addresses to soft pinned
> buffers in a later patch.
> 
> Two allocators are added for separate 'low' and 'high' virtual
> memory areas. Another alternative would have been to add a
> double-sided allocator, which wasn't done here just because it
> didn't appear to give any code complexity advantages.
> 
> v2: - rename has_exec_softpin to use_softpin (Jason)
>     - Only remove bottom one page and top 4 GiB from virt (Jason)
>     - refer to comment in anv_allocator about state address + size
>       overflowing 48 bits (Jason)
>     - Mention hi/lo allocators vs double-sided allocator in
>       commit message (Chris)
>     - assign state pool memory ranges statically (Jason)
> ---
>  src/intel/vulkan/anv_device.c  | 77 ++++++++++++++++++++++++++++++++++++++++++
>  src/intel/vulkan/anv_private.h | 60 ++++++++++++++++++++++++++++++++
>  2 files changed, 137 insertions(+)
> 
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index 374fc16c4c9..9e21818ead1 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -374,6 +374,9 @@ anv_physical_device_init(struct anv_physical_device *device,
>                                anv_gem_supports_syncobj_wait(fd);
>     device->has_context_priority = anv_gem_has_context_priority(fd);
>  
> +   device->use_softpin = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN)
> +      && device->supports_48bit_addresses;
> +
>     bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X);
>  
>     /* Starting with Gen10, the timestamp frequency of the command streamer may
> @@ -1527,6 +1530,27 @@ VkResult anv_CreateDevice(
>        goto fail_fd;
>     }
>  
> +   if (physical_device->use_softpin) {
> +      if (pthread_mutex_init(&device->vma_mutex, NULL) != 0) {
> +         result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
> +         goto fail_fd;
> +      }
> +
> +      /* keep the page with address zero out of the allocator */
> +      util_vma_heap_init(&device->vma_lo, LOW_HEAP_MIN_ADDRESS, LOW_HEAP_SIZE);
> +      device->vma_lo_available =
> +         physical_device->memory.heaps[physical_device->memory.heap_count - 1].size;
> +
> +      /* Leave the last 4GiB out of the high vma range, so that no state base
> +       * address + size can overflow 48 bits. For more information see the
> +       * comment about Wa32bitGeneralStateOffset in anv_allocator.c
> +       */
> +      util_vma_heap_init(&device->vma_hi, HIGH_HEAP_MIN_ADDRESS,
> +                         HIGH_HEAP_SIZE);
> +      device->vma_hi_available = physical_device->memory.heap_count == 1 ? 0 :
> +         physical_device->memory.heaps[0].size;
> +   }
> +
>     /* As per spec, the driver implementation may deny requests to acquire
>      * a priority above the default priority (MEDIUM) if the caller does not
>      * have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_EXT
> @@ -1887,6 +1911,59 @@ VkResult anv_DeviceWaitIdle(
>     return anv_device_submit_simple_batch(device, &batch);
>  }
>  
> +bool
> +anv_vma_alloc(struct anv_device *device, struct anv_bo *bo)
> +{
> +   if (!(bo->flags & EXEC_OBJECT_PINNED))
> +      return true;
> +
> +   pthread_mutex_lock(&device->vma_mutex);
> +
> +   bo->offset = 0;
> +
> +   if (bo->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS &&
> +       device->vma_hi_available >= bo->size) {
> +      uint64_t addr = util_vma_heap_alloc(&device->vma_hi, bo->size, 4096);
> +      if (addr) {
> +         bo->offset = canonical_address(addr);
> +         device->vma_hi_available -= bo->size;
> +      }
> +   }
> +
> +   if (bo->offset == 0 && device->vma_lo_available >= bo->size) {
> +      uint64_t addr = util_vma_heap_alloc(&device->vma_lo, bo->size, 4096);
> +      if (addr) {
> +         bo->offset = canonical_address(addr);
> +         device->vma_lo_available -= bo->size;
> +      }
> +   }
> +
> +   pthread_mutex_unlock(&device->vma_mutex);
> +
> +   return bo->offset != 0;
> +}
> +
> +void
> +anv_vma_free(struct anv_device *device, struct anv_bo *bo)
> +{
> +   if (!(bo->flags & EXEC_OBJECT_PINNED))
> +      return;
> +
> +   pthread_mutex_lock(&device->vma_mutex);
> +
> +   if (bo->offset >= 1ull << 32) {
> +      util_vma_heap_free(&device->vma_hi, bo->offset, bo->size);
> +      device->vma_hi_available += bo->size;
> +   } else {
> +      util_vma_heap_free(&device->vma_lo, bo->offset, bo->size);
> +      device->vma_lo_available += bo->size;
> +   }
> +
> +   pthread_mutex_unlock(&device->vma_mutex);
> +
> +   bo->offset = 0;
> +}
> +
>  VkResult
>  anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size)
>  {
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
> index 761601d1e37..8807fe7e5fb 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -49,6 +49,7 @@
>  #include "util/list.h"
>  #include "util/u_atomic.h"
>  #include "util/u_vector.h"
> +#include "util/vma.h"
>  #include "vk_alloc.h"
>  #include "vk_debug_report.h"
>  
> @@ -79,6 +80,55 @@ struct gen_l3_config;
>  #include "common/intel_log.h"
>  #include "wsi_common.h"
>  
> +/* anv Virtual Memory Layout
> + * =========================
> + *
> + * When the anv driver is determining the virtual graphics addresses of memory
> + * objects itself using the softpin mechanism, the following memory ranges
> + * will be used.
> + *
> + * Three special considerations to notice:
> + *
> + * (1) the dynamic state pool is located within the same 4 GiB as the low
> + * heap. This is to work around a VF cache issue described in a comment in
> + * anv_physical_device_init_heaps.
> + *
> + * (2) the binding table pool is located at lower addresses than the surface
> + * state pool, within a 4 GiB range. This allows surface state base addresses
> + * to cover both binding tables (16 bit offsets) and surface states (32 bit
> + * offsets).
> + *
> + * (3) the last 4 GiB of the address space is withheld from the high
> + * heap. Various hardware units will read past the end of an object for
> + * various reasons. This healthy margin prevents reads from wrapping around
> + * 48-bit addresses.
> + */
> +#define LOW_HEAP_MIN_ADDRESS               0x000000001000ULL /* 4 KiB */
> +#define LOW_HEAP_MAX_ADDRESS               0x0000bfffffffULL
> +#define DYNAMIC_STATE_POOL_MIN_ADDRESS     0x0000c0000000ULL /* 3 GiB */
> +#define DYNAMIC_STATE_POOL_MAX_ADDRESS     0x0000ffffffffULL
> +#define BINDING_TABLE_POOL_MIN_ADDRESS     0x000100000000ULL /* 4 GiB */
> +#define BINDING_TABLE_POOL_MAX_ADDRESS     0x00013fffffffULL
> +#define SURFACE_STATE_POOL_MIN_ADDRESS     0x000140000000ULL /* 5 GiB */
> +#define SURFACE_STATE_POOL_MAX_ADDRESS     0x00017fffffffULL
> +#define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000180000000ULL /* 6 GiB */
> +#define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL
> +#define HIGH_HEAP_MIN_ADDRESS              0x0001c0000000ULL /* 7 GiB */
> +#define HIGH_HEAP_MAX_ADDRESS              0xfffeffffffffULL
> +
> +#define LOW_HEAP_SIZE               \
> +   (LOW_HEAP_MAX_ADDRESS - LOW_HEAP_MIN_ADDRESS + 1)
> +#define HIGH_HEAP_SIZE              \
> +   (HIGH_HEAP_MAX_ADDRESS - HIGH_HEAP_MIN_ADDRESS + 1)
> +#define DYNAMIC_STATE_POOL_SIZE     \
> +   (DYNAMIC_STATE_POOL_MAX_ADDRESS - DYNAMIC_STATE_POOL_MIN_ADDRESS + 1)
> +#define BINDING_TABLE_POOL_SIZE     \
> +   (BINDING_TABLE_POOL_MAX_ADDRESS - BINDING_TABLE_POOL_MIN_ADDRESS + 1)
> +#define SURFACE_STATE_POOL_SIZE     \
> +   (SURFACE_STATE_POOL_MAX_ADDRESS - SURFACE_STATE_POOL_MIN_ADDRESS + 1)
> +#define INSTRUCTION_STATE_POOL_SIZE \
> +   (INSTRUCTION_STATE_POOL_MAX_ADDRESS - INSTRUCTION_STATE_POOL_MIN_ADDRESS + 1)
> +
>  /* Allowing different clear colors requires us to perform a depth resolve at
>   * the end of certain render passes. This is because while slow clears store
>   * the clear color in the HiZ buffer, fast clears (without a resolve) don't.
> @@ -805,6 +855,7 @@ struct anv_physical_device {
>      bool                                        has_syncobj;
>      bool                                        has_syncobj_wait;
>      bool                                        has_context_priority;
> +    bool                                        use_softpin;
>  
>      struct anv_device_extension_table           supported_extensions;
>  
> @@ -898,6 +949,12 @@ struct anv_device {
>      struct anv_device_extension_table           enabled_extensions;
>      struct anv_dispatch_table                   dispatch;
>  
> +    pthread_mutex_t                             vma_mutex;
> +    struct util_vma_heap                        vma_lo;
> +    struct util_vma_heap                        vma_hi;
> +    uint64_t                                    vma_lo_available;
> +    uint64_t                                    vma_hi_available;
> +
>      struct anv_bo_pool                          batch_bo_pool;
>  
>      struct anv_bo_cache                         bo_cache;
> @@ -991,6 +1048,9 @@ int anv_gem_syncobj_wait(struct anv_device *device,
>                           uint32_t *handles, uint32_t num_handles,
>                           int64_t abs_timeout_ns, bool wait_all);
>  
> +bool anv_vma_alloc(struct anv_device *device, struct anv_bo *bo);
> +void anv_vma_free(struct anv_device *device, struct anv_bo *bo);
> +
>  VkResult anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size);
>  
>  struct anv_reloc_list {
> -- 
> 2.14.3
> 
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to