Many apologies to Michal for incorrectly spelling his name in the CC list.

Thomas.

> -----Original Message-----
> From: Daniel, Thomas
> Sent: Tuesday, June 30, 2015 3:13 PM
> To: intel-gfx@lists.freedesktop.org
> Cc: Chris Wilson; Goel, Akash; Belgaumkar, Vinay; Michal Winiarsky; Zou,
> Nanhai; Daniel, Thomas
> Subject: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer
> 
> From: Chris Wilson <ch...@chris-wilson.co.uk>
> 
> Userspace can pass in an offset that it presumes the object is located
> at. The kernel will then do its utmost to fit the object into that
> location. The assumption is that userspace is handling its own object
> locations (for example along with full-ppgtt) and that the kernel will
> rarely have to make space for the user's requests.
> 
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> 
> v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by 
> Chris
> Wilson.  Fixed incorrect error paths causing crash found by Michal Winiarski.
> (Not published externally)
> 
> v3: Rebased because of trivial conflict in object_bind_to_vm.  Fixed eviction
> to allow eviction of soft-pinned objects when another soft-pinned object used
> by a subsequent execbuffer overlaps reported by Michal Winiarski.
> (Not published externally)
> 
> v4: Moved soft-pinned objects to the front of ordered_vmas so that they are
> pinned first after an address conflict happens to avoid repeated conflicts in
> rare cases (Suggested by Chris Wilson).  Expanded comment on
> drm_i915_gem_exec_object2.offset to cover this new API.
> 
> Cc: Chris Wilson <ch...@chris-wilson.co.uk>
> Cc: Akash Goel <akash.g...@intel.com>
> Cc: Vinay Belgaumkar <vinay.belgaum...@intel.com>
> Cc: Michal Winiarsky <michal.winiar...@intel.com>
> Cc: Zou Nanhai <nanhai....@intel.com>
> Signed-off-by: Thomas Daniel <thomas.dan...@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h            |    4 +++
>  drivers/gpu/drm/i915/i915_gem.c            |   51 
> ++++++++++++++++++++--------
>  drivers/gpu/drm/i915/i915_gem_evict.c      |   38 +++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |   16 +++++++--
>  include/uapi/drm/i915_drm.h                |    9 +++--
>  5 files changed, 99 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index d90a782..e96c101 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2747,7 +2747,9 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
>  #define PIN_USER     (1<<4)
>  #define PIN_UPDATE   (1<<5)
>  #define PIN_FULL_RANGE       (1<<6)
> +#define PIN_OFFSET_FIXED     (1<<7)
>  #define PIN_OFFSET_MASK (~4095)
> +
>  int __must_check
>  i915_gem_object_pin(struct drm_i915_gem_object *obj,
>                   struct i915_address_space *vm,
> @@ -3085,6 +3087,8 @@ int __must_check i915_gem_evict_something(struct
> drm_device *dev,
>                                         unsigned long start,
>                                         unsigned long end,
>                                         unsigned flags);
> +int __must_check
> +i915_gem_evict_for_vma(struct i915_vma *target);
>  int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
>  int i915_gem_evict_everything(struct drm_device *dev);
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c
> b/drivers/gpu/drm/i915/i915_gem.c
> index f170da6..a7e5ff2 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3806,22 +3806,41 @@ i915_gem_object_bind_to_vm(struct
> drm_i915_gem_object *obj,
>       if (IS_ERR(vma))
>               goto err_unpin;
> 
> +     if (flags & PIN_OFFSET_FIXED) {
> +             uint64_t offset = flags & PIN_OFFSET_MASK;
> +             if (offset & (alignment - 1)) {
> +                     ret = -EINVAL;
> +                     goto err_free_vma;
> +             }
> +             vma->node.start = offset;
> +             vma->node.size = size;
> +             vma->node.color = obj->cache_level;
> +             ret = drm_mm_reserve_node(&vm->mm, &vma->node);
> +             if (ret) {
> +                     ret = i915_gem_evict_for_vma(vma);
> +                     if (ret == 0)
> +                             ret = drm_mm_reserve_node(&vm->mm,
> &vma->node);
> +             }
> +             if (ret)
> +                     goto err_free_vma;
> +     } else {
>  search_free:
> -     ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
> -                                               size, alignment,
> -                                               obj->cache_level,
> -                                               start, end,
> -                                               search_flag,
> -                                               alloc_flag);
> -     if (ret) {
> -             ret = i915_gem_evict_something(dev, vm, size, alignment,
> -                                            obj->cache_level,
> -                                            start, end,
> -                                            flags);
> -             if (ret == 0)
> -                     goto search_free;
> +             ret = drm_mm_insert_node_in_range_generic(&vm->mm,
> &vma->node,
> +                                                       size, alignment,
> +                                                       obj->cache_level,
> +                                                       start, end,
> +                                                       search_flag,
> +                                                       alloc_flag);
> +             if (ret) {
> +                     ret = i915_gem_evict_something(dev, vm, size,
> alignment,
> +                                                    obj->cache_level,
> +                                                    start, end,
> +                                                    flags);
> +                     if (ret == 0)
> +                             goto search_free;
> 
> -             goto err_free_vma;
> +                     goto err_free_vma;
> +             }
>       }
>       if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
>               ret = -EINVAL;
> @@ -4357,6 +4376,10 @@ i915_vma_misplaced(struct i915_vma *vma,
> uint32_t alignment, uint64_t flags)
>           vma->node.start < (flags & PIN_OFFSET_MASK))
>               return true;
> 
> +     if (flags & PIN_OFFSET_FIXED &&
> +         vma->node.start != (flags & PIN_OFFSET_MASK))
> +             return true;
> +
>       return false;
>  }
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c
> b/drivers/gpu/drm/i915/i915_gem_evict.c
> index d09e35e..6098e2f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -199,6 +199,44 @@ found:
>       return ret;
>  }
> 
> +int
> +i915_gem_evict_for_vma(struct i915_vma *target)
> +{
> +     struct drm_mm_node *node, *next;
> +
> +     list_for_each_entry_safe(node, next,
> +                     &target->vm->mm.head_node.node_list,
> +                     node_list) {
> +             struct i915_vma *vma;
> +             int ret;
> +
> +             if (node->start + node->size <= target->node.start)
> +                     continue;
> +             if (node->start >= target->node.start + target->node.size)
> +                     break;
> +
> +             vma = container_of(node, typeof(*vma), node);
> +
> +             if (vma->pin_count) {
> +                     /* We may need to evict a buffer in the same batch */
> +                     if (!vma->exec_entry)
> +                             return -EBUSY;
> +
> +                     if (vma->exec_entry->flags & EXEC_OBJECT_PINNED)
> +                             /* Overlapping fixed objects in the same batch
> */
> +                             return -EINVAL;
> +
> +                     return -ENOSPC;
> +             }
> +
> +             ret = i915_vma_unbind(vma);
> +             if (ret)
> +                     return ret;
> +     }
> +
> +     return 0;
> +}
> +
>  /**
>   * i915_gem_evict_vm - Evict all idle vmas from a vm
>   * @vm: Address space to cleanse
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 3d94744..4e6955e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -596,6 +596,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma
> *vma,
>                       flags |= PIN_GLOBAL | PIN_MAPPABLE;
>               if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
>                       flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
> +             if (entry->flags & EXEC_OBJECT_PINNED)
> +                     flags |= entry->offset | PIN_OFFSET_FIXED;
>       }
> 
>       ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
> @@ -665,6 +667,10 @@ eb_vma_misplaced(struct i915_vma *vma)
>           vma->node.start & (entry->alignment - 1))
>               return true;
> 
> +     if (entry->flags & EXEC_OBJECT_PINNED &&
> +         vma->node.start != entry->offset)
> +             return true;
> +
>       if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
>           vma->node.start < BATCH_OFFSET_BIAS)
>               return true;
> @@ -690,6 +696,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs
> *ring,
>       struct i915_vma *vma;
>       struct i915_address_space *vm;
>       struct list_head ordered_vmas;
> +     struct list_head pinned_vmas;
>       bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
>       int retry;
> 
> @@ -698,6 +705,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs
> *ring,
>       vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
> 
>       INIT_LIST_HEAD(&ordered_vmas);
> +     INIT_LIST_HEAD(&pinned_vmas);
>       while (!list_empty(vmas)) {
>               struct drm_i915_gem_exec_object2 *entry;
>               bool need_fence, need_mappable;
> @@ -716,7 +724,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs
> *ring,
>                       obj->tiling_mode != I915_TILING_NONE;
>               need_mappable = need_fence || need_reloc_mappable(vma);
> 
> -             if (need_mappable) {
> +             if (entry->flags & EXEC_OBJECT_PINNED)
> +                     list_move_tail(&vma->exec_list, &pinned_vmas);
> +             else if (need_mappable) {
>                       entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
>                       list_move(&vma->exec_list, &ordered_vmas);
>               } else
> @@ -726,6 +736,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs
> *ring,
>               obj->base.pending_write_domain = 0;
>       }
>       list_splice(&ordered_vmas, vmas);
> +     list_splice(&pinned_vmas, vmas);
> 
>       /* Attempt to pin all of the buffers into the GTT.
>        * This is done in 3 phases:
> @@ -1404,7 +1415,8 @@ eb_get_batch(struct eb_vmas *eb)
>        * Note that actual hangs have only been observed on gen7, but for
>        * paranoia do it everywhere.
>        */
> -     vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
> +     if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
> +             vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
> 
>       return vma->obj;
>  }
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 55ba527..18f8f99 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -677,8 +677,10 @@ struct drm_i915_gem_exec_object2 {
>       __u64 alignment;
> 
>       /**
> -      * Returned value of the updated offset of the object, for future
> -      * presumed_offset writes.
> +      * When the EXEC_OBJECT_PINNED flag is specified this is populated by
> +      * the user with the GTT offset at which this object will be pinned.
> +      * Otherwise the kernel populates it with the value of the updated
> +      * offset of the object, for future presumed_offset writes.
>        */
>       __u64 offset;
> 
> @@ -686,7 +688,8 @@ struct drm_i915_gem_exec_object2 {
>  #define EXEC_OBJECT_NEEDS_GTT        (1<<1)
>  #define EXEC_OBJECT_WRITE    (1<<2)
>  #define EXEC_OBJECT_SUPPORTS_48BBADDRESS (1<<3)
> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -
> (EXEC_OBJECT_SUPPORTS_48BBADDRESS<<1)
> +#define EXEC_OBJECT_PINNED   (1<<4)
> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
>       __u64 flags;
> 
>       __u64 rsvd1;
> --
> 1.7.9.5

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to