On Wed, Oct 21, 2015 at 04:24:58PM +0100, Chris Wilson wrote: > Our GPUs impose certain requirements upon buffers that depend upon how > exactly they are used. Typically this is expressed as that they require > a larger surface than would be naively computed by pitch * height. > Normally such requirements are hidden away in the userspace driver, but > when we accept pointers from strangers and later impose extra conditions > on them, the original client allocator has no idea about the > monstrosities in the GPU and we require the userspace driver to inform > the kernel how many padding pages are required beyond the client > allocation. > > v2: Long time, no see > > Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> > Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
Usual "needs igt + open-source userspace" broken record from maintainer, in case someone really wants to use this. Noises I've heard is that it's for opencl, but who knows. -Daniel > --- > drivers/gpu/drm/i915/i915_drv.h | 6 ++- > drivers/gpu/drm/i915/i915_gem.c | 77 > +++++++++++++++--------------- > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 ++++++- > include/uapi/drm/i915_drm.h | 5 +- > 4 files changed, 60 insertions(+), 44 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 2771b7edde02..a0ce011a5dc0 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2786,11 +2786,13 @@ void i915_gem_vma_destroy(struct i915_vma *vma); > int __must_check > i915_gem_object_pin(struct drm_i915_gem_object *obj, > struct i915_address_space *vm, > + uint64_t size, > uint32_t alignment, > uint64_t flags); > int __must_check > i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, > const struct i915_ggtt_view *view, > + uint64_t size, > uint32_t alignment, > uint64_t flags); > > @@ -3048,8 +3050,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj, > uint32_t alignment, > unsigned flags) > { > - return i915_gem_object_pin(obj, i915_obj_to_ggtt(obj), > - alignment, flags | PIN_GLOBAL); > + return i915_gem_object_pin(obj, i915_obj_to_ggtt(obj), 0, alignment, > + flags | PIN_GLOBAL); > } > > static inline int > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 4f49c9dc24e2..d079111d15c5 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -1784,7 +1784,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct > vm_fault *vmf) > } > > /* Now pin it into the GTT if needed */ > - ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE); > + ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); > if (ret) > goto unlock; > > @@ -3362,20 +3362,20 @@ static struct i915_vma * > i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, > struct i915_address_space *vm, > const struct i915_ggtt_view *ggtt_view, > + uint64_t size, > unsigned alignment, > uint64_t flags) > { > struct drm_device *dev = obj->base.dev; > struct drm_i915_private *dev_priv = dev->dev_private; > - u32 fence_alignment, unfenced_alignment; > u64 start, end; > - u64 size, fence_size; > u32 search_flag, alloc_flag; > struct i915_vma *vma; > int ret; > > if (i915_is_ggtt(vm)) { > - u32 view_size; > + u32 fence_size, fence_alignment, unfenced_alignment; > + u64 view_size; > > if (WARN_ON(!ggtt_view)) > return ERR_PTR(-EINVAL); > @@ -3393,21 +3393,22 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object > *obj, > view_size, > > obj->tiling_mode, > false); > - size = flags & PIN_MAPPABLE ? fence_size : view_size; > + size = max(size, view_size); > + if (flags & PIN_MAPPABLE) > + size = max_t(u64, size, fence_size); > + > + if (alignment == 0) > + alignment = flags & PIN_MAPPABLE ? fence_alignment : > + unfenced_alignment; > + if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { > + DRM_DEBUG("Invalid object (view type=%u) alignment > requested %u\n", > + ggtt_view ? ggtt_view->type : 0, > + alignment); > + return ERR_PTR(-EINVAL); > + } > } else { > - fence_size = i915_gem_get_gtt_size(dev, > - obj->base.size, > - obj->tiling_mode); > - fence_alignment = i915_gem_get_gtt_alignment(dev, > - obj->base.size, > - obj->tiling_mode, > - true); > - unfenced_alignment = > - i915_gem_get_gtt_alignment(dev, > - obj->base.size, > - obj->tiling_mode, > - false); > - size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; > + size = max_t(u64, size, obj->base.size); > + alignment = 4096; > } > > start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; > @@ -3418,24 +3419,14 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object > *obj, > if (flags & PIN_ZONE_4G) > end = min_t(u64, end, 1ULL << 32); > > - if (alignment == 0) > - alignment = flags & PIN_MAPPABLE ? fence_alignment : > - unfenced_alignment; > - if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { > - DRM_DEBUG("Invalid object (view type=%u) alignment requested > %u\n", > - ggtt_view ? ggtt_view->type : 0, > - alignment); > - return ERR_PTR(-EINVAL); > - } > - > /* If binding the object/GGTT view requires more space than the entire > * aperture has, reject it early before evicting everything in a vain > * attempt to find space. > */ > if (size > end) { > - DRM_DEBUG("Attempting to bind an object (view type=%u) larger > than the aperture: size=%llu > %s aperture=%llu\n", > + DRM_DEBUG("Attempting to bind an object (view type=%u) larger > than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n", > ggtt_view ? ggtt_view->type : 0, > - size, > + size, obj->base.size, > flags & PIN_MAPPABLE ? "mappable" : "total", > end); > return ERR_PTR(-E2BIG); > @@ -3892,7 +3883,7 @@ i915_gem_object_pin_to_display_plane(struct > drm_i915_gem_object *obj, > * (e.g. libkms for the bootup splash), we have to ensure that we > * always use map_and_fenceable for all scanout buffers. > */ > - ret = i915_gem_object_ggtt_pin(obj, view, alignment, > + ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment, > view->type == I915_GGTT_VIEW_NORMAL ? > PIN_MAPPABLE : 0); > if (ret) > @@ -4043,12 +4034,17 @@ i915_gem_ring_throttle(struct drm_device *dev, struct > drm_file *file) > } > > static bool > -i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) > +i915_vma_misplaced(struct i915_vma *vma, > + uint64_t size, > + uint32_t alignment, > + uint64_t flags) > { > struct drm_i915_gem_object *obj = vma->obj; > > - if (alignment && > - vma->node.start & (alignment - 1)) > + if (vma->node.size < size) > + return true; > + > + if (alignment && vma->node.start & (alignment - 1)) > return true; > > if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) > @@ -4088,6 +4084,7 @@ static int > i915_gem_object_do_pin(struct drm_i915_gem_object *obj, > struct i915_address_space *vm, > const struct i915_ggtt_view *ggtt_view, > + uint64_t size, > uint32_t alignment, > uint64_t flags) > { > @@ -4118,7 +4115,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj, > if (WARN_ON(vma->pin_count == > DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) > return -EBUSY; > > - if (i915_vma_misplaced(vma, alignment, flags)) { > + if (i915_vma_misplaced(vma, size, alignment, flags)) { > WARN(vma->pin_count, > "bo is already pinned in %s with incorrect > alignment:" > " offset=%08x %08x, req.alignment=%x, > req.map_and_fenceable=%d," > @@ -4139,8 +4136,8 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj, > > bound = vma ? vma->bound : 0; > if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { > - vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment, > - flags); > + vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, > + size, alignment, flags); > if (IS_ERR(vma)) > return PTR_ERR(vma); > } else { > @@ -4162,17 +4159,19 @@ i915_gem_object_do_pin(struct drm_i915_gem_object > *obj, > int > i915_gem_object_pin(struct drm_i915_gem_object *obj, > struct i915_address_space *vm, > + uint64_t size, > uint32_t alignment, > uint64_t flags) > { > return i915_gem_object_do_pin(obj, vm, > i915_is_ggtt(vm) ? &i915_ggtt_view_normal > : NULL, > - alignment, flags); > + size, alignment, flags); > } > > int > i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, > const struct i915_ggtt_view *view, > + uint64_t size, > uint32_t alignment, > uint64_t flags) > { > @@ -4180,7 +4179,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object > *obj, > return -EINVAL; > > return i915_gem_object_do_pin(obj, i915_obj_to_ggtt(obj), view, > - alignment, flags | PIN_GLOBAL); > + size, alignment, flags | PIN_GLOBAL); > } > > void > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index 045a7631faa0..b6bc22d49628 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -605,10 +605,14 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, > flags |= PIN_HIGH; > } > > - ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags); > + ret = i915_gem_object_pin(obj, vma->vm, > + entry->rsvd1, /* pad_to_size */ > + entry->alignment, > + flags); > if ((ret == -ENOSPC || ret == -E2BIG) && > only_mappable_for_reloc(entry->flags)) > ret = i915_gem_object_pin(obj, vma->vm, > + entry->rsvd1, /* pad_to_size */ > entry->alignment, > flags & ~PIN_MAPPABLE); > if (ret) > @@ -672,6 +676,9 @@ eb_vma_misplaced(struct i915_vma *vma) > vma->node.start & (entry->alignment - 1)) > return true; > > + if (vma->node.size < entry->rsvd1) /* pad_to_size */ > + return true; > + > if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS && > vma->node.start < BATCH_OFFSET_BIAS) > return true; > @@ -988,6 +995,13 @@ validate_exec_list(struct drm_device *dev, > if (exec[i].alignment && !is_power_of_2(exec[i].alignment)) > return -EINVAL; > > + /* pad_to_size was once a reserved field, so sanitize it */ > + if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) { > + if (offset_in_page(exec[i].rsvd1)) > + return -EINVAL; > + } else > + exec[i].rsvd1 = 0; > + > /* First check for malicious input causing overflow in > * the worst case where we need to allocate the entire > * relocation tree as a single array. > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index 08e047cba76a..678f7d5320ae 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -691,10 +691,11 @@ struct drm_i915_gem_exec_object2 { > #define EXEC_OBJECT_NEEDS_GTT (1<<1) > #define EXEC_OBJECT_WRITE (1<<2) > #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) > -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1) > +#define EXEC_OBJECT_PAD_TO_SIZE (1<<4) > +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1) > __u64 flags; > > - __u64 rsvd1; > + __u64 rsvd1; /* pad_to_size */ > __u64 rsvd2; > }; > > -- > 2.6.1 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx