On Thu, Oct 01, 2015 at 12:34:47PM +0100, Chris Wilson wrote:
> Whilst discussing possible ways to trigger an invalidate_range on a
> userptr with an aliased GGTT mmapping (and so cause a struct_mutex
> deadlock), the conclusion is that we can, and we must, prevent any
> possible deadlock by avoiding taking the mutex at all during
> invalidate_range. This has numerous advantages, all of which stem from
> avoiding calls to a sleeping function from inside an unknown context. In
> particular, it simplifies the invalidate_range because we no longer
> have to juggle the spinlock/mutex and can just hold the spinlock
> for the entire walk. To compensate, we have to make get_pages a bit more
> complicated in order to serialise with a pending cancel_userptr worker.
> As we hold the struct_mutex, we have no choice but to return EAGAIN and
> hope that the worker is then flushed before we retry after reacquiring
> the struct_mutex.
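
A condensed sketch of that serialisation, pieced together from the patch
below (names follow the real code; context, error paths and the worker
body itself are trimmed):

	/* notifier side: may run in any context, so only the spinlock is
	 * taken; the struct_mutex work is pushed to a per-object worker
	 * and at most one work packet (and one reference) is queued.
	 */
	spin_lock(&mn->lock);
	if (mo->active && kref_get_unless_zero(&mo->obj->base.refcount)) {
		schedule_work(&mo->work);	/* worker takes struct_mutex */
		mo->active = false;		/* only queue/ref once */
	}
	spin_unlock(&mn->lock);

	/* get_pages side: struct_mutex is already held, so we cannot wait
	 * for the worker; if a cancel is still pending we back off with
	 * -EAGAIN and the caller retries once the worker has flushed.
	 */
	spin_lock(&mo->mn->lock);
	if (!work_pending(&mo->work))
		mo->active = true;
	else
		ret = -EAGAIN;
	spin_unlock(&mo->mn->lock);

The notifier path never sleeps; everything that needs struct_mutex lives
in the worker, and get_pages is the side that yields.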
> 
> The important caveat is that the invalidate_range itself is no longer
> synchronous. There exists a small but definite period of time in which
> the old PTEs' pages remain accessible via the GPU. Note however that the
> physical pages themselves are not invalidated by the mmu_notifier, just
> the CPU view of the address space. The impact should be limited to a
> delay in pages being flushed, rather than a possibility of writing to
> the wrong pages. The only race condition that this worsens is remapping
> a userptr active on the GPU, where fresh work may still reference the
> old pages due to struct_mutex contention. Given that userspace is racing
> with the GPU, it is fair to say that the results are undefined.
> 
> v2: Only queue the worker (and, importantly, only take one refcnt) once.
> 
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> Cc: MichaƂ Winiarski <michal.winiar...@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursu...@intel.com>

Pulled in all 3 patches. Btw some pretty kerneldoc explaining the
high-level interactions would be neat for all the userptr stuff ... I'm
totally lost in i915_gem_userptr.c ;-)
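
Something along these lines maybe, as a starting point - purely a sketch
of the flow after this patch, not text from the tree:

/**
 * DOC: userptr mmu_notifier interaction (sketch)
 *
 * A userptr object pins its backing store with get_user_pages, so we
 * register an mmu_notifier on the owning mm to learn when the CPU view
 * of that range changes.  invalidate_range_start can be called from an
 * unknown context and therefore takes only the notifier spinlock: it
 * walks the overlapping objects and, for each active one, takes a single
 * reference and schedules a cancel worker.  That worker grabs
 * struct_mutex, waits for any rendering and drops the pages.  get_pages
 * marks the object active under the same spinlock; if a cancel worker is
 * still pending it returns -EAGAIN, so the caller drops struct_mutex,
 * lets the worker flush, and retries.
 */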

Thanks, Daniel

> ---
>  drivers/gpu/drm/i915/i915_gem_userptr.c | 148 +++++++++++++-------------------
>  1 file changed, 61 insertions(+), 87 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index 161f7fbf5b76..1b3b451b6658 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -50,7 +50,6 @@ struct i915_mmu_notifier {
>       struct mmu_notifier mn;
>       struct rb_root objects;
>       struct list_head linear;
> -     unsigned long serial;
>       bool has_linear;
>  };
>  
> @@ -59,14 +58,16 @@ struct i915_mmu_object {
>       struct interval_tree_node it;
>       struct list_head link;
>       struct drm_i915_gem_object *obj;
> +     struct work_struct work;
>       bool active;
>       bool is_linear;
>  };
>  
> -static unsigned long cancel_userptr(struct drm_i915_gem_object *obj)
> +static void __cancel_userptr__worker(struct work_struct *work)
>  {
> +     struct i915_mmu_object *mo = container_of(work, typeof(*mo), work);
> +     struct drm_i915_gem_object *obj = mo->obj;
>       struct drm_device *dev = obj->base.dev;
> -     unsigned long end;
>  
>       mutex_lock(&dev->struct_mutex);
>       /* Cancel any active worker and force us to re-evaluate gup */
> @@ -89,46 +90,28 @@ static unsigned long cancel_userptr(struct drm_i915_gem_object *obj)
>               dev_priv->mm.interruptible = was_interruptible;
>       }
>  
> -     end = obj->userptr.ptr + obj->base.size;
> -
>       drm_gem_object_unreference(&obj->base);
>       mutex_unlock(&dev->struct_mutex);
> -
> -     return end;
>  }
>  
> -static void *invalidate_range__linear(struct i915_mmu_notifier *mn,
> -                                   struct mm_struct *mm,
> -                                   unsigned long start,
> -                                   unsigned long end)
> +static unsigned long cancel_userptr(struct i915_mmu_object *mo)
>  {
> -     struct i915_mmu_object *mo;
> -     unsigned long serial;
> -
> -restart:
> -     serial = mn->serial;
> -     list_for_each_entry(mo, &mn->linear, link) {
> -             struct drm_i915_gem_object *obj;
> -
> -             if (mo->it.last < start || mo->it.start > end)
> -                     continue;
> -
> -             obj = mo->obj;
> -
> -             if (!mo->active ||
> -                 !kref_get_unless_zero(&obj->base.refcount))
> -                     continue;
> -
> -             spin_unlock(&mn->lock);
> -
> -             cancel_userptr(obj);
> -
> -             spin_lock(&mn->lock);
> -             if (serial != mn->serial)
> -                     goto restart;
> +     unsigned long end = mo->obj->userptr.ptr + mo->obj->base.size;
> +
> +     /* The mmu_object is released late when destroying the
> +      * GEM object so it is entirely possible to gain a
> +      * reference on an object in the process of being freed
> +      * since our serialisation is via the spinlock and not
> +      * the struct_mutex - and consequently use it after it
> +      * is freed and then double free it.
> +      */
> +     if (mo->active && kref_get_unless_zero(&mo->obj->base.refcount)) {
> +             schedule_work(&mo->work);
> +             /* only schedule one work packet to avoid the refleak */
> +             mo->active = false;
>       }
>  
> -     return NULL;
> +     return end;
>  }
>  
>  static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
> @@ -136,45 +119,32 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
>                                                      unsigned long start,
>                                                      unsigned long end)
>  {
> -     struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn);
> -     struct interval_tree_node *it = NULL;
> -     unsigned long next = start;
> -     unsigned long serial = 0;
> -
> -     end--; /* interval ranges are inclusive, but invalidate range is exclusive */
> -     while (next < end) {
> -             struct drm_i915_gem_object *obj = NULL;
> -
> -             spin_lock(&mn->lock);
> -             if (mn->has_linear)
> -                     it = invalidate_range__linear(mn, mm, start, end);
> -             else if (serial == mn->serial)
> -                     it = interval_tree_iter_next(it, next, end);
> -             else
> -                     it = interval_tree_iter_first(&mn->objects, start, end);
> -             if (it != NULL) {
> -                     struct i915_mmu_object *mo =
> -                             container_of(it, struct i915_mmu_object, it);
> -
> -                     /* The mmu_object is released late when destroying the
> -                      * GEM object so it is entirely possible to gain a
> -                      * reference on an object in the process of being freed
> -                      * since our serialisation is via the spinlock and not
> -                      * the struct_mutex - and consequently use it after it
> -                      * is freed and then double free it.
> -                      */
> -                     if (mo->active &&
> -                         kref_get_unless_zero(&mo->obj->base.refcount))
> -                             obj = mo->obj;
> -
> -                     serial = mn->serial;
> +     struct i915_mmu_notifier *mn =
> +             container_of(_mn, struct i915_mmu_notifier, mn);
> +     struct i915_mmu_object *mo;
> +
> +     /* interval ranges are inclusive, but invalidate range is exclusive */
> +     end--;
> +
> +     spin_lock(&mn->lock);
> +     if (mn->has_linear) {
> +             list_for_each_entry(mo, &mn->linear, link) {
> +                     if (mo->it.last < start || mo->it.start > end)
> +                             continue;
> +
> +                     cancel_userptr(mo);
>               }
> -             spin_unlock(&mn->lock);
> -             if (obj == NULL)
> -                     return;
> +     } else {
> +             struct interval_tree_node *it;
>  
> -             next = cancel_userptr(obj);
> +             it = interval_tree_iter_first(&mn->objects, start, end);
> +             while (it) {
> +                     mo = container_of(it, struct i915_mmu_object, it);
> +                     start = cancel_userptr(mo);
> +                     it = interval_tree_iter_next(it, start, end);
> +             }
>       }
> +     spin_unlock(&mn->lock);
>  }
>  
>  static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
> @@ -194,7 +164,6 @@ i915_mmu_notifier_create(struct mm_struct *mm)
>       spin_lock_init(&mn->lock);
>       mn->mn.ops = &i915_gem_userptr_notifier;
>       mn->objects = RB_ROOT;
> -     mn->serial = 1;
>       INIT_LIST_HEAD(&mn->linear);
>       mn->has_linear = false;
>  
> @@ -208,12 +177,6 @@ i915_mmu_notifier_create(struct mm_struct *mm)
>       return mn;
>  }
>  
> -static void __i915_mmu_notifier_update_serial(struct i915_mmu_notifier *mn)
> -{
> -     if (++mn->serial == 0)
> -             mn->serial = 1;
> -}
> -
>  static int
>  i915_mmu_notifier_add(struct drm_device *dev,
>                     struct i915_mmu_notifier *mn,
> @@ -260,10 +223,9 @@ i915_mmu_notifier_add(struct drm_device *dev,
>       } else
>               interval_tree_insert(&mo->it, &mn->objects);
>  
> -     if (ret == 0) {
> +     if (ret == 0)
>               list_add(&mo->link, &mn->linear);
> -             __i915_mmu_notifier_update_serial(mn);
> -     }
> +
>       spin_unlock(&mn->lock);
>       mutex_unlock(&dev->struct_mutex);
>  
> @@ -291,7 +253,6 @@ i915_mmu_notifier_del(struct i915_mmu_notifier *mn,
>               mn->has_linear = i915_mmu_notifier_has_linear(mn);
>       else
>               interval_tree_remove(&mo->it, &mn->objects);
> -     __i915_mmu_notifier_update_serial(mn);
>       spin_unlock(&mn->lock);
>  }
>  
> @@ -358,6 +319,7 @@ i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
>       mo->it.start = obj->userptr.ptr;
>       mo->it.last = mo->it.start + obj->base.size - 1;
>       mo->obj = obj;
> +     INIT_WORK(&mo->work, __cancel_userptr__worker);
>  
>       ret = i915_mmu_notifier_add(obj->base.dev, mn, mo);
>       if (ret) {
> @@ -566,10 +528,12 @@ __i915_gem_userptr_set_pages(struct drm_i915_gem_object *obj,
>       return ret;
>  }
>  
> -static void
> +static int
>  __i915_gem_userptr_set_active(struct drm_i915_gem_object *obj,
>                             bool value)
>  {
> +     int ret = 0;
> +
>       /* During mm_invalidate_range we need to cancel any userptr that
>        * overlaps the range being invalidated. Doing so requires the
>        * struct_mutex, and that risks recursion. In order to cause
> @@ -582,12 +546,20 @@ __i915_gem_userptr_set_active(struct drm_i915_gem_object *obj,
>        */
>  #if defined(CONFIG_MMU_NOTIFIER)
>       if (obj->userptr.mmu_object == NULL)
> -             return;
> +             return 0;
>  
>       spin_lock(&obj->userptr.mmu_object->mn->lock);
> -     obj->userptr.mmu_object->active = value;
> +     /* In order to serialise get_pages with an outstanding
> +      * cancel_userptr, we must drop the struct_mutex and try again.
> +      */
> +     if (!value || !work_pending(&obj->userptr.mmu_object->work))
> +             obj->userptr.mmu_object->active = value;
> +     else
> +             ret = -EAGAIN;
>       spin_unlock(&obj->userptr.mmu_object->mn->lock);
>  #endif
> +
> +     return ret;
>  }
>  
>  static void
> @@ -736,7 +708,9 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
>               return -EAGAIN;
>  
>       /* Let the mmu-notifier know that we have begun and need cancellation */
> -     __i915_gem_userptr_set_active(obj, true);
> +     ret = __i915_gem_userptr_set_active(obj, true);
> +     if (ret)
> +             return ret;
>  
>       pvec = NULL;
>       pinned = 0;
> -- 
> 2.6.0
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
