On ti, 2016-07-19 at 11:51 +0100, Chris Wilson wrote:
> When transitioning to the GTT or CPU domain we wait on all rendering
> from i915 to complete (with the optimisation of allowing concurrent read
> access by both the GPU and client). We don't yet ensure all rendering
> from third parties (tracked by implicit fences on the dma-buf) is
> complete. Since implicitly tracked rendering by third parties will
> ignore our cache-domain tracking, we have to always wait upon rendering
> from third-parties when transitioning to direct access to the backing
> store. We still rely on clients notifying us of cache domain changes
> (i.e. they need to move to the GTT read or write domain after doing a CPU
> access before letting the third party render again).
> 
> v2:
> This introduces a potential WARN_ON into i915_gem_object_free() as the
> current i915_vma_unbind() calls i915_gem_object_wait_rendering(). To
> hit this path we first need to render with the GPU, have a dma-buf
> attached with an unsignaled fence and then interrupt the wait. It does
> get fixed later in the series (when i915_vma_unbind() only waits on the
> active VMA and not all, including third-party, rendering).
> 
> To offset that risk, use the __i915_vma_unbind_no_wait hack.
> 
> Testcase: igt/prime_vgem/basic-fence-read
> Testcase: igt/prime_vgem/basic-fence-mmap
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem.c | 44 ++++++++++++++++++++++++++---------------
>  1 file changed, 28 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 079e09cee16a..37868cc594cb 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -29,10 +29,12 @@
>  #include <drm/drmP.h>
>  #include <drm/i915_drm.h>
>  #include "i915_drv.h"
> +#include "i915_gem_dmabuf.h"
>  #include "i915_vgpu.h"
>  #include "i915_trace.h"
>  #include "intel_drv.h"
>  #include "intel_mocs.h"
> +#include <linux/reservation.h>
>  #include <linux/shmem_fs.h>
>  #include <linux/slab.h>
>  #include <linux/swap.h>
> @@ -511,6 +513,10 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
>       if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
>               return -EINVAL;
>  
> +     ret = i915_gem_object_wait_rendering(obj, true);
> +     if (ret)
> +             return ret;
> +
>       if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
>               /* If we're not in the cpu read domain, set ourself into the gtt
>                * read domain and manually flush cachelines (if required). This
> @@ -518,9 +524,6 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
>                * anyway again before the next pread happens. */
>               *needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
>                                                       obj->cache_level);
> -             ret = i915_gem_object_wait_rendering(obj, true);
> -             if (ret)
> -                     return ret;
>       }
>  
>       ret = i915_gem_object_get_pages(obj);
> @@ -1132,15 +1135,16 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>  
>       obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
>  
> +     ret = i915_gem_object_wait_rendering(obj, false);
> +     if (ret)
> +             return ret;
> +
>       if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
>               /* If we're not in the cpu write domain, set ourself into the gtt
>                * write domain and manually flush cachelines (if required). This
>                * optimizes for the case when the gpu will use the data
>                * right away and we therefore have to clflush anyway. */
>               needs_clflush_after = cpu_write_needs_clflush(obj);
> -             ret = i915_gem_object_wait_rendering(obj, false);
> -             if (ret)
> -                     return ret;
>       }
>       /* Same trick applies to invalidate partially written cachelines read
>        * before writing. */
> @@ -1335,11 +1339,9 @@ int
>  i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
>                              bool readonly)
>  {
> +     struct reservation_object *resv;
>       int ret, i;
>  
> -     if (!obj->active)
> -             return 0;
> -
>       if (readonly) {
>               if (obj->last_write_req != NULL) {
>                       ret = i915_wait_request(obj->last_write_req);
> @@ -1366,6 +1368,16 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
>               GEM_BUG_ON(obj->active);
>       }
>  
> +     resv = i915_gem_object_get_dmabuf_resv(obj);
> +     if (resv) {
> +             long err;

We already have ret in the function scope; no need for a separate err variable here.

> +
> +             err = reservation_object_wait_timeout_rcu(resv, !readonly, true,
> +                                                       MAX_SCHEDULE_TIMEOUT);
> +             if (err < 0)
> +                     return err;
> +     }
> +
>       return 0;
>  }
>  
> @@ -3402,13 +3414,13 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>       struct i915_vma *vma;
>       int ret;
>  
> -     if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
> -             return 0;
> -
>       ret = i915_gem_object_wait_rendering(obj, !write);
>       if (ret)
>               return ret;
>  
> +     if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
> +             return 0;
> +

Not sure I follow this change: wait_rendering would only be issued if
DOMAIN_CPU was in place. This function is about moving to the GTT
domain, so it should really be a no-op if we are in the GTT domain
already, no? Maybe the calling site should do an explicit
wait_rendering if needed.

>       /* Flush and acquire obj->pages so that we are coherent through
>        * direct access in memory with previous cached writes through
>        * shmemfs and that our cache domain tracking remains valid.
> @@ -3752,13 +3764,13 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
>       uint32_t old_write_domain, old_read_domains;
>       int ret;
>  
> -     if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
> -             return 0;
> -
>       ret = i915_gem_object_wait_rendering(obj, !write);
>       if (ret)
>               return ret;
>  
> +     if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
> +             return 0;
> +

Ditto.

>       i915_gem_object_flush_gtt_write_domain(obj);
>  
>       old_write_domain = obj->base.write_domain;
> @@ -4238,7 +4250,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>               int ret;
>  
>               vma->pin_count = 0;
> -             ret = i915_vma_unbind(vma);

Maybe add a comment/TODO/FIXME about the potential WARN.

Regards, Joonas

> +             ret = __i915_vma_unbind_no_wait(vma);
>               if (WARN_ON(ret == -ERESTARTSYS)) {
>                       bool was_interruptible;
>  
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to