On Tue, Dec 23, 2014 at 05:16:15PM +0000, Michel Thierry wrote:
> From: Ben Widawsky <benjamin.widaw...@intel.com>
> 
> This patch was formerly known as, "Force pd restore when PDEs change,
> gen6-7." I had to change the name because it is needed for GEN8 too.
> 
> The real issue this is trying to solve is when a new object is mapped
> into the current address space. The GPU does not snoop the new mapping
> so we must do the gen specific action to reload the page tables.
> 
> GEN8 and GEN7 do differ in the way they load page tables for the RCS.
> GEN8 does so with the context restore, while GEN7 requires the proper
> load commands in the command streamer. Non-render is similar for both.
> 
> Caveat for GEN7
> The docs say you cannot change the PDEs of a currently running context.
> We never map new PDEs of a running context, and expect them to be
> present - so I think this is okay. (We can unmap, but this should also
> be okay since we only unmap unreferenced objects that the GPU shouldn't
> be trying to va->pa xlate.) The MI_SET_CONTEXT command does have a flag
> to signal that even if the context is the same, force a reload. It's
> unclear exactly what this does, but I have a hunch it's the right thing
> to do.
> 
> The logic assumes that we always emit a context switch after mapping new
> PDEs, and before we submit a batch. This is the case today, and has been
> the case since the inception of hardware contexts. A note in the comment
> lets the user know.
> 
> Signed-off-by: Ben Widawsky <b...@bwidawsk.net>
> 
> squash! drm/i915: Force pd restore when PDEs change, gen6-7
> 
> It's not just for gen8. If the current context has its mappings changed, we
> need a context reload to switch
> 
> v2: Rebased after ppgtt clean up patches. Split the warning for aliasing
> and true ppgtt options. And do not break aliasing ppgtt, where to->ppgtt
> is always null.
> 
> v3: Invalidate PPGTT TLBs inside alloc_va_range and teardown_va_range.
> Signed-off-by: Michel Thierry <michel.thie...@intel.com> (v2+)
> ---
>  drivers/gpu/drm/i915/i915_gem_context.c    | 27 ++++++++++++++++++++++-----
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 11 +++++++++++
>  drivers/gpu/drm/i915/i915_gem_gtt.c        | 12 ++++++++++++
>  drivers/gpu/drm/i915/i915_gem_gtt.h        |  2 ++
>  4 files changed, 47 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
> b/drivers/gpu/drm/i915/i915_gem_context.c
> index 7b20bd4..fa9d4a1 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -567,8 +567,18 @@ static inline bool should_skip_switch(struct 
> intel_engine_cs *ring,
>                                     struct intel_context *from,
>                                     struct intel_context *to)
>  {
> -     if (from == to && !to->remap_slice)
> -             return true;
> +     struct drm_i915_private *dev_priv = ring->dev->dev_private;
> +
> +     if (to->remap_slice)
> +             return false;
> +
> +     if (to->ppgtt) {
> +             if (from == to && !test_bit(ring->id, 
> &to->ppgtt->base.pd_reload_mask))
> +                     return true;
> +     } else {
> +             if (from == to && !test_bit(ring->id, 
> &dev_priv->mm.aliasing_ppgtt->base.pd_reload_mask))
> +                     return true;
> +     }
>  
>       return false;
>  }
> @@ -585,9 +595,8 @@ needs_pd_load_pre(struct intel_engine_cs *ring, struct 
> intel_context *to)
>  static bool
>  needs_pd_load_post(struct intel_engine_cs *ring, struct intel_context *to)
>  {
> -     return (!to->legacy_hw_ctx.initialized ||
> -                     i915_gem_context_is_default(to)) &&
> -                     to->ppgtt && IS_GEN8(ring->dev);
> +     return IS_GEN8(ring->dev) &&
> +                     (to->ppgtt || &to->ppgtt->base.pd_reload_mask);
>  }
>  
>  static int do_switch(struct intel_engine_cs *ring,
> @@ -632,6 +641,12 @@ static int do_switch(struct intel_engine_cs *ring,
>               ret = to->ppgtt->switch_mm(to->ppgtt, ring);
>               if (ret)
>                       goto unpin_out;
> +
> +             /* Doing a PD load always reloads the page dirs */
> +             if (to->ppgtt)
> +                     clear_bit(ring->id, &to->ppgtt->base.pd_reload_mask);
> +             else
> +                     clear_bit(ring->id, 
> &dev_priv->mm.aliasing_ppgtt->base.pd_reload_mask);
>       }
>  
>       if (ring != &dev_priv->ring[RCS]) {
> @@ -670,6 +685,8 @@ static int do_switch(struct intel_engine_cs *ring,
>        */
>       if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to))
>               hw_flags |= MI_RESTORE_INHIBIT;
> +     else if (to->ppgtt && test_and_clear_bit(ring->id, 
> &to->ppgtt->base.pd_reload_mask))
> +             hw_flags |= MI_FORCE_RESTORE;
>  
>       ret = mi_set_context(ring, to, hw_flags);
>       if (ret)
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
> b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 8330660..09d864f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1199,6 +1199,13 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, 
> struct drm_file *file,
>       if (ret)
>               goto error;
>  
> +     if (ctx->ppgtt)
> +             WARN(ctx->ppgtt->base.pd_reload_mask & (1<<ring->id),
> +                     "%s didn't clear reload\n", ring->name);
> +     else
> +             WARN(dev_priv->mm.aliasing_ppgtt->base.pd_reload_mask &
> +                     (1<<ring->id), "%s didn't clear reload\n", ring->name);
> +
>       instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
>       instp_mask = I915_EXEC_CONSTANTS_MASK;
>       switch (instp_mode) {
> @@ -1446,6 +1453,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
> *data,
>       if (ret)
>               goto err;
>  
> +     /* XXX: Reserve has possibly change PDEs which means we must do a
> +      * context switch before we can coherently read some of the reserved
> +      * VMAs. */
> +
>       /* The objects are in their final locations, apply the relocations. */
>       if (need_relocs)
>               ret = i915_gem_execbuffer_relocate(eb);
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
> b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 313432e..54c7ca7 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1126,6 +1126,15 @@ static void gen6_ppgtt_unmap_pages(struct 
> i915_hw_ppgtt *ppgtt)
>                              4096, PCI_DMA_BIDIRECTIONAL);
>  }
>  
> +/* PDE TLBs are a pain to invalidate pre GEN8. It requires a context reload. If 
> we
> + * are switching between contexts with the same LRCA, we also must do a force
> + * restore.
> + */
> +#define ppgtt_invalidate_tlbs(vm) do {\
> +     /* If current vm != vm, */ \
> +     vm->pd_reload_mask = INTEL_INFO(vm->dev)->ring_mask; \
> +} while (0)

Again this should be a proper static inline. Also maybe call this
mark_tlbs_dirty since that's what it does, the invalidate happens later
on in the ctx switch code. In the same spirit:
s/pd_reload_mask/pd_dirty_rings/.
-Daniel

> +
>  static int gen6_alloc_va_range(struct i915_address_space *vm,
>                              uint64_t start, uint64_t length)
>  {
> @@ -1154,6 +1163,7 @@ static int gen6_alloc_va_range(struct 
> i915_address_space *vm,
>                               I915_PPGTT_PT_ENTRIES);
>       }
>  
> +     ppgtt_invalidate_tlbs(vm);
>       return 0;
>  }
>  
> @@ -1169,6 +1179,8 @@ static void gen6_teardown_va_range(struct 
> i915_address_space *vm,
>               bitmap_clear(pt->used_ptes, gen6_pte_index(start),
>                            gen6_pte_count(start, length));
>       }
> +
> +     ppgtt_invalidate_tlbs(vm);
>  }
>  
>  static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h 
> b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index d579f74..dc71cae 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -226,6 +226,8 @@ struct i915_address_space {
>               struct page *page;
>       } scratch;
>  
> +     unsigned long pd_reload_mask;

Conceptually this should be in i915_hw_ppgtt, not in struct
i915_address_space.  Anything holding up that movement that I don't see?

> +
>       /**
>        * List of objects currently involved in rendering.
>        *
> -- 
> 2.1.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to