Chris Wilson <ch...@chris-wilson.co.uk> writes:

> Take a copy of the HW state after a reset upon module loading by
> executing a context switch from a blank context to the kernel context,
> thus saving the default hw state over the blank context image.
> We can then use the default hw state to initialise any future context,
> ensuring that each starts with the default view of hw state.
>
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/gvt/scheduler.c    |  2 -
>  drivers/gpu/drm/i915/i915_debugfs.c     |  1 -
>  drivers/gpu/drm/i915/i915_gem.c         | 69 
> +++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_gem_context.c | 50 +++---------------------
>  drivers/gpu/drm/i915/i915_gem_context.h |  4 +-
>  drivers/gpu/drm/i915/intel_engine_cs.c  |  3 ++
>  drivers/gpu/drm/i915/intel_lrc.c        | 35 ++++++++++-------
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 47 +++++++++++++++-------
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  1 +
>  9 files changed, 137 insertions(+), 75 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c 
> b/drivers/gpu/drm/i915/gvt/scheduler.c
> index f6ded475bb2c..42cc61230ca7 100644
> --- a/drivers/gpu/drm/i915/gvt/scheduler.c
> +++ b/drivers/gpu/drm/i915/gvt/scheduler.c
> @@ -723,8 +723,6 @@ int intel_vgpu_init_gvt_context(struct intel_vgpu *vgpu)
>       if (IS_ERR(vgpu->shadow_ctx))
>               return PTR_ERR(vgpu->shadow_ctx);
>  
> -     vgpu->shadow_ctx->engine[RCS].initialised = true;
> -
>       bitmap_zero(vgpu->shadow_ctx_desc_updated, I915_NUM_ENGINES);
>  
>       return 0;
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
> b/drivers/gpu/drm/i915/i915_debugfs.c
> index 39883cd915db..cfcef1899da8 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1974,7 +1974,6 @@ static int i915_context_status(struct seq_file *m, void 
> *unused)
>                       struct intel_context *ce = &ctx->engine[engine->id];
>  
>                       seq_printf(m, "%s: ", engine->name);
> -                     seq_putc(m, ce->initialised ? 'I' : 'i');
>                       if (ce->state)
>                               describe_obj(m, ce->state->obj);
>                       if (ce->ring)
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index e36a3a840552..ed4b1708fec5 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4967,6 +4967,71 @@ bool intel_sanitize_semaphores(struct drm_i915_private 
> *dev_priv, int value)
>       return true;
>  }
>  
> +static int __intel_engines_record_defaults(struct drm_i915_private *i915)
> +{
> +     struct i915_gem_context *ctx;
> +     struct intel_engine_cs *engine;
> +     enum intel_engine_id id;
> +     int err;
> +
> +     /*
> +      * As we reset the gpu during very early sanitisation, the current
> +      * register state on the GPU should reflect its defaults values.
> +      * We load a context onto the hw (with restore-inhibit), then switch
> +      * over to a second context to save that default register state. We
> +      * can then prime every new context with that state so they all start
> +      * from defaults.
> +      */
> +
> +     ctx = i915_gem_context_create_kernel(i915, 0);
> +     if (IS_ERR(ctx))
> +             return PTR_ERR(ctx);
> +
> +     for_each_engine(engine, i915, id) {
> +             struct drm_i915_gem_request *rq;
> +
> +             rq = i915_gem_request_alloc(engine, ctx);
> +             if (IS_ERR(rq)) {
> +                     err = PTR_ERR(rq);
> +                     goto out_ctx;
> +             }
> +
> +             err = i915_switch_context(rq);
> +             if (engine->init_context)
> +                     err = engine->init_context(rq);
> +
> +             __i915_add_request(rq, true);
> +             if (err)
> +                     goto out_ctx;
> +     }
> +
> +     err = i915_gem_switch_to_kernel_context(i915);
> +     if (err)
> +             goto out_ctx;
> +
> +     err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED);
> +     if (err)
> +             goto out_ctx;
> +
> +     for_each_engine(engine, i915, id) {
> +             if (!ctx->engine[id].state)
> +                     continue;
> +
> +             engine->default_state =
> +                     i915_gem_object_get(ctx->engine[id].state->obj);
> +
> +             err = i915_gem_object_set_to_cpu_domain(engine->default_state,
> +                                                     false);
> +             if (err)
> +                     goto out_ctx;
> +     }
> +
> +out_ctx:
> +     i915_gem_context_set_closed(ctx);
> +     i915_gem_context_put(ctx);
> +     return err;
> +}
> +
>  int i915_gem_init(struct drm_i915_private *dev_priv)
>  {
>       int ret;
> @@ -5022,6 +5087,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>  
>       intel_init_gt_powersave(dev_priv);
>  
> +     ret = __intel_engines_record_defaults(dev_priv);
> +     if (ret)
> +             goto out_unlock;
> +
>  out_unlock:
>       if (ret == -EIO) {
>               /* Allow engine initialisation to fail by marking the GPU as
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
> b/drivers/gpu/drm/i915/i915_gem_context.c
> index 10affb35ac56..499cc0f25653 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -343,7 +343,7 @@ static void __destroy_hw_context(struct i915_gem_context 
> *ctx,
>   * context state of the GPU for applications that don't utilize HW contexts, 
> as
>   * well as an idle case.
>   */
> -static struct i915_gem_context *
> +struct i915_gem_context *
>  i915_gem_create_context(struct drm_i915_private *dev_priv,
>                       struct drm_i915_file_private *file_priv)
>  {
> @@ -418,8 +418,8 @@ i915_gem_context_create_gvt(struct drm_device *dev)
>       return ctx;
>  }
>  
> -static struct i915_gem_context *
> -create_kernel_context(struct drm_i915_private *i915, int prio)
> +struct i915_gem_context *
> +i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
>  {
>       struct i915_gem_context *ctx;
>  
> @@ -473,7 +473,7 @@ int i915_gem_contexts_init(struct drm_i915_private 
> *dev_priv)
>       ida_init(&dev_priv->contexts.hw_ida);
>  
>       /* lowest priority; idle task */
> -     ctx = create_kernel_context(dev_priv, I915_PRIORITY_MIN);
> +     ctx = i915_gem_context_create_kernel(dev_priv, I915_PRIORITY_MIN);
>       if (IS_ERR(ctx)) {
>               DRM_ERROR("Failed to create default global context\n");
>               err = PTR_ERR(ctx);
> @@ -487,7 +487,7 @@ int i915_gem_contexts_init(struct drm_i915_private 
> *dev_priv)
>       dev_priv->kernel_context = ctx;
>  
>       /* highest priority; preempting task */
> -     ctx = create_kernel_context(dev_priv, INT_MAX);
> +     ctx = i915_gem_context_create_kernel(dev_priv, INT_MAX);
>       if (IS_ERR(ctx)) {
>               DRM_ERROR("Failed to create default preempt context\n");
>               err = PTR_ERR(ctx);
> @@ -522,28 +522,6 @@ void i915_gem_contexts_lost(struct drm_i915_private 
> *dev_priv)
>               engine->context_unpin(engine, engine->last_retired_context);
>               engine->last_retired_context = NULL;
>       }
> -
> -     /* Force the GPU state to be restored on enabling */
> -     if (!i915_modparams.enable_execlists) {
> -             struct i915_gem_context *ctx;
> -
> -             list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
> -                     if (!i915_gem_context_is_default(ctx))
> -                             continue;
> -
> -                     for_each_engine(engine, dev_priv, id)
> -                             ctx->engine[engine->id].initialised = false;
> -
> -                     ctx->remap_slice = ALL_L3_SLICES(dev_priv);
> -             }
> -
> -             for_each_engine(engine, dev_priv, id) {
> -                     struct intel_context *kce =
> -                             &dev_priv->kernel_context->engine[engine->id];
> -
> -                     kce->initialised = true;
> -             }
> -     }
>  }
>  
>  void i915_gem_contexts_fini(struct drm_i915_private *i915)
> @@ -718,9 +696,6 @@ static inline bool skip_rcs_switch(struct i915_hw_ppgtt 
> *ppgtt,
>       if (to->remap_slice)
>               return false;
>  
> -     if (!to->engine[RCS].initialised)
> -             return false;
> -
>       if (ppgtt && (intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
>               return false;
>  
> @@ -795,11 +770,7 @@ static int do_rcs_switch(struct drm_i915_gem_request 
> *req)
>                       return ret;
>       }
>  
> -     if (!to->engine[RCS].initialised || i915_gem_context_is_default(to))
> -             /* NB: If we inhibit the restore, the context is not allowed to
> -              * die because future work may end up depending on valid address
> -              * space. This means we must enforce that a page table load
> -              * occur when this occurs. */
> +     if (i915_gem_context_is_kernel(to))
>               hw_flags = MI_RESTORE_INHIBIT;
>       else if (ppgtt && intel_engine_flag(engine) & ppgtt->pd_dirty_rings)
>               hw_flags = MI_FORCE_RESTORE;
> @@ -843,15 +814,6 @@ static int do_rcs_switch(struct drm_i915_gem_request 
> *req)
>               to->remap_slice &= ~(1<<i);
>       }
>  
> -     if (!to->engine[RCS].initialised) {
> -             if (engine->init_context) {
> -                     ret = engine->init_context(req);
> -                     if (ret)
> -                             return ret;
> -             }
> -             to->engine[RCS].initialised = true;
> -     }
> -
>       return 0;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h 
> b/drivers/gpu/drm/i915/i915_gem_context.h
> index 44688e22a5c2..4bfb72f8e1cb 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -157,7 +157,6 @@ struct i915_gem_context {
>               u32 *lrc_reg_state;
>               u64 lrc_desc;
>               int pin_count;
> -             bool initialised;
>       } engine[I915_NUM_ENGINES];
>  
>       /** ring_size: size for allocating the per-engine ring buffer */
> @@ -292,6 +291,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device 
> *dev, void *data,
>  int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
>                                      struct drm_file *file);
>  
> +struct i915_gem_context *
> +i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio);
> +
>  static inline struct i915_gem_context *
>  i915_gem_context_get(struct i915_gem_context *ctx)
>  {
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c 
> b/drivers/gpu/drm/i915/intel_engine_cs.c
> index c15e288bed02..6b8519401d4d 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -679,6 +679,9 @@ void intel_engine_cleanup_common(struct intel_engine_cs 
> *engine)
>       intel_engine_cleanup_cmd_parser(engine);
>       i915_gem_batch_pool_fini(&engine->batch_pool);
>  
> +     if (engine->default_state)
> +             i915_gem_object_put(engine->default_state);
> +
>       if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
>               engine->context_unpin(engine, engine->i915->preempt_context);
>       engine->context_unpin(engine, engine->i915->kernel_context);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
> b/drivers/gpu/drm/i915/intel_lrc.c
> index 6840ec8db037..297ce0327273 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1147,7 +1147,6 @@ static int execlists_request_alloc(struct 
> drm_i915_gem_request *request)
>       struct intel_engine_cs *engine = request->engine;
>       struct intel_context *ce = &request->ctx->engine[engine->id];
>       u32 *cs;
> -     int ret;
>  
>       GEM_BUG_ON(!ce->pin_count);
>  
> @@ -1161,14 +1160,6 @@ static int execlists_request_alloc(struct 
> drm_i915_gem_request *request)
>       if (IS_ERR(cs))
>               return PTR_ERR(cs);
>  
> -     if (!ce->initialised) {
> -             ret = engine->init_context(request);
> -             if (ret)
> -                     return ret;
> -
> -             ce->initialised = true;
> -     }
> -
>       /* Note that after this point, we have committed to using
>        * this request as it is being used to both track the
>        * state of engine initialisation and liveness of the
> @@ -2100,7 +2091,6 @@ static void execlists_init_reg_state(u32 *regs,
>  
>       CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(engine),
>               _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
> -                                CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>                                  (HAS_RESOURCE_STREAMER(dev_priv) ?
>                                  CTX_CTRL_RS_CTX_ENABLE : 0)));
>       CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0);
> @@ -2177,6 +2167,7 @@ populate_lr_context(struct i915_gem_context *ctx,
>                   struct intel_ring *ring)
>  {
>       void *vaddr;
> +     u32 *regs;
>       int ret;
>  
>       ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
> @@ -2193,11 +2184,28 @@ populate_lr_context(struct i915_gem_context *ctx,
>       }
>       ctx_obj->mm.dirty = true;
>  
> +     if (engine->default_state) {
> +             void *defaults;
> +
> +             defaults = i915_gem_object_pin_map(engine->default_state,
> +                                                I915_MAP_WB);
> +             if (IS_ERR(defaults))
> +                     return PTR_ERR(defaults);
> +
> +             memcpy(vaddr + LRC_HEADER_PAGES * PAGE_SIZE,
> +                    defaults + LRC_HEADER_PAGES * PAGE_SIZE,
> +                    engine->context_size);
> +             i915_gem_object_unpin_map(engine->default_state);
> +     }
> +
>       /* The second page of the context object contains some fields which must
>        * be set up prior to the first execution. */
> -
> -     execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
> -                              ctx, engine, ring);
> +     regs = vaddr + LRC_STATE_PN * PAGE_SIZE;
> +     execlists_init_reg_state(regs, ctx, engine, ring);
> +     if (!engine->default_state) {
> +             regs[CTX_CONTEXT_CONTROL+1] |=
> +                     _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
> +     }

Since the default state we now copy from was created with restore inhibit
set, every context copied from it will inherit that bit. Where do we clear it?
-Mika


>  
>       i915_gem_object_unpin_map(ctx_obj);
>  
> @@ -2250,7 +2258,6 @@ static int execlists_context_deferred_alloc(struct 
> i915_gem_context *ctx,
>  
>       ce->ring = ring;
>       ce->state = vma;
> -     ce->initialised |= engine->init_context == NULL;
>  
>       return 0;
>  
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
> b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 47fadf8da84e..3e5c296cd8bc 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1368,7 +1368,7 @@ static int context_pin(struct i915_gem_context *ctx)
>        * on an active context (which by nature is already on the GPU).
>        */
>       if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
> -             ret = i915_gem_object_set_to_gtt_domain(vma->obj, false);
> +             ret = i915_gem_object_set_to_gtt_domain(vma->obj, true);
>               if (ret)
>                       return ret;
>       }
> @@ -1383,11 +1383,34 @@ alloc_context_vma(struct intel_engine_cs *engine)
>       struct drm_i915_private *i915 = engine->i915;
>       struct drm_i915_gem_object *obj;
>       struct i915_vma *vma;
> +     int err;
>  
>       obj = i915_gem_object_create(i915, engine->context_size);
>       if (IS_ERR(obj))
>               return ERR_CAST(obj);
>  
> +     if (engine->default_state) {
> +             void *defaults, *vaddr;
> +
> +             vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
> +             if (IS_ERR(vaddr)) {
> +                     err = PTR_ERR(vaddr);
> +                     goto err_obj;
> +             }
> +
> +             defaults = i915_gem_object_pin_map(engine->default_state,
> +                                                I915_MAP_WB);
> +             if (IS_ERR(defaults)) {
> +                     err = PTR_ERR(defaults);
> +                     goto err_map;
> +             }
> +
> +             memcpy(vaddr, defaults, engine->context_size);
> +
> +             i915_gem_object_unpin_map(engine->default_state);
> +             i915_gem_object_unpin_map(obj);
> +     }
> +
>       /*
>        * Try to make the context utilize L3 as well as LLC.
>        *
> @@ -1409,10 +1432,18 @@ alloc_context_vma(struct intel_engine_cs *engine)
>       }
>  
>       vma = i915_vma_instance(obj, &i915->ggtt.base, NULL);
> -     if (IS_ERR(vma))
> -             i915_gem_object_put(obj);
> +     if (IS_ERR(vma)) {
> +             err = PTR_ERR(vma);
> +             goto err_obj;
> +     }
>  
>       return vma;
> +
> +err_map:
> +     i915_gem_object_unpin_map(obj);
> +err_obj:
> +     i915_gem_object_put(obj);
> +     return ERR_PTR(err);
>  }
>  
>  static struct intel_ring *
> @@ -1449,16 +1480,6 @@ intel_ring_context_pin(struct intel_engine_cs *engine,
>               ce->state->obj->pin_global++;
>       }
>  
> -     /* The kernel context is only used as a placeholder for flushing the
> -      * active context. It is never used for submitting user rendering and
> -      * as such never requires the golden render context, and so we can skip
> -      * emitting it when we switch to the kernel context. This is required
> -      * as during eviction we cannot allocate and pin the renderstate in
> -      * order to initialise the context.
> -      */
> -     if (i915_gem_context_is_kernel(ctx))
> -             ce->initialised = true;
> -
>       i915_gem_context_get(ctx);
>  
>  out:
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
> b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 69ad875fd011..1d752b9a3065 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -303,6 +303,7 @@ struct intel_engine_cs {
>       struct intel_ring *buffer;
>       struct intel_timeline *timeline;
>  
> +     struct drm_i915_gem_object *default_state;
>       struct intel_render_state *render_state;
>  
>       atomic_t irq_count;
> -- 
> 2.15.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to