Chris Wilson <ch...@chris-wilson.co.uk> writes:

> On Tigerlake, MI_SEMAPHORE_WAIT grew an extra dword, so be sure to
> update the length field and emit that extra parameter and any padding
> noop as required.
>
> v2: Define the token shift while we are adding the updated MI_SEMAPHORE_WAIT
>
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuopp...@linux.intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospu...@intel.com>
> Cc: Michal Winiarski <michal.winiar...@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  2 +
>  drivers/gpu/drm/i915/gt/intel_lrc.c          | 71 ++++++++++++++++++--
>  drivers/gpu/drm/i915/i915_pci.c              |  1 -
>  drivers/gpu/drm/i915/i915_request.c          | 21 ++++--
>  4 files changed, 83 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h 
> b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> index fbad403ab7ac..da2025bc332c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> @@ -112,6 +112,7 @@
>  #define MI_SEMAPHORE_SIGNAL  MI_INSTR(0x1b, 0) /* GEN8+ */
>  #define   MI_SEMAPHORE_TARGET(engine)        ((engine)<<15)
>  #define MI_SEMAPHORE_WAIT    MI_INSTR(0x1c, 2) /* GEN8+ */
> +#define MI_SEMAPHORE_WAIT_TOKEN      MI_INSTR(0x1c, 3) /* GEN12+ */
>  #define   MI_SEMAPHORE_POLL          (1 << 15)
>  #define   MI_SEMAPHORE_SAD_GT_SDD    (0 << 12)
>  #define   MI_SEMAPHORE_SAD_GTE_SDD   (1 << 12)
> @@ -119,6 +120,7 @@
>  #define   MI_SEMAPHORE_SAD_LTE_SDD   (3 << 12)
>  #define   MI_SEMAPHORE_SAD_EQ_SDD    (4 << 12)
>  #define   MI_SEMAPHORE_SAD_NEQ_SDD   (5 << 12)
> +#define   MI_SEMAPHORE_TOKEN_SHIFT   5

Do we need a mask too?

>  #define MI_STORE_DWORD_IMM   MI_INSTR(0x20, 1)
>  #define MI_STORE_DWORD_IMM_GEN4      MI_INSTR(0x20, 2)
>  #define   MI_MEM_VIRTUAL     (1 << 22) /* 945,g33,965 */
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
> b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 64fa2db5905f..c74fc75e4980 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -3237,6 +3237,22 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct 
> i915_request *request, u32 *cs)
>       return gen8_emit_fini_breadcrumb_footer(request, cs);
>  }
>  
> +static u32 *
> +gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
> +{
> +     cs = gen8_emit_ggtt_write_rcs(cs,
> +                                   request->fence.seqno,
> +                                   request->timeline->hwsp_offset,
> +                                   PIPE_CONTROL_CS_STALL |
> +                                   PIPE_CONTROL_TILE_CACHE_FLUSH |
> +                                   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> +                                   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> +                                   PIPE_CONTROL_DC_FLUSH_ENABLE |
> +                                   PIPE_CONTROL_FLUSH_ENABLE);
> +
> +     return gen8_emit_fini_breadcrumb_footer(request, cs);
> +}
> +
>  /*
>   * Note that the CS instruction pre-parser will not stall on the breadcrumb
>   * flush and will continue pre-fetching the instructions after it before the
> @@ -3255,8 +3271,49 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct 
> i915_request *request, u32 *cs)
>   * All the above applies only to the instructions themselves. Non-inline data
>   * used by the instructions is not pre-fetched.
>   */
> -static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
> -                                        u32 *cs)
> +
> +static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 
> *cs)
> +{
> +     *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
> +             MI_SEMAPHORE_GLOBAL_GTT |
> +             MI_SEMAPHORE_POLL |
> +             MI_SEMAPHORE_SAD_EQ_SDD;
> +     *cs++ = 0;
> +     *cs++ = intel_hws_preempt_address(request->engine);

This is supposed to be in canonical form.

> +     *cs++ = 0;
> +     *cs++ = 0;
> +     *cs++ = MI_NOOP;
> +
> +     return cs;
> +}
> +
> +static __always_inline u32*
> +gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
> +{
> +     *cs++ = MI_USER_INTERRUPT;
> +
> +     *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> +     if (intel_engine_has_semaphores(request->engine))
> +             cs = gen12_emit_preempt_busywait(request, cs);
> +
> +     request->tail = intel_ring_offset(request, cs);
> +     assert_ring_tail_valid(request->ring, request->tail);
> +
> +     return gen8_emit_wa_tail(request, cs);
> +}
> +
> +static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
> +{
> +     cs = gen8_emit_ggtt_write(cs,
> +                               request->fence.seqno,
> +                               request->timeline->hwsp_offset,
> +                               0);
> +
> +     return gen12_emit_fini_breadcrumb_footer(request, cs);
> +}
> +
> +static u32 *
> +gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
>  {
>       cs = gen8_emit_ggtt_write_rcs(cs,
>                                     request->fence.seqno,
> @@ -3268,7 +3325,7 @@ static u32 *gen11_emit_fini_breadcrumb_rcs(struct 
> i915_request *request,
>                                     PIPE_CONTROL_DC_FLUSH_ENABLE |
>                                     PIPE_CONTROL_FLUSH_ENABLE);
>  
> -     return gen8_emit_fini_breadcrumb_footer(request, cs);
> +     return gen12_emit_fini_breadcrumb_footer(request, cs);
>  }
>  
>  static void execlists_park(struct intel_engine_cs *engine)
> @@ -3297,9 +3354,6 @@ void intel_execlists_set_default_submission(struct 
> intel_engine_cs *engine)
>                       engine->flags |= I915_ENGINE_HAS_PREEMPTION;
>       }
>  
> -     if (INTEL_GEN(engine->i915) >= 12) /* XXX disabled for debugging */
> -             engine->flags &= ~I915_ENGINE_HAS_SEMAPHORES;
> -
>       if (engine->class != COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) >= 11)

this is not against tip :)

>               engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
>  }
> @@ -3329,6 +3383,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs 
> *engine)
>       engine->emit_flush = gen8_emit_flush;
>       engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
>       engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
> +     if (INTEL_GEN(engine->i915) >= 12)
> +             engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
>  
>       engine->set_default_submission = intel_execlists_set_default_submission;
>  
> @@ -3374,6 +3430,9 @@ static void rcs_submission_override(struct 
> intel_engine_cs *engine)
>  {
>       switch (INTEL_GEN(engine->i915)) {
>       case 12:
> +             engine->emit_flush = gen11_emit_flush_render;
> +             engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
> +             break;
>       case 11:
>               engine->emit_flush = gen11_emit_flush_render;
>               engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index ee9a7959204c..f0e6172ccd97 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -798,7 +798,6 @@ static const struct intel_device_info 
> intel_tigerlake_12_info = {
>       .engine_mask =
>               BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2),
>       .has_rc6 = false, /* XXX disabled for debugging */
> -     .has_logical_ring_preemption = false, /* XXX disabled for debugging */
>       .engine_mask = BIT(RCS0), /* XXX reduced for debugging */
>  };
>  
> diff --git a/drivers/gpu/drm/i915/i915_request.c 
> b/drivers/gpu/drm/i915/i915_request.c
> index 754a78364a63..a967afff5e51 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -783,7 +783,9 @@ emit_semaphore_wait(struct i915_request *to,
>                   struct i915_request *from,
>                   gfp_t gfp)
>  {
> +     bool has_token = INTEL_GEN(to->i915) >= 12;
>       u32 hwsp_offset;
> +     int len;
>       u32 *cs;
>       int err;
>  
> @@ -810,7 +812,11 @@ emit_semaphore_wait(struct i915_request *to,
>       if (err)
>               return err;
>  
> -     cs = intel_ring_begin(to, 4);
> +     len = 4;
> +     if (has_token)
> +             len += 2;
> +
> +     cs = intel_ring_begin(to, len);
>       if (IS_ERR(cs))
>               return PTR_ERR(cs);
>  
> @@ -822,13 +828,18 @@ emit_semaphore_wait(struct i915_request *to,
>        * (post-wrap) values than they were expecting (and so wait
>        * forever).
>        */
> -     *cs++ = MI_SEMAPHORE_WAIT |
> -             MI_SEMAPHORE_GLOBAL_GTT |
> -             MI_SEMAPHORE_POLL |
> -             MI_SEMAPHORE_SAD_GTE_SDD;
> +     *cs++ = (MI_SEMAPHORE_WAIT |
> +              MI_SEMAPHORE_GLOBAL_GTT |
> +              MI_SEMAPHORE_POLL |
> +              MI_SEMAPHORE_SAD_GTE_SDD) +
> +             has_token;

Pls change to int :O

Also, should we just pass the token in anticipation that it will
be used in future?

-Mika

>       *cs++ = from->fence.seqno;
>       *cs++ = hwsp_offset;
>       *cs++ = 0;
> +     if (has_token) {
> +             *cs++ = 0;
> +             *cs++ = MI_NOOP;
> +     }
>  
>       intel_ring_advance(to, cs);
>       to->sched.semaphores |= from->engine->mask;
> -- 
> 2.23.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to