On Fri, Jun 19, 2015 at 06:37:15PM +0100, Arun Siluvery wrote:
> In Per context w/a batch buffer,
> WaRsRestoreWithPerCtxtBb
> 
> This WA performs writes to the scratch page, so the page must be valid;
> this check is performed before initializing the batch with this WA.
> 
> v2: This patches modifies definitions of MI_LOAD_REGISTER_MEM and
> MI_LOAD_REGISTER_REG; Add GEN8 specific defines for these instructions
> so as to not break any future users of existing definitions (Michel)
> 
> v3: The length defined in the current definitions of the LRM and LRR
> instructions was specified as 0. This seems to be a common convention for
> instructions whose length varies between platforms. It has not been an
> issue so far because they are not used anywhere except the command parser;
> now that this patch uses them, update them with the correct length and
> also move them out of the command parser placeholder to an appropriate
> place. Remove unnecessary padding and follow the WA programming sequence
> exactly as mentioned in the spec, which is essential for this WA (Dave).
> 
> Cc: Chris Wilson <ch...@chris-wilson.co.uk>
> Cc: Dave Gordon <david.s.gor...@intel.com>
> Signed-off-by: Rafael Barbalho <rafael.barba...@intel.com>
> Signed-off-by: Arun Siluvery <arun.siluv...@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_reg.h  | 29 +++++++++++++++++++--
>  drivers/gpu/drm/i915/intel_lrc.c | 54 
> ++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 81 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 7637e64..208620d 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -347,6 +347,31 @@
>  #define   MI_INVALIDATE_BSD          (1<<7)
>  #define   MI_FLUSH_DW_USE_GTT                (1<<2)
>  #define   MI_FLUSH_DW_USE_PPGTT              (0<<2)
> +#define MI_LOAD_REGISTER_MEM    MI_INSTR(0x29, 1)
> +#define MI_LOAD_REGISTER_MEM_GEN8 MI_INSTR(0x29, 2)
> +#define   MI_LRM_USE_GLOBAL_GTT (1<<22)
> +#define   MI_LRM_ASYNC_MODE_ENABLE (1<<21)
> +#define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 1)
> +#define MI_ATOMIC(len)       MI_INSTR(0x2F, (len-2))
> +#define   MI_ATOMIC_MEMORY_TYPE_GGTT (1<<22)
> +#define   MI_ATOMIC_INLINE_DATA              (1<<18)
> +#define   MI_ATOMIC_CS_STALL         (1<<17)
> +#define   MI_ATOMIC_RETURN_DATA_CTL  (1<<16)
> +#define MI_ATOMIC_OP_MASK(op)  ((op) << 8)
> +#define MI_ATOMIC_AND        MI_ATOMIC_OP_MASK(0x01)
> +#define MI_ATOMIC_OR MI_ATOMIC_OP_MASK(0x02)
> +#define MI_ATOMIC_XOR        MI_ATOMIC_OP_MASK(0x03)
> +#define MI_ATOMIC_MOVE       MI_ATOMIC_OP_MASK(0x04)
> +#define MI_ATOMIC_INC        MI_ATOMIC_OP_MASK(0x05)
> +#define MI_ATOMIC_DEC        MI_ATOMIC_OP_MASK(0x06)
> +#define MI_ATOMIC_ADD        MI_ATOMIC_OP_MASK(0x07)
> +#define MI_ATOMIC_SUB        MI_ATOMIC_OP_MASK(0x08)
> +#define MI_ATOMIC_RSUB       MI_ATOMIC_OP_MASK(0x09)
> +#define MI_ATOMIC_IMAX       MI_ATOMIC_OP_MASK(0x0A)
> +#define MI_ATOMIC_IMIN       MI_ATOMIC_OP_MASK(0x0B)
> +#define MI_ATOMIC_UMAX       MI_ATOMIC_OP_MASK(0x0C)
> +#define MI_ATOMIC_UMIN       MI_ATOMIC_OP_MASK(0x0D)
> +
>  #define MI_BATCH_BUFFER              MI_INSTR(0x30, 1)
>  #define   MI_BATCH_NON_SECURE                (1)
>  /* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */
> @@ -451,8 +476,6 @@
>  #define MI_CLFLUSH              MI_INSTR(0x27, 0)
>  #define MI_REPORT_PERF_COUNT    MI_INSTR(0x28, 0)
>  #define   MI_REPORT_PERF_COUNT_GGTT (1<<0)
> -#define MI_LOAD_REGISTER_MEM    MI_INSTR(0x29, 0)
> -#define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 0)
>  #define MI_RS_STORE_DATA_IMM    MI_INSTR(0x2B, 0)
>  #define MI_LOAD_URB_MEM         MI_INSTR(0x2C, 0)
>  #define MI_STORE_URB_MEM        MI_INSTR(0x2D, 0)
> @@ -1799,6 +1822,8 @@ enum skl_disp_power_wells {
>  #define   GEN8_RC_SEMA_IDLE_MSG_DISABLE      (1 << 12)
>  #define   GEN8_FF_DOP_CLOCK_GATE_DISABLE     (1<<10)
>  
> +#define GEN8_RS_PREEMPT_STATUS               0x215C
> +
>  /* Fuse readout registers for GT */
>  #define CHV_FUSE_GT                  (VLV_DISPLAY_BASE + 0x2168)
>  #define   CHV_FGT_DISABLE_SS0                (1 << 10)
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
> b/drivers/gpu/drm/i915/intel_lrc.c
> index 664455c..28198c4 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1215,11 +1215,65 @@ static int gen8_init_perctx_bb(struct intel_engine_cs 
> *ring,
>                              uint32_t *const batch,
>                              uint32_t *offset)
>  {
> +     uint32_t scratch_addr;
>       uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
>  
> +     /* Actual scratch location is at 128 bytes offset */
> +     scratch_addr = ring->scratch.gtt_offset + 2*CACHELINE_BYTES;
> +     scratch_addr |= PIPE_CONTROL_GLOBAL_GTT;
> +
>       /* WaDisableCtxRestoreArbitration:bdw,chv */
>       wa_ctx_emit(batch, MI_ARB_ON_OFF | MI_ARB_ENABLE);
>  
> +     /*
> +      * As per Bspec, to workaround a known HW issue, SW must perform the
> +      * below programming sequence prior to programming MI_BATCH_BUFFER_END.
> +      *
> +      * This is only applicable for Gen8.
> +      */
> +
> +     /* WaRsRestoreWithPerCtxtBb:bdw,chv */

This w/a doesn't seem to be needed for CHV. Also BDW seems to have
gained a chicken bit in H0 (FF_SLICE_CS_CHICKEN3[5]) that supposedly
means we shouldn't need this w/a on BDW either.

> +     wa_ctx_emit(batch, MI_LOAD_REGISTER_IMM(1));
> +     wa_ctx_emit(batch, INSTPM);
> +     wa_ctx_emit(batch, _MASKED_BIT_DISABLE(INSTPM_FORCE_ORDERING));
> +
> +     wa_ctx_emit(batch, (MI_ATOMIC(5) |
> +                         MI_ATOMIC_MEMORY_TYPE_GGTT |
> +                         MI_ATOMIC_INLINE_DATA |
> +                         MI_ATOMIC_CS_STALL |
> +                         MI_ATOMIC_RETURN_DATA_CTL |
> +                         MI_ATOMIC_MOVE));
> +     wa_ctx_emit(batch, scratch_addr);
> +     wa_ctx_emit(batch, 0);
> +     wa_ctx_emit(batch, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
> +     wa_ctx_emit(batch, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
> +
> +     /*
> +      * BSpec says MI_LOAD_REGISTER_MEM, MI_LOAD_REGISTER_REG and
> +      * MI_BATCH_BUFFER_END instructions in this sequence need to be
> +      * in the same cacheline. To satisfy this case even if more WA are
> +      * added in future, pad current cacheline and start remaining sequence
> +      * in new cacheline.
> +      */
> +     while (index % CACHELINE_DWORDS)
> +             wa_ctx_emit(batch, MI_NOOP);
> +
> +     wa_ctx_emit(batch, (MI_LOAD_REGISTER_MEM_GEN8 |
> +                         MI_LRM_USE_GLOBAL_GTT |
> +                         MI_LRM_ASYNC_MODE_ENABLE));
> +     wa_ctx_emit(batch, INSTPM);
> +     wa_ctx_emit(batch, scratch_addr);
> +     wa_ctx_emit(batch, 0);
> +
> +     /*
> +      * BSpec says there should not be any commands programmed
> +      * between MI_LOAD_REGISTER_REG and MI_BATCH_BUFFER_END so
> +      * do not add any new commands
> +      */
> +     wa_ctx_emit(batch, MI_LOAD_REGISTER_REG);
> +     wa_ctx_emit(batch, GEN8_RS_PREEMPT_STATUS);
> +     wa_ctx_emit(batch, GEN8_RS_PREEMPT_STATUS);
> +
>       wa_ctx_emit(batch, MI_BATCH_BUFFER_END);
>  
>       return wa_ctx_end(wa_ctx, *offset = index, 1);
> -- 
> 2.3.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to