On Sun, Sep 07, 2014 at 09:08:31AM +0100, Chris Wilson wrote:
> Running igt, I was encountering the invalid TLB bug on my 845g, despite
> that it was using the CS workaround. Examining the w/a buffer in the
> error state, showed that the copy from the user batch into the
> workaround itself was suffering from the invalid TLB bug (the first
> cacheline was broken with the first two words reversed). Time to try a
> fresh approach. This extends the workaround to write into each page of
> our scratch buffer in order to overflow the TLB and evict the invalid
> entries. This could be refined to only do so after we update the GTT,
> but for simplicity, we do it before each batch.
> 
> I suspect this supersedes our current workaround, but for safety keep
> doing both.

I suspect that we might end up with just an elaborate delay
implementation, but if it works then it's good. One nitpick below, with
that addressed this is Reviewed-by: Daniel Vetter <daniel.vet...@ffwll.ch>
> 
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> Cc: Daniel Vetter <daniel.vet...@ffwll.ch>
> Cc: sta...@vger.kernel.org
> ---
>  drivers/gpu/drm/i915/i915_reg.h         | 12 ++++---
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 62 
> +++++++++++++++++++--------------
>  2 files changed, 44 insertions(+), 30 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index e4d7607da2c4..f29b44c86a2f 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -334,16 +334,20 @@
>  #define GFX_OP_DESTBUFFER_INFO        ((0x3<<29)|(0x1d<<24)|(0x8e<<16)|1)
>  #define GFX_OP_DRAWRECT_INFO     ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3))
>  #define GFX_OP_DRAWRECT_INFO_I965  ((0x7900<<16)|0x2)
> -#define SRC_COPY_BLT_CMD                ((2<<29)|(0x43<<22)|4)
> +
> +#define COLOR_BLT_CMD                        (2<<29 | 0x40<<22 | (5-2))
> +#define SRC_COPY_BLT_CMD             ((2<<29)|(0x43<<22)|4)
>  #define XY_SRC_COPY_BLT_CMD          ((2<<29)|(0x53<<22)|6)
>  #define XY_MONO_SRC_COPY_IMM_BLT     ((2<<29)|(0x71<<22)|5)
> -#define XY_SRC_COPY_BLT_WRITE_ALPHA  (1<<21)
> -#define XY_SRC_COPY_BLT_WRITE_RGB    (1<<20)
> +#define   BLT_WRITE_A                        (2<<20)
> +#define   BLT_WRITE_RGB                      (1<<20)
> +#define   BLT_WRITE_RGBA             (BLT_WRITE_RGB | BLT_WRITE_A)
>  #define   BLT_DEPTH_8                        (0<<24)
>  #define   BLT_DEPTH_16_565           (1<<24)
>  #define   BLT_DEPTH_16_1555          (2<<24)
>  #define   BLT_DEPTH_32                       (3<<24)
> -#define   BLT_ROP_GXCOPY             (0xcc<<16)
> +#define   BLT_ROP_SRC_COPY           (0xcc<<16)
> +#define   BLT_ROP_COLOR_COPY         (0xf0<<16)
>  #define XY_SRC_COPY_BLT_SRC_TILED    (1<<15) /* 965+ only */
>  #define XY_SRC_COPY_BLT_DST_TILED    (1<<11) /* 965+ only */
>  #define CMD_OP_DISPLAYBUFFER_INFO ((0x0<<29)|(0x14<<23)|2)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
> b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 16371a444426..acd933b1b027 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1368,49 +1368,59 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring,
>                               u64 offset, u32 len,
>                               unsigned flags)
>  {
> +     u32 cs_offset = ring->scratch.gtt_offset;
>       int ret;
>  
> -     if (flags & I915_DISPATCH_PINNED) {
> -             ret = intel_ring_begin(ring, 4);
> -             if (ret)
> -                     return ret;
> +     ret = intel_ring_begin(ring, 6);
> +     if (ret)
> +             return ret;
>  
> -             intel_ring_emit(ring, MI_BATCH_BUFFER);
> -             intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 
> 0 : MI_BATCH_NON_SECURE));
> -             intel_ring_emit(ring, offset + len - 8);
> -             intel_ring_emit(ring, MI_NOOP);
> -             intel_ring_advance(ring);
> -     } else {
> -             u32 cs_offset = ring->scratch.gtt_offset;
> +     /* Evict the invalid PTE TLBs */
> +     intel_ring_emit(ring, COLOR_BLT_CMD | BLT_WRITE_RGBA);
> +     intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096);
> +     intel_ring_emit(ring, 64 << 16 | 4); /* load each page */

s/64/(I830M_BACHT_LIMIT / 4096)/ for clarity

> +     intel_ring_emit(ring, cs_offset);
> +     intel_ring_emit(ring, 0xdeadbeef);
> +     intel_ring_emit(ring, MI_NOOP);
> +     intel_ring_advance(ring);
>  
> +     if ((flags & I915_DISPATCH_PINNED) == 0) {
>               if (len > I830_BATCH_LIMIT)
>                       return -ENOSPC;
>  
> -             ret = intel_ring_begin(ring, 9+3);
> +             ret = intel_ring_begin(ring, 6 + 2);
>               if (ret)
>                       return ret;
> -             /* Blit the batch (which has now all relocs applied) to the 
> stable batch
> -              * scratch bo area (so that the CS never stumbles over its tlb
> -              * invalidation bug) ... */
> -             intel_ring_emit(ring, XY_SRC_COPY_BLT_CMD |
> -                             XY_SRC_COPY_BLT_WRITE_ALPHA |
> -                             XY_SRC_COPY_BLT_WRITE_RGB);
> -             intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_GXCOPY | 4096);
> -             intel_ring_emit(ring, 0);
> -             intel_ring_emit(ring, (DIV_ROUND_UP(len, 4096) << 16) | 1024);
> +
> +             /* Blit the batch (which has now all relocs applied) to the
> +              * stable batch scratch bo area (so that the CS never
> +              * stumbles over its tlb invalidation bug) ...
> +              */
> +             intel_ring_emit(ring, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA);
> +             intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096);
> +             intel_ring_emit(ring, DIV_ROUND_UP(len, 4096) << 16 | 1024);
>               intel_ring_emit(ring, cs_offset);
> -             intel_ring_emit(ring, 0);
>               intel_ring_emit(ring, 4096);
>               intel_ring_emit(ring, offset);
> +
>               intel_ring_emit(ring, MI_FLUSH);
> +             intel_ring_emit(ring, MI_NOOP);
> +             intel_ring_advance(ring);
>  
>               /* ... and execute it. */
> -             intel_ring_emit(ring, MI_BATCH_BUFFER);
> -             intel_ring_emit(ring, cs_offset | (flags & I915_DISPATCH_SECURE 
> ? 0 : MI_BATCH_NON_SECURE));
> -             intel_ring_emit(ring, cs_offset + len - 8);
> -             intel_ring_advance(ring);
> +             offset = cs_offset;
>       }
>  
> +     ret = intel_ring_begin(ring, 4);
> +     if (ret)
> +             return ret;
> +
> +     intel_ring_emit(ring, MI_BATCH_BUFFER);
> +     intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : 
> MI_BATCH_NON_SECURE));
> +     intel_ring_emit(ring, offset + len - 8);
> +     intel_ring_emit(ring, MI_NOOP);
> +     intel_ring_advance(ring);
> +
>       return 0;
>  }
>  
> -- 
> 2.1.0
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to