On Mon,  3 Oct 2011 23:02:40 -0700
Kenneth Graunke <kenn...@whitecape.org> wrote:

> From: Jesse Barnes <jbar...@virtuousgeek.org>
> 
> Signed-off-by: Jesse Barnes <jbar...@virtuousgeek.org>
> Signed-off-by: Kenneth Graunke <kenn...@whitecape.org>
> ---
>  drivers/gpu/drm/i915/i915_reg.h         |    5 +
>  drivers/gpu/drm/i915/intel_ringbuffer.c |  136 ++++++++++++++++++++++++++++---
>  2 files changed, 129 insertions(+), 12 deletions(-)
> 
> v2:
>  - Add State & Constant Cache bits as suggested by Daniel.
>  - Specify length directly rather than hiding it in a GEN6 #define.
>  - Use more verbose bit field names, for clarity.
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index bfe8488..81713ae 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -243,6 +243,7 @@
>  #define   DISPLAY_PLANE_A           (0<<20)
>  #define   DISPLAY_PLANE_B           (1<<20)
>  #define GFX_OP_PIPE_CONTROL  ((0x3<<29)|(0x3<<27)|(0x2<<24))
> +#define   PIPE_CONTROL_CS_STALL                      (1<<20)
>  #define   PIPE_CONTROL_QW_WRITE                      (1<<14)
>  #define   PIPE_CONTROL_DEPTH_STALL           (1<<13)
>  #define   PIPE_CONTROL_WRITE_FLUSH           (1<<12)
> @@ -250,7 +251,11 @@
>  #define   PIPE_CONTROL_TEXTURE_CACHE_FLUSH   (1<<10) /* GM45+ only */
>  #define   PIPE_CONTROL_INDIRECT_STATE_DISABLE        (1<<9)
>  #define   PIPE_CONTROL_NOTIFY                        (1<<8)
> +#define   PIPE_CONTROL_VF_CACHE_INVALIDATE   (1<<4)
> +#define   PIPE_CONTROL_CONST_CACHE_INVALIDATE        (1<<3)
> +#define   PIPE_CONTROL_STATE_CACHE_INVALIDATE        (1<<2)
>  #define   PIPE_CONTROL_STALL_AT_SCOREBOARD   (1<<1)
> +#define   PIPE_CONTROL_DEPTH_CACHE_FLUSH     (1<<0)
>  #define   PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
>  
>  
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 2b572fd..f841d5c 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -34,6 +34,16 @@
>  #include "i915_trace.h"
>  #include "intel_drv.h"
>  
> +/*
> + * 965+ support PIPE_CONTROL commands, which provide finer grained control
> + * over cache flushing.
> + */
> +struct pipe_control {
> +     struct drm_i915_gem_object *obj;
> +     volatile u32 *cpu_page;
> +     u32 gtt_offset;
> +};
> +
>  static inline int ring_space(struct intel_ring_buffer *ring)
>  {
>       int space = (ring->head & HEAD_ADDR) - (ring->tail + 8);
> @@ -123,6 +133,118 @@ render_ring_flush(struct intel_ring_buffer *ring,
>       return 0;
>  }
>  
> +/**
> + * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
> + * implementing two workarounds on gen6.  From section 1.4.7.1
> + * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
> + *
> + * [DevSNB-C+{W/A}] Before any depth stall flush (including those
> + * produced by non-pipelined state commands), software needs to first
> + * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
> + * 0.
> + *
> + * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
> + * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
> + *
> + * And the workaround for these two requires this workaround first:
> + *
> + * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
> + * BEFORE the pipe-control with a post-sync op and no write-cache
> + * flushes.
> + *
> + * And this last workaround is tricky because of the requirements on
> + * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
> + * volume 2 part 1:
> + *
> + *     "1 of the following must also be set:
> + *      - Render Target Cache Flush Enable ([12] of DW1)
> + *      - Depth Cache Flush Enable ([0] of DW1)
> + *      - Stall at Pixel Scoreboard ([1] of DW1)
> + *      - Depth Stall ([13] of DW1)
> + *      - Post-Sync Operation ([13] of DW1)
> + *      - Notify Enable ([8] of DW1)"
> + *
> + * The cache flushes require the workaround flush that triggered this
> + * one, so we can't use it.  Depth stall would trigger the same.
> + * Post-sync nonzero is what triggered this second workaround, so we
> + * can't use that one either.  Notify enable is IRQs, which aren't
> + * really our business.  That leaves only stall at scoreboard.
> + */
> +static int
> +intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
> +{
> +     struct pipe_control *pc = ring->private;
> +     u32 scratch_addr = pc->gtt_offset + 128;
> +     int ret;
> +
> +
> +     ret = intel_ring_begin(ring, 6);
> +     if (ret)
> +             return ret;
> +
> +     intel_ring_emit(ring, GFX_OP_PIPE_CONTROL | 3);
> +     intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
> +                     PIPE_CONTROL_STALL_AT_SCOREBOARD);
> +     intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
> +     intel_ring_emit(ring, 0); /* low dword */
> +     intel_ring_emit(ring, 0); /* high dword */
> +     intel_ring_emit(ring, MI_NOOP);
> +     intel_ring_advance(ring);
> +
> +     ret = intel_ring_begin(ring, 6);
> +     if (ret)
> +             return ret;
> +
> +     intel_ring_emit(ring, GFX_OP_PIPE_CONTROL | 3);
> +     intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
> +     intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
> +     intel_ring_emit(ring, 0);
> +     intel_ring_emit(ring, 0);
> +     intel_ring_emit(ring, MI_NOOP);
> +     intel_ring_advance(ring);
> +
> +     return 0;
> +}
> +
> +static int
> +gen6_render_ring_flush(struct intel_ring_buffer *ring,
> +                         u32 invalidate_domains, u32 flush_domains)
> +{
> +     u32 flags = 0;
> +     struct pipe_control *pc = ring->private;
> +     u32 scratch_addr = pc->gtt_offset + 128;
> +     int ret;
> +
> +     /* Force SNB workarounds for PIPE_CONTROL flushes */
> +     intel_emit_post_sync_nonzero_flush(ring);
> +
> +     /* Just flush everything.  Experiments have shown that reducing the
> +      * number of bits based on the write domains has little performance
> +      * impact.
> +      */
> +     flags |= PIPE_CONTROL_WRITE_FLUSH;
> +     flags |= PIPE_CONTROL_INSTRUCTION_CACHE_FLUSH;
> +     flags |= PIPE_CONTROL_TEXTURE_CACHE_FLUSH;
> +     flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
> +     flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
> +     flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
> +     flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;

I think we also want a TLB invalidate here (bit 18).  That requires another
workaround before issuing this flush: we need two store-data commands (such as
MI_STORE_DATA_IMM or MI_STORE_DATA_INDEX) before sending a PIPE_CONTROL with
both the CS stall bit (20) and the TLB invalidate bit (18) set.

Ben
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to