From: Jerome Glisse <jgli...@redhat.com>

Flushing and synchronization only need to happen at the beginning
and end of a cs, and after each draw packet if necessary. This
patch is especially needed for the hyperz/htile feature.

v2: Separate evergreen and r6xx/r7xx flushing/syncing to allow
    easier specialization of each function. Fix r6xx/r7xx
    regression.

Signed-off-by: Jerome Glisse <jgli...@redhat.com>
---
 src/gallium/drivers/r600/evergreen_compute.c       |   23 +--
 .../drivers/r600/evergreen_compute_internal.c      |    4 +-
 src/gallium/drivers/r600/evergreen_hw_context.c    |  110 ++++++++++-
 src/gallium/drivers/r600/evergreen_state.c         |   14 +-
 src/gallium/drivers/r600/evergreend.h              |    3 +-
 src/gallium/drivers/r600/r600.h                    |   19 +-
 src/gallium/drivers/r600/r600_buffer.c             |    2 +-
 src/gallium/drivers/r600/r600_hw_context.c         |  203 ++++++++++++--------
 src/gallium/drivers/r600/r600_hw_context_priv.h    |    3 +-
 src/gallium/drivers/r600/r600_pipe.c               |    2 -
 src/gallium/drivers/r600/r600_pipe.h               |    6 +-
 src/gallium/drivers/r600/r600_state.c              |   23 +--
 src/gallium/drivers/r600/r600_state_common.c       |   68 ++-----
 13 files changed, 297 insertions(+), 183 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
b/src/gallium/drivers/r600/evergreen_compute.c
index 947a328..37c3395 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -96,7 +96,7 @@ static void evergreen_cs_set_vertex_buffer(
        vb->buffer = buffer;
        vb->user_buffer = NULL;
 
-       r600_inval_vertex_cache(rctx);
+       rctx->flags |= R600_CONTEXT_VTX_FLUSH;
        state->dirty_mask |= 1 << vb_index;
        r600_atom_dirty(rctx, &state->atom);
 }
@@ -208,8 +208,7 @@ static void evergreen_bind_compute_state(struct 
pipe_context *ctx_, void *state)
        res->usage = RADEON_USAGE_READ;
        res->coher_bo_size = ctx->cs_shader->bc.ndw*4;
 
-       r600_inval_shader_cache(ctx);
-
+       ctx->flags |= R600_CONTEXT_SH_FLUSH;
 }
 
 /* The kernel parameters are stored a vtx buffer (ID=0), besides the explicit
@@ -364,8 +363,11 @@ static void compute_emit_cs(struct r600_context *ctx)
         */
        r600_emit_atom(ctx, &ctx->start_compute_cs_cmd.atom);
 
+       ctx->flags |= R600_CONTEXT_CB_FLUSH;
+       r600_flush_emit(ctx);
+
        /* Emit cb_state */
-        cb_state = ctx->states[R600_PIPE_STATE_FRAMEBUFFER];
+       cb_state = ctx->states[R600_PIPE_STATE_FRAMEBUFFER];
        r600_context_pipe_state_emit(ctx, cb_state, 
RADEON_CP_PACKET3_COMPUTE_MODE);
 
        /* Emit vertex buffer state */
@@ -405,15 +407,8 @@ static void compute_emit_cs(struct r600_context *ctx)
                }
        }
 
-       /* r600_flush_framebuffer() updates the cb_flush_flags and then
-        * calls r600_emit_atom() on the ctx->surface_sync_cmd.atom, which emits
-        * a SURFACE_SYNC packet via r600_emit_surface_sync().
-        *
-        * XXX r600_emit_surface_sync() hardcodes the CP_COHER_SIZE to
-        * 0xffffffff, so we will need to add a field to struct
-        * r600_surface_sync_cmd if we want to manually set this value.
-        */
-       r600_flush_framebuffer(ctx, true /* Flush now */);
+       ctx->flags |= R600_CONTEXT_CB_FLUSH;
+       r600_flush_emit(ctx);
 
 #if 0
        COMPUTE_DBG("cdw: %i\n", cs->cdw);
@@ -460,6 +455,8 @@ static void evergreen_launch_grid(
        evergreen_set_lds(ctx->cs_shader, 0, 0, num_waves);
        evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input);
        evergreen_direct_dispatch(ctx_, block_layout, grid_layout);
+       /* set draw pending so flush function know we mean business */
+       ctx->flags |= R600_CONTEXT_DRAW_PENDING;
        compute_emit_cs(ctx);
 }
 
diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c 
b/src/gallium/drivers/r600/evergreen_compute_internal.c
index 1d11bab..8bb6426 100644
--- a/src/gallium/drivers/r600/evergreen_compute_internal.c
+++ b/src/gallium/drivers/r600/evergreen_compute_internal.c
@@ -559,7 +559,7 @@ void evergreen_set_tex_resource(
 
        res->coher_bo_size = tmp->offset[0] + 
util_format_get_blockwidth(tmp->real_format)*view->base.texture->width0*height*depth;
 
-       r600_inval_texture_cache(pipe->ctx);
+       pipe->ctx->flags |= R600_CONTEXT_TEX_FLUSH;
 
        evergreen_emit_force_reloc(res);
        evergreen_emit_force_reloc(res);
@@ -618,7 +618,7 @@ void evergreen_set_const_cache(
        res->usage = RADEON_USAGE_READ;
        res->coher_bo_size = size;
 
-       r600_inval_shader_cache(pipe->ctx);
+       pipe->ctx->flags |= R600_CONTEXT_SH_FLUSH;
 }
 
 struct r600_resource* r600_compute_buffer_alloc_vram(
diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c 
b/src/gallium/drivers/r600/evergreen_hw_context.c
index 546c884..cf4a225 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -761,8 +761,9 @@ static inline void 
evergreen_context_pipe_state_set_sampler_border(struct r600_c
        /* We have to flush the shaders before we change the border color
         * registers, or previous draw commands that haven't completed yet
         * will end up using the new border color. */
-       if (dirty & R600_BLOCK_STATUS_DIRTY)
-               r600_context_ps_partial_flush(ctx);
+       if (dirty & R600_BLOCK_STATUS_DIRTY) {
+               ctx->flags |= R600_CONTEXT_DRAW_FLUSH;
+       }
        if (dirty)
                r600_context_dirty_block(ctx, block, dirty, 4);
 }
@@ -823,3 +824,108 @@ void evergreen_set_streamout_enable(struct r600_context 
*ctx, unsigned buffer_en
                cs->buf[cs->cdw++] = S_028B94_STREAMOUT_0_EN(0);
        }
 }
+
+void evergreen_flush_emit(struct r600_context *rctx)
+{
+       struct radeon_winsys_cs *cs = rctx->cs;
+       unsigned mask;
+
+       if (!(rctx->flags & R600_CONTEXT_DRAW_PENDING)) {
+               return;
+       }
+       /* for GPU without vertex cache flush the texture cache */
+       if (!rctx->has_vertex_cache) {
+               rctx->flags |= R600_CONTEXT_TEX_FLUSH;
+       }
+
+       if (rctx->flags & R600_CONTEXT_DRAW_FLUSH) {
+               rctx->flags &= ~R600_CONTEXT_DRAW_FLUSH;
+               cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
+               cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | 
EVENT_INDEX(4);
+       }
+
+       if (rctx->flags & R600_CONTEXT_DB_FLUSH) {
+               cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
+               cs->buf[cs->cdw++] = EVENT_TYPE(FLUSH_AND_INV_DB_META) | 
EVENT_INDEX(0);
+       }
+
+       mask = R600_CONTEXT_CB_FLUSH |
+              R600_CONTEXT_DB_FLUSH |
+              R600_CONTEXT_SH_FLUSH |
+              R600_CONTEXT_TEX_FLUSH |
+              R600_CONTEXT_VTX_FLUSH |
+              R600_CONTEXT_STREAMOUT_FLUSH;
+       if (rctx->flags & mask) {
+               /* anything left (cb, vtx, shader, streamout) can be flushed
+                * in a more generic way
+                */
+               unsigned flags = 0;
+
+               flags |= (rctx->flags & R600_CONTEXT_CB_FLUSH) ? 
S_0085F0_CB_ACTION_ENA(1) |
+                                                                
S_0085F0_CB0_DEST_BASE_ENA(1) |
+                                                                
S_0085F0_CB1_DEST_BASE_ENA(1) |
+                                                                
S_0085F0_CB2_DEST_BASE_ENA(1) |
+                                                                
S_0085F0_CB3_DEST_BASE_ENA(1) |
+                                                                
S_0085F0_CB4_DEST_BASE_ENA(1) |
+                                                                
S_0085F0_CB5_DEST_BASE_ENA(1) |
+                                                                
S_0085F0_CB6_DEST_BASE_ENA(1) |
+                                                                
S_0085F0_CB7_DEST_BASE_ENA(1) |
+                                                                
S_0085F0_CB8_DEST_BASE_ENA(1) |
+                                                                
S_0085F0_CB9_DEST_BASE_ENA(1) |
+                                                                
S_0085F0_CB10_DEST_BASE_ENA(1) |
+                                                                
S_0085F0_CB11_DEST_BASE_ENA(1) : 0;
+               flags |= (rctx->flags & R600_CONTEXT_DB_FLUSH) ? 
S_0085F0_DB_ACTION_ENA(1) |
+                                                                
S_0085F0_DB_DEST_BASE_ENA(1) : 0;
+               flags |= (rctx->flags & R600_CONTEXT_SH_FLUSH) ? 
S_0085F0_SH_ACTION_ENA(1) : 0;
+               flags |= (rctx->flags & R600_CONTEXT_TEX_FLUSH) ? 
S_0085F0_TC_ACTION_ENA(1) : 0;
+               flags |= (rctx->flags & R600_CONTEXT_VTX_FLUSH) ? 
S_0085F0_VC_ACTION_ENA(1) : 0;
+               flags |= (rctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) ? 
S_0085F0_DEST_BASE_0_ENA(1) |
+                                                                       
S_0085F0_SMX_ACTION_ENA(1) |
+                                                                       
S_0085F0_SO0_DEST_BASE_ENA(1) |
+                                                                       
S_0085F0_SO1_DEST_BASE_ENA(1) |
+                                                                       
S_0085F0_SO2_DEST_BASE_ENA(1) |
+                                                                       
S_0085F0_SO3_DEST_BASE_ENA(1) : 0;
+               rctx->flags &= ~mask;
+               cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
+               cs->buf[cs->cdw++] = flags;           /* CP_COHER_CNTL */
+               cs->buf[cs->cdw++] = 0xffffffff;      /* CP_COHER_SIZE */
+               cs->buf[cs->cdw++] = 0;               /* CP_COHER_BASE */
+               cs->buf[cs->cdw++] = 0x0000000A;      /* POLL_INTERVAL */
+       }
+
+       if (rctx->flags & R600_CONTEXT_FLUSH_AND_INV) {
+               rctx->flags &= ~R600_CONTEXT_FLUSH_AND_INV;
+
+               cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
+               cs->buf[cs->cdw++] = 
EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0);
+               cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
+               /* CP_COHER_CNTL */
+               cs->buf[cs->cdw++] = S_0085F0_SO0_DEST_BASE_ENA(1) |
+                                    S_0085F0_SO1_DEST_BASE_ENA(1) |
+                                    S_0085F0_SO2_DEST_BASE_ENA(1) |
+                                    S_0085F0_SO3_DEST_BASE_ENA(1) |
+                                    S_0085F0_CB0_DEST_BASE_ENA(1) |
+                                    S_0085F0_CB1_DEST_BASE_ENA(1) |
+                                    S_0085F0_CB2_DEST_BASE_ENA(1) |
+                                    S_0085F0_CB3_DEST_BASE_ENA(1) |
+                                    S_0085F0_CB4_DEST_BASE_ENA(1) |
+                                    S_0085F0_CB5_DEST_BASE_ENA(1) |
+                                    S_0085F0_CB6_DEST_BASE_ENA(1) |
+                                    S_0085F0_CB7_DEST_BASE_ENA(1) |
+                                    S_0085F0_DB_DEST_BASE_ENA(1) |
+                                    S_0085F0_CB8_DEST_BASE_ENA(1) |
+                                    S_0085F0_CB9_DEST_BASE_ENA(1) |
+                                    S_0085F0_CB10_DEST_BASE_ENA(1) |
+                                    S_0085F0_CB11_DEST_BASE_ENA(1) |
+                                    S_0085F0_TC_ACTION_ENA(1) |
+                                    S_0085F0_VC_ACTION_ENA(1) |
+                                    S_0085F0_SH_ACTION_ENA(1) |
+                                    S_0085F0_SMX_ACTION_ENA(1);
+               cs->buf[cs->cdw++] = 0xffffffff;      /* CP_COHER_SIZE */
+               cs->buf[cs->cdw++] = 0;               /* CP_COHER_BASE */
+               cs->buf[cs->cdw++] = 0x0000000A;      /* POLL_INTERVAL */
+       }
+
+       /* everything is properly flush */
+       rctx->flags &= R600_CONTEXT_DRAW_PENDING;
+}
diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 214d76b..8e3eb95 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1160,8 +1160,9 @@ static void evergreen_bind_ps_sampler(struct pipe_context 
*ctx, unsigned count,
        struct r600_context *rctx = (struct r600_context *)ctx;
        struct r600_pipe_state **rstates = (struct r600_pipe_state **)states;
 
-       if (count)
-               r600_inval_texture_cache(rctx);
+       if (count) {
+               rctx->flags |= R600_CONTEXT_TEX_FLUSH;
+       }
 
        memcpy(rctx->ps_samplers.samplers, states, sizeof(void*) * count);
        rctx->ps_samplers.n_samplers = count;
@@ -1176,8 +1177,9 @@ static void evergreen_bind_vs_sampler(struct pipe_context 
*ctx, unsigned count,
        struct r600_context *rctx = (struct r600_context *)ctx;
        struct r600_pipe_state **rstates = (struct r600_pipe_state **)states;
 
-       if (count)
-               r600_inval_texture_cache(rctx);
+       if (count) {
+               rctx->flags |= R600_CONTEXT_TEX_FLUSH;
+       }
 
        for (int i = 0; i < count; i++) {
                evergreen_context_pipe_state_set_vs_sampler(rctx, rstates[i], 
i);
@@ -1674,6 +1676,7 @@ static void evergreen_db(struct r600_context *rctx, 
struct r600_pipe_state *rsta
        if (rtex->hyperz) {
                uint64_t htile_offset = 
rtex->hyperz->surface.level[level].offset;
 
+               rctx->flags |= R600_CONTEXT_FLUSH_AND_INV;
                rctx->db_misc_state.hyperz = true;
                rctx->db_misc_state.db_htile_surface_mask = 0xffffffff;
                r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
@@ -1709,7 +1712,7 @@ static void evergreen_set_framebuffer_state(struct 
pipe_context *ctx,
        if (rstate == NULL)
                return;
 
-       r600_flush_framebuffer(rctx, false);
+       rctx->flags |= R600_CONTEXT_CB_FLUSH | R600_CONTEXT_FLUSH_AND_INV;
 
        /* unreference old buffer and reference new one */
        rstate->id = R600_PIPE_STATE_FRAMEBUFFER;
@@ -1729,6 +1732,7 @@ static void evergreen_set_framebuffer_state(struct 
pipe_context *ctx,
        }
 
        if (state->zsbuf) {
+               rctx->flags |= R600_CONTEXT_DB_FLUSH;
                evergreen_db(rctx, rstate, state);
        }
 
diff --git a/src/gallium/drivers/r600/evergreend.h 
b/src/gallium/drivers/r600/evergreend.h
index 1ac5944..672e698 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -46,7 +46,8 @@
 #define EVENT_TYPE_PS_PARTIAL_FLUSH            0x10
 #define EVENT_TYPE_ZPASS_DONE                  0x15
 #define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT   0x16
-#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH       0x1f
+#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH       0x1f
+#define FLUSH_AND_INV_DB_META                  0x2c
 
 #define                EVENT_TYPE(x)                           ((x) << 0)
 #define                EVENT_INDEX(x)                          ((x) << 8)
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 0ae7959..820b356 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -188,8 +188,17 @@ struct r600_so_target {
        unsigned                so_index;
 };
 
-#define R600_CONTEXT_DRAW_PENDING      (1 << 0)
-#define R600_CONTEXT_DST_CACHES_DIRTY  (1 << 1)
+#define R600_CONTEXT_DRAW_PENDING              (1 << 0)
+#define R600_CONTEXT_DRAW_FLUSH                        (1 << 1)
+#define R600_CONTEXT_CB_FLUSH                  (1 << 2)
+#define R600_CONTEXT_DB_FLUSH                  (1 << 3)
+#define R600_CONTEXT_SH_FLUSH                  (1 << 4)
+#define R600_CONTEXT_TEX_FLUSH                 (1 << 5)
+#define R600_CONTEXT_VTX_FLUSH                 (1 << 6)
+#define R600_CONTEXT_STREAMOUT_FLUSH           (1 << 7)
+#define R600_CONTEXT_WAIT_IDLE                 (1 << 8)
+#define R600_CONTEXT_FLUSH_AND_INV             (1 << 9)
+#define R600_CONTEXT_HTILE_ERRATA              (1 << 10)
 
 struct r600_context;
 struct r600_screen;
@@ -207,10 +216,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned 
flags);
 
 void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource 
*fence,
                              unsigned offset, unsigned value);
-void r600_inval_shader_cache(struct r600_context *ctx);
-void r600_inval_texture_cache(struct r600_context *ctx);
-void r600_inval_vertex_cache(struct r600_context *ctx);
-void r600_flush_framebuffer(struct r600_context *ctx, bool flush_now);
+void r600_flush_emit(struct r600_context *ctx);
 
 void r600_context_streamout_begin(struct r600_context *ctx);
 void r600_context_streamout_end(struct r600_context *ctx);
@@ -222,6 +228,7 @@ void r600_context_block_resource_emit_dirty(struct 
r600_context *ctx, struct r60
 int evergreen_context_init(struct r600_context *ctx);
 void evergreen_context_pipe_state_set_ps_sampler(struct r600_context *ctx, 
struct r600_pipe_state *state, unsigned id);
 void evergreen_context_pipe_state_set_vs_sampler(struct r600_context *ctx, 
struct r600_pipe_state *state, unsigned id);
+void evergreen_flush_emit(struct r600_context *rctx);
 
 void _r600_pipe_state_add_reg_bo(struct r600_context *ctx,
                                 struct r600_pipe_state *state,
diff --git a/src/gallium/drivers/r600/r600_buffer.c 
b/src/gallium/drivers/r600/r600_buffer.c
index 8e2deb1..774b876 100644
--- a/src/gallium/drivers/r600/r600_buffer.c
+++ b/src/gallium/drivers/r600/r600_buffer.c
@@ -110,7 +110,7 @@ static void *r600_buffer_transfer_map(struct pipe_context 
*pipe,
                                        struct r600_vertexbuf_state * state =
                                                &rctx->vertex_buffer_state;
                                        state->dirty_mask |= 1 << i;
-                                       r600_inval_vertex_cache(rctx);
+                                       rctx->flags |= R600_CONTEXT_VTX_FLUSH;
                                        r600_atom_dirty(rctx, &state->atom);
                                }
                        }
diff --git a/src/gallium/drivers/r600/r600_hw_context.c 
b/src/gallium/drivers/r600/r600_hw_context.c
index 4f2c03a..95c861a 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -114,19 +114,6 @@ err:
        return;
 }
 
-void r600_context_ps_partial_flush(struct r600_context *ctx)
-{
-       struct radeon_winsys_cs *cs = ctx->cs;
-
-       if (!(ctx->flags & R600_CONTEXT_DRAW_PENDING))
-               return;
-
-       cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
-       cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | 
EVENT_INDEX(4);
-
-       ctx->flags &= ~R600_CONTEXT_DRAW_PENDING;
-}
-
 static void r600_init_block(struct r600_context *ctx,
                            struct r600_block *block,
                            const struct r600_reg *reg, int index, int nreg,
@@ -825,7 +812,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned 
num_dw,
        }
 
        /* Count in framebuffer cache flushes at the end of CS. */
-       num_dw += 7; /* one SURFACE_SYNC and CACHE_FLUSH_AND_INV (r6xx-only) */
+       num_dw += 44; /* one SURFACE_SYNC and CACHE_FLUSH_AND_INV (r6xx-only) */
 
        /* Save 16 dwords for the fence mechanism. */
        num_dw += 16;
@@ -853,7 +840,7 @@ void r600_context_dirty_block(struct r600_context *ctx,
                LIST_ADDTAIL(&block->list,&ctx->dirty);
 
                if (block->flags & REG_FLAG_FLUSH_CHANGE) {
-                       r600_context_ps_partial_flush(ctx);
+                       ctx->flags |= R600_CONTEXT_DRAW_FLUSH;
                }
        }
 }
@@ -1085,8 +1072,9 @@ static inline void 
r600_context_pipe_state_set_sampler_border(struct r600_contex
        /* We have to flush the shaders before we change the border color
         * registers, or previous draw commands that haven't completed yet
         * will end up using the new border color. */
-       if (dirty & R600_BLOCK_STATUS_DIRTY)
-               r600_context_ps_partial_flush(ctx);
+       if (dirty & R600_BLOCK_STATUS_DIRTY) {
+               ctx->flags |= R600_CONTEXT_DRAW_FLUSH;
+       }
        if (dirty)
                r600_context_dirty_block(ctx, block, dirty, 3);
 }
@@ -1200,54 +1188,116 @@ void r600_context_block_resource_emit_dirty(struct 
r600_context *ctx, struct r60
        LIST_DELINIT(&block->list);
 }
 
-void r600_inval_shader_cache(struct r600_context *ctx)
+void r600_flush_emit(struct r600_context *rctx)
 {
-       ctx->surface_sync_cmd.flush_flags |= S_0085F0_SH_ACTION_ENA(1);
-       r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom);
-}
-
-void r600_inval_texture_cache(struct r600_context *ctx)
-{
-       ctx->surface_sync_cmd.flush_flags |= S_0085F0_TC_ACTION_ENA(1);
-       r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom);
-}
-
-void r600_inval_vertex_cache(struct r600_context *ctx)
-{
-       if (ctx->has_vertex_cache) {
-               ctx->surface_sync_cmd.flush_flags |= S_0085F0_VC_ACTION_ENA(1);
-       } else {
-               /* Some GPUs don't have the vertex cache and must use the 
texture cache instead. */
-               ctx->surface_sync_cmd.flush_flags |= S_0085F0_TC_ACTION_ENA(1);
-       }
-       r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom);
-}
+       struct radeon_winsys_cs *cs = rctx->cs;
+       unsigned mask;
 
-void r600_flush_framebuffer(struct r600_context *ctx, bool flush_now)
-{
-       if (!(ctx->flags & R600_CONTEXT_DST_CACHES_DIRTY))
+       if (!(rctx->flags & R600_CONTEXT_DRAW_PENDING)) {
                return;
+       }
+       /* for GPU without vertex cache flush the texture cache */
+       if (!rctx->has_vertex_cache) {
+               rctx->flags |= R600_CONTEXT_TEX_FLUSH;
+       }
 
-       ctx->surface_sync_cmd.flush_flags |=
-               r600_get_cb_flush_flags(ctx) |
-               (ctx->framebuffer.zsbuf ? S_0085F0_DB_ACTION_ENA(1) | 
S_0085F0_DB_DEST_BASE_ENA(1) : 0);
+       if (rctx->flags & R600_CONTEXT_DRAW_FLUSH) {
+               rctx->flags &= ~R600_CONTEXT_DRAW_FLUSH;
+               cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
+               cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | 
EVENT_INDEX(4);
+       }
 
-       if (flush_now) {
-               r600_emit_atom(ctx, &ctx->surface_sync_cmd.atom);
-       } else {
-               r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom);
+       if (rctx->flags & R600_CONTEXT_WAIT_IDLE) {
+               /* wait for things to settle */
+               rctx->flags &= ~R600_CONTEXT_WAIT_IDLE;
+               cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONFIG_REG, 1, 0);
+               cs->buf[cs->cdw++] = (R_008040_WAIT_UNTIL - 
R600_CONFIG_REG_OFFSET) >> 2;
+               cs->buf[cs->cdw++] = S_008040_WAIT_3D_IDLE(1);
        }
 
-       /* Also add a complete cache flush to work around broken flushing on 
R6xx. */
-       if (ctx->chip_class == R600) {
-               if (flush_now) {
-                       r600_emit_atom(ctx, &ctx->r6xx_flush_and_inv_cmd);
-               } else {
-                       r600_atom_dirty(ctx, &ctx->r6xx_flush_and_inv_cmd);
+       if (rctx->flags & R600_CONTEXT_FLUSH_AND_INV) {
+               rctx->flags &= ~R600_CONTEXT_FLUSH_AND_INV;
+
+               cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
+               cs->buf[cs->cdw++] = 
EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0);
+
+               /* db flush are special due to errata with hyperz, we need to
+                * insert noop so cache have time to really flush
+                */
+               if (rctx->flags & R600_CONTEXT_HTILE_ERRATA) {
+                       /* R600_CONTEXT_HTILE_ERRATA is persistant for whole cs 
*/
+                       cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 31, 0);
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
+                       cs->buf[cs->cdw++] = 0xdeadcafe;
                }
        }
 
-       ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY;
+       mask = R600_CONTEXT_CB_FLUSH |
+              R600_CONTEXT_DB_FLUSH |
+              R600_CONTEXT_SH_FLUSH |
+              R600_CONTEXT_TEX_FLUSH |
+              R600_CONTEXT_VTX_FLUSH |
+              R600_CONTEXT_STREAMOUT_FLUSH;
+       if (rctx->flags & mask) {
+               /* anything left (cb, vtx, shader, streamout) can be flushed
+                * in a more generic way
+                */
+               unsigned flags = 0;
+
+               flags |= S_0085F0_CB1_DEST_BASE_ENA(1) | 
S_0085F0_DEST_BASE_0_ENA(1);
+               flags |= (rctx->flags & R600_CONTEXT_CB_FLUSH) ? 
S_0085F0_CB_ACTION_ENA(1) : 0;
+               flags |= (rctx->flags & R600_CONTEXT_DB_FLUSH) ? 
S_0085F0_DB_ACTION_ENA(1) : 0;
+               flags |= (rctx->flags & R600_CONTEXT_SH_FLUSH) ? 
S_0085F0_SH_ACTION_ENA(1) : 0;
+               flags |= (rctx->flags & R600_CONTEXT_TEX_FLUSH) ? 
S_0085F0_TC_ACTION_ENA(1) : 0;
+               flags |= (rctx->flags & R600_CONTEXT_VTX_FLUSH) ? 
S_0085F0_VC_ACTION_ENA(1) : 0;
+               flags |= (rctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) ? 
S_0085F0_DEST_BASE_0_ENA(1) |
+                                                                       
S_0085F0_SO0_DEST_BASE_ENA(1) |
+                                                                       
S_0085F0_SO1_DEST_BASE_ENA(1) |
+                                                                       
S_0085F0_SO2_DEST_BASE_ENA(1) |
+                                                                       
S_0085F0_SO3_DEST_BASE_ENA(1) |
+                                                                       
S_0085F0_SMX_ACTION_ENA(1) : 0;
+               rctx->flags &= ~mask;
+               cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
+               cs->buf[cs->cdw++] = flags;           /* CP_COHER_CNTL */
+               cs->buf[cs->cdw++] = 0xffffffff;      /* CP_COHER_SIZE */
+               cs->buf[cs->cdw++] = 0;               /* CP_COHER_BASE */
+               cs->buf[cs->cdw++] = 0x0000000A;      /* POLL_INTERVAL */
+       }
+
+       /* everything is properly flush */
+       rctx->flags &= R600_CONTEXT_DRAW_PENDING |
+       /* R600_CONTEXT_HTILE_ERRATA is persistant for whole cs */
+                      R600_CONTEXT_HTILE_ERRATA;
 }
 
 void r600_context_flush(struct r600_context *ctx, unsigned flags)
@@ -1276,10 +1326,13 @@ void r600_context_flush(struct r600_context *ctx, 
unsigned flags)
                streamout_suspended = true;
        }
 
-       r600_flush_framebuffer(ctx, true);
-
-       /* partial flush is needed to avoid lockups on some chips with user 
fences */
-       r600_context_ps_partial_flush(ctx);
+       ctx->flags |= R600_CONTEXT_DRAW_FLUSH;
+       ctx->flags |= R600_CONTEXT_FLUSH_AND_INV;
+       if (ctx->chip_class >= EVERGREEN) {
+               evergreen_flush_emit(ctx);
+       } else {
+               r600_flush_emit(ctx);
+       }
 
        /* old kernels and userspace don't set SX_MISC, so we must reset it to 
0 here */
        if (ctx->chip_class <= R700) {
@@ -1298,11 +1351,9 @@ void r600_context_flush(struct r600_context *ctx, 
unsigned flags)
        /* Begin a new CS. */
        r600_emit_atom(ctx, &ctx->start_cs_cmd.atom);
 
-       /* Invalidate caches. */
-       r600_inval_vertex_cache(ctx);
-       r600_inval_texture_cache(ctx);
-       r600_inval_shader_cache(ctx);
-       r600_flush_framebuffer(ctx, false);
+       ctx->flags |= R600_CONTEXT_SH_FLUSH |
+                     R600_CONTEXT_TEX_FLUSH |
+                     R600_CONTEXT_VTX_FLUSH;
 
        /* Re-emit states. */
        r600_atom_dirty(ctx, &ctx->cb_misc_state.atom);
@@ -1357,7 +1408,10 @@ void r600_context_emit_fence(struct r600_context *ctx, 
struct r600_resource *fen
        va = r600_resource_va(&ctx->screen->screen, (void*)fence_bo);
        va = va + (offset << 2);
 
-       r600_context_ps_partial_flush(ctx);
+       ctx->flags &= ~R600_CONTEXT_DRAW_FLUSH;
+       cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
+       cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | 
EVENT_INDEX(4);
+
        cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
        cs->buf[cs->cdw++] = 
EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
        cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL;       /* ADDRESS_LO */
@@ -1518,7 +1572,7 @@ void r600_context_streamout_end(struct r600_context *ctx)
 {
        struct radeon_winsys_cs *cs = ctx->cs;
        struct r600_so_target **t = ctx->so_targets;
-       unsigned i, flush_flags = 0;
+       unsigned i;
        uint64_t va;
 
        if (ctx->chip_class >= EVERGREEN) {
@@ -1545,7 +1599,6 @@ void r600_context_streamout_end(struct r600_context *ctx)
                                r600_context_bo_reloc(ctx,  t[i]->filled_size,
                                                      RADEON_USAGE_WRITE);
 
-                       flush_flags |= S_0085F0_SO0_DEST_BASE_ENA(1) << i;
                }
        }
 
@@ -1554,23 +1607,11 @@ void r600_context_streamout_end(struct r600_context 
*ctx)
        } else {
                r600_set_streamout_enable(ctx, 0);
        }
-
+       ctx->flags |= R600_CONTEXT_STREAMOUT_FLUSH;
        /* This is needed to fix cache flushes on r600. */
        if (ctx->chip_class == R600) {
-               if (ctx->family == CHIP_RV670 ||
-                   ctx->family == CHIP_RS780 ||
-                   ctx->family == CHIP_RS880) {
-                       flush_flags |= S_0085F0_DEST_BASE_0_ENA(1);
-               }
-
-               r600_atom_dirty(ctx, &ctx->r6xx_flush_and_inv_cmd);
+               ctx->flags |= R600_CONTEXT_FLUSH_AND_INV;
        }
-
-       /* Flush streamout caches. */
-       ctx->surface_sync_cmd.flush_flags |=
-               S_0085F0_SMX_ACTION_ENA(1) | flush_flags;
-       r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom);
-
        ctx->num_cs_dw_streamout_end = 0;
 
 #if 0
diff --git a/src/gallium/drivers/r600/r600_hw_context_priv.h 
b/src/gallium/drivers/r600/r600_hw_context_priv.h
index 037d5e3..6929336 100644
--- a/src/gallium/drivers/r600/r600_hw_context_priv.h
+++ b/src/gallium/drivers/r600/r600_hw_context_priv.h
@@ -28,7 +28,8 @@
 
 #include "r600_pipe.h"
 
-#define R600_MAX_DRAW_CS_DWORDS 11
+/* we also account here for the size needed for flushing */
+#define R600_MAX_DRAW_CS_DWORDS 64
 
 /* these flags are used in register flags and added into block flags */
 #define REG_FLAG_NEED_BO 1
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 9f20560..07a398f 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -231,8 +231,6 @@ static struct pipe_context *r600_create_context(struct 
pipe_screen *screen, void
        rctx->context.create_video_decoder = vl_create_decoder;
        rctx->context.create_video_buffer = vl_video_buffer_create;
 
-       r600_init_common_atoms(rctx);
-
        switch (rctx->chip_class) {
        case R600:
        case R700:
diff --git a/src/gallium/drivers/r600/r600_pipe.h 
b/src/gallium/drivers/r600/r600_pipe.h
index 5ff4d51..4add90c 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -341,9 +341,7 @@ struct r600_context {
        struct r600_command_buffer      start_cs_cmd; /* invariant state mostly 
*/
        /** Compute specific registers initializations.  The start_cs_cmd atom
         *  must be emitted before start_compute_cs_cmd. */
-        struct r600_command_buffer      start_compute_cs_cmd;
-       struct r600_surface_sync_cmd    surface_sync_cmd;
-       struct r600_atom                r6xx_flush_and_inv_cmd;
+        struct r600_command_buffer     start_compute_cs_cmd;
        struct r600_cb_misc_state       cb_misc_state;
        struct r600_db_misc_state       db_misc_state;
        /** Vertex buffers for fetch shaders */
@@ -528,8 +526,6 @@ void r600_translate_index_buffer(struct r600_context *r600,
 void r600_init_atom(struct r600_atom *atom,
                    void (*emit)(struct r600_context *ctx, struct r600_atom 
*state),
                    unsigned num_dw, enum r600_atom_flags flags);
-void r600_init_common_atoms(struct r600_context *rctx);
-unsigned r600_get_cb_flush_flags(struct r600_context *rctx);
 void r600_texture_barrier(struct pipe_context *ctx);
 void r600_set_index_buffer(struct pipe_context *ctx,
                           const struct pipe_index_buffer *ib);
diff --git a/src/gallium/drivers/r600/r600_state.c 
b/src/gallium/drivers/r600/r600_state.c
index 8925a23..3f43a9d 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1133,8 +1133,9 @@ static void r600_set_sampler_views(struct r600_context 
*rctx,
        struct r600_pipe_sampler_view **rviews = (struct r600_pipe_sampler_view 
**)views;
        unsigned i;
 
-       if (count)
-               r600_inval_texture_cache(rctx);
+       if (count) {
+               rctx->flags |= R600_CONTEXT_TEX_FLUSH;
+       }
 
        for (i = 0; i < count; i++) {
                if (rviews[i]) {
@@ -1632,6 +1633,8 @@ static void r600_db(struct r600_context *rctx, struct 
r600_pipe_state *rstate,
        if (rtex->hyperz) {
                uint64_t htile_offset = 
rtex->hyperz->surface.level[level].offset;
 
+               rctx->flags |= R600_CONTEXT_FLUSH_AND_INV;
+               rctx->flags |= R600_CONTEXT_HTILE_ERRATA;
                rctx->db_misc_state.hyperz = true;
                rctx->db_misc_state.db_htile_surface_mask = 0xffffffff;
                r600_atom_dirty(rctx, &rctx->db_misc_state.atom);
@@ -1676,7 +1679,10 @@ static void r600_set_framebuffer_state(struct 
pipe_context *ctx,
        if (rstate == NULL)
                return;
 
-       r600_flush_framebuffer(rctx, false);
+       /* the htile errata is also needed for cb; the flag should probably be renamed */
+       rctx->flags |= R600_CONTEXT_CB_FLUSH |
+                      R600_CONTEXT_DRAW_FLUSH |
+                      R600_CONTEXT_FLUSH_AND_INV;
 
        /* unreference old buffer and reference new one */
        rstate->id = R600_PIPE_STATE_FRAMEBUFFER;
@@ -1692,6 +1698,7 @@ static void r600_set_framebuffer_state(struct 
pipe_context *ctx,
                r600_cb(rctx, rstate, state, i);
        }
        if (state->zsbuf) {
+               rctx->flags |= R600_CONTEXT_DB_FLUSH;
                r600_db(rctx, rstate, state);
        }
 
@@ -1962,14 +1969,8 @@ void r600_adjust_gprs(struct r600_context *rctx)
        unsigned tmp;
        int diff;
 
-       /* XXX: Following call moved from r600_bind_[ps|vs]_shader,
-        * it seems eg+ doesn't need it, r6xx/7xx probably need it only for
-        * adjusting the GPR allocation?
-        * Do we need this if we aren't really changing config below? */
-       r600_inval_shader_cache(rctx);
-
-       if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs)
-       {
+       rctx->flags |= R600_CONTEXT_SH_FLUSH;
+       if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs) {
                diff = rctx->ps_shader->current->shader.bc.ngpr - 
rctx->default_ps_gprs;
                num_vs_gprs -= diff;
                num_ps_gprs += diff;
diff --git a/src/gallium/drivers/r600/r600_state_common.c 
b/src/gallium/drivers/r600/r600_state_common.c
index 3c42a44..c9fd362 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -56,27 +56,6 @@ void r600_release_command_buffer(struct r600_command_buffer 
*cb)
        FREE(cb->buf);
 }
 
-static void r600_emit_surface_sync(struct r600_context *rctx, struct r600_atom 
*atom)
-{
-       struct radeon_winsys_cs *cs = rctx->cs;
-       struct r600_surface_sync_cmd *a = (struct r600_surface_sync_cmd*)atom;
-
-       cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
-       cs->buf[cs->cdw++] = a->flush_flags;  /* CP_COHER_CNTL */
-       cs->buf[cs->cdw++] = 0xffffffff;      /* CP_COHER_SIZE */
-       cs->buf[cs->cdw++] = 0;               /* CP_COHER_BASE */
-       cs->buf[cs->cdw++] = 0x0000000A;      /* POLL_INTERVAL */
-
-       a->flush_flags = 0;
-}
-
-static void r600_emit_r6xx_flush_and_inv(struct r600_context *rctx, struct 
r600_atom *atom)
-{
-       struct radeon_winsys_cs *cs = rctx->cs;
-       cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
-       cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | 
EVENT_INDEX(0);
-}
-
 void r600_init_atom(struct r600_atom *atom,
                    void (*emit)(struct r600_context *ctx, struct r600_atom 
*state),
                    unsigned num_dw, enum r600_atom_flags flags)
@@ -86,37 +65,11 @@ void r600_init_atom(struct r600_atom *atom,
        atom->flags = flags;
 }
 
-void r600_init_common_atoms(struct r600_context *rctx)
-{
-       r600_init_atom(&rctx->surface_sync_cmd.atom,    r600_emit_surface_sync, 
        5, EMIT_EARLY);
-       r600_init_atom(&rctx->r6xx_flush_and_inv_cmd,   
r600_emit_r6xx_flush_and_inv,   2, EMIT_EARLY);
-}
-
-unsigned r600_get_cb_flush_flags(struct r600_context *rctx)
-{
-       unsigned flags = 0;
-
-       if (rctx->framebuffer.nr_cbufs) {
-               flags |= S_0085F0_CB_ACTION_ENA(1) |
-                        (((1 << rctx->framebuffer.nr_cbufs) - 1) << 
S_0085F0_CB0_DEST_BASE_ENA_SHIFT);
-       }
-
-       /* Workaround for broken flushing on some R6xx chipsets. */
-       if (rctx->family == CHIP_RV670 ||
-           rctx->family == CHIP_RS780 ||
-           rctx->family == CHIP_RS880) {
-               flags |=  S_0085F0_CB1_DEST_BASE_ENA(1) |
-                         S_0085F0_DEST_BASE_0_ENA(1);
-       }
-       return flags;
-}
-
 void r600_texture_barrier(struct pipe_context *ctx)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
 
-       rctx->surface_sync_cmd.flush_flags |= S_0085F0_TC_ACTION_ENA(1) | 
r600_get_cb_flush_flags(rctx);
-       r600_atom_dirty(rctx, &rctx->surface_sync_cmd.atom);
+       rctx->flags |= R600_CONTEXT_DRAW_FLUSH | R600_CONTEXT_CB_FLUSH | 
R600_CONTEXT_TEX_FLUSH;
 }
 
 static bool r600_conv_pipe_prim(unsigned pprim, unsigned *prim)
@@ -368,7 +321,7 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, 
void *state)
 
        rctx->vertex_elements = v;
        if (v) {
-               r600_inval_shader_cache(rctx);
+               rctx->flags |= R600_CONTEXT_SH_FLUSH;
 
                rctx->states[v->rstate.id] = &v->rstate;
                r600_context_pipe_state_set(rctx, &v->rstate);
@@ -412,9 +365,9 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, 
unsigned count,
 
        util_copy_vertex_buffers(rctx->vertex_buffer, &rctx->nr_vertex_buffers, 
buffers, count);
 
-       r600_inval_vertex_cache(rctx);
+       rctx->flags |= R600_CONTEXT_VTX_FLUSH;
        state->atom.num_dw = (rctx->chip_class >= EVERGREEN ? 12 : 11) *
-                                          rctx->nr_vertex_buffers;
+                            rctx->nr_vertex_buffers;
        for (i = 0 ; i < rctx->nr_vertex_buffers; i++) {
                state->dirty_mask |= 1 << i;
        }
@@ -523,6 +476,9 @@ static int r600_shader_select(struct pipe_context *ctx,
        if (dirty)
                *dirty = 1;
 
+       /* we are changing the shader, so a flush is needed */
+       rctx->flags |= R600_CONTEXT_SH_FLUSH;
+
        shader->next_variant = sel->current;
        sel->current = shader;
 
@@ -667,7 +623,7 @@ static void r600_update_alpha_ref(struct r600_context *rctx)
 
 void r600_constant_buffers_dirty(struct r600_context *rctx, struct 
r600_constbuf_state *state)
 {
-       r600_inval_shader_cache(rctx);
+       rctx->flags |= R600_CONTEXT_SH_FLUSH;
        state->atom.num_dw = rctx->chip_class >= EVERGREEN ? 
util_bitcount(state->dirty_mask)*20
                                                           : 
util_bitcount(state->dirty_mask)*19;
        r600_atom_dirty(rctx, &state->atom);
@@ -893,6 +849,12 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct 
pipe_draw_info *dinfo)
 
        r600_update_derived_state(rctx);
 
+       if (rctx->chip_class >= EVERGREEN) {
+               evergreen_flush_emit(rctx);
+       } else {
+               r600_flush_emit(rctx);
+       }
+
        if (info.indexed) {
                /* Initialize the index buffer struct. */
                pipe_resource_reference(&ib.buffer, rctx->index_buffer.buffer);
@@ -999,7 +961,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct 
pipe_draw_info *dinfo)
                                        (info.count_from_stream_output ? 
S_0287F0_USE_OPAQUE(1) : 0);
        }
 
-       rctx->flags |= R600_CONTEXT_DST_CACHES_DIRTY | 
R600_CONTEXT_DRAW_PENDING;
+       rctx->flags |= R600_CONTEXT_DRAW_PENDING;
 
        /* Set the depth buffer as dirty. */
        if (rctx->framebuffer.zsbuf) {
-- 
1.7.10.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to