From: Jerome Glisse <jgli...@redhat.com> Flushing and synchronization only need to happen at the beginning and end of a cs, and after each draw packet if necessary. This patch is especially needed for the hyperz/htile feature.
v2: Separate evergreen and r6xx/r7xx flushing/syncing allow easier specialization of each functions. Fix r6xx/r7xx regression. Signed-off-by: Jerome Glisse <jgli...@redhat.com> --- src/gallium/drivers/r600/evergreen_compute.c | 23 +-- .../drivers/r600/evergreen_compute_internal.c | 4 +- src/gallium/drivers/r600/evergreen_hw_context.c | 110 ++++++++++- src/gallium/drivers/r600/evergreen_state.c | 14 +- src/gallium/drivers/r600/evergreend.h | 3 +- src/gallium/drivers/r600/r600.h | 19 +- src/gallium/drivers/r600/r600_buffer.c | 2 +- src/gallium/drivers/r600/r600_hw_context.c | 203 ++++++++++++-------- src/gallium/drivers/r600/r600_hw_context_priv.h | 3 +- src/gallium/drivers/r600/r600_pipe.c | 2 - src/gallium/drivers/r600/r600_pipe.h | 6 +- src/gallium/drivers/r600/r600_state.c | 23 +-- src/gallium/drivers/r600/r600_state_common.c | 68 ++----- 13 files changed, 297 insertions(+), 183 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 947a328..37c3395 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -96,7 +96,7 @@ static void evergreen_cs_set_vertex_buffer( vb->buffer = buffer; vb->user_buffer = NULL; - r600_inval_vertex_cache(rctx); + rctx->flags |= R600_CONTEXT_VTX_FLUSH; state->dirty_mask |= 1 << vb_index; r600_atom_dirty(rctx, &state->atom); } @@ -208,8 +208,7 @@ static void evergreen_bind_compute_state(struct pipe_context *ctx_, void *state) res->usage = RADEON_USAGE_READ; res->coher_bo_size = ctx->cs_shader->bc.ndw*4; - r600_inval_shader_cache(ctx); - + ctx->flags |= R600_CONTEXT_SH_FLUSH; } /* The kernel parameters are stored a vtx buffer (ID=0), besides the explicit @@ -364,8 +363,11 @@ static void compute_emit_cs(struct r600_context *ctx) */ r600_emit_atom(ctx, &ctx->start_compute_cs_cmd.atom); + ctx->flags |= R600_CONTEXT_CB_FLUSH; + r600_flush_emit(ctx); + /* Emit cb_state */ - cb_state = 
ctx->states[R600_PIPE_STATE_FRAMEBUFFER]; + cb_state = ctx->states[R600_PIPE_STATE_FRAMEBUFFER]; r600_context_pipe_state_emit(ctx, cb_state, RADEON_CP_PACKET3_COMPUTE_MODE); /* Emit vertex buffer state */ @@ -405,15 +407,8 @@ static void compute_emit_cs(struct r600_context *ctx) } } - /* r600_flush_framebuffer() updates the cb_flush_flags and then - * calls r600_emit_atom() on the ctx->surface_sync_cmd.atom, which emits - * a SURFACE_SYNC packet via r600_emit_surface_sync(). - * - * XXX r600_emit_surface_sync() hardcodes the CP_COHER_SIZE to - * 0xffffffff, so we will need to add a field to struct - * r600_surface_sync_cmd if we want to manually set this value. - */ - r600_flush_framebuffer(ctx, true /* Flush now */); + ctx->flags |= R600_CONTEXT_CB_FLUSH; + r600_flush_emit(ctx); #if 0 COMPUTE_DBG("cdw: %i\n", cs->cdw); @@ -460,6 +455,8 @@ static void evergreen_launch_grid( evergreen_set_lds(ctx->cs_shader, 0, 0, num_waves); evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input); evergreen_direct_dispatch(ctx_, block_layout, grid_layout); + /* set draw pending so flush function know we mean business */ + ctx->flags |= R600_CONTEXT_DRAW_PENDING; compute_emit_cs(ctx); } diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c b/src/gallium/drivers/r600/evergreen_compute_internal.c index 1d11bab..8bb6426 100644 --- a/src/gallium/drivers/r600/evergreen_compute_internal.c +++ b/src/gallium/drivers/r600/evergreen_compute_internal.c @@ -559,7 +559,7 @@ void evergreen_set_tex_resource( res->coher_bo_size = tmp->offset[0] + util_format_get_blockwidth(tmp->real_format)*view->base.texture->width0*height*depth; - r600_inval_texture_cache(pipe->ctx); + pipe->ctx->flags |= R600_CONTEXT_TEX_FLUSH; evergreen_emit_force_reloc(res); evergreen_emit_force_reloc(res); @@ -618,7 +618,7 @@ void evergreen_set_const_cache( res->usage = RADEON_USAGE_READ; res->coher_bo_size = size; - r600_inval_shader_cache(pipe->ctx); + pipe->ctx->flags |= R600_CONTEXT_SH_FLUSH; } 
struct r600_resource* r600_compute_buffer_alloc_vram( diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c index 546c884..cf4a225 100644 --- a/src/gallium/drivers/r600/evergreen_hw_context.c +++ b/src/gallium/drivers/r600/evergreen_hw_context.c @@ -761,8 +761,9 @@ static inline void evergreen_context_pipe_state_set_sampler_border(struct r600_c /* We have to flush the shaders before we change the border color * registers, or previous draw commands that haven't completed yet * will end up using the new border color. */ - if (dirty & R600_BLOCK_STATUS_DIRTY) - r600_context_ps_partial_flush(ctx); + if (dirty & R600_BLOCK_STATUS_DIRTY) { + ctx->flags |= R600_CONTEXT_DRAW_FLUSH; + } if (dirty) r600_context_dirty_block(ctx, block, dirty, 4); } @@ -823,3 +824,108 @@ void evergreen_set_streamout_enable(struct r600_context *ctx, unsigned buffer_en cs->buf[cs->cdw++] = S_028B94_STREAMOUT_0_EN(0); } } + +void evergreen_flush_emit(struct r600_context *rctx) +{ + struct radeon_winsys_cs *cs = rctx->cs; + unsigned mask; + + if (!(rctx->flags & R600_CONTEXT_DRAW_PENDING)) { + return; + } + /* for GPU without vertex cache flush the texture cache */ + if (!rctx->has_vertex_cache) { + rctx->flags |= R600_CONTEXT_TEX_FLUSH; + } + + if (rctx->flags & R600_CONTEXT_DRAW_FLUSH) { + rctx->flags &= ~R600_CONTEXT_DRAW_FLUSH; + cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); + cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); + } + + if (rctx->flags & R600_CONTEXT_DB_FLUSH) { + cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); + cs->buf[cs->cdw++] = EVENT_TYPE(FLUSH_AND_INV_DB_META) | EVENT_INDEX(0); + } + + mask = R600_CONTEXT_CB_FLUSH | + R600_CONTEXT_DB_FLUSH | + R600_CONTEXT_SH_FLUSH | + R600_CONTEXT_TEX_FLUSH | + R600_CONTEXT_VTX_FLUSH | + R600_CONTEXT_STREAMOUT_FLUSH; + if (rctx->flags & mask) { + /* anything left (cb, vtx, shader, streamout) can be flushed + * in a more generic way + */ + 
unsigned flags = 0; + + flags |= (rctx->flags & R600_CONTEXT_CB_FLUSH) ? S_0085F0_CB_ACTION_ENA(1) | + S_0085F0_CB0_DEST_BASE_ENA(1) | + S_0085F0_CB1_DEST_BASE_ENA(1) | + S_0085F0_CB2_DEST_BASE_ENA(1) | + S_0085F0_CB3_DEST_BASE_ENA(1) | + S_0085F0_CB4_DEST_BASE_ENA(1) | + S_0085F0_CB5_DEST_BASE_ENA(1) | + S_0085F0_CB6_DEST_BASE_ENA(1) | + S_0085F0_CB7_DEST_BASE_ENA(1) | + S_0085F0_CB8_DEST_BASE_ENA(1) | + S_0085F0_CB9_DEST_BASE_ENA(1) | + S_0085F0_CB10_DEST_BASE_ENA(1) | + S_0085F0_CB11_DEST_BASE_ENA(1) : 0; + flags |= (rctx->flags & R600_CONTEXT_DB_FLUSH) ? S_0085F0_DB_ACTION_ENA(1) | + S_0085F0_DB_DEST_BASE_ENA(1) : 0; + flags |= (rctx->flags & R600_CONTEXT_SH_FLUSH) ? S_0085F0_SH_ACTION_ENA(1) : 0; + flags |= (rctx->flags & R600_CONTEXT_TEX_FLUSH) ? S_0085F0_TC_ACTION_ENA(1) : 0; + flags |= (rctx->flags & R600_CONTEXT_VTX_FLUSH) ? S_0085F0_VC_ACTION_ENA(1) : 0; + flags |= (rctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) ? S_0085F0_DEST_BASE_0_ENA(1) | + S_0085F0_SMX_ACTION_ENA(1) | + S_0085F0_SO0_DEST_BASE_ENA(1) | + S_0085F0_SO1_DEST_BASE_ENA(1) | + S_0085F0_SO2_DEST_BASE_ENA(1) | + S_0085F0_SO3_DEST_BASE_ENA(1) : 0; + rctx->flags &= ~mask; + cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0); + cs->buf[cs->cdw++] = flags; /* CP_COHER_CNTL */ + cs->buf[cs->cdw++] = 0xffffffff; /* CP_COHER_SIZE */ + cs->buf[cs->cdw++] = 0; /* CP_COHER_BASE */ + cs->buf[cs->cdw++] = 0x0000000A; /* POLL_INTERVAL */ + } + + if (rctx->flags & R600_CONTEXT_FLUSH_AND_INV) { + rctx->flags &= ~R600_CONTEXT_FLUSH_AND_INV; + + cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); + cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0); + cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0); + /* CP_COHER_CNTL */ + cs->buf[cs->cdw++] = S_0085F0_SO0_DEST_BASE_ENA(1) | + S_0085F0_SO1_DEST_BASE_ENA(1) | + S_0085F0_SO2_DEST_BASE_ENA(1) | + S_0085F0_SO3_DEST_BASE_ENA(1) | + S_0085F0_CB0_DEST_BASE_ENA(1) | + S_0085F0_CB1_DEST_BASE_ENA(1) | + S_0085F0_CB2_DEST_BASE_ENA(1) 
| + S_0085F0_CB3_DEST_BASE_ENA(1) | + S_0085F0_CB4_DEST_BASE_ENA(1) | + S_0085F0_CB5_DEST_BASE_ENA(1) | + S_0085F0_CB6_DEST_BASE_ENA(1) | + S_0085F0_CB7_DEST_BASE_ENA(1) | + S_0085F0_DB_DEST_BASE_ENA(1) | + S_0085F0_CB8_DEST_BASE_ENA(1) | + S_0085F0_CB9_DEST_BASE_ENA(1) | + S_0085F0_CB10_DEST_BASE_ENA(1) | + S_0085F0_CB11_DEST_BASE_ENA(1) | + S_0085F0_TC_ACTION_ENA(1) | + S_0085F0_VC_ACTION_ENA(1) | + S_0085F0_SH_ACTION_ENA(1) | + S_0085F0_SMX_ACTION_ENA(1); + cs->buf[cs->cdw++] = 0xffffffff; /* CP_COHER_SIZE */ + cs->buf[cs->cdw++] = 0; /* CP_COHER_BASE */ + cs->buf[cs->cdw++] = 0x0000000A; /* POLL_INTERVAL */ + } + + /* everything is properly flush */ + rctx->flags &= R600_CONTEXT_DRAW_PENDING; +} diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 214d76b..8e3eb95 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1160,8 +1160,9 @@ static void evergreen_bind_ps_sampler(struct pipe_context *ctx, unsigned count, struct r600_context *rctx = (struct r600_context *)ctx; struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; - if (count) - r600_inval_texture_cache(rctx); + if (count) { + rctx->flags |= R600_CONTEXT_TEX_FLUSH; + } memcpy(rctx->ps_samplers.samplers, states, sizeof(void*) * count); rctx->ps_samplers.n_samplers = count; @@ -1176,8 +1177,9 @@ static void evergreen_bind_vs_sampler(struct pipe_context *ctx, unsigned count, struct r600_context *rctx = (struct r600_context *)ctx; struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; - if (count) - r600_inval_texture_cache(rctx); + if (count) { + rctx->flags |= R600_CONTEXT_TEX_FLUSH; + } for (int i = 0; i < count; i++) { evergreen_context_pipe_state_set_vs_sampler(rctx, rstates[i], i); @@ -1674,6 +1676,7 @@ static void evergreen_db(struct r600_context *rctx, struct r600_pipe_state *rsta if (rtex->hyperz) { uint64_t htile_offset = 
rtex->hyperz->surface.level[level].offset; + rctx->flags |= R600_CONTEXT_FLUSH_AND_INV; rctx->db_misc_state.hyperz = true; rctx->db_misc_state.db_htile_surface_mask = 0xffffffff; r600_atom_dirty(rctx, &rctx->db_misc_state.atom); @@ -1709,7 +1712,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, if (rstate == NULL) return; - r600_flush_framebuffer(rctx, false); + rctx->flags |= R600_CONTEXT_CB_FLUSH | R600_CONTEXT_FLUSH_AND_INV; /* unreference old buffer and reference new one */ rstate->id = R600_PIPE_STATE_FRAMEBUFFER; @@ -1729,6 +1732,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, } if (state->zsbuf) { + rctx->flags |= R600_CONTEXT_DB_FLUSH; evergreen_db(rctx, rstate, state); } diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index 1ac5944..672e698 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -46,7 +46,8 @@ #define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10 #define EVENT_TYPE_ZPASS_DONE 0x15 #define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16 -#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH 0x1f +#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH 0x1f +#define FLUSH_AND_INV_DB_META 0x2c #define EVENT_TYPE(x) ((x) << 0) #define EVENT_INDEX(x) ((x) << 8) diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 0ae7959..820b356 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -188,8 +188,17 @@ struct r600_so_target { unsigned so_index; }; -#define R600_CONTEXT_DRAW_PENDING (1 << 0) -#define R600_CONTEXT_DST_CACHES_DIRTY (1 << 1) +#define R600_CONTEXT_DRAW_PENDING (1 << 0) +#define R600_CONTEXT_DRAW_FLUSH (1 << 1) +#define R600_CONTEXT_CB_FLUSH (1 << 2) +#define R600_CONTEXT_DB_FLUSH (1 << 3) +#define R600_CONTEXT_SH_FLUSH (1 << 4) +#define R600_CONTEXT_TEX_FLUSH (1 << 5) +#define R600_CONTEXT_VTX_FLUSH (1 << 6) +#define R600_CONTEXT_STREAMOUT_FLUSH (1 << 7) +#define 
R600_CONTEXT_WAIT_IDLE (1 << 8) +#define R600_CONTEXT_FLUSH_AND_INV (1 << 9) +#define R600_CONTEXT_HTILE_ERRATA (1 << 10) struct r600_context; struct r600_screen; @@ -207,10 +216,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags); void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fence, unsigned offset, unsigned value); -void r600_inval_shader_cache(struct r600_context *ctx); -void r600_inval_texture_cache(struct r600_context *ctx); -void r600_inval_vertex_cache(struct r600_context *ctx); -void r600_flush_framebuffer(struct r600_context *ctx, bool flush_now); +void r600_flush_emit(struct r600_context *ctx); void r600_context_streamout_begin(struct r600_context *ctx); void r600_context_streamout_end(struct r600_context *ctx); @@ -222,6 +228,7 @@ void r600_context_block_resource_emit_dirty(struct r600_context *ctx, struct r60 int evergreen_context_init(struct r600_context *ctx); void evergreen_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); void evergreen_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); +void evergreen_flush_emit(struct r600_context *rctx); void _r600_pipe_state_add_reg_bo(struct r600_context *ctx, struct r600_pipe_state *state, diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 8e2deb1..774b876 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -110,7 +110,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe, struct r600_vertexbuf_state * state = &rctx->vertex_buffer_state; state->dirty_mask |= 1 << i; - r600_inval_vertex_cache(rctx); + rctx->flags |= R600_CONTEXT_VTX_FLUSH; r600_atom_dirty(rctx, &state->atom); } } diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index 4f2c03a..95c861a 100644 --- 
a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -114,19 +114,6 @@ err: return; } -void r600_context_ps_partial_flush(struct r600_context *ctx) -{ - struct radeon_winsys_cs *cs = ctx->cs; - - if (!(ctx->flags & R600_CONTEXT_DRAW_PENDING)) - return; - - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); - - ctx->flags &= ~R600_CONTEXT_DRAW_PENDING; -} - static void r600_init_block(struct r600_context *ctx, struct r600_block *block, const struct r600_reg *reg, int index, int nreg, @@ -825,7 +812,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, } /* Count in framebuffer cache flushes at the end of CS. */ - num_dw += 7; /* one SURFACE_SYNC and CACHE_FLUSH_AND_INV (r6xx-only) */ + num_dw += 44; /* one SURFACE_SYNC and CACHE_FLUSH_AND_INV (r6xx-only) */ /* Save 16 dwords for the fence mechanism. */ num_dw += 16; @@ -853,7 +840,7 @@ void r600_context_dirty_block(struct r600_context *ctx, LIST_ADDTAIL(&block->list,&ctx->dirty); if (block->flags & REG_FLAG_FLUSH_CHANGE) { - r600_context_ps_partial_flush(ctx); + ctx->flags |= R600_CONTEXT_DRAW_FLUSH; } } } @@ -1085,8 +1072,9 @@ static inline void r600_context_pipe_state_set_sampler_border(struct r600_contex /* We have to flush the shaders before we change the border color * registers, or previous draw commands that haven't completed yet * will end up using the new border color. 
*/ - if (dirty & R600_BLOCK_STATUS_DIRTY) - r600_context_ps_partial_flush(ctx); + if (dirty & R600_BLOCK_STATUS_DIRTY) { + ctx->flags |= R600_CONTEXT_DRAW_FLUSH; + } if (dirty) r600_context_dirty_block(ctx, block, dirty, 3); } @@ -1200,54 +1188,116 @@ void r600_context_block_resource_emit_dirty(struct r600_context *ctx, struct r60 LIST_DELINIT(&block->list); } -void r600_inval_shader_cache(struct r600_context *ctx) +void r600_flush_emit(struct r600_context *rctx) { - ctx->surface_sync_cmd.flush_flags |= S_0085F0_SH_ACTION_ENA(1); - r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom); -} - -void r600_inval_texture_cache(struct r600_context *ctx) -{ - ctx->surface_sync_cmd.flush_flags |= S_0085F0_TC_ACTION_ENA(1); - r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom); -} - -void r600_inval_vertex_cache(struct r600_context *ctx) -{ - if (ctx->has_vertex_cache) { - ctx->surface_sync_cmd.flush_flags |= S_0085F0_VC_ACTION_ENA(1); - } else { - /* Some GPUs don't have the vertex cache and must use the texture cache instead. */ - ctx->surface_sync_cmd.flush_flags |= S_0085F0_TC_ACTION_ENA(1); - } - r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom); -} + struct radeon_winsys_cs *cs = rctx->cs; + unsigned mask; -void r600_flush_framebuffer(struct r600_context *ctx, bool flush_now) -{ - if (!(ctx->flags & R600_CONTEXT_DST_CACHES_DIRTY)) + if (!(rctx->flags & R600_CONTEXT_DRAW_PENDING)) { return; + } + /* for GPU without vertex cache flush the texture cache */ + if (!rctx->has_vertex_cache) { + rctx->flags |= R600_CONTEXT_TEX_FLUSH; + } - ctx->surface_sync_cmd.flush_flags |= - r600_get_cb_flush_flags(ctx) | - (ctx->framebuffer.zsbuf ? 
S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1) : 0); + if (rctx->flags & R600_CONTEXT_DRAW_FLUSH) { + rctx->flags &= ~R600_CONTEXT_DRAW_FLUSH; + cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); + cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); + } - if (flush_now) { - r600_emit_atom(ctx, &ctx->surface_sync_cmd.atom); - } else { - r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom); + if (rctx->flags & R600_CONTEXT_WAIT_IDLE) { + /* wait for things to settle */ + rctx->flags &= ~R600_CONTEXT_WAIT_IDLE; + cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONFIG_REG, 1, 0); + cs->buf[cs->cdw++] = (R_008040_WAIT_UNTIL - R600_CONFIG_REG_OFFSET) >> 2; + cs->buf[cs->cdw++] = S_008040_WAIT_3D_IDLE(1); } - /* Also add a complete cache flush to work around broken flushing on R6xx. */ - if (ctx->chip_class == R600) { - if (flush_now) { - r600_emit_atom(ctx, &ctx->r6xx_flush_and_inv_cmd); - } else { - r600_atom_dirty(ctx, &ctx->r6xx_flush_and_inv_cmd); + if (rctx->flags & R600_CONTEXT_FLUSH_AND_INV) { + rctx->flags &= ~R600_CONTEXT_FLUSH_AND_INV; + + cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); + cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0); + + /* db flush are special due to errata with hyperz, we need to + * insert noop so cache have time to really flush + */ + if (rctx->flags & R600_CONTEXT_HTILE_ERRATA) { + /* R600_CONTEXT_HTILE_ERRATA is persistant for whole cs */ + cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 31, 0); + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + 
cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; + cs->buf[cs->cdw++] = 0xdeadcafe; } } - ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY; + mask = R600_CONTEXT_CB_FLUSH | + R600_CONTEXT_DB_FLUSH | + R600_CONTEXT_SH_FLUSH | + R600_CONTEXT_TEX_FLUSH | + R600_CONTEXT_VTX_FLUSH | + R600_CONTEXT_STREAMOUT_FLUSH; + if (rctx->flags & mask) { + /* anything left (cb, vtx, shader, streamout) can be flushed + * in a more generic way + */ + unsigned flags = 0; + + flags |= S_0085F0_CB1_DEST_BASE_ENA(1) | S_0085F0_DEST_BASE_0_ENA(1); + flags |= (rctx->flags & R600_CONTEXT_CB_FLUSH) ? S_0085F0_CB_ACTION_ENA(1) : 0; + flags |= (rctx->flags & R600_CONTEXT_DB_FLUSH) ? S_0085F0_DB_ACTION_ENA(1) : 0; + flags |= (rctx->flags & R600_CONTEXT_SH_FLUSH) ? S_0085F0_SH_ACTION_ENA(1) : 0; + flags |= (rctx->flags & R600_CONTEXT_TEX_FLUSH) ? S_0085F0_TC_ACTION_ENA(1) : 0; + flags |= (rctx->flags & R600_CONTEXT_VTX_FLUSH) ? S_0085F0_VC_ACTION_ENA(1) : 0; + flags |= (rctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) ? 
S_0085F0_DEST_BASE_0_ENA(1) | + S_0085F0_SO0_DEST_BASE_ENA(1) | + S_0085F0_SO1_DEST_BASE_ENA(1) | + S_0085F0_SO2_DEST_BASE_ENA(1) | + S_0085F0_SO3_DEST_BASE_ENA(1) | + S_0085F0_SMX_ACTION_ENA(1) : 0; + rctx->flags &= ~mask; + cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0); + cs->buf[cs->cdw++] = flags; /* CP_COHER_CNTL */ + cs->buf[cs->cdw++] = 0xffffffff; /* CP_COHER_SIZE */ + cs->buf[cs->cdw++] = 0; /* CP_COHER_BASE */ + cs->buf[cs->cdw++] = 0x0000000A; /* POLL_INTERVAL */ + } + + /* everything is properly flush */ + rctx->flags &= R600_CONTEXT_DRAW_PENDING | + /* R600_CONTEXT_HTILE_ERRATA is persistant for whole cs */ + R600_CONTEXT_HTILE_ERRATA; } void r600_context_flush(struct r600_context *ctx, unsigned flags) @@ -1276,10 +1326,13 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags) streamout_suspended = true; } - r600_flush_framebuffer(ctx, true); - - /* partial flush is needed to avoid lockups on some chips with user fences */ - r600_context_ps_partial_flush(ctx); + ctx->flags |= R600_CONTEXT_DRAW_FLUSH; + ctx->flags |= R600_CONTEXT_FLUSH_AND_INV; + if (ctx->chip_class >= EVERGREEN) { + evergreen_flush_emit(ctx); + } else { + r600_flush_emit(ctx); + } /* old kernels and userspace don't set SX_MISC, so we must reset it to 0 here */ if (ctx->chip_class <= R700) { @@ -1298,11 +1351,9 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags) /* Begin a new CS. */ r600_emit_atom(ctx, &ctx->start_cs_cmd.atom); - /* Invalidate caches. */ - r600_inval_vertex_cache(ctx); - r600_inval_texture_cache(ctx); - r600_inval_shader_cache(ctx); - r600_flush_framebuffer(ctx, false); + ctx->flags |= R600_CONTEXT_SH_FLUSH | + R600_CONTEXT_TEX_FLUSH | + R600_CONTEXT_VTX_FLUSH; /* Re-emit states. 
*/ r600_atom_dirty(ctx, &ctx->cb_misc_state.atom); @@ -1357,7 +1408,10 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fen va = r600_resource_va(&ctx->screen->screen, (void*)fence_bo); va = va + (offset << 2); - r600_context_ps_partial_flush(ctx); + ctx->flags &= ~R600_CONTEXT_DRAW_FLUSH; + cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); + cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); + cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL; /* ADDRESS_LO */ @@ -1518,7 +1572,7 @@ void r600_context_streamout_end(struct r600_context *ctx) { struct radeon_winsys_cs *cs = ctx->cs; struct r600_so_target **t = ctx->so_targets; - unsigned i, flush_flags = 0; + unsigned i; uint64_t va; if (ctx->chip_class >= EVERGREEN) { @@ -1545,7 +1599,6 @@ void r600_context_streamout_end(struct r600_context *ctx) r600_context_bo_reloc(ctx, t[i]->filled_size, RADEON_USAGE_WRITE); - flush_flags |= S_0085F0_SO0_DEST_BASE_ENA(1) << i; } } @@ -1554,23 +1607,11 @@ void r600_context_streamout_end(struct r600_context *ctx) } else { r600_set_streamout_enable(ctx, 0); } - + ctx->flags |= R600_CONTEXT_STREAMOUT_FLUSH; /* This is needed to fix cache flushes on r600. */ if (ctx->chip_class == R600) { - if (ctx->family == CHIP_RV670 || - ctx->family == CHIP_RS780 || - ctx->family == CHIP_RS880) { - flush_flags |= S_0085F0_DEST_BASE_0_ENA(1); - } - - r600_atom_dirty(ctx, &ctx->r6xx_flush_and_inv_cmd); + ctx->flags |= R600_CONTEXT_FLUSH_AND_INV; } - - /* Flush streamout caches. 
*/ - ctx->surface_sync_cmd.flush_flags |= - S_0085F0_SMX_ACTION_ENA(1) | flush_flags; - r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom); - ctx->num_cs_dw_streamout_end = 0; #if 0 diff --git a/src/gallium/drivers/r600/r600_hw_context_priv.h b/src/gallium/drivers/r600/r600_hw_context_priv.h index 037d5e3..6929336 100644 --- a/src/gallium/drivers/r600/r600_hw_context_priv.h +++ b/src/gallium/drivers/r600/r600_hw_context_priv.h @@ -28,7 +28,8 @@ #include "r600_pipe.h" -#define R600_MAX_DRAW_CS_DWORDS 11 +/* we alsoe here account size needed for flushing */ +#define R600_MAX_DRAW_CS_DWORDS 64 /* these flags are used in register flags and added into block flags */ #define REG_FLAG_NEED_BO 1 diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 9f20560..07a398f 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -231,8 +231,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void rctx->context.create_video_decoder = vl_create_decoder; rctx->context.create_video_buffer = vl_video_buffer_create; - r600_init_common_atoms(rctx); - switch (rctx->chip_class) { case R600: case R700: diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 5ff4d51..4add90c 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -341,9 +341,7 @@ struct r600_context { struct r600_command_buffer start_cs_cmd; /* invariant state mostly */ /** Compute specific registers initializations. The start_cs_cmd atom * must be emitted before start_compute_cs_cmd. 
*/ - struct r600_command_buffer start_compute_cs_cmd; - struct r600_surface_sync_cmd surface_sync_cmd; - struct r600_atom r6xx_flush_and_inv_cmd; + struct r600_command_buffer start_compute_cs_cmd; struct r600_cb_misc_state cb_misc_state; struct r600_db_misc_state db_misc_state; /** Vertex buffers for fetch shaders */ @@ -528,8 +526,6 @@ void r600_translate_index_buffer(struct r600_context *r600, void r600_init_atom(struct r600_atom *atom, void (*emit)(struct r600_context *ctx, struct r600_atom *state), unsigned num_dw, enum r600_atom_flags flags); -void r600_init_common_atoms(struct r600_context *rctx); -unsigned r600_get_cb_flush_flags(struct r600_context *rctx); void r600_texture_barrier(struct pipe_context *ctx); void r600_set_index_buffer(struct pipe_context *ctx, const struct pipe_index_buffer *ib); diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 8925a23..3f43a9d 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1133,8 +1133,9 @@ static void r600_set_sampler_views(struct r600_context *rctx, struct r600_pipe_sampler_view **rviews = (struct r600_pipe_sampler_view **)views; unsigned i; - if (count) - r600_inval_texture_cache(rctx); + if (count) { + rctx->flags |= R600_CONTEXT_TEX_FLUSH; + } for (i = 0; i < count; i++) { if (rviews[i]) { @@ -1632,6 +1633,8 @@ static void r600_db(struct r600_context *rctx, struct r600_pipe_state *rstate, if (rtex->hyperz) { uint64_t htile_offset = rtex->hyperz->surface.level[level].offset; + rctx->flags |= R600_CONTEXT_FLUSH_AND_INV; + rctx->flags |= R600_CONTEXT_HTILE_ERRATA; rctx->db_misc_state.hyperz = true; rctx->db_misc_state.db_htile_surface_mask = 0xffffffff; r600_atom_dirty(rctx, &rctx->db_misc_state.atom); @@ -1676,7 +1679,10 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, if (rstate == NULL) return; - r600_flush_framebuffer(rctx, false); + /* the htile errata is also needed for cb should probably rename */ 
+ rctx->flags |= R600_CONTEXT_CB_FLUSH | + R600_CONTEXT_DRAW_FLUSH | + R600_CONTEXT_FLUSH_AND_INV; /* unreference old buffer and reference new one */ rstate->id = R600_PIPE_STATE_FRAMEBUFFER; @@ -1692,6 +1698,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, r600_cb(rctx, rstate, state, i); } if (state->zsbuf) { + rctx->flags |= R600_CONTEXT_DB_FLUSH; r600_db(rctx, rstate, state); } @@ -1962,14 +1969,8 @@ void r600_adjust_gprs(struct r600_context *rctx) unsigned tmp; int diff; - /* XXX: Following call moved from r600_bind_[ps|vs]_shader, - * it seems eg+ doesn't need it, r6xx/7xx probably need it only for - * adjusting the GPR allocation? - * Do we need this if we aren't really changing config below? */ - r600_inval_shader_cache(rctx); - - if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs) - { + rctx->flags |= R600_CONTEXT_SH_FLUSH; + if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs) { diff = rctx->ps_shader->current->shader.bc.ngpr - rctx->default_ps_gprs; num_vs_gprs -= diff; num_ps_gprs += diff; diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 3c42a44..c9fd362 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -56,27 +56,6 @@ void r600_release_command_buffer(struct r600_command_buffer *cb) FREE(cb->buf); } -static void r600_emit_surface_sync(struct r600_context *rctx, struct r600_atom *atom) -{ - struct radeon_winsys_cs *cs = rctx->cs; - struct r600_surface_sync_cmd *a = (struct r600_surface_sync_cmd*)atom; - - cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0); - cs->buf[cs->cdw++] = a->flush_flags; /* CP_COHER_CNTL */ - cs->buf[cs->cdw++] = 0xffffffff; /* CP_COHER_SIZE */ - cs->buf[cs->cdw++] = 0; /* CP_COHER_BASE */ - cs->buf[cs->cdw++] = 0x0000000A; /* POLL_INTERVAL */ - - a->flush_flags = 0; -} - -static void r600_emit_r6xx_flush_and_inv(struct r600_context *rctx, struct 
r600_atom *atom) -{ - struct radeon_winsys_cs *cs = rctx->cs; - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0); -} - void r600_init_atom(struct r600_atom *atom, void (*emit)(struct r600_context *ctx, struct r600_atom *state), unsigned num_dw, enum r600_atom_flags flags) @@ -86,37 +65,11 @@ void r600_init_atom(struct r600_atom *atom, atom->flags = flags; } -void r600_init_common_atoms(struct r600_context *rctx) -{ - r600_init_atom(&rctx->surface_sync_cmd.atom, r600_emit_surface_sync, 5, EMIT_EARLY); - r600_init_atom(&rctx->r6xx_flush_and_inv_cmd, r600_emit_r6xx_flush_and_inv, 2, EMIT_EARLY); -} - -unsigned r600_get_cb_flush_flags(struct r600_context *rctx) -{ - unsigned flags = 0; - - if (rctx->framebuffer.nr_cbufs) { - flags |= S_0085F0_CB_ACTION_ENA(1) | - (((1 << rctx->framebuffer.nr_cbufs) - 1) << S_0085F0_CB0_DEST_BASE_ENA_SHIFT); - } - - /* Workaround for broken flushing on some R6xx chipsets. 
*/ - if (rctx->family == CHIP_RV670 || - rctx->family == CHIP_RS780 || - rctx->family == CHIP_RS880) { - flags |= S_0085F0_CB1_DEST_BASE_ENA(1) | - S_0085F0_DEST_BASE_0_ENA(1); - } - return flags; -} - void r600_texture_barrier(struct pipe_context *ctx) { struct r600_context *rctx = (struct r600_context *)ctx; - rctx->surface_sync_cmd.flush_flags |= S_0085F0_TC_ACTION_ENA(1) | r600_get_cb_flush_flags(rctx); - r600_atom_dirty(rctx, &rctx->surface_sync_cmd.atom); + rctx->flags |= R600_CONTEXT_DRAW_FLUSH | R600_CONTEXT_CB_FLUSH | R600_CONTEXT_TEX_FLUSH; } static bool r600_conv_pipe_prim(unsigned pprim, unsigned *prim) @@ -368,7 +321,7 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) rctx->vertex_elements = v; if (v) { - r600_inval_shader_cache(rctx); + rctx->flags |= R600_CONTEXT_SH_FLUSH; rctx->states[v->rstate.id] = &v->rstate; r600_context_pipe_state_set(rctx, &v->rstate); @@ -412,9 +365,9 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, util_copy_vertex_buffers(rctx->vertex_buffer, &rctx->nr_vertex_buffers, buffers, count); - r600_inval_vertex_cache(rctx); + rctx->flags |= R600_CONTEXT_VTX_FLUSH; state->atom.num_dw = (rctx->chip_class >= EVERGREEN ? 12 : 11) * - rctx->nr_vertex_buffers; + rctx->nr_vertex_buffers; for (i = 0 ; i < rctx->nr_vertex_buffers; i++) { state->dirty_mask |= 1 << i; } @@ -523,6 +476,9 @@ static int r600_shader_select(struct pipe_context *ctx, if (dirty) *dirty = 1; + /* we are changing shader need a flush */ + rctx->flags |= R600_CONTEXT_SH_FLUSH; + shader->next_variant = sel->current; sel->current = shader; @@ -667,7 +623,7 @@ static void r600_update_alpha_ref(struct r600_context *rctx) void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state) { - r600_inval_shader_cache(rctx); + rctx->flags |= R600_CONTEXT_SH_FLUSH; state->atom.num_dw = rctx->chip_class >= EVERGREEN ? 
util_bitcount(state->dirty_mask)*20 : util_bitcount(state->dirty_mask)*19; r600_atom_dirty(rctx, &state->atom); @@ -893,6 +849,12 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo) r600_update_derived_state(rctx); + if (rctx->chip_class >= EVERGREEN) { + evergreen_flush_emit(rctx); + } else { + r600_flush_emit(rctx); + } + if (info.indexed) { /* Initialize the index buffer struct. */ pipe_resource_reference(&ib.buffer, rctx->index_buffer.buffer); @@ -999,7 +961,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo) (info.count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0); } - rctx->flags |= R600_CONTEXT_DST_CACHES_DIRTY | R600_CONTEXT_DRAW_PENDING; + rctx->flags |= R600_CONTEXT_DRAW_PENDING; /* Set the depth buffer as dirty. */ if (rctx->framebuffer.zsbuf) { -- 1.7.10.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev