On Mon, Dec 17, 2012 at 8:28 PM, <j.gli...@gmail.com> wrote: > From: Jerome Glisse <jgli...@redhat.com> > > This brings r600g almost in line with the closed source driver when > it comes to flushing and synchronization pattern. > > v2-v4: history lost somewhere in outer space > v5: Fix compute size of flushing, use define for flags, update > worst case cs size requirement for flush, treat rs780 and > newer as r7xx when it comes to streamout. > v6: Fix num dw computation for framebuffer state, remove dead > code, use define instead of hardcoded value. > > Signed-off-by: Jerome Glisse <jgli...@redhat.com> > --- > src/gallium/drivers/r600/evergreen_compute.c | 8 +- > .../drivers/r600/evergreen_compute_internal.c | 4 +- > src/gallium/drivers/r600/evergreen_state.c | 4 +- > src/gallium/drivers/r600/r600.h | 16 +- > src/gallium/drivers/r600/r600_hw_context.c | 179 > +++++++-------------- > src/gallium/drivers/r600/r600_hw_context_priv.h | 2 +- > src/gallium/drivers/r600/r600_state.c | 20 ++- > src/gallium/drivers/r600/r600_state_common.c | 19 +-- > 8 files changed, 90 insertions(+), 162 deletions(-) > > diff --git a/src/gallium/drivers/r600/evergreen_compute.c > b/src/gallium/drivers/r600/evergreen_compute.c > index 66b0cc6..ea75d80 100644 > --- a/src/gallium/drivers/r600/evergreen_compute.c > +++ b/src/gallium/drivers/r600/evergreen_compute.c > @@ -98,7 +98,7 @@ static void evergreen_cs_set_vertex_buffer( > > /* The vertex instructions in the compute shaders use the texture > cache, > * so we need to invalidate it. 
*/ > - rctx->flags |= R600_CONTEXT_TEX_FLUSH; > + rctx->flags |= R600_CONTEXT_GPU_FLUSH; > state->enabled_mask |= 1 << vb_index; > state->dirty_mask |= 1 << vb_index; > state->atom.dirty = true; > @@ -329,7 +329,7 @@ static void compute_emit_cs(struct r600_context *ctx, > const uint *block_layout, > */ > r600_emit_command_buffer(ctx->cs, &ctx->start_compute_cs_cmd); > > - ctx->flags |= R600_CONTEXT_CB_FLUSH; > + ctx->flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV; > r600_flush_emit(ctx); > > /* Emit colorbuffers. */ > @@ -409,7 +409,7 @@ static void compute_emit_cs(struct r600_context *ctx, > const uint *block_layout, > > /* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to > 0xffffffff > */ > - ctx->flags |= R600_CONTEXT_CB_FLUSH; > + ctx->flags |= R600_CONTEXT_GPU_FLUSH; > r600_flush_emit(ctx); > > #if 0 > @@ -468,7 +468,7 @@ void evergreen_emit_cs_shader( > r600_write_value(cs, r600_context_bo_reloc(rctx, kernel->code_bo, > RADEON_USAGE_READ)); > > - rctx->flags |= R600_CONTEXT_SHADERCONST_FLUSH; > + rctx->flags |= R600_CONTEXT_GPU_FLUSH; > } > > static void evergreen_launch_grid( > diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c > b/src/gallium/drivers/r600/evergreen_compute_internal.c > index f7aebf2..94f556f 100644 > --- a/src/gallium/drivers/r600/evergreen_compute_internal.c > +++ b/src/gallium/drivers/r600/evergreen_compute_internal.c > @@ -545,7 +545,7 @@ void evergreen_set_tex_resource( > > util_format_get_blockwidth(tmp->resource.b.b.format) * > view->base.texture->width0*height*depth; > > - pipe->ctx->flags |= R600_CONTEXT_TEX_FLUSH; > + pipe->ctx->flags |= R600_CONTEXT_GPU_FLUSH; > > evergreen_emit_force_reloc(res); > evergreen_emit_force_reloc(res); > @@ -604,7 +604,7 @@ void evergreen_set_const_cache( > res->usage = RADEON_USAGE_READ; > res->coher_bo_size = size; > > - pipe->ctx->flags |= R600_CONTEXT_SHADERCONST_FLUSH; > + pipe->ctx->flags |= R600_CONTEXT_GPU_FLUSH; > } > > struct r600_resource* 
r600_compute_buffer_alloc_vram( > diff --git a/src/gallium/drivers/r600/evergreen_state.c > b/src/gallium/drivers/r600/evergreen_state.c > index 996c1b4..58964c4 100644 > --- a/src/gallium/drivers/r600/evergreen_state.c > +++ b/src/gallium/drivers/r600/evergreen_state.c > @@ -1557,14 +1557,14 @@ static void evergreen_set_framebuffer_state(struct > pipe_context *ctx, > uint32_t i, log_samples; > > if (rctx->framebuffer.state.nr_cbufs) { > - rctx->flags |= R600_CONTEXT_CB_FLUSH; > + rctx->flags |= R600_CONTEXT_WAIT_IDLE | > R600_CONTEXT_FLUSH_AND_INV; > > if (rctx->framebuffer.state.cbufs[0]->texture->nr_samples > > 1) { > rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_CB_META; > } > } > if (rctx->framebuffer.state.zsbuf) { > - rctx->flags |= R600_CONTEXT_DB_FLUSH; > + rctx->flags |= R600_CONTEXT_WAIT_IDLE | > R600_CONTEXT_FLUSH_AND_INV; > } > > util_copy_framebuffer_state(&rctx->framebuffer.state, state); > diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h > index d15cd52..c351982 100644 > --- a/src/gallium/drivers/r600/r600.h > +++ b/src/gallium/drivers/r600/r600.h > @@ -182,17 +182,11 @@ struct r600_so_target { > unsigned so_index; > }; > > -#define R600_CONTEXT_PS_PARTIAL_FLUSH (1 << 0) > -#define R600_CONTEXT_CB_FLUSH (1 << 1) > -#define R600_CONTEXT_DB_FLUSH (1 << 2) > -#define R600_CONTEXT_SHADERCONST_FLUSH (1 << 3) > -#define R600_CONTEXT_TEX_FLUSH (1 << 4) > -#define R600_CONTEXT_VTX_FLUSH (1 << 5) > -#define R600_CONTEXT_STREAMOUT_FLUSH (1 << 6) > -#define R600_CONTEXT_WAIT_IDLE (1 << 7) > -#define R600_CONTEXT_FLUSH_AND_INV (1 << 8) > -#define R600_CONTEXT_HTILE_ERRATA (1 << 9) > -#define R600_CONTEXT_FLUSH_AND_INV_CB_META (1 << 10) > +#define R600_CONTEXT_GPU_FLUSH (1 << 0) > +#define R600_CONTEXT_STREAMOUT_FLUSH (1 << 1) > +#define R600_CONTEXT_WAIT_IDLE (1 << 2) > +#define R600_CONTEXT_FLUSH_AND_INV (1 << 3) > +#define R600_CONTEXT_FLUSH_AND_INV_CB_META (1 << 4) > > struct r600_context; > struct r600_screen; > diff --git 
a/src/gallium/drivers/r600/r600_hw_context.c > b/src/gallium/drivers/r600/r600_hw_context.c > index c7a357e..bce7cc8 100644 > --- a/src/gallium/drivers/r600/r600_hw_context.c > +++ b/src/gallium/drivers/r600/r600_hw_context.c > @@ -424,7 +424,7 @@ void r600_context_dirty_block(struct r600_context *ctx, > LIST_ADDTAIL(&block->list,&ctx->dirty); > > if (block->flags & REG_FLAG_FLUSH_CHANGE) { > - ctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH; > + ctx->flags |= R600_CONTEXT_WAIT_IDLE; > } > } > } > @@ -595,16 +595,13 @@ out: > void r600_flush_emit(struct r600_context *rctx) > { > struct radeon_winsys_cs *cs = rctx->cs; > + unsigned cp_coher_cntl = 0; > + unsigned emit_flush = 0; > > if (!rctx->flags) { > return; > } > > - if (rctx->flags & R600_CONTEXT_PS_PARTIAL_FLUSH) { > - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); > - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) > | EVENT_INDEX(4); > - } > - > if (rctx->chip_class >= R700 && > (rctx->flags & R600_CONTEXT_FLUSH_AND_INV_CB_META)) { > cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); > @@ -614,110 +611,55 @@ void r600_flush_emit(struct r600_context *rctx) > if (rctx->flags & R600_CONTEXT_FLUSH_AND_INV) { > cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); > cs->buf[cs->cdw++] = > EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0); > - > - /* DB flushes are special due to errata with hyperz, we need > to > - * insert a no-op, so that the cache has time to really flush. 
> - */ > - if (rctx->chip_class <= R700 && > - rctx->flags & R600_CONTEXT_HTILE_ERRATA) { > - cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 31, 0); > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > - cs->buf[cs->cdw++] = 0xdeadcafe; > + if (rctx->chip_class >= EVERGREEN) { > + cp_coher_cntl = S_0085F0_CB0_DEST_BASE_ENA(1) | > + S_0085F0_CB1_DEST_BASE_ENA(1) | > + S_0085F0_CB2_DEST_BASE_ENA(1) | > + S_0085F0_CB3_DEST_BASE_ENA(1) | > + S_0085F0_CB4_DEST_BASE_ENA(1) | > + S_0085F0_CB5_DEST_BASE_ENA(1) | > + S_0085F0_CB6_DEST_BASE_ENA(1) | > + S_0085F0_CB7_DEST_BASE_ENA(1) | > + S_0085F0_CB8_DEST_BASE_ENA(1) | > + S_0085F0_CB9_DEST_BASE_ENA(1) | > + S_0085F0_CB10_DEST_BASE_ENA(1) | > + S_0085F0_CB11_DEST_BASE_ENA(1) | > + S_0085F0_DB_DEST_BASE_ENA(1) | > + S_0085F0_TC_ACTION_ENA(1) | > + S_0085F0_CB_ACTION_ENA(1) | > + S_0085F0_DB_ACTION_ENA(1) | > + S_0085F0_SH_ACTION_ENA(1) | > + 
S_0085F0_SMX_ACTION_ENA(1) | > + (1 << 20); /* unknown bit */ > + } else { > + cp_coher_cntl = S_0085F0_SMX_ACTION_ENA(1) | > + S_0085F0_SH_ACTION_ENA(1) | > + S_0085F0_VC_ACTION_ENA(1) | > + S_0085F0_TC_ACTION_ENA(1) | > + (1 << 20); /* unknown bit */ > } > } > > - if (rctx->flags & (R600_CONTEXT_CB_FLUSH | > - R600_CONTEXT_DB_FLUSH | > - R600_CONTEXT_SHADERCONST_FLUSH | > - R600_CONTEXT_TEX_FLUSH | > - R600_CONTEXT_VTX_FLUSH | > - R600_CONTEXT_STREAMOUT_FLUSH)) { > - /* anything left (cb, vtx, shader, streamout) can be flushed > - * using the surface sync packet > - */ > - unsigned flags = 0; > - > - if (rctx->flags & R600_CONTEXT_CB_FLUSH) { > - flags |= S_0085F0_CB_ACTION_ENA(1) | > - S_0085F0_CB0_DEST_BASE_ENA(1) | > - S_0085F0_CB1_DEST_BASE_ENA(1) | > - S_0085F0_CB2_DEST_BASE_ENA(1) | > - S_0085F0_CB3_DEST_BASE_ENA(1) | > - S_0085F0_CB4_DEST_BASE_ENA(1) | > - S_0085F0_CB5_DEST_BASE_ENA(1) | > - S_0085F0_CB6_DEST_BASE_ENA(1) | > - S_0085F0_CB7_DEST_BASE_ENA(1); > - > - if (rctx->chip_class >= EVERGREEN) { > - flags |= S_0085F0_CB8_DEST_BASE_ENA(1) | > - S_0085F0_CB9_DEST_BASE_ENA(1) | > - S_0085F0_CB10_DEST_BASE_ENA(1) | > - S_0085F0_CB11_DEST_BASE_ENA(1); > - } > - > - /* RV670 errata > - * (CB1_DEST_BASE_ENA is also required, which is > - * included unconditionally above). */ > - if (rctx->family == CHIP_RV670 || > - rctx->family == CHIP_RS780 || > - rctx->family == CHIP_RS880) { > - flags |= S_0085F0_DEST_BASE_0_ENA(1); > - } > - } > - > - if (rctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) { > - flags |= S_0085F0_SO0_DEST_BASE_ENA(1) | > - S_0085F0_SO1_DEST_BASE_ENA(1) | > - S_0085F0_SO2_DEST_BASE_ENA(1) | > - S_0085F0_SO3_DEST_BASE_ENA(1) | > - S_0085F0_SMX_ACTION_ENA(1); > - > - /* RV670 errata */ > - if (rctx->family == CHIP_RV670 || > - rctx->family == CHIP_RS780 || > - rctx->family == CHIP_RS880) { > - flags |= S_0085F0_DEST_BASE_0_ENA(1); > - } > - } > + if (rctx->flags & R600_CONTEXT_GPU_FLUSH) { > + cp_coher_cntl |= 0x01900000;
You still have a magic number here (0x01900000). I had already told you about it. We don't allow magic numbers like that in the code; please build the value from named defines instead. Other than that, the patch looks good. Marek _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev