For the series:

Reviewed-by: Marek Olšák <marek.ol...@amd.com>

Marek On Fri, Aug 25, 2017 at 4:40 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote: > From: Nicolai Hähnle <nicolai.haeh...@amd.com> > > The data is read when the render_cond_atom is emitted, so we must > delay emitting the atom until after the flush. > > Fixes: 0fe0320dc074 ("radeonsi: use optimal packet order when doing a > pipeline sync") > --- > src/gallium/drivers/radeon/r600_pipe_common.h | 3 ++- > src/gallium/drivers/radeon/r600_query.c | 9 ++++++--- > src/gallium/drivers/radeonsi/si_state_draw.c | 15 ++++++++++----- > 3 files changed, 18 insertions(+), 9 deletions(-) > > diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h > b/src/gallium/drivers/radeon/r600_pipe_common.h > index dca56734cd7..f78e38b65af 100644 > --- a/src/gallium/drivers/radeon/r600_pipe_common.h > +++ b/src/gallium/drivers/radeon/r600_pipe_common.h > @@ -54,21 +54,22 @@ struct u_log_context; > #define R600_RESOURCE_FLAG_TRANSFER (PIPE_RESOURCE_FLAG_DRV_PRIV > << 0) > #define R600_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV > << 1) > #define R600_RESOURCE_FLAG_FORCE_TILING > (PIPE_RESOURCE_FLAG_DRV_PRIV << 2) > #define R600_RESOURCE_FLAG_DISABLE_DCC (PIPE_RESOURCE_FLAG_DRV_PRIV > << 3) > #define R600_RESOURCE_FLAG_UNMAPPABLE (PIPE_RESOURCE_FLAG_DRV_PRIV > << 4) > > #define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0) > /* Pipeline & streamout query controls. */ > #define R600_CONTEXT_START_PIPELINE_STATS (1u << 1) > #define R600_CONTEXT_STOP_PIPELINE_STATS (1u << 2) > -#define R600_CONTEXT_PRIVATE_FLAG (1u << 3) > +#define R600_CONTEXT_FLUSH_FOR_RENDER_COND (1u << 3) > +#define R600_CONTEXT_PRIVATE_FLAG (1u << 4) > > /* special primitive types */ > #define R600_PRIM_RECTANGLE_LIST PIPE_PRIM_MAX > > #define R600_NOT_QUERY 0xffffffff > > /* Debug flags. 
*/ > /* logging and features */ > #define DBG_TEX (1 << 0) > #define DBG_NIR (1 << 1) > diff --git a/src/gallium/drivers/radeon/r600_query.c > b/src/gallium/drivers/radeon/r600_query.c > index f937612bc1f..03ff1018a71 100644 > --- a/src/gallium/drivers/radeon/r600_query.c > +++ b/src/gallium/drivers/radeon/r600_query.c > @@ -1828,25 +1828,28 @@ static void r600_render_condition(struct pipe_context > *ctx, > * from launching the compute grid. > */ > rctx->render_cond = NULL; > > ctx->get_query_result_resource( > ctx, query, true, PIPE_QUERY_TYPE_U64, 0, > &rquery->workaround_buf->b.b, > rquery->workaround_offset); > > /* Settings this in the render cond atom is too late, > * so set it here. */ > - rctx->flags |= rctx->screen->barrier_flags.L2_to_cp; > - > - atom->num_dw = 5; > + rctx->flags |= rctx->screen->barrier_flags.L2_to_cp | > + R600_CONTEXT_FLUSH_FOR_RENDER_COND; > > rctx->render_cond_force_off = old_force_off; > + } > + > + if (needs_workaround) { > + atom->num_dw = 5; > } else { > for (qbuf = &rquery->buffer; qbuf; qbuf = > qbuf->previous) > atom->num_dw += (qbuf->results_end / > rquery->result_size) * 5; > > if (rquery->b.type == > PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) > atom->num_dw *= R600_MAX_STREAMS; > } > } > > rctx->render_cond = query; > diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c > b/src/gallium/drivers/radeonsi/si_state_draw.c > index 1d8be49a480..81751d2186e 100644 > --- a/src/gallium/drivers/radeonsi/si_state_draw.c > +++ b/src/gallium/drivers/radeonsi/si_state_draw.c > @@ -1385,34 +1385,39 @@ void si_draw_vbo(struct pipe_context *ctx, const > struct pipe_draw_info *info) > SI_CONTEXT_PS_PARTIAL_FLUSH | > SI_CONTEXT_CS_PARTIAL_FLUSH))) { > /* If we have to wait for idle, set all states first, so that > all > * SET packets are processed in parallel with previous draw > calls. > * Then upload descriptors, set shader pointers, and draw, and > * prefetch at the end. This ensures that the time the CUs > * are idle is very short. 
(there are only SET_SH packets > between > * the wait and the draw) > */ > struct r600_atom *shader_pointers = > &sctx->shader_pointers.atom; > + unsigned masked_atoms = 1u << shader_pointers->id; > > - /* Emit all states except shader pointers. */ > - si_emit_all_states(sctx, info, 1 << shader_pointers->id); > + if (unlikely(sctx->b.flags & > R600_CONTEXT_FLUSH_FOR_RENDER_COND)) > + masked_atoms |= 1u << sctx->b.render_cond_atom.id; > + > + /* Emit all states except shader pointers and render > condition. */ > + si_emit_all_states(sctx, info, masked_atoms); > si_emit_cache_flush(sctx); > > /* <-- CUs are idle here. */ > if (!si_upload_graphics_shader_descriptors(sctx)) > return; > > /* Set shader pointers after descriptors are uploaded. */ > - if (si_is_atom_dirty(sctx, shader_pointers)) { > + if (si_is_atom_dirty(sctx, shader_pointers)) > shader_pointers->emit(&sctx->b, NULL); > - sctx->dirty_atoms = 0; > - } > + if (si_is_atom_dirty(sctx, &sctx->b.render_cond_atom)) > + sctx->b.render_cond_atom.emit(&sctx->b, NULL); > + sctx->dirty_atoms = 0; > > si_emit_draw_packets(sctx, info, indexbuf, index_size, > index_offset); > /* <-- CUs are busy here. */ > > /* Start prefetches after the draw has been started. Both > will run > * in parallel, but starting the draw first is more important. > */ > if (sctx->b.chip_class >= CIK && sctx->prefetch_L2_mask) > cik_emit_prefetch_L2(sctx); > } else { > -- > 2.11.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev