From: Nicolai Hähnle <nicolai.haeh...@amd.com> The result written by the shader workaround needs to be written back, or the CP may read stale data.
Fixes: 78476cfe071a ("radeonsi: enable ARB_transform_feedback_overflow_query") --- src/gallium/drivers/radeon/r600_pipe_common.h | 5 +++++ src/gallium/drivers/radeon/r600_query.c | 4 ++++ src/gallium/drivers/radeonsi/si_pipe.c | 4 +++- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 59886ecccc6..dca56734cd7 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -447,20 +447,25 @@ struct r600_common_screen { * contexts' compressed texture binding masks. */ unsigned compressed_colortex_counter; struct { /* Context flags to set so that all writes from earlier jobs * in the CP are seen by L2 clients. */ unsigned cp_to_L2; + /* Context flags to set so that all writes from earlier jobs + * that end in L2 are seen by CP. + */ + unsigned L2_to_cp; + /* Context flags to set so that all writes from earlier * compute jobs are seen by L2 clients. */ unsigned compute_to_L2; } barrier_flags; void (*query_opaque_metadata)(struct r600_common_screen *rscreen, struct r600_texture *rtex, struct radeon_bo_metadata *md); diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c index eaff39c830d..f937612bc1f 100644 --- a/src/gallium/drivers/radeon/r600_query.c +++ b/src/gallium/drivers/radeon/r600_query.c @@ -1826,20 +1826,24 @@ static void r600_render_condition(struct pipe_context *ctx, /* Reset to NULL to avoid a redundant SET_PREDICATION * from launching the compute grid. */ rctx->render_cond = NULL; ctx->get_query_result_resource( ctx, query, true, PIPE_QUERY_TYPE_U64, 0, &rquery->workaround_buf->b.b, rquery->workaround_offset); + /* Setting this in the render cond atom is too late, + * so set it here. 
*/ + rctx->flags |= rctx->screen->barrier_flags.L2_to_cp; + atom->num_dw = 5; rctx->render_cond_force_off = old_force_off; } else { for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous) atom->num_dw += (qbuf->results_end / rquery->result_size) * 5; if (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) atom->num_dw *= R600_MAX_STREAMS; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 74900439320..93f9e5c49af 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -1071,22 +1071,24 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, (sscreen->b.family == CHIP_STONEY || sscreen->b.family == CHIP_RAVEN); } (void) mtx_init(&sscreen->shader_parts_mutex, mtx_plain); sscreen->use_monolithic_shaders = (sscreen->b.debug_flags & DBG_MONOLITHIC_SHADERS) != 0; sscreen->b.barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 | SI_CONTEXT_INV_VMEM_L1; - if (sscreen->b.chip_class <= VI) + if (sscreen->b.chip_class <= VI) { sscreen->b.barrier_flags.cp_to_L2 |= SI_CONTEXT_INV_GLOBAL_L2; + sscreen->b.barrier_flags.L2_to_cp |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; + } sscreen->b.barrier_flags.compute_to_L2 = SI_CONTEXT_CS_PARTIAL_FLUSH; if (debug_get_bool_option("RADEON_DUMP_SHADERS", false)) sscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS; for (i = 0; i < num_compiler_threads; i++) sscreen->tm[i] = si_create_llvm_target_machine(sscreen); for (i = 0; i < num_compiler_threads_lowprio; i++) sscreen->tm_low_priority[i] = si_create_llvm_target_machine(sscreen); -- 2.11.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev