From: Samuel Pitoiset <samuel.pitoi...@gmail.com> When a stencil buffer is part of the framebuffer state, it is decompressed but because it's bindless, all draw calls set stencil_dirty_level_mask to 1.
v2: Marek - set the flags outside the loop - also clear and set framebuffer.do_update_surf_dirtiness there - do it in the DB->CB copy path too v3: Marek - save and restore the do_update_surf_dirtiness flag Signed-off-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_blit.c | 19 +++++++++++++++++++ src/gallium/drivers/radeonsi/si_pipe.h | 1 + src/gallium/drivers/radeonsi/si_state.c | 10 ++++++++-- 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index f5d9048..448533d 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -113,20 +113,24 @@ si_blit_dbcb_copy(struct si_context *sctx, unsigned fully_copied_levels = 0; if (planes & PIPE_MASK_Z) sctx->dbcb_depth_copy_enabled = true; if (planes & PIPE_MASK_S) sctx->dbcb_stencil_copy_enabled = true; si_mark_atom_dirty(sctx, &sctx->db_render_state); assert(sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled); + bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness; + sctx->decompression_enabled = true; + sctx->framebuffer.do_update_surf_dirtiness = false; + while (level_mask) { unsigned level = u_bit_scan(&level_mask); /* The smaller the mipmap level, the less layers there are * as far as 3D textures are concerned. */ max_layer = util_max_layer(&src->resource.b.b, level); checked_last_layer = MIN2(last_layer, max_layer); surf_tmpl.u.tex.level = level; @@ -156,20 +160,22 @@ si_blit_dbcb_copy(struct si_context *sctx, pipe_surface_reference(&zsurf, NULL); pipe_surface_reference(&cbsurf, NULL); } if (first_layer == 0 && last_layer >= max_layer && first_sample == 0 && last_sample >= u_max_sample(&src->resource.b.b)) fully_copied_levels |= 1u << level; } + sctx->decompression_enabled = false; + sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness; sctx->dbcb_depth_copy_enabled = false; sctx->dbcb_stencil_copy_enabled = false; si_mark_atom_dirty(sctx, &sctx->db_render_state); return fully_copied_levels; } static void si_blit_decompress_depth(struct pipe_context *ctx, struct r600_texture *texture, struct r600_texture *staging, @@ -211,20 +217,24 @@ si_blit_decompress_zs_planes_in_place(struct si_context *sctx, return; if (planes & PIPE_MASK_S) sctx->db_flush_stencil_inplace = true; if (planes & PIPE_MASK_Z) sctx->db_flush_depth_inplace = true; si_mark_atom_dirty(sctx, &sctx->db_render_state); surf_tmpl.format = texture->resource.b.b.format; + bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness; + sctx->decompression_enabled = true; + sctx->framebuffer.do_update_surf_dirtiness = false; + while (level_mask) { unsigned level = u_bit_scan(&level_mask); surf_tmpl.u.tex.level = level; /* The smaller the mipmap level, the less layers there are * as far as 3D textures are concerned. */ max_layer = util_max_layer(&texture->resource.b.b, level); checked_last_layer = MIN2(last_layer, max_layer); @@ -248,20 +258,22 @@ si_blit_decompress_zs_planes_in_place(struct si_context *sctx, if (first_layer == 0 && last_layer >= max_layer) { fully_decompressed_mask |= 1u << level; } } if (planes & PIPE_MASK_Z) texture->dirty_level_mask &= ~fully_decompressed_mask; if (planes & PIPE_MASK_S) texture->stencil_dirty_level_mask &= ~fully_decompressed_mask; + sctx->decompression_enabled = false; + sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness; sctx->db_flush_depth_inplace = false; sctx->db_flush_stencil_inplace = false; si_mark_atom_dirty(sctx, &sctx->db_render_state); } /* Helper function of si_flush_depth_texture: decompress the given levels * of Z and/or S planes in place. */ static void si_blit_decompress_zs_in_place(struct si_context *sctx, @@ -429,20 +441,24 @@ static void si_blit_decompress_color(struct pipe_context *ctx, for (int i = first_level; i <= last_level; i++) { if (!vi_dcc_enabled(rtex, i)) level_mask &= ~(1 << i); } } else if (rtex->fmask.size) { custom_blend = sctx->custom_blend_decompress; } else { custom_blend = sctx->custom_blend_fastclear; } + bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness; + sctx->decompression_enabled = true; + sctx->framebuffer.do_update_surf_dirtiness = false; + while (level_mask) { unsigned level = u_bit_scan(&level_mask); /* The smaller the mipmap level, the less layers there are * as far as 3D textures are concerned. */ max_layer = util_max_layer(&rtex->resource.b.b, level); checked_last_layer = MIN2(last_layer, max_layer); for (layer = first_layer; layer <= checked_last_layer; layer++) { struct pipe_surface *cbsurf, surf_tmpl; @@ -459,20 +475,23 @@ static void si_blit_decompress_color(struct pipe_context *ctx, pipe_surface_reference(&cbsurf, NULL); } /* The texture will always be dirty if some layers aren't flushed. * I don't think this case occurs often though. */ if (first_layer == 0 && last_layer >= max_layer) { rtex->dirty_level_mask &= ~(1 << level); } } + + sctx->decompression_enabled = false; + sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness; } static void si_decompress_color_texture(struct si_context *sctx, struct r600_texture *tex, unsigned first_level, unsigned last_level) { /* CMASK or DCC can be discarded and we can still end up here. */ if (!tex->cmask.size && !tex->fmask.size && !tex->dcc_offset) return; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index e917cb1..9364ef2 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -376,20 +376,21 @@ struct si_context { bool is_debug; struct radeon_saved_cs last_gfx; struct r600_resource *last_trace_buf; struct r600_resource *trace_buf; unsigned trace_id; uint64_t dmesg_timestamp; unsigned apitrace_call_number; /* Other state */ bool need_check_render_feedback; + bool decompression_enabled; /* Precomputed IA_MULTI_VGT_PARAM */ union si_vgt_param_key ia_multi_vgt_param_key; unsigned ia_multi_vgt_param[SI_NUM_VGT_PARAM_STATES]; }; /* cik_sdma.c */ void cik_init_sdma_functions(struct si_context *sctx); /* si_blit.c */ diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index ae7c91f..6cf3559 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2601,23 +2601,29 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, R600_ERR("Requested an invalid number of samples %i.\n", sctx->framebuffer.nr_samples); assert(0); } constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4; si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf); si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom); } - sctx->need_check_render_feedback = true; sctx->do_update_shaders = true; - sctx->framebuffer.do_update_surf_dirtiness = true; + + if (!sctx->decompression_enabled) { + /* Prevent textures decompression when the framebuffer state + * changes come from the decompression passes themselves. + */ + sctx->need_check_render_feedback = true; + sctx->framebuffer.do_update_surf_dirtiness = true; + } } static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom) { struct radeon_winsys_cs *cs = sctx->b.gfx.cs; struct pipe_framebuffer_state *state = &sctx->framebuffer.state; unsigned i, nr_cbufs = state->nr_cbufs; struct r600_texture *tex = NULL; struct r600_surface *cb = NULL; unsigned cb_color_info = 0; -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev