From: Marek Olšák <marek.ol...@amd.com> Needed by displayable DCC.
We need to flush L2 after rendering if PIPE_ALIGNED=0 and DCC is enabled. --- src/gallium/drivers/radeonsi/si_blit.c | 7 ++++--- .../drivers/radeonsi/si_compute_blit.c | 9 +++++++-- src/gallium/drivers/radeonsi/si_pipe.h | 6 ++++-- src/gallium/drivers/radeonsi/si_state.c | 20 ++++++++++++++----- 4 files changed, 30 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index f39cb5d143f..7613a63e3cb 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -414,21 +414,21 @@ si_decompress_depth(struct si_context *sctx, */ si_make_DB_shader_coherent(sctx, tex->buffer.b.b.nr_samples, inplace_planes & PIPE_MASK_S, tc_compat_htile); } /* set_framebuffer_state takes care of coherency for single-sample. * The DB->CB copy uses CB for the final writes. */ if (copy_planes && tex->buffer.b.b.nr_samples > 1) si_make_CB_shader_coherent(sctx, tex->buffer.b.b.nr_samples, - false); + false, true /* no DCC */); } static void si_decompress_sampler_depth_textures(struct si_context *sctx, struct si_samplers *textures) { unsigned i; unsigned mask = textures->needs_depth_decompress_mask; while (mask) { @@ -527,21 +527,22 @@ static void si_blit_decompress_color(struct si_context *sctx, /* The texture will always be dirty if some layers aren't flushed. * I don't think this case occurs often though. */ if (first_layer == 0 && last_layer >= max_layer) { tex->dirty_level_mask &= ~(1 << level); } } sctx->decompression_enabled = false; si_make_CB_shader_coherent(sctx, tex->buffer.b.b.nr_samples, - vi_dcc_enabled(tex, first_level)); + vi_dcc_enabled(tex, first_level), + tex->surface.u.gfx9.dcc.pipe_aligned); } static void si_decompress_color_texture(struct si_context *sctx, struct si_texture *tex, unsigned first_level, unsigned last_level) { /* CMASK or DCC can be discarded and we can still end up here. */ if (!tex->cmask_buffer && !tex->surface.fmask_size && !tex->dcc_offset) return; @@ -1069,21 +1070,21 @@ static void si_do_CB_resolve(struct si_context *sctx, si_blitter_begin(sctx, SI_COLOR_RESOLVE | (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND)); util_blitter_custom_resolve_color(sctx->blitter, dst, dst_level, dst_z, info->src.resource, info->src.box.z, ~0, sctx->custom_blend_resolve, format); si_blitter_end(sctx); /* Flush caches for possible texturing. */ - si_make_CB_shader_coherent(sctx, 1, false); + si_make_CB_shader_coherent(sctx, 1, false, true /* no DCC */); } static bool do_hardware_msaa_resolve(struct pipe_context *ctx, const struct pipe_blit_info *info) { struct si_context *sctx = (struct si_context*)ctx; struct si_texture *src = (struct si_texture*)info->src.resource; struct si_texture *dst = (struct si_texture*)info->dst.resource; MAYBE_UNUSED struct si_texture *stmp; unsigned dst_width = u_minify(info->dst.resource->width0, info->dst.level); diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index f5e9c02dd10..2ce56d6a81a 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -317,21 +317,25 @@ void si_compute_copy_image(struct si_context *sctx, unsigned depth = src_box->depth; unsigned data[] = {src_box->x, src_box->y, src_box->z, 0, dstx, dsty, dstz, 0}; if (width == 0 || height == 0) return; si_compute_internal_begin(sctx); sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM); - si_make_CB_shader_coherent(sctx, dst->nr_samples, true); + + /* src and dst have the same number of samples. */ + si_make_CB_shader_coherent(sctx, src->nr_samples, true, + /* Only src can have DCC.*/ + ((struct si_texture*)src)->surface.u.gfx9.dcc.pipe_aligned); struct pipe_constant_buffer saved_cb = {}; si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &saved_cb); struct si_images *images = &sctx->images[PIPE_SHADER_COMPUTE]; struct pipe_image_view saved_image[2] = {0}; util_copy_image_view(&saved_image[0], &images->views[0]); util_copy_image_view(&saved_image[1], &images->views[1]); void *saved_cs = sctx->cs_shader_state.program; @@ -443,21 +447,22 @@ void si_compute_clear_render_target(struct pipe_context *ctx, memcpy(data + 4, color_srgb.ui, sizeof(color->ui)); } else { memcpy(data + 4, color->ui, sizeof(color->ui)); } si_compute_internal_begin(sctx); sctx->render_cond_force_off = !render_condition_enabled; sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM); - si_make_CB_shader_coherent(sctx, dstsurf->texture->nr_samples, true); + si_make_CB_shader_coherent(sctx, dstsurf->texture->nr_samples, true, + true /* DCC is not possible with image stores */); struct pipe_constant_buffer saved_cb = {}; si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &saved_cb); struct si_images *images = &sctx->images[PIPE_SHADER_COMPUTE]; struct pipe_image_view saved_image = {0}; util_copy_image_view(&saved_image, &images->views[0]); void *saved_cs = sctx->cs_shader_state.program; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index b3198d45ea6..39152587a99 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -631,20 +631,21 @@ struct si_framebuffer { ubyte compressed_cb_mask; ubyte uncompressed_cb_mask; ubyte color_is_int8; ubyte color_is_int10; ubyte dirty_cbufs; ubyte dcc_overwrite_combiner_watermark; bool dirty_zsbuf; bool any_dst_linear; bool CB_has_shader_readable_metadata; bool DB_has_shader_readable_metadata; + bool all_DCC_pipe_aligned; }; enum si_quant_mode { /* This is the list we want to support. */ SI_QUANT_MODE_16_8_FIXED_POINT_1_256TH, SI_QUANT_MODE_14_10_FIXED_POINT_1_1024TH, SI_QUANT_MODE_12_12_FIXED_POINT_1_4096TH, }; struct si_signed_scissor { @@ -1539,31 +1540,32 @@ static inline void si_saved_cs_reference(struct si_saved_cs **dst, struct si_saved_cs *src) { if (pipe_reference(&(*dst)->reference, &src->reference)) si_destroy_saved_cs(*dst); *dst = src; } static inline void si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples, - bool shaders_read_metadata) + bool shaders_read_metadata, bool dcc_pipe_aligned) { sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_INV_VMEM_L1; if (sctx->chip_class >= GFX9) { /* Single-sample color is coherent with shaders on GFX9, but * L2 metadata must be flushed if shaders read metadata. * (DCC, CMASK). */ - if (num_samples >= 2) + if (num_samples >= 2 || + (shaders_read_metadata && !dcc_pipe_aligned)) sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2; else if (shaders_read_metadata) sctx->flags |= SI_CONTEXT_INV_L2_METADATA; } else { /* SI-CI-VI */ sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2; } } static inline void diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 458b108a7e3..017a06a808d 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2800,23 +2800,25 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, * * DB caches are flushed on demand (using si_decompress_textures). * * When MSAA is enabled, CB and TC caches are flushed on demand * (after FMASK decompression). Shader write -> FB read transitions * cannot happen for MSAA textures, because MSAA shader images are * not supported. * * Only flush and wait for CB if there is actually a bound color buffer. */ - if (sctx->framebuffer.uncompressed_cb_mask) + if (sctx->framebuffer.uncompressed_cb_mask) { si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples, - sctx->framebuffer.CB_has_shader_readable_metadata); + sctx->framebuffer.CB_has_shader_readable_metadata, + sctx->framebuffer.all_DCC_pipe_aligned); + } sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH; /* u_blitter doesn't invoke depth decompression when it does multiple * blits in a row, but the only case when it matters for DB is when * doing generate_mipmap. So here we flush DB manually between * individual generate_mipmap blits. * Note that lower mipmap levels aren't compressed. */ if (sctx->generate_mipmap_for_depth) { @@ -2851,20 +2853,21 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, sctx->framebuffer.color_is_int10 = 0; sctx->framebuffer.compressed_cb_mask = 0; sctx->framebuffer.uncompressed_cb_mask = 0; sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state); sctx->framebuffer.nr_color_samples = sctx->framebuffer.nr_samples; sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples); sctx->framebuffer.any_dst_linear = false; sctx->framebuffer.CB_has_shader_readable_metadata = false; sctx->framebuffer.DB_has_shader_readable_metadata = false; + sctx->framebuffer.all_DCC_pipe_aligned = true; unsigned num_bpp64_colorbufs = 0; for (i = 0; i < state->nr_cbufs; i++) { if (!state->cbufs[i]) continue; surf = (struct si_surface*)state->cbufs[i]; tex = (struct si_texture*)surf->base.texture; if (!surf->color_initialized) { @@ -2901,23 +2904,28 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, tex->buffer.b.b.nr_storage_samples); sctx->framebuffer.nr_color_samples = MAX2(1, sctx->framebuffer.nr_color_samples); } if (tex->surface.is_linear) sctx->framebuffer.any_dst_linear = true; if (tex->surface.bpe >= 8) num_bpp64_colorbufs++; - if (vi_dcc_enabled(tex, surf->base.u.tex.level)) + if (vi_dcc_enabled(tex, surf->base.u.tex.level)) { sctx->framebuffer.CB_has_shader_readable_metadata = true; + if (sctx->chip_class >= GFX9 && + !tex->surface.u.gfx9.dcc.pipe_aligned) + sctx->framebuffer.all_DCC_pipe_aligned = false; + } + si_context_add_resource_size(sctx, surf->base.texture); p_atomic_inc(&tex->framebuffers_bound); if (tex->dcc_gather_statistics) { /* Dirty tracking must be enabled for DCC usage analysis. */ sctx->framebuffer.compressed_cb_mask |= 1 << i; vi_separate_dcc_start_query(sctx, tex); } } @@ -4693,23 +4701,25 @@ static void si_set_tess_state(struct pipe_context *ctx, pipe_resource_reference(&cb.buffer, NULL); } static void si_texture_barrier(struct pipe_context *ctx, unsigned flags) { struct si_context *sctx = (struct si_context *)ctx; si_update_fb_dirtiness_after_rendering(sctx); /* Multisample surfaces are flushed in si_decompress_textures. */ - if (sctx->framebuffer.uncompressed_cb_mask) + if (sctx->framebuffer.uncompressed_cb_mask) { si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples, - sctx->framebuffer.CB_has_shader_readable_metadata); + sctx->framebuffer.CB_has_shader_readable_metadata, + sctx->framebuffer.all_DCC_pipe_aligned); + } } /* This only ensures coherency for shader image/buffer stores. */ void si_memory_barrier(struct pipe_context *ctx, unsigned flags) { struct si_context *sctx = (struct si_context *)ctx; /* Subsequent commands must wait for all shader invocations to * complete. */ sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev