From: Marek Olšák <marek.ol...@amd.com> This is a workaround for a thread deadlock that I have no idea why it occurs.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108879 Fixes: 9b331e462e5021d994859756d46cd2519d9c9c6e --- src/gallium/drivers/radeonsi/si_clear.c | 6 +++--- src/gallium/drivers/radeonsi/si_compute_blit.c | 8 +++++--- src/gallium/drivers/radeonsi/si_pipe.c | 2 +- src/gallium/drivers/radeonsi/si_pipe.h | 3 ++- src/gallium/drivers/radeonsi/si_test_dma.c | 2 +- 5 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/drivers/radeonsi/si_clear.c index e1805f2a1c9..ead680b857b 100644 --- a/src/gallium/drivers/radeonsi/si_clear.c +++ b/src/gallium/drivers/radeonsi/si_clear.c @@ -256,21 +256,21 @@ void vi_dcc_clear_level(struct si_context *sctx, * would be more efficient than separate per-layer clear operations. */ assert(tex->buffer.b.b.nr_storage_samples <= 2 || num_layers == 1); dcc_offset += tex->surface.u.legacy.level[level].dcc_offset; clear_size = tex->surface.u.legacy.level[level].dcc_fast_clear_size * num_layers; } si_clear_buffer(sctx, dcc_buffer, dcc_offset, clear_size, - &clear_value, 4, SI_COHERENCY_CB_META); + &clear_value, 4, SI_COHERENCY_CB_META, false); } /* Set the same micro tile mode as the destination of the last MSAA resolve. * This allows hitting the MSAA resolve fast path, which requires that both * src and dst micro tile modes match. */ static void si_set_optimal_micro_tile_mode(struct si_screen *sscreen, struct si_texture *tex) { if (tex->buffer.b.is_shared || @@ -489,21 +489,21 @@ static void si_do_fast_color_clear(struct si_context *sctx, /* DCC fast clear with MSAA should clear CMASK to 0xC. */ if (tex->buffer.b.b.nr_samples >= 2 && tex->cmask_buffer) { /* TODO: This doesn't work with MSAA. */ if (eliminate_needed) continue; uint32_t clear_value = 0xCCCCCCCC; si_clear_buffer(sctx, &tex->cmask_buffer->b.b, tex->cmask_offset, tex->surface.cmask_size, - &clear_value, 4, SI_COHERENCY_CB_META); + &clear_value, 4, SI_COHERENCY_CB_META, false); fmask_decompress_needed = true; } vi_dcc_clear_level(sctx, tex, 0, reset_value); tex->separate_dcc_dirty = true; } else { if (too_small) continue; /* 128-bit formats are unusupported */ @@ -517,21 +517,21 @@ static void si_do_fast_color_clear(struct si_context *sctx, /* ensure CMASK is enabled */ si_alloc_separate_cmask(sctx->screen, tex); if (!tex->cmask_buffer) continue; /* Do the fast clear. */ uint32_t clear_value = 0; si_clear_buffer(sctx, &tex->cmask_buffer->b.b, tex->cmask_offset, tex->surface.cmask_size, - &clear_value, 4, SI_COHERENCY_CB_META); + &clear_value, 4, SI_COHERENCY_CB_META, false); eliminate_needed = true; } if ((eliminate_needed || fmask_decompress_needed) && !(tex->dirty_level_mask & (1 << level))) { tex->dirty_level_mask |= 1 << level; p_atomic_inc(&sctx->screen->compressed_colortex_counter); } /* We can change the micro tile mode before a full clear. */ diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index 1abeac6adb0..fb0d8d2f1b6 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -179,21 +179,22 @@ static void si_compute_do_clear_or_copy(struct si_context *sctx, /* Restore states. */ ctx->bind_compute_state(ctx, saved_cs); ctx->set_shader_buffers(ctx, PIPE_SHADER_COMPUTE, 0, src ? 2 : 1, saved_sb, saved_writable_mask); si_compute_internal_end(sctx); } void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, uint64_t offset, uint64_t size, uint32_t *clear_value, - uint32_t clear_value_size, enum si_coherency coher) + uint32_t clear_value_size, enum si_coherency coher, + bool force_cpdma) { if (!size) return; unsigned clear_alignment = MIN2(clear_value_size, 4); assert(clear_value_size != 3 && clear_value_size != 6); /* 12 is allowed. */ assert(offset % clear_alignment == 0); assert(size % clear_alignment == 0); assert(size < (UINT_MAX & ~0xf)); /* TODO: test 64-bit sizes in all codepaths */ @@ -243,21 +244,22 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, return; } uint64_t aligned_size = size & ~3ull; if (aligned_size >= 4) { /* Before GFX9, CP DMA was very slow when clearing GTT, so never * use CP DMA clears on those chips, because we can't be certain * about buffer placements. */ if (clear_value_size > 4 || - (clear_value_size == 4 && + (!force_cpdma && + clear_value_size == 4 && offset % 4 == 0 && (size > 32*1024 || sctx->chip_class <= VI))) { si_compute_do_clear_or_copy(sctx, dst, offset, NULL, 0, aligned_size, clear_value, clear_value_size, coher); } else { assert(clear_value_size == 4); si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, dst, offset, aligned_size, *clear_value, 0, coher, get_cache_policy(sctx, coher, size)); @@ -277,21 +279,21 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, } } static void si_pipe_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst, unsigned offset, unsigned size, const void *clear_value, int clear_value_size) { si_clear_buffer((struct si_context*)ctx, dst, offset, size, (uint32_t*)clear_value, - clear_value_size, SI_COHERENCY_SHADER); + clear_value_size, SI_COHERENCY_SHADER, false); } void si_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, struct pipe_resource *src, uint64_t dst_offset, uint64_t src_offset, unsigned size) { if (!size) return; enum si_coherency coher = SI_COHERENCY_SHADER; diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 5caeb575623..5d376e6181a 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -634,21 +634,21 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, sizeof(sctx->sample_positions), &sctx->sample_positions); /* this must be last */ si_begin_new_gfx_cs(sctx); if (sctx->chip_class == CIK) { /* Clear the NULL constant buffer, because loads should return zeros. */ uint32_t clear_value = 0; si_clear_buffer(sctx, sctx->null_const_buf.buffer, 0, sctx->null_const_buf.buffer->width0, - &clear_value, 4, SI_COHERENCY_SHADER); + &clear_value, 4, SI_COHERENCY_SHADER, true); } return &sctx->b; fail: fprintf(stderr, "radeonsi: Failed to create a context.\n"); si_destroy_context(&sctx->b); return NULL; } static struct pipe_context *si_pipe_create_context(struct pipe_screen *screen, void *priv, unsigned flags) diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 301d38649bf..aaa95f32d20 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1182,21 +1182,22 @@ bool vi_alpha_is_on_msb(enum pipe_format format); void vi_dcc_clear_level(struct si_context *sctx, struct si_texture *tex, unsigned level, unsigned clear_value); void si_init_clear_functions(struct si_context *sctx); /* si_compute_blit.c */ unsigned si_get_flush_flags(struct si_context *sctx, enum si_coherency coher, enum si_cache_policy cache_policy); void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, uint64_t offset, uint64_t size, uint32_t *clear_value, - uint32_t clear_value_size, enum si_coherency coher); + uint32_t clear_value_size, enum si_coherency coher, + bool force_cpdma); void si_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, struct pipe_resource *src, uint64_t dst_offset, uint64_t src_offset, unsigned size); void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, unsigned dst_level, struct pipe_resource *src, unsigned src_level, unsigned dstx, unsigned dsty, unsigned dstz, const struct pipe_box *src_box); diff --git a/src/gallium/drivers/radeonsi/si_test_dma.c b/src/gallium/drivers/radeonsi/si_test_dma.c index 90a2032cd80..7e396e671be 100644 --- a/src/gallium/drivers/radeonsi/si_test_dma.c +++ b/src/gallium/drivers/radeonsi/si_test_dma.c @@ -302,21 +302,21 @@ void si_test_dma(struct si_screen *sscreen) tsrc.width0, tsrc.height0, tsrc.array_size, array_mode_to_string(sscreen, &ssrc->surface), bpp); fflush(stdout); /* set src pixels */ set_random_pixels(ctx, src, &src_cpu); /* clear dst pixels */ uint32_t zero = 0; si_clear_buffer(sctx, dst, 0, sdst->surface.surf_size, &zero, 4, - SI_COHERENCY_SHADER); + SI_COHERENCY_SHADER, false); memset(dst_cpu.ptr, 0, dst_cpu.layer_stride * tdst.array_size); /* preparation */ max_width = MIN2(tsrc.width0, tdst.width0); max_height = MIN2(tsrc.height0, tdst.height0); max_depth = MIN2(tsrc.array_size, tdst.array_size); num = do_partial_copies ? num_partial_copies : 1; for (j = 0; j < num; j++) { int width, height, depth; -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev