I would suggest documenting that workaround somewhere in the code.

On 4/12/19 5:17 PM, Marek Olšák wrote:
From: Marek Olšák <marek.ol...@amd.com>

This is a workaround for a thread deadlock whose cause I have not
been able to determine.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108879
Fixes: 9b331e462e5021d994859756d46cd2519d9c9c6e
---
  src/gallium/drivers/radeonsi/si_clear.c        | 6 +++---
  src/gallium/drivers/radeonsi/si_compute_blit.c | 8 +++++---
  src/gallium/drivers/radeonsi/si_pipe.c         | 2 +-
  src/gallium/drivers/radeonsi/si_pipe.h         | 3 ++-
  src/gallium/drivers/radeonsi/si_test_dma.c     | 2 +-
  5 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_clear.c 
b/src/gallium/drivers/radeonsi/si_clear.c
index e1805f2a1c9..ead680b857b 100644
--- a/src/gallium/drivers/radeonsi/si_clear.c
+++ b/src/gallium/drivers/radeonsi/si_clear.c
@@ -256,21 +256,21 @@ void vi_dcc_clear_level(struct si_context *sctx,
                 * would be more efficient than separate per-layer clear 
operations.
                 */
                assert(tex->buffer.b.b.nr_storage_samples <= 2 || num_layers == 
1);
dcc_offset += tex->surface.u.legacy.level[level].dcc_offset;
                clear_size = 
tex->surface.u.legacy.level[level].dcc_fast_clear_size *
                             num_layers;
        }
si_clear_buffer(sctx, dcc_buffer, dcc_offset, clear_size,
-                       &clear_value, 4, SI_COHERENCY_CB_META);
+                       &clear_value, 4, SI_COHERENCY_CB_META, false);
  }
/* Set the same micro tile mode as the destination of the last MSAA resolve.
   * This allows hitting the MSAA resolve fast path, which requires that both
   * src and dst micro tile modes match.
   */
  static void si_set_optimal_micro_tile_mode(struct si_screen *sscreen,
                                           struct si_texture *tex)
  {
        if (tex->buffer.b.is_shared ||
@@ -489,21 +489,21 @@ static void si_do_fast_color_clear(struct si_context 
*sctx,
/* DCC fast clear with MSAA should clear CMASK to 0xC. */
                        if (tex->buffer.b.b.nr_samples >= 2 && 
tex->cmask_buffer) {
                                /* TODO: This doesn't work with MSAA. */
                                if (eliminate_needed)
                                        continue;
uint32_t clear_value = 0xCCCCCCCC;
                                si_clear_buffer(sctx, &tex->cmask_buffer->b.b,
                                                tex->cmask_offset, 
tex->surface.cmask_size,
-                                               &clear_value, 4, 
SI_COHERENCY_CB_META);
+                                               &clear_value, 4, 
SI_COHERENCY_CB_META, false);
                                fmask_decompress_needed = true;
                        }
vi_dcc_clear_level(sctx, tex, 0, reset_value);
                        tex->separate_dcc_dirty = true;
                } else {
                        if (too_small)
                                continue;
/* 128-bit formats are unusupported */
@@ -517,21 +517,21 @@ static void si_do_fast_color_clear(struct si_context 
*sctx,
/* ensure CMASK is enabled */
                        si_alloc_separate_cmask(sctx->screen, tex);
                        if (!tex->cmask_buffer)
                                continue;
/* Do the fast clear. */
                        uint32_t clear_value = 0;
                        si_clear_buffer(sctx, &tex->cmask_buffer->b.b,
                                        tex->cmask_offset, 
tex->surface.cmask_size,
-                                       &clear_value, 4, SI_COHERENCY_CB_META);
+                                       &clear_value, 4, SI_COHERENCY_CB_META, 
false);
                        eliminate_needed = true;
                }
if ((eliminate_needed || fmask_decompress_needed) &&
                    !(tex->dirty_level_mask & (1 << level))) {
                        tex->dirty_level_mask |= 1 << level;
                        
p_atomic_inc(&sctx->screen->compressed_colortex_counter);
                }
/* We can change the micro tile mode before a full clear. */
diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c 
b/src/gallium/drivers/radeonsi/si_compute_blit.c
index 1abeac6adb0..fb0d8d2f1b6 100644
--- a/src/gallium/drivers/radeonsi/si_compute_blit.c
+++ b/src/gallium/drivers/radeonsi/si_compute_blit.c
@@ -179,21 +179,22 @@ static void si_compute_do_clear_or_copy(struct si_context 
*sctx,
/* Restore states. */
        ctx->bind_compute_state(ctx, saved_cs);
        ctx->set_shader_buffers(ctx, PIPE_SHADER_COMPUTE, 0, src ? 2 : 1, 
saved_sb,
                                saved_writable_mask);
        si_compute_internal_end(sctx);
  }
void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
                     uint64_t offset, uint64_t size, uint32_t *clear_value,
-                    uint32_t clear_value_size, enum si_coherency coher)
+                    uint32_t clear_value_size, enum si_coherency coher,
+                    bool force_cpdma)
  {
        if (!size)
                return;
unsigned clear_alignment = MIN2(clear_value_size, 4); assert(clear_value_size != 3 && clear_value_size != 6); /* 12 is allowed. */
        assert(offset % clear_alignment == 0);
        assert(size % clear_alignment == 0);
        assert(size < (UINT_MAX & ~0xf)); /* TODO: test 64-bit sizes in all 
codepaths */
@@ -243,21 +244,22 @@ void si_clear_buffer(struct si_context *sctx, struct 
pipe_resource *dst,
                return;
        }
uint64_t aligned_size = size & ~3ull;
        if (aligned_size >= 4) {
                /* Before GFX9, CP DMA was very slow when clearing GTT, so never
                 * use CP DMA clears on those chips, because we can't be certain
                 * about buffer placements.
                 */
                if (clear_value_size > 4 ||
-                   (clear_value_size == 4 &&
+                   (!force_cpdma &&
+                    clear_value_size == 4 &&
                     offset % 4 == 0 &&
                     (size > 32*1024 || sctx->chip_class <= VI))) {
                        si_compute_do_clear_or_copy(sctx, dst, offset, NULL, 0,
                                                    aligned_size, clear_value,
                                                    clear_value_size, coher);
                } else {
                        assert(clear_value_size == 4);
                        si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, dst, offset,
                                               aligned_size, *clear_value, 0, 
coher,
                                               get_cache_policy(sctx, coher, 
size));
@@ -277,21 +279,21 @@ void si_clear_buffer(struct si_context *sctx, struct 
pipe_resource *dst,
        }
  }
static void si_pipe_clear_buffer(struct pipe_context *ctx,
                                 struct pipe_resource *dst,
                                 unsigned offset, unsigned size,
                                 const void *clear_value,
                                 int clear_value_size)
  {
        si_clear_buffer((struct si_context*)ctx, dst, offset, size, 
(uint32_t*)clear_value,
-                       clear_value_size, SI_COHERENCY_SHADER);
+                       clear_value_size, SI_COHERENCY_SHADER, false);
  }
void si_copy_buffer(struct si_context *sctx,
                    struct pipe_resource *dst, struct pipe_resource *src,
                    uint64_t dst_offset, uint64_t src_offset, unsigned size)
  {
        if (!size)
                return;
enum si_coherency coher = SI_COHERENCY_SHADER;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 5caeb575623..5d376e6181a 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -634,21 +634,21 @@ static struct pipe_context *si_create_context(struct 
pipe_screen *screen,
                          sizeof(sctx->sample_positions), 
&sctx->sample_positions);
/* this must be last */
        si_begin_new_gfx_cs(sctx);
if (sctx->chip_class == CIK) {
                /* Clear the NULL constant buffer, because loads should return 
zeros. */
                uint32_t clear_value = 0;
                si_clear_buffer(sctx, sctx->null_const_buf.buffer, 0,
                                sctx->null_const_buf.buffer->width0,
-                               &clear_value, 4, SI_COHERENCY_SHADER);
+                               &clear_value, 4, SI_COHERENCY_SHADER, true);
        }
        return &sctx->b;
  fail:
        fprintf(stderr, "radeonsi: Failed to create a context.\n");
        si_destroy_context(&sctx->b);
        return NULL;
  }
static struct pipe_context *si_pipe_create_context(struct pipe_screen *screen,
                                                   void *priv, unsigned flags)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 301d38649bf..aaa95f32d20 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1182,21 +1182,22 @@ bool vi_alpha_is_on_msb(enum pipe_format format);
  void vi_dcc_clear_level(struct si_context *sctx,
                        struct si_texture *tex,
                        unsigned level, unsigned clear_value);
  void si_init_clear_functions(struct si_context *sctx);
/* si_compute_blit.c */
  unsigned si_get_flush_flags(struct si_context *sctx, enum si_coherency coher,
                            enum si_cache_policy cache_policy);
  void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
                     uint64_t offset, uint64_t size, uint32_t *clear_value,
-                    uint32_t clear_value_size, enum si_coherency coher);
+                    uint32_t clear_value_size, enum si_coherency coher,
+                    bool force_cpdma);
  void si_copy_buffer(struct si_context *sctx,
                    struct pipe_resource *dst, struct pipe_resource *src,
                    uint64_t dst_offset, uint64_t src_offset, unsigned size);
  void si_compute_copy_image(struct si_context *sctx,
                           struct pipe_resource *dst,
                           unsigned dst_level,
                           struct pipe_resource *src,
                           unsigned src_level,
                           unsigned dstx, unsigned dsty, unsigned dstz,
                           const struct pipe_box *src_box);
diff --git a/src/gallium/drivers/radeonsi/si_test_dma.c 
b/src/gallium/drivers/radeonsi/si_test_dma.c
index 90a2032cd80..7e396e671be 100644
--- a/src/gallium/drivers/radeonsi/si_test_dma.c
+++ b/src/gallium/drivers/radeonsi/si_test_dma.c
@@ -302,21 +302,21 @@ void si_test_dma(struct si_screen *sscreen)
                       tsrc.width0, tsrc.height0, tsrc.array_size,
                       array_mode_to_string(sscreen, &ssrc->surface), bpp);
                fflush(stdout);
/* set src pixels */
                set_random_pixels(ctx, src, &src_cpu);
/* clear dst pixels */
                uint32_t zero = 0;
                si_clear_buffer(sctx, dst, 0, sdst->surface.surf_size, &zero, 4,
-                               SI_COHERENCY_SHADER);
+                               SI_COHERENCY_SHADER, false);
                memset(dst_cpu.ptr, 0, dst_cpu.layer_stride * tdst.array_size);
/* preparation */
                max_width = MIN2(tsrc.width0, tdst.width0);
                max_height = MIN2(tsrc.height0, tdst.height0);
                max_depth = MIN2(tsrc.array_size, tdst.array_size);
num = do_partial_copies ? num_partial_copies : 1;
                for (j = 0; j < num; j++) {
                        int width, height, depth;
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to