From: Marek Olšák <marek.ol...@amd.com>

ported from Vulkan

Cc: 12.0 <mesa-sta...@lists.freedesktop.org>
---
 src/gallium/drivers/radeonsi/si_compute.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index 2f7e172..f19e830 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -443,6 +443,21 @@ static void si_launch_grid(
        struct si_context *sctx = (struct si_context*)ctx;
        struct si_compute *program = sctx->cs_shader_state.program;
        int i;
+       /* HW bug workaround when CS threadgroups > 256 threads and async
+        * compute isn't used, i.e. only one compute job can run at a time.
+        * If async compute is possible, the threadgroup size must be limited
+        * to 256 threads on all queues to avoid the bug.
+        * Only SI and certain CIK chips are affected.
+        */
+       bool cs_regalloc_hang =
+               (sctx->b.chip_class == SI ||
+                sctx->b.family == CHIP_BONAIRE ||
+                sctx->b.family == CHIP_KABINI) &&
+               info->block[0] * info->block[1] * info->block[2] > 256;
+
+       if (cs_regalloc_hang)
+               sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
+                                SI_CONTEXT_CS_PARTIAL_FLUSH;
 
        si_decompress_compute_textures(sctx);
 
@@ -493,6 +508,9 @@ static void si_launch_grid(
        sctx->b.num_compute_calls++;
        if (sctx->cs_shader_state.uses_scratch)
                sctx->b.num_spill_compute_calls++;
+
+       if (cs_regalloc_hang)
+               sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
 }
 
 
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to