From: Marek Olšák <marek.ol...@amd.com>

---
 src/gallium/drivers/radeonsi/si_state_draw.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index e7f8389caf3..d61374e95ca 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -163,20 +163,28 @@ static bool si_emit_derived_tess_state(struct si_context *sctx,
        /* Make sure the output data fits in the offchip buffer */
        *num_patches = MIN2(*num_patches,
                            (sctx->screen->tess_offchip_block_dw_size * 4) /
                            output_patch_size);
 
        /* Not necessary for correctness, but improves performance. The
         * specific value is taken from the proprietary driver.
         */
        *num_patches = MIN2(*num_patches, 40);
 
+       /* Make sure that vector lanes are reasonably occupied. It probably
+        * doesn't matter much because this is LS-HS, and TES is likely to
+        * occupy significantly more CUs.
+        */
+       unsigned temp_verts_per_tg = *num_patches * max_verts_per_patch;
+       if (temp_verts_per_tg > 64 && temp_verts_per_tg % 64 < 48)
+               *num_patches = (temp_verts_per_tg & ~63) / max_verts_per_patch;
+
        if (sctx->chip_class == SI) {
                /* SI bug workaround, related to power management. Limit LS-HS
                 * threadgroups to only one wave.
                 */
                unsigned one_wave = 64 / max_verts_per_patch;
                *num_patches = MIN2(*num_patches, one_wave);
        }
 
        /* The VGT HS block increments the patch ID unconditionally
         * within a single threadgroup. This results in incorrect
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to