From: Marek Olšák <marek.ol...@amd.com>

This should be as fast as not using HTILE for stencil at all. I think
depth-only rendering still gets full performance even when the buffer contains
a stencil component that is not used, but I'm not 100% sure. This may be
revisited when HiS and fast stencil clear are implemented.

This fixes a hang in Brutal Legend.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=64471
---
 src/gallium/drivers/r600/evergreen_state.c |  5 ++++-
 src/gallium/drivers/radeonsi/si_state.c    | 18 ++++++++++--------
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 34a275c..85c27bb 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1749,7 +1749,10 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
        unsigned db_count_control = 0;
        unsigned db_render_override =
                S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
-               S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
+               S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE) |
+               /* There is a hang with HTILE if stencil is used and
+                * fast stencil is enabled. */
+               S_02800C_FAST_STENCIL_DISABLE(1);
 
        if (a->occlusion_query_enabled) {
                db_count_control |= S_028004_PERFECT_ZPASS_COUNTS(1);
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index e5b2bb6..cbc2967 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1843,8 +1843,6 @@ static void si_init_depth_surface(struct si_context *sctx,
        /* HiZ aka depth buffer htile */
        /* use htile only for first level */
        if (rtex->htile_buffer && !level) {
-               const struct util_format_description *fmt_desc;
-
                z_info |= S_028040_TILE_SURFACE_ENABLE(1);
 
                /* This is optimal for the clear value of 1.0 and using
@@ -1853,11 +1851,9 @@ static void si_init_depth_surface(struct si_context *sctx,
                 * clearing. */
                z_info |= S_028040_ZRANGE_PRECISION(1);
 
-               fmt_desc = util_format_description(rtex->resource.b.b.format);
-               if (!util_format_has_stencil(fmt_desc)) {
-                       /* Use all of the htile_buffer for depth */
-                       s_info |= S_028044_TILE_STENCIL_DISABLE(1);
-               }
+               /* Use all of the htile_buffer for depth, because we don't
+                * use HTILE for stencil because of FAST_STENCIL_DISABLE. */
+               s_info |= S_028044_TILE_STENCIL_DISABLE(1);
 
                uint64_t va = rtex->htile_buffer->gpu_address;
                db_htile_data_base = va >> 8;
@@ -3123,9 +3119,15 @@ void si_init_config(struct si_context *sctx)
        si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
        si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
        si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
+
+       /* There is a hang if stencil is used and fast stencil is enabled
+        * regardless of whether HTILE is depth-only or not.
+        */
        si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE,
                       S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
-                      S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE));
+                      S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE) |
+                      S_02800C_FAST_STENCIL_DISABLE(1));
+
        si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
        si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
        si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
-- 
1.9.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to