This avoids bug 105396 somehow. I suspect it is a VI and GFX9 hardware
bug which PAL calls WaTcCompatZRange, but I don't know for sure.

In the VK_FORMAT_D32_SFLOAT case, TILE_STENCIL_DISABLE is not set for
tc compatible image formats regardless of not having a stencil aspect.
If TILE_STENCIL_DISABLE was set, ZRANGE_PRECISION would have no effect
and the bug would occur again.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105396
CC: <mesa-sta...@lists.freedesktop.org>
CC: Dave Airlie <airl...@redhat.com>
CC: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl>
CC: Samuel Pitoiset <samuel.pitoi...@gmail.com>
---
 src/amd/vulkan/radv_cmd_buffer.c | 52 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 49 insertions(+), 3 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 3e0ed0e9a9..89e31a0347 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -915,6 +915,37 @@ radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer,
 
        }
 
+       if (image->surface.htile_size)
+       {
+               /* If the last depth clear value was 0.0f, set ZRANGE_PRECISION
+                * to 0 in dp_z_info for more accuracy with reverse depth; and
+                * to avoid https://bugs.freedesktop.org/show_bug.cgi?id=105396.
+                * Otherwise, we leave it set to 1.
+                */
+               radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_WRITE, 7, 0));
+
+               const uint32_t write_space = 0 << 8;    /* register */
+               const uint32_t poll_space = 1 << 4;     /* memory */
+               const uint32_t function = 3 << 0;       /* equal to the 
reference */
+               const uint32_t options = write_space | poll_space | function;
+               radeon_emit(cmd_buffer->cs, options);
+
+               /* poll address - location of the depth clear value */
+               uint64_t va = radv_buffer_get_va(image->bo);
+               va += image->offset + image->clear_value_offset;
+               radeon_emit(cmd_buffer->cs, va);
+               radeon_emit(cmd_buffer->cs, va >> 32);
+
+               radeon_emit(cmd_buffer->cs, fui(0.0f));         /* reference 
value */
+               radeon_emit(cmd_buffer->cs, (uint32_t)-1);      /* comparison 
mask */
+               radeon_emit(cmd_buffer->cs, R_028040_DB_Z_INFO >> 2); /* write 
address low */
+               radeon_emit(cmd_buffer->cs, 0u);                /* write 
address high */
+
+               /* The value to write data when the condition passes */
+               uint32_t db_z_info_clear_zero = db_z_info & 
C_028040_ZRANGE_PRECISION;
+               radeon_emit(cmd_buffer->cs, db_z_info_clear_zero);
+       }
+
        radeon_set_context_reg(cmd_buffer->cs, 
R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
                               ds->pa_su_poly_offset_db_fmt_cntl);
 }
@@ -3479,7 +3510,8 @@ void radv_CmdEndRenderPass(
 
 /*
  * For HTILE we have the following interesting clear words:
- *   0xfffff30f: Uncompressed, full depth range, for depth+stencil HTILE
+ *   0xfffff30f: Uncompressed, full depth range, for depth+stencil HTILE when 
ZRANGE_PRECISION is 1
+ *   0x0003f30f: Uncompressed, full depth range, for depth+stencil HTILE when 
ZRANGE_PRECISION is 0
  *   0xfffc000f: Uncompressed, full depth range, for depth only HTILE.
  *   0xfffffff0: Clear depth to 1.0
  *   0x00000000: Clear depth to 0.0
@@ -3528,8 +3560,22 @@ static void radv_handle_depth_image_transition(struct 
radv_cmd_buffer *cmd_buffe
                radv_initialize_htile(cmd_buffer, image, range, 0);
        } else if (!radv_layout_is_htile_compressed(image, src_layout, 
src_queue_mask) &&
                   radv_layout_is_htile_compressed(image, dst_layout, 
dst_queue_mask)) {
-               uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 
0xfffff30f : 0xfffc000f;
-               radv_initialize_htile(cmd_buffer, image, range, clear_value);
+               if (vk_format_is_stencil(image->vk_format)) {
+                       /* The appropriate clear value depends on DB_Z_INFO's
+                        * ZRANGE_PRECISION, which can vary depending on the
+                        * last used clear value, which could be from another
+                        * command buffer. Instead of picking the appropriate
+                        * clear value on the GPU, resummarize accurately.
+                        */
+                       VkImageSubresourceRange local_range = *range;
+                       local_range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
+                       local_range.baseMipLevel = 0;
+                       local_range.levelCount = 1;
+
+                       radv_resummarize_depth_image_inplace(cmd_buffer, image, 
&local_range);
+               } else {
+                       radv_initialize_htile(cmd_buffer, image, range, 
0xfffc000f);
+               }
        } else if (radv_layout_is_htile_compressed(image, src_layout, 
src_queue_mask) &&
                   !radv_layout_is_htile_compressed(image, dst_layout, 
dst_queue_mask)) {
                VkImageSubresourceRange local_range = *range;
-- 
2.14.3

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to