On 06/03/2011 12:47 PM, Chad Versace wrote:
When emitting 3DSTATE_DEPTH_BUFFER, also emit 3DSTATE_HIER_DEPTH_BUFFER if
there is a hiz buffer. Ditto for 3DSTATE_STENCIL_BUFFER and a separate
stencil buffer.

Signed-off-by: Chad Versace<c...@chad-versace.us>

Overall, looks good...a few comments inline.

---
  src/mesa/drivers/dri/i965/brw_misc_state.c       |  114 ++++++++++++++++++++--
  src/mesa/drivers/dri/i965/brw_wm_surface_state.c |    1 +
  2 files changed, 106 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c 
b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 3ec9009..84120da 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -202,6 +202,8 @@ static void prepare_depthbuffer(struct brw_context *brw)

     if (drb)
        brw_add_validated_bo(brw, drb->region->buffer);
+   if (drb&&  drb->hiz_region)
+      brw_add_validated_bo(brw, drb->hiz_region->buffer);
     if (srb)
        brw_add_validated_bo(brw, srb->region->buffer);
  }
@@ -212,14 +214,28 @@ static void emit_depthbuffer(struct brw_context *brw)
     struct gl_context *ctx =&intel->ctx;
     struct gl_framebuffer *fb = ctx->DrawBuffer;
     /* _NEW_BUFFERS */
-   struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
+   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, 
BUFFER_DEPTH);
+   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, 
BUFFER_STENCIL);
+   struct intel_region *hiz_region = depth_irb ? depth_irb->hiz_region : NULL;
     unsigned int len;

-   /* If we're combined depth stencil but no depth is attached, look
-    * up stencil.
+   /*
+    * If depth and stencil buffers are identical, then don't use separate
+    * stencil.
      */
-   if (!irb)
-      irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
+   if (depth_irb&&  depth_irb == stencil_irb) {
+      stencil_irb = NULL;
+   }
+
+   /*
+    * If stencil buffer uses combined depth/stencil format, but no depth buffer
+    * is attached, then use stencil buffer as depth buffer.
+    */
+   if (!depth_irb&&  stencil_irb
+&&  stencil_irb->Base.Format == MESA_FORMAT_S8_Z24) {
+      depth_irb = stencil_irb;
+      stencil_irb = NULL;
+   }

     if (intel->gen>= 6)
        len = 7;
@@ -228,7 +244,7 @@ static void emit_depthbuffer(struct brw_context *brw)
     else
        len = 5;

-   if (!irb) {
+   if (!depth_irb&&  !stencil_irb) {
        BEGIN_BATCH(len);
        OUT_BATCH(_3DSTATE_DEPTH_BUFFER<<  16 | (len - 2));
        OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT<<  18) |
@@ -244,11 +260,57 @@ static void emit_depthbuffer(struct brw_context *brw)
         OUT_BATCH(0);

        ADVANCE_BATCH();
+
+   } else if (!depth_irb&&  stencil_irb) {
+      /*
+       * There exists a separate stencil buffer but no depth buffer.
+       *
+       * The stencil buffer inherits most of its fields from
+       * 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width, and
+       * height.
+       *
+       * Since the stencil buffer has quirky pitch requirements, its region
+       * was allocated with half height and double cpp. So we need
+       * a multiplier of 2 to obtain the surface's real height.
+       *
+       * Enable the hiz bit because it and the separate stencil bit must have
+       * the same value. From Section 2.11.5.6.1.1 3DSTATE_DEPTH_BUFFER, Bit
+       * 1.21 "Separate Stencil Enable":
+       *     [DevIL]: If this field is enabled, Hierarchical Depth Buffer
+       *     Enable must also be enabled.
+       *
+       *     [DevGT]: This field must be set to the same value (enabled or
+       *     disabled) as Hierarchical Depth Buffer Enable
+       */
+      assert(intel->has_separate_stencil);
+      assert(stencil_irb->Base.Format == MESA_FORMAT_S8);
+
+      BEGIN_BATCH(len);
+      OUT_BATCH(_3DSTATE_DEPTH_BUFFER<<  16 | (len - 2));
+      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT<<  18) |
+               (1<<  21) | /* separate stencil enable */
+               (1<<  22) | /* hiz enable */
+               (BRW_TILEWALK_YMAJOR<<  26) |
+               (BRW_SURFACE_2D<<  29));
+      OUT_BATCH(0);
+      OUT_BATCH(((stencil_irb->region->width - 1)<<  6) |
+                (2 * stencil_irb->region->height - 1)<<  19);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+
+      if (intel->gen>= 6)
+        OUT_BATCH(0);
+
+      ADVANCE_BATCH();
+
     } else {
-      struct intel_region *region = irb->region;
+      struct intel_region *region = depth_irb->region;
        unsigned int format;
        uint32_t tile_x, tile_y, offset;

+      /* If using separate stencil, hiz must be enabled. */
+      assert(!stencil_irb || hiz_region);
+
        switch (region->cpp) {
        case 2:
         format = BRW_DEPTHFORMAT_D16_UNORM;
@@ -256,6 +318,8 @@ static void emit_depthbuffer(struct brw_context *brw)
        case 4:
         if (intel->depth_buffer_is_float)
            format = BRW_DEPTHFORMAT_D32_FLOAT;
+        else if (hiz_region)
+           format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
         else
            format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
         break;
@@ -267,11 +331,14 @@ static void emit_depthbuffer(struct brw_context *brw)
        offset = intel_region_tile_offsets(region,&tile_x,&tile_y);

        assert(intel->gen<  6 || region->tiling == I915_TILING_Y);
+      assert(!hiz_region || region->tiling == I915_TILING_Y);

        BEGIN_BATCH(len);
        OUT_BATCH(_3DSTATE_DEPTH_BUFFER<<  16 | (len - 2));
        OUT_BATCH(((region->pitch * region->cpp) - 1) |
                (format<<  18) |
+               ((hiz_region ? 1 : 0)<<  21) | /* separate stencil enable */
+               ((hiz_region ? 1 : 0)<<  22) | /* hiz enable */

This surprised me for a moment. The reason these two need to be the same is the DevGT comment - HiZ if-and-only-if Separate Stencil. But stencil_irb may not exist here. I don't know if that's really a problem, though...is it?

                (BRW_TILEWALK_YMAJOR<<  26) |
                ((region->tiling != I915_TILING_NONE)<<  27) |
                (BRW_SURFACE_2D<<  29));
@@ -294,8 +361,37 @@ static void emit_depthbuffer(struct brw_context *brw)
        ADVANCE_BATCH();
     }

-   /* Initialize it for safety. */
-   if (intel->gen>= 6) {
+   /* Emit hiz buffer. */
+   if (hiz_region) {
+      BEGIN_BATCH(3);
+      OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER<<  16) | (3 - 2));
+      OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
+      OUT_RELOC(hiz_region->buffer,
+               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+               0);
+      ADVANCE_BATCH();
+   }
+
+   /* Emit stencil buffer. */
+   if (stencil_irb) {
+      BEGIN_BATCH(3);
+      OUT_BATCH((_3DSTATE_STENCIL_BUFFER<<  16) | (3 - 2));
+      OUT_BATCH(stencil_irb->region->pitch * stencil_irb->region->cpp - 1);
+      OUT_RELOC(stencil_irb->region->buffer,
+               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+               0);
+      ADVANCE_BATCH();
+   }

Do we need to emit 3DSTATE_STENCIL_BUFFER with all 0's in the stencil_irb == NULL case? Ditto for HiZ I guess. Just being a bit paranoid.

+   /*
+    * On Gen>= 6, emit clear params for safety. If using hiz, then clear
+    * params must be emitted.
+    *
+    * From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS:
+    *     3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE packet
+    *     when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
+    */
+   if (intel->gen>= 6 || hiz_region) {
        BEGIN_BATCH(2);
        OUT_BATCH(_3DSTATE_CLEAR_PARAMS<<  16 | (2 - 2));
        OUT_BATCH(0);
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 6c1eba6..5dadb5b 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -134,6 +134,7 @@ brw_render_target_supported(gl_format format)
      */
     if (format == MESA_FORMAT_S8_Z24 ||
         format == MESA_FORMAT_X8_Z24 ||
+       format == MESA_FORMAT_S8 ||
         format == MESA_FORMAT_Z16) {
        return true;
     }

This seems unrelated to the rest of the patch, but is necessary. While you're at it, please add "case MESA_FORMAT_X8_Z24:" to translate_tex_format a few lines later. I had to add that to avoid assertion failures on Ivybridge when running glsl-fs-shadow2d.shader_test.

--Kenneth
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to