On 02/07/2014 10:43 PM, Kenneth Graunke wrote: > Broadwell's 3DSTATE_WM_HZ_OP packet makes this much easier. > > Instead of programming the whole pipeline, we simply have to emit the > depth/stencil packets, a state override, and a pipe control. Then > arrange for the state to be put back. This is easily done from a single > function. > > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> > --- > src/mesa/drivers/dri/i965/brw_blorp.cpp | 8 +- > src/mesa/drivers/dri/i965/brw_context.h | 4 + > src/mesa/drivers/dri/i965/gen8_depth_state.c | 105 > +++++++++++++++++++++++++++ > 3 files changed, 115 insertions(+), 2 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp > b/src/mesa/drivers/dri/i965/brw_blorp.cpp > index 76537c8..4a5001d 100644 > --- a/src/mesa/drivers/dri/i965/brw_blorp.cpp > +++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp > @@ -195,8 +195,12 @@ intel_hiz_exec(struct brw_context *brw, struct > intel_mipmap_tree *mt, > DBG("%s %s to mt %p level %d layer %d\n", > __FUNCTION__, opname, mt, level, layer); > > - brw_hiz_op_params params(mt, level, layer, op); > - brw_blorp_exec(brw, &params); > + if (brw->gen >= 8) { > + gen8_hiz_exec(brw, mt, level, layer, op); > + } else { > + brw_hiz_op_params params(mt, level, layer, op); > + brw_blorp_exec(brw, &params); > + } > } > > } /* extern "C" */ > diff --git a/src/mesa/drivers/dri/i965/brw_context.h > b/src/mesa/drivers/dri/i965/brw_context.h > index cf0fe98..c107683 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.h > +++ b/src/mesa/drivers/dri/i965/brw_context.h > @@ -61,6 +61,7 @@ extern "C" { > #include "intel_debug.h" > #include "intel_screen.h" > #include "intel_tex_obj.h" > +#include "intel_resolve_map.h" > > /* Glossary: > * > @@ -1891,6 +1892,9 @@ gen8_emit_depth_stencil_hiz(struct brw_context *brw, > uint32_t width, uint32_t height, > uint32_t tile_x, uint32_t tile_y); > > +void gen8_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt, > + unsigned int level, unsigned int layer, 
enum gen6_hiz_op > op); > + > extern const GLuint prim_to_hw_prim[GL_TRIANGLE_STRIP_ADJACENCY+1]; > > void > diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c > b/src/mesa/drivers/dri/i965/gen8_depth_state.c > index f30ff28..3fa20c8 100644 > --- a/src/mesa/drivers/dri/i965/gen8_depth_state.c > +++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c > @@ -203,3 +203,108 @@ gen8_emit_depth_stencil_hiz(struct brw_context *brw, > brw->depthstencil.stencil_offset, > hiz, width, height, depth, lod, min_array_element); > } > + > +/** > + * Emit packets to perform a depth/HiZ resolve or fast depth/stencil clear. > + * > + * See the "Optimized Depth Buffer Clear and/or Stencil Buffer Clear" section > + * of the hardware documentation for details. > + */ > +void > +gen8_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt, > + unsigned int level, unsigned int layer, enum gen6_hiz_op op) > +{ > + if (op == GEN6_HIZ_OP_NONE) > + return; > + > + assert(mt->first_level == 0); > + > + struct intel_mipmap_level *miplevel = &mt->level[level]; > + > + /* The basic algorithm is: > + * - If needed, emit 3DSTATE_{DEPTH,HIER_DEPTH,STENCIL}_BUFFER and > + * 3DSTATE_CLEAR_PARAMS packets to set up the relevant buffers. > + * - If needed, emit 3DSTATE_DRAWING_RECTANGLE. > + * - Emit 3DSTATE_WM_HZ_OP with a bit set for the particular operation. > + * - Do a special PIPE_CONTROL to trigger an implicit rectangle primitive. > + * - Emit 3DSTATE_WM_HZ_OP with no bits set to return to normal rendering. 
> + */ > + emit_depth_packets(brw, mt, > + brw_depth_format(brw, mt->format), > + BRW_SURFACE_2D, > + true, /* depth writes */ > + NULL, false, 0, /* no stencil for now */ > + true, /* hiz */ > + mt->logical_width0, > + mt->logical_height0, > + MAX2(mt->logical_depth0, 1), > + level, > + layer); /* min_array_element */ > + > + BEGIN_BATCH(4); > + OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); > + OUT_BATCH(0); > + OUT_BATCH(((mt->logical_width0 - 1) & 0xffff) | > + ((mt->logical_height0 - 1) << 16)); > + OUT_BATCH(0); > + ADVANCE_BATCH(); > + > + /* Emit 3DSTATE_WM_HZ_OP to override pipeline state for the particular > + * resolve or clear operation we want to perform. > + */ > + uint32_t dw1 = 0; > + > + switch (op) { > + case GEN6_HIZ_OP_DEPTH_RESOLVE: > + dw1 |= GEN8_WM_HZ_DEPTH_RESOLVE; > + break; > + case GEN6_HIZ_OP_HIZ_RESOLVE: > + dw1 |= GEN8_WM_HZ_HIZ_RESOLVE; > + break; > + case GEN6_HIZ_OP_DEPTH_CLEAR: > + dw1 |= GEN8_WM_HZ_DEPTH_CLEAR; > + break; > + case GEN6_HIZ_OP_NONE: > + assert(!"Should not get here."); > + } > + > + uint32_t sample_mask = 0xFFFF; > + if (mt->num_samples > 0) { > + dw1 |= SET_FIELD(ffs(mt->num_samples) - 1, GEN8_WM_HZ_NUM_SAMPLES); > + sample_mask = gen6_determine_sample_mask(brw); > + } > + > + BEGIN_BATCH(5); > + OUT_BATCH(_3DSTATE_WM_HZ_OP << 16 | (5 - 2)); > + OUT_BATCH(dw1); > + OUT_BATCH(0); > + OUT_BATCH(SET_FIELD(miplevel->width, GEN8_WM_HZ_CLEAR_RECTANGLE_X_MAX) | > + SET_FIELD(miplevel->height, GEN8_WM_HZ_CLEAR_RECTANGLE_Y_MAX));
I'm concerned about a possible interaction with multisampling: miplevel->width/height are based on the physical dimensions, rather than the logical ones. For example, a (logical) 256x256 texture may have a level 0 miplevel->width of 512 (4x MSAA) or 1024 (8x MSAA). I'm at a bit of a loss for what to do instead, though. > + OUT_BATCH(SET_FIELD(sample_mask, GEN8_WM_HZ_SAMPLE_MASK)); > + ADVANCE_BATCH(); > + > + /* Emit a PIPE_CONTROL with "Post-Sync Operation" set to "Write Immediate > + * Data", and no other bits set. This causes 3DSTATE_WM_HZ_OP's state to > + * take effect, and spawns a rectangle primitive. > + */ > + brw_emit_pipe_control_write(brw, > + PIPE_CONTROL_WRITE_IMMEDIATE, > + brw->batch.workaround_bo, 0, 0, 0); > + > + /* Emit 3DSTATE_WM_HZ_OP again to disable the state overrides. */ > + BEGIN_BATCH(5); > + OUT_BATCH(_3DSTATE_WM_HZ_OP << 16 | (5 - 2)); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + ADVANCE_BATCH(); > + > + /* We've clobbered all of the depth packets, and the drawing rectangle, > + * so we need to ensure those packets are re-emitted before the next > + * primitive. > + * > + * Setting _NEW_DEPTH and _NEW_BUFFERS covers it, but is rather overkill. > + */ > + brw->state.dirty.mesa |= _NEW_DEPTH | _NEW_BUFFERS; > +} >
signature.asc
Description: OpenPGP digital signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev