On Wed, Aug 31, 2016 at 08:29:55PM -0700, Nanley Chery wrote: > From: Jason Ekstrand <jason.ekstr...@intel.com> > > Nanley Chery: > (rebase) > - Resolve conflicts with the new anv_batch_emit macro > (amend) > - Update commit title > - Combine all HZ operations into one function > - Add code for performing HiZ resolve operations > - Add proper stencil and multisampling support > - Set the proper clear rectangles > - Add required cases for aborting an HZ operation > > Signed-off-by: Nanley Chery <nanley.g.ch...@intel.com> > --- > src/intel/vulkan/anv_genX.h | 3 + > src/intel/vulkan/anv_private.h | 6 ++ > src/intel/vulkan/gen7_cmd_buffer.c | 5 ++ > src/intel/vulkan/gen8_cmd_buffer.c | 124 > +++++++++++++++++++++++++++++++++++++ > 4 files changed, 138 insertions(+) > > diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h > index cf5a232..16de990 100644 > --- a/src/intel/vulkan/anv_genX.h > +++ b/src/intel/vulkan/anv_genX.h > @@ -54,6 +54,9 @@ void genX(cmd_buffer_flush_dynamic_state)(struct > anv_cmd_buffer *cmd_buffer); > > void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer); > > +void genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer *cmd_buffer, > + enum anv_hz_op op); > + > VkResult > genX(graphics_pipeline_create)(VkDevice _device, > struct anv_pipeline_cache *cache, > diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h > index 5718a19..40325fd 100644 > --- a/src/intel/vulkan/anv_private.h > +++ b/src/intel/vulkan/anv_private.h > @@ -1401,6 +1401,12 @@ anv_cmd_buffer_get_depth_stencil_view(const struct > anv_cmd_buffer *cmd_buffer); > > void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); > > +enum anv_hz_op { > + ANV_HZ_OP_CLEAR, > + ANV_HZ_OP_HIZ_RESOLVE, > + ANV_HZ_OP_DEPTH_RESOLVE, > +}; > + > struct anv_fence { > struct anv_bo bo; > struct drm_i915_gem_execbuffer2 execbuf; > diff --git a/src/intel/vulkan/gen7_cmd_buffer.c > b/src/intel/vulkan/gen7_cmd_buffer.c > index 61778aa..a057a04 
100644 > --- a/src/intel/vulkan/gen7_cmd_buffer.c > +++ b/src/intel/vulkan/gen7_cmd_buffer.c > @@ -323,6 +323,11 @@ genX(cmd_buffer_flush_dynamic_state)(struct > anv_cmd_buffer *cmd_buffer) > cmd_buffer->state.dirty = 0; > } > > +void > +genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer *cmd_buffer, enum anv_hz_op > op) > +{ > +} > + > void genX(CmdSetEvent)( > VkCommandBuffer commandBuffer, > VkEvent event, > diff --git a/src/intel/vulkan/gen8_cmd_buffer.c > b/src/intel/vulkan/gen8_cmd_buffer.c > index e22b4e2..4f27350 100644 > --- a/src/intel/vulkan/gen8_cmd_buffer.c > +++ b/src/intel/vulkan/gen8_cmd_buffer.c > @@ -399,6 +399,130 @@ genX(cmd_buffer_flush_compute_state)(struct > anv_cmd_buffer *cmd_buffer) > genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); > } > > + > +/** > + * Emit the HZ_OP packet in the sequence specified by the BDW PRM section > + * entitled: "Optimized Depth Buffer Clear and/or Stencil Buffer Clear." > + */ > +void > +genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer *cmd_buffer, enum anv_hz_op > op) > +{ > + struct anv_cmd_state *cmd_state = &cmd_buffer->state; > + const struct anv_image_view *iview = > + anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); > + > + if (iview == NULL || !anv_image_has_hiz(iview->image)) > + return; > + > + const uint32_t ds = cmd_state->subpass->depth_stencil_attachment; > + const bool full_surface_op = > + cmd_state->render_area.extent.width == iview->extent.width && > + cmd_state->render_area.extent.height == iview->extent.height; > + > + /* Validate that we can perform the HZ operation and that it's necessary. > */ > + switch (op) { > + case ANV_HZ_OP_CLEAR: > + if (cmd_buffer->state.pass->attachments[ds].load_op != > + VK_ATTACHMENT_LOAD_OP_CLEAR) > + return; > + > + /* Apply alignment restrictions. For a sample count of 16, the formulas > + * reduce to identity and indicate that no alignment is required. > + */ > + if (!full_surface_op && iview->image->samples < 16) {
There may be a bug here. I plan to remove the `samples < 16` condition because, when samples == 16, I'd expect the alignment to become 2x1, not 1x1 (8 / 4 by 4 / 4, assuming the interleave scale for 16x MSAA is 4x4). I also need to take a second look at isl_msaa_interleaved_scale_px_to_sa to ensure it returns the expected values. - Nanley > + uint32_t align_w = 1; > + uint32_t align_h = 1; > + > + if (iview->image->samples > 1) { > + isl_msaa_interleaved_scale_px_to_sa(iview->image->samples, > + &align_w, &align_h); > + } > + > + align_w = 8 / align_w; > + align_h = 4 / align_h; > + > + if (cmd_state->render_area.offset.x % align_w || > + cmd_state->render_area.offset.y % align_h || > + cmd_state->render_area.extent.width % align_w || > + cmd_state->render_area.extent.height % align_h) > + return; > + } > + break; > + case ANV_HZ_OP_DEPTH_RESOLVE: > + if (cmd_buffer->state.pass->attachments[ds].store_op != > + VK_ATTACHMENT_STORE_OP_STORE) > + return; > + break; > + case ANV_HZ_OP_HIZ_RESOLVE: > + if (cmd_buffer->state.pass->attachments[ds].load_op != > + VK_ATTACHMENT_LOAD_OP_LOAD) > + return; > + break; > + } > + > + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_HZ_OP), hzp) { > + switch (op) { > + case ANV_HZ_OP_CLEAR: > + hzp.StencilBufferClearEnable = VK_IMAGE_ASPECT_STENCIL_BIT & > + cmd_state->attachments[ds].pending_clear_aspects; > + hzp.DepthBufferClearEnable = VK_IMAGE_ASPECT_DEPTH_BIT & > + cmd_state->attachments[ds].pending_clear_aspects; > + hzp.FullSurfaceDepthandStencilClear = full_surface_op; > + hzp.StencilClearValue = 0xff & > + > cmd_state->attachments[ds].clear_value.depthStencil.stencil; > + > + /* Mark aspects as cleared */ > + cmd_state->attachments[ds].pending_clear_aspects = 0; > + break; > + case ANV_HZ_OP_DEPTH_RESOLVE: > + hzp.DepthBufferResolveEnable = true; > + break; > + case ANV_HZ_OP_HIZ_RESOLVE: > + hzp.HierarchicalDepthBufferResolveEnable = true; > + break; > + } > + > + /* The depth resolve rectangle must match the size of the previous > clear > + * rectangle. 
> + * > + * The HiZ resolve rectangle is specified as needing to be the > + * size of the full RT and aligned to 8x4, these requirements are in > + * conflict if the RT extent is not a multiple of 8x4. Testing shows > + * that setting the rectangle to match the render area works just fine. > + * > + * In a manner similar to i965, we'd like to diverge from the PRMs here > + * to reduce the number of HiZ blocks written to. > + */ > + hzp.ClearRectangleXMin = anv_minify(cmd_state->render_area.offset.x, > + iview->base_mip); > + hzp.ClearRectangleYMin = anv_minify(cmd_state->render_area.offset.y, > + iview->base_mip); > + hzp.ClearRectangleXMax = anv_minify(cmd_state->render_area.offset.x + > + > cmd_state->render_area.extent.width, > + iview->base_mip); > + hzp.ClearRectangleYMax = anv_minify(cmd_state->render_area.offset.y + > + > cmd_state->render_area.extent.height, > + iview->base_mip); > + > + /* Due to a hardware issue, this bit MBZ */ > + hzp.ScissorRectangleEnable = false; > + hzp.NumberofMultisamples = ffs(iview->image->samples) - 1; > + hzp.SampleMask = 0xFFFF; > + } > + > + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { > + pc.PostSyncOperation = WriteImmediateData; > + pc.Address = > + (struct anv_address){ &cmd_buffer->device->workaround_bo, 0 }; > + } > + > + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_HZ_OP), hzp); > + > + /* TODO: Determine if a DepthFlush and DepthStall is really necessary for > + * non-full_surface_op clears. > + */ > +} > + > void genX(CmdSetEvent)( > VkCommandBuffer commandBuffer, > VkEvent _event, > -- > 2.9.3 > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev