On Wed, Feb 8, 2017 at 6:27 PM, Jason Ekstrand <ja...@jlekstrand.net> wrote:
> On Wed, Feb 8, 2017 at 5:34 PM, Nanley Chery <nanleych...@gmail.com> > wrote: > >> On Thu, Feb 02, 2017 at 01:26:05PM -0800, Jason Ekstrand wrote: >> > In order to get good performance numbers for this, I had to hack up the >> > driver to whack wm_prog_data::uses_kill to true to emulate a discard and >> > used the Sascha "shadowmapping" demo. Setting uses_kill to true dropped >> > the framerate on the demo by 25-30%. Enabling the PMA fix brought it >> > back up to around 90% of the original framerate. This doesn't seem to >> > really impact Dota 2; probably because it doesn't use 16-bit depth. >> > >> > Reviewed-by: Lionel Landwerlin <lionel.g.landwer...@intel.com> >> > --- >> > src/intel/vulkan/TODO | 1 - >> > src/intel/vulkan/anv_cmd_buffer.c | 2 + >> > src/intel/vulkan/anv_genX.h | 3 + >> > src/intel/vulkan/anv_private.h | 17 +++++ >> > src/intel/vulkan/gen7_cmd_buffer.c | 7 ++ >> > src/intel/vulkan/gen8_cmd_buffer.c | 133 >> +++++++++++++++++++++++++++++++++++++ >> > src/intel/vulkan/genX_blorp_exec.c | 5 ++ >> > src/intel/vulkan/genX_cmd_buffer.c | 15 ++++- >> > src/intel/vulkan/genX_pipeline.c | 38 +++++++++++ >> > 9 files changed, 219 insertions(+), 2 deletions(-) >> > >> > diff --git a/src/intel/vulkan/TODO b/src/intel/vulkan/TODO >> > index 38acc0d..f8b73a1 100644 >> > --- a/src/intel/vulkan/TODO >> > +++ b/src/intel/vulkan/TODO >> > @@ -12,5 +12,4 @@ Performance: >> > - Compressed multisample support >> > - Pushing pieces of UBOs? >> > - Enable guardband clipping >> > - - pma stall workaround >> > - Use soft-pin to avoid relocations >> > diff --git a/src/intel/vulkan/anv_cmd_buffer.c >> b/src/intel/vulkan/anv_cmd_buffer.c >> > index 5886fa6..8c08f8d 100644 >> > --- a/src/intel/vulkan/anv_cmd_buffer.c >> > +++ b/src/intel/vulkan/anv_cmd_buffer.c >> > @@ -135,6 +135,8 @@ anv_cmd_state_reset(struct anv_cmd_buffer >> *cmd_buffer) >> > state->restart_index = UINT32_MAX; >> > state->dynamic = default_dynamic_state; >> > state->need_query_wa = true; >> > + state->pma_fix_enabled = false; >> > + state->hiz_enabled = false; >> > >> > if (state->attachments != NULL) { >> > vk_free(&cmd_buffer->pool->alloc, state->attachments); >> > diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h >> > index d04fe38..67147b0 100644 >> > --- a/src/intel/vulkan/anv_genX.h >> > +++ b/src/intel/vulkan/anv_genX.h >> > @@ -55,6 +55,9 @@ void genX(cmd_buffer_flush_dynamic_state)(struct >> anv_cmd_buffer *cmd_buffer); >> > >> > void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer >> *cmd_buffer); >> > >> > +void genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer >> *cmd_buffer, >> > + bool enable); >> > + >> > void >> > genX(emit_urb_setup)(struct anv_device *device, struct anv_batch >> *batch, >> > const struct gen_l3_config *l3_config, >> > diff --git a/src/intel/vulkan/anv_private.h >> b/src/intel/vulkan/anv_private.h >> > index 4fe3ebc..6efe4ea 100644 >> > --- a/src/intel/vulkan/anv_private.h >> > +++ b/src/intel/vulkan/anv_private.h >> > @@ -1163,6 +1163,20 @@ struct anv_cmd_state { >> > bool need_query_wa; >> > >> > /** >> > + * Whether or not the gen8 PMA fix is enabled. We ensure that, at >> the top >> > + * of any command buffer it disabled by disabling it in >> EndCommandBuffer >> ^ >> is? >> >> > + * and before invoking the secondary in ExecuteCommands. >> > + */ >> > + bool pma_fix_enabled; >> > + >> > + /** >> > + * Whether or not we now for certain that HiZ is enabled for the >> current >> ^ >> know >> >> > + * subpass. If, for whatever reason, we are unsure as to whether >> HiZ is >> > + * enabled or not, this will be false. >> > + */ >> > + bool hiz_enabled; >> > + >> > + /** >> > * Array length is anv_cmd_state::pass::attachment_count. Array >> content is >> > * valid only when recording a render pass instance. >> > */ >> > @@ -1465,8 +1479,11 @@ struct anv_pipeline { >> > >> > uint32_t cs_right_mask; >> > >> > + bool writes_depth; >> > + bool depth_test_enable; >> > bool writes_stencil; >> > bool depth_clamp_enable; >> > + bool kill_pixel; >> > >> > struct { >> > uint32_t sf[7]; >> > diff --git a/src/intel/vulkan/gen7_cmd_buffer.c >> b/src/intel/vulkan/gen7_cmd_buffer.c >> > index 013ed87..c1a25e8 100644 >> > --- a/src/intel/vulkan/gen7_cmd_buffer.c >> > +++ b/src/intel/vulkan/gen7_cmd_buffer.c >> > @@ -260,6 +260,13 @@ genX(cmd_buffer_flush_dynamic_state)(struct >> anv_cmd_buffer *cmd_buffer) >> > cmd_buffer->state.dirty = 0; >> > } >> > >> > +void >> > +genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, >> > + bool enable) >> > +{ >> > + /* The NP PMA fix doesn't exist on gen7 */ >> > +} >> > + >> > void genX(CmdSetEvent)( >> > VkCommandBuffer commandBuffer, >> > VkEvent event, >> > diff --git a/src/intel/vulkan/gen8_cmd_buffer.c >> b/src/intel/vulkan/gen8_cmd_buffer.c >> > index 8c8de62..271ab3f 100644 >> > --- a/src/intel/vulkan/gen8_cmd_buffer.c >> > +++ b/src/intel/vulkan/gen8_cmd_buffer.c >> > @@ -155,6 +155,135 @@ __emit_sf_state(struct anv_cmd_buffer *cmd_buffer) >> > #endif >> > >> > void >> > +genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, >> bool enable) >> > +{ >> > +#if GEN_GEN == 8 >> > + if (cmd_buffer->state.pma_fix_enabled == enable) >> > + return; >> > + >> > + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { >> > + pc.DepthCacheFlushEnable = true; >> > + pc.CommandStreamerStallEnable = true; >> > + pc.RenderTargetCacheFlushEnable = true; >> >> Instead of flushing the depth and RT caches, what do you think about >> implementing a stall that writes 0 to the workaround bo? That should >> consume less bandwidth. >> >> To make the pipeline idle, flushing the caches shouldn't be necessary, >> we should be able to do it with a valid pipecontrol packet that has the >> CS stall bit set. >> > > Without having an extremely good idea of how these bits interact with > caching, I'm very reluctant to try too hard to over-optimize the pipe > control. > > >> The BDW PRM states: >> >> If the stall bit is set, the command streamer waits until the pipe is >> completely flushed. >> > > It waits until whatever flush/stall operation you gave it is finished. > You also need to specify a stall/flush operation. If you just do a depth > stall for instance and no depth reads/writes are in-flight, I don't think > it actually stalls at all. We may be able to use StallAtPixelScoreboard > instead but, again, I don't know for sure how this interacts with caches. > > >> > + } >> > + >> > + uint32_t cache_mode; >> > + anv_pack_struct(&cache_mode, GENX(CACHE_MODE_1), >> > + .NPPMAFixEnable = enable, >> > + .NPEarlyZFailsDisableMask = enable, >> ^ >> I don't think you intended to set the mask here. It should be: >> >> .NPEarlyZFailsDisable = enable, >> >> Does fixing this impact your FPS measurements significantly? >> > > Thanks for catching that! I'm a bit surprised that it worked at all. > I'll definitely re-measure. > I just ran Dota2 and fixing this seems to help by around 4%! I'll do a full re-run and get fresh numbers. > > >> > + .NPPMAFixEnableMask = true, >> > + .NPEarlyZFailsDisableMask = true); >> > + anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), >> lri) { >> > + lri.RegisterOffset = GENX(CACHE_MODE_1_num); >> > + lri.DataDWord = cache_mode; >> > + } >> > + >> > + /* After the LRI, a PIPE_CONTROL with both the Depth Stall and >> Depth Cache >> > + * Flush bits is often necessary. We do it regardless because it's >> easier. >> > + * The render cache flush is also necessary if stencil writes are >> enabled. >> > + */ >> >> I couldn't find a requirement to flush these specific caches in the >> docs, but I did find: >> > > Both of these flushes are documented in the bspec (not PRM) docs for > PIPE_CONTROL. > > >> To ensure this command gets executed before upcoming commands in the >> ring, either a stalling pipecontrol should be sent after this >> command, or MMIO 0x20C0 bit 7 should be set to 1. >> >> Perhaps we could use the flush I suggested earlier? >> >> > + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { >> > + pc.DepthStallEnable = true; >> > + pc.DepthCacheFlushEnable = true; >> > + pc.RenderTargetCacheFlushEnable = true; >> > + } >> > + >> > + cmd_buffer->state.pma_fix_enabled = enable; >> > +#endif /* GEN_GEN == 8 */ >> > +} >> > + >> > +static inline bool >> > +want_depth_pma_fix(struct anv_cmd_buffer *cmd_buffer) >> > +{ >> > + assert(GEN_GEN == 8); >> > + >> > + /* From the Broadwell PRM Vol. 2c CACHE_MODE_1::NP_PMA_FIX_ENABLE: >> > + * >> > + * SW must set this bit in order to enable this fix when >> following >> > + * expression is TRUE. >> > + * >> > + * 3DSTATE_WM::ForceThreadDispatch != 1 && >> > + * !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) && >> > + * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) && >> > + * (3DSTATE_DEPTH_BUFFER::HIZ Enable) && >> > + * !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) && >> > + * (3DSTATE_PS_EXTRA::PixelShaderValid) && >> > + * !(3DSTATE_WM_HZ_OP::DepthBufferClear || >> > + * 3DSTATE_WM_HZ_OP::DepthBufferResolve || >> > + * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable >> || >> > + * 3DSTATE_WM_HZ_OP::StencilBufferClear) && >> > + * (3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable) && >> > + * (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels || >> > + * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget || >> > + * 3DSTATE_PS_BLEND::AlphaToCoverageEnable || >> > + * 3DSTATE_PS_BLEND::AlphaTestEnable || >> > + * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) && >> > + * 3DSTATE_WM::ForceKillPix != ForceOff && >> > + * ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable && >> > + * 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) || >> > + * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable && >> > + * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE && >> > + * 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) || >> > + * (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != >> PSCDEPTH_OFF)) >> > + * >> > + * This function only takes care of the pipeline parts of the >> equation. >> >> Which parts are the pipeline parts? It looks like you take care of the >> entire equation in this function. >> > > Yes, I do. > > >> > + */ >> > + >> > + /* These are always true: >> > + * 3DSTATE_WM::ForceThreadDispatch != 1 && >> > + * !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) >> > + */ >> >> I couldn't quite understand what ForcedSampleCount does from looking at >> the HW docs. If you know, could you tell me what this does and why it >> wouldn't change under any circumstance in the future? Otherwise, I >> think we should put an assertion where we assign ForcedSampleCount and >> reference this PMA optimization. >> > > Honestly, I'm not 100% sure what it's for. I think it lets you make the > rasterizer run at a different number of samples than the underlying > surface. We've never used this bit in Vulkan or GL. Also, there's no flag > for us to assert on. > > >> > + >> > + /* We only enable the PMA fix if we know for certain that HiZ is >> enabled. >> > + * If we don't know whether HiZ is enabled or not, we disable the >> PMA fix >> > + * and there is no harm. >> > + * >> > + * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) && >> > + * 3DSTATE_DEPTH_BUFFER::HIZ Enable >> > + */ >> > + if (!cmd_buffer->state.hiz_enabled) >> > + return false; >> > + >> > + /* 3DSTATE_PS_EXTRA::PixelShaderValid */ >> > + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; >> > + if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) >> > + return false; >> > + >> > + /* !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) */ >> > + const struct brw_wm_prog_data *wm_prog_data = >> get_wm_prog_data(pipeline); >> > + if (wm_prog_data->early_fragment_tests) >> > + return false; >> >> In keeping with your top-to-bottom evaluation of the logical statement, >> could you move this part to be above the PixelShaderValid part? >> > > Not really. If we don't' have a fragment shader, we wm_prog_data is NULL > > >> > + >> > + /* We never use anv_pipeline for HiZ ops so this is trivially true: >> > + * !(3DSTATE_WM_HZ_OP::DepthBufferClear || >> ^ >> misaligned asterisk >> > > Fixed. > > >> > + * 3DSTATE_WM_HZ_OP::DepthBufferResolve || >> > + * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable >> || >> > + * 3DSTATE_WM_HZ_OP::StencilBufferClear) >> > + */ >> > + >> > + /* 3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable */ >> > + if (!pipeline->depth_test_enable) >> > + return false; >> > + >> > + /* (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels || >> > + * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget || >> > + * 3DSTATE_PS_BLEND::AlphaToCoverageEnable || >> > + * 3DSTATE_PS_BLEND::AlphaTestEnable || >> > + * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) && >> > + * 3DSTATE_WM::ForceKillPix != ForceOff && >> > + * ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable && >> > + * 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) || >> > + * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable && >> > + * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE && >> > + * 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) || >> > + * (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != >> PSCDEPTH_OFF)) >> > + */ >> > + return (pipeline->kill_pixel && (pipeline->writes_depth || >> > + pipeline->writes_stencil)) || >> >> anv_pipeline::writes_stencil != >> (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable && >> 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE && >> 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE); >> >> The last two lines are not accounted for - we don't detect if the >> currently bound depth_stencil attachment lacks or possesses a stencil >> buffer. >> > > We should be checking for the presence of a stencil aspect in the > pipeline. I think that got missed when I added the writes_stencil bit. We > do by the end of the series, but I'll patch up the disable stencil writes > patch to make it check. > > >> This is a valid scenario according to section 25.9. Stencil Test of the >> Vulkan spec, which states: >> >> If there is no stencil framebuffer attachment, stencil modification >> cannot occur, and it is as if the stencil tests always pass. >> >> anv_pipeline::writes_depth should be correct because hiz_enabled is true >> by the time we reach it. For hiz_enabled to be true, there must be a >> depth buffer present. >> > > Yup > > >> > + wm_prog_data->computed_depth_mode != PSCDEPTH_OFF; >> > +} >> > + >> > +void >> > genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer >> *cmd_buffer) >> > { >> > struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; >> > @@ -211,6 +340,7 @@ genX(cmd_buffer_flush_dynamic_state)(struct >> anv_cmd_buffer *cmd_buffer) >> > } >> > >> > if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | >> > + ANV_CMD_DIRTY_RENDER_TARGETS | >> >> Why is this necessary? >> > > We set RENDER_TARGETS dirty when the depth buffer changes due to > NextSubpass or BeginCommandBuffer > > >> > >> > ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK >> | >> > >> > ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) >> { >> > uint32_t wm_depth_stencil_dw[GENX(3DSTA >> TE_WM_DEPTH_STENCIL_length)]; >> > @@ -234,6 +364,9 @@ genX(cmd_buffer_flush_dynamic_state)(struct >> anv_cmd_buffer *cmd_buffer) >> > >> > anv_batch_emit_merge(&cmd_buffer->batch, wm_depth_stencil_dw, >> > pipeline->gen8.wm_depth_stencil); >> > + >> > + genX(cmd_buffer_enable_pma_fix)(cmd_buffer, >> > + want_depth_pma_fix(cmd_buffer)); >> > } >> > #else >> > if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) >> { >> > diff --git a/src/intel/vulkan/genX_blorp_exec.c >> b/src/intel/vulkan/genX_blorp_exec.c >> > index 663e6c9..6f0b063 100644 >> > --- a/src/intel/vulkan/genX_blorp_exec.c >> > +++ b/src/intel/vulkan/genX_blorp_exec.c >> > @@ -154,6 +154,11 @@ genX(blorp_exec)(struct blorp_batch *batch, >> > >> > genX(cmd_buffer_emit_gen7_depth_flush)(cmd_buffer); >> > >> > + /* BLORP doesn't do anything fancy with depth such as discards, so >> we want >> > + * the PMA fix off. Also, off is always the safe option. >> > + */ >> > + genX(cmd_buffer_enable_pma_fix)(cmd_buffer, false); >> > + >> > blorp_exec(batch, params); >> > >> > cmd_buffer->state.vb_dirty = ~0; >> > diff --git a/src/intel/vulkan/genX_cmd_buffer.c >> b/src/intel/vulkan/genX_cmd_buffer.c >> > index b6b7f74..66de93a 100644 >> > --- a/src/intel/vulkan/genX_cmd_buffer.c >> > +++ b/src/intel/vulkan/genX_cmd_buffer.c >> > @@ -627,6 +627,11 @@ genX(EndCommandBuffer)( >> > { >> > ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); >> > >> > + /* We want every command buffer to start with the PMA fix in a >> known state, >> > + * so we disable it at the end of the command buffer. >> > + */ >> > + genX(cmd_buffer_enable_pma_fix)(cmd_buffer, false); >> > + >> > genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); >> > >> > anv_cmd_buffer_end_batch_buffer(cmd_buffer); >> > @@ -644,6 +649,11 @@ genX(CmdExecuteCommands)( >> > >> > assert(primary->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); >> > >> > + /* The secondary command buffers will assume that the PMA fix is >> disabled >> > + * when they begin executing. Make sure this is true. >> > + */ >> > + genX(cmd_buffer_enable_pma_fix)(primary, false); >> > + >> > for (uint32_t i = 0; i < commandBufferCount; i++) { >> > ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]); >> > >> > @@ -2181,7 +2191,8 @@ cmd_buffer_emit_depth_stencil(struct >> anv_cmd_buffer *cmd_buffer) >> > const bool has_stencil = >> > image && (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT); >> > >> > - /* FIXME: Implement the PMA stall W/A */ >> > + cmd_buffer->state.hiz_enabled = has_hiz; >> > + >> > /* FIXME: Width and Height are wrong */ >> > >> > genX(cmd_buffer_emit_gen7_depth_flush)(cmd_buffer); >> > @@ -2419,6 +2430,8 @@ void genX(CmdEndRenderPass)( >> > >> > anv_cmd_buffer_resolve_subpass(cmd_buffer); >> > >> > + cmd_buffer->state.hiz_enabled = false; >> > + >> > #ifndef NDEBUG >> > anv_dump_add_framebuffer(cmd_buffer, cmd_buffer->state.framebuffer) >> ; >> > #endif >> > diff --git a/src/intel/vulkan/genX_pipeline.c >> b/src/intel/vulkan/genX_pipeline.c >> > index 18fe48c..0588d01 100644 >> > --- a/src/intel/vulkan/genX_pipeline.c >> > +++ b/src/intel/vulkan/genX_pipeline.c >> > @@ -653,6 +653,8 @@ emit_ds_state(struct anv_pipeline *pipeline, >> > * to make sure it's initialized to something useful. >> > */ >> > pipeline->writes_stencil = false; >> > + pipeline->writes_depth = false; >> > + pipeline->depth_test_enable = false; >> > memset(depth_stencil_dw, 0, sizeof(depth_stencil_dw)); >> > return; >> > } >> > @@ -711,6 +713,9 @@ emit_ds_state(struct anv_pipeline *pipeline, >> > if (info->depthTestEnable && info->depthCompareOp == >> VK_COMPARE_OP_EQUAL) >> > depth_stencil.DepthBufferWriteEnable = false; >> > >> > + pipeline->writes_depth = depth_stencil.DepthBufferWriteEnable; >> > + pipeline->depth_test_enable = depth_stencil.DepthTestEnable; >> > + >> > #if GEN_GEN <= 7 >> > GENX(DEPTH_STENCIL_STATE_pack)(NULL, depth_stencil_dw, >> &depth_stencil); >> > #else >> > @@ -1429,6 +1434,38 @@ emit_3dstate_vf_topology(struct anv_pipeline >> *pipeline) >> > } >> > #endif >> > >> > +static void >> > +compute_kill_pixel(struct anv_pipeline *pipeline, >> > + const VkPipelineMultisampleStateCreateInfo >> *ms_info, >> > + const struct anv_subpass *subpass) >> > +{ >> > + if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) { >> > + pipeline->kill_pixel = false; >> > + return; >> > + } >> > + >> > + const struct brw_wm_prog_data *wm_prog_data = >> get_wm_prog_data(pipeline); >> > + >> > + /* This computes the KillPixel portion of the computation for >> whether or >> > + * not we want to enable the PMA fix on gen8. It's given by this >> chunk of >> > + * the giant formula: >> > + * >> > + * (3DSTATE_PS_EXTRA::PixelShaderKillsPixels || >> > + * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget || >> > + * 3DSTATE_PS_BLEND::AlphaToCoverageEnable || >> > + * 3DSTATE_PS_BLEND::AlphaTestEnable || >> > + * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) >> > + * >> > + * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable is always false and >> so is >> > + * 3DSTATE_PS_BLEND::AlphaTestEnable since Vulkan doesn't have a >> concept >> > + * of an alpha test. >> > + */ >> > + pipeline->kill_pixel = >> > + subpass->has_ds_self_dep || wm_prog_data->uses_kill || >> >> Why do we set kill_pixel in the presence of has_ds_self_dep? >> >> Section 7.1 of the Vulkan spec states: >> >> If a subpass uses the same attachment as both an input attachment and >> either a color attachment or a depth/stencil attachment, writes via >> the color or depth/stencil attachment are not automatically made >> visible to reads via the input attachment, causing a feedback loop, >> except in any of the following conditions: >> >> [...] >> >> I don't see how any of the conditions are satisfied through the use >> of kill_pixel. >> > > There's a really good comment where we set 3DSTATE_PS::PixelShaderKillsPixel > and 3DSTATE_PS_EXTRA::PixelShaderKillsPixel > > >> > + wm_prog_data->uses_omask || >> > + (ms_info && ms_info->alphaToCoverageEnable); >> > +} >> > + >> > static VkResult >> > genX(graphics_pipeline_create)( >> > VkDevice _device, >> > @@ -1466,6 +1503,7 @@ genX(graphics_pipeline_create)( >> > emit_ds_state(pipeline, pCreateInfo->pDepthStencilState, pass, >> subpass); >> > emit_cb_state(pipeline, pCreateInfo->pColorBlendState, >> > pCreateInfo->pMultisampleState); >> > + compute_kill_pixel(pipeline, pCreateInfo->pMultisampleState, >> subpass); >> > >> > emit_urb_setup(pipeline); >> > >> > -- >> > 2.5.0.400.gff86faf >> > >> > _______________________________________________ >> > mesa-dev mailing list >> > mesa-dev@lists.freedesktop.org >> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >> > >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev