On Wed, Oct 17, 2018 at 6:59 AM Danylo Piliaiev <danylo.pilia...@gmail.com> wrote:
> Conditional rendering affects the following functions: > - vkCmdDraw, vkCmdDrawIndexed, vkCmdDrawIndirect, vkCmdDrawIndexedIndirect > - vkCmdDrawIndirectCountKHR, vkCmdDrawIndexedIndirectCountKHR > - vkCmdDispatch, vkCmdDispatchIndirect, vkCmdDispatchBase > - vkCmdClearAttachments > > To reduce reads from memory, the result of the condition is calculated > once and stored in the designated register MI_ALU_REG15. > > In the current implementation, the affected functions expect MI_PREDICATE_RESULT > to be set before they are called, so any code which changes the predicate > should restore it with restore_conditional_render_predicate. > An alternative is to restore MI_PREDICATE_RESULT at the beginning of all > affected functions. > > Signed-off-by: Danylo Piliaiev <danylo.pilia...@globallogic.com> > --- > src/intel/vulkan/anv_blorp.c | 7 +- > src/intel/vulkan/anv_device.c | 12 ++ > src/intel/vulkan/anv_extensions.py | 1 + > src/intel/vulkan/anv_private.h | 2 + > src/intel/vulkan/genX_cmd_buffer.c | 192 ++++++++++++++++++++++++++++- > 5 files changed, 209 insertions(+), 5 deletions(-) > > diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c > index 478b8e7a3d..157875d16f 100644 > --- a/src/intel/vulkan/anv_blorp.c > +++ b/src/intel/vulkan/anv_blorp.c > @@ -1144,8 +1144,11 @@ void anv_CmdClearAttachments( > * trash our depth and stencil buffers. 
> */ > struct blorp_batch batch; > - blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, > - BLORP_BATCH_NO_EMIT_DEPTH_STENCIL); > + enum blorp_batch_flags flags = BLORP_BATCH_NO_EMIT_DEPTH_STENCIL; > + if (cmd_buffer->state.conditional_render_enabled) { > + flags |= BLORP_BATCH_PREDICATE_ENABLE; > + } > + blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, > flags); > > for (uint32_t a = 0; a < attachmentCount; ++a) { > if (pAttachments[a].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) > { > diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c > index a2551452eb..930a192c25 100644 > --- a/src/intel/vulkan/anv_device.c > +++ b/src/intel/vulkan/anv_device.c > @@ -957,6 +957,18 @@ void anv_GetPhysicalDeviceFeatures2( > break; > } > > + case > VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: { > + VkPhysicalDeviceConditionalRenderingFeaturesEXT *features = > + (VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext; > + ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); > + > + features->conditionalRendering = pdevice->info.gen >= 8 || > + pdevice->info.is_haswell; > + features->inheritedConditionalRendering = pdevice->info.gen >= 8 > || > + > pdevice->info.is_haswell; > + break; > + } > + > default: > anv_debug_ignored_stype(ext->sType); > break; > diff --git a/src/intel/vulkan/anv_extensions.py > b/src/intel/vulkan/anv_extensions.py > index c13ce531ee..2ef7a52d01 100644 > --- a/src/intel/vulkan/anv_extensions.py > +++ b/src/intel/vulkan/anv_extensions.py > @@ -127,6 +127,7 @@ EXTENSIONS = [ > Extension('VK_EXT_vertex_attribute_divisor', 3, True), > Extension('VK_EXT_post_depth_coverage', 1, > 'device->info.gen >= 9'), > Extension('VK_EXT_sampler_filter_minmax', 1, > 'device->info.gen >= 9'), > + Extension('VK_EXT_conditional_rendering', 1, > 'device->info.gen >= 8 || device->info.is_haswell'), > ] > > class VkVersion: > diff --git a/src/intel/vulkan/anv_private.h > 
b/src/intel/vulkan/anv_private.h > index 599b903f25..108da51a59 100644 > --- a/src/intel/vulkan/anv_private.h > +++ b/src/intel/vulkan/anv_private.h > @@ -2032,6 +2032,8 @@ struct anv_cmd_state { > */ > bool hiz_enabled; > > + bool > conditional_render_enabled; > + > /** > * Array length is anv_cmd_state::pass::attachment_count. Array > content is > * valid only when recording a render pass instance. > diff --git a/src/intel/vulkan/genX_cmd_buffer.c > b/src/intel/vulkan/genX_cmd_buffer.c > index f07a6aa7c9..87abc443b6 100644 > --- a/src/intel/vulkan/genX_cmd_buffer.c > +++ b/src/intel/vulkan/genX_cmd_buffer.c > @@ -479,8 +479,9 @@ transition_depth_buffer(struct anv_cmd_buffer > *cmd_buffer, > 0, 0, 1, hiz_op); > } > > -#define MI_PREDICATE_SRC0 0x2400 > -#define MI_PREDICATE_SRC1 0x2408 > +#define MI_PREDICATE_SRC0 0x2400 > +#define MI_PREDICATE_SRC1 0x2408 > +#define MI_PREDICATE_RESULT 0x2418 > > static void > set_image_compressed_bit(struct anv_cmd_buffer *cmd_buffer, > @@ -545,6 +546,14 @@ mi_alu(uint32_t opcode, uint32_t operand1, uint32_t > operand2) > > #define CS_GPR(n) (0x2600 + (n) * 8) > > +#if GEN_GEN >= 8 || GEN_IS_HASWELL > +static void > +restore_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer) > +{ > + emit_lrr(&cmd_buffer->batch, MI_PREDICATE_RESULT, > CS_GPR(MI_ALU_REG15)); > Does this work? Is it sufficient to just set MI_PREDICATE_RESULT or do we actually need to use an MI_PREDICATE? I genuinely don't know and this strikes me as odd. > +} > +#endif > + > /* This is only really practical on haswell and above because it requires > * MI math in order to get it correct. 
> */ > @@ -1144,6 +1153,12 @@ transition_color_buffer(struct anv_cmd_buffer > *cmd_buffer, > } > } > > +#if GEN_GEN >= 8 || GEN_IS_HASWELL > + if (cmd_buffer->state.conditional_render_enabled) { > + restore_conditional_render_predicate(cmd_buffer); > + } > +#endif > + > cmd_buffer->state.pending_pipe_bits |= > ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT; > } > @@ -1397,6 +1412,26 @@ genX(BeginCommandBuffer)( > cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_RENDER_TARGETS; > } > > +#if GEN_GEN >= 8 || GEN_IS_HASWELL > + if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { > + vk_foreach_struct_const(s, pBeginInfo->pInheritanceInfo->pNext) { > + switch (s->sType) { > + case > VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_CONDITIONAL_RENDERING_INFO_EXT: > { > + const VkCommandBufferInheritanceConditionalRenderingInfoEXT > *conditional_rendering_info = > + (const > VkCommandBufferInheritanceConditionalRenderingInfoEXT *) s; > + /* We should emit commands as if conditional render is > enabled. */ > + cmd_buffer->state.conditional_render_enabled = > + conditional_rendering_info->conditionalRenderingEnable; > Might be easier to just use vk_find_struct_const() instead of the loop. > + break; > + } > + default: > + anv_debug_ignored_stype(s->sType); > + break; > + } > + } > + } > +#endif > + > return result; > } > > @@ -1501,6 +1536,20 @@ genX(CmdExecuteCommands)( > assert(secondary->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); > assert(!anv_batch_has_error(&secondary->batch)); > > +#if GEN_GEN >= 8 || GEN_IS_HASWELL > + if (secondary->state.conditional_render_enabled) { > + /* Secondary buffer is constructed as if it will be executed > + * with conditional rendering, we should satisfy this dependency > + * regardless of conditional rendering being enabled in primary. 
> + */ > + if (!primary->state.conditional_render_enabled) { > + emit_lri(&primary->batch, CS_GPR(MI_ALU_REG15), 1); > + emit_lri(&primary->batch, CS_GPR(MI_ALU_REG15) + 4, 0); > + emit_lrr(&primary->batch, MI_PREDICATE_RESULT, > CS_GPR(MI_ALU_REG15)); > + } > + } > +#endif > + > if (secondary->usage_flags & > VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) { > /* If we're continuing a render pass from the primary, we need to > @@ -2761,6 +2810,7 @@ void genX(CmdDraw)( > instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass); > > anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) { > + prim.PredicateEnable = > cmd_buffer->state.conditional_render_enabled; > prim.VertexAccessType = SEQUENTIAL; > prim.PrimitiveTopologyType = pipeline->topology; > prim.VertexCountPerInstance = vertexCount; > @@ -2800,6 +2850,7 @@ void genX(CmdDrawIndexed)( > instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass); > > anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) { > + prim.PredicateEnable = > cmd_buffer->state.conditional_render_enabled; > prim.VertexAccessType = RANDOM; > prim.PrimitiveTopologyType = pipeline->topology; > prim.VertexCountPerInstance = indexCount; > @@ -2935,6 +2986,7 @@ void genX(CmdDrawIndirect)( > > anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) { > prim.IndirectParameterEnable = true; > + prim.PredicateEnable = > cmd_buffer->state.conditional_render_enabled; > prim.VertexAccessType = SEQUENTIAL; > prim.PrimitiveTopologyType = pipeline->topology; > } > @@ -2974,6 +3026,7 @@ void genX(CmdDrawIndexedIndirect)( > > anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) { > prim.IndirectParameterEnable = true; > + prim.PredicateEnable = > cmd_buffer->state.conditional_render_enabled; > prim.VertexAccessType = RANDOM; > prim.PrimitiveTopologyType = pipeline->topology; > } > @@ -3024,6 +3077,42 @@ emit_draw_count_predicate(struct anv_cmd_buffer > *cmd_buffer, > } > } > > +#if GEN_GEN >= 8 || 
GEN_IS_HASWELL > +static void > +emit_draw_count_predicate_with_conditional_render( > + struct anv_cmd_buffer *cmd_buffer, > + struct anv_address count_address, > + uint32_t draw_index) > +{ > + const int draw_index_reg = MI_ALU_REG0; > + const int draw_count_reg = MI_ALU_REG14; > + const int condition_reg = MI_ALU_REG15; > + const int tmp_result_reg = MI_ALU_REG1; > + > + emit_lri(&cmd_buffer->batch, CS_GPR(draw_index_reg), draw_index); > + emit_lri(&cmd_buffer->batch, CS_GPR(draw_index_reg) + 4, 0); > + > + uint32_t *dw; > + /* Compute (draw_index < draw_count). > + * We do this by subtracting and storing the carry bit. > + */ > + dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH)); > + dw[1] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCA, draw_index_reg); > + dw[2] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCB, draw_count_reg); > + dw[3] = mi_alu(MI_ALU_SUB, 0, 0); > + dw[4] = mi_alu(MI_ALU_STORE, tmp_result_reg, MI_ALU_CF); > + > + /* & condition */ > + dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH)); > + dw[1] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCA, tmp_result_reg); > + dw[2] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCB, condition_reg); > + dw[3] = mi_alu(MI_ALU_AND, 0, 0); > + dw[4] = mi_alu(MI_ALU_STORE, tmp_result_reg, MI_ALU_ACCU); > + > + emit_lrr(&cmd_buffer->batch, MI_PREDICATE_RESULT, > CS_GPR(tmp_result_reg)); > Again, is this sufficient? Maybe I'm missing something. 
> +} > +#endif > + > void genX(CmdDrawIndirectCountKHR)( > VkCommandBuffer commandBuffer, > VkBuffer _buffer, > @@ -3063,7 +3152,15 @@ void genX(CmdDrawIndirectCountKHR)( > for (uint32_t i = 0; i < maxDrawCount; i++) { > struct anv_address draw = anv_address_add(buffer->address, offset); > > +#if GEN_GEN >= 8 || GEN_IS_HASWELL > + if (cmd_state->conditional_render_enabled) { > + emit_draw_count_predicate_with_conditional_render(cmd_buffer, > count_address, i); > + } else { > + emit_draw_count_predicate(cmd_buffer, count_address, i); > + } > +#else > emit_draw_count_predicate(cmd_buffer, count_address, i); > +#endif > > if (vs_prog_data->uses_firstvertex || > vs_prog_data->uses_baseinstance) > @@ -3082,6 +3179,12 @@ void genX(CmdDrawIndirectCountKHR)( > > offset += stride; > } > + > +#if GEN_GEN >= 8 || GEN_IS_HASWELL > + if (cmd_state->conditional_render_enabled) { > + restore_conditional_render_predicate(cmd_buffer); > + } > +#endif > } > > void genX(CmdDrawIndexedIndirectCountKHR)( > @@ -3123,7 +3226,15 @@ void genX(CmdDrawIndexedIndirectCountKHR)( > for (uint32_t i = 0; i < maxDrawCount; i++) { > struct anv_address draw = anv_address_add(buffer->address, offset); > > +#if GEN_GEN >= 8 || GEN_IS_HASWELL > + if (cmd_state->conditional_render_enabled) { > + emit_draw_count_predicate_with_conditional_render(cmd_buffer, > count_address, i); > + } else { > + emit_draw_count_predicate(cmd_buffer, count_address, i); > + } > +#else > emit_draw_count_predicate(cmd_buffer, count_address, i); > +#endif > > /* TODO: We need to stomp base vertex to 0 somehow */ > if (vs_prog_data->uses_firstvertex || > @@ -3143,6 +3254,12 @@ void genX(CmdDrawIndexedIndirectCountKHR)( > > offset += stride; > } > + > +#if GEN_GEN >= 8 || GEN_IS_HASWELL > + if (cmd_state->conditional_render_enabled) { > + restore_conditional_render_predicate(cmd_buffer); > + } > +#endif > } > > static VkResult > @@ -3351,6 +3468,7 @@ void genX(CmdDispatchBase)( > genX(cmd_buffer_flush_compute_state)(cmd_buffer); 
> > anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), ggw) { > + ggw.PredicateEnable = > cmd_buffer->state.conditional_render_enabled; > ggw.SIMDSize = prog_data->simd_size / 16; > ggw.ThreadDepthCounterMaximum = 0; > ggw.ThreadHeightCounterMaximum = 0; > @@ -3448,7 +3566,8 @@ void genX(CmdDispatchIndirect)( > > anv_batch_emit(batch, GENX(GPGPU_WALKER), ggw) { > ggw.IndirectParameterEnable = true; > - ggw.PredicateEnable = GEN_GEN <= 7; > + ggw.PredicateEnable = GEN_GEN <= 7 || > + > cmd_buffer->state.conditional_render_enabled; > ggw.SIMDSize = prog_data->simd_size / 16; > ggw.ThreadDepthCounterMaximum = 0; > ggw.ThreadHeightCounterMaximum = 0; > @@ -4158,3 +4277,70 @@ void genX(CmdEndRenderPass2KHR)( > { > genX(CmdEndRenderPass)(commandBuffer); > } > + > +#if GEN_GEN >= 8 || GEN_IS_HASWELL > +void genX(CmdBeginConditionalRenderingEXT)( > + VkCommandBuffer commandBuffer, > + const VkConditionalRenderingBeginInfoEXT* > pConditionalRenderingBegin) > +{ > + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); > + ANV_FROM_HANDLE(anv_buffer, buffer, > pConditionalRenderingBegin->buffer); > + struct anv_cmd_state *cmd_state = &cmd_buffer->state; > + struct anv_address value_address = > + anv_address_add(buffer->address, > pConditionalRenderingBegin->offset); > + > + const bool inverted = pConditionalRenderingBegin->flags & > + VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT; > + > + cmd_state->conditional_render_enabled = true; > + > + /* Needed to ensure the memory is coherent for the > MI_LOAD_REGISTER_MEM > + * command when loading the values into the predicate source > registers. > + */ > + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { > + pc.PipeControlFlushEnable = true; > + } > + > + /* Section 19.4 of the Vulkan 1.1.85 spec says: > + * > + * If the value of the predicate in buffer memory changes > + * while conditional rendering is active, the rendering commands > + * may be discarded in an implementation-dependent way. 
> + * Some implementations may latch the value of the predicate > + * upon beginning conditional rendering while others > + * may read it before every rendering command. > + * > + * So it's perfectly fine to read a value from the buffer once. > + */ > + > + emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0, value_address); > + /* Zero the top 32-bits of MI_PREDICATE_SRC0 */ > + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, 0); > + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1, 0); > + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0); > + > + anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) { > + mip.LoadOperation = inverted ? LOAD_LOAD : LOAD_LOADINV; > + mip.CombineOperation = COMBINE_SET; > + mip.CompareOperation = COMPARE_SRCS_EQUAL; > + } > + > + /* Calculate predicate result once and store it in MI_ALU_REG15 > + * to prevent recalculating it when interacting with > + * VK_KHR_draw_indirect_count which also uses predicates. > + * It is also the only way to support conditional render of > + * secondary buffers because they are formed before we > + * know whether conditional render is enabled. > + */ > + emit_lrr(&cmd_buffer->batch, CS_GPR(MI_ALU_REG15), > MI_PREDICATE_RESULT); > +} > + > +void genX(CmdEndConditionalRenderingEXT)( > + VkCommandBuffer commandBuffer) > +{ > + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); > + struct anv_cmd_state *cmd_state = &cmd_buffer->state; > + > + cmd_state->conditional_render_enabled = false; > +} > +#endif > -- > 2.18.0 > >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev