On Wed, Oct 17, 2018 at 6:59 AM Danylo Piliaiev <danylo.pilia...@gmail.com> wrote:
> Signed-off-by: Danylo Piliaiev <danylo.pilia...@globallogic.com>
> ---
>  src/intel/vulkan/anv_extensions.py |   1 +
>  src/intel/vulkan/genX_cmd_buffer.c | 155 +++++++++++++++++++++++++++++
>  2 files changed, 156 insertions(+)
>
> diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py
> index d4915c9501..7f44da6648 100644
> --- a/src/intel/vulkan/anv_extensions.py
> +++ b/src/intel/vulkan/anv_extensions.py
> @@ -113,6 +113,7 @@ EXTENSIONS = [
>      Extension('VK_KHR_xlib_surface',                      6, 'VK_USE_PLATFORM_XLIB_KHR'),
>      Extension('VK_KHR_multiview',                         1, True),
>      Extension('VK_KHR_display',                          23, 'VK_USE_PLATFORM_DISPLAY_KHR'),
> +    Extension('VK_KHR_draw_indirect_count',               1, 'device->info.gen >= 8 || device->info.is_haswell'),
>      Extension('VK_EXT_acquire_xlib_display',              1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
>      Extension('VK_EXT_debug_report',                      8, True),
>      Extension('VK_EXT_direct_mode_display',               1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
> index 43a02f2256..d7b94efd19 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -2982,6 +2982,161 @@ void genX(CmdDrawIndexedIndirect)(
>     }
>  }
>
> +#if GEN_IS_HASWELL || GEN_GEN >= 8
> +static void
> +emit_draw_count_predicate(struct anv_cmd_buffer *cmd_buffer,
> +                          struct anv_address count_address,
> +                          uint32_t draw_index)
> +{
> +   /* Upload the current draw count from the draw parameters buffer to
> +    * MI_PREDICATE_SRC0.
> +    */
> +   emit_lrr(&cmd_buffer->batch, MI_PREDICATE_SRC0, CS_GPR(MI_ALU_REG14));

Do we also need to set MI_PREDICATE_SRC0 + 4 to 0? I suspect we do.
Also, we can likely save some batch space if we have a "prepare" function
which sets MI_PREDICATE_SRC0, SRC0 + 4, and SRC1 + 4 up front, so that we
only emit one LOAD_REGISTER_IMM and the MI_PREDICATE per draw. For lots
of primitives, those extra three MI_LOAD_REGISTER_* calls will add up.
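
To sketch what I mean (rough and untested; the helper name is just a
placeholder, and this assumes emit_lrm can point MI_LOAD_REGISTER_MEM at
MI_PREDICATE_SRC0 directly instead of going through MI_ALU_REG14):

static void
prepare_for_draw_count_predicate(struct anv_cmd_buffer *cmd_buffer,
                                 struct anv_address count_address)
{
   /* The draw count and both high dwords never change between draws,
    * so set them once before the draw loop.
    */
   emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0, count_address);
   emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, 0);
   emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);
}

static void
emit_draw_count_predicate(struct anv_cmd_buffer *cmd_buffer,
                          uint32_t draw_index)
{
   /* Only the low dword of SRC1 (the draw index) changes per draw. */
   emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1, draw_index);

   if (draw_index == 0) {
      anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
         mip.LoadOperation = LOAD_LOADINV;
         mip.CombineOperation = COMBINE_SET;
         mip.CompareOperation = COMPARE_SRCS_EQUAL;
      }
   } else {
      anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
         mip.LoadOperation = LOAD_LOAD;
         mip.CombineOperation = COMBINE_XOR;
         mip.CompareOperation = COMPARE_SRCS_EQUAL;
      }
   }
}

The per-draw call sites would then drop the count_address argument and
each loop iteration shrinks to one LRI plus the MI_PREDICATE.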

> +
> +   /* Upload the index of the current primitive to MI_PREDICATE_SRC1.
> +    */
> +   emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1, draw_index);
> +   emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);
> +
> +   if (draw_index == 0) {
> +      anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
> +         mip.LoadOperation = LOAD_LOADINV;
> +         mip.CombineOperation = COMBINE_SET;
> +         mip.CompareOperation = COMPARE_SRCS_EQUAL;
> +      }
> +   } else {
> +      /* While draw_index < draw_count the predicate's result will be
> +       *  (draw_index == draw_count) ^ TRUE = TRUE
> +       * When draw_index == draw_count the result is
> +       *  (TRUE) ^ TRUE = FALSE
> +       * After this all results will be:
> +       *  (FALSE) ^ FALSE = FALSE
> +       */
> +      anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
> +         mip.LoadOperation = LOAD_LOAD;
> +         mip.CombineOperation = COMBINE_XOR;
> +         mip.CompareOperation = COMPARE_SRCS_EQUAL;
> +      }
> +   }
> +}
> +
> +void genX(CmdDrawIndirectCountKHR)(
> +    VkCommandBuffer                             commandBuffer,
> +    VkBuffer                                    _buffer,
> +    VkDeviceSize                                offset,
> +    VkBuffer                                    _countBuffer,
> +    VkDeviceSize                                countBufferOffset,
> +    uint32_t                                    maxDrawCount,
> +    uint32_t                                    stride)
> +{
> +   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
> +   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
> +   ANV_FROM_HANDLE(anv_buffer, count_buffer, _countBuffer);
> +   struct anv_cmd_state *cmd_state = &cmd_buffer->state;
> +   struct anv_pipeline *pipeline = cmd_state->gfx.base.pipeline;
> +   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
> +
> +   if (anv_batch_has_error(&cmd_buffer->batch))
> +      return;
> +
> +   genX(cmd_buffer_flush_state)(cmd_buffer);
> +
> +   struct anv_address count_address =
> +      anv_address_add(count_buffer->address, countBufferOffset);
> +
> +   /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM
> +    * command when loading the values into the predicate source registers.
> +    */
> +   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
> +      pc.PipeControlFlushEnable = true;
> +   }

Have you actually seen this be a problem? If not, why is the flush
needed? A documentation citation would be nice.

> +
> +   emit_lrm(&cmd_buffer->batch, CS_GPR(MI_ALU_REG14), count_address);
> +   emit_lri(&cmd_buffer->batch, CS_GPR(MI_ALU_REG14) + 4, 0);
> +
> +   for (uint32_t i = 0; i < maxDrawCount; i++) {
> +      struct anv_address draw = anv_address_add(buffer->address, offset);
> +
> +      emit_draw_count_predicate(cmd_buffer, count_address, i);
> +
> +      if (vs_prog_data->uses_firstvertex ||
> +          vs_prog_data->uses_baseinstance)
> +         emit_base_vertex_instance_bo(cmd_buffer, anv_address_add(draw, 8));
> +      if (vs_prog_data->uses_drawid)
> +         emit_draw_index(cmd_buffer, i);
> +
> +      load_indirect_parameters(cmd_buffer, draw, false);
> +
> +      anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
> +         prim.IndirectParameterEnable = true;
> +         prim.PredicateEnable = true;
> +         prim.VertexAccessType = SEQUENTIAL;
> +         prim.PrimitiveTopologyType = pipeline->topology;
> +      }
> +
> +      offset += stride;
> +   }
> +}
> +
> +void genX(CmdDrawIndexedIndirectCountKHR)(
> +    VkCommandBuffer                             commandBuffer,
> +    VkBuffer                                    _buffer,
> +    VkDeviceSize                                offset,
> +    VkBuffer                                    _countBuffer,
> +    VkDeviceSize                                countBufferOffset,
> +    uint32_t                                    maxDrawCount,
> +    uint32_t                                    stride)
> +{
> +   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
> +   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
> +   ANV_FROM_HANDLE(anv_buffer, count_buffer, _countBuffer);
> +   struct anv_cmd_state *cmd_state = &cmd_buffer->state;
> +   struct anv_pipeline *pipeline = cmd_state->gfx.base.pipeline;
> +   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
> +
> +   if (anv_batch_has_error(&cmd_buffer->batch))
> +      return;
> +
> +   genX(cmd_buffer_flush_state)(cmd_buffer);
> +
> +   struct anv_address count_address =
> +      anv_address_add(count_buffer->address, countBufferOffset);
> +
> +   /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM
> +    * command when loading the values into the predicate source registers.
> +    */
> +   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
> +      pc.PipeControlFlushEnable = true;
> +   }
> +
> +   emit_lrm(&cmd_buffer->batch, CS_GPR(MI_ALU_REG14), count_address);
> +   emit_lri(&cmd_buffer->batch, CS_GPR(MI_ALU_REG14) + 4, 0);
> +
> +   for (uint32_t i = 0; i < maxDrawCount; i++) {
> +      struct anv_address draw = anv_address_add(buffer->address, offset);
> +
> +      emit_draw_count_predicate(cmd_buffer, count_address, i);
> +
> +      /* TODO: We need to stomp base vertex to 0 somehow */
> +      if (vs_prog_data->uses_firstvertex ||
> +          vs_prog_data->uses_baseinstance)
> +         emit_base_vertex_instance_bo(cmd_buffer, anv_address_add(draw, 12));
> +      if (vs_prog_data->uses_drawid)
> +         emit_draw_index(cmd_buffer, i);
> +
> +      load_indirect_parameters(cmd_buffer, draw, true);
> +
> +      anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
> +         prim.IndirectParameterEnable = true;
> +         prim.PredicateEnable = true;
> +         prim.VertexAccessType = RANDOM;
> +         prim.PrimitiveTopologyType = pipeline->topology;
> +      }
> +
> +      offset += stride;
> +   }
> +}
> +#endif
> +
>  static VkResult
>  flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
>  {
> --
> 2.18.0
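
For what it's worth, the XOR trick in emit_draw_count_predicate checks
out if you write the sequence down with, say, a GPU-side draw count of 2
and maxDrawCount of 4 (numbers picked purely for illustration):

   draw 0: LOADINV/SET: predicate = !(2 == 0)        = TRUE   (draw)
   draw 1: LOAD/XOR:    predicate = (2 == 1) ^ TRUE  = TRUE   (draw)
   draw 2: LOAD/XOR:    predicate = (2 == 2) ^ TRUE  = FALSE  (skip)
   draw 3: LOAD/XOR:    predicate = (2 == 3) ^ FALSE = FALSE  (skip)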