Reviewed-by (r-b) for the series.
On Tue, Feb 26, 2019 at 1:39 PM Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote: > > This drastically reduces the number of SGPRs because the driver > now uses descriptors per vertex binding, instead of per vertex > attribute format. > > 29077 shaders in 15096 tests > Totals: > SGPRS: 1354285 -> 1282109 (-5.33 %) > VGPRS: 909896 -> 908800 (-0.12 %) > Spilled SGPRs: 24840 -> 24811 (-0.12 %) > Code Size: 49221144 -> 48986628 (-0.48 %) bytes > Max Waves: 243930 -> 244229 (0.12 %) > > Totals from affected shaders: > SGPRS: 390648 -> 318472 (-18.48 %) > VGPRS: 288432 -> 287336 (-0.38 %) > Spilled SGPRs: 94 -> 65 (-30.85 %) > Code Size: 11548412 -> 11313896 (-2.03 %) bytes > Max Waves: 86460 -> 86759 (0.35 %) > > This gives a really tiny boost. > > Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> > --- > src/amd/vulkan/radv_cmd_buffer.c | 21 +++++++++----- > src/amd/vulkan/radv_nir_to_llvm.c | 47 +++++++++++++++++++++++++------ > src/amd/vulkan/radv_pipeline.c | 37 ++---------------------- > src/amd/vulkan/radv_private.h | 5 +--- > 4 files changed, 57 insertions(+), 53 deletions(-) > > diff --git a/src/amd/vulkan/radv_cmd_buffer.c > b/src/amd/vulkan/radv_cmd_buffer.c > index ad0b934ddfc..5ab93d11d68 100644 > --- a/src/amd/vulkan/radv_cmd_buffer.c > +++ b/src/amd/vulkan/radv_cmd_buffer.c > @@ -1985,13 +1985,13 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer > *cmd_buffer, > { > if ((pipeline_is_dirty || > (cmd_buffer->state.dirty & RADV_CMD_DIRTY_VERTEX_BUFFER)) && > - cmd_buffer->state.pipeline->vertex_elements.count && > + cmd_buffer->state.pipeline->num_vertex_bindings && > radv_get_shader(cmd_buffer->state.pipeline, > MESA_SHADER_VERTEX)->info.info.vs.has_vertex_buffers) { > struct radv_vertex_elements_info *velems = > &cmd_buffer->state.pipeline->vertex_elements; > unsigned vb_offset; > void *vb_ptr; > uint32_t i = 0; > - uint32_t count = velems->count; > + uint32_t count = > cmd_buffer->state.pipeline->num_vertex_bindings; > uint64_t va; > > /* 
allocate some descriptor state for vertex buffers */ > @@ -2002,13 +2002,15 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer > *cmd_buffer, > for (i = 0; i < count; i++) { > uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4]; > uint32_t offset; > - int vb = velems->binding[i]; > - struct radv_buffer *buffer = > cmd_buffer->vertex_bindings[vb].buffer; > - uint32_t stride = > cmd_buffer->state.pipeline->binding_stride[vb]; > + struct radv_buffer *buffer = > cmd_buffer->vertex_bindings[i].buffer; > + uint32_t stride = > cmd_buffer->state.pipeline->binding_stride[i]; > + > + if (!buffer) > + continue; > > va = radv_buffer_get_va(buffer->bo); > > - offset = cmd_buffer->vertex_bindings[vb].offset + > velems->offset[i]; > + offset = cmd_buffer->vertex_bindings[i].offset; > va += offset + buffer->offset; > desc[0] = va; > desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | > S_008F04_STRIDE(stride); > @@ -2016,7 +2018,12 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer > *cmd_buffer, > desc[2] = (buffer->size - offset - > velems->format_size[i]) / stride + 1; > else > desc[2] = buffer->size - offset; > - desc[3] = velems->rsrc_word3[i]; > + desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | > + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | > + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | > + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | > + > S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) | > + > S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); > } > > va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); > diff --git a/src/amd/vulkan/radv_nir_to_llvm.c > b/src/amd/vulkan/radv_nir_to_llvm.c > index 36f499be212..e6c8f3ecb92 100644 > --- a/src/amd/vulkan/radv_nir_to_llvm.c > +++ b/src/amd/vulkan/radv_nir_to_llvm.c > @@ -2008,6 +2008,8 @@ adjust_vertex_fetch_alpha(struct radv_shader_context > *ctx, > > LLVMValueRef c30 = LLVMConstInt(ctx->ac.i32, 30, 0); > > + alpha = LLVMBuildBitCast(ctx->ac.builder, alpha, ctx->ac.f32, ""); > + > if (adjustment == RADV_ALPHA_ADJUST_SSCALED) > alpha 
= LLVMBuildFPToUI(ctx->ac.builder, alpha, ctx->ac.i32, > ""); > else > @@ -2035,7 +2037,7 @@ adjust_vertex_fetch_alpha(struct radv_shader_context > *ctx, > alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, > ""); > } > > - return alpha; > + return LLVMBuildBitCast(ctx->ac.builder, alpha, ctx->ac.i32, ""); > } > > static unsigned > @@ -2096,7 +2098,7 @@ radv_fixup_vertex_input_fetches(struct > radv_shader_context *ctx, > > for (unsigned i = num_channels; i < 4; i++) { > chan[i] = i == 3 ? one : zero; > - chan[i] = ac_to_float(&ctx->ac, chan[i]); > + chan[i] = ac_to_integer(&ctx->ac, chan[i]); > } > > return ac_build_gather_values(&ctx->ac, chan, 4); > @@ -2154,20 +2156,49 @@ handle_vs_input_decl(struct radv_shader_context *ctx, > } else > buffer_index = LLVMBuildAdd(ctx->ac.builder, > ctx->abi.vertex_id, > ctx->abi.base_vertex, ""); > - t_offset = LLVMConstInt(ctx->ac.i32, attrib_index, false); > - > - t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, > t_offset); > > /* Adjust the number of channels to load based on the vertex > * attribute format. 
> */ > unsigned num_format_channels = > get_num_channels_from_data_format(data_format); > unsigned num_channels = MIN2(num_input_channels, > num_format_channels); > + unsigned attrib_binding = > ctx->options->key.vs.vertex_attribute_bindings[attrib_index]; > + unsigned attrib_offset = > ctx->options->key.vs.vertex_attribute_offsets[attrib_index]; > + unsigned attrib_stride = > ctx->options->key.vs.vertex_attribute_strides[attrib_index]; > > - input = ac_build_buffer_load_format(&ctx->ac, t_list, > + if (attrib_stride != 0 && attrib_offset > attrib_stride) { > + LLVMValueRef buffer_offset = > + LLVMConstInt(ctx->ac.i32, > + attrib_offset / attrib_stride, > false); > + > + buffer_index = LLVMBuildAdd(ctx->ac.builder, > buffer_index, > - ctx->ac.i32_0, > - num_channels, false, > true); > + buffer_offset, ""); > + > + attrib_offset = attrib_offset % attrib_stride; > + } > + > + t_offset = LLVMConstInt(ctx->ac.i32, attrib_binding, false); > + t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, > t_offset); > + > + input = ac_build_tbuffer_load(&ctx->ac, t_list, buffer_index, > + LLVMConstInt(ctx->ac.i32, > attrib_offset, false), > + ctx->ac.i32_0, ctx->ac.i32_0, > + num_channels, > + data_format, num_format, > + false, false, true); > + > + if (ctx->options->key.vs.post_shuffle & (1 << attrib_index)) { > + if (num_channels > 1) { > + LLVMValueRef c[4]; > + c[0] = ac_llvm_extract_elem(&ctx->ac, input, > 2); > + c[1] = ac_llvm_extract_elem(&ctx->ac, input, > 1); > + c[2] = ac_llvm_extract_elem(&ctx->ac, input, > 0); > + c[3] = ac_llvm_extract_elem(&ctx->ac, input, > 3); > + > + input = ac_build_gather_values(&ctx->ac, c, > 4); > + } > + } > > input = radv_fixup_vertex_input_fetches(ctx, input, > num_channels, > is_float); > diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c > index 5fd57932102..30c3f60790e 100644 > --- a/src/amd/vulkan/radv_pipeline.c > +++ b/src/amd/vulkan/radv_pipeline.c > @@ -1244,25 +1244,6 @@ si_conv_prim_to_gs_out(enum 
VkPrimitiveTopology > topology) > } > } > > -static unsigned si_map_swizzle(unsigned swizzle) > -{ > - switch (swizzle) { > - case VK_SWIZZLE_Y: > - return V_008F0C_SQ_SEL_Y; > - case VK_SWIZZLE_Z: > - return V_008F0C_SQ_SEL_Z; > - case VK_SWIZZLE_W: > - return V_008F0C_SQ_SEL_W; > - case VK_SWIZZLE_0: > - return V_008F0C_SQ_SEL_0; > - case VK_SWIZZLE_1: > - return V_008F0C_SQ_SEL_1; > - default: /* VK_SWIZZLE_X */ > - return V_008F0C_SQ_SEL_X; > - } > -} > - > - > static unsigned radv_dynamic_state_mask(VkDynamicState state) > { > switch(state) { > @@ -3557,24 +3538,10 @@ radv_compute_vertex_input_state(struct radv_pipeline > *pipeline, > &vi_info->pVertexAttributeDescriptions[i]; > unsigned loc = desc->location; > const struct vk_format_description *format_desc; > - int first_non_void; > - uint32_t num_format, data_format; > - format_desc = vk_format_description(desc->format); > - first_non_void = > vk_format_get_first_non_void_channel(desc->format); > > - num_format = radv_translate_buffer_numformat(format_desc, > first_non_void); > - data_format = radv_translate_buffer_dataformat(format_desc, > first_non_void); > + format_desc = vk_format_description(desc->format); > > - velems->rsrc_word3[loc] = > S_008F0C_DST_SEL_X(si_map_swizzle(format_desc->swizzle[0])) | > - > S_008F0C_DST_SEL_Y(si_map_swizzle(format_desc->swizzle[1])) | > - > S_008F0C_DST_SEL_Z(si_map_swizzle(format_desc->swizzle[2])) | > - > S_008F0C_DST_SEL_W(si_map_swizzle(format_desc->swizzle[3])) | > - S_008F0C_NUM_FORMAT(num_format) | > - S_008F0C_DATA_FORMAT(data_format); > velems->format_size[loc] = format_desc->block.bits / 8; > - velems->offset[loc] = desc->offset; > - velems->binding[loc] = desc->binding; > - velems->count = MAX2(velems->count, loc + 1); > } > > for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) > { > @@ -3582,6 +3549,8 @@ radv_compute_vertex_input_state(struct radv_pipeline > *pipeline, > &vi_info->pVertexBindingDescriptions[i]; > > 
pipeline->binding_stride[desc->binding] = desc->stride; > + pipeline->num_vertex_bindings = > + MAX2(pipeline->num_vertex_bindings, desc->binding + > 1); > } > } > > diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h > index c73bdaca0a3..39fa6110fde 100644 > --- a/src/amd/vulkan/radv_private.h > +++ b/src/amd/vulkan/radv_private.h > @@ -1342,11 +1342,7 @@ struct radv_prim_vertex_count { > }; > > struct radv_vertex_elements_info { > - uint32_t rsrc_word3[MAX_VERTEX_ATTRIBS]; > uint32_t format_size[MAX_VERTEX_ATTRIBS]; > - uint32_t binding[MAX_VERTEX_ATTRIBS]; > - uint32_t offset[MAX_VERTEX_ATTRIBS]; > - uint32_t count; > }; > > struct radv_ia_multi_vgt_param_helpers { > @@ -1378,6 +1374,7 @@ struct radv_pipeline { > struct radv_vertex_elements_info vertex_elements; > > uint32_t binding_stride[MAX_VBS]; > + uint8_t num_vertex_bindings; > > uint32_t user_data_0[MESA_SHADER_STAGES]; > union { > -- > 2.21.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev