On Tue, Feb 12, 2019 at 3:07 PM Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote: > > It's unnecessary to load more channels than the vertex attribute > format. The remaining channels are filled with 0 for y and z, > and 1 for w. > > 29077 shaders in 15096 tests > Totals: > SGPRS: 1321605 -> 1318869 (-0.21 %) > VGPRS: 935236 -> 932252 (-0.32 %) > Spilled SGPRs: 24860 -> 24776 (-0.34 %) > Code Size: 49832348 -> 49819464 (-0.03 %) bytes > Max Waves: 242101 -> 242611 (0.21 %) > > Totals from affected shaders: > SGPRS: 93675 -> 90939 (-2.92 %) > VGPRS: 58016 -> 55032 (-5.14 %) > Spilled SGPRs: 172 -> 88 (-48.84 %) > Code Size: 2862740 -> 2849856 (-0.45 %) bytes > Max Waves: 15474 -> 15984 (3.30 %) > > This mostly helps Croteam games (Talos/Sam2017). > > Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> > --- > src/amd/vulkan/radv_nir_to_llvm.c | 83 ++++++++++++++++++++++++++++++- > 1 file changed, 81 insertions(+), 2 deletions(-) > > diff --git a/src/amd/vulkan/radv_nir_to_llvm.c > b/src/amd/vulkan/radv_nir_to_llvm.c > index 7f74678d5f1..b1e0c64e4e1 100644 > --- a/src/amd/vulkan/radv_nir_to_llvm.c > +++ b/src/amd/vulkan/radv_nir_to_llvm.c > @@ -1967,6 +1967,72 @@ adjust_vertex_fetch_alpha(struct radv_shader_context > *ctx, > return alpha; > } > > +static unsigned > +get_num_channels_from_data_format(unsigned data_format) > +{ > + switch (data_format) { > + case V_008F0C_BUF_DATA_FORMAT_8: > + case V_008F0C_BUF_DATA_FORMAT_16: > + case V_008F0C_BUF_DATA_FORMAT_32: > + return 1; > + case V_008F0C_BUF_DATA_FORMAT_8_8: > + case V_008F0C_BUF_DATA_FORMAT_16_16: > + case V_008F0C_BUF_DATA_FORMAT_32_32: > + return 2; > + case V_008F0C_BUF_DATA_FORMAT_10_11_11: > + case V_008F0C_BUF_DATA_FORMAT_11_11_10: > + case V_008F0C_BUF_DATA_FORMAT_32_32_32: > + return 3; > + case V_008F0C_BUF_DATA_FORMAT_8_8_8_8: > + case V_008F0C_BUF_DATA_FORMAT_10_10_10_2: > + case V_008F0C_BUF_DATA_FORMAT_2_10_10_10: > + case V_008F0C_BUF_DATA_FORMAT_16_16_16_16: > + case 
V_008F0C_BUF_DATA_FORMAT_32_32_32_32: > + return 4; > + default: > + break; > + } > + > + return 4; > +} > + > +static LLVMValueRef > +radv_fixup_vertex_input_fetches(struct radv_shader_context *ctx, > + LLVMValueRef value, > + unsigned num_channels, > + bool is_float) > +{ > + LLVMValueRef zero = is_float ? ctx->ac.f32_0 : ctx->ac.i32_0; > + LLVMValueRef one = is_float ? ctx->ac.f32_1 : ctx->ac.i32_1; > + LLVMTypeRef elemtype; > + LLVMValueRef chan[4]; > + > + if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind) { > + unsigned vec_size = LLVMGetVectorSize(LLVMTypeOf(value)); > + > + if (num_channels == 4 && vec_size == 4) > + return value;
Couldn't this condition just be `num_channels == vec_size`? > + > + num_channels = MIN2(num_channels, vec_size); > + > + for (unsigned i = 0; i < num_channels; i++) > + chan[i] = ac_llvm_extract_elem(&ctx->ac, value, i); > + > + elemtype = LLVMGetElementType(LLVMTypeOf(value)); > + } else { > + if (num_channels) { > + assert(num_channels == 1); > + chan[0] = value; > + } > + elemtype = LLVMTypeOf(value); > + } > + > + for (unsigned i = num_channels; i < 4; i++) > + chan[i] = i == 3 ? one : zero; > + > + return ac_build_gather_values(&ctx->ac, chan, 4); > +} > + > static void > handle_vs_input_decl(struct radv_shader_context *ctx, > struct nir_variable *variable) > @@ -1979,7 +2045,7 @@ handle_vs_input_decl(struct radv_shader_context *ctx, > unsigned attrib_count = glsl_count_attribute_slots(variable->type, > true); > uint8_t input_usage_mask = > > ctx->shader_info->info.vs.input_usage_mask[variable->data.location]; > - unsigned num_channels = util_last_bit(input_usage_mask); > + unsigned num_input_channels = util_last_bit(input_usage_mask); > > variable->data.driver_location = variable->data.location * 4; > > @@ -1987,6 +2053,10 @@ handle_vs_input_decl(struct radv_shader_context *ctx, > for (unsigned i = 0; i < attrib_count; ++i) { > LLVMValueRef output[4]; > unsigned attrib_index = variable->data.location + i - > VERT_ATTRIB_GENERIC0; > + unsigned attrib_format = > ctx->options->key.vs.vertex_attribute_formats[attrib_index]; > + unsigned data_format = attrib_format & 0x0f; > + unsigned num_format = (attrib_format >> 4) & 0x07; > + bool is_float = num_format == V_008F0C_BUF_NUM_FORMAT_FLOAT; > > if (ctx->options->key.vs.instance_rate_inputs & (1u << > attrib_index)) { > uint32_t divisor = > ctx->options->key.vs.instance_rate_divisors[attrib_index]; > @@ -2018,12 +2088,21 @@ handle_vs_input_decl(struct radv_shader_context *ctx, > > t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, > t_offset); > > + /* Adjust the number of channels to load based on the vertex > + * attribute format. 
> + */ > + unsigned num_format_channels = > get_num_channels_from_data_format(data_format); > + unsigned num_channels = MIN2(num_input_channels, > num_format_channels); > + > + assert(num_channels <= num_input_channels); This assert seems redundant given the MIN2 above. Does this work with 3 components? Otherwise, Reviewed-by for the series. > + > + input = ac_build_buffer_load_format(&ctx->ac, t_list, > buffer_index, > ctx->ac.i32_0, > num_channels, false, > true); > > - input = ac_build_expand_to_vec4(&ctx->ac, input, > num_channels); > + input = radv_fixup_vertex_input_fetches(ctx, input, > num_channels, > + is_float); > > for (unsigned chan = 0; chan < 4; chan++) { > LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, > chan, false); > -- > 2.20.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev