On Tue, Feb 5, 2019 at 11:07 AM Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote: > > > On 2/5/19 10:58 AM, Bas Nieuwenhuizen wrote: > > On Fri, Jan 25, 2019 at 5:27 PM Samuel Pitoiset > > <samuel.pitoi...@gmail.com> wrote: > >> This removes some scalar loads from shaders, but it increases > >> the number of SET_SH_REG packets. This is currently basic but > >> it could be improved if needed. Inlining dynamic offsets might > >> also help. > >> > >> Original idea from Dave Airlie. > >> > >> 29164 shaders in 15096 tests > >> Totals: > >> SGPRS: 1336072 -> 1365241 (2.18 %) > >> VGPRS: 937784 -> 934592 (-0.34 %) > >> Spilled SGPRs: 24751 -> 24796 (0.18 %) > >> Code Size: 50001672 -> 49815524 (-0.37 %) bytes > >> Max Waves: 208755 -> 208830 (0.04 %) > >> > >> Totals from affected shaders: > >> SGPRS: 295018 -> 324187 (9.89 %) > >> VGPRS: 243108 -> 239916 (-1.31 %) > >> Spilled SGPRs: 1464 -> 1509 (3.07 %) > >> Code Size: 8028188 -> 7842040 (-2.32 %) bytes > >> Max Waves: 69580 -> 69655 (0.11 %) > >> > >> Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> > >> --- > >> src/amd/common/ac_nir_to_llvm.c | 23 +++++++-- > >> src/amd/common/ac_shader_abi.h | 4 ++ > >> src/amd/vulkan/radv_cmd_buffer.c | 78 ++++++++++++++++++++++--------- > >> src/amd/vulkan/radv_nir_to_llvm.c | 54 +++++++++++++++++++++ > >> src/amd/vulkan/radv_shader.h | 10 ++-- > >> src/amd/vulkan/radv_shader_info.c | 4 ++ > >> 6 files changed, 145 insertions(+), 28 deletions(-) > >> > >> diff --git a/src/amd/common/ac_nir_to_llvm.c > >> b/src/amd/common/ac_nir_to_llvm.c > >> index f509fc31dff..db1574b5b35 100644 > >> --- a/src/amd/common/ac_nir_to_llvm.c > >> +++ b/src/amd/common/ac_nir_to_llvm.c > >> @@ -1392,10 +1392,27 @@ static LLVMValueRef > >> visit_load_push_constant(struct ac_nir_context *ctx, > >> nir_intrinsic_instr *instr) > >> { > >> LLVMValueRef ptr, addr; > >> + LLVMValueRef src0 = get_src(ctx, instr->src[0]); > >> + unsigned index = nir_intrinsic_base(instr); > >> > >> - addr = LLVMConstInt(ctx->ac.i32, nir_intrinsic_base(instr), 0); > >> - addr = LLVMBuildAdd(ctx->ac.builder, addr, > >> - get_src(ctx, instr->src[0]), ""); > >> + addr = LLVMConstInt(ctx->ac.i32, index, 0); > >> + addr = LLVMBuildAdd(ctx->ac.builder, addr, src0, ""); > >> + > >> + /* Load constant values from user SGPRS when possible, otherwise > >> + * fallback to the default path that loads directly from memory. > >> + */ > >> + if (LLVMIsConstant(src0) && > >> + index == 0 && > >> + instr->dest.ssa.bit_size == 32) { > >> + unsigned offset = LLVMConstIntGetZExtValue(src0) / 4; > >> + unsigned count = instr->dest.ssa.num_components; > >> + > >> + if (offset + count <= ctx->abi->num_inline_push_consts) { > >> + return ac_build_gather_values(&ctx->ac, > >> + > >> ctx->abi->inline_push_consts + offset, > >> + count); > >> + } > >> + } > >> > >> ptr = ac_build_gep0(&ctx->ac, ctx->abi->push_constants, addr); > >> > >> diff --git a/src/amd/common/ac_shader_abi.h > >> b/src/amd/common/ac_shader_abi.h > >> index ee18e6c1923..704c3d107c2 100644 > >> --- a/src/amd/common/ac_shader_abi.h > >> +++ b/src/amd/common/ac_shader_abi.h > >> @@ -32,6 +32,8 @@ struct nir_variable; > >> > >> #define AC_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1) > >> > >> +#define AC_MAX_INLINE_PUSH_CONSTS 8 > >> + > >> enum ac_descriptor_type { > >> AC_DESC_IMAGE, > >> AC_DESC_FMASK, > >> @@ -66,6 +68,8 @@ struct ac_shader_abi { > >> > >> /* Vulkan only */ > >> LLVMValueRef push_constants; > >> + LLVMValueRef inline_push_consts[AC_MAX_INLINE_PUSH_CONSTS]; > >> + unsigned num_inline_push_consts; > >> LLVMValueRef view_index; > >> > >> LLVMValueRef outputs[AC_LLVM_MAX_OUTPUTS * 4]; > >> diff --git a/src/amd/vulkan/radv_cmd_buffer.c > >> b/src/amd/vulkan/radv_cmd_buffer.c > >> index aae90290841..f80e2078da0 100644 > >> --- a/src/amd/vulkan/radv_cmd_buffer.c > >> +++ b/src/amd/vulkan/radv_cmd_buffer.c > >> @@ -627,6 +627,23 @@ radv_emit_descriptor_pointers(struct radv_cmd_buffer > >> *cmd_buffer, > >> } > >> } > >> > >> +static void > >> +radv_emit_inline_push_consts(struct radv_cmd_buffer *cmd_buffer, > >> + struct radv_pipeline *pipeline, > >> + gl_shader_stage stage, > >> + int idx, int count, uint32_t *values) > >> +{ > >> + struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, > >> stage, idx); > >> + uint32_t base_reg = pipeline->user_data_0[stage]; > >> + if (loc->sgpr_idx == -1) > >> + return; > >> + > >> + assert(loc->num_sgprs == count); > >> + > >> + radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * > >> 4, count); > >> + radeon_emit_array(cmd_buffer->cs, values, count); > >> +} > >> + > >> static void > >> radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer, > >> struct radv_pipeline *pipeline) > >> @@ -1900,6 +1917,7 @@ radv_flush_constants(struct radv_cmd_buffer > >> *cmd_buffer, > >> radv_get_descriptors_state(cmd_buffer, bind_point); > >> struct radv_pipeline_layout *layout = pipeline->layout; > >> struct radv_shader_variant *shader, *prev_shader; > >> + bool need_push_constants = false; > >> unsigned offset; > >> void *ptr; > >> uint64_t va; > >> @@ -1909,37 +1927,55 @@ radv_flush_constants(struct radv_cmd_buffer > >> *cmd_buffer, > >> (!layout->push_constant_size && > >> !layout->dynamic_offset_count)) > >> return; > >> > >> - if (!radv_cmd_buffer_upload_alloc(cmd_buffer, > >> layout->push_constant_size + > >> - 16 * > >> layout->dynamic_offset_count, > >> - 256, &offset, &ptr)) > >> - return; > >> + radv_foreach_stage(stage, stages) { > >> + if (!pipeline->shaders[stage]) > >> + continue; > >> > >> - memcpy(ptr, cmd_buffer->push_constants, > >> layout->push_constant_size); > >> - memcpy((char*)ptr + layout->push_constant_size, > >> - descriptors_state->dynamic_buffers, > >> - 16 * layout->dynamic_offset_count); > >> + need_push_constants |= > >> pipeline->shaders[stage]->info.info.loads_push_constants; > >> + need_push_constants |= > >> pipeline->shaders[stage]->info.info.loads_dynamic_offsets; > >> > >> - va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); > >> - va += offset; > >> + uint8_t count = > >> pipeline->shaders[stage]->info.info.num_inline_push_consts; > >> > >> - MAYBE_UNUSED unsigned cdw_max = > >> radeon_check_space(cmd_buffer->device->ws, > >> - cmd_buffer->cs, > >> MESA_SHADER_STAGES * 4); > >> + radv_emit_inline_push_consts(cmd_buffer, pipeline, stage, > >> + AC_UD_INLINE_PUSH_CONSTANTS, > >> + count, > >> + (uint32_t > >> *)&cmd_buffer->push_constants[0]); > >> + } > >> > >> - prev_shader = NULL; > >> - radv_foreach_stage(stage, stages) { > >> - shader = radv_get_shader(pipeline, stage); > >> + if (need_push_constants) { > >> + if (!radv_cmd_buffer_upload_alloc(cmd_buffer, > >> layout->push_constant_size + > >> + 16 * > >> layout->dynamic_offset_count, > >> + 256, &offset, &ptr)) > >> + return; > >> + > >> + memcpy(ptr, cmd_buffer->push_constants, > >> layout->push_constant_size); > >> + memcpy((char*)ptr + layout->push_constant_size, > >> + descriptors_state->dynamic_buffers, > >> + 16 * layout->dynamic_offset_count); > >> + > >> + va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); > >> + va += offset; > >> + > >> + MAYBE_UNUSED unsigned cdw_max = > >> + radeon_check_space(cmd_buffer->device->ws, > >> + cmd_buffer->cs, > >> MESA_SHADER_STAGES * 4); > >> + > >> + prev_shader = NULL; > >> + radv_foreach_stage(stage, stages) { > >> + shader = radv_get_shader(pipeline, stage); > >> > >> - /* Avoid redundantly emitting the address for merged > >> stages. */ > >> - if (shader && shader != prev_shader) { > >> - radv_emit_userdata_address(cmd_buffer, pipeline, > >> stage, > >> - AC_UD_PUSH_CONSTANTS, > >> va); > >> + /* Avoid redundantly emitting the address for > >> merged stages. */ > >> + if (shader && shader != prev_shader) { > >> + radv_emit_userdata_address(cmd_buffer, > >> pipeline, stage, > >> + > >> AC_UD_PUSH_CONSTANTS, va); > >> > >> - prev_shader = shader; > >> + prev_shader = shader; > >> + } > >> } > >> + assert(cmd_buffer->cs->cdw <= cdw_max); > >> } > >> > >> cmd_buffer->push_constant_stages &= ~stages; > >> - assert(cmd_buffer->cs->cdw <= cdw_max); > >> } > >> > >> static void > >> diff --git a/src/amd/vulkan/radv_nir_to_llvm.c > >> b/src/amd/vulkan/radv_nir_to_llvm.c > >> index 11417c5991b..1cffa748d15 100644 > >> --- a/src/amd/vulkan/radv_nir_to_llvm.c > >> +++ b/src/amd/vulkan/radv_nir_to_llvm.c > >> @@ -627,6 +627,47 @@ count_vs_user_sgprs(struct radv_shader_context *ctx) > >> return count; > >> } > >> > >> +static void allocate_inline_push_consts(struct radv_shader_context *ctx, > >> + struct user_sgpr_info > >> *user_sgpr_info) > >> +{ > >> + uint8_t remaining_sgprs = user_sgpr_info->remaining_sgprs; > >> + > >> + /* Only supported if shaders don't have indirect push constants. */ > >> + if (ctx->shader_info->info.has_indirect_push_constants) > >> + return; > >> + > >> + /* Only supported for 32-bit push constants. */ > >> + if (!ctx->shader_info->info.has_32bit_push_constants) > >> + return; > >> + > >> + /* Only supported if base starts from 0. */ > >> + if (ctx->shader_info->info.min_push_constant_used != 0) > >> + return; > >> + > >> + uint8_t num_push_consts = > >> + (ctx->shader_info->info.max_push_constant_used - > >> + ctx->shader_info->info.min_push_constant_used) / 4; > > I think you need +1? if min_push_constant_used == > > max_push_constant_used you still use 1 push constant. (The code in the > > info pass adds range, but that does not include the size of the loaded > > thing?) > > No, as min is 'base' and max is 'base + range'. > > > > > Now that I think about it, the shader info pass does not take into > > account the size of the variable at all right? What happens with a > > vec4? The range does not include the object itself right? > > If it's a vec4 starting from base=0, range should be 16, that works for > vec4.
hmm, so it includes the size of type. If I read the spir-v parser correctly we set range very large though (looks like range is always the total size of all push constants?) > > > > >> + > >> + /* Check if the number of user SGPRs is large enough. */ > >> + if (num_push_consts < remaining_sgprs) { > >> + ctx->shader_info->info.num_inline_push_consts = > >> num_push_consts; > >> + } else { > >> + ctx->shader_info->info.num_inline_push_consts = > >> remaining_sgprs; > >> + } > >> + > >> + /* Clamp to the maximum number of allowed inlined push constants. > >> */ > >> + if (ctx->shader_info->info.num_inline_push_consts > > >> AC_MAX_INLINE_PUSH_CONSTS) > >> + ctx->shader_info->info.num_inline_push_consts = > >> AC_MAX_INLINE_PUSH_CONSTS; > >> + > >> + if (ctx->shader_info->info.num_inline_push_consts == > >> num_push_consts && > >> + !ctx->shader_info->info.loads_dynamic_offsets) { > > This won't work, as we can reach here even if there are some accesses > > which don't use inline push constants, e.g. some non 32-bit accesses. > > Good catch! One possible fix is to change 'has_32bit_push_constants' to > 'has_only_32bit_push_constants' and bail out if it's false. > > What do you think? That works. > > > > >> + /* Disable the default push constants path if all > >> constants are > >> + * inlined and if shaders don't use dynamic descriptors. > >> + */ > >> + ctx->shader_info->info.loads_push_constants = false; > >> + } > >> +} > >> + > >> static void allocate_user_sgprs(struct radv_shader_context *ctx, > >> gl_shader_stage stage, > >> bool has_previous_stage, > >> @@ -706,6 +747,8 @@ static void allocate_user_sgprs(struct > >> radv_shader_context *ctx, > >> } else { > >> user_sgpr_info->remaining_sgprs = remaining_sgprs - > >> num_desc_set; > >> } > >> + > >> + allocate_inline_push_consts(ctx, user_sgpr_info); > >> } > >> > >> static void > >> @@ -735,6 +778,12 @@ declare_global_input_sgprs(struct radv_shader_context > >> *ctx, > >> add_arg(args, ARG_SGPR, type, &ctx->abi.push_constants); > >> } > >> > >> + for (unsigned i = 0; i < > >> ctx->shader_info->info.num_inline_push_consts; i++) { > >> + add_arg(args, ARG_SGPR, ctx->ac.i32, > >> + &ctx->abi.inline_push_consts[i]); > >> + } > >> + ctx->abi.num_inline_push_consts = > >> ctx->shader_info->info.num_inline_push_consts; > >> + > >> if (ctx->shader_info->info.so.num_outputs) { > >> add_arg(args, ARG_SGPR, > >> ac_array_in_const32_addr_space(ctx->ac.v4i32), > >> @@ -853,6 +902,11 @@ set_global_input_locs(struct radv_shader_context *ctx, > >> set_loc_shader_ptr(ctx, AC_UD_PUSH_CONSTANTS, > >> user_sgpr_idx); > >> } > >> > >> + if (ctx->shader_info->info.num_inline_push_consts) { > >> + set_loc_shader(ctx, AC_UD_INLINE_PUSH_CONSTANTS, > >> user_sgpr_idx, > >> + > >> ctx->shader_info->info.num_inline_push_consts); > >> + } > >> + > >> if (ctx->streamout_buffers) { > >> set_loc_shader_ptr(ctx, AC_UD_STREAMOUT_BUFFERS, > >> user_sgpr_idx); > >> diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h > >> index 09bc5c2d4a9..d1bc467c5d9 100644 > >> --- a/src/amd/vulkan/radv_shader.h > >> +++ b/src/amd/vulkan/radv_shader.h > >> @@ -129,10 +129,11 @@ struct radv_nir_compiler_options { > >> enum radv_ud_index { > >> AC_UD_SCRATCH_RING_OFFSETS = 0, > >> AC_UD_PUSH_CONSTANTS = 1, > >> - AC_UD_INDIRECT_DESCRIPTOR_SETS = 2, > >> - AC_UD_VIEW_INDEX = 3, > >> - AC_UD_STREAMOUT_BUFFERS = 4, > >> - AC_UD_SHADER_START = 5, > >> + AC_UD_INLINE_PUSH_CONSTANTS = 2, > >> + AC_UD_INDIRECT_DESCRIPTOR_SETS = 3, > >> + AC_UD_VIEW_INDEX = 4, > >> + AC_UD_STREAMOUT_BUFFERS = 5, > >> + AC_UD_SHADER_START = 6, > >> AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START, > >> AC_UD_VS_BASE_VERTEX_START_INSTANCE, > >> AC_UD_VS_MAX_UD, > >> @@ -167,6 +168,7 @@ struct radv_shader_info { > >> uint8_t max_push_constant_used; > >> bool has_32bit_push_constants; > >> bool has_indirect_push_constants; > >> + uint8_t num_inline_push_consts; > >> uint32_t desc_set_used_mask; > >> bool needs_multiview_view_index; > >> bool uses_invocation_id; > >> diff --git a/src/amd/vulkan/radv_shader_info.c > >> b/src/amd/vulkan/radv_shader_info.c > >> index cabe2a470a0..900bca1d097 100644 > >> --- a/src/amd/vulkan/radv_shader_info.c > >> +++ b/src/amd/vulkan/radv_shader_info.c > >> @@ -211,6 +211,10 @@ gather_push_constant_info(const nir_shader *nir, > >> if (base < info->min_push_constant_used) > >> info->min_push_constant_used = base; > >> > >> + /* TODO: handle base not 0. */ > >> + if (base != 0) > >> + info->min_push_constant_used = -1; > >> + > >> info->loads_push_constants = true; > >> } > >> > >> -- > >> 2.20.1 > >> > >> _______________________________________________ > >> mesa-dev mailing list > >> mesa-dev@lists.freedesktop.org > >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev