From: Dave Airlie <airl...@redhat.com> Instead of putting the push constants into the upload buffer, if we have space in the sgprs we can upload the per-stage constants into the shaders directly.
This saves a few reads from memory in the meta shaders, we should also be able to inline other objects like descriptors. v2: fixup 16->available_sgprs (Samuel) fixup dynamic offsets. (Alex) bump to 12. handle push consts > 32 better, avoid F1 2017 crash TODO: proper vega support (Samuel) Signed-off-by: Dave Airlie <airl...@redhat.com> --- src/amd/common/ac_nir_to_llvm.c | 102 +++++++++++++++++++++++++++++++++++---- src/amd/common/ac_nir_to_llvm.h | 5 ++ src/amd/common/ac_shader_info.c | 5 +- src/amd/common/ac_shader_info.h | 1 + src/amd/vulkan/radv_cmd_buffer.c | 75 +++++++++++++++++++++------- 5 files changed, 159 insertions(+), 29 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 6c578de3aca..00ad76a82f7 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -92,6 +92,7 @@ struct nir_to_llvm_context { LLVMValueRef descriptor_sets[AC_UD_MAX_SETS]; LLVMValueRef ring_offsets; LLVMValueRef push_constants; + LLVMValueRef inline_push_consts[AC_UD_MAX_INLINE_PUSH_CONST]; LLVMValueRef view_index; LLVMValueRef num_work_groups; LLVMValueRef workgroup_ids[3]; @@ -243,7 +244,7 @@ static void set_llvm_calling_convention(LLVMValueRef func, LLVMSetFunctionCallConv(func, calling_conv); } -#define MAX_ARGS 23 +#define MAX_ARGS 32 struct arg_info { LLVMTypeRef types[MAX_ARGS]; LLVMValueRef *assign[MAX_ARGS]; @@ -538,6 +539,8 @@ struct user_sgpr_info { bool need_ring_offsets; uint8_t sgpr_count; bool indirect_all_descriptor_sets; + uint8_t base_inline_push_consts; + uint8_t num_inline_push_consts; }; static void allocate_user_sgprs(struct nir_to_llvm_context *ctx, @@ -609,8 +612,49 @@ static void allocate_user_sgprs(struct nir_to_llvm_context *ctx, } else { user_sgpr_info->sgpr_count += util_bitcount(ctx->shader_info->info.desc_set_used_mask) * 2; } + + if (ctx->shader_info->info.loads_push_constants) { + uint32_t remaining_sgprs = available_sgprs - user_sgpr_info->sgpr_count; + if (!ctx->shader_info->info.has_indirect_push_constants && + !ctx->shader_info->info.loads_dynamic_offsets) + remaining_sgprs += 2; + + if (ctx->options->layout->push_constant_size) { + uint8_t num_32bit_push_consts = (ctx->shader_info->info.max_push_constant_used - + ctx->shader_info->info.min_push_constant_used) / 4; + + if ((ctx->shader_info->info.min_push_constant_used / 4) <= 63 && + (ctx->shader_info->info.max_push_constant_used / 4) <= 63) { + user_sgpr_info->base_inline_push_consts = ctx->shader_info->info.min_push_constant_used / 4; + + if (num_32bit_push_consts < remaining_sgprs) { + user_sgpr_info->num_inline_push_consts = num_32bit_push_consts; + if (!ctx->shader_info->info.has_indirect_push_constants) + ctx->shader_info->info.loads_push_constants = false; + } else { + user_sgpr_info->num_inline_push_consts = remaining_sgprs; + } + + if (user_sgpr_info->num_inline_push_consts > AC_UD_MAX_INLINE_PUSH_CONST) + user_sgpr_info->num_inline_push_consts = AC_UD_MAX_INLINE_PUSH_CONST; + } + } + } } +static void +declare_inline_push_consts(struct nir_to_llvm_context *ctx, + gl_shader_stage stage, + const struct user_sgpr_info *user_sgpr_info, + struct arg_info *args) +{ + ctx->shader_info->inline_push_const_mask = (1ULL << user_sgpr_info->num_inline_push_consts) - 1; + ctx->shader_info->inline_push_const_base = user_sgpr_info->base_inline_push_consts; + + for (unsigned i = 0; i < user_sgpr_info->num_inline_push_consts; i++) + add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->inline_push_consts[i]); + +} static void declare_global_input_sgprs(struct nir_to_llvm_context *ctx, gl_shader_stage stage, @@ -640,10 +684,14 @@ declare_global_input_sgprs(struct nir_to_llvm_context *ctx, add_array_arg(args, const_array(type, 32), desc_sets); } - if (ctx->shader_info->info.loads_push_constants) { + if (ctx->shader_info->info.loads_push_constants || + ctx->shader_info->info.loads_dynamic_offsets) { /* 1 for push constants and dynamic descriptors */ add_array_arg(args, type, &ctx->push_constants); } + + if (!((stage == MESA_SHADER_VERTEX) || (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) + declare_inline_push_consts(ctx, stage, user_sgpr_info, args); } static void @@ -651,6 +699,7 @@ declare_vs_specific_input_sgprs(struct nir_to_llvm_context *ctx, gl_shader_stage stage, bool has_previous_stage, gl_shader_stage previous_stage, + const struct user_sgpr_info *user_sgpr_info, struct arg_info *args) { if (!ctx->is_gs_copy_shader && @@ -660,6 +709,7 @@ declare_vs_specific_input_sgprs(struct nir_to_llvm_context *ctx, add_arg(args, ARG_SGPR, const_array(ctx->ac.v4i32, 16), &ctx->vertex_buffers); } + declare_inline_push_consts(ctx, stage, user_sgpr_info, args); add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.base_vertex); add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.start_instance); if (ctx->shader_info->info.vs.needs_draw_id) { @@ -693,6 +743,16 @@ declare_tes_input_vgprs(struct nir_to_llvm_context *ctx, struct arg_info *args) add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.tes_patch_id); } +static void +set_inline_pushconst_locs(struct nir_to_llvm_context *ctx, + const struct user_sgpr_info *user_sgpr_info, + uint8_t *user_sgpr_idx) +{ + ctx->shader_info->user_sgprs_locs.push_const_base = user_sgpr_info->base_inline_push_consts; + for (unsigned i = 0; i < user_sgpr_info->num_inline_push_consts; i++) + set_loc(&ctx->shader_info->user_sgprs_locs.inline_push_consts[i], user_sgpr_idx, 1, 0); +} + static void set_global_input_locs(struct nir_to_llvm_context *ctx, gl_shader_stage stage, bool has_previous_stage, gl_shader_stage previous_stage, @@ -731,15 +791,21 @@ set_global_input_locs(struct nir_to_llvm_context *ctx, gl_shader_stage stage, ctx->shader_info->need_indirect_descriptor_sets = true; } - if (ctx->shader_info->info.loads_push_constants) { + if (ctx->shader_info->info.loads_push_constants || + ctx->shader_info->info.loads_dynamic_offsets) { set_loc_shader(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx, 2); } + + + if (!((stage == MESA_SHADER_VERTEX) || (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) + set_inline_pushconst_locs(ctx, user_sgpr_info, user_sgpr_idx); } static void set_vs_specific_input_locs(struct nir_to_llvm_context *ctx, gl_shader_stage stage, bool has_previous_stage, gl_shader_stage previous_stage, + const struct user_sgpr_info *user_sgpr_info, uint8_t *user_sgpr_idx) { if (!ctx->is_gs_copy_shader && @@ -750,6 +816,7 @@ set_vs_specific_input_locs(struct nir_to_llvm_context *ctx, user_sgpr_idx, 2); } + set_inline_pushconst_locs(ctx, user_sgpr_info, user_sgpr_idx); unsigned vs_num = 2; if (ctx->shader_info->info.vs.needs_draw_id) vs_num++; @@ -805,7 +872,7 @@ static void create_function(struct nir_to_llvm_context *ctx, previous_stage, &user_sgpr_info, &args, &desc_sets); declare_vs_specific_input_sgprs(ctx, stage, has_previous_stage, - previous_stage, &args); + previous_stage, &user_sgpr_info, &args); if (ctx->shader_info->info.needs_multiview_view_index || (!ctx->options->key.vs.as_es && !ctx->options->key.vs.as_ls && ctx->options->key.has_multiview_view_index)) add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->view_index); @@ -838,7 +905,7 @@ static void create_function(struct nir_to_llvm_context *ctx, &desc_sets); declare_vs_specific_input_sgprs(ctx, stage, has_previous_stage, - previous_stage, &args); + previous_stage, &user_sgpr_info, &args); add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->ls_out_layout); @@ -934,7 +1001,7 @@ static void create_function(struct nir_to_llvm_context *ctx, } else { declare_vs_specific_input_sgprs(ctx, stage, has_previous_stage, - previous_stage, + previous_stage, &user_sgpr_info, &args); } @@ -1076,7 +1143,7 @@ static void create_function(struct nir_to_llvm_context *ctx, break; case MESA_SHADER_VERTEX: set_vs_specific_input_locs(ctx, stage, has_previous_stage, - previous_stage, &user_sgpr_idx); + previous_stage, &user_sgpr_info, &user_sgpr_idx); if (ctx->view_index) set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); if (ctx->options->key.vs.as_ls) { @@ -1088,7 +1155,7 @@ static void create_function(struct nir_to_llvm_context *ctx, break; case MESA_SHADER_TESS_CTRL: set_vs_specific_input_locs(ctx, stage, has_previous_stage, - previous_stage, &user_sgpr_idx); + previous_stage, &user_sgpr_info, &user_sgpr_idx); if (has_previous_stage) set_loc_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT, &user_sgpr_idx, 1); @@ -1108,6 +1175,7 @@ static void create_function(struct nir_to_llvm_context *ctx, set_vs_specific_input_locs(ctx, stage, has_previous_stage, previous_stage, + &user_sgpr_info, &user_sgpr_idx); else set_loc_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, @@ -2371,9 +2439,23 @@ static LLVMValueRef visit_load_push_constant(struct nir_to_llvm_context *ctx, nir_intrinsic_instr *instr) { LLVMValueRef ptr, addr; + LLVMValueRef src0 = get_src(ctx->nir, instr->src[0]); + unsigned index = nir_intrinsic_base(instr); + + if (LLVMIsConstant(src0)) { + unsigned array_index = index; + array_index += LLVMConstIntGetZExtValue(src0); + array_index /= 4; + + array_index -= ctx->shader_info->inline_push_const_base; + uint64_t bits = (((1ULL << instr->num_components) - 1) << array_index); + if ((bits & ctx->shader_info->inline_push_const_mask) == bits) { + return ac_build_gather_values(&ctx->ac, &ctx->inline_push_consts[array_index], instr->num_components); + } + } - addr = LLVMConstInt(ctx->ac.i32, nir_intrinsic_base(instr), 0); - addr = LLVMBuildAdd(ctx->builder, addr, get_src(ctx->nir, instr->src[0]), ""); + addr = LLVMConstInt(ctx->ac.i32, index, 0); + addr = LLVMBuildAdd(ctx->builder, addr, src0, ""); ptr = ac_build_gep0(&ctx->ac, ctx->push_constants, addr); ptr = cast_ptr(ctx, ptr, get_def_type(ctx->nir, &instr->dest.ssa)); diff --git a/src/amd/common/ac_nir_to_llvm.h b/src/amd/common/ac_nir_to_llvm.h index b3ad0a09857..22330fdfbc4 100644 --- a/src/amd/common/ac_nir_to_llvm.h +++ b/src/amd/common/ac_nir_to_llvm.h @@ -127,10 +127,13 @@ enum ac_ud_index { // Match MAX_SETS from radv_descriptor_set.h #define AC_UD_MAX_SETS MAX_SETS +#define AC_UD_MAX_INLINE_PUSH_CONST 12 struct ac_userdata_locations { struct ac_userdata_info descriptor_sets[AC_UD_MAX_SETS]; struct ac_userdata_info shader_data[AC_UD_MAX_UD]; + struct ac_userdata_info inline_push_consts[AC_UD_MAX_INLINE_PUSH_CONST]; + uint8_t push_const_base; }; struct ac_vs_output_info { @@ -156,6 +159,8 @@ struct ac_shader_variant_info { unsigned num_user_sgprs; unsigned num_input_sgprs; unsigned num_input_vgprs; + uint64_t inline_push_const_mask; + uint32_t inline_push_const_base; bool need_indirect_descriptor_sets; struct { struct { diff --git a/src/amd/common/ac_shader_info.c b/src/amd/common/ac_shader_info.c index 18fa9e1c94c..fbb46684ae3 100644 --- a/src/amd/common/ac_shader_info.c +++ b/src/amd/common/ac_shader_info.c @@ -179,9 +179,10 @@ ac_nir_shader_info_pass(struct nir_shader *nir, { struct nir_function *func = (struct nir_function *)exec_list_get_head(&nir->functions); - - if (options->layout->dynamic_offset_count) + if (options->layout->dynamic_offset_count) { info->loads_push_constants = true; + info->loads_dynamic_offsets = true; + } nir_foreach_variable(variable, &nir->inputs) gather_info_input_decl(nir, options, variable, info); diff --git a/src/amd/common/ac_shader_info.h b/src/amd/common/ac_shader_info.h index e35cde0ca97..e8ea33f2e3a 100644 --- a/src/amd/common/ac_shader_info.h +++ b/src/amd/common/ac_shader_info.h @@ -32,6 +32,7 @@ struct ac_shader_info { uint8_t min_push_constant_used; uint8_t max_push_constant_used; bool has_indirect_push_constants; + bool loads_dynamic_offsets; bool loads_push_constants; bool needs_multiview_view_index; bool uses_invocation_id; diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index d870a9bedb3..e1953a3a095 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -1807,6 +1807,27 @@ radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer, assert(cmd_buffer->cs->cdw <= cdw_max); } +static struct ac_userdata_info * +radv_lookup_push_const_sgpr(struct radv_shader_variant *shader, + int idx) +{ + idx -= shader->info.user_sgprs_locs.push_const_base; + return &shader->info.user_sgprs_locs.inline_push_consts[idx]; +} + +static void +radv_emit_inline_pushconsts(struct radv_cmd_buffer *cmd_buffer, + struct radv_shader_variant *shader, + unsigned base_reg, + int idx, int count, uint32_t *values) +{ + struct ac_userdata_info *loc = radv_lookup_push_const_sgpr(shader, idx); + assert (loc->sgpr_idx == -1); + assert (!loc->indirect); + radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, count); + radeon_emit_array(cmd_buffer->cs, values, count); +} + static void radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline, @@ -1816,36 +1837,56 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, unsigned offset; void *ptr; uint64_t va; + bool need_push_constants = false; stages &= cmd_buffer->push_constant_stages; if (!stages || (!layout->push_constant_size && !layout->dynamic_offset_count)) return; - if (!radv_cmd_buffer_upload_alloc(cmd_buffer, layout->push_constant_size + - 16 * layout->dynamic_offset_count, - 256, &offset, &ptr)) - return; + radv_foreach_stage(stage, stages) { + if (!pipeline->shaders[stage]) + continue; + + need_push_constants |= pipeline->shaders[stage]->info.info.loads_push_constants; + need_push_constants |= pipeline->shaders[stage]->info.info.loads_dynamic_offsets; - memcpy(ptr, cmd_buffer->push_constants, layout->push_constant_size); - if (layout->dynamic_offset_count) { - memcpy((char*)ptr + layout->push_constant_size, cmd_buffer->dynamic_buffers, - 16 * layout->dynamic_offset_count); + uint32_t mask = pipeline->shaders[stage]->info.inline_push_const_mask; + uint32_t base_reg = pipeline->user_data_0[stage]; + while (mask) { + int start, count; + u_bit_scan_consecutive_range(&mask, &start, &count); + start += pipeline->shaders[stage]->info.inline_push_const_base; + radv_emit_inline_pushconsts(cmd_buffer, pipeline->shaders[stage], base_reg, + start, count, (uint32_t *)&cmd_buffer->push_constants[start * 4]); + } } - va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); - va += offset; + if (need_push_constants) { + if (!radv_cmd_buffer_upload_alloc(cmd_buffer, layout->push_constant_size + + 16 * layout->dynamic_offset_count, + 256, &offset, &ptr)) + return; - MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, - cmd_buffer->cs, MESA_SHADER_STAGES * 4); + memcpy(ptr, cmd_buffer->push_constants, layout->push_constant_size); + if (layout->dynamic_offset_count) { + memcpy((char*)ptr + layout->push_constant_size, cmd_buffer->dynamic_buffers, + 16 * layout->dynamic_offset_count); + } - radv_foreach_stage(stage, stages) { - if (pipeline->shaders[stage]) { - radv_emit_userdata_address(cmd_buffer, pipeline, stage, - AC_UD_PUSH_CONSTANTS, va); + va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); + va += offset; + + MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, + cmd_buffer->cs, MESA_SHADER_STAGES * 4); + + radv_foreach_stage(stage, stages) { + if (pipeline->shaders[stage]) { + radv_emit_userdata_address(cmd_buffer, pipeline, stage, + AC_UD_PUSH_CONSTANTS, va); + } } } - cmd_buffer->push_constant_stages &= ~stages; assert(cmd_buffer->cs->cdw <= cdw_max); } -- 2.14.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev