From: Nicolai Hähnle <nicolai.haeh...@amd.com> v2: use LLVM values instead of function parameter indices
Reviewed-by: Marek Olšák <marek.ol...@amd.com> (v1) --- src/amd/common/ac_shader_abi.h | 40 +++++++++++++++ src/gallium/drivers/radeonsi/si_shader.c | 60 +++++++++++++---------- src/gallium/drivers/radeonsi/si_shader_internal.h | 8 ++- 3 files changed, 77 insertions(+), 31 deletions(-) create mode 100644 src/amd/common/ac_shader_abi.h diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h new file mode 100644 index 0000000..e10550b --- /dev/null +++ b/src/amd/common/ac_shader_abi.h @@ -0,0 +1,40 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef AC_SHADER_ABI_H +#define AC_SHADER_ABI_H + +#include <llvm-c/Core.h> + +/* Document the shader ABI during compilation. This is what allows radeonsi and + * radv to share a compiler backend. + */ +struct ac_shader_abi { + LLVMValueRef base_vertex; + LLVMValueRef start_instance; + LLVMValueRef draw_id; + LLVMValueRef vertex_id; + LLVMValueRef instance_id; +}; + +#endif /* AC_SHADER_ABI_H */ diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 28923e4..d8bacdb 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -60,20 +60,21 @@ struct si_shader_output_values unsigned semantic_index; ubyte vertex_stream[4]; }; /** * Used to collect types and other info about arguments of the LLVM function * before the function is created. */ struct si_function_info { LLVMTypeRef types[100]; + LLVMValueRef *assign[100]; unsigned num_sgpr_params; unsigned num_params; }; enum si_arg_regfile { ARG_SGPR, ARG_VGPR }; static void si_init_shader_ctx(struct si_shader_context *ctx, @@ -118,35 +119,43 @@ static bool is_merged_shader(struct si_shader *shader) shader->selector->type == PIPE_SHADER_TESS_CTRL || shader->selector->type == PIPE_SHADER_GEOMETRY; } static void si_init_function_info(struct si_function_info *fninfo) { fninfo->num_params = 0; fninfo->num_sgpr_params = 0; } -static unsigned add_arg(struct si_function_info *fninfo, - enum si_arg_regfile regfile, LLVMTypeRef type) +static unsigned add_arg_assign(struct si_function_info *fninfo, + enum si_arg_regfile regfile, LLVMTypeRef type, + LLVMValueRef *assign) { assert(regfile != ARG_SGPR || fninfo->num_sgpr_params == fninfo->num_params); unsigned idx = fninfo->num_params++; assert(idx < ARRAY_SIZE(fninfo->types)); if (regfile == ARG_SGPR) fninfo->num_sgpr_params = fninfo->num_params; fninfo->types[idx] = type; + fninfo->assign[idx] = assign; return idx; } +static unsigned add_arg(struct si_function_info *fninfo, + enum si_arg_regfile regfile, LLVMTypeRef type) +{ + return add_arg_assign(fninfo, regfile, type, NULL); +} + static void add_arg_checked(struct si_function_info *fninfo, enum si_arg_regfile regfile, LLVMTypeRef type, unsigned idx) { MAYBE_UNUSED unsigned actual = add_arg(fninfo, regfile, type); assert(actual == idx); } /** * Returns a unique index for a per-patch semantic name and index. The index @@ -353,22 +362,21 @@ get_tcs_out_current_patch_data_offset(struct si_shader_context *ctx) rel_patch_id, ""), ""); } static LLVMValueRef get_instance_index_for_fetch( struct si_shader_context *ctx, unsigned param_start_instance, LLVMValueRef divisor) { struct gallivm_state *gallivm = &ctx->gallivm; - LLVMValueRef result = LLVMGetParam(ctx->main_fn, - ctx->param_instance_id); + LLVMValueRef result = ctx->abi.instance_id; /* The division must be done before START_INSTANCE is added. */ if (divisor != ctx->i32_1) result = LLVMBuildUDiv(gallivm->builder, result, divisor, ""); return LLVMBuildAdd(gallivm->builder, result, LLVMGetParam(ctx->main_fn, param_start_instance), ""); } /* Bitcast <4 x float> to <2 x double>, extract the component, and convert @@ -1463,30 +1471,27 @@ static void declare_system_value(struct si_shader_context *ctx, const struct tgsi_full_declaration *decl) { struct lp_build_context *bld = &ctx->bld_base.base; struct gallivm_state *gallivm = &ctx->gallivm; LLVMValueRef value = 0; assert(index < RADEON_LLVM_MAX_SYSTEM_VALUES); switch (decl->Semantic.Name) { case TGSI_SEMANTIC_INSTANCEID: - value = LLVMGetParam(ctx->main_fn, - ctx->param_instance_id); + value = ctx->abi.instance_id; break; case TGSI_SEMANTIC_VERTEXID: value = LLVMBuildAdd(gallivm->builder, - LLVMGetParam(ctx->main_fn, - ctx->param_vertex_id), - LLVMGetParam(ctx->main_fn, - ctx->param_base_vertex), ""); + ctx->abi.vertex_id, + ctx->abi.base_vertex, ""); break; case TGSI_SEMANTIC_VERTEXID_NOBASE: /* Unused. Clarify the meaning in indexed vs. non-indexed * draws if this is ever used again. */ assert(false); break; case TGSI_SEMANTIC_BASEVERTEX: { @@ -1494,31 +1499,30 @@ static void declare_system_value(struct si_shader_context *ctx, * (for direct draws) or the CP (for indirect draws) is the * first vertex ID, but GLSL expects 0 to be returned. */ LLVMValueRef vs_state = LLVMGetParam(ctx->main_fn, ctx->param_vs_state_bits); LLVMValueRef indexed; indexed = LLVMBuildLShr(gallivm->builder, vs_state, ctx->i32_1, ""); indexed = LLVMBuildTrunc(gallivm->builder, indexed, ctx->i1, ""); value = LLVMBuildSelect(gallivm->builder, indexed, - LLVMGetParam(ctx->main_fn, ctx->param_base_vertex), - ctx->i32_0, ""); + ctx->abi.base_vertex, ctx->i32_0, ""); break; } case TGSI_SEMANTIC_BASEINSTANCE: - value = LLVMGetParam(ctx->main_fn, ctx->param_start_instance); + value = ctx->abi.start_instance; break; case TGSI_SEMANTIC_DRAWID: - value = LLVMGetParam(ctx->main_fn, ctx->param_draw_id); + value = ctx->abi.draw_id; break; case TGSI_SEMANTIC_INVOCATIONID: if (ctx->type == PIPE_SHADER_TESS_CTRL) value = unpack_param(ctx, ctx->param_tcs_rel_ids, 8, 5); else if (ctx->type == PIPE_SHADER_GEOMETRY) value = LLVMGetParam(ctx->main_fn, ctx->param_gs_instance_id); else assert(!"INVOCATIONID not implemented"); @@ -4006,20 +4010,25 @@ static void si_create_function(struct si_shader_context *ctx, * SGPR spilling significantly. */ if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) { lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_BYVAL); lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_NOALIAS); ac_add_attr_dereferenceable(P, UINT64_MAX); } else lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_INREG); } + for (i = 0; i < fninfo->num_params; ++i) { + if (fninfo->assign[i]) + *fninfo->assign[i] = LLVMGetParam(ctx->main_fn, i); + } + if (max_workgroup_size) { si_llvm_add_attribute(ctx->main_fn, "amdgpu-max-work-group-size", max_workgroup_size); } LLVMAddTargetDependentFunctionAttr(ctx->main_fn, "no-signed-zeros-fp-math", "true"); if (ctx->screen->b.debug_flags & DBG_UNSAFE_MATH) { /* These were copied from some LLVM test. */ @@ -4153,38 +4162,38 @@ static void declare_default_desc_pointers(struct si_shader_context *ctx, ctx->param_rw_buffers = add_arg(fninfo, ARG_SGPR, si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS)); declare_per_stage_desc_pointers(ctx, fninfo, true); } static void declare_vs_specific_input_sgprs(struct si_shader_context *ctx, struct si_function_info *fninfo) { ctx->param_vertex_buffers = add_arg(fninfo, ARG_SGPR, si_const_array(ctx->v4i32, SI_NUM_VERTEX_BUFFERS)); - ctx->param_base_vertex = add_arg(fninfo, ARG_SGPR, ctx->i32); - ctx->param_start_instance = add_arg(fninfo, ARG_SGPR, ctx->i32); - ctx->param_draw_id = add_arg(fninfo, ARG_SGPR, ctx->i32); + add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.base_vertex); + add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.start_instance); + add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.draw_id); ctx->param_vs_state_bits = add_arg(fninfo, ARG_SGPR, ctx->i32); } static void declare_vs_input_vgprs(struct si_shader_context *ctx, struct si_function_info *fninfo, unsigned *num_prolog_vgprs) { struct si_shader *shader = ctx->shader; - ctx->param_vertex_id = add_arg(fninfo, ARG_VGPR, ctx->i32); + add_arg_assign(fninfo, ARG_VGPR, ctx->i32, &ctx->abi.vertex_id); if (shader->key.as_ls) { ctx->param_rel_auto_id = add_arg(fninfo, ARG_VGPR, ctx->i32); - ctx->param_instance_id = add_arg(fninfo, ARG_VGPR, ctx->i32); + add_arg_assign(fninfo, ARG_VGPR, ctx->i32, &ctx->abi.instance_id); } else { - ctx->param_instance_id = add_arg(fninfo, ARG_VGPR, ctx->i32); + add_arg_assign(fninfo, ARG_VGPR, ctx->i32, &ctx->abi.instance_id); ctx->param_vs_prim_id = add_arg(fninfo, ARG_VGPR, ctx->i32); } add_arg(fninfo, ARG_VGPR, ctx->i32); /* unused */ if (!shader->is_gs_copy_shader) { /* Vertex load indices. */ ctx->param_vertex_index0 = fninfo->num_params; for (unsigned i = 0; i < shader->selector->info.num_inputs; i++) add_arg(fninfo, ARG_VGPR, ctx->i32); *num_prolog_vgprs += shader->selector->info.num_inputs; @@ -5196,22 +5205,21 @@ si_generate_gs_copy_shader(struct si_screen *sscreen, si_init_shader_ctx(&ctx, sscreen, tm); ctx.shader = shader; ctx.type = PIPE_SHADER_VERTEX; builder = gallivm->builder; create_function(&ctx); preload_ring_buffers(&ctx); LLVMValueRef voffset = - lp_build_mul_imm(uint, LLVMGetParam(ctx.main_fn, - ctx.param_vertex_id), 4); + lp_build_mul_imm(uint, ctx.abi.vertex_id, 4); /* Fetch the vertex stream ID.*/ LLVMValueRef stream_id; if (gs_selector->so.num_outputs) stream_id = unpack_param(&ctx, ctx.param_streamout_config, 24, 2); else stream_id = ctx.i32_0; /* Fill in output information. */ @@ -6594,42 +6602,42 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx, LLVMTypeRef *returns; LLVMValueRef ret, func; int num_returns, i; unsigned first_vs_vgpr = key->vs_prolog.num_input_sgprs + key->vs_prolog.num_merged_next_stage_vgprs; unsigned num_input_vgprs = key->vs_prolog.num_merged_next_stage_vgprs + 4; unsigned num_all_input_regs = key->vs_prolog.num_input_sgprs + num_input_vgprs; unsigned user_sgpr_base = key->vs_prolog.num_merged_next_stage_vgprs ? 8 : 0; - ctx->param_vertex_id = first_vs_vgpr; - ctx->param_instance_id = first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1); - si_init_function_info(&fninfo); /* 4 preloaded VGPRs + vertex load indices as prolog outputs */ returns = alloca((num_all_input_regs + key->vs_prolog.last_input + 1) * sizeof(LLVMTypeRef)); num_returns = 0; /* Declare input and output SGPRs. */ for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) { add_arg(&fninfo, ARG_SGPR, ctx->i32); returns[num_returns++] = ctx->i32; } /* Preloaded VGPRs (outputs must be floats) */ for (i = 0; i < num_input_vgprs; i++) { add_arg(&fninfo, ARG_VGPR, ctx->i32); returns[num_returns++] = ctx->f32; } + fninfo.assign[first_vs_vgpr] = &ctx->abi.vertex_id; + fninfo.assign[first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1)] = &ctx->abi.instance_id; + /* Vertex load indices. */ for (i = 0; i <= key->vs_prolog.last_input; i++) returns[num_returns++] = ctx->f32; /* Create the function. */ si_create_function(ctx, "vs_prolog", returns, num_returns, &fninfo, 0); func = ctx->main_fn; if (key->vs_prolog.num_merged_next_stage_vgprs && !key->vs_prolog.is_monolithic) @@ -6678,21 +6686,21 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx, } /* InstanceID / Divisor + StartInstance */ index = get_instance_index_for_fetch(ctx, user_sgpr_base + SI_SGPR_START_INSTANCE, divisor); } else { /* VertexID + BaseVertex */ index = LLVMBuildAdd(gallivm->builder, - LLVMGetParam(func, ctx->param_vertex_id), + ctx->abi.vertex_id, LLVMGetParam(func, user_sgpr_base + SI_SGPR_BASE_VERTEX), ""); } index = LLVMBuildBitCast(gallivm->builder, index, ctx->f32, ""); ret = LLVMBuildInsertValue(gallivm->builder, ret, index, fninfo.num_params + i, ""); } si_llvm_build_ret(ctx, ret); diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 3556e69..90a70b1 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -21,20 +21,21 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef SI_SHADER_PRIVATE_H #define SI_SHADER_PRIVATE_H #include "si_shader.h" #include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_tgsi.h" #include "tgsi/tgsi_parse.h" +#include "ac_shader_abi.h" #include "ac_llvm_util.h" #include "ac_llvm_build.h" #include <llvm-c/Core.h> #include <llvm-c/TargetMachine.h> struct pipe_debug_callback; struct ac_shader_binary; #define RADEON_LLVM_MAX_INPUT_SLOTS 32 @@ -59,20 +60,22 @@ struct si_shader_context { /* For clamping the non-constant index in resource indexing: */ unsigned num_const_buffers; unsigned num_shader_buffers; unsigned num_images; unsigned num_samplers; /* Whether the prolog will be compiled separately. */ bool separate_prolog; + struct ac_shader_abi abi; + /** This function is responsible for initilizing the inputs array and will be * called once for each input declared in the TGSI shader. */ void (*load_input)(struct si_shader_context *, unsigned input_index, const struct tgsi_full_declaration *decl, LLVMValueRef out[4]); void (*load_system_value)(struct si_shader_context *, unsigned index, @@ -115,27 +118,22 @@ struct si_shader_context { /* Parameter indices for LLVMGetParam. */ int param_rw_buffers; int param_const_and_shader_buffers; int param_samplers_and_images; /* Common inputs for merged shaders. */ int param_merged_wave_info; int param_merged_scratch_offset; /* API VS */ int param_vertex_buffers; - int param_base_vertex; - int param_start_instance; - int param_draw_id; - int param_vertex_id; int param_rel_auto_id; int param_vs_prim_id; - int param_instance_id; int param_vertex_index0; /* VS states and layout of LS outputs / TCS inputs at the end * [0] = clamp vertex color * [1] = indexed * [8:20] = stride between patches in DW = num_inputs * num_vertices * 4 * max = 32*32*4 + 32*4 * [24:31] = stride between vertices in DW = num_inputs * 4 * max = 32*4 */ int param_vs_state_bits; -- 2.9.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev