From: Marek Olšák <marek.ol...@amd.com>

HS input VGPRs must be reserved.
---
 src/gallium/drivers/radeonsi/si_shader.c | 29 +++++++++++++++++++----------
 src/gallium/drivers/radeonsi/si_shader.h |  2 ++
 2 files changed, 21 insertions(+), 10 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index dab95e2..f0e3f0c 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -7207,20 +7207,23 @@ static void si_get_vs_prolog_key(const struct tgsi_shader_info *info, unsigned num_input_sgprs, const struct si_vs_prolog_bits *prolog_key, struct si_shader *shader_out, union si_shader_part_key *key) { memset(key, 0, sizeof(*key)); key->vs_prolog.states = *prolog_key; key->vs_prolog.num_input_sgprs = num_input_sgprs; key->vs_prolog.last_input = MAX2(1, info->num_inputs) - 1; + if (shader_out->selector->type == PIPE_SHADER_TESS_CTRL) + key->vs_prolog.num_merged_next_stage_vgprs = 2; + /* Set the instanceID flag. */ for (unsigned i = 0; i < info->num_inputs; i++) if (key->vs_prolog.states.instance_divisors[i]) shader_out->info.uses_instanceid = true; } /** * Compute the VS epilog key, which contains all the information needed to * build the VS epilog function, and set the PrimitiveID output offset. */ @@ -7990,43 +7993,47 @@ out: * (InstanceID + StartInstance), * (InstanceID / 2 + StartInstance) */ static void si_build_vs_prolog_function(struct si_shader_context *ctx, union si_shader_part_key *key) { struct gallivm_state *gallivm = &ctx->gallivm; LLVMTypeRef *params, *returns; LLVMValueRef ret, func; int last_sgpr, num_params, num_returns, i; + unsigned first_vs_vgpr = key->vs_prolog.num_input_sgprs + + key->vs_prolog.num_merged_next_stage_vgprs; + unsigned num_input_vgprs = key->vs_prolog.num_merged_next_stage_vgprs + 4; + unsigned num_all_input_regs = key->vs_prolog.num_input_sgprs + + num_input_vgprs; + unsigned user_sgpr_base = key->vs_prolog.num_merged_next_stage_vgprs ? 
8 : 0; - ctx->param_vertex_id = key->vs_prolog.num_input_sgprs; - ctx->param_instance_id = key->vs_prolog.num_input_sgprs + 3; + ctx->param_vertex_id = first_vs_vgpr; + ctx->param_instance_id = first_vs_vgpr + 3; /* 4 preloaded VGPRs + vertex load indices as prolog outputs */ - params = alloca((key->vs_prolog.num_input_sgprs + 4) * - sizeof(LLVMTypeRef)); - returns = alloca((key->vs_prolog.num_input_sgprs + 4 + - key->vs_prolog.last_input + 1) * + params = alloca(num_all_input_regs * sizeof(LLVMTypeRef)); + returns = alloca((num_all_input_regs + key->vs_prolog.last_input + 1) * sizeof(LLVMTypeRef)); num_params = 0; num_returns = 0; /* Declare input and output SGPRs. */ num_params = 0; for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) { params[num_params++] = ctx->i32; returns[num_returns++] = ctx->i32; } last_sgpr = num_params - 1; - /* 4 preloaded VGPRs (outputs must be floats) */ - for (i = 0; i < 4; i++) { + /* Preloaded VGPRs (outputs must be floats) */ + for (i = 0; i < num_input_vgprs; i++) { params[num_params++] = ctx->i32; returns[num_returns++] = ctx->f32; } /* Vertex load indices. */ for (i = 0; i <= key->vs_prolog.last_input; i++) returns[num_returns++] = ctx->f32; /* Create the function. */ si_create_function(ctx, "vs_prolog", returns, num_returns, params, @@ -8034,41 +8041,43 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx, func = ctx->main_fn; /* Copy inputs to outputs. This should be no-op, as the registers match, * but it will prevent the compiler from overwriting them unintentionally. 
*/ ret = ctx->return_value; for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) { LLVMValueRef p = LLVMGetParam(func, i); ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, ""); } - for (i = num_params - 4; i < num_params; i++) { + for (; i < num_params; i++) { LLVMValueRef p = LLVMGetParam(func, i); p = LLVMBuildBitCast(gallivm->builder, p, ctx->f32, ""); ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, ""); } /* Compute vertex load indices from instance divisors. */ for (i = 0; i <= key->vs_prolog.last_input; i++) { unsigned divisor = key->vs_prolog.states.instance_divisors[i]; LLVMValueRef index; if (divisor) { /* InstanceID / Divisor + StartInstance */ index = get_instance_index_for_fetch(ctx, + user_sgpr_base + SI_SGPR_START_INSTANCE, divisor); } else { /* VertexID + BaseVertex */ index = LLVMBuildAdd(gallivm->builder, LLVMGetParam(func, ctx->param_vertex_id), - LLVMGetParam(func, SI_SGPR_BASE_VERTEX), ""); + LLVMGetParam(func, user_sgpr_base + + SI_SGPR_BASE_VERTEX), ""); } index = LLVMBuildBitCast(gallivm->builder, index, ctx->f32, ""); ret = LLVMBuildInsertValue(gallivm->builder, ret, index, num_params++, ""); } si_llvm_build_ret(ctx, ret); } diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 1fee044..afbe547 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -345,20 +345,22 @@ struct si_ps_epilog_bits { unsigned alpha_func:3; unsigned alpha_to_one:1; unsigned poly_line_smoothing:1; unsigned clamp_color:1; }; union si_shader_part_key { struct { struct si_vs_prolog_bits states; unsigned num_input_sgprs:6; + /* For merged stages such as LS-HS, HS input VGPRs are first. 
*/ + unsigned num_merged_next_stage_vgprs:3; unsigned last_input:4; } vs_prolog; struct { struct si_vs_epilog_bits states; unsigned prim_id_param_offset:5; } vs_epilog; struct { struct si_tcs_epilog_bits states; } tcs_epilog; struct { -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev