From: Marek Olšák <marek.ol...@amd.com>

HS input VGPRs must be reserved.
---
 src/gallium/drivers/radeonsi/si_shader.c | 29 +++++++++++++++++++----------
 src/gallium/drivers/radeonsi/si_shader.h |  2 ++
 2 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index dab95e2..f0e3f0c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -7207,20 +7207,23 @@ static void si_get_vs_prolog_key(const struct tgsi_shader_info *info,
                                 unsigned num_input_sgprs,
                                 const struct si_vs_prolog_bits *prolog_key,
                                 struct si_shader *shader_out,
                                 union si_shader_part_key *key)
 {
        memset(key, 0, sizeof(*key));
        key->vs_prolog.states = *prolog_key;
        key->vs_prolog.num_input_sgprs = num_input_sgprs;
        key->vs_prolog.last_input = MAX2(1, info->num_inputs) - 1;
 
+       if (shader_out->selector->type == PIPE_SHADER_TESS_CTRL)
+               key->vs_prolog.num_merged_next_stage_vgprs = 2;
+
        /* Set the instanceID flag. */
        for (unsigned i = 0; i < info->num_inputs; i++)
                if (key->vs_prolog.states.instance_divisors[i])
                        shader_out->info.uses_instanceid = true;
 }
 
 /**
  * Compute the VS epilog key, which contains all the information needed to
  * build the VS epilog function, and set the PrimitiveID output offset.
  */
@@ -7990,43 +7993,47 @@ out:
  *   (InstanceID + StartInstance),
  *   (InstanceID / 2 + StartInstance)
  */
 static void si_build_vs_prolog_function(struct si_shader_context *ctx,
                                        union si_shader_part_key *key)
 {
        struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMTypeRef *params, *returns;
        LLVMValueRef ret, func;
        int last_sgpr, num_params, num_returns, i;
+       unsigned first_vs_vgpr = key->vs_prolog.num_input_sgprs +
+                                key->vs_prolog.num_merged_next_stage_vgprs;
+       unsigned num_input_vgprs = key->vs_prolog.num_merged_next_stage_vgprs + 
4;
+       unsigned num_all_input_regs = key->vs_prolog.num_input_sgprs +
+                                     num_input_vgprs;
+       unsigned user_sgpr_base = key->vs_prolog.num_merged_next_stage_vgprs ? 
8 : 0;
 
-       ctx->param_vertex_id = key->vs_prolog.num_input_sgprs;
-       ctx->param_instance_id = key->vs_prolog.num_input_sgprs + 3;
+       ctx->param_vertex_id = first_vs_vgpr;
+       ctx->param_instance_id = first_vs_vgpr + 3;
 
        /* 4 preloaded VGPRs + vertex load indices as prolog outputs */
-       params = alloca((key->vs_prolog.num_input_sgprs + 4) *
-                       sizeof(LLVMTypeRef));
-       returns = alloca((key->vs_prolog.num_input_sgprs + 4 +
-                         key->vs_prolog.last_input + 1) *
+       params = alloca(num_all_input_regs * sizeof(LLVMTypeRef));
+       returns = alloca((num_all_input_regs + key->vs_prolog.last_input + 1) *
                         sizeof(LLVMTypeRef));
        num_params = 0;
        num_returns = 0;
 
        /* Declare input and output SGPRs. */
        num_params = 0;
        for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) {
                params[num_params++] = ctx->i32;
                returns[num_returns++] = ctx->i32;
        }
        last_sgpr = num_params - 1;
 
-       /* 4 preloaded VGPRs (outputs must be floats) */
-       for (i = 0; i < 4; i++) {
+       /* Preloaded VGPRs (outputs must be floats) */
+       for (i = 0; i < num_input_vgprs; i++) {
                params[num_params++] = ctx->i32;
                returns[num_returns++] = ctx->f32;
        }
 
        /* Vertex load indices. */
        for (i = 0; i <= key->vs_prolog.last_input; i++)
                returns[num_returns++] = ctx->f32;
 
        /* Create the function. */
        si_create_function(ctx, "vs_prolog", returns, num_returns, params,
@@ -8034,41 +8041,43 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx,
        func = ctx->main_fn;
 
        /* Copy inputs to outputs. This should be no-op, as the registers match,
         * but it will prevent the compiler from overwriting them 
unintentionally.
         */
        ret = ctx->return_value;
        for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) {
                LLVMValueRef p = LLVMGetParam(func, i);
                ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
        }
-       for (i = num_params - 4; i < num_params; i++) {
+       for (; i < num_params; i++) {
                LLVMValueRef p = LLVMGetParam(func, i);
                p = LLVMBuildBitCast(gallivm->builder, p, ctx->f32, "");
                ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
        }
 
        /* Compute vertex load indices from instance divisors. */
        for (i = 0; i <= key->vs_prolog.last_input; i++) {
                unsigned divisor = key->vs_prolog.states.instance_divisors[i];
                LLVMValueRef index;
 
                if (divisor) {
                        /* InstanceID / Divisor + StartInstance */
                        index = get_instance_index_for_fetch(ctx,
+                                                            user_sgpr_base +
                                                             
SI_SGPR_START_INSTANCE,
                                                             divisor);
                } else {
                        /* VertexID + BaseVertex */
                        index = LLVMBuildAdd(gallivm->builder,
                                             LLVMGetParam(func, 
ctx->param_vertex_id),
-                                            LLVMGetParam(func, 
SI_SGPR_BASE_VERTEX), "");
+                                            LLVMGetParam(func, user_sgpr_base +
+                                                               
SI_SGPR_BASE_VERTEX), "");
                }
 
                index = LLVMBuildBitCast(gallivm->builder, index, ctx->f32, "");
                ret = LLVMBuildInsertValue(gallivm->builder, ret, index,
                                           num_params++, "");
        }
 
        si_llvm_build_ret(ctx, ret);
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 1fee044..afbe547 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -345,20 +345,22 @@ struct si_ps_epilog_bits {
        unsigned        alpha_func:3;
        unsigned        alpha_to_one:1;
        unsigned        poly_line_smoothing:1;
        unsigned        clamp_color:1;
 };
 
 union si_shader_part_key {
        struct {
                struct si_vs_prolog_bits states;
                unsigned        num_input_sgprs:6;
+               /* For merged stages such as LS-HS, HS input VGPRs are first. */
+               unsigned        num_merged_next_stage_vgprs:3;
                unsigned        last_input:4;
        } vs_prolog;
        struct {
                struct si_vs_epilog_bits states;
                unsigned        prim_id_param_offset:5;
        } vs_epilog;
        struct {
                struct si_tcs_epilog_bits states;
        } tcs_epilog;
        struct {
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to