From: Nicolai Hähnle <nicolai.haeh...@amd.com>

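Count the input VGPRs of fragment shaders in create_function as well,
excluding the ones that are supplied by a prolog, so that the prolog
generated for monolithic fragment shaders will have the right signature.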
---
 src/gallium/drivers/radeonsi/si_shader.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 447293c..b15c60d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5488,20 +5488,21 @@ static void declare_tess_lds(struct si_shader_context 
*ctx)
 
 static void create_function(struct si_shader_context *ctx)
 {
        struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base;
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        struct si_shader *shader = ctx->shader;
        LLVMTypeRef params[SI_NUM_PARAMS + SI_NUM_VERTEX_BUFFERS], v3i32;
        LLVMTypeRef returns[16+32*4];
        unsigned i, last_sgpr, num_params, num_return_sgprs;
        unsigned num_returns = 0;
+       unsigned num_prolog_vgprs = 0;
 
        v3i32 = LLVMVectorType(ctx->i32, 3);
 
        params[SI_PARAM_RW_BUFFERS] = const_array(ctx->v16i8, 
SI_NUM_RW_BUFFERS);
        params[SI_PARAM_CONST_BUFFERS] = const_array(ctx->v16i8, 
SI_NUM_CONST_BUFFERS);
        params[SI_PARAM_SAMPLERS] = const_array(ctx->v8i32, SI_NUM_SAMPLERS);
        params[SI_PARAM_IMAGES] = const_array(ctx->v8i32, SI_NUM_IMAGES);
        params[SI_PARAM_SHADER_BUFFERS] = const_array(ctx->v4i32, 
SI_NUM_SHADER_BUFFERS);
 
        switch (ctx->type) {
@@ -5538,20 +5539,22 @@ static void create_function(struct si_shader_context 
*ctx)
                params[ctx->param_vs_prim_id = num_params++] = ctx->i32;
                params[ctx->param_instance_id = num_params++] = ctx->i32;
 
                if (!ctx->no_prolog &&
                    !ctx->is_gs_copy_shader) {
                        /* Vertex load indices. */
                        ctx->param_vertex_index0 = num_params;
 
                        for (i = 0; i < shader->selector->info.num_inputs; i++)
                                params[num_params++] = ctx->i32;
+
+                       num_prolog_vgprs += shader->selector->info.num_inputs;
                }
 
                if (!ctx->no_epilog &&
                    !ctx->is_gs_copy_shader) {
                        /* PrimitiveID output. */
                        if (!shader->key.vs.as_es && !shader->key.vs.as_ls)
                                for (i = 0; i <= VS_EPILOG_PRIMID_LOC; i++)
                                        returns[num_returns++] = ctx->f32;
                }
                break;
@@ -5637,34 +5640,37 @@ static void create_function(struct si_shader_context 
*ctx)
                params[SI_PARAM_PERSP_PULL_MODEL] = v3i32;
                params[SI_PARAM_LINEAR_SAMPLE] = ctx->v2i32;
                params[SI_PARAM_LINEAR_CENTER] = ctx->v2i32;
                params[SI_PARAM_LINEAR_CENTROID] = ctx->v2i32;
                params[SI_PARAM_LINE_STIPPLE_TEX] = ctx->f32;
                params[SI_PARAM_POS_X_FLOAT] = ctx->f32;
                params[SI_PARAM_POS_Y_FLOAT] = ctx->f32;
                params[SI_PARAM_POS_Z_FLOAT] = ctx->f32;
                params[SI_PARAM_POS_W_FLOAT] = ctx->f32;
                params[SI_PARAM_FRONT_FACE] = ctx->i32;
+               shader->info.face_vgpr_index = 20;
                params[SI_PARAM_ANCILLARY] = ctx->i32;
                params[SI_PARAM_SAMPLE_COVERAGE] = ctx->f32;
                params[SI_PARAM_POS_FIXED_PT] = ctx->i32;
                num_params = SI_PARAM_POS_FIXED_PT+1;
 
                if (!ctx->no_prolog) {
                        /* Color inputs from the prolog. */
                        if (shader->selector->info.colors_read) {
                                unsigned num_color_elements =
                                        
util_bitcount(shader->selector->info.colors_read);
 
                                assert(num_params + num_color_elements <= 
ARRAY_SIZE(params));
                                for (i = 0; i < num_color_elements; i++)
                                        params[num_params++] = ctx->f32;
+
+                               num_prolog_vgprs += num_color_elements;
                        }
                }
 
                if (!ctx->no_epilog) {
                        /* Outputs for the epilog. */
                        num_return_sgprs = SI_SGPR_ALPHA_REF + 1;
                        num_returns =
                                num_return_sgprs +
                                
util_bitcount(shader->selector->info.colors_written) * 4 +
                                shader->selector->info.writes_z +
@@ -5733,26 +5739,25 @@ static void create_function(struct si_shader_context 
*ctx)
                                      "amdgpu-max-work-group-size",
                                      max_work_group_size);
        }
 
        shader->info.num_input_sgprs = 0;
        shader->info.num_input_vgprs = 0;
 
        for (i = 0; i <= last_sgpr; ++i)
                shader->info.num_input_sgprs += llvm_get_type_size(params[i]) / 
4;
 
-       /* Unused fragment shader inputs are eliminated by the compiler,
-        * so we don't know yet how many there will be.
-        */
-       if (ctx->type != PIPE_SHADER_FRAGMENT)
-               for (; i < num_params; ++i)
-                       shader->info.num_input_vgprs += 
llvm_get_type_size(params[i]) / 4;
+       for (; i < num_params; ++i)
+               shader->info.num_input_vgprs += llvm_get_type_size(params[i]) / 
4;
+
+       assert(shader->info.num_input_vgprs >= num_prolog_vgprs);
+       shader->info.num_input_vgprs -= num_prolog_vgprs;
 
        if (!ctx->screen->has_ds_bpermute &&
            bld_base->info &&
            (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
             bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 ||
             bld_base->info->opcode_count[TGSI_OPCODE_DDX_FINE] > 0 ||
             bld_base->info->opcode_count[TGSI_OPCODE_DDY_FINE] > 0 ||
             bld_base->info->opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0 ||
             bld_base->info->opcode_count[TGSI_OPCODE_INTERP_SAMPLE] > 0))
                ctx->lds =
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to