From: Nicolai Hähnle <nicolai.haeh...@amd.com> So that the prolog generated for monolithic fragment shaders will have the right signature. --- src/gallium/drivers/radeonsi/si_shader.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 447293c..b15c60d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -5488,20 +5488,21 @@ static void declare_tess_lds(struct si_shader_context *ctx) static void create_function(struct si_shader_context *ctx) { struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base; struct gallivm_state *gallivm = bld_base->base.gallivm; struct si_shader *shader = ctx->shader; LLVMTypeRef params[SI_NUM_PARAMS + SI_NUM_VERTEX_BUFFERS], v3i32; LLVMTypeRef returns[16+32*4]; unsigned i, last_sgpr, num_params, num_return_sgprs; unsigned num_returns = 0; + unsigned num_prolog_vgprs = 0; v3i32 = LLVMVectorType(ctx->i32, 3); params[SI_PARAM_RW_BUFFERS] = const_array(ctx->v16i8, SI_NUM_RW_BUFFERS); params[SI_PARAM_CONST_BUFFERS] = const_array(ctx->v16i8, SI_NUM_CONST_BUFFERS); params[SI_PARAM_SAMPLERS] = const_array(ctx->v8i32, SI_NUM_SAMPLERS); params[SI_PARAM_IMAGES] = const_array(ctx->v8i32, SI_NUM_IMAGES); params[SI_PARAM_SHADER_BUFFERS] = const_array(ctx->v4i32, SI_NUM_SHADER_BUFFERS); switch (ctx->type) { @@ -5538,20 +5539,22 @@ static void create_function(struct si_shader_context *ctx) params[ctx->param_vs_prim_id = num_params++] = ctx->i32; params[ctx->param_instance_id = num_params++] = ctx->i32; if (!ctx->no_prolog && !ctx->is_gs_copy_shader) { /* Vertex load indices. */ ctx->param_vertex_index0 = num_params; for (i = 0; i < shader->selector->info.num_inputs; i++) params[num_params++] = ctx->i32; + + num_prolog_vgprs += shader->selector->info.num_inputs; } if (!ctx->no_epilog && !ctx->is_gs_copy_shader) { /* PrimitiveID output. */ if (!shader->key.vs.as_es && !shader->key.vs.as_ls) for (i = 0; i <= VS_EPILOG_PRIMID_LOC; i++) returns[num_returns++] = ctx->f32; } break; @@ -5637,34 +5640,37 @@ static void create_function(struct si_shader_context *ctx) params[SI_PARAM_PERSP_PULL_MODEL] = v3i32; params[SI_PARAM_LINEAR_SAMPLE] = ctx->v2i32; params[SI_PARAM_LINEAR_CENTER] = ctx->v2i32; params[SI_PARAM_LINEAR_CENTROID] = ctx->v2i32; params[SI_PARAM_LINE_STIPPLE_TEX] = ctx->f32; params[SI_PARAM_POS_X_FLOAT] = ctx->f32; params[SI_PARAM_POS_Y_FLOAT] = ctx->f32; params[SI_PARAM_POS_Z_FLOAT] = ctx->f32; params[SI_PARAM_POS_W_FLOAT] = ctx->f32; params[SI_PARAM_FRONT_FACE] = ctx->i32; + shader->info.face_vgpr_index = 20; params[SI_PARAM_ANCILLARY] = ctx->i32; params[SI_PARAM_SAMPLE_COVERAGE] = ctx->f32; params[SI_PARAM_POS_FIXED_PT] = ctx->i32; num_params = SI_PARAM_POS_FIXED_PT+1; if (!ctx->no_prolog) { /* Color inputs from the prolog. */ if (shader->selector->info.colors_read) { unsigned num_color_elements = util_bitcount(shader->selector->info.colors_read); assert(num_params + num_color_elements <= ARRAY_SIZE(params)); for (i = 0; i < num_color_elements; i++) params[num_params++] = ctx->f32; + + num_prolog_vgprs += num_color_elements; } } if (!ctx->no_epilog) { /* Outputs for the epilog. */ num_return_sgprs = SI_SGPR_ALPHA_REF + 1; num_returns = num_return_sgprs + util_bitcount(shader->selector->info.colors_written) * 4 + shader->selector->info.writes_z + @@ -5733,26 +5739,25 @@ static void create_function(struct si_shader_context *ctx) "amdgpu-max-work-group-size", max_work_group_size); } shader->info.num_input_sgprs = 0; shader->info.num_input_vgprs = 0; for (i = 0; i <= last_sgpr; ++i) shader->info.num_input_sgprs += llvm_get_type_size(params[i]) / 4; - /* Unused fragment shader inputs are eliminated by the compiler, - * so we don't know yet how many there will be. - */ - if (ctx->type != PIPE_SHADER_FRAGMENT) - for (; i < num_params; ++i) - shader->info.num_input_vgprs += llvm_get_type_size(params[i]) / 4; + for (; i < num_params; ++i) + shader->info.num_input_vgprs += llvm_get_type_size(params[i]) / 4; + + assert(shader->info.num_input_vgprs >= num_prolog_vgprs); + shader->info.num_input_vgprs -= num_prolog_vgprs; if (!ctx->screen->has_ds_bpermute && bld_base->info && (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 || bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 || bld_base->info->opcode_count[TGSI_OPCODE_DDX_FINE] > 0 || bld_base->info->opcode_count[TGSI_OPCODE_DDY_FINE] > 0 || bld_base->info->opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0 || bld_base->info->opcode_count[TGSI_OPCODE_INTERP_SAMPLE] > 0)) ctx->lds = -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev