From: Nicolai Hähnle <nicolai.haeh...@amd.com> --- src/gallium/drivers/radeonsi/si_shader.c | 42 ++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 10 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index b15c60d..1955917 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -64,20 +64,22 @@ static void si_init_shader_ctx(struct si_shader_context *ctx, struct si_shader *shader, LLVMTargetMachineRef tm); static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data); static void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f); +static void si_build_ps_prolog_function(struct si_shader_context *ctx, + union si_shader_part_key *key); static void si_build_ps_epilog_function(struct si_shader_context *ctx, union si_shader_part_key *key); /* Ideally pass the sample mask input to the PS epilog as v13, which * is its usual location, so that the shader doesn't have to add v_mov. */ #define PS_EPILOG_SAMPLEMASK_MIN_LOC 13 /* The VS location of the PrimitiveID input is the same in the epilog, * so that the main shader part doesn't have to move it. @@ -6765,21 +6767,22 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx, si_llvm_build_ret(ctx, ctx->return_value); return true; } /** * Compute the PS prolog key, which contains all the information needed to * build the PS prolog function, and set related bits in shader->config. */ static void si_get_ps_prolog_key(struct si_shader *shader, - union si_shader_part_key *key) + union si_shader_part_key *key, + bool separate_prolog) { struct tgsi_shader_info *info = &shader->selector->info; memset(key, 0, sizeof(*key)); key->ps_prolog.states = shader->key.ps.prolog; key->ps_prolog.colors_read = info->colors_read; key->ps_prolog.num_input_sgprs = shader->info.num_input_sgprs; key->ps_prolog.num_input_vgprs = shader->info.num_input_vgprs; key->ps_prolog.wqm = info->uses_derivatives && (key->ps_prolog.colors_read || @@ -6845,33 +6848,40 @@ static void si_get_ps_prolog_key(struct si_shader *shader, assert(0); } break; case TGSI_INTERPOLATE_LINEAR: /* Force the interpolation location for colors here. */ if (shader->key.ps.prolog.force_linear_sample_interp) location = TGSI_INTERPOLATE_LOC_SAMPLE; if (shader->key.ps.prolog.force_linear_center_interp) location = TGSI_INTERPOLATE_LOC_CENTER; + /* The VGPR assignment for non-monolithic shaders + * works because InitialPSInputAddr is set on the + * main shader and PERSP_PULL_MODEL is never used. + */ switch (location) { case TGSI_INTERPOLATE_LOC_SAMPLE: - key->ps_prolog.color_interp_vgpr_index[i] = 6; + key->ps_prolog.color_interp_vgpr_index[i] = + separate_prolog ? 6 : 9; shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_SAMPLE_ENA(1); break; case TGSI_INTERPOLATE_LOC_CENTER: - key->ps_prolog.color_interp_vgpr_index[i] = 8; + key->ps_prolog.color_interp_vgpr_index[i] = + separate_prolog ? 8 : 11; shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_CENTER_ENA(1); break; case TGSI_INTERPOLATE_LOC_CENTROID: - key->ps_prolog.color_interp_vgpr_index[i] = 10; + key->ps_prolog.color_interp_vgpr_index[i] = + separate_prolog ? 10 : 13; shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_CENTROID_ENA(1); break; default: assert(0); } break; default: assert(0); } @@ -7115,47 +7125,59 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, !(sscreen->b.debug_flags & DBG_NO_TGSI)) { tgsi_dump(sel->tokens, 0); si_dump_streamout(&sel->so); } si_init_shader_ctx(&ctx, sscreen, shader, tm); ctx.no_prolog = is_monolithic; ctx.no_epilog = is_monolithic; ctx.separate_prolog = !is_monolithic; - if (ctx.type == PIPE_SHADER_FRAGMENT) + if (ctx.type == PIPE_SHADER_FRAGMENT) { + ctx.no_prolog = false; ctx.no_epilog = false; + } memset(shader->info.vs_output_param_offset, 0xff, sizeof(shader->info.vs_output_param_offset)); shader->info.uses_instanceid = sel->info.uses_instanceid; bld_base = &ctx.soa.bld_base; ctx.load_system_value = declare_system_value; if (!si_compile_tgsi_main(&ctx, shader)) { si_llvm_dispose(&ctx); return -1; } if (is_monolithic && ctx.type == PIPE_SHADER_FRAGMENT) { - LLVMValueRef parts[2]; + LLVMValueRef parts[3]; + union si_shader_part_key prolog_key; union si_shader_part_key epilog_key; + bool need_prolog; + + si_get_ps_prolog_key(shader, &prolog_key, false); + need_prolog = si_need_ps_prolog(&prolog_key); - parts[0] = ctx.main_fn; + parts[need_prolog ? 1 : 0] = ctx.main_fn; + + if (need_prolog) { + si_build_ps_prolog_function(&ctx, &prolog_key); + parts[0] = ctx.main_fn; + } si_get_ps_epilog_key(shader, &epilog_key); si_build_ps_epilog_function(&ctx, &epilog_key); - parts[1] = ctx.main_fn; + parts[need_prolog ? 2 : 1] = ctx.main_fn; - si_build_wrapper_function(&ctx, parts, 2, 0); + si_build_wrapper_function(&ctx, parts, need_prolog ? 3 : 2, need_prolog ? 1 : 0); } mod = bld_base->base.gallivm->module; /* Dump LLVM IR before any optimization passes */ if (sscreen->b.debug_flags & DBG_PREOPT_IR && r600_can_dump_shader(&sscreen->b, ctx.type)) LLVMDumpModule(mod); si_llvm_finalize_module(&ctx, @@ -8106,21 +8128,21 @@ static bool si_compile_ps_epilog(struct si_screen *sscreen, */ static bool si_shader_select_ps_parts(struct si_screen *sscreen, LLVMTargetMachineRef tm, struct si_shader *shader, struct pipe_debug_callback *debug) { union si_shader_part_key prolog_key; union si_shader_part_key epilog_key; /* Get the prolog. */ - si_get_ps_prolog_key(shader, &prolog_key); + si_get_ps_prolog_key(shader, &prolog_key, true); /* The prolog is a no-op if these aren't set. */ if (si_need_ps_prolog(&prolog_key)) { shader->prolog = si_get_shader_part(sscreen, &sscreen->ps_prologs, &prolog_key, tm, debug, si_compile_ps_prolog); if (!shader->prolog) return false; } -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev