From: Nicolai Hähnle <nicolai.haeh...@amd.com>

This helps to achieve a gradual transition towards building monolithic shaders via inlining.
no_prolog and no_epilog will be removed by the end of the series, separate_prolog remains in use to control the PS input mapping. --- src/gallium/drivers/radeonsi/si_shader.c | 35 ++++++++++++++--------- src/gallium/drivers/radeonsi/si_shader_internal.h | 15 ++++++++-- 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 5a0153c..dcbcfbc 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -382,21 +382,21 @@ static void declare_input_vs( /* Load the T list */ t_list_ptr = LLVMGetParam(ctx->main_fn, SI_PARAM_VERTEX_BUFFERS); t_offset = lp_build_const_int32(gallivm, input_index); t_list = build_indexed_load_const(ctx, t_list_ptr, t_offset); /* Build the attribute offset */ attribute_offset = lp_build_const_int32(gallivm, 0); - if (!ctx->is_monolithic) { + if (!ctx->no_prolog) { buffer_index = LLVMGetParam(radeon_bld->main_fn, ctx->param_vertex_index0 + input_index); } else if (divisor) { /* Build index from instance ID, start instance and divisor */ ctx->shader->info.uses_instanceid = true; buffer_index = get_instance_index_for_fetch(ctx, SI_PARAM_START_INSTANCE, divisor); } else { @@ -1168,21 +1168,21 @@ static int lookup_interp_param_index(unsigned interpolate, unsigned location) default: fprintf(stderr, "Warning: Unhandled interpolation mode.\n"); return -1; } } /* This shouldn't be used by explicit INTERP opcodes. */ static unsigned select_interp_param(struct si_shader_context *ctx, unsigned param) { - if (!ctx->is_monolithic) + if (!ctx->no_prolog) return param; if (ctx->shader->key.ps.prolog.force_persp_sample_interp) { switch (param) { case SI_PARAM_PERSP_CENTROID: case SI_PARAM_PERSP_CENTER: return SI_PARAM_PERSP_SAMPLE; } } if (ctx->shader->key.ps.prolog.force_linear_sample_interp) { @@ -1329,21 +1329,21 @@ static void interp_fs_input(struct si_shader_context *ctx, /* LLVMGetParam with bc_optimize resolved. 
*/ static LLVMValueRef get_interp_param(struct si_shader_context *ctx, int interp_param_idx) { LLVMBuilderRef builder = ctx->gallivm.builder; LLVMValueRef main_fn = ctx->main_fn; LLVMValueRef param = NULL; /* Handle PRIM_MASK[31] (bc_optimize). */ - if (ctx->is_monolithic && + if (ctx->no_prolog && ((ctx->shader->key.ps.prolog.bc_optimize_for_persp && interp_param_idx == SI_PARAM_PERSP_CENTROID) || (ctx->shader->key.ps.prolog.bc_optimize_for_linear && interp_param_idx == SI_PARAM_LINEAR_CENTROID))) { /* The shader should do: if (PRIM_MASK[31]) CENTROID = CENTER; * The hw doesn't compute CENTROID if the whole wave only * contains fully-covered quads. */ LLVMValueRef bc_optimize = LLVMGetParam(main_fn, SI_PARAM_PRIM_MASK); @@ -1385,21 +1385,21 @@ static void declare_input_fs( { struct lp_build_context *base = &radeon_bld->soa.bld_base.base; struct si_shader_context *ctx = si_shader_context(&radeon_bld->soa.bld_base); struct si_shader *shader = ctx->shader; LLVMValueRef main_fn = radeon_bld->main_fn; LLVMValueRef interp_param = NULL; int interp_param_idx; /* Get colors from input VGPRs (set by the prolog). */ - if (!ctx->is_monolithic && + if (!ctx->no_prolog && decl->Semantic.Name == TGSI_SEMANTIC_COLOR) { unsigned i = decl->Semantic.Index; unsigned colors_read = shader->selector->info.colors_read; unsigned mask = colors_read >> (i * 4); unsigned offset = SI_PARAM_POS_FIXED_PT + 1 + (i ? util_bitcount(colors_read & 0xf) : 0); out[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : base->undef; out[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : base->undef; out[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : base->undef; @@ -2624,21 +2624,21 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, /* This only writes the tessellation factor levels. 
*/ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base) { struct si_shader_context *ctx = si_shader_context(bld_base); LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset; rel_patch_id = get_rel_patch_id(ctx); invocation_id = unpack_param(ctx, SI_PARAM_REL_IDS, 8, 5); tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx); - if (!ctx->is_monolithic) { + if (!ctx->no_epilog) { /* Return epilog parameters from this function. */ LLVMBuilderRef builder = bld_base->base.gallivm->builder; LLVMValueRef ret = ctx->return_value; LLVMValueRef rw_buffers, rw0, rw1, tf_soffset; unsigned vgpr; /* RW_BUFFERS pointer */ rw_buffers = LLVMGetParam(ctx->main_fn, SI_PARAM_RW_BUFFERS); rw_buffers = LLVMBuildPtrToInt(builder, rw_buffers, ctx->i64, ""); @@ -2809,21 +2809,21 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base) outputs[i].name = info->output_semantic_name[i]; outputs[i].sid = info->output_semantic_index[i]; for (j = 0; j < 4; j++) outputs[i].values[j] = LLVMBuildLoad(gallivm->builder, ctx->soa.outputs[i][j], ""); } - if (ctx->is_monolithic) { + if (ctx->no_epilog) { /* Export PrimitiveID when PS needs it. */ if (si_vs_exports_prim_id(ctx->shader)) { outputs[i].name = TGSI_SEMANTIC_PRIMID; outputs[i].sid = 0; outputs[i].values[0] = bitcast(bld_base, TGSI_TYPE_FLOAT, get_primitive_id(bld_base, 0)); outputs[i].values[1] = bld_base->base.undef; outputs[i].values[2] = bld_base->base.undef; outputs[i].values[3] = bld_base->base.undef; i++; @@ -5526,28 +5526,31 @@ static void create_function(struct si_shader_context *ctx) } last_sgpr = num_params-1; /* VGPRs */ params[ctx->param_vertex_id = num_params++] = ctx->i32; params[ctx->param_rel_auto_id = num_params++] = ctx->i32; params[ctx->param_vs_prim_id = num_params++] = ctx->i32; params[ctx->param_instance_id = num_params++] = ctx->i32; - if (!ctx->is_monolithic && + if (!ctx->no_prolog && !ctx->is_gs_copy_shader) { /* Vertex load indices. 
*/ ctx->param_vertex_index0 = num_params; for (i = 0; i < shader->selector->info.num_inputs; i++) params[num_params++] = ctx->i32; + } + if (!ctx->no_epilog && + !ctx->is_gs_copy_shader) { /* PrimitiveID output. */ if (!shader->key.vs.as_es && !shader->key.vs.as_ls) for (i = 0; i <= VS_EPILOG_PRIMID_LOC; i++) returns[num_returns++] = ctx->f32; } break; case PIPE_SHADER_TESS_CTRL: params[SI_PARAM_TCS_OFFCHIP_LAYOUT] = ctx->i32; params[SI_PARAM_TCS_OUT_OFFSETS] = ctx->i32; @@ -5555,21 +5558,21 @@ static void create_function(struct si_shader_context *ctx) params[SI_PARAM_TCS_IN_LAYOUT] = ctx->i32; params[ctx->param_oc_lds = SI_PARAM_TCS_OC_LDS] = ctx->i32; params[SI_PARAM_TESS_FACTOR_OFFSET] = ctx->i32; last_sgpr = SI_PARAM_TESS_FACTOR_OFFSET; /* VGPRs */ params[SI_PARAM_PATCH_ID] = ctx->i32; params[SI_PARAM_REL_IDS] = ctx->i32; num_params = SI_PARAM_REL_IDS+1; - if (!ctx->is_monolithic) { + if (!ctx->no_epilog) { /* SI_PARAM_TCS_OC_LDS and PARAM_TESS_FACTOR_OFFSET are * placed after the user SGPRs. */ for (i = 0; i < SI_TCS_NUM_USER_SGPR + 2; i++) returns[num_returns++] = ctx->i32; /* SGPRs */ for (i = 0; i < 3; i++) returns[num_returns++] = ctx->f32; /* VGPRs */ } break; @@ -5590,21 +5593,21 @@ static void create_function(struct si_shader_context *ctx) } last_sgpr = num_params - 1; /* VGPRs */ params[ctx->param_tes_u = num_params++] = ctx->f32; params[ctx->param_tes_v = num_params++] = ctx->f32; params[ctx->param_tes_rel_patch_id = num_params++] = ctx->i32; params[ctx->param_tes_patch_id = num_params++] = ctx->i32; /* PrimitiveID output. 
*/ - if (!ctx->is_monolithic && !shader->key.tes.as_es) + if (!ctx->no_epilog && !shader->key.tes.as_es) for (i = 0; i <= VS_EPILOG_PRIMID_LOC; i++) returns[num_returns++] = ctx->f32; break; case PIPE_SHADER_GEOMETRY: params[SI_PARAM_GS2VS_OFFSET] = ctx->i32; params[SI_PARAM_GS_WAVE_ID] = ctx->i32; last_sgpr = SI_PARAM_GS_WAVE_ID; /* VGPRs */ @@ -5634,31 +5637,33 @@ static void create_function(struct si_shader_context *ctx) params[SI_PARAM_POS_X_FLOAT] = ctx->f32; params[SI_PARAM_POS_Y_FLOAT] = ctx->f32; params[SI_PARAM_POS_Z_FLOAT] = ctx->f32; params[SI_PARAM_POS_W_FLOAT] = ctx->f32; params[SI_PARAM_FRONT_FACE] = ctx->i32; params[SI_PARAM_ANCILLARY] = ctx->i32; params[SI_PARAM_SAMPLE_COVERAGE] = ctx->f32; params[SI_PARAM_POS_FIXED_PT] = ctx->i32; num_params = SI_PARAM_POS_FIXED_PT+1; - if (!ctx->is_monolithic) { + if (!ctx->no_prolog) { /* Color inputs from the prolog. */ if (shader->selector->info.colors_read) { unsigned num_color_elements = util_bitcount(shader->selector->info.colors_read); assert(num_params + num_color_elements <= ARRAY_SIZE(params)); for (i = 0; i < num_color_elements; i++) params[num_params++] = ctx->f32; } + } + if (!ctx->no_epilog) { /* Outputs for the epilog. */ num_return_sgprs = SI_SGPR_ALPHA_REF + 1; num_returns = num_return_sgprs + util_bitcount(shader->selector->info.colors_written) * 4 + shader->selector->info.writes_z + shader->selector->info.writes_stencil + shader->selector->info.writes_samplemask + 1 /* SampleMaskIn */; @@ -5687,21 +5692,21 @@ static void create_function(struct si_shader_context *ctx) return; } assert(num_params <= ARRAY_SIZE(params)); si_create_function(ctx, returns, num_returns, params, num_params, last_sgpr); /* Reserve register locations for VGPR inputs the PS prolog may need. 
*/ if (ctx->type == PIPE_SHADER_FRAGMENT && - !ctx->is_monolithic) { + ctx->separate_prolog) { si_llvm_add_attribute(ctx->main_fn, "InitialPSInputAddr", S_0286D0_PERSP_SAMPLE_ENA(1) | S_0286D0_PERSP_CENTER_ENA(1) | S_0286D0_PERSP_CENTROID_ENA(1) | S_0286D0_LINEAR_SAMPLE_ENA(1) | S_0286D0_LINEAR_CENTER_ENA(1) | S_0286D0_LINEAR_CENTROID_ENA(1) | S_0286D0_FRONT_FACE_ENA(1) | S_0286D0_POS_FIXED_PT_ENA(1)); @@ -6702,38 +6707,38 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx, bld_base->emit_epilogue = si_llvm_emit_es_epilogue; else bld_base->emit_epilogue = si_llvm_emit_vs_epilogue; break; case PIPE_SHADER_GEOMETRY: bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs; bld_base->emit_epilogue = si_llvm_emit_gs_epilogue; break; case PIPE_SHADER_FRAGMENT: ctx->load_input = declare_input_fs; - if (ctx->is_monolithic) + if (ctx->no_epilog) bld_base->emit_epilogue = si_llvm_emit_fs_epilogue; else bld_base->emit_epilogue = si_llvm_return_fs_outputs; break; case PIPE_SHADER_COMPUTE: ctx->declare_memory_region = declare_compute_memory; break; default: assert(!"Unsupported shader type"); return false; } create_meta_data(ctx); create_function(ctx); preload_ring_buffers(ctx); - if (ctx->is_monolithic && sel->type == PIPE_SHADER_FRAGMENT && + if (ctx->no_prolog && sel->type == PIPE_SHADER_FRAGMENT && shader->key.ps.prolog.poly_stipple) { LLVMValueRef list = LLVMGetParam(ctx->main_fn, SI_PARAM_RW_BUFFERS); si_llvm_emit_polygon_stipple(ctx, list, SI_PARAM_POS_FIXED_PT); } if (ctx->type == PIPE_SHADER_GEOMETRY) { int i; for (i = 0; i < 4; i++) { @@ -6766,21 +6771,23 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, /* Dump TGSI code before doing TGSI->LLVM conversion in case the * conversion fails. 
*/ if (r600_can_dump_shader(&sscreen->b, sel->info.processor) && !(sscreen->b.debug_flags & DBG_NO_TGSI)) { tgsi_dump(sel->tokens, 0); si_dump_streamout(&sel->so); } si_init_shader_ctx(&ctx, sscreen, shader, tm); - ctx.is_monolithic = is_monolithic; + ctx.no_prolog = is_monolithic; + ctx.no_epilog = is_monolithic; + ctx.separate_prolog = !is_monolithic; memset(shader->info.vs_output_param_offset, 0xff, sizeof(shader->info.vs_output_param_offset)); shader->info.uses_instanceid = sel->info.uses_instanceid; bld_base = &ctx.soa.bld_base; ctx.load_system_value = declare_system_value; if (!si_compile_tgsi_main(&ctx, shader)) { diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 84d8ed5..7586373 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -46,24 +46,33 @@ struct radeon_shader_binary; struct si_llvm_flow; struct si_shader_context { struct lp_build_tgsi_soa_context soa; struct gallivm_state gallivm; struct si_shader *shader; struct si_screen *screen; unsigned type; /* PIPE_SHADER_* specifies the type of shader. */ bool is_gs_copy_shader; - /* Whether to generate the optimized shader variant compiled as a whole - * (without a prolog and epilog) + + /* Whether main TGSI code translation should assume that no prolog + * or epilog is present, respectively. + * + * This is used temporarily to indicate a monolithic shader that is + * _not_ assembled from parts via inlining. Will be removed once the + * transition is complete. */ - bool is_monolithic; + bool no_prolog; + bool no_epilog; + + /* Whether the prolog will be compiled separately. */ + bool separate_prolog; /** This function is responsible for initilizing the inputs array and will be * called once for each input declared in the TGSI shader. 
*/ void (*load_input)(struct si_shader_context *, unsigned input_index, const struct tgsi_full_declaration *decl, LLVMValueRef out[4]); void (*load_system_value)(struct si_shader_context *, -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev