From: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_shader.c | 30 +++++++++---------- src/gallium/drivers/radeonsi/si_shader.h | 1 - .../drivers/radeonsi/si_shader_internal.h | 3 -- .../drivers/radeonsi/si_state_shaders.c | 7 +++-- 4 files changed, 18 insertions(+), 23 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index e7e2a12a7b0..677853af60b 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -5047,22 +5047,21 @@ static void create_function(struct si_shader_context *ctx) break; default: assert(0 && "unimplemented shader"); return; } si_create_function(ctx, "main", returns, num_returns, &fninfo, si_get_max_workgroup_size(shader)); /* Reserve register locations for VGPR inputs the PS prolog may need. */ - if (ctx->type == PIPE_SHADER_FRAGMENT && - ctx->separate_prolog) { + if (ctx->type == PIPE_SHADER_FRAGMENT && !ctx->shader->is_monolithic) { ac_llvm_add_target_dep_function_attr(ctx->main_fn, "InitialPSInputAddr", S_0286D0_PERSP_SAMPLE_ENA(1) | S_0286D0_PERSP_CENTER_ENA(1) | S_0286D0_PERSP_CENTROID_ENA(1) | S_0286D0_LINEAR_SAMPLE_ENA(1) | S_0286D0_LINEAR_CENTER_ENA(1) | S_0286D0_LINEAR_CENTROID_ENA(1) | S_0286D0_FRONT_FACE_ENA(1) | S_0286D0_ANCILLARY_ENA(1) | @@ -6049,22 +6048,21 @@ static void si_init_exec_from_input(struct si_shader_context *ctx, } static bool si_vs_needs_prolog(const struct si_shader_selector *sel, const struct si_vs_prolog_bits *key) { /* VGPR initialization fixup for Vega10 and Raven is always done in the * VS prolog. */ return sel->vs_needs_prolog || key->ls_vgpr_fix; } -static bool si_compile_tgsi_main(struct si_shader_context *ctx, - bool is_monolithic) +static bool si_compile_tgsi_main(struct si_shader_context *ctx) { struct si_shader *shader = ctx->shader; struct si_shader_selector *sel = shader->selector; struct lp_build_tgsi_context *bld_base = &ctx->bld_base; // TODO clean all this up! switch (ctx->type) { case PIPE_SHADER_VERTEX: ctx->load_input = declare_input_vs; if (shader->key.as_ls) @@ -6135,31 +6133,31 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx, * - Add a barrier before the second shader. * - In the second shader, reset EXEC to ~0 and wrap the main part in * an if-statement. This is required for correctness in geometry * shaders, to ensure that empty GS waves do not send GS_EMIT and * GS_CUT messages. * * For monolithic merged shaders, the first shader is wrapped in an * if-block together with its prolog in si_build_wrapper_function. */ if (ctx->screen->info.chip_class >= GFX9) { - if (!is_monolithic && + if (!shader->is_monolithic && sel->info.num_instructions > 1 && /* not empty shader */ (shader->key.as_es || shader->key.as_ls) && (ctx->type == PIPE_SHADER_TESS_EVAL || (ctx->type == PIPE_SHADER_VERTEX && !si_vs_needs_prolog(sel, &shader->key.part.vs.prolog)))) { si_init_exec_from_input(ctx, ctx->param_merged_wave_info, 0); } else if (ctx->type == PIPE_SHADER_TESS_CTRL || ctx->type == PIPE_SHADER_GEOMETRY) { - if (!is_monolithic) + if (!shader->is_monolithic) ac_init_exec_full_mask(&ctx->ac); LLVMValueRef num_threads = si_unpack_param(ctx, ctx->param_merged_wave_info, 8, 8); LLVMValueRef ena = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), num_threads, ""); lp_build_if(&ctx->merged_wrap_if_state, &ctx->gallivm, ena); /* The barrier must execute for all shaders in a * threadgroup. @@ -6766,71 +6764,69 @@ static void si_build_wrapper_function(struct si_shader_context *ctx, } } } LLVMBuildRetVoid(builder); } int si_compile_tgsi_shader(struct si_screen *sscreen, struct si_compiler *compiler, struct si_shader *shader, - bool is_monolithic, struct pipe_debug_callback *debug) { struct si_shader_selector *sel = shader->selector; struct si_shader_context ctx; int r = -1; /* Dump TGSI code before doing TGSI->LLVM conversion in case the * conversion fails. */ if (si_can_dump_shader(sscreen, sel->info.processor) && !(sscreen->debug_flags & DBG(NO_TGSI))) { if (sel->tokens) tgsi_dump(sel->tokens, 0); else nir_print_shader(sel->nir, stderr); si_dump_streamout(&sel->so); } si_init_shader_ctx(&ctx, sscreen, compiler); si_llvm_context_set_tgsi(&ctx, shader); - ctx.separate_prolog = !is_monolithic; memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_UNDEFINED, sizeof(shader->info.vs_output_param_offset)); shader->info.uses_instanceid = sel->info.uses_instanceid; - if (!si_compile_tgsi_main(&ctx, is_monolithic)) { + if (!si_compile_tgsi_main(&ctx)) { si_llvm_dispose(&ctx); return -1; } - if (is_monolithic && ctx.type == PIPE_SHADER_VERTEX) { + if (shader->is_monolithic && ctx.type == PIPE_SHADER_VERTEX) { LLVMValueRef parts[2]; bool need_prolog = sel->vs_needs_prolog; parts[1] = ctx.main_fn; if (need_prolog) { union si_shader_part_key prolog_key; si_get_vs_prolog_key(&sel->info, shader->info.num_input_sgprs, &shader->key.part.vs.prolog, shader, &prolog_key); si_build_vs_prolog_function(&ctx, &prolog_key); parts[0] = ctx.main_fn; } si_build_wrapper_function(&ctx, parts + !need_prolog, 1 + need_prolog, need_prolog, 0); - } else if (is_monolithic && ctx.type == PIPE_SHADER_TESS_CTRL) { + } else if (shader->is_monolithic && ctx.type == PIPE_SHADER_TESS_CTRL) { if (sscreen->info.chip_class >= GFX9) { struct si_shader_selector *ls = shader->key.part.tcs.ls; LLVMValueRef parts[4]; bool vs_needs_prolog = si_vs_needs_prolog(ls, &shader->key.part.tcs.ls_prolog); /* TCS main part */ parts[2] = ctx.main_fn; /* TCS epilog */ @@ -6839,23 +6835,24 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, tcs_epilog_key.tcs_epilog.states = shader->key.part.tcs.epilog; si_build_tcs_epilog_function(&ctx, &tcs_epilog_key); parts[3] = ctx.main_fn; /* VS as LS main part */ struct si_shader shader_ls = {}; shader_ls.selector = ls; shader_ls.key.as_ls = 1; shader_ls.key.mono = shader->key.mono; shader_ls.key.opt = shader->key.opt; + shader_ls.is_monolithic = true; si_llvm_context_set_tgsi(&ctx, &shader_ls); - if (!si_compile_tgsi_main(&ctx, true)) { + if (!si_compile_tgsi_main(&ctx)) { si_llvm_dispose(&ctx); return -1; } shader->info.uses_instanceid |= ls->info.uses_instanceid; parts[1] = ctx.main_fn; /* LS prolog */ if (vs_needs_prolog) { union si_shader_part_key vs_prolog_key; si_get_vs_prolog_key(&ls->info, @@ -6881,21 +6878,21 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, parts[0] = ctx.main_fn; memset(&epilog_key, 0, sizeof(epilog_key)); epilog_key.tcs_epilog.states = shader->key.part.tcs.epilog; si_build_tcs_epilog_function(&ctx, &epilog_key); parts[1] = ctx.main_fn; si_build_wrapper_function(&ctx, parts, 2, 0, 0); } - } else if (is_monolithic && ctx.type == PIPE_SHADER_GEOMETRY) { + } else if (shader->is_monolithic && ctx.type == PIPE_SHADER_GEOMETRY) { if (ctx.screen->info.chip_class >= GFX9) { struct si_shader_selector *es = shader->key.part.gs.es; LLVMValueRef es_prolog = NULL; LLVMValueRef es_main = NULL; LLVMValueRef gs_prolog = NULL; LLVMValueRef gs_main = ctx.main_fn; /* GS prolog */ union si_shader_part_key gs_prolog_key; memset(&gs_prolog_key, 0, sizeof(gs_prolog_key)); @@ -6903,23 +6900,24 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, gs_prolog_key.gs_prolog.is_monolithic = true; si_build_gs_prolog_function(&ctx, &gs_prolog_key); gs_prolog = ctx.main_fn; /* ES main part */ struct si_shader shader_es = {}; shader_es.selector = es; shader_es.key.as_es = 1; shader_es.key.mono = shader->key.mono; shader_es.key.opt = shader->key.opt; + shader_es.is_monolithic = true; si_llvm_context_set_tgsi(&ctx, &shader_es); - if (!si_compile_tgsi_main(&ctx, true)) { + if (!si_compile_tgsi_main(&ctx)) { si_llvm_dispose(&ctx); return -1; } shader->info.uses_instanceid |= es->info.uses_instanceid; es_main = ctx.main_fn; /* ES prolog */ if (es->vs_needs_prolog) { union si_shader_part_key vs_prolog_key; si_get_vs_prolog_key(&es->info, @@ -6954,21 +6952,21 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, parts[1] = ctx.main_fn; memset(&prolog_key, 0, sizeof(prolog_key)); prolog_key.gs_prolog.states = shader->key.part.gs.prolog; si_build_gs_prolog_function(&ctx, &prolog_key); parts[0] = ctx.main_fn; si_build_wrapper_function(&ctx, parts, 2, 1, 0); } - } else if (is_monolithic && ctx.type == PIPE_SHADER_FRAGMENT) { + } else if (shader->is_monolithic && ctx.type == PIPE_SHADER_FRAGMENT) { LLVMValueRef parts[3]; union si_shader_part_key prolog_key; union si_shader_part_key epilog_key; bool need_prolog; si_get_ps_prolog_key(shader, &prolog_key, false); need_prolog = si_need_ps_prolog(&prolog_key); parts[need_prolog ? 1 : 0] = ctx.main_fn; @@ -8062,21 +8060,21 @@ int si_shader_create(struct si_screen *sscreen, struct si_compiler *compiler, /* LS, ES, VS are compiled on demand if the main part hasn't been * compiled for that stage. * * Vertex shaders are compiled on demand when a vertex fetch * workaround must be applied. */ if (shader->is_monolithic) { /* Monolithic shader (compiled as a whole, has many variants, * may take a long time to compile). */ - r = si_compile_tgsi_shader(sscreen, compiler, shader, true, debug); + r = si_compile_tgsi_shader(sscreen, compiler, shader, debug); if (r) return r; } else { /* The shader consists of several parts: * * - the middle part is the user shader, it has 1 variant only * and it was compiled during the creation of the shader * selector * - the prolog part is inserted at the beginning * - the epilog part is inserted at the end diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index e1f6b392fbe..fd2f71bed74 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -655,21 +655,20 @@ struct si_shader_part { /* si_shader.c */ struct si_shader * si_generate_gs_copy_shader(struct si_screen *sscreen, struct si_compiler *compiler, struct si_shader_selector *gs_selector, struct pipe_debug_callback *debug); int si_compile_tgsi_shader(struct si_screen *sscreen, struct si_compiler *compiler, struct si_shader *shader, - bool is_monolithic, struct pipe_debug_callback *debug); int si_shader_create(struct si_screen *sscreen, struct si_compiler *compiler, struct si_shader *shader, struct pipe_debug_callback *debug); void si_shader_destroy(struct si_shader *shader); unsigned si_shader_io_get_unique_index_patch(unsigned semantic_name, unsigned index); unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index, unsigned is_varying); int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader); void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader, diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 0a347172d62..e528a56023f 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -55,23 +55,20 @@ struct si_shader_context { struct si_screen *screen; unsigned type; /* PIPE_SHADER_* specifies the type of shader. */ /* For clamping the non-constant index in resource indexing: */ unsigned num_const_buffers; unsigned num_shader_buffers; unsigned num_images; unsigned num_samplers; - /* Whether the prolog will be compiled separately. */ - bool separate_prolog; - struct ac_shader_abi abi; /** This function is responsible for initilizing the inputs array and will be * called once for each input declared in the TGSI shader. */ void (*load_input)(struct si_shader_context *, unsigned input_index, const struct tgsi_full_declaration *decl, LLVMValueRef out[4]); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index f0498520ae8..ddd38dabbe6 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1573,24 +1573,24 @@ static bool si_check_missing_main_part(struct si_screen *sscreen, return false; /* We can leave the fence as permanently signaled because the * main part becomes visible globally only after it has been * compiled. */ util_queue_fence_init(&main_part->ready); main_part->selector = sel; main_part->key.as_es = key->as_es; main_part->key.as_ls = key->as_ls; + main_part->is_monolithic = false; if (si_compile_tgsi_shader(sscreen, compiler_state->compiler, - main_part, false, - &compiler_state->debug) != 0) { + main_part, &compiler_state->debug) != 0) { FREE(main_part); return false; } *mainp = main_part; } return true; } /* Select the hw shader variant depending on the current state. */ static int si_shader_select_with_key(struct si_screen *sscreen, @@ -1875,39 +1875,40 @@ static void si_init_shader_selector_async(void *job, int thread_index) if (!shader) { fprintf(stderr, "radeonsi: can't allocate a main shader part\n"); return; } /* We can leave the fence signaled because use of the default * main part is guarded by the selector's ready fence. */ util_queue_fence_init(&shader->ready); shader->selector = sel; + shader->is_monolithic = false; si_parse_next_shader_property(&sel->info, sel->so.num_outputs != 0, &shader->key); if (sel->tokens || sel->nir) ir_binary = si_get_ir_binary(sel); /* Try to load the shader from the shader cache. */ mtx_lock(&sscreen->shader_cache_mutex); if (ir_binary && si_shader_cache_load_shader(sscreen, ir_binary, shader)) { mtx_unlock(&sscreen->shader_cache_mutex); si_shader_dump_stats_for_shader_db(shader, debug); } else { mtx_unlock(&sscreen->shader_cache_mutex); /* Compile the shader if it hasn't been loaded from the cache. */ - if (si_compile_tgsi_shader(sscreen, compiler, shader, false, + if (si_compile_tgsi_shader(sscreen, compiler, shader, debug) != 0) { FREE(shader); FREE(ir_binary); fprintf(stderr, "radeonsi: can't compile a main shader part\n"); return; } if (ir_binary) { mtx_lock(&sscreen->shader_cache_mutex); if (!si_shader_cache_insert_shader(sscreen, ir_binary, shader, true)) -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev