From: Nicolai Hähnle <nicolai.haeh...@amd.com> We need to account for SGPR locations in merged shaders.
This case is exercised by KHR-GL45.enhanced_layouts.vertex_attrib_locations Fixes: 79c2e7388c7f ("radeonsi/gfx9: use SPI_SHADER_USER_DATA_COMMON") --- src/gallium/drivers/radeonsi/si_shader.c | 13 +++++++++++-- src/gallium/drivers/radeonsi/si_shader.h | 4 +--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 3293dd44c63..281449e9eeb 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -5878,25 +5878,27 @@ static void si_get_vs_prolog_key(const struct tgsi_shader_info *info, unsigned num_input_sgprs, const struct si_vs_prolog_bits *prolog_key, struct si_shader *shader_out, union si_shader_part_key *key) { memset(key, 0, sizeof(*key)); key->vs_prolog.states = *prolog_key; key->vs_prolog.num_input_sgprs = num_input_sgprs; key->vs_prolog.last_input = MAX2(1, info->num_inputs) - 1; key->vs_prolog.as_ls = shader_out->key.as_ls; + key->vs_prolog.as_es = shader_out->key.as_es; if (shader_out->selector->type == PIPE_SHADER_TESS_CTRL) { key->vs_prolog.as_ls = 1; key->vs_prolog.num_merged_next_stage_vgprs = 2; } else if (shader_out->selector->type == PIPE_SHADER_GEOMETRY) { + key->vs_prolog.as_es = 1; key->vs_prolog.num_merged_next_stage_vgprs = 5; } /* Enable loading the InstanceID VGPR. */ uint16_t input_mask = u_bit_consecutive(0, info->num_inputs); if ((key->vs_prolog.states.instance_divisor_is_one | key->vs_prolog.states.instance_divisor_is_fetched) & input_mask) shader_out->info.uses_instanceid = true; } @@ -6763,20 +6765,22 @@ si_get_shader_part(struct si_screen *sscreen, struct si_shader shader = {}; struct si_shader_context ctx; si_init_shader_ctx(&ctx, sscreen, tm); ctx.shader = &shader; ctx.type = type; switch (type) { case PIPE_SHADER_VERTEX: + shader.key.as_ls = key->vs_prolog.as_ls; + shader.key.as_es = key->vs_prolog.as_es; break; case PIPE_SHADER_TESS_CTRL: assert(!prolog); shader.key.part.tcs.epilog = key->tcs_epilog.states; break; case PIPE_SHADER_GEOMETRY: assert(prolog); break; case PIPE_SHADER_FRAGMENT: if (prolog) @@ -6805,24 +6809,29 @@ si_get_shader_part(struct si_screen *sscreen, out: si_llvm_dispose(&ctx); mtx_unlock(&sscreen->shader_parts_mutex); return result; } static LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx) { LLVMValueRef ptr[2], list; + bool is_merged_shader = + ctx->screen->b.chip_class >= GFX9 && + (ctx->type == PIPE_SHADER_TESS_CTRL || + ctx->type == PIPE_SHADER_GEOMETRY || + ctx->shader->key.as_ls || ctx->shader->key.as_es); /* Get the pointer to rw buffers. */ - ptr[0] = LLVMGetParam(ctx->main_fn, SI_SGPR_RW_BUFFERS); - ptr[1] = LLVMGetParam(ctx->main_fn, SI_SGPR_RW_BUFFERS_HI); + ptr[0] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS); + ptr[1] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS_HI); list = lp_build_gather_values(&ctx->gallivm, ptr, 2); list = LLVMBuildBitCast(ctx->ac.builder, list, ctx->i64, ""); list = LLVMBuildIntToPtr(ctx->ac.builder, list, si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS), ""); return list; } /** * Build the vertex shader prolog function. * diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 41851627a87..148356b87ff 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -142,23 +142,20 @@ struct nir_shader; #define SI_MAX_VS_OUTPUTS 40 /* Shader IO unique indices are supported for TGSI_SEMANTIC_GENERIC with an * index smaller than this. */ #define SI_MAX_IO_GENERIC 46 /* SGPR user data indices */ enum { - /* GFX9 merged shaders have RW_BUFFERS among the first 8 system SGPRs, - * and these two are used for other purposes. - */ SI_SGPR_RW_BUFFERS, /* rings (& stream-out, VS only) */ SI_SGPR_RW_BUFFERS_HI, SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES, SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES_HI, SI_SGPR_CONST_AND_SHADER_BUFFERS, /* or just a constant buffer 0 pointer */ SI_SGPR_CONST_AND_SHADER_BUFFERS_HI, SI_SGPR_SAMPLERS_AND_IMAGES, SI_SGPR_SAMPLERS_AND_IMAGES_HI, SI_NUM_RESOURCE_SGPRS, @@ -448,20 +445,21 @@ struct si_ps_epilog_bits { }; union si_shader_part_key { struct { struct si_vs_prolog_bits states; unsigned num_input_sgprs:6; /* For merged stages such as LS-HS, HS input VGPRs are first. */ unsigned num_merged_next_stage_vgprs:3; unsigned last_input:4; unsigned as_ls:1; + unsigned as_es:1; /* Prologs for monolithic shaders shouldn't set EXEC. */ unsigned is_monolithic:1; } vs_prolog; struct { struct si_tcs_epilog_bits states; } tcs_epilog; struct { struct si_gs_prolog_bits states; /* Prologs of monolithic shaders shouldn't set EXEC. */ unsigned is_monolithic:1; -- 2.11.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev