Currently, we can store up to 256 immediates in a static array, but this is not always enough. Instead, we should allocate a dynamic array when more are needed. For performance reasons, only do that when the limit is reached, because static allocation is better.
This fixes a segfault with dEQP-GLES31.functional.ssbo.layout.random.all_shared_buffer.23 No regressions found with full piglit run. Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> --- src/gallium/drivers/radeonsi/si_shader.c | 15 ++++----- src/gallium/drivers/radeonsi/si_shader_internal.h | 5 +++ .../drivers/radeonsi/si_shader_tgsi_setup.c | 37 +++++++++++++++++++--- 3 files changed, 44 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 5dfbd6603a..7ef1707050 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -4720,7 +4720,6 @@ static void tex_fetch_args( /* add tex offsets */ if (inst->Texture.NumOffsets) { struct lp_build_context *uint_bld = &bld_base->uint_bld; - struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); const struct tgsi_texture_offset *off = inst->TexOffsets; assert(inst->Texture.NumOffsets == 1); @@ -4728,7 +4727,7 @@ static void tex_fetch_args( switch (target) { case TGSI_TEXTURE_3D: address[2] = lp_build_add(uint_bld, address[2], - bld->immediates[off->Index][off->SwizzleZ]); + si_llvm_get_immediate(bld_base, off->Index, off->SwizzleZ)); /* fall through */ case TGSI_TEXTURE_2D: case TGSI_TEXTURE_SHADOW2D: @@ -4738,7 +4737,7 @@ static void tex_fetch_args( case TGSI_TEXTURE_SHADOW2D_ARRAY: address[1] = lp_build_add(uint_bld, address[1], - bld->immediates[off->Index][off->SwizzleY]); + si_llvm_get_immediate(bld_base, off->Index, off->SwizzleY)); /* fall through */ case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: @@ -4746,7 +4745,7 @@ static void tex_fetch_args( case TGSI_TEXTURE_SHADOW1D_ARRAY: address[0] = lp_build_add(uint_bld, address[0], - bld->immediates[off->Index][off->SwizzleX]); + si_llvm_get_immediate(bld_base, off->Index, off->SwizzleX)); break; /* texture offsets do not apply to other texture targets */ } @@ -4766,13 +4765,12 @@ static void tex_fetch_args( /* Get the component 
index from src1.x for Gather4. */ if (!tgsi_is_shadow_target(target)) { - LLVMValueRef (*imms)[4] = lp_soa_context(bld_base)->immediates; LLVMValueRef comp_imm; struct tgsi_src_register src1 = inst->Src[1].Register; assert(src1.File == TGSI_FILE_IMMEDIATE); - comp_imm = imms[src1.Index][src1.SwizzleX]; + comp_imm = si_llvm_get_immediate(bld_base, src1.Index, src1.SwizzleX); gather_comp = LLVMConstIntGetZExtValue(comp_imm); gather_comp = CLAMP(gather_comp, 0, 3); } @@ -5246,13 +5244,14 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action, static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { - LLVMValueRef (*imms)[4] = lp_soa_context(bld_base)->immediates; struct tgsi_src_register src0 = emit_data->inst->Src[0].Register; + LLVMValueRef imm; unsigned stream; assert(src0.File == TGSI_FILE_IMMEDIATE); - stream = LLVMConstIntGetZExtValue(imms[src0.Index][src0.SwizzleX]) & 0x3; + imm = si_llvm_get_immediate(bld_base, src0.Index, src0.SwizzleX); + stream = LLVMConstIntGetZExtValue(imm) & 0x3; return stream; } diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 8d6a40b164..8c12eaa86e 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -94,6 +94,8 @@ struct si_shader_context { struct tgsi_array_info *temp_arrays; LLVMValueRef *temp_array_allocas; + LLVMValueRef *imms_array; + LLVMValueRef undef_alloca; LLVMValueRef main_fn; @@ -218,4 +220,7 @@ void si_prepare_cube_coords(struct lp_build_tgsi_context *bld_base, LLVMValueRef *coords_arg, LLVMValueRef *derivs_arg); +LLVMValueRef si_llvm_get_immediate(struct lp_build_tgsi_context *bld_base, + int index, int channel); + #endif diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c index 3e0f7c4f76..3cd87f2f66 100644 --- 
a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c @@ -677,14 +677,14 @@ LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base, if (tgsi_type_is_64bit(type)) { result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2)); result = LLVMConstInsertElement(result, - bld->immediates[reg->Register.Index][swizzle], + si_llvm_get_immediate(bld_base, reg->Register.Index, swizzle), bld_base->int_bld.zero); result = LLVMConstInsertElement(result, - bld->immediates[reg->Register.Index][swizzle + 1], + si_llvm_get_immediate(bld_base, reg->Register.Index, swizzle + 1), bld_base->int_bld.one); return LLVMConstBitCast(result, ctype); } else { - return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype); + return LLVMConstBitCast(si_llvm_get_immediate(bld_base, reg->Register.Index, swizzle), ctype); } } @@ -1230,13 +1230,28 @@ static void emit_immediate(struct lp_build_tgsi_context *bld_base, struct si_shader_context *ctx = si_shader_context(bld_base); for (i = 0; i < 4; ++i) { - ctx->soa.immediates[ctx->soa.num_immediates][i] = - LLVMConstInt(bld_base->uint_bld.elem_type, imm->u[i].Uint, false ); + LLVMValueRef value = + LLVMConstInt(bld_base->uint_bld.elem_type, imm->u[i].Uint, false); + if (!ctx->imms_array) { + ctx->soa.immediates[ctx->soa.num_immediates][i] = value; + } else { + ctx->imms_array[ctx->soa.num_immediates * 4 + i] = value; + } } ctx->soa.num_immediates++; } +LLVMValueRef si_llvm_get_immediate(struct lp_build_tgsi_context *bld_base, + int index, int channel) +{ + struct si_shader_context *ctx = si_shader_context(bld_base); + + if (!ctx->imms_array) + return ctx->soa.immediates[index][channel]; + return ctx->imms_array[index * 4 + channel]; +} + void si_llvm_context_init(struct si_shader_context *ctx, struct si_screen *sscreen, struct si_shader *shader, @@ -1281,6 +1296,16 @@ void 
si_llvm_context_init(struct si_shader_context *ctx, ctx->temp_arrays); } + if (info && + info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES) { + int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1; + + /* Use a dynamically allocated array for immediates when their + * number is too great, but only in certain situations for + * performance reasons because static allocation is better. */ + ctx->imms_array = CALLOC(size * 4, sizeof(ctx->imms_array[0])); + } + type.floating = true; type.fixed = false; type.sign = true; @@ -1411,6 +1436,8 @@ void si_llvm_dispose(struct si_shader_context *ctx) ctx->temp_arrays = NULL; FREE(ctx->temp_array_allocas); ctx->temp_array_allocas = NULL; + FREE(ctx->imms_array); + ctx->imms_array = NULL; FREE(ctx->temps); ctx->temps = NULL; ctx->temps_count = 0; -- 2.11.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev