Currently, we can store up to 256 immediates in a static array,
but this is not always enough. Instead, we should allocate a
dynamic array. For performance reasons, only do that when the
limit is reached, because static allocation is cheaper.

This fixes a segfault with
dEQP-GLES31.functional.ssbo.layout.random.all_shared_buffer.23

No regressions found with full piglit run.

Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
---
 src/gallium/drivers/radeonsi/si_shader.c           | 15 ++++-----
 src/gallium/drivers/radeonsi/si_shader_internal.h  |  5 +++
 .../drivers/radeonsi/si_shader_tgsi_setup.c        | 37 +++++++++++++++++++---
 3 files changed, 44 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 5dfbd6603a..7ef1707050 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4720,7 +4720,6 @@ static void tex_fetch_args(
                /* add tex offsets */
                if (inst->Texture.NumOffsets) {
                        struct lp_build_context *uint_bld = &bld_base->uint_bld;
-                       struct lp_build_tgsi_soa_context *bld = 
lp_soa_context(bld_base);
                        const struct tgsi_texture_offset *off = 
inst->TexOffsets;
 
                        assert(inst->Texture.NumOffsets == 1);
@@ -4728,7 +4727,7 @@ static void tex_fetch_args(
                        switch (target) {
                        case TGSI_TEXTURE_3D:
                                address[2] = lp_build_add(uint_bld, address[2],
-                                               
bld->immediates[off->Index][off->SwizzleZ]);
+                                               si_llvm_get_immediate(bld_base, 
off->Index, off->SwizzleZ));
                                /* fall through */
                        case TGSI_TEXTURE_2D:
                        case TGSI_TEXTURE_SHADOW2D:
@@ -4738,7 +4737,7 @@ static void tex_fetch_args(
                        case TGSI_TEXTURE_SHADOW2D_ARRAY:
                                address[1] =
                                        lp_build_add(uint_bld, address[1],
-                                               
bld->immediates[off->Index][off->SwizzleY]);
+                                               si_llvm_get_immediate(bld_base, 
off->Index, off->SwizzleY));
                                /* fall through */
                        case TGSI_TEXTURE_1D:
                        case TGSI_TEXTURE_SHADOW1D:
@@ -4746,7 +4745,7 @@ static void tex_fetch_args(
                        case TGSI_TEXTURE_SHADOW1D_ARRAY:
                                address[0] =
                                        lp_build_add(uint_bld, address[0],
-                                               
bld->immediates[off->Index][off->SwizzleX]);
+                                               si_llvm_get_immediate(bld_base, 
off->Index, off->SwizzleX));
                                break;
                                /* texture offsets do not apply to other 
texture targets */
                        }
@@ -4766,13 +4765,12 @@ static void tex_fetch_args(
 
                /* Get the component index from src1.x for Gather4. */
                if (!tgsi_is_shadow_target(target)) {
-                       LLVMValueRef (*imms)[4] = 
lp_soa_context(bld_base)->immediates;
                        LLVMValueRef comp_imm;
                        struct tgsi_src_register src1 = inst->Src[1].Register;
 
                        assert(src1.File == TGSI_FILE_IMMEDIATE);
 
-                       comp_imm = imms[src1.Index][src1.SwizzleX];
+                       comp_imm = si_llvm_get_immediate(bld_base, src1.Index, 
src1.SwizzleX);
                        gather_comp = LLVMConstIntGetZExtValue(comp_imm);
                        gather_comp = CLAMP(gather_comp, 0, 3);
                }
@@ -5246,13 +5244,14 @@ static void build_interp_intrinsic(const struct 
lp_build_tgsi_action *action,
 static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
                                       struct lp_build_emit_data *emit_data)
 {
-       LLVMValueRef (*imms)[4] = lp_soa_context(bld_base)->immediates;
        struct tgsi_src_register src0 = emit_data->inst->Src[0].Register;
+       LLVMValueRef imm;
        unsigned stream;
 
        assert(src0.File == TGSI_FILE_IMMEDIATE);
 
-       stream = LLVMConstIntGetZExtValue(imms[src0.Index][src0.SwizzleX]) & 
0x3;
+       imm = si_llvm_get_immediate(bld_base, src0.Index, src0.SwizzleX);
+       stream = LLVMConstIntGetZExtValue(imm) & 0x3;
        return stream;
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h 
b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 8d6a40b164..8c12eaa86e 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -94,6 +94,8 @@ struct si_shader_context {
        struct tgsi_array_info *temp_arrays;
        LLVMValueRef *temp_array_allocas;
 
+       LLVMValueRef *imms_array;
+
        LLVMValueRef undef_alloca;
 
        LLVMValueRef main_fn;
@@ -218,4 +220,7 @@ void si_prepare_cube_coords(struct lp_build_tgsi_context 
*bld_base,
                            LLVMValueRef *coords_arg,
                            LLVMValueRef *derivs_arg);
 
+LLVMValueRef si_llvm_get_immediate(struct lp_build_tgsi_context *bld_base,
+                                  int index, int channel);
+
 #endif
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index 3e0f7c4f76..3cd87f2f66 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -677,14 +677,14 @@ LLVMValueRef si_llvm_emit_fetch(struct 
lp_build_tgsi_context *bld_base,
                if (tgsi_type_is_64bit(type)) {
                        result = 
LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context,
 32), bld_base->base.type.length * 2));
                        result = LLVMConstInsertElement(result,
-                                                       
bld->immediates[reg->Register.Index][swizzle],
+                                                       
si_llvm_get_immediate(bld_base, reg->Register.Index, swizzle),
                                                        bld_base->int_bld.zero);
                        result = LLVMConstInsertElement(result,
-                                                       
bld->immediates[reg->Register.Index][swizzle + 1],
+                                                       
si_llvm_get_immediate(bld_base, reg->Register.Index, swizzle + 1),
                                                        bld_base->int_bld.one);
                        return LLVMConstBitCast(result, ctype);
                } else {
-                       return 
LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
+                       return LLVMConstBitCast(si_llvm_get_immediate(bld_base, 
reg->Register.Index, swizzle), ctype);
                }
        }
 
@@ -1230,13 +1230,28 @@ static void emit_immediate(struct lp_build_tgsi_context 
*bld_base,
        struct si_shader_context *ctx = si_shader_context(bld_base);
 
        for (i = 0; i < 4; ++i) {
-               ctx->soa.immediates[ctx->soa.num_immediates][i] =
-                               LLVMConstInt(bld_base->uint_bld.elem_type, 
imm->u[i].Uint, false   );
+               LLVMValueRef value =
+                       LLVMConstInt(bld_base->uint_bld.elem_type, 
imm->u[i].Uint, false);
+               if (!ctx->imms_array) {
+                       ctx->soa.immediates[ctx->soa.num_immediates][i] = value;
+               } else {
+                       ctx->imms_array[ctx->soa.num_immediates * 4 + i] = 
value;
+               }
        }
 
        ctx->soa.num_immediates++;
 }
 
+LLVMValueRef si_llvm_get_immediate(struct lp_build_tgsi_context *bld_base,
+                                  int index, int channel)
+{
+       struct si_shader_context *ctx = si_shader_context(bld_base);
+
+       if (!ctx->imms_array)
+               return ctx->soa.immediates[index][channel];
+       return ctx->imms_array[index * 4 + channel];
+}
+
 void si_llvm_context_init(struct si_shader_context *ctx,
                          struct si_screen *sscreen,
                          struct si_shader *shader,
@@ -1281,6 +1296,16 @@ void si_llvm_context_init(struct si_shader_context *ctx,
                                         ctx->temp_arrays);
        }
 
+       if (info &&
+           info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES) {
+               int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
+
+               /* Use a dynamically allocated array for immediates when their
+                * number exceeds LP_MAX_INLINED_IMMEDIATES; otherwise keep the
+                * static array, which avoids a heap allocation. */
+               ctx->imms_array = CALLOC(size * 4, sizeof(ctx->imms_array[0]));
+       }
+
        type.floating = true;
        type.fixed = false;
        type.sign = true;
@@ -1411,6 +1436,8 @@ void si_llvm_dispose(struct si_shader_context *ctx)
        ctx->temp_arrays = NULL;
        FREE(ctx->temp_array_allocas);
        ctx->temp_array_allocas = NULL;
+       FREE(ctx->imms_array);
+       ctx->imms_array = NULL;
        FREE(ctx->temps);
        ctx->temps = NULL;
        ctx->temps_count = 0;
-- 
2.11.0

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to