radeon: allocate temps array info in radeon_llvm_context_init

Nicolai Hähnle Wed, 10 Aug 2016 12:24:54 -0700

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

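Allocate the bookkeeping for temporary arrays once, up front, in
radeon_llvm_context_init, instead of lazily in emit_declaration when the
first array declaration is encountered. To make that possible, the shader
info is now passed to radeon_llvm_context_init.

Also, prepare for using tgsi_array_info.

This also opens the door for properly handling allocation failures, but I'm
leaving that for a separate change.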
---
 src/gallium/drivers/radeon/radeon_llvm.h           | 11 ++--
 .../drivers/radeon/radeon_setup_tgsi_llvm.c        | 66 +++++++++++++---------
 src/gallium/drivers/radeonsi/si_shader.c           |  6 +-
 3 files changed, 47 insertions(+), 36 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_llvm.h 
b/src/gallium/drivers/radeon/radeon_llvm.h
index 13f3336..6086dd6 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -43,25 +43,20 @@ struct radeon_llvm_branch {
        LLVMBasicBlockRef if_block;
        LLVMBasicBlockRef else_block;
        unsigned has_else;
 };
 
 struct radeon_llvm_loop {
        LLVMBasicBlockRef loop_block;
        LLVMBasicBlockRef endloop_block;
 };
 
-struct radeon_llvm_array {
-       struct tgsi_declaration_range range;
-       LLVMValueRef alloca;
-};
-
 struct radeon_llvm_context {
        struct lp_build_tgsi_soa_context soa;
 
        /*=== Front end configuration ===*/
 
        /* Instructions that are not described by any of the TGSI opcodes. */
 
        /** This function is responsible for initilizing the inputs array and 
will be
          * called once for each input declared in the TGSI shader.
          */
@@ -94,21 +89,22 @@ struct radeon_llvm_context {
        /*=== Private Members ===*/
 
        struct radeon_llvm_branch *branch;
        struct radeon_llvm_loop *loop;
 
        unsigned branch_depth;
        unsigned branch_depth_max;
        unsigned loop_depth;
        unsigned loop_depth_max;
 
-       struct radeon_llvm_array *arrays;
+       struct tgsi_array_info *temp_arrays;
+       LLVMValueRef *temp_array_allocas;
 
        LLVMValueRef main_fn;
        LLVMTypeRef return_type;
 
        unsigned fpmath_md_kind;
        LLVMValueRef fpmath_md_2p5_ulp;
 
        struct gallivm_state gallivm;
 };
 
@@ -117,21 +113,22 @@ LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context 
*bld_base,
 
 LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
                     enum tgsi_opcode_type type, LLVMValueRef value);
 
 void radeon_llvm_emit_prepare_cube_coords(struct lp_build_tgsi_context 
*bld_base,
                                          struct lp_build_emit_data *emit_data,
                                          LLVMValueRef *coords_arg,
                                          LLVMValueRef *derivs_arg);
 
 void radeon_llvm_context_init(struct radeon_llvm_context *ctx,
-                              const char *triple);
+                              const char *triple,
+                             const struct tgsi_shader_info *info);
 
 void radeon_llvm_create_func(struct radeon_llvm_context *ctx,
                             LLVMTypeRef *return_types, unsigned 
num_return_elems,
                             LLVMTypeRef *ParamTypes, unsigned ParamCount);
 
 void radeon_llvm_dispose(struct radeon_llvm_context *ctx);
 
 unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan);
 
 void radeon_llvm_finalize_module(struct radeon_llvm_context *ctx);
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index d8ab5b0..2521023 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -108,54 +108,54 @@ static LLVMValueRef emit_swizzle(struct 
lp_build_tgsi_context *bld_base,
        return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
                                      value,
                                      LLVMGetUndef(LLVMTypeOf(value)),
                                      LLVMConstVector(swizzles, 4), "");
 }
 
 /**
  * Return the description of the array covering the given temporary register
  * index.
  */
-static const struct radeon_llvm_array *
-get_temp_array(struct lp_build_tgsi_context *bld_base,
-              unsigned reg_index,
-              const struct tgsi_ind_register *reg)
+static unsigned
+get_temp_array_id(struct lp_build_tgsi_context *bld_base,
+                 unsigned reg_index,
+                 const struct tgsi_ind_register *reg)
 {
        struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
        unsigned num_arrays = 
ctx->soa.bld_base.info->array_max[TGSI_FILE_TEMPORARY];
        unsigned i;
 
        if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
-               return &ctx->arrays[reg->ArrayID - 1];
+               return reg->ArrayID;
 
        for (i = 0; i < num_arrays; i++) {
-               const struct radeon_llvm_array *array = &ctx->arrays[i];
+               const struct tgsi_array_info *array = &ctx->temp_arrays[i];
 
                if (reg_index >= array->range.First && reg_index <= 
array->range.Last)
-                       return array;
+                       return i + 1;
        }
 
-       return NULL;
+       return 0;
 }
 
 static struct tgsi_declaration_range
 get_array_range(struct lp_build_tgsi_context *bld_base,
                unsigned File, unsigned reg_index,
                const struct tgsi_ind_register *reg)
 {
+       struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
        struct tgsi_declaration_range range;
 
        if (File == TGSI_FILE_TEMPORARY) {
-               const struct radeon_llvm_array *array =
-                       get_temp_array(bld_base, reg_index, reg);
-               if (array)
-                       return array->range;
+               unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
+               if (array_id)
+                       return ctx->temp_arrays[array_id - 1].range;
        }
 
        range.First = 0;
        range.Last = bld_base->info->file_max[File];
        return range;
 }
 
 static LLVMValueRef
 emit_array_index(struct lp_build_tgsi_soa_context *bld,
                 const struct tgsi_ind_register *reg,
@@ -177,39 +177,45 @@ emit_array_index(struct lp_build_tgsi_soa_context *bld,
  * Returns NULL if the insertelement/extractelement fallback for array access
  * must be used.
  */
 static LLVMValueRef
 get_pointer_into_array(struct radeon_llvm_context *ctx,
                       unsigned file,
                       unsigned swizzle,
                       unsigned reg_index,
                       const struct tgsi_ind_register *reg_indirect)
 {
-       const struct radeon_llvm_array *array;
+       unsigned array_id;
        struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        LLVMValueRef idxs[2];
        LLVMValueRef index;
+       LLVMValueRef alloca;
 
        if (file != TGSI_FILE_TEMPORARY)
                return NULL;
 
-       array = get_temp_array(&ctx->soa.bld_base, reg_index, reg_indirect);
-       if (!array || !array->alloca)
+       array_id = get_temp_array_id(&ctx->soa.bld_base, reg_index, 
reg_indirect);
+       if (!array_id)
                return NULL;
 
-       index = emit_array_index(&ctx->soa, reg_indirect, reg_index - 
array->range.First);
+       alloca = ctx->temp_array_allocas[array_id - 1];
+       if (!alloca)
+               return NULL;
+
+       index = emit_array_index(&ctx->soa, reg_indirect,
+                                reg_index - ctx->temp_arrays[array_id - 
1].range.First);
        index = LLVMBuildMul(builder, index, lp_build_const_int32(gallivm, 
TGSI_NUM_CHANNELS), "");
        index = LLVMBuildAdd(builder, index, lp_build_const_int32(gallivm, 
swizzle), "");
        idxs[0] = ctx->soa.bld_base.uint_bld.zero;
        idxs[1] = index;
-       return LLVMBuildGEP(builder, array->alloca, idxs, 2, "");
+       return LLVMBuildGEP(builder, alloca, idxs, 2, "");
 }
 
 LLVMValueRef
 radeon_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
                             enum tgsi_opcode_type type,
                             LLVMValueRef ptr,
                             LLVMValueRef ptr2)
 {
        LLVMBuilderRef builder = bld_base->base.gallivm->builder;
        LLVMValueRef result;
@@ -471,26 +477,22 @@ static void emit_declaration(struct lp_build_tgsi_context 
*bld_base,
        case TGSI_FILE_TEMPORARY:
        {
                char name[16] = "";
                LLVMValueRef array_alloca = NULL;
                unsigned decl_size;
                first = decl->Range.First;
                last = decl->Range.Last;
                decl_size = 4 * ((last - first) + 1);
                if (decl->Declaration.Array) {
                        unsigned id = decl->Array.ArrayID - 1;
-                       if (!ctx->arrays) {
-                               int size = 
bld_base->info->array_max[TGSI_FILE_TEMPORARY];
-                               ctx->arrays = CALLOC(size, 
sizeof(ctx->arrays[0]));
-                       }
 
-                       ctx->arrays[id].range = decl->Range;
+                       ctx->temp_arrays[id].range = decl->Range;
 
                        /* If the array has more than 16 elements, store it
                         * in memory using an alloca that spans the entire
                         * array.
                         *
                         * Otherwise, store each array element individually.
                         * We will then generate vectors (per-channel, up to
                         * <4 x float>) for indirect addressing.
                         *
                         * Note that 16 is the number of vector elements that
@@ -500,21 +502,21 @@ static void emit_declaration(struct lp_build_tgsi_context 
*bld_base,
                         * depends on VGPR register pressure elsewhere.
                         *
                         * FIXME: We shouldn't need to have the non-alloca
                         * code path for arrays. LLVM should be smart enough to
                         * promote allocas into registers when profitable.
                         */
                        if (decl_size > 16) {
                                array_alloca = LLVMBuildAlloca(builder,
                                        LLVMArrayType(bld_base->base.vec_type,
                                                      decl_size), "array");
-                               ctx->arrays[id].alloca = array_alloca;
+                               ctx->temp_array_allocas[id] = array_alloca;
                        }
                }
 
                if (!ctx->temps_count) {
                        ctx->temps_count = 
bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
                        ctx->temps = MALLOC(TGSI_NUM_CHANNELS * 
ctx->temps_count * sizeof(LLVMValueRef));
                }
                if (!array_alloca) {
                        for (i = 0; i < decl_size; ++i) {
 #ifdef DEBUG
@@ -1720,39 +1722,49 @@ static void emit_rsq(const struct lp_build_tgsi_action 
*action,
 {
        LLVMValueRef sqrt =
                lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_SQRT,
                                         emit_data->args[0]);
 
        emit_data->output[emit_data->chan] =
                lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DIV,
                                          bld_base->base.one, sqrt);
 }
 
-void radeon_llvm_context_init(struct radeon_llvm_context *ctx, const char 
*triple)
+void radeon_llvm_context_init(struct radeon_llvm_context *ctx, const char 
*triple,
+                             const struct tgsi_shader_info *info)
 {
        struct lp_type type;
 
        /* Initialize the gallivm object:
         * We are only using the module, context, and builder fields of this 
struct.
         * This should be enough for us to be able to pass our gallivm struct 
to the
         * helper functions in the gallivm module.
         */
        memset(&ctx->gallivm, 0, sizeof (ctx->gallivm));
        memset(&ctx->soa, 0, sizeof(ctx->soa));
        ctx->gallivm.context = LLVMContextCreate();
        ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
                                                ctx->gallivm.context);
        LLVMSetTarget(ctx->gallivm.module, triple);
        ctx->gallivm.builder = LLVMCreateBuilderInContext(ctx->gallivm.context);
 
        struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base;
 
+       bld_base->info = info;
+
+       if (info && info->array_max[TGSI_FILE_TEMPORARY] > 0) {
+               int size = info->array_max[TGSI_FILE_TEMPORARY];
+
+               ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
+               ctx->temp_array_allocas = CALLOC(size, 
sizeof(ctx->temp_array_allocas[0]));
+       }
+
        type.floating = true;
        type.fixed = false;
        type.sign = true;
        type.norm = false;
        type.width = 32;
        type.length = 1;
 
        lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
        lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, 
lp_uint_type(type));
        lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, 
lp_int_type(type));
@@ -1959,22 +1971,24 @@ void radeon_llvm_finalize_module(struct 
radeon_llvm_context *ctx)
 
        LLVMDisposeBuilder(gallivm->builder);
        LLVMDisposePassManager(gallivm->passmgr);
        gallivm_dispose_target_library_info(target_library_info);
 }
 
 void radeon_llvm_dispose(struct radeon_llvm_context *ctx)
 {
        LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module);
        LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context);
-       FREE(ctx->arrays);
-       ctx->arrays = NULL;
+       FREE(ctx->temp_arrays);
+       ctx->temp_arrays = NULL;
+       FREE(ctx->temp_array_allocas);
+       ctx->temp_array_allocas = NULL;
        FREE(ctx->temps);
        ctx->temps = NULL;
        ctx->temps_count = 0;
        FREE(ctx->loop);
        ctx->loop = NULL;
        ctx->loop_depth_max = 0;
        FREE(ctx->branch);
        ctx->branch = NULL;
        ctx->branch_depth_max = 0;
 }
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 2de20cb..c595ee0 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6515,21 +6515,23 @@ static void si_dump_shader_key(unsigned shader, union 
si_shader_key *key,
 
 static void si_init_shader_ctx(struct si_shader_context *ctx,
                               struct si_screen *sscreen,
                               struct si_shader *shader,
                               LLVMTargetMachineRef tm)
 {
        struct lp_build_tgsi_context *bld_base;
        struct lp_build_tgsi_action tmpl = {};
 
        memset(ctx, 0, sizeof(*ctx));
-       radeon_llvm_context_init(&ctx->radeon_bld, "amdgcn--");
+       radeon_llvm_context_init(
+               &ctx->radeon_bld, "amdgcn--",
+               (shader && shader->selector) ? &shader->selector->info : NULL);
        ctx->tm = tm;
        ctx->screen = sscreen;
        if (shader && shader->selector)
                ctx->type = shader->selector->info.processor;
        else
                ctx->type = -1;
        ctx->shader = shader;
 
        ctx->voidt = LLVMVoidTypeInContext(ctx->radeon_bld.gallivm.context);
        ctx->i1 = LLVMInt1TypeInContext(ctx->radeon_bld.gallivm.context);
@@ -6538,22 +6540,20 @@ static void si_init_shader_ctx(struct si_shader_context 
*ctx,
        ctx->i64 = LLVMInt64TypeInContext(ctx->radeon_bld.gallivm.context);
        ctx->i128 = LLVMIntTypeInContext(ctx->radeon_bld.gallivm.context, 128);
        ctx->f32 = LLVMFloatTypeInContext(ctx->radeon_bld.gallivm.context);
        ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
        ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
        ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
        ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
        ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
 
        bld_base = &ctx->radeon_bld.soa.bld_base;
-       if (shader && shader->selector)
-               bld_base->info = &shader->selector->info;
        bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
 
        bld_base->op_actions[TGSI_OPCODE_INTERP_CENTROID] = interp_action;
        bld_base->op_actions[TGSI_OPCODE_INTERP_SAMPLE] = interp_action;
        bld_base->op_actions[TGSI_OPCODE_INTERP_OFFSET] = interp_action;
 
        bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
        bld_base->op_actions[TGSI_OPCODE_TEX2] = tex_action;
        bld_base->op_actions[TGSI_OPCODE_TXB] = tex_action;
        bld_base->op_actions[TGSI_OPCODE_TXB2] = tex_action;
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 12/16] gallium/radeon: allocate temps array info in radeon_llvm_context_init

Reply via email to