From: Nicolai Hähnle <nicolai.haeh...@amd.com>

Also, prepare for using tgsi_array_info.
This also opens the door for properly handling allocation failures, but I'm leaving that for a separate change. --- src/gallium/drivers/radeon/radeon_llvm.h | 11 ++-- .../drivers/radeon/radeon_setup_tgsi_llvm.c | 66 +++++++++++++--------- src/gallium/drivers/radeonsi/si_shader.c | 6 +- 3 files changed, 47 insertions(+), 36 deletions(-) diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h index 13f3336..6086dd6 100644 --- a/src/gallium/drivers/radeon/radeon_llvm.h +++ b/src/gallium/drivers/radeon/radeon_llvm.h @@ -43,25 +43,20 @@ struct radeon_llvm_branch { LLVMBasicBlockRef if_block; LLVMBasicBlockRef else_block; unsigned has_else; }; struct radeon_llvm_loop { LLVMBasicBlockRef loop_block; LLVMBasicBlockRef endloop_block; }; -struct radeon_llvm_array { - struct tgsi_declaration_range range; - LLVMValueRef alloca; -}; - struct radeon_llvm_context { struct lp_build_tgsi_soa_context soa; /*=== Front end configuration ===*/ /* Instructions that are not described by any of the TGSI opcodes. */ /** This function is responsible for initilizing the inputs array and will be * called once for each input declared in the TGSI shader. 
*/ @@ -94,21 +89,22 @@ struct radeon_llvm_context { /*=== Private Members ===*/ struct radeon_llvm_branch *branch; struct radeon_llvm_loop *loop; unsigned branch_depth; unsigned branch_depth_max; unsigned loop_depth; unsigned loop_depth_max; - struct radeon_llvm_array *arrays; + struct tgsi_array_info *temp_arrays; + LLVMValueRef *temp_array_allocas; LLVMValueRef main_fn; LLVMTypeRef return_type; unsigned fpmath_md_kind; LLVMValueRef fpmath_md_2p5_ulp; struct gallivm_state gallivm; }; @@ -117,21 +113,22 @@ LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base, LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base, enum tgsi_opcode_type type, LLVMValueRef value); void radeon_llvm_emit_prepare_cube_coords(struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data, LLVMValueRef *coords_arg, LLVMValueRef *derivs_arg); void radeon_llvm_context_init(struct radeon_llvm_context *ctx, - const char *triple); + const char *triple, + const struct tgsi_shader_info *info); void radeon_llvm_create_func(struct radeon_llvm_context *ctx, LLVMTypeRef *return_types, unsigned num_return_elems, LLVMTypeRef *ParamTypes, unsigned ParamCount); void radeon_llvm_dispose(struct radeon_llvm_context *ctx); unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan); void radeon_llvm_finalize_module(struct radeon_llvm_context *ctx); diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index d8ab5b0..2521023 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -108,54 +108,54 @@ static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base, return LLVMBuildShuffleVector(bld_base->base.gallivm->builder, value, LLVMGetUndef(LLVMTypeOf(value)), LLVMConstVector(swizzles, 4), ""); } /** * Return the description of the array covering the given temporary register * index. 
*/ -static const struct radeon_llvm_array * -get_temp_array(struct lp_build_tgsi_context *bld_base, - unsigned reg_index, - const struct tgsi_ind_register *reg) +static unsigned +get_temp_array_id(struct lp_build_tgsi_context *bld_base, + unsigned reg_index, + const struct tgsi_ind_register *reg) { struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); unsigned num_arrays = ctx->soa.bld_base.info->array_max[TGSI_FILE_TEMPORARY]; unsigned i; if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays) - return &ctx->arrays[reg->ArrayID - 1]; + return reg->ArrayID; for (i = 0; i < num_arrays; i++) { - const struct radeon_llvm_array *array = &ctx->arrays[i]; + const struct tgsi_array_info *array = &ctx->temp_arrays[i]; if (reg_index >= array->range.First && reg_index <= array->range.Last) - return array; + return i + 1; } - return NULL; + return 0; } static struct tgsi_declaration_range get_array_range(struct lp_build_tgsi_context *bld_base, unsigned File, unsigned reg_index, const struct tgsi_ind_register *reg) { + struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base); struct tgsi_declaration_range range; if (File == TGSI_FILE_TEMPORARY) { - const struct radeon_llvm_array *array = - get_temp_array(bld_base, reg_index, reg); - if (array) - return array->range; + unsigned array_id = get_temp_array_id(bld_base, reg_index, reg); + if (array_id) + return ctx->temp_arrays[array_id - 1].range; } range.First = 0; range.Last = bld_base->info->file_max[File]; return range; } static LLVMValueRef emit_array_index(struct lp_build_tgsi_soa_context *bld, const struct tgsi_ind_register *reg, @@ -177,39 +177,45 @@ emit_array_index(struct lp_build_tgsi_soa_context *bld, * Returns NULL if the insertelement/extractelement fallback for array access * must be used. 
*/ static LLVMValueRef get_pointer_into_array(struct radeon_llvm_context *ctx, unsigned file, unsigned swizzle, unsigned reg_index, const struct tgsi_ind_register *reg_indirect) { - const struct radeon_llvm_array *array; + unsigned array_id; struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef idxs[2]; LLVMValueRef index; + LLVMValueRef alloca; if (file != TGSI_FILE_TEMPORARY) return NULL; - array = get_temp_array(&ctx->soa.bld_base, reg_index, reg_indirect); - if (!array || !array->alloca) + array_id = get_temp_array_id(&ctx->soa.bld_base, reg_index, reg_indirect); + if (!array_id) return NULL; - index = emit_array_index(&ctx->soa, reg_indirect, reg_index - array->range.First); + alloca = ctx->temp_array_allocas[array_id - 1]; + if (!alloca) + return NULL; + + index = emit_array_index(&ctx->soa, reg_indirect, + reg_index - ctx->temp_arrays[array_id - 1].range.First); index = LLVMBuildMul(builder, index, lp_build_const_int32(gallivm, TGSI_NUM_CHANNELS), ""); index = LLVMBuildAdd(builder, index, lp_build_const_int32(gallivm, swizzle), ""); idxs[0] = ctx->soa.bld_base.uint_bld.zero; idxs[1] = index; - return LLVMBuildGEP(builder, array->alloca, idxs, 2, ""); + return LLVMBuildGEP(builder, alloca, idxs, 2, ""); } LLVMValueRef radeon_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base, enum tgsi_opcode_type type, LLVMValueRef ptr, LLVMValueRef ptr2) { LLVMBuilderRef builder = bld_base->base.gallivm->builder; LLVMValueRef result; @@ -471,26 +477,22 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base, case TGSI_FILE_TEMPORARY: { char name[16] = ""; LLVMValueRef array_alloca = NULL; unsigned decl_size; first = decl->Range.First; last = decl->Range.Last; decl_size = 4 * ((last - first) + 1); if (decl->Declaration.Array) { unsigned id = decl->Array.ArrayID - 1; - if (!ctx->arrays) { - int size = bld_base->info->array_max[TGSI_FILE_TEMPORARY]; - ctx->arrays = CALLOC(size, 
sizeof(ctx->arrays[0])); - } - ctx->arrays[id].range = decl->Range; + ctx->temp_arrays[id].range = decl->Range; /* If the array has more than 16 elements, store it * in memory using an alloca that spans the entire * array. * * Otherwise, store each array element individually. * We will then generate vectors (per-channel, up to * <4 x float>) for indirect addressing. * * Note that 16 is the number of vector elements that @@ -500,21 +502,21 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base, * depends on VGPR register pressure elsewhere. * * FIXME: We shouldn't need to have the non-alloca * code path for arrays. LLVM should be smart enough to * promote allocas into registers when profitable. */ if (decl_size > 16) { array_alloca = LLVMBuildAlloca(builder, LLVMArrayType(bld_base->base.vec_type, decl_size), "array"); - ctx->arrays[id].alloca = array_alloca; + ctx->temp_array_allocas[id] = array_alloca; } } if (!ctx->temps_count) { ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1; ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef)); } if (!array_alloca) { for (i = 0; i < decl_size; ++i) { #ifdef DEBUG @@ -1720,39 +1722,49 @@ static void emit_rsq(const struct lp_build_tgsi_action *action, { LLVMValueRef sqrt = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_SQRT, emit_data->args[0]); emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DIV, bld_base->base.one, sqrt); } -void radeon_llvm_context_init(struct radeon_llvm_context *ctx, const char *triple) +void radeon_llvm_context_init(struct radeon_llvm_context *ctx, const char *triple, + const struct tgsi_shader_info *info) { struct lp_type type; /* Initialize the gallivm object: * We are only using the module, context, and builder fields of this struct. * This should be enough for us to be able to pass our gallivm struct to the * helper functions in the gallivm module. 
*/ memset(&ctx->gallivm, 0, sizeof (ctx->gallivm)); memset(&ctx->soa, 0, sizeof(ctx->soa)); ctx->gallivm.context = LLVMContextCreate(); ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi", ctx->gallivm.context); LLVMSetTarget(ctx->gallivm.module, triple); ctx->gallivm.builder = LLVMCreateBuilderInContext(ctx->gallivm.context); struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base; + bld_base->info = info; + + if (info && info->array_max[TGSI_FILE_TEMPORARY] > 0) { + int size = info->array_max[TGSI_FILE_TEMPORARY]; + + ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0])); + ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0])); + } + type.floating = true; type.fixed = false; type.sign = true; type.norm = false; type.width = 32; type.length = 1; lp_build_context_init(&bld_base->base, &ctx->gallivm, type); lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type)); lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, lp_int_type(type)); @@ -1959,22 +1971,24 @@ void radeon_llvm_finalize_module(struct radeon_llvm_context *ctx) LLVMDisposeBuilder(gallivm->builder); LLVMDisposePassManager(gallivm->passmgr); gallivm_dispose_target_library_info(target_library_info); } void radeon_llvm_dispose(struct radeon_llvm_context *ctx) { LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module); LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context); - FREE(ctx->arrays); - ctx->arrays = NULL; + FREE(ctx->temp_arrays); + ctx->temp_arrays = NULL; + FREE(ctx->temp_array_allocas); + ctx->temp_array_allocas = NULL; FREE(ctx->temps); ctx->temps = NULL; ctx->temps_count = 0; FREE(ctx->loop); ctx->loop = NULL; ctx->loop_depth_max = 0; FREE(ctx->branch); ctx->branch = NULL; ctx->branch_depth_max = 0; } diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 2de20cb..c595ee0 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ 
b/src/gallium/drivers/radeonsi/si_shader.c @@ -6515,21 +6515,23 @@ static void si_dump_shader_key(unsigned shader, union si_shader_key *key, static void si_init_shader_ctx(struct si_shader_context *ctx, struct si_screen *sscreen, struct si_shader *shader, LLVMTargetMachineRef tm) { struct lp_build_tgsi_context *bld_base; struct lp_build_tgsi_action tmpl = {}; memset(ctx, 0, sizeof(*ctx)); - radeon_llvm_context_init(&ctx->radeon_bld, "amdgcn--"); + radeon_llvm_context_init( + &ctx->radeon_bld, "amdgcn--", + (shader && shader->selector) ? &shader->selector->info : NULL); ctx->tm = tm; ctx->screen = sscreen; if (shader && shader->selector) ctx->type = shader->selector->info.processor; else ctx->type = -1; ctx->shader = shader; ctx->voidt = LLVMVoidTypeInContext(ctx->radeon_bld.gallivm.context); ctx->i1 = LLVMInt1TypeInContext(ctx->radeon_bld.gallivm.context); @@ -6538,22 +6540,20 @@ static void si_init_shader_ctx(struct si_shader_context *ctx, ctx->i64 = LLVMInt64TypeInContext(ctx->radeon_bld.gallivm.context); ctx->i128 = LLVMIntTypeInContext(ctx->radeon_bld.gallivm.context, 128); ctx->f32 = LLVMFloatTypeInContext(ctx->radeon_bld.gallivm.context); ctx->v16i8 = LLVMVectorType(ctx->i8, 16); ctx->v2i32 = LLVMVectorType(ctx->i32, 2); ctx->v4i32 = LLVMVectorType(ctx->i32, 4); ctx->v4f32 = LLVMVectorType(ctx->f32, 4); ctx->v8i32 = LLVMVectorType(ctx->i32, 8); bld_base = &ctx->radeon_bld.soa.bld_base; - if (shader && shader->selector) - bld_base->info = &shader->selector->info; bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant; bld_base->op_actions[TGSI_OPCODE_INTERP_CENTROID] = interp_action; bld_base->op_actions[TGSI_OPCODE_INTERP_SAMPLE] = interp_action; bld_base->op_actions[TGSI_OPCODE_INTERP_OFFSET] = interp_action; bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action; bld_base->op_actions[TGSI_OPCODE_TEX2] = tex_action; bld_base->op_actions[TGSI_OPCODE_TXB] = tex_action; bld_base->op_actions[TGSI_OPCODE_TXB2] = tex_action; -- 2.7.4 
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev