From: Nicolai Hähnle <nicolai.haeh...@amd.com>

Allocating the ddxy_lds is considered to be part of the API shader
translation and not part of the ABI.
---
 src/amd/common/ac_nir_to_llvm.c | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index ef1eeac..8953a3d 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -60,20 +60,22 @@ struct ac_nir_context { LLVMValueRef main_function; LLVMBasicBlockRef continue_block; LLVMBasicBlockRef break_block; LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4]; int num_locals; LLVMValueRef *locals; + LLVMValueRef ddxy_lds; + struct nir_to_llvm_context *nctx; /* TODO get rid of this */ }; struct nir_to_llvm_context { struct ac_llvm_context ac; const struct ac_nir_compiler_options *options; struct ac_shader_variant_info *shader_info; struct ac_shader_abi abi; struct ac_nir_context *nir; @@ -161,22 +163,20 @@ struct nir_to_llvm_context { LLVMValueRef lds; LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4]; LLVMValueRef shared_memory; uint64_t input_mask; uint64_t output_mask; uint8_t num_output_clips; uint8_t num_output_culls; - bool has_ds_bpermute; - bool is_gs_copy_shader; LLVMValueRef gs_next_vertex; unsigned gs_max_out_vertices; unsigned tes_primitive_mode; uint64_t tess_outputs_written; uint64_t tess_patch_outputs_written; }; static inline struct nir_to_llvm_context * @@ -1460,69 +1460,70 @@ static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx, } LLVMTypeRef v2f32 = LLVMVectorType(ctx->f32, 2); result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(v2f32), temps[0], ctx->i32_0, ""); result = LLVMBuildInsertElement(ctx->builder, result, temps[1], ctx->i32_1, ""); return result; } -static LLVMValueRef emit_ddxy(struct nir_to_llvm_context *ctx, +static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx, nir_op op, LLVMValueRef src0) { unsigned mask; int idx; LLVMValueRef result; + bool has_ds_bpermute = ctx->abi->chip_class >= VI; - if (!ctx->lds && !ctx->has_ds_bpermute) - ctx->lds = LLVMAddGlobalInAddressSpace(ctx->module, - LLVMArrayType(ctx->i32, 64), + if (!ctx->ddxy_lds && !has_ds_bpermute) + ctx->ddxy_lds = 
LLVMAddGlobalInAddressSpace(ctx->ac.module, + LLVMArrayType(ctx->ac.i32, 64), "ddxy_lds", LOCAL_ADDR_SPACE); if (op == nir_op_fddx_fine || op == nir_op_fddx) mask = AC_TID_MASK_LEFT; else if (op == nir_op_fddy_fine || op == nir_op_fddy) mask = AC_TID_MASK_TOP; else mask = AC_TID_MASK_TOP_LEFT; /* for DDX we want to next X pixel, DDY next Y pixel. */ if (op == nir_op_fddx_fine || op == nir_op_fddx_coarse || op == nir_op_fddx) idx = 1; else idx = 2; - result = ac_build_ddxy(&ctx->ac, ctx->has_ds_bpermute, - mask, idx, ctx->lds, + result = ac_build_ddxy(&ctx->ac, has_ds_bpermute, + mask, idx, ctx->ddxy_lds, src0); return result; } /* * this takes an I,J coordinate pair, * and works out the X and Y derivatives. * it returns DDX(I), DDX(J), DDY(I), DDY(J). */ static LLVMValueRef emit_ddxy_interp( - struct nir_to_llvm_context *ctx, + struct ac_nir_context *ctx, LLVMValueRef interp_ij) { LLVMValueRef result[4], a; unsigned i; for (i = 0; i < 2; i++) { - a = LLVMBuildExtractElement(ctx->builder, interp_ij, - LLVMConstInt(ctx->i32, i, false), ""); + a = LLVMBuildExtractElement(ctx->ac.builder, interp_ij, + LLVMConstInt(ctx->ac.i32, i, false), ""); result[i] = emit_ddxy(ctx, nir_op_fddx, a); result[2+i] = emit_ddxy(ctx, nir_op_fddy, a); } return ac_build_gather_values(&ctx->ac, result, 4); } static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) { LLVMValueRef src[4], result = NULL; unsigned num_components = instr->dest.dest.ssa.num_components; @@ -1881,21 +1882,21 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) break; case nir_op_unpack_half_2x16: result = emit_unpack_half_2x16(&ctx->ac, src[0]); break; case nir_op_fddx: case nir_op_fddy: case nir_op_fddx_fine: case nir_op_fddy_fine: case nir_op_fddx_coarse: case nir_op_fddy_coarse: - result = emit_ddxy(ctx->nctx, instr->op, src[0]); + result = emit_ddxy(ctx, instr->op, src[0]); break; case nir_op_unpack_64_2x32_split_x: { assert(instr->src[0].src.ssa->num_components 
== 1); LLVMValueRef tmp = LLVMBuildBitCast(ctx->builder, src[0], LLVMVectorType(ctx->i32, 2), ""); result = LLVMBuildExtractElement(ctx->builder, tmp, ctx->i32zero, ""); break; @@ -3774,21 +3775,21 @@ static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx, src_c0 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32zero, ""); src_c0 = LLVMBuildFSub(ctx->builder, src_c0, halfval, ""); src_c1 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32one, ""); src_c1 = LLVMBuildFSub(ctx->builder, src_c1, halfval, ""); } interp_param = lookup_interp_param(ctx, instr->variables[0]->var->data.interpolation, location); attr_number = LLVMConstInt(ctx->i32, input_index, false); if (location == INTERP_SAMPLE || location == INTERP_CENTER) { LLVMValueRef ij_out[2]; - LLVMValueRef ddxy_out = emit_ddxy_interp(ctx, interp_param); + LLVMValueRef ddxy_out = emit_ddxy_interp(ctx->nir, interp_param); /* * take the I then J parameters, and the DDX/Y for it, and * calculate the IJ inputs for the interpolator. * temp1 = ddx * offset/sample.x + I; * interp_param.I = ddy * offset/sample.y + temp1; * temp1 = ddx * offset/sample.x + J; * interp_param.J = ddy * offset/sample.y + temp1; */ for (unsigned i = 0; i < 2; i++) { @@ -6102,22 +6103,20 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, struct nir_to_llvm_context ctx = {0}; unsigned i; ctx.options = options; ctx.shader_info = shader_info; ctx.context = LLVMContextCreate(); ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context); ac_llvm_context_init(&ctx.ac, ctx.context); ctx.ac.module = ctx.module; - ctx.has_ds_bpermute = ctx.options->chip_class >= VI; - memset(shader_info, 0, sizeof(*shader_info)); ac_nir_shader_info_pass(nir, options, &shader_info->info); LLVMSetTarget(ctx.module, options->supports_spill ? 
"amdgcn-mesa-mesa3d" : "amdgcn--"); LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm); char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout); LLVMSetDataLayout(ctx.module, data_layout_str); LLVMDisposeTargetData(data_layout); -- 2.9.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev