We will call these helpers — ac_get_llvm_num_components() and ac_llvm_extract_elem(), moved from ac_nir_to_llvm.c into ac_llvm_build.c — from the radeonsi NIR backend. Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com> --- src/amd/common/ac_llvm_build.c | 24 +++++++++++++++++ src/amd/common/ac_llvm_build.h | 8 ++++++ src/amd/common/ac_nir_to_llvm.c | 58 +++++++++++++---------------------------- 3 files changed, 50 insertions(+), 40 deletions(-)
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index b2bf1bf7b51..faa08b6301c 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -91,20 +91,44 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context, args[0] = LLVMConstReal(ctx->f32, 2.5); ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1); ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context, "amdgpu.uniform", 14); ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0); } +int +ac_get_llvm_num_components(LLVMValueRef value) +{ + LLVMTypeRef type = LLVMTypeOf(value); + unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind + ? LLVMGetVectorSize(type) + : 1; + return num_components; +} + +LLVMValueRef +ac_llvm_extract_elem(struct ac_llvm_context *ac, + LLVMValueRef value, + int index) +{ + int count = ac_get_llvm_num_components(value); + + if (count == 1) + return value; + + return LLVMBuildExtractElement(ac->builder, value, + LLVMConstInt(ac->i32, index, false), ""); +} + unsigned ac_get_type_size(LLVMTypeRef type) { LLVMTypeKind kind = LLVMGetTypeKind(type); switch (kind) { case LLVMIntegerTypeKind: return LLVMGetIntTypeWidth(type) / 8; case LLVMFloatTypeKind: return 4; diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index 655dc1dcc86..c14b0d9f019 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -75,20 +75,28 @@ struct ac_llvm_context { enum chip_class chip_class; LLVMValueRef lds; }; void ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context, enum chip_class chip_class); +int +ac_get_llvm_num_components(LLVMValueRef value); + +LLVMValueRef +ac_llvm_extract_elem(struct ac_llvm_context *ac, + LLVMValueRef value, + int index); + unsigned ac_get_type_size(LLVMTypeRef type); LLVMTypeRef ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t); LLVMValueRef ac_to_integer(struct ac_llvm_context *ctx, 
LLVMValueRef v); LLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t); LLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v); LLVMValueRef ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name, LLVMTypeRef return_type, LLVMValueRef *params, diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 6f84604d54a..6060df75314 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -995,46 +995,24 @@ static void create_function(struct nir_to_llvm_context *ctx, set_userdata_location_shader(ctx, AC_UD_PS_SAMPLE_POS_OFFSET, &user_sgpr_idx, 1); } break; default: unreachable("Shader stage not implemented"); } ctx->shader_info->num_user_sgprs = user_sgpr_idx; } -static int get_llvm_num_components(LLVMValueRef value) -{ - LLVMTypeRef type = LLVMTypeOf(value); - unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind - ? LLVMGetVectorSize(type) - : 1; - return num_components; -} - -static LLVMValueRef llvm_extract_elem(struct ac_llvm_context *ac, - LLVMValueRef value, - int index) -{ - int count = get_llvm_num_components(value); - - if (count == 1) - return value; - - return LLVMBuildExtractElement(ac->builder, value, - LLVMConstInt(ac->i32, index, false), ""); -} - static LLVMValueRef trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value, unsigned count) { - unsigned num_components = get_llvm_num_components(value); + unsigned num_components = ac_get_llvm_num_components(value); if (count == num_components) return value; LLVMValueRef masks[] = { LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false), LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)}; if (count == 1) return LLVMBuildExtractElement(ctx->builder, value, masks[0], ""); @@ -2321,21 +2299,21 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, ctx->ac.i32_0, ""); tmp = LLVMBuildExtractElement(ctx->ac.builder, base_data, LLVMConstInt(ctx->ac.i32, start + 1, 
false), ""); data = LLVMBuildInsertElement(ctx->ac.builder, data, tmp, ctx->ac.i32_1, ""); store_name = "llvm.amdgcn.buffer.store.v2f32"; } else { assert(count == 1); - if (get_llvm_num_components(base_data) > 1) + if (ac_get_llvm_num_components(base_data) > 1) data = LLVMBuildExtractElement(ctx->ac.builder, base_data, LLVMConstInt(ctx->ac.i32, start, false), ""); else data = base_data; store_name = "llvm.amdgcn.buffer.store.f32"; } offset = base_offset; if (start != 0) { offset = LLVMBuildAdd(ctx->ac.builder, offset, LLVMConstInt(ctx->ac.i32, start * 4, false), ""); @@ -2348,23 +2326,23 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, } static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr) { const char *name; LLVMValueRef params[6]; int arg_count = 0; if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) { - params[arg_count++] = llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0); + params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0); } - params[arg_count++] = llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0); + params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0); params[arg_count++] = ctx->abi->load_ssbo(ctx->abi, get_src(ctx, instr->src[0]), true); params[arg_count++] = LLVMConstInt(ctx->ac.i32, 0, false); /* vindex */ params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */ params[arg_count++] = LLVMConstInt(ctx->ac.i1, 0, false); /* slc */ switch (instr->intrinsic) { case nir_intrinsic_ssbo_atomic_add: name = "llvm.amdgcn.buffer.atomic.add"; @@ -2827,21 +2805,21 @@ store_tcs_output(struct ac_shader_abi *abi, bool is_tess_factor = false; if (location == VARYING_SLOT_TESS_LEVEL_INNER || location == VARYING_SLOT_TESS_LEVEL_OUTER) is_tess_factor = true; unsigned base = is_compact ? 
const_index : 0; for (unsigned chan = 0; chan < 8; chan++) { if (!(writemask & (1 << chan))) continue; - LLVMValueRef value = llvm_extract_elem(&ctx->ac, src, chan - component); + LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component); if (store_lds || is_tess_factor) ac_lds_store(&ctx->ac, dw_addr, value); if (!is_tess_factor && writemask != 0xF) ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, value, 1, buf_addr, ctx->oc_lds, 4 * (base + chan), 1, 0, true, false); dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, @@ -3119,21 +3097,21 @@ visit_store_var(struct ac_nir_context *ctx, int writemask = instr->const_index[0] << comp; LLVMValueRef indir_index; unsigned const_index; get_deref_offset(ctx, instr->variables[0], false, NULL, NULL, &const_index, &indir_index); if (get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) { int old_writemask = writemask; src = LLVMBuildBitCast(ctx->ac.builder, src, - LLVMVectorType(ctx->ac.f32, get_llvm_num_components(src) * 2), + LLVMVectorType(ctx->ac.f32, ac_get_llvm_num_components(src) * 2), ""); writemask = 0; for (unsigned chan = 0; chan < 4; chan++) { if (old_writemask & (1 << chan)) writemask |= 3u << (2 * chan); } } switch (instr->variables[0]->var->data.mode) { @@ -3157,21 +3135,21 @@ visit_store_var(struct ac_nir_context *ctx, const_index, location, driver_location, src, comp, is_patch, is_compact, writemask); return; } for (unsigned chan = 0; chan < 8; chan++) { int stride = 4; if (!(writemask & (1 << chan))) continue; - value = llvm_extract_elem(&ctx->ac, src, chan - comp); + value = ac_llvm_extract_elem(&ctx->ac, src, chan - comp); if (instr->variables[0]->var->data.compact) stride = 1; if (indir_index) { unsigned count = glsl_count_attribute_slots( instr->variables[0]->var->type, false); count -= chan / 4; LLVMValueRef tmp_vec = ac_build_gather_values_extended( &ctx->ac, ctx->outputs + idx + chan, count, stride, true, true); @@ -3186,21 +3164,21 @@ visit_store_var(struct ac_nir_context *ctx, 
LLVMBuildStore(ctx->ac.builder, value, temp_ptr); } } break; case nir_var_local: for (unsigned chan = 0; chan < 8; chan++) { if (!(writemask & (1 << chan))) continue; - value = llvm_extract_elem(&ctx->ac, src, chan); + value = ac_llvm_extract_elem(&ctx->ac, src, chan); if (indir_index) { unsigned count = glsl_count_attribute_slots( instr->variables[0]->var->type, false); count -= chan / 4; LLVMValueRef tmp_vec = ac_build_gather_values_extended( &ctx->ac, ctx->locals + idx + chan, count, 4, true, true); tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec, value, indir_index, ""); @@ -3226,22 +3204,22 @@ visit_store_var(struct ac_nir_context *ctx, ctx->ac.builder, val, LLVMGetElementType(LLVMTypeOf(address)), ""); LLVMBuildStore(ctx->ac.builder, val, address); } else { for (unsigned chan = 0; chan < 4; chan++) { if (!(writemask & (1 << chan))) continue; LLVMValueRef ptr = LLVMBuildStructGEP(ctx->ac.builder, address, chan, ""); - LLVMValueRef src = llvm_extract_elem(&ctx->ac, val, - chan); + LLVMValueRef src = ac_llvm_extract_elem(&ctx->ac, val, + chan); src = LLVMBuildBitCast( ctx->ac.builder, src, LLVMGetElementType(LLVMTypeOf(ptr)), ""); LLVMBuildStore(ctx->ac.builder, src, ptr); } } break; } default: break; @@ -3359,21 +3337,21 @@ static LLVMValueRef get_image_coords(struct ac_nir_context *ctx, if(instr->variables[0]->deref.child) type = instr->variables[0]->deref.child->type; LLVMValueRef src0 = get_src(ctx, instr->src[0]); LLVMValueRef coords[4]; LLVMValueRef masks[] = { LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false), LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false), }; LLVMValueRef res; - LLVMValueRef sample_index = llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[1]), 0); + LLVMValueRef sample_index = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[1]), 0); int count; enum glsl_sampler_dim dim = glsl_get_sampler_dim(type); bool is_array = glsl_sampler_type_is_array(type); bool add_frag_pos = 
(dim == GLSL_SAMPLER_DIM_SUBPASS || dim == GLSL_SAMPLER_DIM_SUBPASS_MS); bool is_ms = (dim == GLSL_SAMPLER_DIM_MS || dim == GLSL_SAMPLER_DIM_SUBPASS_MS); bool gfx9_1d = ctx->ac.chip_class >= GFX9 && dim == GLSL_SAMPLER_DIM_1D; count = image_type_to_components_count(dim, is_array); @@ -3406,21 +3384,21 @@ static LLVMValueRef get_image_coords(struct ac_nir_context *ctx, if (count == 1 && !gfx9_1d) { if (instr->src[0].ssa->num_components) res = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], ""); else res = src0; } else { int chan; if (is_ms) count--; for (chan = 0; chan < count; ++chan) { - coords[chan] = llvm_extract_elem(&ctx->ac, src0, chan); + coords[chan] = ac_llvm_extract_elem(&ctx->ac, src0, chan); } if (add_frag_pos) { for (chan = 0; chan < 2; ++chan) coords[chan] = LLVMBuildAdd(ctx->ac.builder, coords[chan], LLVMBuildFPToUI(ctx->ac.builder, ctx->abi->frag_pos[chan], ctx->ac.i32, ""), ""); coords[2] = ac_to_integer(&ctx->ac, ctx->abi->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]); count++; } if (gfx9_1d) { @@ -4607,50 +4585,50 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1, samples, ""); samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples, ctx->ac.i32_1, ""); result = samples; goto write_result; } if (coord) for (chan = 0; chan < instr->coord_components; chan++) - coords[chan] = llvm_extract_elem(&ctx->ac, coord, chan); + coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan); if (offsets && instr->op != nir_texop_txf) { LLVMValueRef offset[3], pack; for (chan = 0; chan < 3; ++chan) offset[chan] = ctx->ac.i32_0; args.offset = true; - for (chan = 0; chan < get_llvm_num_components(offsets); chan++) { - offset[chan] = llvm_extract_elem(&ctx->ac, offsets, chan); + for (chan = 0; chan < ac_get_llvm_num_components(offsets); chan++) { + offset[chan] = ac_llvm_extract_elem(&ctx->ac, offsets, chan); offset[chan] = LLVMBuildAnd(ctx->ac.builder, 
offset[chan], LLVMConstInt(ctx->ac.i32, 0x3f, false), ""); if (chan) offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan], LLVMConstInt(ctx->ac.i32, chan * 8, false), ""); } pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], ""); pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], ""); address[count++] = pack; } /* pack LOD bias value */ if (instr->op == nir_texop_txb && bias) { address[count++] = bias; } /* Pack depth comparison value */ if (instr->is_shadow && comparator) { LLVMValueRef z = ac_to_float(&ctx->ac, - llvm_extract_elem(&ctx->ac, comparator, 0)); + ac_llvm_extract_elem(&ctx->ac, comparator, 0)); /* TC-compatible HTILE on radeonsi promotes Z16 and Z24 to Z32_FLOAT, * so the depth comparison value isn't clamped for Z16 and * Z24 anymore. Do it manually here. * * It's unnecessary if the original texture format was * Z32_FLOAT, but we don't know that here. */ if (ctx->ac.chip_class == VI && ctx->abi->clamp_shadow_reference) z = ac_build_clamp(&ctx->ac, z); @@ -4680,22 +4658,22 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) num_dest_deriv_channels = 2; num_deriv_comp = 2; } else { num_dest_deriv_channels = 1; num_deriv_comp = 1; } break; } for (unsigned i = 0; i < num_src_deriv_channels; i++) { - derivs[i] = ac_to_float(&ctx->ac, llvm_extract_elem(&ctx->ac, ddx, i)); - derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac, llvm_extract_elem(&ctx->ac, ddy, i)); + derivs[i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddx, i)); + derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddy, i)); } for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) { derivs[i] = ctx->ac.f32_0; derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0; } } if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) { for (chan = 0; chan < instr->coord_components; chan++) coords[chan] = ac_to_float(&ctx->ac, coords[chan]); -- 2.14.3 
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev