This is needed for RADV to support explicit component packing.
This is also required to use the new NIR component splitting /
packing passes.
---
 src/amd/common/ac_nir_to_llvm.c | 57 +++++++++++++++++++++++++++++++++--------
 1 file changed, 46 insertions(+), 11 deletions(-)
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 5e5a46a21f..2ca0d487d0 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -1060,21 +1060,20 @@ static int get_llvm_num_components(LLVMValueRef value) : 1; return num_components; } static LLVMValueRef llvm_extract_elem(struct ac_llvm_context *ac, LLVMValueRef value, int index) { int count = get_llvm_num_components(value); - assert(index < count); if (count == 1) return value; return LLVMBuildExtractElement(ac->builder, value, LLVMConstInt(ac->i32, index, false), ""); } static LLVMValueRef trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value, unsigned count) { @@ -2810,20 +2809,43 @@ get_dw_address(struct nir_to_llvm_context *ctx, dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, LLVMConstInt(ctx->i32, param * 4, false), ""); if (const_index && compact_const_index) dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, LLVMConstInt(ctx->i32, const_index, false), ""); return dw_addr; } static LLVMValueRef +build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values, + unsigned value_count, unsigned component) +{ + LLVMValueRef vec = NULL; + unsigned value_stride = 1; + + if (value_count == 1) { + return values[component]; + } else if (!value_count) + unreachable("value_count is 0"); + + for (unsigned i = component; i < value_count + component; i++) { + LLVMValueRef value = values[i * value_stride]; + + if (!i) + vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count)); + LLVMValueRef index = LLVMConstInt(ctx->i32, i, false); + vec = LLVMBuildInsertElement(ctx->builder, vec, value, index, ""); + } + return vec; +} + +static LLVMValueRef load_tcs_input(struct nir_to_llvm_context *ctx, nir_intrinsic_instr *instr) { LLVMValueRef dw_addr, stride; unsigned const_index; LLVMValueRef vertex_index; LLVMValueRef indir_index; unsigned param; LLVMValueRef value[4], result; const bool per_vertex = 
nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage); @@ -2831,26 +2853,27 @@ load_tcs_input(struct nir_to_llvm_context *ctx, param = shader_io_get_unique_index(instr->variables[0]->var->data.location); get_deref_offset(ctx->nir, instr->variables[0], false, NULL, per_vertex ? &vertex_index : NULL, &const_index, &indir_index); stride = unpack_param(&ctx->ac, ctx->tcs_in_layout, 13, 8); dw_addr = get_tcs_in_current_patch_offset(ctx); dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride, indir_index); - for (unsigned i = 0; i < instr->num_components; i++) { + unsigned comp = instr->variables[0]->var->data.location_frac; + for (unsigned i = 0; i < instr->num_components + comp; i++) { value[i] = lds_load(ctx, dw_addr); dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->i32one, ""); } - result = ac_build_gather_values(&ctx->ac, value, instr->num_components); + result = build_varying_gather_values(&ctx->ac, value, instr->num_components, comp); result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, &instr->dest.ssa), ""); return result; } static LLVMValueRef load_tcs_output(struct nir_to_llvm_context *ctx, nir_intrinsic_instr *instr) { LLVMValueRef dw_addr; LLVMValueRef stride = NULL; @@ -2869,26 +2892,27 @@ load_tcs_output(struct nir_to_llvm_context *ctx, if (!instr->variables[0]->var->data.patch) { stride = unpack_param(&ctx->ac, ctx->tcs_out_layout, 13, 8); dw_addr = get_tcs_out_current_patch_offset(ctx); } else { dw_addr = get_tcs_out_current_patch_data_offset(ctx); } dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride, indir_index); - for (unsigned i = 0; i < instr->num_components; i++) { + unsigned comp = instr->variables[0]->var->data.location_frac; + for (unsigned i = comp; i < instr->num_components + comp; i++) { value[i] = lds_load(ctx, dw_addr); dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->i32one, ""); } - result = ac_build_gather_values(&ctx->ac, value, 
instr->num_components); + result = build_varying_gather_values(&ctx->ac, value, instr->num_components, comp); result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, &instr->dest.ssa), ""); return result; } static void store_tcs_output(struct nir_to_llvm_context *ctx, nir_intrinsic_instr *instr, LLVMValueRef src, unsigned writemask) { @@ -2971,23 +2995,28 @@ load_tes_input(struct nir_to_llvm_context *ctx, get_deref_offset(ctx->nir, instr->variables[0], false, NULL, per_vertex ? &vertex_index : NULL, &const_index, &indir_index); param = shader_io_get_unique_index(instr->variables[0]->var->data.location); if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 && is_compact && const_index > 3) { const_index -= 3; param++; } + + unsigned comp = instr->variables[0]->var->data.location_frac; buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, is_compact, vertex_index, indir_index); + LLVMValueRef comp_offset = LLVMConstInt(ctx->i32, comp * 4, false); + buf_addr = LLVMBuildAdd(ctx->builder, buf_addr, comp_offset, ""); + result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, instr->num_components, NULL, buf_addr, ctx->oc_lds, is_compact ? 
(4 * const_index) : 0, 1, 0, true, false); result = trim_vector(&ctx->ac, result, instr->num_components); result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, &instr->dest.ssa), ""); return result; } static LLVMValueRef load_gs_input(struct nir_to_llvm_context *ctx, nir_intrinsic_instr *instr) @@ -3000,21 +3029,23 @@ load_gs_input(struct nir_to_llvm_context *ctx, unsigned vertex_index; get_deref_offset(ctx->nir, instr->variables[0], false, &vertex_index, NULL, &const_index, &indir_index); vtx_offset_param = vertex_index; assert(vtx_offset_param < 6); vtx_offset = LLVMBuildMul(ctx->builder, ctx->gs_vtx_offset[vtx_offset_param], LLVMConstInt(ctx->i32, 4, false), ""); param = shader_io_get_unique_index(instr->variables[0]->var->data.location); - for (unsigned i = 0; i < instr->num_components; i++) { + + unsigned comp = instr->variables[0]->var->data.location_frac; + for (unsigned i = comp; i < instr->num_components + comp; i++) { if (ctx->ac.chip_class >= GFX9) { LLVMValueRef dw_addr = ctx->gs_vtx_offset[vtx_offset_param]; dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, LLVMConstInt(ctx->ac.i32, param * 4 + i + const_index, 0), ""); value[i] = lds_load(ctx, dw_addr); } else { args[0] = ctx->esgs_ring; args[1] = vtx_offset; args[2] = LLVMConstInt(ctx->i32, (param * 4 + i + const_index) * 256, false); args[3] = ctx->i32zero; @@ -3023,21 +3054,21 @@ load_gs_input(struct nir_to_llvm_context *ctx, args[6] = ctx->i32one; /* GLC */ args[7] = ctx->i32zero; /* SLC */ args[8] = ctx->i32zero; /* TFE */ value[i] = ac_build_intrinsic(&ctx->ac, "llvm.SI.buffer.load.dword.i32.i32", ctx->i32, args, 9, AC_FUNC_ATTR_READONLY | AC_FUNC_ATTR_LEGACY); } } - result = ac_build_gather_values(&ctx->ac, value, instr->num_components); + result = build_varying_gather_values(&ctx->ac, value, instr->num_components, comp); return result; } static LLVMValueRef build_gep_for_deref(struct ac_nir_context *ctx, nir_deref_var *deref) { struct hash_entry *entry = 
_mesa_hash_table_search(ctx->vars, deref->var); assert(entry->data); @@ -3073,41 +3104,43 @@ build_gep_for_deref(struct ac_nir_context *ctx, } return val; } static LLVMValueRef visit_load_var(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) { LLVMValueRef values[8]; int idx = instr->variables[0]->var->data.driver_location; int ve = instr->dest.ssa.num_components; + unsigned comp = instr->variables[0]->var->data.location_frac; LLVMValueRef indir_index; LLVMValueRef ret; unsigned const_index; bool vs_in = ctx->stage == MESA_SHADER_VERTEX && instr->variables[0]->var->data.mode == nir_var_shader_in; get_deref_offset(ctx, instr->variables[0], vs_in, NULL, NULL, &const_index, &indir_index); if (instr->dest.ssa.bit_size == 64) ve *= 2; switch (instr->variables[0]->var->data.mode) { case nir_var_shader_in: if (ctx->stage == MESA_SHADER_TESS_CTRL) return load_tcs_input(ctx->nctx, instr); if (ctx->stage == MESA_SHADER_TESS_EVAL) return load_tes_input(ctx->nctx, instr); if (ctx->stage == MESA_SHADER_GEOMETRY) { return load_gs_input(ctx->nctx, instr); } - for (unsigned chan = 0; chan < ve; chan++) { + + for (unsigned chan = comp; chan < ve + comp; chan++) { if (indir_index) { unsigned count = glsl_count_attribute_slots( instr->variables[0]->var->type, ctx->stage == MESA_SHADER_VERTEX); count -= chan / 4; LLVMValueRef tmp_vec = ac_build_gather_values_extended( &ctx->ac, ctx->abi->inputs + idx + chan, count, 4, false, true); values[chan] = LLVMBuildExtractElement(ctx->ac.builder, @@ -3139,21 +3172,22 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx, LLVMValueRef address = build_gep_for_deref(ctx, instr->variables[0]); LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, ""); return LLVMBuildBitCast(ctx->ac.builder, val, get_def_type(ctx, &instr->dest.ssa), ""); } case nir_var_shader_out: if (ctx->stage == MESA_SHADER_TESS_CTRL) return load_tcs_output(ctx->nctx, instr); - for (unsigned chan = 0; chan < ve; chan++) { + + for (unsigned chan = comp; chan 
< ve + comp; chan++) { if (indir_index) { unsigned count = glsl_count_attribute_slots( instr->variables[0]->var->type, false); count -= chan / 4; LLVMValueRef tmp_vec = ac_build_gather_values_extended( &ctx->ac, ctx->outputs + idx + chan, count, 4, true, true); values[chan] = LLVMBuildExtractElement(ctx->ac.builder, tmp_vec, @@ -3161,32 +3195,33 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx, } else { values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->outputs[idx + chan + const_index * 4], ""); } } break; default: unreachable("unhandle variable mode"); } - ret = ac_build_gather_values(&ctx->ac, values, ve); + ret = build_varying_gather_values(&ctx->ac, values, ve, comp); return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), ""); } static void visit_store_var(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) { LLVMValueRef temp_ptr, value; int idx = instr->variables[0]->var->data.driver_location; + unsigned comp = instr->variables[0]->var->data.location_frac; LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0])); - int writemask = instr->const_index[0]; + int writemask = instr->const_index[0] << comp; LLVMValueRef indir_index; unsigned const_index; get_deref_offset(ctx, instr->variables[0], false, NULL, NULL, &const_index, &indir_index); if (get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) { int old_writemask = writemask; src = LLVMBuildBitCast(ctx->ac.builder, src, LLVMVectorType(ctx->ac.f32, get_llvm_num_components(src) * 2), -- 2.13.6 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev