On Wed, Oct 25, 2017 at 1:04 AM, Timothy Arceri <tarc...@itsqueeze.com> wrote: > This is needed for RADV to support explicit component packing. > > This is also required to use the new NIR component splitting / > packing passes. > > V2: > - add component packing support for interpolate_at* intrinsics > - improve store packing support when not all varyings are scalar > as spotted by Bas the store source was incorrectly offset. > --- > src/amd/common/ac_nir_to_llvm.c | 68 > +++++++++++++++++++++++++++++++---------- > 1 file changed, 52 insertions(+), 16 deletions(-) > > diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c > index 2e50e50b12..5d9c5be7d2 100644 > --- a/src/amd/common/ac_nir_to_llvm.c > +++ b/src/amd/common/ac_nir_to_llvm.c > @@ -1060,21 +1060,20 @@ static int get_llvm_num_components(LLVMValueRef value) > : 1; > return num_components; > } > > static LLVMValueRef llvm_extract_elem(struct ac_llvm_context *ac, > LLVMValueRef value, > int index) > { > int count = get_llvm_num_components(value); > > - assert(index < count); > if (count == 1) > return value; > > return LLVMBuildExtractElement(ac->builder, value, > LLVMConstInt(ac->i32, index, false), > ""); > } > > static LLVMValueRef trim_vector(struct ac_llvm_context *ctx, > LLVMValueRef value, unsigned count) > { > @@ -2811,20 +2810,42 @@ get_dw_address(struct nir_to_llvm_context *ctx, > dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, > LLVMConstInt(ctx->i32, param * 4, false), ""); > > if (const_index && compact_const_index) > dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, > LLVMConstInt(ctx->i32, const_index, > false), ""); > return dw_addr; > } > > static LLVMValueRef > +build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef > *values, > + unsigned value_count, unsigned component) > +{ > + LLVMValueRef vec = NULL; > + > + if (value_count == 1) { > + return values[component]; > + } else if (!value_count) > + unreachable("value_count is 0"); > + > + for (unsigned i = 
component; i < value_count + component; i++) { > + LLVMValueRef value = values[i]; > + > + if (!i) > + vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), > value_count)); > + LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
Doesn't the insert index need to be i - component, so that it covers the range [0, value_count)? As written, i runs over [component, component + value_count), while the vector only has value_count elements. Otherwise, Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> > + vec = LLVMBuildInsertElement(ctx->builder, vec, value, index, > ""); > + } > + return vec; > +} > + > +static LLVMValueRef > load_tcs_input(struct nir_to_llvm_context *ctx, > nir_intrinsic_instr *instr) > { > LLVMValueRef dw_addr, stride; > unsigned const_index; > LLVMValueRef vertex_index; > LLVMValueRef indir_index; > unsigned param; > LLVMValueRef value[4], result; > const bool per_vertex = > nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage); > @@ -2832,26 +2853,27 @@ load_tcs_input(struct nir_to_llvm_context *ctx, > param = > shader_io_get_unique_index(instr->variables[0]->var->data.location); > get_deref_offset(ctx->nir, instr->variables[0], > false, NULL, per_vertex ? &vertex_index : NULL, > &const_index, &indir_index); > > stride = unpack_param(&ctx->ac, ctx->tcs_in_layout, 13, 8); > dw_addr = get_tcs_in_current_patch_offset(ctx); > dw_addr = get_dw_address(ctx, dw_addr, param, const_index, > is_compact, vertex_index, stride, > indir_index); > > - for (unsigned i = 0; i < instr->num_components; i++) { > + unsigned comp = instr->variables[0]->var->data.location_frac; > + for (unsigned i = 0; i < instr->num_components + comp; i++) { > value[i] = lds_load(ctx, dw_addr); > dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, > ctx->i32one, ""); > } > - result = ac_build_gather_values(&ctx->ac, value, > instr->num_components); > + result = build_varying_gather_values(&ctx->ac, value, > instr->num_components, comp); > result = LLVMBuildBitCast(ctx->builder, result, > get_def_type(ctx->nir, &instr->dest.ssa), ""); > return result; > } > > static LLVMValueRef > load_tcs_output(struct nir_to_llvm_context *ctx, > nir_intrinsic_instr *instr) > { > LLVMValueRef dw_addr; > LLVMValueRef stride = NULL; > @@ -2870,43 +2892,45 @@ load_tcs_output(struct nir_to_llvm_context *ctx, > if (!instr->variables[0]->var->data.patch) { > 
stride = unpack_param(&ctx->ac, ctx->tcs_out_layout, 13, 8); > dw_addr = get_tcs_out_current_patch_offset(ctx); > } else { > dw_addr = get_tcs_out_current_patch_data_offset(ctx); > } > > dw_addr = get_dw_address(ctx, dw_addr, param, const_index, > is_compact, vertex_index, stride, > indir_index); > > - for (unsigned i = 0; i < instr->num_components; i++) { > + unsigned comp = instr->variables[0]->var->data.location_frac; > + for (unsigned i = comp; i < instr->num_components + comp; i++) { > value[i] = lds_load(ctx, dw_addr); > dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, > ctx->i32one, ""); > } > - result = ac_build_gather_values(&ctx->ac, value, > instr->num_components); > + result = build_varying_gather_values(&ctx->ac, value, > instr->num_components, comp); > result = LLVMBuildBitCast(ctx->builder, result, > get_def_type(ctx->nir, &instr->dest.ssa), ""); > return result; > } > > static void > store_tcs_output(struct nir_to_llvm_context *ctx, > nir_intrinsic_instr *instr, > LLVMValueRef src, > unsigned writemask) > { > LLVMValueRef dw_addr; > LLVMValueRef stride = NULL; > LLVMValueRef buf_addr = NULL; > LLVMValueRef vertex_index = NULL; > LLVMValueRef indir_index = NULL; > unsigned const_index = 0; > unsigned param; > + const unsigned comp = instr->variables[0]->var->data.location_frac; > const bool per_vertex = > nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage); > const bool is_compact = instr->variables[0]->var->data.compact; > > get_deref_offset(ctx->nir, instr->variables[0], > false, NULL, per_vertex ? 
&vertex_index : NULL, > &const_index, &indir_index); > > param = > shader_io_get_unique_index(instr->variables[0]->var->data.location); > if (instr->variables[0]->var->data.location == > VARYING_SLOT_CLIP_DIST0 && > is_compact && const_index > 3) { > @@ -2930,21 +2954,21 @@ store_tcs_output(struct nir_to_llvm_context *ctx, > > bool is_tess_factor = false; > if (instr->variables[0]->var->data.location == > VARYING_SLOT_TESS_LEVEL_INNER || > instr->variables[0]->var->data.location == > VARYING_SLOT_TESS_LEVEL_OUTER) > is_tess_factor = true; > > unsigned base = is_compact ? const_index : 0; > for (unsigned chan = 0; chan < 8; chan++) { > if (!(writemask & (1 << chan))) > continue; > - LLVMValueRef value = llvm_extract_elem(&ctx->ac, src, chan); > + LLVMValueRef value = llvm_extract_elem(&ctx->ac, src, chan - > comp); > > lds_store(ctx, dw_addr, value); > > if (!is_tess_factor && writemask != 0xF) > ac_build_buffer_store_dword(&ctx->ac, > ctx->hs_ring_tess_offchip, value, 1, > buf_addr, ctx->oc_lds, > 4 * (base + chan), 1, 0, > true, false); > > dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, > ctx->i32one, ""); > @@ -2972,23 +2996,28 @@ load_tes_input(struct nir_to_llvm_context *ctx, > > get_deref_offset(ctx->nir, instr->variables[0], > false, NULL, per_vertex ? 
&vertex_index : NULL, > &const_index, &indir_index); > param = > shader_io_get_unique_index(instr->variables[0]->var->data.location); > if (instr->variables[0]->var->data.location == > VARYING_SLOT_CLIP_DIST0 && > is_compact && const_index > 3) { > const_index -= 3; > param++; > } > + > + unsigned comp = instr->variables[0]->var->data.location_frac; > buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, > is_compact, > vertex_index, indir_index); > > + LLVMValueRef comp_offset = LLVMConstInt(ctx->i32, comp * 4, false); > + buf_addr = LLVMBuildAdd(ctx->builder, buf_addr, comp_offset, ""); > + > result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, > instr->num_components, NULL, > buf_addr, ctx->oc_lds, is_compact ? (4 > * const_index) : 0, 1, 0, true, false); > result = trim_vector(&ctx->ac, result, instr->num_components); > result = LLVMBuildBitCast(ctx->builder, result, > get_def_type(ctx->nir, &instr->dest.ssa), ""); > return result; > } > > static LLVMValueRef > load_gs_input(struct nir_to_llvm_context *ctx, > nir_intrinsic_instr *instr) > @@ -3001,21 +3030,23 @@ load_gs_input(struct nir_to_llvm_context *ctx, > unsigned vertex_index; > get_deref_offset(ctx->nir, instr->variables[0], > false, &vertex_index, NULL, > &const_index, &indir_index); > vtx_offset_param = vertex_index; > assert(vtx_offset_param < 6); > vtx_offset = LLVMBuildMul(ctx->builder, > ctx->gs_vtx_offset[vtx_offset_param], > LLVMConstInt(ctx->i32, 4, false), ""); > > param = > shader_io_get_unique_index(instr->variables[0]->var->data.location); > - for (unsigned i = 0; i < instr->num_components; i++) { > + > + unsigned comp = instr->variables[0]->var->data.location_frac; > + for (unsigned i = comp; i < instr->num_components + comp; i++) { > if (ctx->ac.chip_class >= GFX9) { > LLVMValueRef dw_addr = > ctx->gs_vtx_offset[vtx_offset_param]; > dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, > LLVMConstInt(ctx->ac.i32, > param * 4 + i + const_index, 0), ""); > value[i] = 
lds_load(ctx, dw_addr); > } else { > args[0] = ctx->esgs_ring; > args[1] = vtx_offset; > args[2] = LLVMConstInt(ctx->i32, (param * 4 + i + > const_index) * 256, false); > args[3] = ctx->i32zero; > @@ -3024,21 +3055,21 @@ load_gs_input(struct nir_to_llvm_context *ctx, > args[6] = ctx->i32one; /* GLC */ > args[7] = ctx->i32zero; /* SLC */ > args[8] = ctx->i32zero; /* TFE */ > > value[i] = ac_build_intrinsic(&ctx->ac, > "llvm.SI.buffer.load.dword.i32.i32", > ctx->i32, args, 9, > AC_FUNC_ATTR_READONLY | > AC_FUNC_ATTR_LEGACY); > } > } > - result = ac_build_gather_values(&ctx->ac, value, > instr->num_components); > + result = build_varying_gather_values(&ctx->ac, value, > instr->num_components, comp); > > return result; > } > > static LLVMValueRef > build_gep_for_deref(struct ac_nir_context *ctx, > nir_deref_var *deref) > { > struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, > deref->var); > assert(entry->data); > @@ -3074,41 +3105,43 @@ build_gep_for_deref(struct ac_nir_context *ctx, > } > return val; > } > > static LLVMValueRef visit_load_var(struct ac_nir_context *ctx, > nir_intrinsic_instr *instr) > { > LLVMValueRef values[8]; > int idx = instr->variables[0]->var->data.driver_location; > int ve = instr->dest.ssa.num_components; > + unsigned comp = instr->variables[0]->var->data.location_frac; > LLVMValueRef indir_index; > LLVMValueRef ret; > unsigned const_index; > bool vs_in = ctx->stage == MESA_SHADER_VERTEX && > instr->variables[0]->var->data.mode == nir_var_shader_in; > get_deref_offset(ctx, instr->variables[0], vs_in, NULL, NULL, > &const_index, &indir_index); > > if (instr->dest.ssa.bit_size == 64) > ve *= 2; > > switch (instr->variables[0]->var->data.mode) { > case nir_var_shader_in: > if (ctx->stage == MESA_SHADER_TESS_CTRL) > return load_tcs_input(ctx->nctx, instr); > if (ctx->stage == MESA_SHADER_TESS_EVAL) > return load_tes_input(ctx->nctx, instr); > if (ctx->stage == MESA_SHADER_GEOMETRY) { > return load_gs_input(ctx->nctx, instr); > } > - 
for (unsigned chan = 0; chan < ve; chan++) { > + > + for (unsigned chan = comp; chan < ve + comp; chan++) { > if (indir_index) { > unsigned count = glsl_count_attribute_slots( > > instr->variables[0]->var->type, > ctx->stage == > MESA_SHADER_VERTEX); > count -= chan / 4; > LLVMValueRef tmp_vec = > ac_build_gather_values_extended( > &ctx->ac, ctx->abi->inputs + > idx + chan, count, > 4, false, true); > > values[chan] = > LLVMBuildExtractElement(ctx->ac.builder, > @@ -3140,21 +3173,22 @@ static LLVMValueRef visit_load_var(struct > ac_nir_context *ctx, > LLVMValueRef address = build_gep_for_deref(ctx, > > instr->variables[0]); > LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, > ""); > return LLVMBuildBitCast(ctx->ac.builder, val, > get_def_type(ctx, &instr->dest.ssa), > ""); > } > case nir_var_shader_out: > if (ctx->stage == MESA_SHADER_TESS_CTRL) > return load_tcs_output(ctx->nctx, instr); > - for (unsigned chan = 0; chan < ve; chan++) { > + > + for (unsigned chan = comp; chan < ve + comp; chan++) { > if (indir_index) { > unsigned count = glsl_count_attribute_slots( > > instr->variables[0]->var->type, false); > count -= chan / 4; > LLVMValueRef tmp_vec = > ac_build_gather_values_extended( > &ctx->ac, ctx->outputs + idx > + chan, count, > 4, true, true); > > values[chan] = > LLVMBuildExtractElement(ctx->ac.builder, > > tmp_vec, > @@ -3162,32 +3196,33 @@ static LLVMValueRef visit_load_var(struct > ac_nir_context *ctx, > } else { > values[chan] = LLVMBuildLoad(ctx->ac.builder, > ctx->outputs[idx + chan > + const_index * 4], > ""); > } > } > break; > default: > unreachable("unhandle variable mode"); > } > - ret = ac_build_gather_values(&ctx->ac, values, ve); > + ret = build_varying_gather_values(&ctx->ac, values, ve, comp); > return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, > &instr->dest.ssa), ""); > } > > static void > visit_store_var(struct ac_nir_context *ctx, > nir_intrinsic_instr *instr) > { > LLVMValueRef temp_ptr, value; > int idx = 
instr->variables[0]->var->data.driver_location; > + unsigned comp = instr->variables[0]->var->data.location_frac; > LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0])); > - int writemask = instr->const_index[0]; > + int writemask = instr->const_index[0] << comp; > LLVMValueRef indir_index; > unsigned const_index; > get_deref_offset(ctx, instr->variables[0], false, > NULL, NULL, &const_index, &indir_index); > > if (get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) { > int old_writemask = writemask; > > src = LLVMBuildBitCast(ctx->ac.builder, src, > LLVMVectorType(ctx->ac.f32, > get_llvm_num_components(src) * 2), > @@ -3206,21 +3241,21 @@ visit_store_var(struct ac_nir_context *ctx, > if (ctx->stage == MESA_SHADER_TESS_CTRL) { > store_tcs_output(ctx->nctx, instr, src, writemask); > return; > } > > for (unsigned chan = 0; chan < 8; chan++) { > int stride = 4; > if (!(writemask & (1 << chan))) > continue; > > - value = llvm_extract_elem(&ctx->ac, src, chan); > + value = llvm_extract_elem(&ctx->ac, src, chan - comp); > > if (instr->variables[0]->var->data.compact) > stride = 1; > if (indir_index) { > unsigned count = glsl_count_attribute_slots( > > instr->variables[0]->var->type, false); > count -= chan / 4; > LLVMValueRef tmp_vec = > ac_build_gather_values_extended( > &ctx->ac, ctx->outputs + idx > + chan, count, > stride, true, true); > @@ -3907,21 +3942,21 @@ static LLVMValueRef load_sample_pos(struct > ac_nir_context *ctx) > LLVMValueRef values[2]; > > values[0] = emit_ffract(&ctx->ac, ctx->abi->frag_pos[0]); > values[1] = emit_ffract(&ctx->ac, ctx->abi->frag_pos[1]); > return ac_build_gather_values(&ctx->ac, values, 2); > } > > static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx, > const nir_intrinsic_instr *instr) > { > - LLVMValueRef result[2]; > + LLVMValueRef result[4]; > LLVMValueRef interp_param, attr_number; > unsigned location; > unsigned chan; > LLVMValueRef src_c0 = NULL; > LLVMValueRef src_c1 = NULL; > LLVMValueRef src0 = 
NULL; > int input_index = instr->variables[0]->var->data.location - > VARYING_SLOT_VAR0; > switch (instr->intrinsic) { > case nir_intrinsic_interp_var_at_centroid: > location = INTERP_CENTROID; > @@ -3985,42 +4020,43 @@ static LLVMValueRef visit_interp(struct > nir_to_llvm_context *ctx, > temp2 = LLVMBuildFMul(ctx->builder, ddy_el, src_c1, > ""); > temp2 = LLVMBuildFAdd(ctx->builder, temp2, temp1, ""); > > ij_out[i] = LLVMBuildBitCast(ctx->builder, > temp2, ctx->i32, ""); > } > interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2); > > } > > - for (chan = 0; chan < 2; chan++) { > + for (chan = 0; chan < 4; chan++) { > LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false); > > if (interp_param) { > interp_param = LLVMBuildBitCast(ctx->builder, > interp_param, > LLVMVectorType(ctx->f32, 2), ""); > LLVMValueRef i = LLVMBuildExtractElement( > ctx->builder, interp_param, ctx->i32zero, ""); > LLVMValueRef j = LLVMBuildExtractElement( > ctx->builder, interp_param, ctx->i32one, ""); > > result[chan] = ac_build_fs_interp(&ctx->ac, > llvm_chan, > attr_number, > ctx->prim_mask, i, > j); > } else { > result[chan] = ac_build_fs_interp_mov(&ctx->ac, > > LLVMConstInt(ctx->i32, 2, false), > llvm_chan, > attr_number, > ctx->prim_mask); > } > } > - return ac_build_gather_values(&ctx->ac, result, 2); > + return build_varying_gather_values(&ctx->ac, result, > instr->num_components, > + > instr->variables[0]->var->data.location_frac); > } > > static void > visit_emit_vertex(struct nir_to_llvm_context *ctx, > const nir_intrinsic_instr *instr) > { > LLVMValueRef gs_next_vertex; > LLVMValueRef can_emit, kill; > int idx; > > -- > 2.13.6 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev