From: Nicolai Hähnle <nicolai.haeh...@amd.com> ... and eliminate the non-ac copies. Mostly straightforward search & replace. --- src/amd/common/ac_nir_to_llvm.c | 89 +++++++++++------------------------------ 1 file changed, 24 insertions(+), 65 deletions(-)
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index fefd5e7..5fe9e8c 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -624,53 +624,20 @@ static LLVMValueRef trim_vector(struct nir_to_llvm_context *ctx, LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)}; if (count == 1) return LLVMBuildExtractElement(ctx->builder, value, masks[0], ""); LLVMValueRef swizzle = LLVMConstVector(masks, count); return LLVMBuildShuffleVector(ctx->builder, value, value, swizzle, ""); } -static LLVMValueRef -build_gather_values_extended(struct nir_to_llvm_context *ctx, - LLVMValueRef *values, - unsigned value_count, - unsigned value_stride, - bool load) -{ - LLVMBuilderRef builder = ctx->builder; - LLVMValueRef vec; - unsigned i; - - - if (value_count == 1) { - if (load) - return LLVMBuildLoad(builder, values[0], ""); - return values[0]; - } else if (!value_count) - unreachable("value_count is 0"); - - for (i = 0; i < value_count; i++) { - LLVMValueRef value = values[i * value_stride]; - if (load) - value = LLVMBuildLoad(builder, value, ""); - - if (!i) - vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count)); - LLVMValueRef index = LLVMConstInt(ctx->i32, i, false); - vec = LLVMBuildInsertElement(builder, vec, value, index, ""); - } - return vec; -} - - static void build_store_values_extended(struct nir_to_llvm_context *ctx, LLVMValueRef *values, unsigned value_count, unsigned value_stride, LLVMValueRef vec) { LLVMBuilderRef builder = ctx->builder; unsigned i; @@ -680,28 +647,20 @@ build_store_values_extended(struct nir_to_llvm_context *ctx, } for (i = 0; i < value_count; i++) { LLVMValueRef ptr = values[i * value_stride]; LLVMValueRef index = LLVMConstInt(ctx->i32, i, false); LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, ""); LLVMBuildStore(builder, value, ptr); } } -static LLVMValueRef -build_gather_values(struct nir_to_llvm_context *ctx, - LLVMValueRef *values, - 
unsigned value_count) -{ - return build_gather_values_extended(ctx, values, value_count, 1, false); -} - static LLVMTypeRef get_def_type(struct nir_to_llvm_context *ctx, nir_ssa_def *def) { LLVMTypeRef type = LLVMIntTypeInContext(ctx->context, def->bit_size); if (def->num_components > 1) { type = LLVMVectorType(type, def->num_components); } return type; } @@ -744,21 +703,21 @@ static LLVMValueRef get_alu_src(struct nir_to_llvm_context *ctx, LLVMConstInt(ctx->i32, src.swizzle[0], false), LLVMConstInt(ctx->i32, src.swizzle[1], false), LLVMConstInt(ctx->i32, src.swizzle[2], false), LLVMConstInt(ctx->i32, src.swizzle[3], false)}; if (src_components > 1 && num_components == 1) { value = LLVMBuildExtractElement(ctx->builder, value, masks[0], ""); } else if (src_components == 1 && num_components > 1) { LLVMValueRef values[] = {value, value, value, value}; - value = build_gather_values(ctx, values, num_components); + value = ac_build_gather_values(&ctx->ac, values, num_components); } else { LLVMValueRef swizzle = LLVMConstVector(masks, num_components); value = LLVMBuildShuffleVector(ctx->builder, value, value, swizzle, ""); } } assert(!src.negate); assert(!src.abs); return value; } @@ -1224,21 +1183,21 @@ static LLVMValueRef emit_ddxy_interp( { LLVMValueRef result[4], a; unsigned i; for (i = 0; i < 2; i++) { a = LLVMBuildExtractElement(ctx->builder, interp_ij, LLVMConstInt(ctx->i32, i, false), ""); result[i] = emit_ddxy(ctx, nir_op_fddx, a); result[2+i] = emit_ddxy(ctx, nir_op_fddy, a); } - return build_gather_values(ctx, result, 4); + return ac_build_gather_values(&ctx->ac, result, 4); } static LLVMValueRef emit_fdiv(struct nir_to_llvm_context *ctx, LLVMValueRef num, LLVMValueRef den) { LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, ""); if (!LLVMIsConstant(ret)) LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp); @@ -1476,21 +1435,21 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr) result = 
ac_emit_llvm_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE); break; case nir_op_bit_count: result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE); break; case nir_op_vec2: case nir_op_vec3: case nir_op_vec4: for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) src[i] = to_integer(ctx, src[i]); - result = build_gather_values(ctx, src, num_components); + result = ac_build_gather_values(&ctx->ac, src, num_components); break; case nir_op_f2i: src[0] = to_float(ctx, src[0]); result = LLVMBuildFPToSI(ctx->builder, src[0], ctx->i32, ""); break; case nir_op_f2u: src[0] = to_float(ctx, src[0]); result = LLVMBuildFPToUI(ctx->builder, src[0], ctx->i32, ""); break; case nir_op_i2f: @@ -1998,21 +1957,21 @@ static LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx, LLVMValueRef params[] = { rsrc, LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32, 4 * i, 0), offset, "") }; results[i] = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.load.const", ctx->f32, params, 2, AC_FUNC_ATTR_READNONE); } - ret = build_gather_values(ctx, results, instr->num_components); + ret = ac_build_gather_values(&ctx->ac, results, instr->num_components); return LLVMBuildBitCast(ctx->builder, ret, get_def_type(ctx, &instr->dest.ssa), ""); } static void radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref *tail, bool vs_in, unsigned *const_out, LLVMValueRef *indir_out) { unsigned const_offset = 0; LLVMValueRef offset = NULL; @@ -2072,86 +2031,86 @@ static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, switch (instr->variables[0]->var->data.mode) { case nir_var_shader_in: radv_get_deref_offset(ctx, &instr->variables[0]->deref, ctx->stage == MESA_SHADER_VERTEX, &const_index, &indir_index); for (unsigned chan = 0; chan < ve; chan++) { if (indir_index) { unsigned count = glsl_count_attribute_slots( instr->variables[0]->var->type, ctx->stage == MESA_SHADER_VERTEX); - LLVMValueRef 
tmp_vec = build_gather_values_extended( - ctx, ctx->inputs + idx + chan, count, + LLVMValueRef tmp_vec = ac_build_gather_values_extended( + &ctx->ac, ctx->inputs + idx + chan, count, 4, false); values[chan] = LLVMBuildExtractElement(ctx->builder, tmp_vec, indir_index, ""); } else values[chan] = ctx->inputs[idx + chan + const_index * 4]; } - return to_integer(ctx, build_gather_values(ctx, values, ve)); + return to_integer(ctx, ac_build_gather_values(&ctx->ac, values, ve)); break; case nir_var_local: radv_get_deref_offset(ctx, &instr->variables[0]->deref, false, &const_index, &indir_index); for (unsigned chan = 0; chan < ve; chan++) { if (indir_index) { unsigned count = glsl_count_attribute_slots( instr->variables[0]->var->type, false); - LLVMValueRef tmp_vec = build_gather_values_extended( - ctx, ctx->locals + idx + chan, count, + LLVMValueRef tmp_vec = ac_build_gather_values_extended( + &ctx->ac, ctx->locals + idx + chan, count, 4, true); values[chan] = LLVMBuildExtractElement(ctx->builder, tmp_vec, indir_index, ""); } else { values[chan] = LLVMBuildLoad(ctx->builder, ctx->locals[idx + chan + const_index * 4], ""); } } - return to_integer(ctx, build_gather_values(ctx, values, ve)); + return to_integer(ctx, ac_build_gather_values(&ctx->ac, values, ve)); case nir_var_shader_out: radv_get_deref_offset(ctx, &instr->variables[0]->deref, false, &const_index, &indir_index); for (unsigned chan = 0; chan < ve; chan++) { if (indir_index) { unsigned count = glsl_count_attribute_slots( instr->variables[0]->var->type, false); - LLVMValueRef tmp_vec = build_gather_values_extended( - ctx, ctx->outputs + idx + chan, count, + LLVMValueRef tmp_vec = ac_build_gather_values_extended( + &ctx->ac, ctx->outputs + idx + chan, count, 4, true); values[chan] = LLVMBuildExtractElement(ctx->builder, tmp_vec, indir_index, ""); } else { values[chan] = LLVMBuildLoad(ctx->builder, ctx->outputs[idx + chan + const_index * 4], ""); } } - return to_integer(ctx, build_gather_values(ctx, values, ve)); + 
return to_integer(ctx, ac_build_gather_values(&ctx->ac, values, ve)); case nir_var_shared: { radv_get_deref_offset(ctx, &instr->variables[0]->deref, false, &const_index, &indir_index); LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32); LLVMValueRef derived_ptr; for (unsigned chan = 0; chan < ve; chan++) { LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false); if (indir_index) index = LLVMBuildAdd(ctx->builder, index, indir_index, ""); derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, ""); values[chan] = LLVMBuildLoad(ctx->builder, derived_ptr, ""); } - return to_integer(ctx, build_gather_values(ctx, values, ve)); + return to_integer(ctx, ac_build_gather_values(&ctx->ac, values, ve)); } default: break; } return NULL; } static void visit_store_var(struct nir_to_llvm_context *ctx, nir_intrinsic_instr *instr) @@ -2177,22 +2136,22 @@ visit_store_var(struct nir_to_llvm_context *ctx, LLVMConstInt(ctx->i32, chan, false), ""); if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 || instr->variables[0]->var->data.location == VARYING_SLOT_CULL_DIST0) stride = 1; if (indir_index) { unsigned count = glsl_count_attribute_slots( instr->variables[0]->var->type, false); - LLVMValueRef tmp_vec = build_gather_values_extended( - ctx, ctx->outputs + idx + chan, count, + LLVMValueRef tmp_vec = ac_build_gather_values_extended( + &ctx->ac, ctx->outputs + idx + chan, count, stride, true); if (get_llvm_num_components(tmp_vec) > 1) { tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec, value, indir_index, ""); } else tmp_vec = value; build_store_values_extended(ctx, ctx->outputs + idx + chan, count, stride, tmp_vec); @@ -2211,22 +2170,22 @@ visit_store_var(struct nir_to_llvm_context *ctx, continue; if (get_llvm_num_components(src) == 1) value = src; else value = LLVMBuildExtractElement(ctx->builder, src, LLVMConstInt(ctx->i32, chan, false), ""); if (indir_index) { unsigned count = glsl_count_attribute_slots( instr->variables[0]->var->type, false); - 
LLVMValueRef tmp_vec = build_gather_values_extended( - ctx, ctx->locals + idx + chan, count, + LLVMValueRef tmp_vec = ac_build_gather_values_extended( + &ctx->ac, ctx->locals + idx + chan, count, 4, true); tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec, value, indir_index, ""); build_store_values_extended(ctx, ctx->locals + idx + chan, count, 4, tmp_vec); } else { temp_ptr = ctx->locals[idx + chan + const_index * 4]; LLVMBuildStore(ctx->builder, value, temp_ptr); @@ -2320,21 +2279,21 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx, } if (add_frag_pos) { for (chan = 0; chan < count; ++chan) coords[chan] = LLVMBuildAdd(ctx->builder, coords[chan], LLVMBuildFPToUI(ctx->builder, ctx->frag_pos[chan], ctx->i32, ""), ""); } if (count == 3) { coords[3] = LLVMGetUndef(ctx->i32); count = 4; } - res = build_gather_values(ctx, coords, count); + res = ac_build_gather_values(&ctx->ac, coords, count); } return res; } static void build_type_name_for_intr( LLVMTypeRef type, char *buf, unsigned bufsize) { LLVMTypeRef elem_type = type; @@ -2763,30 +2722,30 @@ static LLVMValueRef load_sample_position(struct nir_to_llvm_context *ctx, LLVMValueRef sample_id) { /* offset = sample_id * 8 (8 = 2 floats containing samplepos.xy) */ LLVMValueRef offset0 = LLVMBuildMul(ctx->builder, sample_id, LLVMConstInt(ctx->i32, 8, false), ""); LLVMValueRef offset1 = LLVMBuildAdd(ctx->builder, offset0, LLVMConstInt(ctx->i32, 4, false), ""); LLVMValueRef result[2]; result[0] = build_indexed_load_const(ctx, ctx->sample_positions, offset0); result[1] = build_indexed_load_const(ctx, ctx->sample_positions, offset1); - return build_gather_values(ctx, result, 2); + return ac_build_gather_values(&ctx->ac, result, 2); } static LLVMValueRef load_sample_pos(struct nir_to_llvm_context *ctx) { LLVMValueRef values[2]; values[0] = emit_ffract(ctx, ctx->frag_pos[0]); values[1] = emit_ffract(ctx, ctx->frag_pos[1]); - return build_gather_values(ctx, values, 2); + return 
ac_build_gather_values(&ctx->ac, values, 2); } static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx, nir_intrinsic_instr *instr) { LLVMValueRef result[2]; LLVMValueRef interp_param, attr_number; unsigned location; unsigned chan; LLVMValueRef src_c0, src_c1; @@ -2852,37 +2811,37 @@ static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx, temp1 = LLVMBuildFMul(ctx->builder, ddx_el, src_c0, ""); temp1 = LLVMBuildFAdd(ctx->builder, temp1, interp_el, ""); temp2 = LLVMBuildFMul(ctx->builder, ddy_el, src_c1, ""); temp2 = LLVMBuildFAdd(ctx->builder, temp2, temp1, ""); ij_out[i] = LLVMBuildBitCast(ctx->builder, temp2, ctx->i32, ""); } - interp_param = build_gather_values(ctx, ij_out, 2); + interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2); } intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant"; for (chan = 0; chan < 2; chan++) { LLVMValueRef args[4]; LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false); args[0] = llvm_chan; args[1] = attr_number; args[2] = ctx->prim_mask; args[3] = interp_param; result[chan] = ac_emit_llvm_intrinsic(&ctx->ac, intr_name, ctx->f32, args, args[3] ? 
4 : 3, AC_FUNC_ATTR_READNONE); } - return build_gather_values(ctx, result, 2); + return ac_build_gather_values(&ctx->ac, result, 2); } static void visit_intrinsic(struct nir_to_llvm_context *ctx, nir_intrinsic_instr *instr) { LLVMValueRef result = NULL; switch (instr->intrinsic) { case nir_intrinsic_load_work_group_id: { result = ctx->workgroup_ids; @@ -3100,21 +3059,21 @@ static void set_tex_fetch_args(struct nir_to_llvm_context *ctx, unsigned is_rect = 0; bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE; if (op == nir_texop_lod) da = false; /* Pad to power of two vector */ while (count < util_next_power_of_two(count)) param[count++] = LLVMGetUndef(ctx->i32); if (count > 1) - tinfo->args[0] = build_gather_values(ctx, param, count); + tinfo->args[0] = ac_build_gather_values(&ctx->ac, param, count); else tinfo->args[0] = param[0]; tinfo->args[1] = res_ptr; num_args = 2; if (op == nir_texop_txf || op == nir_texop_txf_ms || op == nir_texop_query_levels || op == nir_texop_texture_samples || -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev