Sorry for breaking radeonsi, I somehow thought this way only used for cpu only already, without actually checking... And thanks for fixing that typo, apparently you can pass piglits umul_hi/imul_hi tests (at least those from the shader_integer_mix group) even with the square of argument a... btw as I didn't consider this, I don't know if you want to change the shift/trunc to shuffle in the end - feel free to change it back if it doesn't generate good code on radeonsi...
Reviewed-by: Roland Scheidegger <srol...@vmware.com> Am 08.11.2016 um 10:15 schrieb Nicolai Hähnle: > From: Nicolai Hähnle <nicolai.haeh...@amd.com> > > This patch does two things: > > 1. It separates the host-CPU code generation from the generic code > generation. This guards against accidently breaking things for > radeonsi in the future. > > 2. It makes sure we actually use both arguments and don't just compute > a square :-p > > Fixes a regression introduced by commit > 29279f44b3172ef3b84d470e70fc7684695ced4b > > Cc: Roland Scheidegger <srol...@vmware.com> > --- > src/gallium/auxiliary/gallivm/lp_bld_arit.c | 72 > ++++++++++++++-------- > src/gallium/auxiliary/gallivm/lp_bld_arit.h | 6 ++ > src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 40 +++++++++++- > 3 files changed, 90 insertions(+), 28 deletions(-) > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c > b/src/gallium/auxiliary/gallivm/lp_bld_arit.c > index 3de4628..43ad238 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c > @@ -1087,26 +1087,28 @@ lp_build_mul(struct lp_build_context *bld, > res = LLVMBuildLShr(builder, res, shift, ""); > } > } > > return res; > } > > /* > * Widening mul, valid for 32x32 bit -> 64bit only. > * Result is low 32bits, high bits returned in res_hi. > + * > + * Emits code that is meant to be compiled for the host CPU. > */ > LLVMValueRef > -lp_build_mul_32_lohi(struct lp_build_context *bld, > - LLVMValueRef a, > - LLVMValueRef b, > - LLVMValueRef *res_hi) > +lp_build_mul_32_lohi_cpu(struct lp_build_context *bld, > + LLVMValueRef a, > + LLVMValueRef b, > + LLVMValueRef *res_hi) > { > struct gallivm_state *gallivm = bld->gallivm; > LLVMBuilderRef builder = gallivm->builder; > > assert(bld->type.width == 32); > assert(bld->type.floating == 0); > assert(bld->type.fixed == 0); > assert(bld->type.norm == 0); > > /* > @@ -1209,43 +1211,61 @@ lp_build_mul_32_lohi(struct lp_build_context *bld, > *res_hi = LLVMBuildShuffleVector(builder, muleven, mulodd, shuf_vec, > ""); > > for (i = 0; i < bld->type.length; i += 2) { > shuf[i] = lp_build_const_int32(gallivm, i); > shuf[i+1] = lp_build_const_int32(gallivm, i + bld->type.length); > } > shuf_vec = LLVMConstVector(shuf, bld->type.length); > return LLVMBuildShuffleVector(builder, muleven, mulodd, shuf_vec, ""); > } > else { > - LLVMValueRef tmp; > - struct lp_type type_tmp; > - LLVMTypeRef wide_type, cast_type; > - > - type_tmp = bld->type; > - type_tmp.width *= 2; > - wide_type = lp_build_vec_type(gallivm, type_tmp); > - type_tmp = bld->type; > - type_tmp.length *= 2; > - cast_type = lp_build_vec_type(gallivm, type_tmp); > - > - if (bld->type.sign) { > - a = LLVMBuildSExt(builder, a, wide_type, ""); > - b = LLVMBuildSExt(builder, b, wide_type, ""); > - } else { > - a = LLVMBuildZExt(builder, a, wide_type, ""); > - b = LLVMBuildZExt(builder, b, wide_type, ""); > - } > - tmp = LLVMBuildMul(builder, a, b, ""); > - tmp = LLVMBuildBitCast(builder, tmp, cast_type, ""); > - *res_hi = lp_build_uninterleave1(gallivm, bld->type.length * 2, tmp, > 1); > - return lp_build_uninterleave1(gallivm, bld->type.length * 2, tmp, 0); > + return lp_build_mul_32_lohi(bld, a, b, res_hi); > + } > +} > + > + > +/* > + * Widening mul, valid for 32x32 bit -> 64bit only. > + * Result is low 32bits, high bits returned in res_hi. > + * > + * Emits generic code. > + */ > +LLVMValueRef > +lp_build_mul_32_lohi(struct lp_build_context *bld, > + LLVMValueRef a, > + LLVMValueRef b, > + LLVMValueRef *res_hi) > +{ > + struct gallivm_state *gallivm = bld->gallivm; > + LLVMBuilderRef builder = gallivm->builder; > + LLVMValueRef tmp; > + struct lp_type type_tmp; > + LLVMTypeRef wide_type, cast_type; > + > + type_tmp = bld->type; > + type_tmp.width *= 2; > + wide_type = lp_build_vec_type(gallivm, type_tmp); > + type_tmp = bld->type; > + type_tmp.length *= 2; > + cast_type = lp_build_vec_type(gallivm, type_tmp); > + > + if (bld->type.sign) { > + a = LLVMBuildSExt(builder, a, wide_type, ""); > + b = LLVMBuildSExt(builder, b, wide_type, ""); > + } else { > + a = LLVMBuildZExt(builder, a, wide_type, ""); > + b = LLVMBuildZExt(builder, b, wide_type, ""); > } > + tmp = LLVMBuildMul(builder, a, b, ""); > + tmp = LLVMBuildBitCast(builder, tmp, cast_type, ""); > + *res_hi = lp_build_uninterleave1(gallivm, bld->type.length * 2, tmp, 1); > + return lp_build_uninterleave1(gallivm, bld->type.length * 2, tmp, 0); > } > > > /* a * b + c */ > LLVMValueRef > lp_build_mad(struct lp_build_context *bld, > LLVMValueRef a, > LLVMValueRef b, > LLVMValueRef c) > { > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h > b/src/gallium/auxiliary/gallivm/lp_bld_arit.h > index 5d48b1c..2a4137a 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h > +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h > @@ -70,20 +70,26 @@ LLVMValueRef > lp_build_sub(struct lp_build_context *bld, > LLVMValueRef a, > LLVMValueRef b); > > LLVMValueRef > lp_build_mul(struct lp_build_context *bld, > LLVMValueRef a, > LLVMValueRef b); > > LLVMValueRef > +lp_build_mul_32_lohi_cpu(struct lp_build_context *bld, > + LLVMValueRef a, > + LLVMValueRef b, > + LLVMValueRef *res_hi); > + > +LLVMValueRef > lp_build_mul_32_lohi(struct lp_build_context *bld, > LLVMValueRef a, > LLVMValueRef b, > LLVMValueRef *res_hi); > > LLVMValueRef > lp_build_mul_imm(struct lp_build_context *bld, > LLVMValueRef a, > int b); > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c > b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c > index 72d4579..9c6fc4b 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c > @@ -842,39 +842,73 @@ imul_hi_emit( > struct lp_build_tgsi_context * bld_base, > struct lp_build_emit_data * emit_data) > { > struct lp_build_context *int_bld = &bld_base->int_bld; > LLVMValueRef hi_bits; > > assert(int_bld->type.width == 32); > > /* low result bits are tossed away */ > lp_build_mul_32_lohi(int_bld, emit_data->args[0], > - emit_data->args[0], &hi_bits); > + emit_data->args[1], &hi_bits); > + emit_data->output[emit_data->chan] = hi_bits; > +} > + > +static void > +imul_hi_emit_cpu( > + const struct lp_build_tgsi_action * action, > + struct lp_build_tgsi_context * bld_base, > + struct lp_build_emit_data * emit_data) > +{ > + struct lp_build_context *int_bld = &bld_base->int_bld; > + LLVMValueRef hi_bits; > + > + assert(int_bld->type.width == 32); > + > + /* low result bits are tossed away */ > + lp_build_mul_32_lohi_cpu(int_bld, emit_data->args[0], > + emit_data->args[1], &hi_bits); > emit_data->output[emit_data->chan] = hi_bits; > } > > /* TGSI_OPCODE_UMUL_HI */ > static void > umul_hi_emit( > const struct lp_build_tgsi_action * action, > struct lp_build_tgsi_context * bld_base, > struct lp_build_emit_data * emit_data) > { > struct lp_build_context *uint_bld = &bld_base->uint_bld; > LLVMValueRef hi_bits; > > assert(uint_bld->type.width == 32); > > /* low result bits are tossed away */ > lp_build_mul_32_lohi(uint_bld, emit_data->args[0], > - emit_data->args[0], &hi_bits); > + emit_data->args[1], &hi_bits); > + emit_data->output[emit_data->chan] = hi_bits; > +} > + > +static void > +umul_hi_emit_cpu( > + const struct lp_build_tgsi_action * action, > + struct lp_build_tgsi_context * bld_base, > + struct lp_build_emit_data * emit_data) > +{ > + struct lp_build_context *uint_bld = &bld_base->uint_bld; > + LLVMValueRef hi_bits; > + > + assert(uint_bld->type.width == 32); > + > + /* low result bits are tossed away */ > + lp_build_mul_32_lohi_cpu(uint_bld, emit_data->args[0], > + emit_data->args[1], &hi_bits); > emit_data->output[emit_data->chan] = hi_bits; > } > > /* TGSI_OPCODE_MAX */ > static void fmax_emit( > const struct lp_build_tgsi_action * action, > struct lp_build_tgsi_context * bld_base, > struct lp_build_emit_data * emit_data) > { > LLVMBuilderRef builder = bld_base->base.gallivm->builder; > @@ -2574,20 +2608,22 @@ lp_set_default_actions_cpu( > bld_base->op_actions[TGSI_OPCODE_I2F].emit = i2f_emit_cpu; > bld_base->op_actions[TGSI_OPCODE_IABS].emit = iabs_emit_cpu; > bld_base->op_actions[TGSI_OPCODE_IDIV].emit = idiv_emit_cpu; > bld_base->op_actions[TGSI_OPCODE_INEG].emit = ineg_emit_cpu; > bld_base->op_actions[TGSI_OPCODE_IMAX].emit = imax_emit_cpu; > bld_base->op_actions[TGSI_OPCODE_IMIN].emit = imin_emit_cpu; > bld_base->op_actions[TGSI_OPCODE_ISGE].emit = isge_emit_cpu; > bld_base->op_actions[TGSI_OPCODE_ISHR].emit = ishr_emit_cpu; > bld_base->op_actions[TGSI_OPCODE_ISLT].emit = islt_emit_cpu; > bld_base->op_actions[TGSI_OPCODE_ISSG].emit = issg_emit_cpu; > + bld_base->op_actions[TGSI_OPCODE_IMUL_HI].emit = imul_hi_emit_cpu; > + bld_base->op_actions[TGSI_OPCODE_UMUL_HI].emit = umul_hi_emit_cpu; > > bld_base->op_actions[TGSI_OPCODE_LG2].emit = lg2_emit_cpu; > bld_base->op_actions[TGSI_OPCODE_LOG].emit = log_emit_cpu; > bld_base->op_actions[TGSI_OPCODE_MAD].emit = mad_emit_cpu; > bld_base->op_actions[TGSI_OPCODE_MAX].emit = max_emit_cpu; > bld_base->op_actions[TGSI_OPCODE_MIN].emit = min_emit_cpu; > bld_base->op_actions[TGSI_OPCODE_MOD].emit = mod_emit_cpu; > bld_base->op_actions[TGSI_OPCODE_NOT].emit = not_emit_cpu; > bld_base->op_actions[TGSI_OPCODE_OR].emit = or_emit_cpu; > bld_base->op_actions[TGSI_OPCODE_POW].emit = pow_emit_cpu; > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev