Hmm, sure, it is rarely used (for arb_vp and d3d9 vs 1.1; maybe 2.0 too, though the semantics are different there even if the precision required is the same). The problem I have with this is that the emulation which will get used instead is _extremely_ terrible. EXP should be a cheaper alternative to EX2, yet the emulation will make it more than twice as expensive (because there are _two_ ex2 calls in exp_emit()). Also, since the exp/log functions actually have configurable precision (though it is compile-time dependent for now), maybe we could exploit that and use a polynomial of lower degree? Otherwise, though, having less specialized code makes sense.
Roland Am 11.09.2013 13:04, schrieb jfons...@vmware.com: > From: José Fonseca <jfons...@vmware.com> > > It was wrong for EXP.y, as we clamped the source before computing the > fractional part, and this opcode should be rarely used, so it's not > worth the hassle. > --- > src/gallium/auxiliary/gallivm/lp_bld_arit.c | 80 > ++++++++-------------- > src/gallium/auxiliary/gallivm/lp_bld_arit.h | 7 -- > src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 15 ---- > 3 files changed, 30 insertions(+), 72 deletions(-) > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c > b/src/gallium/auxiliary/gallivm/lp_bld_arit.c > index 09107ff..00052ed 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c > @@ -3001,12 +3001,9 @@ const double lp_build_exp2_polynomial[] = { > }; > > > -void > -lp_build_exp2_approx(struct lp_build_context *bld, > - LLVMValueRef x, > - LLVMValueRef *p_exp2_int_part, > - LLVMValueRef *p_frac_part, > - LLVMValueRef *p_exp2) > +LLVMValueRef > +lp_build_exp2(struct lp_build_context *bld, > + LLVMValueRef x) > { > LLVMBuilderRef builder = bld->gallivm->builder; > const struct lp_type type = bld->type; > @@ -3019,65 +3016,48 @@ lp_build_exp2_approx(struct lp_build_context *bld, > > assert(lp_check_value(bld->type, x)); > > - if(p_exp2_int_part || p_frac_part || p_exp2) { > - /* TODO: optimize the constant case */ > - if (gallivm_debug & GALLIVM_DEBUG_PERF && > - LLVMIsConstant(x)) { > - debug_printf("%s: inefficient/imprecise constant arithmetic\n", > - __FUNCTION__); > - } > > - assert(type.floating && type.width == 32); > + /* TODO: optimize the constant case */ > + if (gallivm_debug & GALLIVM_DEBUG_PERF && > + LLVMIsConstant(x)) { > + debug_printf("%s: inefficient/imprecise constant arithmetic\n", > + __FUNCTION__); > + } > > - /* We want to preserve NaN and make sure than for exp2 if x > 128, > - * the result is INF and if it's smaller than -126.9 the result is 0 > */ > - x = 
lp_build_min_ext(bld, lp_build_const_vec(bld->gallivm, type, > 128.0), x, > - GALLIVM_NAN_RETURN_SECOND); > - x = lp_build_max_ext(bld, lp_build_const_vec(bld->gallivm, type, > -126.99999), x, > - GALLIVM_NAN_RETURN_SECOND); > + assert(type.floating && type.width == 32); > > - /* ipart = floor(x) */ > - /* fpart = x - ipart */ > - lp_build_ifloor_fract(bld, x, &ipart, &fpart); > - } > + /* We want to preserve NaN and make sure than for exp2 if x > 128, > + * the result is INF and if it's smaller than -126.9 the result is 0 */ > + x = lp_build_min_ext(bld, lp_build_const_vec(bld->gallivm, type, 128.0), > x, > + GALLIVM_NAN_RETURN_SECOND); > + x = lp_build_max_ext(bld, lp_build_const_vec(bld->gallivm, type, > -126.99999), x, > + GALLIVM_NAN_RETURN_SECOND); > > - if(p_exp2_int_part || p_exp2) { > - /* expipart = (float) (1 << ipart) */ > - expipart = LLVMBuildAdd(builder, ipart, > - lp_build_const_int_vec(bld->gallivm, type, > 127), ""); > - expipart = LLVMBuildShl(builder, expipart, > - lp_build_const_int_vec(bld->gallivm, type, > 23), ""); > - expipart = LLVMBuildBitCast(builder, expipart, vec_type, ""); > - } > + /* ipart = floor(x) */ > + /* fpart = x - ipart */ > + lp_build_ifloor_fract(bld, x, &ipart, &fpart); > > - if(p_exp2) { > - expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial, > - Elements(lp_build_exp2_polynomial)); > > - res = LLVMBuildFMul(builder, expipart, expfpart, ""); > - } > > - if(p_exp2_int_part) > - *p_exp2_int_part = expipart; > + /* expipart = (float) (1 << ipart) */ > + expipart = LLVMBuildAdd(builder, ipart, > + lp_build_const_int_vec(bld->gallivm, type, 127), > ""); > + expipart = LLVMBuildShl(builder, expipart, > + lp_build_const_int_vec(bld->gallivm, type, 23), > ""); > + expipart = LLVMBuildBitCast(builder, expipart, vec_type, ""); > > - if(p_frac_part) > - *p_frac_part = fpart; > > - if(p_exp2) > - *p_exp2 = res; > -} > + expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial, > + 
Elements(lp_build_exp2_polynomial)); > + > + res = LLVMBuildFMul(builder, expipart, expfpart, ""); > > > -LLVMValueRef > -lp_build_exp2(struct lp_build_context *bld, > - LLVMValueRef x) > -{ > - LLVMValueRef res; > - lp_build_exp2_approx(bld, x, NULL, NULL, &res); > return res; > } > > > + > /** > * Extract the exponent of a IEEE-754 floating point value. > * > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h > b/src/gallium/auxiliary/gallivm/lp_bld_arit.h > index d98025e..49d4e2c 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h > +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h > @@ -326,13 +326,6 @@ lp_build_ilog2(struct lp_build_context *bld, > LLVMValueRef x); > > void > -lp_build_exp2_approx(struct lp_build_context *bld, > - LLVMValueRef x, > - LLVMValueRef *p_exp2_int_part, > - LLVMValueRef *p_frac_part, > - LLVMValueRef *p_exp2); > - > -void > lp_build_log2_approx(struct lp_build_context *bld, > LLVMValueRef x, > LLVMValueRef *p_exp, > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c > b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c > index 86c3249..1cfaf78 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c > @@ -1057,20 +1057,6 @@ ex2_emit_cpu( > emit_data->args[0]); > } > > -/* TGSI_OPCODE_EXP (CPU Only) */ > -static void > -exp_emit_cpu( > - const struct lp_build_tgsi_action * action, > - struct lp_build_tgsi_context * bld_base, > - struct lp_build_emit_data * emit_data) > -{ > - lp_build_exp2_approx(&bld_base->base, emit_data->args[0], > - &emit_data->output[TGSI_CHAN_X], > - &emit_data->output[TGSI_CHAN_Y], > - &emit_data->output[TGSI_CHAN_Z]); > - emit_data->output[TGSI_CHAN_W] = bld_base->base.one; > -} > - > /* TGSI_OPCODE_F2I (CPU Only) */ > static void > f2i_emit_cpu( > @@ -1785,7 +1771,6 @@ lp_set_default_actions_cpu( > bld_base->op_actions[TGSI_OPCODE_CMP].emit = cmp_emit_cpu; > bld_base->op_actions[TGSI_OPCODE_DIV].emit = 
div_emit_cpu; > bld_base->op_actions[TGSI_OPCODE_EX2].emit = ex2_emit_cpu; > - bld_base->op_actions[TGSI_OPCODE_EXP].emit = exp_emit_cpu; > bld_base->op_actions[TGSI_OPCODE_F2I].emit = f2i_emit_cpu; > bld_base->op_actions[TGSI_OPCODE_F2U].emit = f2u_emit_cpu; > bld_base->op_actions[TGSI_OPCODE_FLR].emit = flr_emit_cpu; > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev