The introduction and use of lp_build_clamp_zero_one_nanzero looks good. I can't comment on the changes to the existing paths though, as they are too subtle for me. As long there are no regressions I'm good.
Jose ----- Original Message ----- > From: Roland Scheidegger <srol...@vmware.com> > > d3d10 requires us to convert NaNs to zero for any float->int conversion. > We don't really do that but mostly seems to work. In particular I suspect the > very common float->unorm8 path only really passes because it relies on sse2 > pack intrinsics which just happen to work by luck for NaNs (float->int > conversion in hw gives integer indeterminate value, which just happens to be > -0x80000000 hence gets converted to zero in the end after pack intrinsics). > However, float->srgb didn't get so lucky, because we need to clamp before > blending and clamping resulted in NaN behavior being undefined (and actually > got converted to 1.0 by clamping with sse2). Fix this by using a zero/one > clamp > with defined nan behavior as we can handle the NaN for free this way. > I suspect there's more bugs lurking in this area (e.g. converting floats to > snorm) as we don't really use defined NaN behavior everywhere but this seems > to be good enough. > While here respecify nan behavior modes a bit, in particular the > return_second > mode didn't really do what we wanted. From the caller's perspective, we > really > wanted to say we need the non-nan result, but we already know the second arg > isn't a NaN. So we use this now instead, which means that cpu architectures > which actually implement min/max by always returning non-nan (that is > adhering > to ieee754-2008 rules) don't need to bend over backwards for nothing. > --- > src/gallium/auxiliary/gallivm/lp_bld_arit.c | 44 > +++++++++++++------- > src/gallium/auxiliary/gallivm/lp_bld_arit.h | 12 ++++-- > src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c | 2 +- > src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 11 ++--- > src/gallium/drivers/llvmpipe/lp_state_fs.c | 4 +- > 5 files changed, 45 insertions(+), 28 deletions(-) > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c > b/src/gallium/auxiliary/gallivm/lp_bld_arit.c > index 00052ed..70929e7 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c > @@ -123,8 +123,10 @@ lp_build_min_simple(struct lp_build_context *bld, > } > } > else if (type.floating && util_cpu_caps.has_altivec) { > - debug_printf("%s: altivec doesn't support nan behavior modes\n", > - __FUNCTION__); > + if (nan_behavior == GALLIVM_NAN_RETURN_NAN) { > + debug_printf("%s: altivec doesn't support nan return nan > behavior\n", > + __FUNCTION__); > + } > if (type.width == 32 && type.length == 4) { > intrinsic = "llvm.ppc.altivec.vminfp"; > intr_size = 128; > @@ -159,8 +161,6 @@ lp_build_min_simple(struct lp_build_context *bld, > } > } else if (util_cpu_caps.has_altivec) { > intr_size = 128; > - debug_printf("%s: altivec doesn't support nan behavior modes\n", > - __FUNCTION__); > if (type.width == 8) { > if (!type.sign) { > intrinsic = "llvm.ppc.altivec.vminub"; > @@ -191,7 +191,7 @@ lp_build_min_simple(struct lp_build_context *bld, > */ > if (util_cpu_caps.has_sse && type.floating && > nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED && > - nan_behavior != GALLIVM_NAN_RETURN_SECOND) { > + nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN) { > LLVMValueRef isnan, max; > max = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic, > type, > @@ -227,7 +227,7 @@ lp_build_min_simple(struct lp_build_context *bld, > return lp_build_select(bld, cond, a, b); > } > break; > - case GALLIVM_NAN_RETURN_SECOND: > + case GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN: > cond = lp_build_cmp_ordered(bld, PIPE_FUNC_LESS, a, b); > return lp_build_select(bld, cond, a, b); > case GALLIVM_NAN_BEHAVIOR_UNDEFINED: > @@ -299,8 +299,10 @@ lp_build_max_simple(struct lp_build_context *bld, > } > } > else if (type.floating && util_cpu_caps.has_altivec) { > - debug_printf("%s: altivec doesn't support nan behavior modes\n", > - __FUNCTION__); > + if (nan_behavior == GALLIVM_NAN_RETURN_NAN) { > + debug_printf("%s: altivec doesn't support nan return nan > behavior\n", > + __FUNCTION__); > + } > if (type.width == 32 || type.length == 4) { > intrinsic = "llvm.ppc.altivec.vmaxfp"; > intr_size = 128; > @@ -336,8 +338,6 @@ lp_build_max_simple(struct lp_build_context *bld, > } > } else if (util_cpu_caps.has_altivec) { > intr_size = 128; > - debug_printf("%s: altivec doesn't support nan behavior modes\n", > - __FUNCTION__); > if (type.width == 8) { > if (!type.sign) { > intrinsic = "llvm.ppc.altivec.vmaxub"; > @@ -362,7 +362,7 @@ lp_build_max_simple(struct lp_build_context *bld, > if(intrinsic) { > if (util_cpu_caps.has_sse && type.floating && > nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED && > - nan_behavior != GALLIVM_NAN_RETURN_SECOND) { > + nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN) { > LLVMValueRef isnan, min; > min = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic, > type, > @@ -398,7 +398,7 @@ lp_build_max_simple(struct lp_build_context *bld, > return lp_build_select(bld, cond, a, b); > } > break; > - case GALLIVM_NAN_RETURN_SECOND: > + case GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN: > cond = lp_build_cmp_ordered(bld, PIPE_FUNC_GREATER, a, b); > return lp_build_select(bld, cond, a, b); > case GALLIVM_NAN_BEHAVIOR_UNDEFINED: > @@ -1399,6 +1399,7 @@ lp_build_max_ext(struct lp_build_context *bld, > > /** > * Generate clamp(a, min, max) > + * NaN behavior (for any of a, min, max) is undefined. > * Do checks for special cases. > */ > LLVMValueRef > @@ -1418,6 +1419,20 @@ lp_build_clamp(struct lp_build_context *bld, > > > /** > + * Generate clamp(a, 0, 1) > + * A NaN will get converted to zero. > + */ > +LLVMValueRef > +lp_build_clamp_zero_one_nanzero(struct lp_build_context *bld, > + LLVMValueRef a) > +{ > + a = lp_build_max_ext(bld, a, bld->zero, > GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); > + a = lp_build_min(bld, a, bld->one); > + return a; > +} > + > + > +/** > * Generate abs(a) > */ > LLVMValueRef > @@ -3029,9 +3044,8 @@ lp_build_exp2(struct lp_build_context *bld, > /* We want to preserve NaN and make sure than for exp2 if x > 128, > * the result is INF and if it's smaller than -126.9 the result is 0 */ > x = lp_build_min_ext(bld, lp_build_const_vec(bld->gallivm, type, 128.0), > x, > - GALLIVM_NAN_RETURN_SECOND); > - x = lp_build_max_ext(bld, lp_build_const_vec(bld->gallivm, type, > -126.99999), x, > - GALLIVM_NAN_RETURN_SECOND); > + GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); > + x = lp_build_max(bld, lp_build_const_vec(bld->gallivm, type, -126.99999), > x); > > /* ipart = floor(x) */ > /* fpart = x - ipart */ > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h > b/src/gallium/auxiliary/gallivm/lp_bld_arit.h > index 49d4e2c..75bf89e 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h > +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h > @@ -142,9 +142,11 @@ enum gallivm_nan_behavior { > GALLIVM_NAN_RETURN_NAN, > /* If one of the inputs is NaN, the other operand is returned */ > GALLIVM_NAN_RETURN_OTHER, > - /* If one of the inputs is NaN, the second operand is returned. > - * In min/max it will be as fast as undefined with sse opcodes */ > - GALLIVM_NAN_RETURN_SECOND > + /* If one of the inputs is NaN, the other operand is returned, > + * but we guarantee the second operand is not a NaN. > + * In min/max it will be as fast as undefined with sse opcodes, > + * and archs having native return_other can benefit too. */ > + GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN > }; > > LLVMValueRef > @@ -176,6 +178,10 @@ lp_build_clamp(struct lp_build_context *bld, > LLVMValueRef max); > > LLVMValueRef > +lp_build_clamp_zero_one_nanzero(struct lp_build_context *bld, > + LLVMValueRef a); > + > +LLVMValueRef > lp_build_abs(struct lp_build_context *bld, > LLVMValueRef a); > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c > b/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c > index 2b1fe64..6645151 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c > @@ -326,7 +326,7 @@ lp_build_float_to_srgb_packed(struct gallivm_state > *gallivm, > * can't use lp_build_conv since we want to keep values as 32bit > * here so we can interleave with rgb to go from SoA->AoS. > */ > - alpha = lp_build_clamp(&f32_bld, src[3], f32_bld.zero, f32_bld.one); > + alpha = lp_build_clamp_zero_one_nanzero(&f32_bld, src[3]); > alpha = lp_build_mul(&f32_bld, alpha, > lp_build_const_vec(gallivm, src_type, 255.0f)); > tmpsrgb[3] = lp_build_iround(&f32_bld, alpha); > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > index 5f81066..5fc47ed 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c > @@ -1384,21 +1384,18 @@ emit_store_chan( > assert(dtype == TGSI_TYPE_FLOAT || > dtype == TGSI_TYPE_UNTYPED); > value = LLVMBuildBitCast(builder, value, float_bld->vec_type, ""); > - value = lp_build_max_ext(float_bld, value, float_bld->zero, > - GALLIVM_NAN_RETURN_SECOND); > - value = lp_build_min_ext(float_bld, value, float_bld->one, > - GALLIVM_NAN_BEHAVIOR_UNDEFINED); > + value = lp_build_clamp_zero_one_nanzero(float_bld, value); > break; > > case TGSI_SAT_MINUS_PLUS_ONE: > assert(dtype == TGSI_TYPE_FLOAT || > dtype == TGSI_TYPE_UNTYPED); > value = LLVMBuildBitCast(builder, value, float_bld->vec_type, ""); > + /* This will give -1.0 for NaN which is probably not what we want. */ > value = lp_build_max_ext(float_bld, value, > lp_build_const_vec(gallivm, float_bld->type, > -1.0), > - GALLIVM_NAN_RETURN_SECOND); > - value = lp_build_min_ext(float_bld, value, float_bld->one, > - GALLIVM_NAN_BEHAVIOR_UNDEFINED); > + GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); > + value = lp_build_min(float_bld, value, float_bld->one); > break; > > default: > diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c > b/src/gallium/drivers/llvmpipe/lp_state_fs.c > index 8223d2a..b5816e0 100644 > --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c > +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c > @@ -1760,11 +1760,11 @@ generate_unswizzled_blend(struct gallivm_state > *gallivm, > assert(row_type.floating); > lp_build_context_init(&f32_bld, gallivm, row_type); > for (i = 0; i < src_count; i++) { > - src[i] = lp_build_clamp(&f32_bld, src[i], f32_bld.zero, > f32_bld.one); > + src[i] = lp_build_clamp_zero_one_nanzero(&f32_bld, src[i]); > } > if (dual_source_blend) { > for (i = 0; i < src_count; i++) { > - src1[i] = lp_build_clamp(&f32_bld, src1[i], f32_bld.zero, > f32_bld.one); > + src1[i] = lp_build_clamp_zero_one_nanzero(&f32_bld, src1[i]); > } > } > /* probably can't be different than row_type but better safe than > sorry... */ > -- > 1.7.9.5 > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev