----- Original Message ----- > On 11/29/2012 08:32 AM, jfons...@vmware.com wrote: > > From: José Fonseca<jfons...@vmware.com> > > > > The current implementation was close by not fully correct: several > > operations that should be done in floating point were being done in > > integer. > > > > Fixes piglit fbo-clear-formats GL_ARB_texture_float > > --- > > src/gallium/auxiliary/gallivm/lp_bld_conv.c | 103 > > +++++++++++++++++++-------- > > 1 file changed, 73 insertions(+), 30 deletions(-) > > > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c > > b/src/gallium/auxiliary/gallivm/lp_bld_conv.c > > index cd18b0c..a48b053 100644 > > --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c > > +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c > > @@ -63,15 +63,18 @@ > > > > #include "util/u_debug.h" > > #include "util/u_math.h" > > +#include "util/u_half.h" > > #include "util/u_cpu_detect.h" > > > > #include "lp_bld_type.h" > > #include "lp_bld_const.h" > > #include "lp_bld_arit.h" > > +#include "lp_bld_bitarit.h" > > #include "lp_bld_pack.h" > > #include "lp_bld_conv.h" > > #include "lp_bld_logic.h" > > #include "lp_bld_intr.h" > > +#include "lp_bld_printf.h" > > > > > > > > @@ -220,64 +223,104 @@ lp_build_half_to_float(struct gallivm_state > > *gallivm, > > * > > * ref > > http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ > > * ref https://gist.github.com/2156668 > > + * > > + * XXX: This is approximation. It is is faster certain NaNs are > > converted to > > + * infinity, and rounding is not correct. > > I can't quite parse that comment.
Oops. Will correct. > > */ > > LLVMValueRef > > lp_build_float_to_half(struct gallivm_state *gallivm, > > LLVMValueRef src) > > { > > - struct lp_type i32_type = lp_type_int_vec(32, 32 * > > LLVMGetVectorSize(LLVMTypeOf(src))); > > - > > LLVMBuilderRef builder = gallivm->builder; > > - LLVMTypeRef int_vec_type = lp_build_vec_type(gallivm, > > i32_type); > > - > > - struct lp_build_context bld; > > - > > + LLVMTypeRef f32_vec_type = LLVMTypeOf(src); > > + unsigned length = LLVMGetTypeKind(f32_vec_type) == > > LLVMVectorTypeKind > > + ? LLVMGetVectorSize(f32_vec_type) : 1; > > + struct lp_type f32_type = lp_type_float_vec(32, 32 * length); > > + struct lp_type u32_type = lp_type_uint_vec(32, 32 * length); > > + struct lp_type i16_type = lp_type_int_vec(16, 16 * length); > > + LLVMTypeRef u32_vec_type = lp_build_vec_type(gallivm, > > u32_type); > > + LLVMTypeRef i16_vec_type = lp_build_vec_type(gallivm, > > i16_type); > > + struct lp_build_context f32_bld; > > + struct lp_build_context u32_bld; > > LLVMValueRef result; > > > > - lp_build_context_init(&bld, gallivm, i32_type); > > + lp_build_context_init(&f32_bld, gallivm, f32_type); > > + lp_build_context_init(&u32_bld, gallivm, u32_type); > > > > - /* Extra scope because lp_build_min needs a build context, le > > sigh */ > > { > > /* Constants */ > > - LLVMValueRef i32_13 = lp_build_const_int_vec(gallivm, > > i32_type, 13); > > - LLVMValueRef i32_16 = lp_build_const_int_vec(gallivm, > > i32_type, 16); > > - LLVMValueRef i32_mask_fabs = lp_build_const_int_vec(gallivm, > > i32_type, 0x7fffffff); > > - LLVMValueRef i32_f32infty = lp_build_const_int_vec(gallivm, > > i32_type, 0xff<< 23); > > - LLVMValueRef i32_expinf = lp_build_const_int_vec(gallivm, > > i32_type, 0xe0<< 23); > > - LLVMValueRef i32_f16max = lp_build_const_int_vec(gallivm, > > i32_type, 0x8f<< 23); > > - LLVMValueRef i32_magic = lp_build_const_int_vec(gallivm, > > i32_type, 0x0f<< 23); > > + LLVMValueRef u32_f32inf = lp_build_const_int_vec(gallivm, > > u32_type, 0xff<< 23); > > + LLVMValueRef u32_expinf = lp_build_const_int_vec(gallivm, > > u32_type, 0xe0<< 23); > > + LLVMValueRef f32_f16max = lp_build_const_vec(gallivm, > > f32_type, 65536.0); // 0x8f<< 23 > > + LLVMValueRef f32_magic = lp_build_const_vec(gallivm, > > f32_type, 1.92592994e-34); // 0x0f<< 23 > > > > /* Cast from float32 to int32 */ > > - LLVMValueRef f = LLVMBuildBitCast(builder, src, > > int_vec_type, ""); > > + LLVMValueRef f = LLVMBuildBitCast(builder, src, > > u32_vec_type, ""); > > > > /* Remove sign */ > > - LLVMValueRef fabs = LLVMBuildAnd(builder, > > i32_mask_fabs, f, ""); > > + LLVMValueRef srcabs = lp_build_abs(&f32_bld, src); > > + LLVMValueRef fabs = LLVMBuildBitCast(builder, > > srcabs, u32_vec_type, ""); > > > > /* Magic conversion */ > > - LLVMValueRef clamped = lp_build_min(&bld, i32_f16max, > > fabs); > > - LLVMValueRef scaled = LLVMBuildMul(builder, clamped, > > i32_magic, ""); > > - > > + LLVMValueRef clamped = lp_build_min(&f32_bld, > > f32_f16max, srcabs); > > + LLVMValueRef scaled = LLVMBuildBitCast(builder, > > + > > LLVMBuildFMul(builder, > > + > > clamped, > > + > > f32_magic, > > + > > ""), > > + u32_vec_type, > > + ""); > > /* Make sure Inf/NaN and unormalised survive */ > > - LLVMValueRef infnancase = LLVMBuildXor(builder, > > i32_expinf, fabs, ""); > > - LLVMValueRef b_notnormal = lp_build_compare(gallivm, > > i32_type, PIPE_FUNC_GREATER, fabs, i32_f32infty); > > + LLVMValueRef infnancase = LLVMBuildXor(builder, > > u32_expinf, fabs, ""); > > + LLVMValueRef b_notnormal = lp_build_compare(gallivm, > > f32_type, PIPE_FUNC_GEQUAL, > > + srcabs, > > + > > LLVMBuildBitCast(builder, > > u32_f32inf, f32_vec_type, "")); > > > > /* Merge normal / unnormal case */ > > - LLVMValueRef merge1 = LLVMBuildAnd(builder, > > infnancase, b_notnormal, ""); > > - LLVMValueRef merge2 = LLVMBuildNot(builder, > > LLVMBuildAnd(builder, b_notnormal, scaled, ""), ""); > > - LLVMValueRef merged = LLVMBuildOr(builder, merge1, > > merge2, ""); > > - LLVMValueRef shifted = LLVMBuildLShr(builder, merged, > > i32_13, ""); > > + LLVMValueRef merged = lp_build_select(&u32_bld, > > b_notnormal, infnancase, scaled); > > + LLVMValueRef shifted = lp_build_shr_imm(&u32_bld, > > merged, 13); > > > > /* Sign bit */ > > LLVMValueRef justsign = LLVMBuildXor(builder, f, fabs, > > ""); > > - LLVMValueRef signshifted = LLVMBuildLShr(builder, > > justsign, i32_16, ""); > > + LLVMValueRef signshifted = lp_build_shr_imm(&u32_bld, > > justsign, 16); > > > > /* Combine result */ > > result = LLVMBuildOr(builder, shifted, > > signshifted, ""); > > } > > > > - /* Truncate from 32 bit to 16 bit */ > > - i32_type.width = 16; > > - return LLVMBuildTrunc(builder, result, > > lp_build_vec_type(gallivm, i32_type), ""); > > + result = LLVMBuildTrunc(builder, result, i16_vec_type, ""); > > + > > The following if (0) block is just debug code, right? Yes. > It's a little > suspect because it manipulates the 'result' return value. Maybe add > a > comment about what's going on. I'm sure we'll need to revisit this code, due to the liberties it takes converting, which is why I left the debug code here. I'll add a comment to this effect. > > > + if (0) { > > + LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); > > + LLVMTypeRef i16t = LLVMInt16TypeInContext(gallivm->context); > > + LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context); > > + unsigned i; > > + > > + LLVMTypeRef func_type = LLVMFunctionType(i16t,&f32t, 1, 0); > > + LLVMValueRef func = lp_build_const_int_pointer(gallivm, > > func_to_pointer((func_pointer)util_float_to_half)); > > + func = LLVMBuildBitCast(builder, func, > > LLVMPointerType(func_type, 0), "util_float_to_half"); > > + > > + lp_build_print_value(gallivm, "src = ", src); > > + lp_build_print_value(gallivm, "util = ", result); > > + lp_build_print_value(gallivm, "llvm = ", result); > > + > > + result = LLVMGetUndef(LLVMVectorType(i16t, length)); > > + for (i = 0; i< length; ++i) { > > + LLVMValueRef index = LLVMConstInt(i32t, i, 0); > > + LLVMValueRef f32 = LLVMBuildExtractElement(builder, src, > > index, ""); > > +#if 0 > > + /* XXX: not really supported by backends */ > > + LLVMValueRef f16 = lp_build_intrinsic_unary(builder, > > "llvm.convert.to.fp16", i16t, f32); > > +#else > > + LLVMValueRef f16 = LLVMBuildCall(builder, func,&f32, 1, > > ""); > > +#endif > > + result = LLVMBuildInsertElement(builder, result, f16, > > index, ""); > > + } > > + > > + lp_build_printf(gallivm, "\n"); > > + } > > + > > + return result; > > } > > > > > > I didn't study the code in detail (it's pretty dense), but it looks > OK > on the surface. Roland? > > Reviewed-by: Brian Paul <bri...@vmware.com> Thanks. Jose _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev