From: Andreas Hartmetz <andreas.hartm...@kdab.com> --- src/gallium/auxiliary/translate/translate_sse.c | 35 ++++++++++++++++++++----- 1 file changed, 28 insertions(+), 7 deletions(-)
diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index dace722..03d8276 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -71,7 +71,7 @@ struct translate_group #define ELEMENT_BUFFER_INSTANCE_ID 1001 -#define NUM_CONSTS 7 +#define NUM_CONSTS 8 enum { @@ -81,6 +81,7 @@ enum CONST_INV_32767, CONST_INV_65535, CONST_INV_2147483647, + CONST_INV_4294967295, CONST_255 }; @@ -92,6 +93,7 @@ static float consts[NUM_CONSTS][4] = { C(1.0 / 32767.0), C(1.0 / 65535.0), C(1.0 / 2147483647.0), + C(1.0 / 4294967295.0), C(255.0) }; @@ -515,6 +517,7 @@ translate_attr_convert(struct translate_sse *p, || a->output_format == PIPE_FORMAT_R32G32B32_FLOAT || a->output_format == PIPE_FORMAT_R32G32B32A32_FLOAT)) { struct x86_reg dataXMM = x86_make_reg(file_XMM, 0); + struct x86_reg dataXMM2 = x86_make_reg(file_XMM, 1); for (i = 0; i < output_desc->nr_channels; ++i) { if (swizzle[i] == UTIL_FORMAT_SWIZZLE_0 @@ -546,17 +549,38 @@ translate_attr_convert(struct translate_sse *p, */ sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY)); sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY)); + sse2_cvtdq2ps(p->func, dataXMM, dataXMM); break; case 16: sse2_punpcklwd(p->func, dataXMM, get_const(p, CONST_IDENTITY)); + sse2_cvtdq2ps(p->func, dataXMM, dataXMM); break; - case 32: /* we lose precision here */ + case 32: /* we lose precision if value > 2^23 - 1 */ + /* ...but try to keep all bits for value <= 2^23 - 1 */ + /* this could be done much smarter for 1 or 2 dwords of input + * on X64, using sth like "cvtsi2ss xmm0, rax" (note *R*ax) */ + + /* save the low bit */ + sse_movss(p->func, dataXMM2, dataXMM); + /* right shift & convert, losing the low bit - must clear + * high bit because there is no unsigned convert instruction */ sse2_psrld_imm(p->func, dataXMM, 1); + sse2_cvtdq2ps(p->func, dataXMM, dataXMM); + + /* convert low bit to float */ + sse2_pslld_imm(p->func, dataXMM2, 31); + sse2_psrld_imm(p->func, dataXMM2, 31); + sse2_cvtdq2ps(p->func, dataXMM2, dataXMM2); + + /* mostly undo the right-shift by 1 */ + sse_addps(p->func, dataXMM, dataXMM); + /* add back the lost low bit */ + sse_addps(p->func, dataXMM, dataXMM2); break; default: return FALSE; } - sse2_cvtdq2ps(p->func, dataXMM, dataXMM); + if (input_desc->channel[0].normalized) { struct x86_reg factor; switch (input_desc->channel[0].size) { @@ -567,7 +591,7 @@ translate_attr_convert(struct translate_sse *p, factor = get_const(p, CONST_INV_65535); break; case 32: - factor = get_const(p, CONST_INV_2147483647); + factor = get_const(p, CONST_INV_4294967295); break; default: assert(0); @@ -579,9 +603,6 @@ translate_attr_convert(struct translate_sse *p, } sse_mulps(p->func, dataXMM, factor); } - else if (input_desc->channel[0].size == 32) - /* compensate for the bit we threw away to fit u32 into s32 */ - sse_addps(p->func, dataXMM, dataXMM); break; case UTIL_FORMAT_TYPE_SIGNED: if (!(x86_target_caps(p->func) & X86_SSE2)) -- 1.9.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev