According to a quick micro-benchmark, this new version is 20% faster on my Haswell laptop.

v2: Removed the XXX note about x86_64 from the comment

Signed-off-by: Jason Ekstrand <jason.ekstr...@intel.com>
---
 src/mesa/main/imports.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h
index af780b2..c8ae7f2 100644
--- a/src/mesa/main/imports.h
+++ b/src/mesa/main/imports.h
@@ -277,7 +277,6 @@ static inline int IROUND_POS(float f)
 
 /**
  * Convert float to int using a fast method. The rounding mode may vary.
- * XXX We could use an x86-64/SSE2 version here.
  */
 static inline int F_TO_I(float f)
 {
@@ -285,6 +284,10 @@ static inline int F_TO_I(float f)
    int r;
    __asm__ ("fistpl %0" : "=m" (r) : "t" (f) : "st");
    return r;
+#elif defined(USE_X86_64_ASM) && defined(__GNUC__)
+   int r;
+   __asm__ ("cvtss2si %1, %0" : "=r" (r) : "xm" (f));
+   return r;
 #elif defined(USE_X86_ASM) && defined(_MSC_VER)
    int r;
    _asm {
-- 
2.0.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev