According to a quick micro-benchmark, this new x86-64 version of F_TO_I
(using the SSE cvtss2si instruction) is 20% faster on my Haswell laptop.

v2: Removed the XXX note about x86_64 from the comment

Signed-off-by: Jason Ekstrand <jason.ekstr...@intel.com>
---
 src/mesa/main/imports.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h
index af780b2..c8ae7f2 100644
--- a/src/mesa/main/imports.h
+++ b/src/mesa/main/imports.h
@@ -277,7 +277,6 @@ static inline int IROUND_POS(float f)
 
 /**
  * Convert float to int using a fast method.  The rounding mode may vary.
- * XXX We could use an x86-64/SSE2 version here.
  */
 static inline int F_TO_I(float f)
 {
@@ -285,6 +284,10 @@ static inline int F_TO_I(float f)
    int r;
    __asm__ ("fistpl %0" : "=m" (r) : "t" (f) : "st");
    return r;
+#elif defined(USE_X86_64_ASM) && defined(__GNUC__)
+   int r;
+   __asm__ ("cvtss2si %1, %0" : "=r" (r) : "xm" (f));
+   return r;
 #elif defined(USE_X86_ASM) && defined(_MSC_VER)
    int r;
    _asm {
-- 
2.0.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to