File specific optimization as used for src/mesa/main/streaming-load-memcpy.c currently will cause problems with LTO in the future (see: https://bugs.freedesktop.org/show_bug.cgi?id=83669). Replace it with in-file target specification.
This only works for gcc for now. The intel compiler has __attribute__((cpu_specific(cpuid))) with cpuid core_2_duo_sse4_1, but I'm not sure if mesa compiles with it and how it behaves on different platforms. V2: limit support for this optimization to gcc >= 4.4 only Signed-off-by: Marc Dietrich <marvi...@gmx.de> --- This is the simplest solution I see for now. Drawback is that gcc < 4.4 cannot make use of this single improvement anymore. Otherwise we would have to maintain a nightmare of version checks for different compilers and different pragmas. configure.ac | 10 ++++++---- src/mesa/Makefile.am | 1 - src/mesa/main/streaming-load-memcpy.c | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/configure.ac b/configure.ac index 29cf32e..52ff00a 100644 --- a/configure.ac +++ b/configure.ac @@ -240,11 +240,13 @@ AC_SUBST([VISIBILITY_CXXFLAGS]) dnl dnl Optional flags, check for compiler support dnl -AX_CHECK_COMPILE_FLAG([-msse4.1], [SSE41_SUPPORTED=1], [SSE41_SUPPORTED=0]) -if test "x$SSE41_SUPPORTED" = x1; then - DEFINES="$DEFINES -DUSE_SSE41" +if test 0$GCC_VERSION_MAJOR$GCC_VERSION_MINOR -ge 44; then + AX_CHECK_COMPILE_FLAG([-msse4.1], [SSE41_SUPPORTED=1], [SSE41_SUPPORTED=0]) + if test "x$SSE41_SUPPORTED" = x1; then + DEFINES="$DEFINES -DUSE_SSE41" + fi + AM_CONDITIONAL([SSE41_SUPPORTED], [test x$SSE41_SUPPORTED = x1]) fi -AM_CONDITIONAL([SSE41_SUPPORTED], [test x$SSE41_SUPPORTED = x1]) dnl Can't have static and shared libraries, default to static if user dnl explicitly requested. If both disabled, set to static since shared diff --git a/src/mesa/Makefile.am b/src/mesa/Makefile.am index e71bccb..bbdb36f 100644 --- a/src/mesa/Makefile.am +++ b/src/mesa/Makefile.am @@ -152,7 +152,6 @@ libmesagallium_la_LIBADD = \ libmesa_sse41_la_SOURCES = \ main/streaming-load-memcpy.c -libmesa_sse41_la_CFLAGS = $(AM_CFLAGS) -msse4.1 pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = gl.pc diff --git a/src/mesa/main/streaming-load-memcpy.c b/src/mesa/main/streaming-load-memcpy.c index 8427149..94b0e0a 100644 --- a/src/mesa/main/streaming-load-memcpy.c +++ b/src/mesa/main/streaming-load-memcpy.c @@ -26,7 +26,7 @@ * */ -#ifdef __SSE4_1__ +#ifdef USE_SSE41 #include "main/macros.h" #include "main/streaming-load-memcpy.h" #include <smmintrin.h> @@ -34,7 +34,7 @@ /* Copies memory from src to dst, using SSE 4.1's MOVNTDQA to get streaming * read performance from uncached memory. */ -void +void __attribute__ ((target("sse4.1"))) _mesa_streaming_load_memcpy(void *restrict dst, void *restrict src, size_t len) { char *restrict d = dst; -- 2.1.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev