Uses SSE 4.1's MOVNTDQA instruction (streaming load) to read from uncached memory without polluting the cache. --- We should add runtime detection support later.
src/mesa/Makefile.am | 10 +++++ src/mesa/main/streaming-load-memcpy.c | 85 +++++++++++++++++++++++++++++++++++ src/mesa/main/streaming-load-memcpy.h | 33 ++++++++++++++ 3 files changed, 128 insertions(+) create mode 100644 src/mesa/main/streaming-load-memcpy.c create mode 100644 src/mesa/main/streaming-load-memcpy.h diff --git a/src/mesa/Makefile.am b/src/mesa/Makefile.am index b4ad9fc..f34ca82 100644 --- a/src/mesa/Makefile.am +++ b/src/mesa/Makefile.am @@ -100,16 +100,20 @@ MESA_ASM_FILES_FOR_ARCH = if HAVE_X86_ASM MESA_ASM_FILES_FOR_ARCH += $(X86_FILES) AM_CPPFLAGS += -I$(builddir)/x86 -I$(srcdir)/x86 +ARCH_LIBS = libmesa_sse41.la endif if HAVE_X86_64_ASM MESA_ASM_FILES_FOR_ARCH += $(X86_64_FILES) AM_CPPFLAGS += -I$(builddir)/x86-64 -I$(srcdir)/x86-64 +ARCH_LIBS = libmesa_sse41.la endif if HAVE_SPARC_ASM MESA_ASM_FILES_FOR_ARCH += $(SPARC_FILES) AM_CPPFLAGS += -I$(builddir)/sparc -I$(srcdir)/sparc endif +noinst_LTLIBRARIES += $(ARCH_LIBS) + libmesa_la_SOURCES = \ $(MESA_FILES) \ $(PROGRAM_FILES) \ @@ -117,6 +121,7 @@ libmesa_la_SOURCES = \ libmesa_la_LIBADD = \ $(top_builddir)/src/glsl/libglsl.la \ + $(ARCH_LIBS) \ $() libmesagallium_la_SOURCES = \ @@ -126,8 +131,13 @@ libmesagallium_la_SOURCES = \ libmesagallium_la_LIBADD = \ $(top_builddir)/src/glsl/libglsl.la \ + $(ARCH_LIBS) \ $() +libmesa_sse41_la_SOURCES = \ + main/streaming-load-memcpy.c +libmesa_sse41_la_CFLAGS = $(AM_CFLAGS) -msse4.1 + pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = gl.pc diff --git a/src/mesa/main/streaming-load-memcpy.c b/src/mesa/main/streaming-load-memcpy.c new file mode 100644 index 0000000..d7147af --- /dev/null +++ b/src/mesa/main/streaming-load-memcpy.c @@ -0,0 +1,85 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <e...@anholt.net> + * Matt Turner <matts...@gmail.com> + * + */ + +#include "main/macros.h" +#include "main/streaming-load-memcpy.h" +#include <smmintrin.h> + +/* Copies memory from src to dst, using SSE 4.1's MOVNTDQA to get streaming + * read performance from uncached memory. + */ +void +_mesa_streaming_load_memcpy(void *restrict dst, void *restrict src, size_t len) +{ + char *restrict d = dst; + char *restrict s = src; + + /* If dst and src are not co-aligned, fallback to memcpy(). */ + if (((uintptr_t)d & 15) != ((uintptr_t)s & 15)) { + memcpy(d, s, len); + return; + } + + /* memcpy() the misaligned header. At the end of this if block, <d> and <s> + * are aligned to a 16-byte boundary or <len> == 0. + */ + if ((uintptr_t)d & 15) { + uintptr_t bytes_before_alignment_boundary = 16 - ((uintptr_t)d & 15); + assert(bytes_before_alignment_boundary < 16); + + memcpy(d, s, MIN2(bytes_before_alignment_boundary, len)); + + d = (char *)ALIGN((uintptr_t)d, 16); + s = (char *)ALIGN((uintptr_t)s, 16); + len -= MIN2(bytes_before_alignment_boundary, len); + } + + while (len >= 64) { + __m128i *dst_cacheline = (__m128i *)d; + __m128i *src_cacheline = (__m128i *)s; + + __m128i temp1 = _mm_stream_load_si128(src_cacheline + 0); + __m128i temp2 = _mm_stream_load_si128(src_cacheline + 1); + __m128i temp3 = _mm_stream_load_si128(src_cacheline + 2); + __m128i temp4 = _mm_stream_load_si128(src_cacheline + 3); + + _mm_store_si128(dst_cacheline + 0, temp1); + _mm_store_si128(dst_cacheline + 1, temp2); + _mm_store_si128(dst_cacheline + 2, temp3); + _mm_store_si128(dst_cacheline + 3, temp4); + + d += 64; + s += 64; + len -= 64; + } + + /* memcpy() the tail. */ + if (len) { + memcpy(d, s, len); + } +} diff --git a/src/mesa/main/streaming-load-memcpy.h b/src/mesa/main/streaming-load-memcpy.h new file mode 100644 index 0000000..41eeeec --- /dev/null +++ b/src/mesa/main/streaming-load-memcpy.h @@ -0,0 +1,33 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <e...@anholt.net> + * Matt Turner <matts...@gmail.com> + * + */ + +/* Copies memory from src to dst, using SSE 4.1's MOVNTDQA to get streaming + * read performance from uncached memory. + */ +void +_mesa_streaming_load_memcpy(void *restrict dst, void *restrict src, size_t len); -- 1.8.3.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev