Signed-off-by: Juha-Pekka Heikkila <juhapekka.heikk...@gmail.com>
---
 src/mesa/Makefile.am          |   8 +++
 src/mesa/main/sse2_clamping.c | 162 ++++++++++++++++++++++++++++++++++++++++++
 src/mesa/main/sse2_clamping.h |  57 +++++++++++++++
 3 files changed, 227 insertions(+)
 create mode 100644 src/mesa/main/sse2_clamping.c
 create mode 100644 src/mesa/main/sse2_clamping.h

diff --git a/src/mesa/Makefile.am b/src/mesa/Makefile.am
index 932db4f..43dbe87 100644
--- a/src/mesa/Makefile.am
+++ b/src/mesa/Makefile.am
@@ -111,6 +111,10 @@ if SSE41_SUPPORTED
 ARCH_LIBS += libmesa_sse41.la
 endif
 
+if SSE2_SUPPORTED
+ARCH_LIBS += libmesa_sse2.la
+endif
+
 MESA_ASM_FILES_FOR_ARCH =
 
 if HAVE_X86_ASM
@@ -155,6 +159,10 @@ libmesa_sse41_la_SOURCES = \
 	main/sse_minmax.c
 libmesa_sse41_la_CFLAGS = $(AM_CFLAGS) -msse4.1
 
+libmesa_sse2_la_SOURCES = \
+	main/sse2_clamping.c
+libmesa_sse2_la_CFLAGS = $(AM_CFLAGS) -msse2
+
 pkgconfigdir = $(libdir)/pkgconfig
 pkgconfig_DATA = gl.pc
 
diff --git a/src/mesa/main/sse2_clamping.c b/src/mesa/main/sse2_clamping.c
new file mode 100644
index 0000000..66c7dc7
--- /dev/null
+++ b/src/mesa/main/sse2_clamping.c
@@ -0,0 +1,162 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Juha-Pekka Heikkila <juhapekka.heikk...@gmail.com>
+ *
+ */
+
+#ifdef __SSE2__
+#include "main/sse2_clamping.h"
+#include "main/macros.h"
+#include <emmintrin.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * Clamp the four lanes of value to [minval, maxval].
+ *
+ * value is the first operand of the max, so a NaN lane in value comes out
+ * as the corresponding minval lane rather than propagating.
+ */
+static inline __m128
+clamp_ps(__m128 value, __m128 minval, __m128 maxval)
+{
+   return _mm_min_ps(_mm_max_ps(value, minval), maxval);
+}
+
+
+/**
+ * Clamp the single float at src to the range held in lane 0 of
+ * minval/maxval and write it to dst.
+ */
+static inline void
+clamp_ss(const float *src, float *dst, __m128 minval, __m128 maxval)
+{
+   __m128 value = _mm_load_ss(src);
+
+   value = _mm_max_ss(value, minval);
+   value = _mm_min_ss(value, maxval);
+   _mm_store_ss(dst, value);
+}
+
+
+/**
+ * Clamp n float rgba pixels to [min,max] using SSE2.
+ *
+ * \param n         number of rgba pixels
+ * \param rgba_src  pixels to clamp
+ * \param rgba_dst  receives the clamped pixels; may be the same array as
+ *                  rgba_src (in-place), but must not otherwise overlap it
+ * \param min       lower bound applied to every component
+ * \param max       upper bound applied to every component
+ */
+void
+_mesa_streaming_clamp_float_rgba(const GLuint n, GLfloat rgba_src[][4],
+                                 GLfloat rgba_dst[][4], const GLfloat min,
+                                 const GLfloat max)
+{
+   const float *src = &rgba_src[0][0];
+   float *dst = &rgba_dst[0][0];
+   size_t count = (size_t) n * 4;
+   const __m128 minval = _mm_set1_ps(min);
+   const __m128 maxval = _mm_set1_ps(max);
+
+   /* Scalar head: step one float at a time until src has the 16-byte
+    * alignment that _mm_load_ps requires.
+    */
+   for (; count > 0 && ((uintptr_t) src & 0xf) != 0; count--, src++, dst++) {
+      clamp_ss(src, dst, minval, maxval);
+   }
+
+   /* Main loop: two vectors per iteration.  dst only shares the alignment
+    * of src when clamping in place, so it is written with unaligned stores.
+    */
+   for (; count >= 8; count -= 8, src += 8, dst += 8) {
+      const __m128 first = clamp_ps(_mm_load_ps(src), minval, maxval);
+      const __m128 second = clamp_ps(_mm_load_ps(src + 4), minval, maxval);
+
+      _mm_storeu_ps(dst, first);
+      _mm_storeu_ps(dst + 4, second);
+   }
+
+   /* Scalar tail: the floats left over after the last full pair. */
+   for (; count > 0; count--, src++, dst++) {
+      clamp_ss(src, dst, minval, maxval);
+   }
+}
+
+
+/**
+ * Clamp n float rgba pixels to [min,max] using SSE2 and apply
+ * scaling and mapping to components.
+ *
+ * This replaces the per-channel handling of [RGBA]:
+ *    rgba_temp[RCOMP] = CLAMP(rgba[i][RCOMP], 0.0F, 1.0F);
+ *    rgba[i][RCOMP] = rMap[F_TO_I(rgba_temp[RCOMP] * scale[RCOMP])];
+ *
+ * The scaled value is turned into a map index by truncation toward zero.
+ * The index is not range checked.
+ *
+ * \param n         number of rgba pixels
+ * \param rgba_src  pixels to convert
+ * \param rgba_dst  receives the mapped pixels
+ * \param min       lower clamp bound
+ * \param max       upper clamp bound
+ * \param scale     per-component multiplier applied after clamping
+ * \param rMap      lookup table for the red component
+ * \param gMap      lookup table for the green component
+ * \param bMap      lookup table for the blue component
+ * \param aMap      lookup table for the alpha component
+ */
+void
+_mesa_clamp_float_rgba_scale_and_map(const GLuint n, GLfloat rgba_src[][4],
+                                     GLfloat rgba_dst[][4], const GLfloat min,
+                                     const GLfloat max,
+                                     const GLfloat scale[4],
+                                     const GLfloat* rMap, const GLfloat* gMap,
+                                     const GLfloat* bMap, const GLfloat* aMap)
+{
+   const __m128 minval = _mm_set1_ps(min);
+   const __m128 maxval = _mm_set1_ps(max);
+   const __m128 multiplier = _mm_loadu_ps(scale);
+   GLuint i;
+
+   for (i = 0; i < n; i++) {
+      int index[4];
+      __m128 pixel = clamp_ps(_mm_loadu_ps(rgba_src[i]), minval, maxval);
+
+      pixel = _mm_mul_ps(multiplier, pixel);
+
+      /* Spill the indices with a store instead of reading the vector
+       * through a pointer of another type.
+       */
+      _mm_storeu_si128((__m128i *) index, _mm_cvttps_epi32(pixel));
+
+      rgba_dst[i][RCOMP] = rMap[index[RCOMP]];
+      rgba_dst[i][GCOMP] = gMap[index[GCOMP]];
+      rgba_dst[i][BCOMP] = bMap[index[BCOMP]];
+      rgba_dst[i][ACOMP] = aMap[index[ACOMP]];
+   }
+}
+
+#endif /* __SSE2__ */
diff --git a/src/mesa/main/sse2_clamping.h b/src/mesa/main/sse2_clamping.h
new file mode 100644
index 0000000..688fab7
--- /dev/null
+++ b/src/mesa/main/sse2_clamping.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Juha-Pekka Heikkila <juhapekka.heikk...@gmail.com>
+ *
+ */
+
+#ifndef SSE2_CLAMPING_H
+#define SSE2_CLAMPING_H
+
+#include "main/glheader.h"
+
+#ifdef __SSE2__
+
+/* Clamp n float rgba pixels to [min,max] using SSE2.  The result is written
+ * to rgba_dst, which may be the same array as rgba_src.
+ */
+void
+_mesa_streaming_clamp_float_rgba(const GLuint n, GLfloat rgba_src[][4],
+                                 GLfloat rgba_dst[][4], const GLfloat min,
+                                 const GLfloat max);
+
+
+/* Clamp n float rgba pixels to [min,max] using SSE2 and apply
+ * scaling and mapping to components.
+ */
+void
+_mesa_clamp_float_rgba_scale_and_map(const GLuint n, GLfloat rgba_src[][4],
+                                     GLfloat rgba_dst[][4], const GLfloat min,
+                                     const GLfloat max,
+                                     const GLfloat scale[4],
+                                     const GLfloat* rMap, const GLfloat* gMap,
+                                     const GLfloat* bMap, const GLfloat* aMap);
+
+#endif /* __SSE2__ */
+
+#endif /* SSE2_CLAMPING_H */
-- 
1.8.5.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev