Am 05.11.2014 um 10:13 schrieb Juha-Pekka Heikkila: > On 04.11.2014 23:24, Roland Scheidegger wrote: >> Am 04.11.2014 um 13:05 schrieb Juha-Pekka Heikkila: >>> Signed-off-by: Juha-Pekka Heikkila <juhapekka.heikk...@gmail.com> >>> --- >>> src/mesa/Makefile.am | 8 +++ >>> src/mesa/main/x86/sse2_clamping.c | 103 >>> ++++++++++++++++++++++++++++++++++++++ >>> src/mesa/main/x86/sse2_clamping.h | 49 ++++++++++++++++++ >>> 3 files changed, 160 insertions(+) >>> create mode 100644 src/mesa/main/x86/sse2_clamping.c >>> create mode 100644 src/mesa/main/x86/sse2_clamping.h >>> >>> diff --git a/src/mesa/Makefile.am b/src/mesa/Makefile.am >>> index e71bccb..5d3c6f5 100644 >>> --- a/src/mesa/Makefile.am >>> +++ b/src/mesa/Makefile.am >>> @@ -111,6 +111,10 @@ if SSE41_SUPPORTED >>> ARCH_LIBS += libmesa_sse41.la >>> endif >>> >>> +if SSE2_SUPPORTED >>> +ARCH_LIBS += libmesa_sse2.la >>> +endif >>> + >>> MESA_ASM_FILES_FOR_ARCH = >>> >>> if HAVE_X86_ASM >>> @@ -154,6 +158,10 @@ libmesa_sse41_la_SOURCES = \ >>> main/streaming-load-memcpy.c >>> libmesa_sse41_la_CFLAGS = $(AM_CFLAGS) -msse4.1 >>> >>> +libmesa_sse2_la_SOURCES = \ >>> + main/x86/sse2_clamping.c >>> +libmesa_sse2_la_CFLAGS = $(AM_CFLAGS) -msse2 >>> + >>> pkgconfigdir = $(libdir)/pkgconfig >>> pkgconfig_DATA = gl.pc >>> >>> diff --git a/src/mesa/main/x86/sse2_clamping.c >>> b/src/mesa/main/x86/sse2_clamping.c >>> new file mode 100644 >>> index 0000000..7df1c85 >>> --- /dev/null >>> +++ b/src/mesa/main/x86/sse2_clamping.c >>> @@ -0,0 +1,103 @@ >>> +/* >>> + * Copyright © 2014 Intel Corporation >>> + * >>> + * Permission is hereby granted, free of charge, to any person obtaining a >>> + * copy of this software and associated documentation files (the >>> "Software"), >>> + * to deal in the Software without restriction, including without >>> limitation >>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense, >>> + * and/or sell copies of the Software, and to permit persons to whom the >>> + * Software 
is furnished to do so, subject to the following conditions: >>> + * >>> + * The above copyright notice and this permission notice (including the >>> next >>> + * paragraph) shall be included in all copies or substantial portions of >>> the >>> + * Software. >>> + * >>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS >>> OR >>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR >>> OTHER >>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING >>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER >>> DEALINGS >>> + * IN THE SOFTWARE. >>> + * >>> + * Authors: >>> + * Juha-Pekka Heikkila <juhapekka.heikk...@gmail.com> >>> + * >>> + */ >>> + >>> +#ifdef __SSE2__ >>> +#include "main/macros.h" >>> +#include "main/x86/sse2_clamping.h" >>> +#include <emmintrin.h> >>> + >>> +/** >>> + * Clamp four float values to [min,max] >>> + */ >>> +static inline void >>> +_mesa_clamp_float_rgba(GLfloat src[4], GLfloat result[4], const float min, >>> + const float max) >>> +{ >>> + __m128 operand, minval, maxval; >>> + >>> + operand = _mm_loadu_ps(src); >>> + minval = _mm_set1_ps(min); >>> + maxval = _mm_set1_ps(max); >>> + operand = _mm_max_ps(operand, minval); >>> + operand = _mm_min_ps(operand, maxval); >>> + _mm_storeu_ps(result, operand); >>> +} >>> + >>> + >>> +/* Clamp n amount float rgba pixels to [min,max] using SSE2 >>> + */ >>> +void >>> +_mesa_streaming_clamp_float_rgba(const GLuint n, GLfloat rgba_src[][4], >>> + GLfloat rgba_dst[][4], const GLfloat min, >>> + const GLfloat max) >>> +{ >>> + int i; >>> + >>> + for (i = 0; i < n; i++) { >>> + _mesa_clamp_float_rgba(rgba_src[i], rgba_dst[i], min, max); >>> + } >>> +} >>> + >>> + >>> +/* Clamp n amount float rgba pixels to [min,max] using SSE2 and apply >>> + * scaling and 
mapping to components. >>> + * >>> + * this replace handling of [RGBA] channels: >>> + * rgba_temp[RCOMP] = CLAMP(rgba[i][RCOMP], 0.0F, 1.0F); >>> + * rgba[i][RCOMP] = rMap[F_TO_I(rgba_temp[RCOMP] * scale[RCOMP])]; >>> + */ >>> +void >>> +_mesa_clamp_float_rgba_scale_and_map(const GLuint n, GLfloat rgba_src[][4], >>> + GLfloat rgba_dst[][4], const GLfloat >>> min, >>> + const GLfloat max, >>> + const GLfloat scale[4], >>> + const GLfloat* rMap, const GLfloat* >>> gMap, >>> + const GLfloat* bMap, const GLfloat* >>> aMap) >>> +{ >>> + int i; >>> + GLfloat __attribute__((aligned(16))) temp[4]; >>> + __m128 *operand = (__m128*) &temp, multiplier, mmove; >>> + __m128i truncated_integers; >>> + >>> + const unsigned int* map_p = (const unsigned int*) &truncated_integers; >>> + >>> + multiplier = _mm_loadu_ps(scale); >>> + >>> + for(i = 0; i < n; i++) { >>> + _mesa_clamp_float_rgba(rgba_src[i], temp, min, max); >>> + >>> + *operand = _mm_mul_ps(multiplier, *operand); >>> + truncated_integers = _mm_cvttps_epi32(*operand); >>> + mmove = _mm_set_ps(aMap[map_p[ACOMP]], bMap[map_p[BCOMP]], >>> + gMap[map_p[GCOMP]], rMap[map_p[RCOMP]] ); >>> + >>> + _mm_storeu_ps(rgba_dst[i], mmove); >> The sse2 code at the end looks counterproductive to me. Not sure what >> gcc will generate but I'd suspect it involves some simd->int domain >> transition for the table lookups, plus another int->simd transition to >> get the values back into simd domain (alternatively it might use >> stores/load here) just so you can store them again... >> It would probably be better to just store the values directly after the >> table lookups. > > I did at first try have this tail piece of code as simple c code as > possible but what gcc did was not so nice. This piece is compiled with > sse2 flag and what gcc though of this was to start moving elements > around one by one using movss. "mmove = _mm_set_ps.." 
looks quite bad > but the code it creates is not so crappy in the end, at least when > compared to what gcc originally did. I guess it would work better if you'd use an explicit simd store instead of casting the __m128i to ints in this case. But it doesn't really matter in the end.
Roland > >> But in any case actually I'm beginning to suspect noone really cares >> about performance anyway for that path (who the hell uses these >> scale/map features?) so whatever works... > > I agree, this function is of low interest. Clamping is the issue here > which deserves to be fixed, to get _mesa_streaming_clamp_float_rgba > function reasonable as it is used often. > > There is additional nuance to this particular file which I mentioned in > the cover-letter. Note where this file is stored. There was discussion > in the previous rfc patch about where to store all these optimizations > targeting particular architecture, this is my suggestion. > > /Juha-Pekka > >> >> >>> + } >>> +} >>> + >>> + >>> +#endif /* __SSE2__ */ >>> diff --git a/src/mesa/main/x86/sse2_clamping.h >>> b/src/mesa/main/x86/sse2_clamping.h >>> new file mode 100644 >>> index 0000000..688fab7 >>> --- /dev/null >>> +++ b/src/mesa/main/x86/sse2_clamping.h >>> @@ -0,0 +1,49 @@ >>> +/* >>> + * Copyright © 2014 Intel Corporation >>> + * >>> + * Permission is hereby granted, free of charge, to any person obtaining a >>> + * copy of this software and associated documentation files (the >>> "Software"), >>> + * to deal in the Software without restriction, including without >>> limitation >>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense, >>> + * and/or sell copies of the Software, and to permit persons to whom the >>> + * Software is furnished to do so, subject to the following conditions: >>> + * >>> + * The above copyright notice and this permission notice (including the >>> next >>> + * paragraph) shall be included in all copies or substantial portions of >>> the >>> + * Software. >>> + * >>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS >>> OR >>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL >>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR >>> OTHER >>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING >>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER >>> DEALINGS >>> + * IN THE SOFTWARE. >>> + * >>> + * Authors: >>> + * Juha-Pekka Heikkila <juhapekka.heikk...@gmail.com> >>> + * >>> + */ >>> + >>> +#ifdef __SSE2__ >>> + >>> +/* Clamp n amount float rgba pixels to [min,max] using SSE2 >>> + */ >>> +void >>> +_mesa_streaming_clamp_float_rgba(const GLuint n, GLfloat rgba_src[][4], >>> + GLfloat rgba_dst[][4], const GLfloat min, >>> + const GLfloat max); >>> + >>> + >>> +/* Clamp n amount float rgba pixels to [min,max] using SSE2 and apply >>> + * scaling and mapping to components. >>> + */ >>> +void >>> +_mesa_clamp_float_rgba_scale_and_map(const GLuint n, GLfloat rgba_src[][4], >>> + GLfloat rgba_dst[][4], const GLfloat >>> min, >>> + const GLfloat max, >>> + const GLfloat scale[4], >>> + const GLfloat* rMap, const GLfloat* >>> gMap, >>> + const GLfloat* bMap, const GLfloat* >>> aMap); >>> + >>> +#endif /* __SSE2__ */ >>> >> > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev