This makes _mm_{min,max}_ps work correctly for QNaNs. Tested on powerpc64le-linux; committing.
Segher 2018-03-31 Segher Boessenkool <seg...@kernel.crashing.org> PR target/83315 * config/rs6000/xmmintrin.h (_mm_min_ps, _mm_max_ps): Handle (quiet) NaN inputs correctly. gcc/testsuite/ PR target/83315 * gcc.target/powerpc/sse-maxps-2.c: New test. * gcc.target/powerpc/sse-minps-2.c: New test. --- gcc/config/rs6000/xmmintrin.h | 6 ++-- gcc/testsuite/gcc.target/powerpc/sse-maxps-2.c | 43 ++++++++++++++++++++++++++ gcc/testsuite/gcc.target/powerpc/sse-minps-2.c | 43 ++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/sse-maxps-2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse-minps-2.c diff --git a/gcc/config/rs6000/xmmintrin.h b/gcc/config/rs6000/xmmintrin.h index 2cf2bf2..aa2823f 100644 --- a/gcc/config/rs6000/xmmintrin.h +++ b/gcc/config/rs6000/xmmintrin.h @@ -438,13 +438,15 @@ _mm_max_ss (__m128 __A, __m128 __B) extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_ps (__m128 __A, __m128 __B) { - return ((__m128)vec_min ((__v4sf)__A,(__v4sf) __B)); + __m128 m = (__m128) vec_vcmpgtfp ((__v4sf) __B, (__v4sf) __A); + return vec_sel (__B, __A, m); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_ps (__m128 __A, __m128 __B) { - return ((__m128)vec_max ((__v4sf)__A, (__v4sf)__B)); + __m128 m = (__m128) vec_vcmpgtfp ((__v4sf) __A, (__v4sf) __B); + return vec_sel (__B, __A, m); } /* Perform logical bit-wise operations on 128-bit values. 
*/ diff --git a/gcc/testsuite/gcc.target/powerpc/sse-maxps-2.c b/gcc/testsuite/gcc.target/powerpc/sse-maxps-2.c new file mode 100644 index 0000000..5cf9c3f --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse-maxps-2.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-require-effective-target p8vector_hw } */ + +#define NO_WARN_X86_INTRINSICS 1 + +#ifndef CHECK_H +#define CHECK_H "sse-check.h" +#endif + +#include CHECK_H + +#ifndef TEST +#define TEST sse_test_maxps_2 +#endif + +#include <xmmintrin.h> + +static __m128 +__attribute__((noinline, unused)) +test (__m128 s1, __m128 s2) +{ + return _mm_max_ps (s1, s2); +} + +static void +TEST (void) +{ + union128 u, s1, s2; + float e[4]; + int i; + + s1.x = _mm_set_ps (24.43, __builtin_nanf("1"), __builtin_nanf("2"), 546.46); + s2.x = _mm_set_ps (__builtin_nanf("3"), __builtin_nanf("4"), 3.15, 4.14); + u.x = test (s1.x, s2.x); + + for (i = 0; i < 4; i++) + e[i] = s1.a[i] > s2.a[i] ? 
s1.a[i] : s2.a[i]; + + if (__builtin_memcmp (&u, e, 16)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/powerpc/sse-minps-2.c b/gcc/testsuite/gcc.target/powerpc/sse-minps-2.c new file mode 100644 index 0000000..4cb4b73 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse-minps-2.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-require-effective-target p8vector_hw } */ + +#define NO_WARN_X86_INTRINSICS 1 + +#ifndef CHECK_H +#define CHECK_H "sse-check.h" +#endif + +#include CHECK_H + +#ifndef TEST +#define TEST sse_test_minps_2 +#endif + +#include <xmmintrin.h> + +static __m128 +__attribute__((noinline, unused)) +test (__m128 s1, __m128 s2) +{ + return _mm_min_ps (s1, s2); +} + +static void +TEST (void) +{ + union128 u, s1, s2; + float e[4]; + int i; + + s1.x = _mm_set_ps (24.43, __builtin_nanf("1"), __builtin_nanf("2"), 546.46); + s2.x = _mm_set_ps (__builtin_nanf("3"), __builtin_nanf("4"), 3.15, 4.14); + u.x = test (s1.x, s2.x); + + for (i = 0; i < 4; i++) + e[i] = s1.a[i] < s2.a[i] ? s1.a[i] : s2.a[i]; + + if (__builtin_memcmp (&u, e, 16)) + abort (); +} -- 1.8.3.1