Hi, It was recently brought to our attention that the existing emmintrin.h header, which was believed to be feature-complete for SSE2 support, is actually missing four logical-op interfaces:
_mm_and_si128 _mm_andnot_si128 _mm_or_si128 _mm_xor_si128 This patch provides those with the obvious implementations, along with test cases. I've bootstrapped it on powerpc64le-linux-gnu (P8, P9) and powerpc64-linux-gnu (P7, P8) and tested it with no regressions. Is this okay for trunk? Although this isn't a regression, it is an oversight that leaves the SSE2 support incomplete. Thus I'd like to ask permission to also backport this to gcc-8-branch after a short waiting period. It's passed regstrap on P8 and P9 LE, and P7/P8 BE testing is underway. Is that backport okay if testing succeeds? [BTW, I'm shepherding this patch on behalf of Steve Munroe.] Thanks! Bill [gcc] 2018-07-10 Bill Schmidt <wschm...@linux.ibm.com> Steve Munroe <munroes...@gmail.com> * config/rs6000/emmintrin.h (_mm_and_si128): New function. (_mm_andnot_si128): Likewise. (_mm_or_si128): Likewise. (_mm_xor_si128): Likewise. [gcc/testsuite] 2018-07-10 Bill Schmidt <wschm...@linux.ibm.com> Steve Munroe <munroes...@gmail.com> * gcc.target/powerpc/sse2-pand-1.c: New file. * gcc.target/powerpc/sse2-pandn-1.c: Likewise. * gcc.target/powerpc/sse2-por-1.c: Likewise. * gcc.target/powerpc/sse2-pxor-1.c: Likewise. Index: gcc/config/rs6000/emmintrin.h =================================================================== --- gcc/config/rs6000/emmintrin.h (revision 262235) +++ gcc/config/rs6000/emmintrin.h (working copy) @@ -1884,6 +1884,30 @@ } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_and_si128 (__m128i __A, __m128i __B) +{ + return (__m128i)vec_and ((__v2di) __A, (__v2di) __B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_andnot_si128 (__m128i __A, __m128i __B) +{ + return (__m128i)vec_andc ((__v2di) __B, (__v2di) __A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_or_si128 (__m128i __A, __m128i __B) +{ + return (__m128i)vec_or ((__v2di) __A, (__v2di) __B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_xor_si128 (__m128i __A, __m128i __B) +{ + return (__m128i)vec_xor ((__v2di) __A, (__v2di) __B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_epi8 (__m128i __A, __m128i __B) { return (__m128i) vec_cmpeq ((__v16qi) __A, (__v16qi)__B); @@ -2333,3 +2357,4 @@ } #endif /* EMMINTRIN_H_ */ + Index: gcc/testsuite/gcc.target/powerpc/sse2-pand-1.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/sse2-pand-1.c (nonexistent) +++ gcc/testsuite/gcc.target/powerpc/sse2-pand-1.c (working copy) @@ -0,0 +1,41 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-require-effective-target p8vector_hw } */ + +#ifndef CHECK_H +#define CHECK_H "sse2-check.h" +#endif + +#include CHECK_H + +#ifndef TEST +#define TEST sse2_test_pand_1 +#endif + +#include <emmintrin.h> + +static __m128i +__attribute__((noinline, unused)) +test (__m128i s1, __m128i s2) +{ + return _mm_and_si128 (s1, s2); +} + +static void +TEST (void) +{ + union128i_b u, s1, s2; + char e[16]; + int i; + + s1.x = _mm_set_epi8 (1,2,3,4,10,20,30,90,-80,-40,-100,-15,98, 25, 98,7); + s2.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, -100, -34, -78, -39, 6, 3, 4, 5, 119); + u.x = test (s1.x, s2.x); + + for (i = 0; i < 16; i++) + e[i] = s1.a[i] & s2.a[i]; + + if (check_union128i_b (u, e)) + abort (); +} Index: gcc/testsuite/gcc.target/powerpc/sse2-pandn-1.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/sse2-pandn-1.c (nonexistent) +++ gcc/testsuite/gcc.target/powerpc/sse2-pandn-1.c (working copy) @@ -0,0 +1,41 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-require-effective-target p8vector_hw } */ + +#ifndef CHECK_H +#define CHECK_H "sse2-check.h" +#endif + +#include CHECK_H + +#ifndef TEST +#define TEST sse2_test_pandn_1 +#endif + +#include <emmintrin.h> + +static __m128i +__attribute__((noinline, unused)) +test (__m128i s1, __m128i s2) +{ + return _mm_andnot_si128 (s1, s2); +} + +static void +TEST (void) +{ + union128i_b u, s1, s2; + char e[16]; + int i; + + s1.x = _mm_set_epi8 (1,2,3,4,10,20,30,90,-80,-40,-100,-15,98, 25, 98,7); + s2.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, -100, -34, -78, -39, 6, 3, 4, 5, 119); + u.x = test (s1.x, s2.x); + + for (i = 0; i < 16; i++) + e[i] = (~s1.a[i]) & s2.a[i]; + + if (check_union128i_b (u, e)) + abort (); +} Index: gcc/testsuite/gcc.target/powerpc/sse2-por-1.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/sse2-por-1.c (nonexistent) +++ gcc/testsuite/gcc.target/powerpc/sse2-por-1.c (working copy) @@ -0,0 +1,43 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-require-effective-target p8vector_hw } */ + +#ifndef CHECK_H +#define CHECK_H "sse2-check.h" +#endif + +#include CHECK_H + +#ifndef TEST +#define TEST sse2_test_por_1 +#endif + +#include <emmintrin.h> + +static __m128i +__attribute__((noinline, unused)) +test (__m128i s1, __m128i s2) +{ + return _mm_or_si128 (s1, s2); +} + +static void +TEST (void) +{ + union128i_w u, s1, s2; + short e[8]; + int i; + + s1.x = _mm_set_epi16 (10,20,30,90,-80,-40,-100,-15); + s2.x = _mm_set_epi16 (11, 98, 76, -100, -34, -78, -39, 14); + u.x = test (s1.x, s2.x); + + for (i = 0; i < 8; i++) + { + e[i] = s1.a[i] | s2.a[i]; + } + + if (check_union128i_w (u, e)) + abort (); +} Index: gcc/testsuite/gcc.target/powerpc/sse2-pxor-1.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/sse2-pxor-1.c (nonexistent) +++ gcc/testsuite/gcc.target/powerpc/sse2-pxor-1.c (working copy) @@ -0,0 +1,41 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-require-effective-target p8vector_hw } */ + +#ifndef CHECK_H +#define CHECK_H "sse2-check.h" +#endif + +#include CHECK_H + +#ifndef TEST +#define TEST sse2_test_pxor_1 +#endif + +#include <emmintrin.h> + +static __m128i +__attribute__((noinline, unused)) +test (__m128i s1, __m128i s2) +{ + return _mm_xor_si128 (s1, s2); +} + +static void +TEST (void) +{ + union128i_ub u, s1, s2; + unsigned char e[16] = {0}; + int i; + + s1.x = _mm_set_epi8 (1,2,3,4,10,20,30,90,80,40,100,15,98, 25, 98,7); + s2.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, 100, 34, 78, 39, 6, 3, 4, 5, 119); + u.x = test (s1.x, s2.x); + + for (i = 0; i < 16; i++) + e[i] = s1.a[i] ^ s2.a[i]; + + if (check_union128i_ub (u, e)) + abort (); +}