Hi, As PR109167 shows, it is unexpected for _mm_slli_si128 and _mm_bslli_si128 to have two different implementations; as in gcc/config/i386/emmintrin.h, they should be the same. This patch fixes that accordingly.
Bootstrapped and regtested on powerpc64-linux-gnu P8 and powerpc64le-linux-gnu P9 and P10. I'm going to push this soon if no objections. BR, Kewen ----- PR target/109167 gcc/ChangeLog: * config/rs6000/emmintrin.h (_mm_bslli_si128): Move the implementation from ... (_mm_slli_si128): ... here. Change to call _mm_bslli_si128 directly. gcc/testsuite/ChangeLog: * gcc.target/powerpc/pr109167.c: New test. --- gcc/config/rs6000/emmintrin.h | 26 ++++-------- gcc/testsuite/gcc.target/powerpc/pr109167.c | 47 +++++++++++++++++++++ 2 files changed, 56 insertions(+), 17 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/pr109167.c diff --git a/gcc/config/rs6000/emmintrin.h b/gcc/config/rs6000/emmintrin.h index bfff7ff6fea..44d01a83d8d 100644 --- a/gcc/config/rs6000/emmintrin.h +++ b/gcc/config/rs6000/emmintrin.h @@ -1601,8 +1601,14 @@ _mm_bslli_si128 (__m128i __A, const int __N) __v16qu __result; const __v16qu __zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - if (__N >= 0 && __N < 16) + if (__N == 0) + return __A; + else if (__N > 0 && __N < 16) +#ifdef __LITTLE_ENDIAN__ __result = vec_sld ((__v16qu) __A, __zeros, __N); +#else + __result = vec_sld (__zeros, (__v16qu) __A, (16 - __N)); +#endif else __result = __zeros; @@ -1647,23 +1653,9 @@ _mm_srli_si128 (__m128i __A, const int __N) } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_slli_si128 (__m128i __A, const int _imm5) +_mm_slli_si128 (__m128i __A, const int __N) { - __v16qu __result; - const __v16qu __zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - - if (_imm5 == 0) - return __A; - else if (_imm5 > 0 && _imm5 < 16) -#ifdef __LITTLE_ENDIAN__ - __result = vec_sld ((__v16qu) __A, __zeros, _imm5); -#else - __result = vec_sld (__zeros, (__v16qu) __A, (16 - _imm5)); -#endif - else - __result = __zeros; - - return (__m128i) __result; + return _mm_bslli_si128 (__A, __N); } extern __inline __m128i __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) diff --git a/gcc/testsuite/gcc.target/powerpc/pr109167.c b/gcc/testsuite/gcc.target/powerpc/pr109167.c new file mode 100644 index 00000000000..d490c995b14 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr109167.c @@ -0,0 +1,47 @@ +/* { dg-do run } */ +/* { dg-require-effective-target vsx_hw } */ +/* { dg-options "-O2 -mvsx" } */ + +/* Verify there is no warning message. */ + +#define NO_WARN_X86_INTRINSICS 1 + +#include <emmintrin.h> + +#define N 5 + +__attribute__ ((noipa)) __m128i +test1 (__m128i v) +{ + return _mm_bslli_si128 (v, N); +} + +__attribute__ ((noipa)) __m128i +test2 (__m128i v) +{ + return _mm_slli_si128 (v, N); +} + +typedef union +{ + __m128i x; + unsigned char a[16]; +} union128i_ub; + +int main() +{ + union128i_ub v; + v.x + = _mm_set_epi8 (1, 2, 3, 4, 10, 20, 30, 90, 80, 40, 100, 15, 98, 25, 98, 7); + + union128i_ub r1, r2; + r1.x = test1 (v.x); + r2.x = test2 (v.x); + + for (int i = 0; i < 16; i++) + if (r1.a[i] != r2.a[i]) + __builtin_abort(); + + return 0; +} + -- 2.31.1