On Wed, Jan 28, 2015 at 3:29 PM, Ilya Tocar <tocarip.in...@gmail.com> wrote:
> I'd like to backport this to 4.8/4.9
> Is this ok?
OK, since it just adds new inline function/define names for compatibility.

Uros.

> On 15 Jan 17:17, Ilya Tocar wrote:
>> Hi,
>> It looks like the new ISA doc [1] renamed the srli/slli intrinsics to
>> bsrli/bslli.  This patch adds the b* versions, while keeping the old
>> srli/slli names for backward compatibility.
>> OK for trunk?
>>
>> 1: https://software.intel.com/sites/default/files/managed/0d/53/319433-022.pdf
>>
>> ChangeLog:
>> gcc/
>> * config/i386/avx2intrin.h (_mm256_bslli_si256,
>> _mm256_bsrli_si256): New.
>> * config/i386/emmintrin.h (_mm_bsrli_si128, _mm_bslli_si128):
>> Ditto.
>>
>> testsuite/
>> * gcc.target/i386/sse-14.c: Test new intrinsics.
>> * gcc.target/i386/sse-22.c: Ditto.
>> ---
>>  gcc/config/i386/avx2intrin.h           | 18 ++++++++++++++++++
>>  gcc/config/i386/emmintrin.h            | 16 ++++++++++++++++
>>  gcc/testsuite/gcc.target/i386/sse-14.c |  2 ++
>>  gcc/testsuite/gcc.target/i386/sse-22.c |  4 ++++
>>  4 files changed, 40 insertions(+)
>>
>> diff --git a/gcc/config/i386/avx2intrin.h b/gcc/config/i386/avx2intrin.h
>> index 669f1dc..8a30c5b 100644
>> --- a/gcc/config/i386/avx2intrin.h
>> +++ b/gcc/config/i386/avx2intrin.h
>> @@ -645,11 +645,20 @@ _mm256_sign_epi32 (__m256i __X, __m256i __Y)
>>  #ifdef __OPTIMIZE__
>>  extern __inline __m256i
>>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>> +_mm256_bslli_si256 (__m256i __A, const int __N)
>> +{
>> +  return (__m256i)__builtin_ia32_pslldqi256 (__A, __N * 8);
>> +}
>> +
>> +extern __inline __m256i
>> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>>  _mm256_slli_si256 (__m256i __A, const int __N)
>>  {
>>    return (__m256i)__builtin_ia32_pslldqi256 (__A, __N * 8);
>>  }
>>  #else
>> +#define _mm256_bslli_si256(A, N) \
>> +  ((__m256i)__builtin_ia32_pslldqi256 ((__m256i)(A), (int)(N) * 8))
>>  #define _mm256_slli_si256(A, N) \
>>    ((__m256i)__builtin_ia32_pslldqi256 ((__m256i)(A), (int)(N) * 8))
>>  #endif
>> @@ -727,11 +736,20 @@ _mm256_sra_epi32 (__m256i __A, __m128i __B)
>>  #ifdef __OPTIMIZE__
>>  extern __inline __m256i
>>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>> +_mm256_bsrli_si256 (__m256i __A, const int __N)
>> +{
>> +  return (__m256i)__builtin_ia32_psrldqi256 (__A, __N * 8);
>> +}
>> +
>> +extern __inline __m256i
>> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>>  _mm256_srli_si256 (__m256i __A, const int __N)
>>  {
>>    return (__m256i)__builtin_ia32_psrldqi256 (__A, __N * 8);
>>  }
>>  #else
>> +#define _mm256_bsrli_si256(A, N) \
>> +  ((__m256i)__builtin_ia32_psrldqi256 ((__m256i)(A), (int)(N) * 8))
>>  #define _mm256_srli_si256(A, N) \
>>    ((__m256i)__builtin_ia32_psrldqi256 ((__m256i)(A), (int)(N) * 8))
>>  #endif
>> diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h
>> index ad37fac..b19f05a 100644
>> --- a/gcc/config/i386/emmintrin.h
>> +++ b/gcc/config/i386/emmintrin.h
>> @@ -1165,6 +1165,18 @@ _mm_srai_epi32 (__m128i __A, int __B)
>>
>>  #ifdef __OPTIMIZE__
>>  extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +_mm_bsrli_si128 (__m128i __A, const int __N)
>> +{
>> +  return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
>> +}
>> +
>> +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +_mm_bslli_si128 (__m128i __A, const int __N)
>> +{
>> +  return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
>> +}
>> +
>> +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>>  _mm_srli_si128 (__m128i __A, const int __N)
>>  {
>>    return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
>> @@ -1176,6 +1188,10 @@ _mm_slli_si128 (__m128i __A, const int __N)
>>    return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
>>  }
>>  #else
>> +#define _mm_bsrli_si128(A, N) \
>> +  ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
>> +#define _mm_bslli_si128(A, N) \
>> +  ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
>>  #define _mm_srli_si128(A, N) \
>>    ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
>>  #define _mm_slli_si128(A, N) \
>> diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
>> index f3f6c5c..e8791e3 100644
>> --- a/gcc/testsuite/gcc.target/i386/sse-14.c
>> +++ b/gcc/testsuite/gcc.target/i386/sse-14.c
>> @@ -601,6 +601,8 @@ test_2 (_mm_alignr_pi8, __m64, __m64, __m64, 1)
>>
>>  /* emmintrin.h */
>>  test_2 (_mm_shuffle_pd, __m128d, __m128d, __m128d, 1)
>> +test_1 (_mm_bsrli_si128, __m128i, __m128i, 1)
>> +test_1 (_mm_bslli_si128, __m128i, __m128i, 1)
>>  test_1 (_mm_srli_si128, __m128i, __m128i, 1)
>>  test_1 (_mm_slli_si128, __m128i, __m128i, 1)
>>  test_1 (_mm_extract_epi16, int, __m128i, 1)
>> diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
>> index 0d7bd16..5735514 100644
>> --- a/gcc/testsuite/gcc.target/i386/sse-22.c
>> +++ b/gcc/testsuite/gcc.target/i386/sse-22.c
>> @@ -138,6 +138,8 @@ test_1 (_mm_prefetch, void, void *, _MM_HINT_NTA)
>>  #endif
>>  #include <emmintrin.h>
>>  test_2 (_mm_shuffle_pd, __m128d, __m128d, __m128d, 1)
>> +test_1 (_mm_bsrli_si128, __m128i, __m128i, 1)
>> +test_1 (_mm_bslli_si128, __m128i, __m128i, 1)
>>  test_1 (_mm_srli_si128, __m128i, __m128i, 1)
>>  test_1 (_mm_slli_si128, __m128i, __m128i, 1)
>>  test_1 (_mm_extract_epi16, int, __m128i, 1)
>> @@ -269,6 +271,8 @@ test_2 ( _mm256_blend_epi16, __m256i, __m256i, __m256i, 1)
>>  test_1 ( _mm256_shuffle_epi32, __m256i, __m256i, 1)
>>  test_1 ( _mm256_shufflehi_epi16, __m256i, __m256i, 1)
>>  test_1 ( _mm256_shufflelo_epi16, __m256i, __m256i, 1)
>> +test_1 ( _mm256_bslli_si256, __m256i, __m256i, 8)
>> +test_1 ( _mm256_bsrli_si256, __m256i, __m256i, 8)
>>  test_1 ( _mm256_slli_si256, __m256i, __m256i, 8)
>>  test_1 ( _mm256_srli_si256, __m256i, __m256i, 8)
>>  test_2 ( _mm_blend_epi32, __m128i, __m128i, __m128i, 1)
>> --
>> 1.8.3.1
>>
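For context, a minimal usage sketch (not part of the patch): the function names, the 4-byte shift count, and the -msse2/-mavx2 flags below are illustrative assumptions. The new b*-named intrinsics are byte-granularity shifts that behave exactly like the existing _mm_slli_si128/_mm_srli_si128 and _mm256_slli_si256/_mm256_srli_si256 spellings, so existing code keeps working while new code can follow the renamed ISA-doc names.

/* Usage sketch only; hypothetical example, compile with e.g. -msse2 -mavx2.  */
#include <emmintrin.h>   /* _mm_bslli_si128, _mm_bsrli_si128 */
#include <immintrin.h>   /* _mm256_bslli_si256, _mm256_bsrli_si256 (AVX2) */

/* Shift a 128-bit value left and then right by 4 bytes; identical in
   behavior to the old _mm_slli_si128/_mm_srli_si128 names.  */
__m128i
shift_bytes_128 (__m128i x)
{
  __m128i t = _mm_bslli_si128 (x, 4);
  return _mm_bsrli_si128 (t, 4);
}

/* Same idea with the 256-bit versions; these shift each 128-bit lane.  */
__m256i
shift_bytes_256 (__m256i x)
{
  __m256i t = _mm256_bslli_si256 (x, 4);
  return _mm256_bsrli_si256 (t, 4);
}

As in the patch, the byte count must be a compile-time constant, which is why the header provides both the always_inline functions (under __OPTIMIZE__) and the macro fallbacks.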