On Saturday 22 April 2017, Allan Sandfeld Jensen wrote:
> Replaces definitions of immediate logical shift intrinsics with GCC
> extension syntax. Tests are added to ensure the intrinsics still produce
> the right instructions and that a few basic optimizations now work.
> 
> Compared to the earlier version of the patch, all potentially undefined
> shifts are now avoided, which also means no variable shifts or arithmetic
> right shifts.

Fixed 2 errors in the tests.
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b58f5050db0..b9406550fc5 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2017-04-22  Allan Sandfeld Jensen  <sandf...@kde.org>
+
+	* config/i386/emmintrin.h (_mm_slli_*, _mm_srli_*):
+	Use vector intrinsics instead of builtins.
+	* config/i386/avx2intrin.h (_mm256_slli_*, _mm256_srli_*):
+	Use vector intrinsics instead of builtins.
+
 2017-04-21  Uros Bizjak  <ubiz...@gmail.com>
 
 	* config/i386/i386.md (*extzvqi_mem_rex64): Move above *extzv<mode>.
diff --git a/gcc/config/i386/avx2intrin.h b/gcc/config/i386/avx2intrin.h
index 82f170a3d61..acb49734131 100644
--- a/gcc/config/i386/avx2intrin.h
+++ b/gcc/config/i386/avx2intrin.h
@@ -667,7 +667,7 @@ extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_slli_epi16 (__m256i __A, int __B)
 {
-  return (__m256i)__builtin_ia32_psllwi256 ((__v16hi)__A, __B);
+  return ((__B & 0xff) < 16) ? (__m256i)((__v16hi)__A << (__B & 0xff)) : _mm256_setzero_si256();
 }
 
 extern __inline __m256i
@@ -681,7 +681,7 @@ extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_slli_epi32 (__m256i __A, int __B)
 {
-  return (__m256i)__builtin_ia32_pslldi256 ((__v8si)__A, __B);
+  return ((__B & 0xff) < 32) ? (__m256i)((__v8si)__A << (__B & 0xff)) : _mm256_setzero_si256();
 }
 
 extern __inline __m256i
@@ -695,7 +695,7 @@ extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_slli_epi64 (__m256i __A, int __B)
 {
-  return (__m256i)__builtin_ia32_psllqi256 ((__v4di)__A, __B);
+  return ((__B & 0xff) < 64) ? (__m256i)((__v4di)__A << (__B & 0xff)) : _mm256_setzero_si256();
 }
 
 extern __inline __m256i
@@ -758,7 +758,7 @@ extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_srli_epi16 (__m256i __A, int __B)
 {
-  return (__m256i)__builtin_ia32_psrlwi256 ((__v16hi)__A, __B);
+  return ((__B & 0xff) < 16) ? (__m256i) ((__v16hu)__A >> (__B & 0xff)) : _mm256_setzero_si256();
 }
 
 extern __inline __m256i
@@ -772,7 +772,7 @@ extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_srli_epi32 (__m256i __A, int __B)
 {
-  return (__m256i)__builtin_ia32_psrldi256 ((__v8si)__A, __B);
+  return ((__B & 0xff) < 32) ? (__m256i) ((__v8su)__A >> (__B & 0xff)) : _mm256_setzero_si256();
 }
 
 extern __inline __m256i
@@ -786,7 +786,7 @@ extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_srli_epi64 (__m256i __A, int __B)
 {
-  return (__m256i)__builtin_ia32_psrlqi256 ((__v4di)__A, __B);
+  return ((__B & 0xff) < 64) ? (__m256i) ((__v4du)__A >> (__B & 0xff)) : _mm256_setzero_si256();
 }
 
 extern __inline __m256i
diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h
index 828f4a07a9b..5c048d9fd0d 100644
--- a/gcc/config/i386/emmintrin.h
+++ b/gcc/config/i386/emmintrin.h
@@ -1140,19 +1140,19 @@ _mm_mul_epu32 (__m128i __A, __m128i __B)
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_slli_epi16 (__m128i __A, int __B)
 {
-  return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
+  return ((__B & 0xff) < 16) ? (__m128i)((__v8hi)__A << (__B & 0xff)) : _mm_setzero_si128();
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_slli_epi32 (__m128i __A, int __B)
 {
-  return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
+  return ((__B & 0xff) < 32) ? (__m128i)((__v4si)__A << (__B & 0xff)) : _mm_setzero_si128();
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_slli_epi64 (__m128i __A, int __B)
 {
-  return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
+  return ((__B & 0xff) < 64) ? (__m128i)((__v2di)__A << (__B & 0xff)) : _mm_setzero_si128();
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1205,19 +1205,19 @@ _mm_slli_si128 (__m128i __A, const int __N)
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_srli_epi16 (__m128i __A, int __B)
 {
-  return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
+  return ((__B & 0xff) < 16) ? (__m128i)((__v8hu)__A >> (__B & 0xff)) : _mm_setzero_si128();
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_srli_epi32 (__m128i __A, int __B)
 {
-  return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
+  return ((__B & 0xff) < 32) ? (__m128i)((__v4su)__A >> (__B & 0xff)) : _mm_setzero_si128();
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_srli_epi64 (__m128i __A, int __B)
 {
-  return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
+  return ((__B & 0xff) < 64) ? (__m128i)((__v2du)__A >> (__B & 0xff)) : _mm_setzero_si128();
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 6f4dc8d5095..ffface0f6b3 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,13 @@
+2017-04-22  Allan Sandfeld Jensen  <sandf...@kde.org>
+
+	* gcc.target/i386/sse2-pslld-1.c: Expand test with more corner cases.
+	* gcc.target/i386/sse2-psllw-1.c: Expand test with more corner cases.
+	* gcc.target/i386/sse2-psrld-1.c: Expand test with more corner cases.
+	* gcc.target/i386/sse2-shifts-1.c: New testcases of shift intrinsics
+	producing intended instructions.
+	* gcc.target/i386/sse2-shifts-2.c: New testcases of shift intrinsics
+	being folded.
+
 2017-04-21  Janus Weil  <ja...@gcc.gnu.org>
 
 	PR fortran/80392
diff --git a/gcc/testsuite/gcc.target/i386/sse2-pslld-1.c b/gcc/testsuite/gcc.target/i386/sse2-pslld-1.c
index 31474e3234f..b2eb938fc06 100644
--- a/gcc/testsuite/gcc.target/i386/sse2-pslld-1.c
+++ b/gcc/testsuite/gcc.target/i386/sse2-pslld-1.c
@@ -10,17 +10,15 @@
 #define TEST sse2_test
 #endif
 
-#define N 0xf
-
 #include CHECK_H
 
 #include <emmintrin.h>
 
 static __m128i
 __attribute__((noinline, unused))
-test (__m128i s1)
+test (__m128i s1, int n)
 {
-  return _mm_slli_epi32 (s1, N); 
+  return _mm_slli_epi32 (s1, n); 
 }
 
 static void
@@ -28,16 +26,25 @@ TEST (void)
 {
   union128i_d u, s;
   int e[4] = {0};
-  int i;
+  int ns[4] = {15, 65, 260, -250};
  
   s.x = _mm_set_epi32 (1, -2, 3, 4);
 
-  u.x = test (s.x);
-
-  if (N < 32)
-    for (i = 0; i < 4; i++)
-      e[i] = s.a[i] << N; 
-
-  if (check_union128i_d (u, e))
-    abort (); 
+  for (int j = 0; j < 4; j++) {
+    int n = ns[j];
+    u.x = test (s.x, n);
+    
+    n = n & 0xff;
+    if (n < 32) {
+      for (int i = 0; i < 4; i++)
+        e[i] = s.a[i] << n;
+    } else {
+      for (int i = 0; i < 4; i++)
+        e[i] = 0;
+    }
+
+
+    if (check_union128i_d (u, e))
+      abort ();
+  }
 }
diff --git a/gcc/testsuite/gcc.target/i386/sse2-psllw-1.c b/gcc/testsuite/gcc.target/i386/sse2-psllw-1.c
index 3153ec45529..6a740fce050 100644
--- a/gcc/testsuite/gcc.target/i386/sse2-psllw-1.c
+++ b/gcc/testsuite/gcc.target/i386/sse2-psllw-1.c
@@ -10,17 +10,15 @@
 #define TEST sse2_test
 #endif
 
-#define N 0xb
-
 #include CHECK_H
 
 #include <emmintrin.h>
 
 static __m128i
 __attribute__((noinline, unused))
-test (__m128i s1)
+test (__m128i s1, int n)
 {
-  return _mm_slli_epi16 (s1, N); 
+  return _mm_slli_epi16 (s1, n); 
 }
 
 static void
@@ -28,16 +26,25 @@ TEST (void)
 {
   union128i_w u, s;
   short e[8] = {0};
+  int ns[4] = {11, 16, 63, -250};
   int i;
  
   s.x = _mm_set_epi16 (1, 2, 3, 4, 5, 6, 0x7000, 0x9000);
 
-  u.x = test (s.x);
-
-  if (N < 16)
-    for (i = 0; i < 8; i++)
-      e[i] = s.a[i] << N; 
-
-  if (check_union128i_w (u, e))
-    abort (); 
+  for (int j = 0; j < 4; j++) {
+    int n = ns[j];
+    u.x = test (s.x, n);
+
+    n = n & 0xff;
+    if (n < 16) {
+      for (i = 0; i < 8; i++)
+        e[i] = s.a[i] << n;
+    } else {
+      for (int i = 0; i < 8; i++)
+        e[i] = 0;
+    }
+
+    if (check_union128i_w (u, e))
+      abort (); 
+  }
 }
diff --git a/gcc/testsuite/gcc.target/i386/sse2-psrld-1.c b/gcc/testsuite/gcc.target/i386/sse2-psrld-1.c
index d310fc45204..ec5f1c2d391 100644
--- a/gcc/testsuite/gcc.target/i386/sse2-psrld-1.c
+++ b/gcc/testsuite/gcc.target/i386/sse2-psrld-1.c
@@ -10,17 +10,15 @@
 #define TEST sse2_test
 #endif
 
-#define N 0xf
-
 #include CHECK_H
 
 #include <emmintrin.h>
 
 static __m128i
 __attribute__((noinline, unused))
-test (__m128i s1)
+test (__m128i s1, int n)
 {
-  return _mm_srli_epi32 (s1, N); 
+  return _mm_srli_epi32 (s1, n);
 }
 
 static void
@@ -28,19 +26,28 @@ TEST (void)
 {
   union128i_d u, s;
   int e[4] = {0};
+  int ns[4] = {15, 65, 260, -250};
   unsigned int tmp;
   int i;
  
   s.x = _mm_set_epi32 (1, -2, 3, 4);
 
-  u.x = test (s.x);
-
-  if (N < 32)
-    for (i = 0; i < 4; i++) {
-      tmp  = s.a[i];
-      e[i] = tmp >> N; 
+  for (int j = 0; j < 4; j++) {
+    int n = ns[j];
+    u.x = test (s.x, n);
+
+    n = n & 0xff;
+    if (n < 32) {
+      for (i = 0; i < 4; i++) {
+        tmp  = s.a[i];
+        e[i] = tmp >> n; 
+      }
+    } else {
+      for (int i = 0; i < 4; i++)
+        e[i] = 0;
     }
 
-  if (check_union128i_d (u, e))
-    abort (); 
+    if (check_union128i_d (u, e))
+      abort ();
+  }
 }
diff --git a/gcc/testsuite/gcc.target/i386/sse2-shifts-1.c b/gcc/testsuite/gcc.target/i386/sse2-shifts-1.c
new file mode 100644
index 00000000000..a2305cf042a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-shifts-1.c
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -mno-avx" } */
+/* { dg-require-effective-target sse2 } */
+
+#include <emmintrin.h>
+
+__m128i test1(__m128i a)
+{
+    return _mm_slli_epi16(a, 9);
+}
+
+__m128i test2(__m128i a)
+{
+    return _mm_slli_epi32(a, 13);
+}
+
+__m128i test3(__m128i a)
+{
+    return _mm_slli_epi64(a, 17);
+}
+
+__m128i test4(__m128i a)
+{
+    return _mm_srli_epi16(a, 9);
+}
+
+__m128i test5(__m128i a)
+{
+    return _mm_srli_epi32(a, 13);
+}
+
+__m128i test6(__m128i a)
+{
+    return _mm_srli_epi64(a, 7);
+}
+
+__m128i test7(__m128i a)
+{
+    return _mm_srai_epi16(a, 3);
+}
+
+__m128i test8(__m128i a)
+{
+    return _mm_srai_epi32(a, 6);
+}
+
+/* { dg-final { scan-assembler "psllw" } } */
+/* { dg-final { scan-assembler "pslld" } } */
+/* { dg-final { scan-assembler "psllq" } } */
+/* { dg-final { scan-assembler "psrlw" } } */
+/* { dg-final { scan-assembler "psrld" } } */
+/* { dg-final { scan-assembler "psrlq" } } */
+/* { dg-final { scan-assembler "psraw" } } */
+/* { dg-final { scan-assembler "psrad" } } */
diff --git a/gcc/testsuite/gcc.target/i386/sse2-shifts-2.c b/gcc/testsuite/gcc.target/i386/sse2-shifts-2.c
new file mode 100644
index 00000000000..ce05a7dc44e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-shifts-2.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+/* { dg-require-effective-target sse2 } */
+
+#include <emmintrin.h>
+
+__m128i test1(__m128i a)
+{
+    a = _mm_slli_epi16(a, 2);
+    return _mm_slli_epi16(a, 3);
+}
+/* { dg-final { scan-assembler "psllw.*5"} } */
+
+__m128i test3(__m128i a)
+{
+    a = _mm_srli_epi16(a, 4);
+    return _mm_srli_epi16(a, 9);
+}
+/* { dg-final { scan-assembler-times "psrlw" 1} } */
+
+__m128i test4(__m128i a)
+{
+    a = _mm_setr_epi32(128, 255, 86, 23);
+    return _mm_srli_epi32(a, 8);
+}
+/* { dg-final { scan-assembler-not "psrld"} } */
+

Reply via email to