2022-01-05  Uroš Bizjak  <ubiz...@gmail.com>

gcc/ChangeLog:

    PR target/103905
    * config/i386/i386-expand.c (expand_vec_perm_pshufb): Fix number of
    narrow mode remapped elements for !one_operand_p case.

gcc/testsuite/ChangeLog:

    PR target/103905
    * gcc.target/i386/pr103905.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
The testcase was also tested by Martin on a real XOP target.

Pushed to master.

Uros.
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index e93ef1cafa6..9bd8e539d08 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -18730,7 +18730,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
 {
   unsigned i, nelt, eltsz, mask;
   unsigned char perm[64];
-  machine_mode vmode = V16QImode;
+  machine_mode vmode;
   struct expand_vec_perm_d nd;
   rtx rperm[64], vperm, target, op0, op1;
 
@@ -18754,6 +18754,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
       case 16:
        if (!TARGET_XOP)
          return false;
+       vmode = V16QImode;
        break;
 
       case 32:
@@ -18803,6 +18804,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
       case 16:
        if (!TARGET_SSSE3)
          return false;
+       vmode = V16QImode;
        break;
 
       case 32:
@@ -18894,6 +18896,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
        /* Or if vpermps can be used.  */
        else if (d->vmode == V16SFmode)
          vmode = V16SImode;
+
        if (vmode == V64QImode)
          {
            /* vpshufb only works intra lanes, it is not
@@ -18946,8 +18949,10 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
 
   machine_mode vpmode = vmode;
 
-  if (vmode == V4QImode
-      || vmode == V8QImode)
+  nelt = GET_MODE_SIZE (vmode);
+
+  /* Emulate narrow modes with V16QI instructions.  */
+  if (nelt < 16)
     {
       rtx m128 = GEN_INT (-128);
 
@@ -18955,19 +18960,15 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
         account for inactive top elements from the first operand.  */
       if (!d->one_operand_p)
        {
-         int sz = GET_MODE_SIZE (vmode);
-
          for (i = 0; i < nelt; ++i)
            {
-             int ival = INTVAL (rperm[i]);
-             if (ival >= sz)
-               ival += 16-sz;
-             rperm[i] = GEN_INT (ival);
+             unsigned ival = UINTVAL (rperm[i]);
+             if (ival >= nelt)
+               rperm[i] = GEN_INT (ival + 16 - nelt);
            }
        }
 
-      /* V4QI/V8QI is emulated with V16QI instruction, fill inactive
-        elements in the top positions with zeros.  */
+      /* Fill inactive elements in the top positions with zeros.  */
       for (i = nelt; i < 16; ++i)
        rperm[i] = m128;
 
diff --git a/gcc/testsuite/gcc.target/i386/pr103905.c b/gcc/testsuite/gcc.target/i386/pr103905.c
new file mode 100644
index 00000000000..aef9c4d3a0a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103905.c
@@ -0,0 +1,25 @@
+/* PR target/103905 */
+/* { dg-do run } */
+/* { dg-require-effective-target xop } */
+/* { dg-options "-O3 -mxop" } */
+
+#include "xop-check.h"
+
+char perm[64];
+
+void
+__attribute__((noipa))
+foo (int n)
+{
+  for (int i = 0; i < n; ++i)
+    perm[i] = i;
+}
+
+static void
+xop_test (void)
+{
+  foo (8);
+
+  if (perm[7] != 7)
+    __builtin_abort ();
+}

Reply via email to