r12-6103-g1a7ce8570997eb combines vpcmpuw + zero_extend into vpcmpuw
with the pre_reload splitter, but the splitter transforms the
zero_extend into a subreg, which makes reload think the upper part is
garbage; that is not correct.

The patch changes the zero_extend define_insn_and_split patterns to
define_insn so that the zero_extend is kept.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready to push to trunk.

gcc/ChangeLog:

        PR target/117159
        * config/i386/sse.md
        (*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
        Change from define_insn_and_split to define_insn.
        (*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
        Ditto.
        (*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
        Ditto.
        (*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
        Ditto.
        (*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2):
        Split to the zero_extend pattern.
        (*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2):
        Ditto.
        (*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2):
        Ditto.
        (*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2):
        Ditto.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/pr117159.c: New test.
        * gcc.target/i386/avx512bw-pr103750-1.c: Remove xfail.
        * gcc.target/i386/avx512bw-pr103750-2.c: Remove xfail.
---
 gcc/config/i386/sse.md                        | 186 +++++++-----------
 .../gcc.target/i386/avx512bw-pr103750-1.c     |   3 +-
 .../gcc.target/i386/avx512bw-pr103750-2.c     |   3 +-
 gcc/testsuite/gcc.target/i386/pr117159.c      |  42 ++++
 4 files changed, 113 insertions(+), 121 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr117159.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a45b50ad732..06c2c9d7a5e 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4298,32 +4298,19 @@ (define_insn 
"<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
 
 ;; Since vpcmpd implicitly clear the upper bits of dest, transform
 ;; vpcmpd + zero_extend to vpcmpd since the instruction
-(define_insn_and_split 
"*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
-  [(set (match_operand:SWI248x 0 "register_operand")
+(define_insn "*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
+  [(set (match_operand:SWI248x 0 "register_operand" "=k")
        (zero_extend:SWI248x
          (unspec:<V48H_AVX512VL:avx512fmaskmode>
-           [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand")
-            (match_operand:V48H_AVX512VL 2 "nonimmediate_operand")
-            (match_operand:SI 3 "const_0_to_7_operand")]
+           [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand" "v")
+            (match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm")
+            (match_operand:SI 3 "const_0_to_7_operand" "n")]
            UNSPEC_PCMP)))]
   "TARGET_AVX512F
    && (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW)
-   && ix86_pre_reload_split ()
    && (GET_MODE_NUNITS (<V48H_AVX512VL:MODE>mode)
       < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
-  "#"
-  "&& 1"
-  [(set (match_dup 0)
-       (unspec:<V48H_AVX512VL:avx512fmaskmode>
-         [(match_dup 1)
-          (match_dup 2)
-          (match_dup 3)]
-         UNSPEC_PCMP))]
-{
-  operands[1] = force_reg (<V48H_AVX512VL:MODE>mode, operands[1]);
-  operands[0] = lowpart_subreg (<V48H_AVX512VL:avx512fmaskmode>mode,
-                                operands[0], <SWI248x:MODE>mode);
-}
+  "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssecmp")
    (set_attr "length_immediate" "1")
    (set_attr "prefix" "evex")
@@ -4351,21 +4338,19 @@ (define_insn_and_split 
"*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:m
   "#"
   "&& 1"
   [(set (match_dup 0)
-       (unspec:<V48H_AVX512VL:avx512fmaskmode>
-         [(match_dup 1)
-          (match_dup 2)
-          (match_dup 3)]
-         UNSPEC_PCMP))
-   (set (match_dup 4) (match_dup 0))]
+    (zero_extend:SWI248x
+         (unspec:<V48H_AVX512VL:avx512fmaskmode>
+           [(match_dup 1)
+            (match_dup 2)
+            (match_dup 3)]
+           UNSPEC_PCMP)))
+   (set (match_dup 4) (match_dup 5))]
 {
-  operands[1] = force_reg (<V48H_AVX512VL:MODE>mode, operands[1]);
-  operands[0] = lowpart_subreg (<V48H_AVX512VL:avx512fmaskmode>mode,
+  operands[5] = lowpart_subreg (<V48H_AVX512VL:avx512fmaskmode>mode,
                                operands[0], <SWI248x:MODE>mode);
-}
-  [(set_attr "type" "ssecmp")
-   (set_attr "length_immediate" "1")
-   (set_attr "prefix" "evex")
-   (set_attr "mode" "<V48H_AVX512VL:sseinsnmode>")])
+  SUBREG_PROMOTED_VAR_P (operands[5]) = 1;
+  SUBREG_PROMOTED_SET (operands[5], 1);
+})
 
 (define_insn_and_split "*<avx512>_cmp<mode>3"
   [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
@@ -4400,31 +4385,18 @@ (define_insn 
"<avx512>_cmp<mode>3<mask_scalar_merge_name>"
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn_and_split 
"*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
-  [(set (match_operand:SWI248x 0 "register_operand")
+(define_insn "*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
+  [(set (match_operand:SWI248x 0 "register_operand" "=k")
        (zero_extend:SWI248x
          (unspec:<VI12_AVX512VL:avx512fmaskmode>
-           [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
-            (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")
-            (match_operand:SI 3 "const_0_to_7_operand")]
+           [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v")
+            (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
+            (match_operand:SI 3 "const_0_to_7_operand" "n")]
            UNSPEC_PCMP)))]
   "TARGET_AVX512BW
-  && ix86_pre_reload_split ()
-  && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode)
-      < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
-  "#"
-  "&& 1"
-  [(set (match_dup 0)
-       (unspec:<VI12_AVX512VL:avx512fmaskmode>
-         [(match_dup 1)
-          (match_dup 2)
-          (match_dup 3)]
-         UNSPEC_PCMP))]
-{
-  operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]);
-  operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
-                               operands[0], <SWI248x:MODE>mode);
-}
+   && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode)
+       < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
+  "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssecmp")
    (set_attr "length_immediate" "1")
    (set_attr "prefix" "evex")
@@ -4451,16 +4423,18 @@ (define_insn_and_split 
"*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:m
   "#"
   "&& 1"
   [(set (match_dup 0)
-       (unspec:<VI12_AVX512VL:avx512fmaskmode>
-         [(match_dup 1)
-          (match_dup 2)
-          (match_dup 3)]
-         UNSPEC_PCMP))
-   (set (match_dup 4) (match_dup 0))]
+       (zero_extend:SWI248x
+         (unspec:<VI12_AVX512VL:avx512fmaskmode>
+          [(match_dup 1)
+               (match_dup 2)
+               (match_dup 3)]
+          UNSPEC_PCMP)))
+   (set (match_dup 4) (match_dup 5))]
 {
-  operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]);
-  operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
+  operands[5] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
                                operands[0], <SWI248x:MODE>mode);
+  SUBREG_PROMOTED_VAR_P (operands[5]) = 1;
+  SUBREG_PROMOTED_SET (operands[5], 1);
 }
   [(set_attr "type" "ssecmp")
    (set_attr "length_immediate" "1")
@@ -4518,31 +4492,18 @@ (define_insn 
"<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn_and_split 
"*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
-  [(set (match_operand:SWI248x 0 "register_operand")
+(define_insn "*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
+  [(set (match_operand:SWI248x 0 "register_operand" "=k")
        (zero_extend:SWI248x
          (unspec:<VI12_AVX512VL:avx512fmaskmode>
-           [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
-            (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")
-            (match_operand:SI 3 "const_0_to_7_operand")]
+           [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v")
+            (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
+            (match_operand:SI 3 "const_0_to_7_operand" "n")]
            UNSPEC_UNSIGNED_PCMP)))]
   "TARGET_AVX512BW
-  && ix86_pre_reload_split ()
   && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode)
       < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
-  "#"
-  "&& 1"
-  [(set (match_dup 0)
-       (unspec:<VI12_AVX512VL:avx512fmaskmode>
-         [(match_dup 1)
-          (match_dup 2)
-          (match_dup 3)]
-         UNSPEC_UNSIGNED_PCMP))]
-{
-  operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]);
-  operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
-                               operands[0], <SWI248x:MODE>mode);
-}
+  "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssecmp")
    (set_attr "length_immediate" "1")
    (set_attr "prefix" "evex")
@@ -4570,16 +4531,18 @@ (define_insn_and_split 
"*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:
   "#"
   "&& 1"
   [(set (match_dup 0)
-       (unspec:<VI12_AVX512VL:avx512fmaskmode>
-         [(match_dup 1)
-          (match_dup 2)
-          (match_dup 3)]
-         UNSPEC_UNSIGNED_PCMP))
-   (set (match_dup 4) (match_dup 0))]
-{
-  operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]);
-  operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
+       (zero_extend:SWI248x
+        (unspec:<VI12_AVX512VL:avx512fmaskmode>
+          [(match_dup 1)
+               (match_dup 2)
+               (match_dup 3)]
+          UNSPEC_UNSIGNED_PCMP)))
+   (set (match_dup 4) (match_dup 5))]
+{
+  operands[5] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
                                operands[0], <SWI248x:MODE>mode);
+  SUBREG_PROMOTED_VAR_P (operands[5]) = 1;
+  SUBREG_PROMOTED_SET (operands[5], 1);
 }
   [(set_attr "type" "ssecmp")
    (set_attr "length_immediate" "1")
@@ -4615,32 +4578,19 @@ (define_insn 
"<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn_and_split 
"*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
-  [(set (match_operand:SWI248x 0 "register_operand")
+(define_insn "*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
+  [(set (match_operand:SWI248x 0 "register_operand" "=k")
        (zero_extend:SWI248x
          (unspec:<VI48_AVX512VL:avx512fmaskmode>
-           [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
-            (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")
-            (match_operand:SI 3 "const_0_to_7_operand")]
+           [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "v")
+            (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
+            (match_operand:SI 3 "const_0_to_7_operand" "n")]
            UNSPEC_UNSIGNED_PCMP)))]
   "TARGET_AVX512F
    && (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW)
-   && ix86_pre_reload_split ()
    && (GET_MODE_NUNITS (<VI48_AVX512VL:MODE>mode)
       < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
-  "#"
-  "&& 1"
-  [(set (match_dup 0)
-       (unspec:<VI48_AVX512VL:avx512fmaskmode>
-         [(match_dup 1)
-          (match_dup 2)
-          (match_dup 3)]
-         UNSPEC_UNSIGNED_PCMP))]
-{
-  operands[1] = force_reg (<VI48_AVX512VL:MODE>mode, operands[1]);
-  operands[0] = lowpart_subreg (<VI48_AVX512VL:avx512fmaskmode>mode,
-                               operands[0], <SWI248x:MODE>mode);
-}
+  "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssecmp")
    (set_attr "length_immediate" "1")
    (set_attr "prefix" "evex")
@@ -4668,16 +4618,18 @@ (define_insn_and_split 
"*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:
   "#"
   "&& 1"
   [(set (match_dup 0)
-       (unspec:<VI48_AVX512VL:avx512fmaskmode>
-         [(match_dup 1)
-          (match_dup 2)
-          (match_dup 3)]
-         UNSPEC_UNSIGNED_PCMP))
-   (set (match_dup 4) (match_dup 0))]
-{
-  operands[1] = force_reg (<VI48_AVX512VL:MODE>mode, operands[1]);
-  operands[0] = lowpart_subreg (<VI48_AVX512VL:avx512fmaskmode>mode,
+       (zero_extend:SWI248x
+        (unspec:<VI48_AVX512VL:avx512fmaskmode>
+          [(match_dup 1)
+               (match_dup 2)
+               (match_dup 3)]
+          UNSPEC_UNSIGNED_PCMP)))
+   (set (match_dup 4) (match_dup 5))]
+{
+  operands[5] = lowpart_subreg (<VI48_AVX512VL:avx512fmaskmode>mode,
                                operands[0], <SWI248x:MODE>mode);
+  SUBREG_PROMOTED_VAR_P (operands[5]) = 1;
+  SUBREG_PROMOTED_SET (operands[5], 1);
 }
   [(set_attr "type" "ssecmp")
    (set_attr "length_immediate" "1")
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c 
b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c
index b1165f069bb..e7d6183232b 100644
--- a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c
@@ -1,8 +1,7 @@
 /* PR target/103750 */
 /* { dg-do compile }  */
 /* { dg-options "-O2 -mavx512bw -mavx512vl" } */
-/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */
-/* xfail need to be fixed.  */
+/* { dg-final { scan-assembler-not "kmov" } } */
 
 #include <immintrin.h>
 extern __m128i* pi128;
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c 
b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c
index 7303f5403ba..3392e193222 100644
--- a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c
@@ -1,8 +1,7 @@
 /* PR target/103750 */
 /* { dg-do compile }  */
 /* { dg-options "-O2 -mavx512dq -mavx512bw -mavx512vl" } */
-/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */
-/* xfail need to be fixed.  */
+/* { dg-final { scan-assembler-not "kmov" } } */
 
 #include <immintrin.h>
 extern __m128i* pi128;
diff --git a/gcc/testsuite/gcc.target/i386/pr117159.c 
b/gcc/testsuite/gcc.target/i386/pr117159.c
new file mode 100644
index 00000000000..b67d682ecef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117159.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-options "-Os -mavx512bw" } */
+/* { dg-require-effective-target avx512bw } */
+
+typedef __attribute__((__vector_size__ (4))) unsigned char W;
+typedef __attribute__((__vector_size__ (64))) int V;
+typedef __attribute__((__vector_size__ (64))) long long Vq;
+
+W w;
+V v;
+Vq vq;
+
+static inline W
+foo (short m)
+{
+  unsigned k = __builtin_ia32_pcmpgtq512_mask ((Vq) { }, vq, m);
+  W r = (W) k + w;
+  return r;
+}
+
+static inline W
+foo1 (short m)
+{
+  unsigned k = __builtin_ia32_pcmpgtd512_mask ((V) {1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1}, v, m);
+  W r = (W) k + w;
+  return r;
+}
+
+int
+main ()
+{
+  if (!__builtin_cpu_supports ("avx512bw"))
+    return 0;
+  W y = foo1 (65535);
+  if (!y[0] || !y[1] || y[2] || y[3])
+    __builtin_abort();
+  W x = foo (65535);
+  if (x[0] || x[1] || x[2] || x[3])
+    __builtin_abort();
+
+  return 0;
+}
-- 
2.31.1

Reply via email to