The purpose of these define_insn_and_split patterns:
1. Combine vpcmpuw and zero_extend into vpcmpuw.
2. Canonicalize the vpcmpuw pattern so CSE can replace a duplicate vpcmpuw
with just a kmov.
3. Use DImode as the dest of zero_extend so cprop_hardreg can eliminate the
redundant kmov.

This should partially fix the issue in PR target/103750.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready to push to trunk.

gcc/ChangeLog:

        PR target/103750
        * config/i386/sse.md
        (*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
        New define_insn_and_split.
        (*<avx512>_cmp<mode>3): Ditto.
        (*<avx512>_cmp<mode>3_zero_extenddi): New define_insn.
        (*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
        New define_insn_and_split.
        (*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
        Ditto.
        (*<avx512>_ucmp<mode>3): Ditto.
        (*<avx512>_ucmp<mode>3_zero_extenddi): New define_insn.
        (*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
        New define_insn_and_split.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/bitwise_mask_op-3.c: Adjust test.
        * g++.target/i386/pr103750-1.C: New test.
---
 gcc/config/i386/sse.md                        | 267 ++++++++++++++++++
 gcc/testsuite/g++.target/i386/pr103750-1.C    |  50 ++++
 .../gcc.target/i386/bitwise_mask_op-3.c       |   6 +-
 3 files changed, 320 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/i386/pr103750-1.C

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5196149ee32..fb885d58272 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -3702,6 +3702,75 @@ (define_insn 
"<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+;; Those Splitters are used to canonicalize vpcmpuw pattern, so that CSE can 
transform
+;; duplicated vpcmpuw to vpcmpuw and kmov
+;; Choose biggest mode(DImode) as dest, so kmov can be optimized by 
cprop_hardreg.
+(define_insn_and_split 
"*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
+  [(set (match_operand:SWI248x 0 "register_operand" "=k")
+       (zero_extend:SWI248x
+         (unspec:<V48H_AVX512VL:avx512fmaskmode>
+           [(match_operand:V48H_AVX512VL 1 "register_operand" "v")
+            (match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm")
+            (match_operand:SI 3 "<V48H_AVX512VL:cmp_imm_predicate>" "n")]
+           UNSPEC_PCMP)))]
+  "TARGET_AVX512BW
+   && (GET_MODE_NUNITS (<V48H_AVX512VL:MODE>mode)
+       < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
+  "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  "&& <SWI248x:MODE>mode != E_DImode"
+  [(set (match_dup 0)
+       (zero_extend:DI
+         (unspec:<V48H_AVX512VL:avx512fmaskmode>
+           [(match_dup 1)
+            (match_dup 2)
+            (match_dup 3)]
+           UNSPEC_PCMP)))]
+  "operands[0] = lowpart_subreg (DImode, operands[0], <SWI248x:MODE>mode);"
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<V48H_AVX512VL:sseinsnmode>")])
+
+(define_insn_and_split "*<avx512>_cmp<mode>3"
+  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+       (unspec:<avx512fmaskmode>
+         [(match_operand:V48H_AVX512VL 1 "register_operand" "v")
+          (match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm")
+          (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
+         UNSPEC_PCMP))]
+  "TARGET_AVX512BW
+   && GET_MODE_NUNITS (<MODE>mode) < 64"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+       (zero_extend:DI
+         (unspec:<avx512fmaskmode>
+           [(match_dup 1)
+            (match_dup 2)
+            (match_dup 3)]
+           UNSPEC_PCMP)))]
+  "operands[0] = lowpart_subreg (DImode, operands[0], <avx512fmaskmode>mode);"
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*<avx512>_cmp<mode>3_zero_extenddi"
+  [(set (match_operand:DI 0 "register_operand" "=k")
+       (zero_extend:DI
+         (unspec:<avx512fmaskmode>
+           [(match_operand:V48H_AVX512VL 1 "register_operand" "v")
+            (match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm")
+            (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
+           UNSPEC_PCMP)))]
+  "TARGET_AVX512BW
+   && GET_MODE_NUNITS (<V48H_AVX512VL:MODE>mode) < 64"
+  "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
 (define_insn_and_split "*<avx512>_cmp<mode>3"
   [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
        (not:<avx512fmaskmode>
@@ -3735,6 +3804,72 @@ (define_insn 
"<avx512>_cmp<mode>3<mask_scalar_merge_name>"
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn_and_split 
"*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
+  [(set (match_operand:SWI248x 0 "register_operand" "=k")
+       (zero_extend:SWI248x
+         (unspec:<VI12_AVX512VL:avx512fmaskmode>
+           [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
+            (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
+            (match_operand:SI 3 "<VI12_AVX512VL:cmp_imm_predicate>" "n")]
+           UNSPEC_PCMP)))]
+  "TARGET_AVX512BW
+   && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode)
+       < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
+  "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  "&& <SWI248x:MODE>mode != E_DImode"
+  [(set (match_dup 0)
+       (zero_extend:DI
+         (unspec:<VI12_AVX512VL:avx512fmaskmode>
+           [(match_dup 1)
+            (match_dup 2)
+            (match_dup 3)]
+          UNSPEC_PCMP)))]
+  "operands[0] = lowpart_subreg (DImode, operands[0], <SWI248x:MODE>mode);"
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<VI12_AVX512VL:sseinsnmode>")])
+
+(define_insn_and_split "*<avx512>_cmp<mode>3"
+  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+       (unspec:<avx512fmaskmode>
+         [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
+          (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
+          (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
+         UNSPEC_PCMP))]
+  "TARGET_AVX512BW
+   && GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode) < 64"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+       (zero_extend:DI
+         (unspec:<avx512fmaskmode>
+           [(match_dup 1)
+            (match_dup 2)
+            (match_dup 3)]
+           UNSPEC_PCMP)))]
+  "operands[0] = lowpart_subreg (DImode, operands[0], <avx512fmaskmode>mode);"
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*<avx512>_cmp<mode>3_zero_extenddi"
+  [(set (match_operand:DI 0 "register_operand" "=k")
+       (zero_extend:DI
+         (unspec:<avx512fmaskmode>
+           [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
+            (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
+            (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
+           UNSPEC_PCMP)))]
+  "TARGET_AVX512BW
+   && GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode) < 64"
+  "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
 (define_int_iterator UNSPEC_PCMP_ITER
   [UNSPEC_PCMP UNSPEC_UNSIGNED_PCMP])
 
@@ -3771,6 +3906,72 @@ (define_insn 
"<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn_and_split 
"*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
+  [(set (match_operand:SWI248x 0 "register_operand" "=k")
+       (zero_extend:SWI248x
+         (unspec:<VI12_AVX512VL:avx512fmaskmode>
+           [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
+            (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
+            (match_operand:SI 3 "const_0_to_7_operand" "n")]
+           UNSPEC_UNSIGNED_PCMP)))]
+  "TARGET_AVX512BW
+  && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode)
+      < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
+  "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  "&& <SWI248x:MODE>mode != E_DImode"
+  [(set (match_dup 0)
+       (zero_extend:DI
+         (unspec:<VI12_AVX512VL:avx512fmaskmode>
+           [(match_dup 1)
+            (match_dup 2)
+            (match_dup 3)]
+           UNSPEC_UNSIGNED_PCMP)))]
+  "operands[0] = lowpart_subreg (DImode, operands[0], <SWI248x:MODE>mode);"
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<VI12_AVX512VL:sseinsnmode>")])
+
+(define_insn_and_split "*<avx512>_ucmp<mode>3"
+  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+       (unspec:<avx512fmaskmode>
+         [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
+          (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
+          (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
+         UNSPEC_UNSIGNED_PCMP))]
+  "TARGET_AVX512BW
+   && GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode) < 64"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+       (zero_extend:DI
+         (unspec:<avx512fmaskmode>
+           [(match_dup 1)
+            (match_dup 2)
+            (match_dup 3)]
+           UNSPEC_UNSIGNED_PCMP)))]
+  "operands[0] = lowpart_subreg (DImode, operands[0], <avx512fmaskmode>mode);"
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*<avx512>_ucmp<mode>3_zero_extenddi"
+  [(set (match_operand:DI 0 "register_operand" "=k")
+       (zero_extend:DI
+         (unspec:<avx512fmaskmode>
+           [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
+            (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
+            (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
+           UNSPEC_UNSIGNED_PCMP)))]
+  "TARGET_AVX512BW
+   && GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode) < 64"
+  "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
   [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
        (unspec:<avx512fmaskmode>
@@ -3785,6 +3986,72 @@ (define_insn 
"<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn_and_split 
"*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
+  [(set (match_operand:SWI248x 0 "register_operand" "=k")
+       (zero_extend:SWI248x
+         (unspec:<VI48_AVX512VL:avx512fmaskmode>
+           [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
+            (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
+            (match_operand:SI 3 "const_0_to_7_operand" "n")]
+           UNSPEC_UNSIGNED_PCMP)))]
+  "TARGET_AVX512BW
+  && (GET_MODE_NUNITS (<VI48_AVX512VL:MODE>mode)
+      < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
+  "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  "&& <SWI248x:MODE>mode != E_DImode"
+  [(set (match_dup 0)
+       (zero_extend:DI
+         (unspec:<VI48_AVX512VL:avx512fmaskmode>
+           [(match_dup 1)
+            (match_dup 2)
+            (match_dup 3)]
+          UNSPEC_UNSIGNED_PCMP)))]
+  "operands[0] = lowpart_subreg (DImode, operands[0], <SWI248x:MODE>mode);"
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<VI48_AVX512VL:sseinsnmode>")])
+
+(define_insn_and_split "*<avx512>_ucmp<mode>3"
+  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+       (unspec:<avx512fmaskmode>
+         [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
+          (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
+          (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
+         UNSPEC_UNSIGNED_PCMP))]
+  "TARGET_AVX512BW
+   && GET_MODE_NUNITS (<VI48_AVX512VL:MODE>mode) < 64"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+       (zero_extend:DI
+         (unspec:<avx512fmaskmode>
+           [(match_dup 1)
+            (match_dup 2)
+            (match_dup 3)]
+           UNSPEC_UNSIGNED_PCMP)))]
+  "operands[0] = lowpart_subreg (DImode, operands[0], <avx512fmaskmode>mode);"
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*<avx512>_ucmp<mode>3_zero_extenddi"
+  [(set (match_operand:DI 0 "register_operand" "=k")
+       (zero_extend:DI
+         (unspec:<avx512fmaskmode>
+           [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
+            (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
+            (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
+           UNSPEC_UNSIGNED_PCMP)))]
+  "TARGET_AVX512BW
+   && GET_MODE_NUNITS (<VI48_AVX512VL:MODE>mode) < 64"
+  "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
 (define_insn_and_split "*<avx512>_ucmp<mode>3"
   [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
        (not:<avx512fmaskmode>
diff --git a/gcc/testsuite/g++.target/i386/pr103750-1.C 
b/gcc/testsuite/g++.target/i386/pr103750-1.C
new file mode 100644
index 00000000000..83f471331b3
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr103750-1.C
@@ -0,0 +1,50 @@
+/* PR target/103750 */
+/* { dg-do compile }  */
+/* { dg-options "-O2 -march=cannonlake -maes -std=c++1y" } */
+/* { dg-final { scan-assembler-times "kmovw" 2 } } */
+/* { dg-final { scan-assembler-times "kmovd" 2 } } */
+/* There shouldn't be any kmovw/kmovd inside the loop.  */
+#include <immintrin.h>
+
+const char16_t *qustrchr(char16_t *n, char16_t *e, char16_t c) noexcept
+{
+  __m256i mch256 = _mm256_set1_epi16(c);
+  for ( ; n < e; n += 32) {
+    __m256i data1 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n));
+    __m256i data2 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n) + 
1);
+    __mmask16 mask1 = _mm256_cmpeq_epu16_mask(data1, mch256);
+    __mmask16 mask2 = _mm256_cmpeq_epu16_mask(data2, mch256);
+    if (_kortestz_mask16_u8(mask1, mask2))
+      continue;
+
+    unsigned idx = _tzcnt_u32(mask1);
+    if (mask1 == 0) {
+      idx = __tzcnt_u16(mask2);
+      n += 16;
+    }
+    return n + idx;
+  }
+  return e;
+}
+
+const char16_t *qustrchr1(char16_t *n, char16_t *e, char16_t c) noexcept
+{
+  __m256i mch256 = _mm256_set1_epi16(c);
+  for ( ; n < e; n += 32) {
+    __m256i data1 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n));
+    __m256i data2 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n) + 
1);
+    __mmask16 mask1 = _mm256_cmpeq_epu16_mask(data1, mch256);
+    __mmask16 mask2 = _mm256_cmpeq_epu16_mask(data2, mch256);
+    if (_kortestz_mask32_u8(mask1, mask2))
+      continue;
+
+    unsigned idx = _tzcnt_u32(mask1);
+    if (mask1 == 0) {
+      idx = __tzcnt_u16(mask2);
+      n += 16;
+    }
+    return n + idx;
+  }
+  return e;
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/bitwise_mask_op-3.c 
b/gcc/testsuite/gcc.target/i386/bitwise_mask_op-3.c
index 352c49d6c6b..82bb99e30af 100644
--- a/gcc/testsuite/gcc.target/i386/bitwise_mask_op-3.c
+++ b/gcc/testsuite/gcc.target/i386/bitwise_mask_op-3.c
@@ -12,7 +12,7 @@ foo_orb (__m512i a, __m512i b)
   foo = m1 | m2;
 }
 
-/* { dg-final { scan-assembler-times "korb\[\t \]" "1" { xfail *-*-* } } }  */
+/* { dg-final { scan-assembler-times "korb\[\t \]" "1" { xfail { *-*-* && { ! 
ia32 } } } } }  */
 
 void
 foo_xorb (__m512i a, __m512i b)
@@ -22,7 +22,7 @@ foo_xorb (__m512i a, __m512i b)
   foo = m1 ^ m2;
 }
 
-/* { dg-final { scan-assembler-times "kxorb\[\t \]" "1" { xfail *-*-* } } }  */
+/* { dg-final { scan-assembler-times "kxorb\[\t \]" "1" { xfail { *-*-* && { ! 
ia32 } } } } }  */
 
 void
 foo_andb (__m512i a, __m512i b)
@@ -40,4 +40,4 @@ foo_andnb (__m512i a, __m512i b)
   foo = m1 & ~m2;
 }
 
-/* { dg-final { scan-assembler-times "kmovb\[\t \]" "4" { xfail *-*-* } } }  */
+/* { dg-final { scan-assembler-times "kmovb\[\t \]" "4" { xfail { *-*-* && { ! 
ia32 } } } } }  */
-- 
2.18.1

Reply via email to