r16-1298-gcdfa5fe03512f7 optimizes vpcmp + and15/and3 to a single vpcmp
when VF is 2 or 4. vptestnm is a variant of vpcmpeq which accepts
nonimm_or_0_operand. This patch adds the corresponding and15/and3
patterns for that form.
Also fix the typo in r16-5219-g7d297806214d84:
> > +;; It must be put before *<code><mode>_3, the blow one.
>Typo above : ... the one below.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready to push to trunk.
gcc/ChangeLog:
PR target/103750
* config/i386/sse.md (*<avx512>_eq<mode>3_and15): New
define_insn.
(*avx512vl_eqv2di_and3): Ditto.
* config/i386/i386.md (*ior<mode>_ccz_1): Fix the typo in the
comments above.
gcc/testsuite/ChangeLog:
* gcc.target/i386/avx512vl-pr103750-2.c: New test.
---
gcc/config/i386/i386.md | 2 +-
gcc/config/i386/sse.md | 40 +++++++++++++++++--
.../gcc.target/i386/avx512vl-pr103750-2.c | 13 ++++++
3 files changed, 51 insertions(+), 4 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr103750-2.c
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index b925a037b2d..6af7dcfcdd3 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -14203,7 +14203,7 @@ (define_insn "*<code>si_2_zext_imm"
(set_attr "isa" "*,apx_ndd")
(set_attr "mode" "SI")])
-;; It must be put before *<code><mode>_3, the blow one.
+;; It must be put before *<code><mode>_3, the one below.
(define_insn "*ior<mode>_ccz_1"
[(set (reg:CCZ FLAGS_REG)
(compare:CCZ
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 7d91585b05d..8b90845260a 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4653,6 +4653,9 @@ (define_insn_and_split "*<avx512>_cmp<mode>3"
UNSPEC_PCMP))]
"operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);")
+(define_int_iterator UNSPEC_PCMP_ITER
+ [UNSPEC_PCMP UNSPEC_UNSIGNED_PCMP])
+
(define_insn "*<avx512>_cmp<mode>3_and15"
[(set (match_operand:QI 0 "register_operand" "=k")
(and:QI
@@ -4685,6 +4688,23 @@ (define_insn "*<avx512>_ucmp<mode>3_and15"
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn "*<avx512>_eq<mode>3_and15"
+ [(set (match_operand:QI 0 "register_operand" "=k, k")
+ (and:QI
+ (unspec:QI
+ [(match_operand:VI48_AVX512VL_4 1 "nonimm_or_0_operand" "%v, v")
+ (match_operand:VI48_AVX512VL_4 2 "nonimm_or_0_operand" "vm, C")
+ (const_int 0)]
+ UNSPEC_PCMP_ITER)
+ (const_int 15)))]
+ "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ vptestnm<ssemodesuffix>\t{%1, %1, %0|%0, %1, %1}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "*<avx512>_cmp<mode>3_and3"
[(set (match_operand:QI 0 "register_operand" "=k")
(and:QI
@@ -4717,6 +4737,23 @@ (define_insn "*avx512vl_ucmpv2di3_and3"
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
+(define_insn "*avx512vl_eqv2di_and3"
+ [(set (match_operand:QI 0 "register_operand" "=k, k")
+ (and:QI
+ (unspec:QI
+ [(match_operand:V2DI 1 "nonimm_or_0_operand" "%v, v")
+ (match_operand:V2DI 2 "nonimm_or_0_operand" "vm, C")
+ (const_int 0)]
+ UNSPEC_PCMP_ITER)
+ (const_int 3)))]
+ "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ vpcmpeqq\t{%2, %1, %0|%0, %1, %2}
+ vptestnmq\t{%1, %1, %0|%0, %1, %1}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
(define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(unspec:<avx512fmaskmode>
@@ -4790,9 +4827,6 @@ (define_insn_and_split "*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:m
(set_attr "prefix" "evex")
(set_attr "mode" "<VI12_AVX512VL:sseinsnmode>")])
-(define_int_iterator UNSPEC_PCMP_ITER
- [UNSPEC_PCMP UNSPEC_UNSIGNED_PCMP])
-
(define_insn_and_split "*<avx512>_cmp<mode>3"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand")
(not:<avx512fmaskmode>
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr103750-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr103750-2.c
new file mode 100644
index 00000000000..7c6e77b79e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr103750-2.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -mprefer-vector-width=128 -O3" } */
+/* { dg-final { scan-assembler "kortest" } } */
+/* { dg-final { scan-assembler-not "kmov" } } */
+
+int
+foo (int *__restrict a)
+{
+ for (int i = 0; i != 100; i++)
+ if (a[i] == 0)
+ return 1;
+ return 0;
+}
--
2.34.1