https://gcc.gnu.org/g:594dc80c8e49cb65f77a13c201a0bea9423329ec
commit r16-5369-g594dc80c8e49cb65f77a13c201a0bea9423329ec Author: liuhongt <[email protected]> Date: Thu Nov 13 19:59:08 2025 -0800 Also handle vptestnm + and15/and3 to just vptestnm. r16-1298-gcdfa5fe03512f7 optimizes vpcmp + and15/and3 to vpcmp when VF is 2 or 4. vptestnm is a variant of vpcmpeq which accepts nonimm_or_0_operand. The patch handles that. gcc/ChangeLog: PR target/103750 * config/i386/sse.md (*<avx512>_eq<mode>3_and15): New define_insn. (*avx512vl_eqv2di_and3): Ditto. * config/i386/i386.md (*ior<mode>_ccz_1): Fix the typo in the comments above. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512vl-pr103750-2.c: New test. Diff: --- gcc/config/i386/i386.md | 2 +- gcc/config/i386/sse.md | 40 ++++++++++++++++++++-- .../gcc.target/i386/avx512vl-pr103750-2.c | 13 +++++++ 3 files changed, 51 insertions(+), 4 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index b925a037b2df..6af7dcfcdd32 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -14203,7 +14203,7 @@ (set_attr "isa" "*,apx_ndd") (set_attr "mode" "SI")]) -;; It must be put before *<code><mode>_3, the blow one. +;; It must be put before *<code><mode>_3, the one below. (define_insn "*ior<mode>_ccz_1" [(set (reg:CCZ FLAGS_REG) (compare:CCZ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 7d91585b05d1..8b90845260a3 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -4653,6 +4653,9 @@ UNSPEC_PCMP))] "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);") +(define_int_iterator UNSPEC_PCMP_ITER + [UNSPEC_PCMP UNSPEC_UNSIGNED_PCMP]) + (define_insn "*<avx512>_cmp<mode>3_and15" [(set (match_operand:QI 0 "register_operand" "=k") (and:QI @@ -4685,6 +4688,23 @@ (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) +(define_insn "*<avx512>_eq<mode>3_and15" + [(set (match_operand:QI 0 "register_operand" "=k, k") + (and:QI + (unspec:QI + [(match_operand:VI48_AVX512VL_4 1 "nonimm_or_0_operand" "%v, v") + (match_operand:VI48_AVX512VL_4 2 "nonimm_or_0_operand" "vm, C") + (const_int 0)] + UNSPEC_PCMP_ITER) + (const_int 15)))] + "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "@ + vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} + vptestnm<ssemodesuffix>\t{%1, %1, %0|%0, %1, %1}" + [(set_attr "type" "ssecmp") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + (define_insn "*<avx512>_cmp<mode>3_and3" [(set (match_operand:QI 0 "register_operand" "=k") (and:QI @@ -4717,6 +4737,23 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) +(define_insn "*avx512vl_eqv2di_and3" + [(set (match_operand:QI 0 "register_operand" "=k, k") + (and:QI + (unspec:QI + [(match_operand:V2DI 1 "nonimm_or_0_operand" "%v, v") + (match_operand:V2DI 2 "nonimm_or_0_operand" "vm, C") + (const_int 0)] + UNSPEC_PCMP_ITER) + (const_int 3)))] + "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "@ + vpcmpeqq\t{%2, %1, %0|%0, %1, %2} + vptestnmq\t{%1, %1, %0|%0, %1, %1}" + [(set_attr "type" "ssecmp") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>" [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k") (unspec:<avx512fmaskmode> @@ -4790,9 +4827,6 @@ (set_attr "prefix" "evex") (set_attr "mode" "<VI12_AVX512VL:sseinsnmode>")]) -(define_int_iterator UNSPEC_PCMP_ITER - [UNSPEC_PCMP UNSPEC_UNSIGNED_PCMP]) - (define_insn_and_split "*<avx512>_cmp<mode>3" [(set (match_operand:<avx512fmaskmode> 0 "register_operand") (not:<avx512fmaskmode> diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr103750-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr103750-2.c new file mode 100644 index 000000000000..7c6e77b79e36 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr103750-2.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v4 -mprefer-vector-width=128 -O3" } */ +/* { dg-final { scan-assembler "kortest" } } */ +/* { dg-final { scan-assembler-not "kmov" } } */ + +int +foo (int *__restrict a) +{ + for (int i = 0; i != 100; i++) + if (a[i] == 0) + return 1; + return 0; +}
