Hi, this patch performs the transformation shown below. Bootstrapped and regtested on x86_64-linux-gnu{-m32,}. OK for trunk?
From: notl %edi; vpbroadcastd %edi, %xmm0; vpand %xmm1, %xmm0, %xmm0. To: vpbroadcastd %edi, %xmm0; vpandn %xmm1, %xmm0, %xmm0. gcc/ChangeLog: PR target/100711 * config/i386/sse.md (*andnot<mode>3): New combine splitter after it. gcc/testsuite/ChangeLog: PR target/100711 * gcc.target/i386/avx2-pr100711.c: New test. * gcc.target/i386/avx512bw-pr100711.c: New test. -- BR, Hongtao
From 2a70b50fe3ebe129a66d8e4d5c8c025cb6df6e4c Mon Sep 17 00:00:00 2001 From: liuhongt <hongtao....@intel.com> Date: Fri, 21 May 2021 11:12:49 +0800 Subject: [PATCH] [i386] Split not+broadcast+pand to broadcast+pandn. Split notl %edi vpbroadcastd %edi, %xmm0 vpand %xmm1, %xmm0, %xmm0 to vpbroadcastd %edi, %xmm0 vpandn %xmm1, %xmm0, %xmm0 gcc/ChangeLog: PR target/100711 * config/i386/sse.md (*andnot<mode>3): New combine splitter after it. gcc/testsuite/ChangeLog: PR target/100711 * gcc.target/i386/avx2-pr100711.c: New test. * gcc.target/i386/avx512bw-pr100711.c: New test. --- gcc/config/i386/sse.md | 20 +++++ gcc/testsuite/gcc.target/i386/avx2-pr100711.c | 73 +++++++++++++++++++ .../gcc.target/i386/avx512bw-pr100711.c | 48 ++++++++++++ 3 files changed, 141 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/avx2-pr100711.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-pr100711.c diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index a4503ddcb73..999c7322aac 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -3990,6 +3990,26 @@ (define_insn "*andnot<mode>3" ] (const_string "<ssevecmode>")))]) +;; Split +;; notl %edi +;; vpbroadcastd %edi, %xmm0 +;; vpand %xmm1, %xmm0, %xmm0 +;;to +;; vpbroadcastd %edi, %xmm0 +;; vpandn %xmm1, %xmm0, %xmm0 + +(define_split + [(set (match_operand:VI 0 "register_operand") + (and:VI + (vec_duplicate:VI + (not:<ssescalarmode> + (match_operand:<ssescalarmode> 1 "register_operand"))) + (match_operand:VI 2 "bcst_vector_operand")))] + "TARGET_AVX2" + [(set (match_dup 3) (vec_duplicate:VI (match_dup 1))) + (set (match_dup 0) (and:VI (not:VI (match_dup 3)) (match_dup 2)))] + "operands[3] = gen_reg_rtx (<MODE>mode);") + (define_insn "*andnottf3" [(set (match_operand:TF 0 "register_operand" "=x,x,v,v") (and:TF diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr100711.c b/gcc/testsuite/gcc.target/i386/avx2-pr100711.c new file mode 100644 index 00000000000..5b144623873 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/i386/avx2-pr100711.c @@ -0,0 +1,73 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -O2" } */ +/* { dg-final { scan-assembler-times "pandn" 8 } } */ +/* { dg-final { scan-assembler-not "not\[bwlq\]" } } */ +typedef char v16qi __attribute__((vector_size(16))); +typedef char v32qi __attribute__((vector_size(32))); +typedef short v8hi __attribute__((vector_size(16))); +typedef short v16hi __attribute__((vector_size(32))); +typedef int v4si __attribute__((vector_size(16))); +typedef int v8si __attribute__((vector_size(32))); +typedef long long v2di __attribute__((vector_size(16))); +typedef long long v4di __attribute__((vector_size(32))); + +v16qi +f1 (char a, v16qi c) +{ + char b = ~a; + return (__extension__(v16qi) {b, b, b, b, b, b, b, b, + b, b, b, b, b, b, b, b}) & c; +} + +v32qi +f2 (char a, v32qi c) +{ + char b = ~a; + return (__extension__(v32qi) {b, b, b, b, b, b, b, b, + b, b, b, b, b, b, b, b, + b, b, b, b, b, b, b, b, + b, b, b, b, b, b, b, b}) & c; +} + +v8hi +f3 (short a, v8hi c) +{ + short b = ~a; + return (__extension__(v8hi) {b, b, b, b, b, b, b, b}) & c; +} + +v16hi +f4 (short a, v16hi c) +{ + short b = ~a; + return (__extension__(v16hi) {b, b, b, b, b, b, b, b, + b, b, b, b, b, b, b, b}) & c; +} + +v4si +f5 (int a, v4si c) +{ + int b = ~a; + return (__extension__(v4si) {b, b, b, b}) & c; +} + +v8si +f6 (int a, v8si c) +{ + int b = ~a; + return (__extension__(v8si) {b, b, b, b, b, b, b, b}) & c; +} + +v2di +f7 (long long a, v2di c) +{ + long long b = ~a; + return (__extension__(v2di) {b, b}) & c; +} + +v4di +f8 (long long a, v4di c) +{ + long long b = ~a; + return (__extension__(v4di) {b, b, b, b}) & c; +} diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr100711.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr100711.c new file mode 100644 index 00000000000..f0a103d0bc2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr100711.c @@ -0,0 +1,48 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -O2" } 
*/ +/* { dg-final { scan-assembler-times "pandn" 4 } } */ +/* { dg-final { scan-assembler-not "not\[bwlq\]" } } */ + +typedef char v64qi __attribute__((vector_size(64))); +typedef short v32hi __attribute__((vector_size(64))); +typedef int v16si __attribute__((vector_size(64))); +typedef long long v8di __attribute__((vector_size(64))); + +v64qi +f1 (char a, v64qi c) +{ + char b = ~a; + return (__extension__(v64qi) {b, b, b, b, b, b, b, b, + b, b, b, b, b, b, b, b, + b, b, b, b, b, b, b, b, + b, b, b, b, b, b, b, b, + b, b, b, b, b, b, b, b, + b, b, b, b, b, b, b, b, + b, b, b, b, b, b, b, b, + b, b, b, b, b, b, b, b}) & c; +} + +v32hi +f2 (short a, v32hi c) +{ + short b = ~a; + return (__extension__(v32hi) {b, b, b, b, b, b, b, b, + b, b, b, b, b, b, b, b, + b, b, b, b, b, b, b, b, + b, b, b, b, b, b, b, b}) & c; +} + +v16si +f3 (int a, v16si c) +{ + int b = ~a; + return (__extension__(v16si) {b, b, b, b, b, b, b, b, + b, b, b, b, b, b, b, b}) & c; +} + +v8di +f4 (long long a, v8di c) +{ + long long b = ~a; + return (__extension__(v8di) {b, b, b, b, b, b, b, b}) & c; +} -- 2.18.1