On Wed, Dec 8, 2021 at 11:13 AM Jiang, Haochen via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > Hi Uros, > > I have fixed that in the attached patch for checking in. Is that ok for > trunk? > Uros already said it's ok with that change, so let me check in the patch for you. > Regtested on x86_64-pc-linux-gnu. > > Thx, > Haochen > > -----Original Message----- > From: Uros Bizjak <ubiz...@gmail.com> > Sent: Wednesday, December 8, 2021 12:14 AM > To: Jiang, Haochen <haochen.ji...@intel.com> > Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao <hongtao....@intel.com> > Subject: Re: [PATCH] [i386]Add combine splitter to transform > vpcmpeqd/vpxor/vblendvps to vblendvps for ~op0 > > On Tue, Dec 7, 2021 at 3:10 AM Haochen Jiang via Gcc-patches > <gcc-patches@gcc.gnu.org> wrote: > > > > This patch adds a combine splitter to transform vpcmpeqd/vpxor/vblendvps to > > vblendvps for ~op0. > > > > OK for trunk? > > > > BRs, > > Haochen > > > > gcc/ChangeLog: > > > > PR target/100738 > > * config/i386/sse.md > > (*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_not_ltint): > > Add new define_insn_and_split. > > > > gcc/testsuite/ChangeLog: > > > > PR target/100738 > > * g++.target/i386/pr100738-1.C: New test. > > OK with a change below. > > Thanks, > Uros. 
> > > > > --- > > gcc/config/i386/sse.md | 28 ++++++++++++++++++++++ > > gcc/testsuite/g++.target/i386/pr100738-1.C | 19 +++++++++++++++ > > 2 files changed, 47 insertions(+) > > create mode 100755 gcc/testsuite/g++.target/i386/pr100738-1.C > > > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index > > 08bdcddc111..db3506c78d7 100644 > > --- a/gcc/config/i386/sse.md > > +++ b/gcc/config/i386/sse.md > > @@ -20659,6 +20659,34 @@ > > (set_attr "btver2_decode" "vector,vector,vector") > > (set_attr "mode" "<ssefltvecmode>")]) > > > > +;; PR target/100738: Transform vpcmpeqd + vpxor + vblendvps to > > +vblendvps for inverted mask; (define_insn_and_split > > "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_not_ltint" > > + [(set (match_operand:<ssebytemode> 0 "register_operand") > > + (unspec:<ssebytemode> > > + [(match_operand:<ssebytemode> 1 "register_operand") > > + (match_operand:<ssebytemode> 2 "vector_operand") > > + (subreg:<ssebytemode> > > + (lt:VI48_AVX > > + (subreg:VI48_AVX > > + (not:<ssebytemode> > > + (match_operand:<ssebytemode> 3 "register_operand")) 0) > > + (match_operand:VI48_AVX 4 "const0_operand")) 0)] > > + UNSPEC_BLENDV))] > > + "TARGET_SSE4_1 && ix86_pre_reload_split ()" > > + "#" > > + "&& 1" > > + [(set (match_dup 0) > > + (unspec:<ssefltvecmode> > > + [(match_dup 2) (match_dup 1) (match_dup 3)] UNSPEC_BLENDV))] > > +{ > > + operands[0] = gen_lowpart (<ssefltvecmode>mode, operands[0]); > > + operands[1] = gen_lowpart (<ssefltvecmode>mode, operands[1]); > > + operands[2] = gen_lowpart (<ssefltvecmode>mode, operands[2]); > > + operands[3] = gen_lowpart (<ssefltvecmode>mode, operands[3]); > > + if (MEM_P (operands[2])) > > + operands[2] = force_reg (<ssefltvecmode>mode, operands[2]); > > You don't need to check for MEM_P, force_reg will do it for you. 
> > > +}) > > + > > (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>" > > [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x") > > (unspec:VF_128_256 > > diff --git a/gcc/testsuite/g++.target/i386/pr100738-1.C > > b/gcc/testsuite/g++.target/i386/pr100738-1.C > > new file mode 100755 > > index 00000000000..5a04c5b031f > > --- /dev/null > > +++ b/gcc/testsuite/g++.target/i386/pr100738-1.C > > @@ -0,0 +1,19 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-Ofast -mavx2" } */ > > +/* { dg-final {scan-assembler-times "vblendvps\[ \\t\]" 2 } } */ > > +/* { dg-final {scan-assembler-not "vpcmpeqd\[ \\t\]" } } */ > > +/* { dg-final {scan-assembler-not "vpxor\[ \\t\]" } } */ > > + > > +typedef int v4si __attribute__((vector_size(16))); typedef char v16qi > > +__attribute__((vector_size(16))); > > +v4si > > +foo_1 (v16qi a, v4si b, v4si c, v4si d) { > > + return ((v4si)~a) < 0 ? c : d; > > +} > > + > > +v4si > > +foo_2 (v16qi a, v4si b, v4si c, v4si d) { > > + return ((v4si)~a) >= 0 ? c : d; > > +} > > -- > > 2.18.1 > >
-- BR, Hongtao