This patch adds a combine splitter that transforms the vpcmpeqd/vpxor/vblendvps sequence into a single vblendvps when the blend mask is an inverted operand (~op0).

OK for trunk?

BRs,
Haochen

gcc/ChangeLog:

	PR target/100738
	* config/i386/sse.md (*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_not_ltint):
	Add new define_insn_and_split.

gcc/testsuite/ChangeLog:

	PR target/100738
	* g++.target/i386/pr100738-1.C: New test.
---
 gcc/config/i386/sse.md                     | 28 ++++++++++++++++++++++
 gcc/testsuite/g++.target/i386/pr100738-1.C | 19 +++++++++++++++
 2 files changed, 47 insertions(+)
 create mode 100755 gcc/testsuite/g++.target/i386/pr100738-1.C

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 08bdcddc111..db3506c78d7 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -20659,6 +20659,34 @@
    (set_attr "btver2_decode" "vector,vector,vector")
    (set_attr "mode" "<ssefltvecmode>")])
 
+;; PR target/100738: Transform vpcmpeqd + vpxor + vblendvps to vblendvps for inverted mask;
+(define_insn_and_split "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_not_ltint"
+  [(set (match_operand:<ssebytemode> 0 "register_operand")
+        (unspec:<ssebytemode>
+          [(match_operand:<ssebytemode> 1 "register_operand")
+           (match_operand:<ssebytemode> 2 "vector_operand")
+           (subreg:<ssebytemode>
+             (lt:VI48_AVX
+               (subreg:VI48_AVX
+                 (not:<ssebytemode>
+                   (match_operand:<ssebytemode> 3 "register_operand")) 0)
+               (match_operand:VI48_AVX 4 "const0_operand")) 0)]
+          UNSPEC_BLENDV))]
+  "TARGET_SSE4_1 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+        (unspec:<ssefltvecmode>
+          [(match_dup 2) (match_dup 1) (match_dup 3)] UNSPEC_BLENDV))]
+{
+  operands[0] = gen_lowpart (<ssefltvecmode>mode, operands[0]);
+  operands[1] = gen_lowpart (<ssefltvecmode>mode, operands[1]);
+  operands[2] = gen_lowpart (<ssefltvecmode>mode, operands[2]);
+  operands[3] = gen_lowpart (<ssefltvecmode>mode, operands[3]);
+  if (MEM_P (operands[2]))
+    operands[2] = force_reg (<ssefltvecmode>mode, operands[2]);
+})
+
 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
   [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
         (unspec:VF_128_256
diff --git a/gcc/testsuite/g++.target/i386/pr100738-1.C b/gcc/testsuite/g++.target/i386/pr100738-1.C
new file mode 100755
index 00000000000..5a04c5b031f
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr100738-1.C
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mavx2" } */
+/* { dg-final {scan-assembler-times "vblendvps\[ \\t\]" 2 } } */
+/* { dg-final {scan-assembler-not "vpcmpeqd\[ \\t\]" } } */
+/* { dg-final {scan-assembler-not "vpxor\[ \\t\]" } } */
+
+typedef int v4si __attribute__((vector_size(16)));
+typedef char v16qi __attribute__((vector_size(16)));
+v4si
+foo_1 (v16qi a, v4si b, v4si c, v4si d)
+{
+  return ((v4si)~a) < 0 ? c : d;
+}
+
+v4si
+foo_2 (v16qi a, v4si b, v4si c, v4si d)
+{
+  return ((v4si)~a) >= 0 ? c : d;
+}
-- 
2.18.1