This patch adds a combine splitter to transform vpcmpeqd/vpxor/vblendvps into
vblendvps for ~op0.

OK for trunk?

BRs,
Haochen

gcc/ChangeLog:

        PR target/100738
        * config/i386/sse.md
        (*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_not_ltint):
        Add new define_insn_and_split.

gcc/testsuite/ChangeLog:

        PR target/100738
        * g++.target/i386/pr100738-1.C: New test.

---
 gcc/config/i386/sse.md                     | 28 ++++++++++++++++++++++
 gcc/testsuite/g++.target/i386/pr100738-1.C | 19 +++++++++++++++
 2 files changed, 47 insertions(+)
 create mode 100755 gcc/testsuite/g++.target/i386/pr100738-1.C

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 08bdcddc111..db3506c78d7 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -20659,6 +20659,34 @@
    (set_attr "btver2_decode" "vector,vector,vector") 
    (set_attr "mode" "<ssefltvecmode>")])
 
+;; PR target/100738: Transform vpcmpeqd + vpxor + vblendvps to vblendvps for an inverted mask.
+(define_insn_and_split "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_not_ltint"
+  [(set (match_operand:<ssebytemode> 0 "register_operand")
+       (unspec:<ssebytemode>
+         [(match_operand:<ssebytemode> 1 "register_operand")
+          (match_operand:<ssebytemode> 2 "vector_operand")
+          (subreg:<ssebytemode>
+            (lt:VI48_AVX
+              (subreg:VI48_AVX
+                (not:<ssebytemode>
+                  (match_operand:<ssebytemode> 3 "register_operand")) 0)
+              (match_operand:VI48_AVX 4 "const0_operand")) 0)]
+         UNSPEC_BLENDV))]
+  "TARGET_SSE4_1 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)  ; Blend on operand 3 itself (no NOT) with inputs 1 and 2 swapped.
+       (unspec:<ssefltvecmode>
+        [(match_dup 2) (match_dup 1) (match_dup 3)] UNSPEC_BLENDV))]
+{
+  operands[0] = gen_lowpart (<ssefltvecmode>mode, operands[0]);
+  operands[1] = gen_lowpart (<ssefltvecmode>mode, operands[1]);
+  operands[2] = gen_lowpart (<ssefltvecmode>mode, operands[2]);
+  operands[3] = gen_lowpart (<ssefltvecmode>mode, operands[3]);
+  if (MEM_P (operands[2]))  /* Operand 2 is now the first blend input; keep it in a register.  */
+    operands[2] = force_reg (<ssefltvecmode>mode, operands[2]);
+})
+
 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
   [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
        (unspec:VF_128_256
diff --git a/gcc/testsuite/g++.target/i386/pr100738-1.C b/gcc/testsuite/g++.target/i386/pr100738-1.C
new file mode 100755
index 00000000000..5a04c5b031f
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr100738-1.C
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mavx2" } */
+/* { dg-final { scan-assembler-times "vblendvps\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-not "vpcmpeqd\[ \\t\]" } } */
+/* { dg-final { scan-assembler-not "vpxor\[ \\t\]" } } */
+
+typedef int v4si __attribute__((vector_size(16)));  /* 16-byte vector of int.  */
+typedef char v16qi __attribute__((vector_size(16)));  /* 16-byte vector of char.  */
+v4si
+foo_1 (v16qi a, v4si b, v4si c, v4si d)  /* b is unused.  */
+{
+  return ((v4si)~a) < 0 ? c : d;  /* Per element: c where ~a, viewed as v4si, is negative, else d.  */
+}
+
+v4si
+foo_2 (v16qi a, v4si b, v4si c, v4si d)  /* b is unused.  */
+{
+  return ((v4si)~a) >= 0 ? c : d;  /* Per element: c where ~a, viewed as v4si, is non-negative, else d.  */
+}
-- 
2.18.1

Reply via email to