Hi All,

Advanced SIMD lacks a cmpne (vector != comparison), and unlike a compare
against 0 we can't rewrite it to a cmtst.

This operation is however fairly common, especially now that we support early
break vectorization.

As such this adds a pattern to recognize the negated any comparison and
transform it to an all, i.e. any(~x) => !all(x), by inverting the branch.

For example:

void f1 (int x)
{
  for (int i = 0; i < N; i++)
    {
      b[i] += a[i];
      if (a[i] != x)
        break;
    }
}

We currently generate:

        cmeq    v31.4s, v30.4s, v29.4s
        not     v31.16b, v31.16b
        umaxp   v31.4s, v31.4s, v31.4s
        fmov    x5, d31
        cbnz    x5, .L2

and after this patch:

        cmeq    v31.4s, v30.4s, v29.4s
        uminp   v31.4s, v31.4s, v31.4s
        fmov    x5, d31
        cbz     x5, .L2

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

        * config/aarch64/aarch64-simd.md (*cbranchnev4si): New.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/vect-early-break-cbranch_2.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 
cd5ec35c3f53028f14828bd70a92924f62524c15..b1a2c617d7d4106ab725d53a5d0b5c2fb61a0c78
 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3870,6 +3870,37 @@ (define_expand "cbranch<mode>4"
   DONE;
 })
 
+;; Advanced SIMD lacks a vector != comparison, but this is a quite common
+;; operation.  To not pay the penalty for inverting == we can map our any
+;; comparisons to all i.e. any(~x) => all(x).
+(define_insn_and_split "*cbranchnev4si"
+  [(set (pc)
+    (if_then_else
+      (ne (subreg:DI
+           (unspec:V4SI
+             [(not:V4SI (match_operand:V4SI 0 "register_operand" "w"))
+              (not:V4SI (match_dup 0))]
+               UNSPEC_UMAXV) 0)
+          (const_int 0))
+       (label_ref (match_operand 1 ""))
+       (pc)))
+    (clobber (match_scratch:DI 2 "=w"))]
+  "TARGET_SIMD"
+  "#"
+  "&& true"
+  [(set (match_dup 2)
+       (unspec:V4SI [(match_dup 0) (match_dup 0)] UNSPEC_UMINV))
+   (set (pc)
+    (if_then_else
+      (eq (subreg:DI (match_dup 2) 0)
+         (const_int 0))
+       (label_ref (match_dup 1))
+       (pc)))]
+{
+  if (can_create_pseudo_p ())
+    operands[2] = gen_reg_rtx (V4SImode);
+})
+
 ;; Patterns comparing two vectors to produce a mask.
 
 (define_expand "vec_cmp<mode><mode>"
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_2.c 
b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_2.c
new file mode 100644
index 
0000000000000000000000000000000000000000..e81027bb50138be627f4dfdffb1557893a5a7723
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_2.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#define N 640
+int a[N] = {0};
+int b[N] = {0};
+
+
+/*
+** f1:
+**     ...
+**     cmeq    v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+**     uminp   v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+**     fmov    x[0-9]+, d[0-9]+
+**     cbz     x[0-9]+, \.L[0-9]+
+**     ...
+*/
+void f1 (int x)
+{
+  for (int i = 0; i < N; i++)
+    {
+      b[i] += a[i];
+      if (a[i] != x)
+       break;
+    }
+}




-- 
diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 
cd5ec35c3f53028f14828bd70a92924f62524c15..b1a2c617d7d4106ab725d53a5d0b5c2fb61a0c78
 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3870,6 +3870,37 @@ (define_expand "cbranch<mode>4"
   DONE;
 })
 
+;; Advanced SIMD lacks a vector != comparison, but this is a quite common
+;; operation.  To not pay the penalty for inverting == we can map our any
+;; comparisons to all i.e. any(~x) => all(x).
+(define_insn_and_split "*cbranchnev4si"
+  [(set (pc)
+    (if_then_else
+      (ne (subreg:DI
+           (unspec:V4SI
+             [(not:V4SI (match_operand:V4SI 0 "register_operand" "w"))
+              (not:V4SI (match_dup 0))]
+               UNSPEC_UMAXV) 0)
+          (const_int 0))
+       (label_ref (match_operand 1 ""))
+       (pc)))
+    (clobber (match_scratch:DI 2 "=w"))]
+  "TARGET_SIMD"
+  "#"
+  "&& true"
+  [(set (match_dup 2)
+       (unspec:V4SI [(match_dup 0) (match_dup 0)] UNSPEC_UMINV))
+   (set (pc)
+    (if_then_else
+      (eq (subreg:DI (match_dup 2) 0)
+         (const_int 0))
+       (label_ref (match_dup 1))
+       (pc)))]
+{
+  if (can_create_pseudo_p ())
+    operands[2] = gen_reg_rtx (V4SImode);
+})
+
 ;; Patterns comparing two vectors to produce a mask.
 
 (define_expand "vec_cmp<mode><mode>"
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_2.c 
b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_2.c
new file mode 100644
index 
0000000000000000000000000000000000000000..e81027bb50138be627f4dfdffb1557893a5a7723
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_2.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#define N 640
+int a[N] = {0};
+int b[N] = {0};
+
+
+/*
+** f1:
+**     ...
+**     cmeq    v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+**     uminp   v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+**     fmov    x[0-9]+, d[0-9]+
+**     cbz     x[0-9]+, \.L[0-9]+
+**     ...
+*/
+void f1 (int x)
+{
+  for (int i = 0; i < N; i++)
+    {
+      b[i] += a[i];
+      if (a[i] != x)
+       break;
+    }
+}



Reply via email to