Hi,

This is a cleaned up version addressing all feedback.

Bootstrapped and regtested on aarch64-none-linux-gnu and
x86_64-pc-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

        * match.pd: Add new rules to fold ((cmp & c) | (inverted cmp & d))
        into a conditional select.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/if-compare_1.c: New test.
        * gcc.target/aarch64/if-compare_2.c: New test.

--- inline copy of patch ---

diff --git a/gcc/match.pd b/gcc/match.pd
index 297c4d7a2355c2faa2afb7cfa0b4e171a161ef39..463132bfd35cf61f50381cedd07d7617c638034b 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1903,6 +1903,61 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (INTEGRAL_TYPE_P (type))
   (bit_and @0 @1)))
 
+(for cmp (tcc_comparison)
+     icmp (inverted_tcc_comparison)
+ /* Fold (((a < b) & c) | ((a >= b) & d)) into (a < b ? c : d) & 1.  */
+ (simplify
+  (bit_ior
+   (bit_and:c (convert? (cmp@0  @01 @02)) @3)
+   (bit_and:c (convert? (icmp@4 @01 @02)) @5))
+    (if (INTEGRAL_TYPE_P (type)
+        /* The scalar version has to be canonicalized after vectorization
+           because it makes unconditional loads conditional ones, which
+           means we lose vectorization because the loads may trap.  */
+        && canonicalize_math_after_vectorization_p ())
+     (bit_and (cond @0 @3 @5) { build_one_cst (type); })))
+
+ /* Fold ((-(a < b) & c) | (-(a >= b) & d)) into a < b ? c : d.  This is
+    canonicalized further and we recognize the conditional form:
+    (a < b ? c : 0) | (a >= b ? d : 0) into a < b ? c : d.  */
+ (simplify
+  (bit_ior
+   (cond (cmp@0  @01 @02) @3 zerop)
+   (cond (icmp@4 @01 @02) @5 zerop))
+    (if (INTEGRAL_TYPE_P (type)
+        /* The scalar version has to be canonicalized after vectorization
+           because it makes unconditional loads conditional ones, which
+           means we lose vectorization because the loads may trap.  */
+        && canonicalize_math_after_vectorization_p ())
+    (cond @0 @3 @5)))
+
+ /* Vector Fold (((a < b) & c) | ((a >= b) & d)) into a < b ? c : d
+    and ((~(a < b) & c) | (~(a >= b) & d)) into a < b ? d : c.  */
+ (simplify
+  (bit_ior
+   (bit_and:c (vec_cond:s (cmp@0 @6 @7) @4 @5) @2)
+   (bit_and:c (vec_cond:s (icmp@1 @6 @7) @4 @5) @3))
+    (if (integer_zerop (@5))
+     (switch
+      (if (integer_onep (@4))
+       (bit_and (vec_cond @0 @2 @3) @4))
+       (if (integer_minus_onep (@4))
+        (vec_cond @0 @2 @3)))
+    (if (integer_zerop (@4))
+     (switch
+      (if (integer_onep (@5))
+       (bit_and (vec_cond @0 @3 @2) @5))
+      (if (integer_minus_onep (@5))
+       (vec_cond @0 @3 @2))))))
+
+ /* Scalar Vectorized Fold ((-(a < b) & c) | (-(a >= b) & d))
+    into a < b ? c : d.  */
+ (simplify
+  (bit_ior
+   (vec_cond:s (cmp@0 @4 @5) @2 integer_zerop)
+   (vec_cond:s (icmp@1 @4 @5) @3 integer_zerop))
+    (vec_cond @0 @2 @3)))
+
 /* Transform X & -Y into X * Y when Y is { 0 or 1 }.  */
 (simplify
  (bit_and:c (convert? (negate zero_one_valued_p@0)) @1)
diff --git a/gcc/testsuite/gcc.target/aarch64/if-compare_1.c b/gcc/testsuite/gcc.target/aarch64/if-compare_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..53bbd779a30e1a30e0ce0e4e5eaf589bfaf570fe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/if-compare_1.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O -save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+extern void abort ();
+
+/*
+**zoo1:
+**     cmp     w0, w1
+**     csel    w0, w2, w3, lt
+**     and     w0, w0, 1
+**     ret
+*/
+__attribute((noipa, noinline))
+int zoo1 (int a, int b, int c, int d)
+{
+   return ((a < b) & c) | ((a >= b) & d);
+}
+
+/*
+**zoo2:
+**     cmp     w0, w1
+**     csel    w0, w2, w3, lt
+**     ret
+*/
+__attribute((noipa, noinline))
+int zoo2 (int a, int b, int c, int d)
+{
+   return (-(a < b) & c) | (-(a >= b) & d);
+}
+
+int main ()
+{
+  if (zoo1 (-3, 3, 5, 8) != 1)
+    abort ();
+
+  if (zoo1 (3, -3, 5, 8) != 0)
+    abort ();
+
+  if (zoo2 (-3, 3, 5, 8) != 5)
+    abort ();
+
+  if (zoo2 (3, -3, 5, 8) != 8)
+    abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/if-compare_2.c b/gcc/testsuite/gcc.target/aarch64/if-compare_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..14988abac45989578b198f28c7c0ea203959c08b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/if-compare_2.c
@@ -0,0 +1,96 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O3 -std=c99 -save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+#pragma GCC target "+nosve"
+
+#include <string.h>
+
+typedef int v4si __attribute__ ((vector_size (16)));
+
+/*
+**foo1:
+**     cmgt    v0.4s, v1.4s, v0.4s
+**     bsl     v0.16b, v2.16b, v3.16b
+**     ret
+*/
+v4si foo1 (v4si a, v4si b, v4si c, v4si d) {
+    return ((a < b) & c) | ((a >= b) & d);
+}
+
+/*
+**foo2:
+**     cmgt    v0.4s, v1.4s, v0.4s
+**     bsl     v0.16b, v3.16b, v2.16b
+**     ret
+*/
+v4si foo2 (v4si a, v4si b, v4si c, v4si d) {
+    return (~(a < b) & c) | (~(a >= b) & d);
+}
+
+
+/**
+**bar1:
+**...
+**     cmge    v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+**     bsl     v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**     and     v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**...
+*/
+void bar1 (int * restrict a, int * restrict b, int * restrict c,
+         int * restrict d, int * restrict res, int n)
+{
+  for (int i = 0; i < (n & -4); i++)
+    res[i] = ((a[i] < b[i]) & c[i]) | ((a[i] >= b[i]) & d[i]);
+}
+
+/**
+**bar2:
+**...
+**     cmge    v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+**     bsl     v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**...
+*/
+void bar2 (int * restrict a, int * restrict b, int * restrict c,
+         int * restrict d, int * restrict res, int n)
+{
+  for (int i = 0; i < (n & -4); i++)
+    res[i] = (-(a[i] < b[i]) & c[i]) | (-(a[i] >= b[i]) & d[i]);
+}
+
+extern void abort ();
+
+int main ()
+{
+
+  v4si a = { -3, -3, -3, -3 };
+  v4si b = { 3, 3, 3, 3 };
+  v4si c = { 5, 5, 5, 5 };
+  v4si d = { 8, 8, 8, 8 };
+
+  v4si res1 = foo1 (a, b, c, d);
+  if (memcmp (&res1, &c, 16UL) != 0)
+    abort ();
+
+  v4si res2 = foo2 (a, b, c, d);
+  if (memcmp (&res2, &d, 16UL) != 0)
+   abort ();
+
+  int ar[4] = { -3, -3, -3, -3 };
+  int br[4] = { 3, 3, 3, 3 };
+  int cr[4] = { 5, 5, 5, 5 };
+  int dr[4] = { 8, 8, 8, 8 };
+
+  int exp1[4] = { 1, 1, 1, 1 };
+  int res3[4];
+  bar1 ((int*)&ar, (int*)&br, (int*)&cr, (int*)&dr, (int*)&res3, 4);
+  if (memcmp (&res3, &exp1, 16UL) != 0)
+    abort ();
+
+  int res4[4];
+  bar2 ((int*)&ar, (int*)&br, (int*)&cr, (int*)&dr, (int*)&res4, 4);
+  if (memcmp (&res4, &cr, 16UL) != 0)
+    abort ();
+
+  return 0;
+}

Attachment: rb15679.patch
Description: rb15679.patch

Reply via email to