Hi All,

This optimizes the following sequence

  ((a < b) & c) | ((a >= b) & d)

into

  (a < b ? c : d) & 1

for the scalar case.  For vectors we can omit the & 1.
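
To spell out the scalar case (illustrative only, not part of the patch):
(a < b) evaluates to 0 or 1, so ((a < b) & c) is either 0 or the low bit
of c, and likewise for d; that is why the rewritten form has to keep the
& 1.  A quick check of the equivalence:

  #include <assert.h>

  int orig (int a, int b, int c, int d)
  {
    return ((a < b) & c) | ((a >= b) & d);
  }

  int folded (int a, int b, int c, int d)
  {
    return (a < b ? c : d) & 1;
  }

  int main (void)
  {
    /* a < b: both sides give 6 & 1 == 0.  */
    assert (orig (1, 2, 6, 9) == folded (1, 2, 6, 9));
    /* a >= b: both sides give 9 & 1 == 1.  */
    assert (orig (3, 2, 6, 9) == folded (3, 2, 6, 9));
    return 0;
  }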

This changes the code generation from

zoo2:
        cmp     w0, w1
        cset    w0, lt
        cset    w1, ge
        and     w0, w0, w2
        and     w1, w1, w3
        orr     w0, w0, w1
        ret

into

        cmp     w0, w1
        csel    w0, w2, w3, lt
        and     w0, w0, 1
        ret

and it significantly reduces the number of selects needed in the vector
code.
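
To illustrate the vector case (my sketch using GNU vector extensions,
not vectorizer output): each lane-wise comparison yields an all-ones or
all-zero mask, so the AND keeps c or d in full and no & 1 is needed; the
two masks, two ANDs and the OR collapse into a single lane-wise select:

  typedef int v4si __attribute__ ((vector_size (16)));

  /* Before: two masks, i.e. two selects, plus two ANDs and an OR.  */
  v4si before_fold (v4si a, v4si b, v4si c, v4si d)
  {
    v4si m1 = a < b;             /* each lane is -1 or 0 */
    v4si m2 = a >= b;            /* the inverse mask */
    return (m1 & c) | (m2 & d);
  }

  /* After: equivalent to a single vec_cond <a < b, c, d>.  */
  v4si after_fold (v4si a, v4si b, v4si c, v4si d)
  {
    v4si m = a < b;
    return (m & c) | (~m & d);
  }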

Bootstrapped and regtested on aarch64-none-linux-gnu and
x86_64-pc-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

        * fold-const.cc (inverse_conditions_p): Look through SSA_NAMEs to
        their defining comparisons.
        * match.pd: Add new rules.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/if-compare_1.c: New test.
        * gcc.target/aarch64/if-compare_2.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 39a5a52958d87497f301826e706886b290771a2d..f180599b90150acd3ed895a64280aa3255061256 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -2833,15 +2833,38 @@ compcode_to_comparison (enum comparison_code code)
 bool
 inverse_conditions_p (const_tree cond1, const_tree cond2)
 {
-  return (COMPARISON_CLASS_P (cond1)
-         && COMPARISON_CLASS_P (cond2)
-         && (invert_tree_comparison
-             (TREE_CODE (cond1),
-              HONOR_NANS (TREE_OPERAND (cond1, 0))) == TREE_CODE (cond2))
-         && operand_equal_p (TREE_OPERAND (cond1, 0),
-                             TREE_OPERAND (cond2, 0), 0)
-         && operand_equal_p (TREE_OPERAND (cond1, 1),
-                             TREE_OPERAND (cond2, 1), 0));
+  if (COMPARISON_CLASS_P (cond1)
+      && COMPARISON_CLASS_P (cond2)
+      && (invert_tree_comparison
+          (TREE_CODE (cond1),
+           HONOR_NANS (TREE_OPERAND (cond1, 0))) == TREE_CODE (cond2))
+      && operand_equal_p (TREE_OPERAND (cond1, 0),
+                         TREE_OPERAND (cond2, 0), 0)
+      && operand_equal_p (TREE_OPERAND (cond1, 1),
+                         TREE_OPERAND (cond2, 1), 0))
+    return true;
+
+  if (TREE_CODE (cond1) == SSA_NAME
+      && TREE_CODE (cond2) == SSA_NAME)
+    {
+      gimple *gcond1 = SSA_NAME_DEF_STMT (cond1);
+      gimple *gcond2 = SSA_NAME_DEF_STMT (cond2);
+      if (!is_gimple_assign (gcond1) || !is_gimple_assign (gcond2))
+       return false;
+
+      tree_code code1 = gimple_assign_rhs_code (gcond1);
+      tree_code code2 = gimple_assign_rhs_code (gcond2);
+      return TREE_CODE_CLASS (code1) == tcc_comparison
+            && TREE_CODE_CLASS (code2) == tcc_comparison
+            && invert_tree_comparison (code1,
+                 HONOR_NANS (gimple_arg (gcond1, 0))) == code2
+            && operand_equal_p (gimple_arg (gcond1, 0),
+                                gimple_arg (gcond2, 0), 0)
+            && operand_equal_p (gimple_arg (gcond1, 1),
+                                gimple_arg (gcond2, 1), 0);
+    }
+
+  return false;
 }
 
 /* Return a tree for the comparison which is the combination of
diff --git a/gcc/match.pd b/gcc/match.pd
index 6d691d302b339c0e4556b40af158b5208c12d08f..bad49dd348add751d9ec1e3023e34d9ac123194f 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1160,6 +1160,32 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
      (convert (bit_and (negate (convert:utype { pmop[0]; }))
                       (convert:utype @1)))))))
 
+/* Fold (((a < b) & c) | ((a >= b) & d)) into (a < b ? c : d) & 1.  */
+(simplify
+ (bit_ior
+  (bit_and:c (convert? @0) @2)
+  (bit_and:c (convert? @1) @3))
+   (if (inverse_conditions_p (@0, @1)
+       /* The scalar version has to be canonicalized after vectorization
+          because it makes unconditional loads conditional ones, which
+          means we lose vectorization because the loads may trap.  */
+       && canonicalize_math_after_vectorization_p ())
+    (bit_and (cond @0 @2 @3) { build_each_one_cst (type); })))
+(simplify
+ (bit_ior
+  (bit_and:c (convert? (vec_cond:s @0 @4 integer_zerop)) @2)
+  (bit_and:c (convert? (vec_cond:s @1 @4 integer_zerop)) @3))
+   (if (inverse_conditions_p (@0, @1)
+       && integer_onep (@4))
+    (bit_and (vec_cond @0 @2 @3) @4)))
+/* Fold (((a < b) & c) | ((a >= b) & d)) into a < b ? c : d.  */
+(simplify
+ (bit_ior
+  (bit_and:c (convert? (vec_cond:s @0 integer_minus_onep integer_zerop)) @2)
+  (bit_and:c (convert? (vec_cond:s @1 integer_minus_onep integer_zerop)) @3))
+   (if (inverse_conditions_p (@0, @1))
+    (vec_cond @0 @2 @3)))
+
 /* X % Y is smaller than Y.  */
 (for cmp (lt ge)
  (simplify
diff --git a/gcc/testsuite/gcc.target/aarch64/if-compare_1.c b/gcc/testsuite/gcc.target/aarch64/if-compare_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..05a1292fa90c70b14a7985122f43711f55d047ea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/if-compare_1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+/*
+**zoo2:
+**     cmp     w0, w1
+**     csel    w0, w2, w3, lt
+**     and     w0, w0, 1
+**     ret
+*/
+int zoo2 (int a, int b, int c, int d)
+{
+   return ((a < b) & c) | ((a >= b) & d);
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/if-compare_2.c b/gcc/testsuite/gcc.target/aarch64/if-compare_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..34bc65f5db10eae81b8dee3316dfb7d12bf471c8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/if-compare_2.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -std=c99" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+typedef int v4si __attribute__ ((vector_size (16)));
+
+/*
+**foo:
+**     cmgt    v0.4s, v1.4s, v0.4s
+**     bsl     v0.16b, v2.16b, v3.16b
+**     ret
+*/
+v4si foo (v4si a, v4si b, v4si c, v4si d) {
+    return ((a < b) & c) | ((a >= b) & d);
+}
+
+
+/*
+**bar:
+**...
+**     cmge    v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+**     bsl     v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**     and     v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**...
+*/
+void bar (int * restrict a, int * restrict b, int * restrict c,
+         int * restrict d, int * restrict res, int n)
+{
+  for (int i = 0; i < (n & -4); i++)
+    res[i] = ((a[i] < b[i]) & c[i]) | ((a[i] >= b[i]) & d[i]);
+}
+



