Hi,
As commented in patch, this one simplifies (cond (cmp x c1) (op x c2) c3) into 
(op (minmax x c1) c2) if:

     1) OP is PLUS or MINUS.
     2) CMP is LT, LE, GT or GE.
     3) C3 == (C1 op C2), and the experation isn't undefined behavior.

   This pattern also handles special cases like:

     1) Comparison's operand x is a unsigned to signed type conversion
        and c1 is integer zero.  In this case,
          (signed type)x  < 0  <=>  x  > MAX_VAL(signed type)
          (signed type)x >= 0  <=>  x <= MAX_VAL(signed type)
     2) Const c1 may not equal to (C3 op' C2).  In this case we also
        check equality for (c1+1) and (c1-1) by adjusting comparison
        code.

Also note: Though signed type is handled by this pattern, it cannot be 
simplified at the moment because C standard requires additional type promotion. 
 In order to match&simplify signed type cases, the IR needs to be cleaned up by 
other optimizers, i.e, VRP.
For given loop:
int foo1 (unsigned short a[], unsigned int x)
{
  unsigned int i;
  for (i = 0; i < 1000; i++)
    {
      x = a[i];
      a[i] = (unsigned short)(x <= 32768 ? x + 32768 : 0);
    }
  return x;
}

Generated assembly can be improved from:
.L4:
        ldr     q5, [x3, x1]
        add     w2, w2, 1
        cmp     w0, w2
        ushll   v1.4s, v5.4h, 0
        ushll2  v0.4s, v5.8h, 0
        add     v4.4s, v1.4s, v2.4s
        add     v3.4s, v0.4s, v2.4s
        cmhs    v1.4s, v2.4s, v1.4s
        cmhs    v0.4s, v2.4s, v0.4s
        and     v1.16b, v4.16b, v1.16b
        and     v0.16b, v3.16b, v0.16b
        xtn     v3.4h, v1.4s
        xtn2    v3.8h, v0.4s
        str     q3, [x3, x1]
        add     x1, x1, 16
        bhi     .L4

To:
.L4:
        ldr     q1, [x3, x1]
        add     w2, w2, 1
        cmp     w0, w2
        umin    v0.8h, v1.8h, v2.8h
        add     v0.8h, v0.8h, v2.8h
        str     q0, [x3, x1]
        add     x1, x1, 16
        bhi     .L4

Bootstrap and test on x86_64 and AArch64 for whole patch set.  Any comments?

Thanks,
bin

2016-10-21  Bin Cheng  <bin.ch...@arm.com>

        * match.pd ((cond (cmp x c1) (op x c2) c3) -> (op (minmax x c1) c2)):
        New pattern.

gcc/testsuite/ChangeLog
2016-10-21  Bin Cheng  <bin.ch...@arm.com>

        * gcc.dg/fold-bopcond-1.c: New test.
        * gcc.dg/fold-bopcond-2.c: New test.
diff --git a/gcc/match.pd b/gcc/match.pd
index 071db2c..704421d 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1981,6 +1981,109 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
      (max @1 @2)
      (if (code == MIN_EXPR)
       (min @1 @2))))))
+
+/* (cond (cmp x c1) (op x c2) c3) -> (op (minmax x c1) c2) if:
+
+     1) OP is PLUS or MINUS.
+     2) CMP is LT, LE, GT or GE.
+     3) C3 == (C1 op C2), and the experation isn't undefined behavior.
+
+   This pattern also handles special cases like:
+
+     1) Comparison's operand x is a unsigned to signed type conversion
+       and c1 is integer zero.  In this case,
+         (signed type)x  < 0  <=>  x  > MAX_VAL(signed type)
+         (signed type)x >= 0  <=>  x <= MAX_VAL(signed type)
+     2) Const c1 may not equal to (C3 op' C2).  In this case we also
+       check equality for (c1+1) and (c1-1) by adjusting comparison
+       code.
+
+   TODO: Though signed type is handled by this pattern, it cannot be
+   simplified at the moment because C standard requires additional
+   promoting conversion.  In order to match&simplify it here, the IR
+   needs to be cleaned up by other optimizers, i.e, VRP.  */
+(for op (plus minus)
+ (for cmp (lt le gt ge)
+  (simplify
+   (cond (cmp@0 (convert?@00 @10) INTEGER_CST@01)
+        (op @10 INTEGER_CST@11)
+        INTEGER_CST@2)
+    (with { tree from_type = TREE_TYPE (@10), to_type = TREE_TYPE (@00); }
+     (if (@00 == @10
+         || (TYPE_UNSIGNED (from_type)
+             && !TYPE_UNSIGNED (to_type)
+             && TYPE_PRECISION (from_type) == TYPE_PRECISION (to_type)
+             && integer_zerop (@01)
+             && (TREE_CODE (@0) == LT_EXPR || TREE_CODE (@0) == GE_EXPR)))
+      (with
+       {
+       bool overflow = false;
+       enum tree_code code, cmp_code = TREE_CODE (@0);
+       tree real_c1, c1 = @01, c2 = @11, c3 = @2;
+       tree op_type = TREE_TYPE (@10);
+       signop sgn = TYPE_SIGN (op_type);
+       widest_int wmin = widest_int::from (wi::min_value (op_type), sgn);
+       widest_int wmax = widest_int::from (wi::max_value (op_type), sgn);
+
+       /* Handle special cases, given x of unsigned type:
+           ((signed type)x  < 0) <=> (x  > MAX_VAL(signed type))
+           ((signed type)x >= 0) <=> (x <= MAX_VAL(signed type))  */
+       if (@00 != @10)
+         {
+           if (cmp_code == LT_EXPR)
+             cmp_code = GT_EXPR;
+           if (cmp_code == GE_EXPR)
+             cmp_code = LE_EXPR;
+           c1 = wide_int_to_tree (op_type, wi::max_value (to_type));
+         }
+       /* To simplify this pattern, we require c3 = (c1 op c2).  Here we
+          compute (c3 op' c2) and check it equals to c1 in which op' is
+          the inverted operator of op.  Also make sure overflow does not
+          happen if it is undefined.  */
+       if (op == PLUS_EXPR)
+         real_c1 = wide_int_to_tree (op_type,
+                                     wi::sub (c3, c2, sgn, &overflow));
+       else
+         real_c1 = wide_int_to_tree (op_type,
+                                     wi::add (c3, c2, sgn, &overflow));
+
+       code = cmp_code;
+       if (!overflow || !TYPE_OVERFLOW_UNDEFINED (op_type))
+         {
+           /* Check if c1 equals to real_c1.  Boundary condition is handled
+              by adjusting comparison operation if necessary.  */
+           if (wi::to_widest (c1) == (wi::to_widest (real_c1) - 1))
+             {
+               /* X <= Y - 1 equals to X < Y.  */
+               if (cmp_code == LE_EXPR)
+                 code = LT_EXPR;
+               /* X > Y - 1 equals to X >= Y.  */
+               if (cmp_code == GT_EXPR)
+                 code = GE_EXPR;
+             }
+           if (wi::to_widest (c1) == (wi::to_widest (real_c1) + 1))
+             {
+               /* X < Y + 1 equals to X <= Y.  */
+               if (cmp_code == LT_EXPR)
+                 code = LE_EXPR;
+               /* X >= Y + 1 equals to X > Y.  */
+               if (cmp_code == GE_EXPR)
+                 code = GT_EXPR;
+             }
+           if (code != cmp_code
+               || wi::to_widest (c1) == wi::to_widest (real_c1))
+             {
+               if (cmp_code == LT_EXPR || cmp_code == LE_EXPR)
+                 code = MIN_EXPR;
+               if (cmp_code == GT_EXPR || cmp_code == GE_EXPR)
+                 code = MAX_EXPR;
+             }
+         }
+      }
+      (if (code == MAX_EXPR)
+       (op (max @10 { real_c1; }) { c2; })
+       (if (code == MIN_EXPR)
+       (op (min @10 { real_c1; }) { c2; })))))))))
 #endif
 
 (for cnd (cond vec_cond)
diff --git a/gcc/testsuite/gcc.dg/fold-bopcond-1.c 
b/gcc/testsuite/gcc.dg/fold-bopcond-1.c
new file mode 100644
index 0000000..7324c16
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-bopcond-1.c
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-ifcvt" } */
+
+int foo1 (unsigned short a[], unsigned int x)
+{
+  unsigned int i;
+  for (i = 0; i < 1000; i++)
+    {
+      x = a[i];
+      a[i] = (unsigned short)(x <= 32768 ? x + 32768 : 0);
+    }
+  return x;
+}
+
+int foo2 (unsigned short a[], unsigned int x)
+{
+  unsigned int i;
+  for (i = 0; i < 1000; i++)
+    {
+      x = a[i];
+      a[i] = (unsigned short)(x < 32768 ? x + 32768 : 0);
+    }
+  return x;
+}
+
+int foo3 (unsigned short a[], unsigned int x)
+{
+  unsigned int i;
+  for (i = 0; i < 1000; i++)
+    {
+      x = a[i];
+      a[i] = (unsigned short)(x < 1000 ? x - 1000 : 0);
+    }
+  return x;
+}
+
+int foo4 (unsigned short a[], unsigned int x)
+{
+  unsigned int i;
+  for (i = 0; i < 1000; i++)
+    {
+      x = a[i];
+      a[i] = (unsigned short)(x <= 2 ? x + 999 : 1001);
+    }
+  return x;
+}
+
+/* { dg-final { scan-tree-dump-times "MIN_EXPR " 4 "ifcvt" } } */
diff --git a/gcc/testsuite/gcc.dg/fold-bopcond-2.c 
b/gcc/testsuite/gcc.dg/fold-bopcond-2.c
new file mode 100644
index 0000000..7a47449
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-bopcond-2.c
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-ifcvt" } */
+
+int foo1 (unsigned short a[], unsigned int x)
+{
+  unsigned int i;
+  for (i = 0; i < 1000; i++)
+    {
+      x = a[i];
+      a[i] = (unsigned short)(x >= 32768 ? x - 32768 : 0);
+    }
+  return x;
+}
+
+int foo2 (unsigned short a[], unsigned int x)
+{
+  unsigned int i;
+  for (i = 0; i < 1000; i++)
+    {
+      x = a[i];
+      a[i] = (unsigned short)(x > 32768 ? x - 32768 : 0);
+    }
+  return x;
+}
+
+int foo3 (unsigned short a[], unsigned int x)
+{
+  unsigned int i;
+  for (i = 0; i < 1000; i++)
+    {
+      x = a[i];
+      a[i] = (unsigned short)(x > 1000 ? x - 1000 : 0);
+    }
+  return x;
+}
+
+int foo4 (unsigned short a[], unsigned int x)
+{
+  unsigned int i;
+  for (i = 0; i < 1000; i++)
+    {
+      x = a[i];
+      a[i] = (unsigned short)(x >= 2 ? x - 32768 : 32770);
+    }
+  return x;
+}
+
+/* { dg-final { scan-tree-dump-times "MAX_EXPR " 4 "ifcvt" } } */

Reply via email to