On Wed, Nov 30, 2016 at 3:10 PM, Richard Biener <richard.guent...@gmail.com> wrote: > On Fri, Nov 18, 2016 at 5:53 PM, Bin Cheng <bin.ch...@arm.com> wrote: >> Hi, >> This is a rework of https://gcc.gnu.org/ml/gcc-patches/2016-10/msg02007.html. >> Though review comments suggested it could be merged with last kind >> simplification >> of fold_cond_expr_with_comparison, it's not really applicable. As a matter >> of fact, >> the suggestion stands for patch >> @https://gcc.gnu.org/ml/gcc-patches/2016-10/msg02005.html. >> I had previous patch >> (https://gcc.gnu.org/ml/gcc-patches/2016-11/msg01898.html) >> moving fold_cond_expr_with_comparison to match.pd pattern and incorporated >> that patch into it. For this one, the rework is trivial, just renames >> several variable >> tags as suggested. Bootstrap and test on x86_64 and AArch64, is it OK? > > + A) Operand x is a unsigned to signed type conversion and c1 is > + integer zero. In this case, > + (signed type)x < 0 <=> x > MAX_VAL(signed type) > + (signed type)x >= 0 <=> x <= MAX_VAL(signed type) > > for (singed type)x < 0 -> x > signed-type-max we probably do a reverse > "canonicalization" transform? Yeah, > > /* Non-equality compare simplifications from fold_binary */ > (for cmp (lt gt le ge) > ... > (if (wi::eq_p (@1, signed_max) > && TYPE_UNSIGNED (arg1_type) > /* We will flip the signedness of the comparison operator > associated with the mode of @1, so the sign bit is > specified by this mode. Check that @1 is the signed > max associated with this sign bit. */ > && prec == GET_MODE_PRECISION (TYPE_MODE (arg1_type)) > /* signed_type does not work on pointer types. */ > && INTEGRAL_TYPE_P (arg1_type)) > /* The following case also applies to X < signed_max+1 > and X >= signed_max+1 because previous transformations. */ > (if (cmp == LE_EXPR || cmp == GT_EXPR) > (with { tree st = signed_type_for (arg1_type); } > (if (cmp == LE_EXPR) > (ge (convert:st @0) { build_zero_cst (st); }) > (lt (convert:st @0) { build_zero_cst (st); })))))))))) > > + if (cmp_code == GE_EXPR) > + cmp_code = LE_EXPR; > + c1 = wide_int_to_tree (op_type, wi::max_value (to_type)); > + } > ... > + if (op == PLUS_EXPR) > + real_c1 = wide_int_to_tree (op_type, > + wi::sub (c3, c2, sgn, &overflow)); > + else > + real_c1 = wide_int_to_tree (op_type, > + wi::add (c3, c2, sgn, &overflow)); > > can you avoid the tree building here and just continue using wide-ints please? > Simply do the wide_int_to_tree in the result patterns. Hi, I updated patch wrto your comments, also deleted two useless variables. Bootstrap and test, is it OK?
Thanks, bin 2016-12-01 Bin Cheng <bin.ch...@arm.com> * match.pd: Add new pattern: (cond (cmp (convert? x) c1) (op x c2) c3) -> (op (minmax x c1) c2). gcc/testsuite/ChangeLog 2016-12-01 Bin Cheng <bin.ch...@arm.com> * gcc.dg/fold-bopcond-1.c: New test. * gcc.dg/fold-bopcond-2.c: New test. > > Otherwise looks ok to me. > > Thanks, > Richard. > > >> Thanks, >> bin >> >> 2016-11-17 Bin Cheng <bin.ch...@arm.com> >> >> * match.pd: Add new pattern: >> (cond (cmp (convert? x) c1) (op x c2) c3) -> (op (minmax x c1) c2). >> >> gcc/testsuite/ChangeLog >> 2016-11-17 Bin Cheng <bin.ch...@arm.com> >> >> * gcc.dg/fold-bopcond-1.c: New test. >> * gcc.dg/fold-bopcond-2.c: New test.
diff --git a/gcc/match.pd b/gcc/match.pd index bc8a5e7..dbb9103 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -2038,6 +2038,106 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (convert (cond (eq @1 (convert @3)) (convert:from_type @3) (convert:from_type @2))))))))) +/* (cond (cmp (convert? x) c1) (op x c2) c3) -> (op (minmax x c1) c2) if: + + 1) OP is PLUS or MINUS. + 2) CMP is LT, LE, GT or GE. + 3) C3 == (C1 op C2), and computation doesn't have undefined behavior. + + This pattern also handles special cases like: + + A) Operand x is a unsigned to signed type conversion and c1 is + integer zero. In this case, + (signed type)x < 0 <=> x > MAX_VAL(signed type) + (signed type)x >= 0 <=> x <= MAX_VAL(signed type) + B) Const c1 may not equal to (C3 op' C2). In this case we also + check equality for (c1+1) and (c1-1) by adjusting comparison + code. + + TODO: Though signed type is handled by this pattern, it cannot be + simplified at the moment because C standard requires additional + type promotion. In order to match&simplify it here, the IR needs + to be cleaned up by other optimizers, i.e, VRP. */ +(for op (plus minus) + (for cmp (lt le gt ge) + (simplify + (cond (cmp (convert? @X) INTEGER_CST@1) (op @X INTEGER_CST@2) INTEGER_CST@3) + (with { tree from_type = TREE_TYPE (@X), to_type = TREE_TYPE (@1); } + (if (types_match (from_type, to_type) + /* Check if it is special case A). */ + || (TYPE_UNSIGNED (from_type) + && !TYPE_UNSIGNED (to_type) + && TYPE_PRECISION (from_type) == TYPE_PRECISION (to_type) + && integer_zerop (@1) + && (cmp == LT_EXPR || cmp == GE_EXPR))) + (with + { + bool overflow = false; + enum tree_code code, cmp_code = cmp; + wide_int real_c1, c1 = @1, c2 = @2, c3 = @3; + signop sgn = TYPE_SIGN (from_type); + + /* Handle special case A), given x of unsigned type: + ((signed type)x < 0) <=> (x > MAX_VAL(signed type)) + ((signed type)x >= 0) <=> (x <= MAX_VAL(signed type)) */ + if (!types_match (from_type, to_type)) + { + if (cmp_code == LT_EXPR) + cmp_code = GT_EXPR; + if (cmp_code == GE_EXPR) + cmp_code = LE_EXPR; + c1 = wi::max_value (to_type); + } + /* To simplify this pattern, we require c3 = (c1 op c2). Here we + compute (c3 op' c2) and check if it equals to c1 with op' being + the inverted operator of op. Make sure overflow doesn't happen + if it is undefined. */ + if (op == PLUS_EXPR) + real_c1 = wi::sub (c3, c2, sgn, &overflow); + else + real_c1 = wi::add (c3, c2, sgn, &overflow); + + code = cmp_code; + if (!overflow || !TYPE_OVERFLOW_UNDEFINED (from_type)) + { + /* Check if c1 equals to real_c1. Boundary condition is handled + by adjusting comparison operation if necessary. */ + if (!wi::cmp (wi::sub (real_c1, 1, sgn, &overflow), c1, sgn) + && !overflow) + { + /* X <= Y - 1 equals to X < Y. */ + if (cmp_code == LE_EXPR) + code = LT_EXPR; + /* X > Y - 1 equals to X >= Y. */ + if (cmp_code == GT_EXPR) + code = GE_EXPR; + } + if (!wi::cmp (wi::add (real_c1, 1, sgn, &overflow), c1, sgn) + && !overflow) + { + /* X < Y + 1 equals to X <= Y. */ + if (cmp_code == LT_EXPR) + code = LE_EXPR; + /* X >= Y + 1 equals to X > Y. */ + if (cmp_code == GE_EXPR) + code = GT_EXPR; + } + if (code != cmp_code || !wi::cmp (real_c1, c1, sgn)) + { + if (cmp_code == LT_EXPR || cmp_code == LE_EXPR) + code = MIN_EXPR; + if (cmp_code == GT_EXPR || cmp_code == GE_EXPR) + code = MAX_EXPR; + } + } + } + (if (code == MAX_EXPR) + (op (max @X { wide_int_to_tree (from_type, real_c1); }) + { wide_int_to_tree (from_type, c2); }) + (if (code == MIN_EXPR) + (op (min @X { wide_int_to_tree (from_type, real_c1); }) + { wide_int_to_tree (from_type, c2); }))))))))) + (for cnd (cond vec_cond) /* A ? B : (A ? X : C) -> A ? B : C. */ (simplify diff --git a/gcc/testsuite/gcc.dg/fold-bopcond-1.c b/gcc/testsuite/gcc.dg/fold-bopcond-1.c new file mode 100644 index 0000000..7324c16 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-bopcond-1.c @@ -0,0 +1,48 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-ifcvt" } */ + +int foo1 (unsigned short a[], unsigned int x) +{ + unsigned int i; + for (i = 0; i < 1000; i++) + { + x = a[i]; + a[i] = (unsigned short)(x <= 32768 ? x + 32768 : 0); + } + return x; +} + +int foo2 (unsigned short a[], unsigned int x) +{ + unsigned int i; + for (i = 0; i < 1000; i++) + { + x = a[i]; + a[i] = (unsigned short)(x < 32768 ? x + 32768 : 0); + } + return x; +} + +int foo3 (unsigned short a[], unsigned int x) +{ + unsigned int i; + for (i = 0; i < 1000; i++) + { + x = a[i]; + a[i] = (unsigned short)(x < 1000 ? x - 1000 : 0); + } + return x; +} + +int foo4 (unsigned short a[], unsigned int x) +{ + unsigned int i; + for (i = 0; i < 1000; i++) + { + x = a[i]; + a[i] = (unsigned short)(x <= 2 ? x + 999 : 1001); + } + return x; +} + +/* { dg-final { scan-tree-dump-times "MIN_EXPR " 4 "ifcvt" } } */ diff --git a/gcc/testsuite/gcc.dg/fold-bopcond-2.c b/gcc/testsuite/gcc.dg/fold-bopcond-2.c new file mode 100644 index 0000000..7a47449 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-bopcond-2.c @@ -0,0 +1,48 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-ifcvt" } */ + +int foo1 (unsigned short a[], unsigned int x) +{ + unsigned int i; + for (i = 0; i < 1000; i++) + { + x = a[i]; + a[i] = (unsigned short)(x >= 32768 ? x - 32768 : 0); + } + return x; +} + +int foo2 (unsigned short a[], unsigned int x) +{ + unsigned int i; + for (i = 0; i < 1000; i++) + { + x = a[i]; + a[i] = (unsigned short)(x > 32768 ? x - 32768 : 0); + } + return x; +} + +int foo3 (unsigned short a[], unsigned int x) +{ + unsigned int i; + for (i = 0; i < 1000; i++) + { + x = a[i]; + a[i] = (unsigned short)(x > 1000 ? x - 1000 : 0); + } + return x; +} + +int foo4 (unsigned short a[], unsigned int x) +{ + unsigned int i; + for (i = 0; i < 1000; i++) + { + x = a[i]; + a[i] = (unsigned short)(x >= 2 ? x - 32768 : 32770); + } + return x; +} + +/* { dg-final { scan-tree-dump-times "MAX_EXPR " 4 "ifcvt" } } */