Hi,
This is a patch set adding various match.pd patterns in order to generate
simplified MIN/MAX_EXPR mostly from COND_EXPR. This is the first one
optimizing (convert1 (minmax ((convert2 (x) c)))) to minmax (x c), if convert2
promotes x and convert1 demotes back to x's type. With this patch, generated
assembly for test:
.L4:
ldr q0, [x3, x1]
add w2, w2, 1
cmp w0, w2
ushll v2.4s, v0.4h, 0
ushll2 v1.4s, v0.8h, 0
umin v2.4s, v2.4s, v3.4s
umin v1.4s, v1.4s, v3.4s
xtn v4.4h, v2.4s
xtn2 v4.8h, v1.4s
str q4, [x3, x1]
add x1, x1, 16
bhi .L4
can be improved to:
.L4:
ldr q0, [x3, x1]
add w2, w2, 1
cmp w0, w2
umin v1.8h, v0.8h, v2.8h
str q1, [x3, x1]
add x1, x1, 16
bhi .L4
Bootstrap and test on x86_64 and AArch64 for whole patch set. Is it OK?
Thanks,
bin
2016-10-21 Bin Cheng <bin.ch...@arm.com>
* match.pd ((convert1 (minmax ((convert2 (x) c)))) -> minmax (x c)):
New pattern.
gcc/testsuite/ChangeLog
2016-10-21 Bin Cheng <bin.ch...@arm.com>
* gcc.dg/fold-convmaxconv-1.c: New test.
* gcc.dg/fold-convminconv-1.c: New test.
diff --git a/gcc/match.pd b/gcc/match.pd
index b782a1e..7365bc1 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1331,6 +1331,28 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& TYPE_MIN_VALUE (type)
&& operand_equal_p (@1, TYPE_MIN_VALUE (type), OEP_ONLY_CONST))
@0)))
+
+#if GIMPLE
+/* (convert1 (minmax ((convert2 (x) c)))) -> minmax (x c) if convert2
+ promotes x and convert1 demotes back to x's type. */
+
+(for minmax (min max)
+ (simplify
+ (convert (minmax@0 (convert @1) INTEGER_CST@2))
+ (if (types_compatible_p (TREE_TYPE (@1), type))
+ (with
+ {
+ tree minmax_type = TREE_TYPE (@0);
+ signop sgn = TYPE_SIGN (type);
+ widest_int type_min = widest_int::from (wi::min_value (type), sgn);
+ widest_int type_max = widest_int::from (wi::max_value (type), sgn);
+ }
+ (if (sgn == TYPE_SIGN (minmax_type)
+ && TYPE_PRECISION (minmax_type) >= TYPE_PRECISION (type)
+ && wi::to_widest (@2) >= type_min && wi::to_widest (@2) <= type_max)
+ (minmax @1 { fold_convert (type, @2); }))))))
+#endif
+
(for minmax (FMIN FMAX)
/* If either argument is NaN, return the other one. Avoid the
transformation if we get (and honor) a signalling NaN. */
diff --git a/gcc/testsuite/gcc.dg/fold-convmaxconv-1.c
b/gcc/testsuite/gcc.dg/fold-convmaxconv-1.c
new file mode 100644
index 0000000..3ffff8b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-convmaxconv-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-optimized" } */
+
+int foo (short a[], int x)
+{
+ unsigned int i;
+ for (i = 0; i < 1000; i++)
+ {
+ x = a[i];
+ a[i] = (x <= 0 ? 0 : x);
+ }
+ return x;
+}
+
+/* { dg-final { scan-tree-dump-not " = MAX_EXPR <x_\[0-9\]*" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/fold-convminconv-1.c
b/gcc/testsuite/gcc.dg/fold-convminconv-1.c
new file mode 100644
index 0000000..f4a048e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-convminconv-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-optimized" } */
+
+int foo (unsigned short a[], unsigned int x)
+{
+ unsigned int i;
+ for (i = 0; i < 1000; i++)
+ {
+ x = a[i];
+ a[i] = (x >= 255 ? 255 : x);
+ }
+ return x;
+}
+
+/* { dg-final { scan-tree-dump-not " = MIN_EXPR <x_\[0-9\]*" "optimized" } } */