> >
> > +/* Fold (-x >> C) into x > 0 where C = precision(type) - 1.  */ (for
> > +cst (INTEGER_CST VECTOR_CST)  (simplify
> > +  (rshift (negate:s @0) cst@1)
> > +   (if (!flag_wrapv)
> 
> Don't test flag_wrapv directly, instead use the appropriate
> TYPE_OVERFLOW_{UNDEFINED,WRAPS} predicates.  But I'm not sure what
> we are protecting against?  Right-shift of signed integers is implementation-
> defined and GCC treats it as you'd expect, sign-extending the result.
> 

It's protecting against the overflow of the negate on INT_MIN. When wrapping
overflows are enabled the results would be wrong.

> > +    (with { tree ctype = TREE_TYPE (@0);
> > +       tree stype = TREE_TYPE (@1);
> > +       tree bt = truth_type_for (ctype); }
> > +     (switch
> > +      /* Handle scalar case.  */
> > +      (if (INTEGRAL_TYPE_P (ctype)
> > +      && !VECTOR_TYPE_P (ctype)
> > +      && !TYPE_UNSIGNED (ctype)
> > +      && canonicalize_math_after_vectorization_p ()
> > +      && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1))
> > +       (convert:bt (gt:bt @0 { build_zero_cst (stype); })))
> 
> I'm not sure why the result is of type 'bt' rather than the original type of 
> the
> expression?

That was to satisfy some RTL check that expected results of comparisons to 
always
be a Boolean, though for scalar that logically always is the case, I just added 
it
for consistency.

> 
> In that regard for non-vectors we'd have to add the sign extension from
> unsigned bool, in the vector case we'd hope the type of the comparison is
> correct.  I think in both cases it might be convenient to use
> 
>   (cond (gt:bt @0 { build_zero_cst (ctype); }) { build_all_ones_cst (ctype); }
> { build_zero_cost (ctype); })
> 
> to compute the correct result and rely on (cond ..) simplifications to 
> simplify
> that if possible.
> 
> Btw, 'stype' should be irrelevant - you need to look at the precision of 
> 'ctype',
> no?

I was working under the assumption that both input types must have the same
precision, but turns out that assumption doesn't need to hold.

New version attached.

Bootstrapped Regtested on aarch64-none-linux-gnu,
x86_64-pc-linux-gnu and no regressions.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

        * match.pd: New negate+shift pattern.

gcc/testsuite/ChangeLog:

        * gcc.dg/signbit-2.c: New test.
        * gcc.dg/signbit-3.c: New test.
        * gcc.target/aarch64/signbit-1.c: New test.

--- inline copy of patch ---

diff --git a/gcc/match.pd b/gcc/match.pd
index 
7d2a24dbc5e9644a09968f877e12a824d8ba1caa..9532cae582e152cae6e22fcce95a9744a844e3c2
 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -38,7 +38,8 @@ along with GCC; see the file COPYING3.  If not see
    uniform_integer_cst_p
    HONOR_NANS
    uniform_vector_p
-   bitmask_inv_cst_vector_p)
+   bitmask_inv_cst_vector_p
+   expand_vec_cmp_expr_p)
 
 /* Operator lists.  */
 (define_operator_list tcc_comparison
@@ -826,6 +827,42 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
     { tree utype = unsigned_type_for (type); }
     (convert (rshift (lshift (convert:utype @0) @2) @3))))))
 
+/* Fold (-x >> C) into x > 0 where C = precision(type) - 1.  */
+(for cst (INTEGER_CST VECTOR_CST)
+ (simplify
+  (rshift (negate:s @0) cst@1)
+   (if (!TYPE_OVERFLOW_WRAPS (type))
+    (with { tree ctype = TREE_TYPE (@0);
+           tree stype = TREE_TYPE (@1);
+           tree bt = truth_type_for (ctype);
+           tree zeros = build_zero_cst (ctype); }
+     (switch
+      /* Handle scalar case.  */
+      (if (INTEGRAL_TYPE_P (ctype)
+          && !VECTOR_TYPE_P (ctype)
+          && !TYPE_UNSIGNED (ctype)
+          && canonicalize_math_after_vectorization_p ()
+          && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (ctype) - 1))
+       (cond (gt:bt @0 { zeros; }) { build_all_ones_cst (ctype); } { zeros; }))
+      /* Handle vector case with a scalar immediate.  */
+      (if (VECTOR_INTEGER_TYPE_P (ctype)
+          && !VECTOR_TYPE_P (stype)
+          && !TYPE_UNSIGNED (ctype)
+          && expand_vec_cmp_expr_p (ctype, ctype, { GT_EXPR }))
+       (with { HOST_WIDE_INT bits = GET_MODE_UNIT_BITSIZE (TYPE_MODE (ctype)); 
}
+       (if (wi::eq_p (wi::to_wide (@1), bits - 1))
+        (convert:bt (gt:bt @0 { zeros; })))))
+      /* Handle vector case with a vector immediate.   */
+      (if (VECTOR_INTEGER_TYPE_P (ctype)
+          && VECTOR_TYPE_P (stype)
+          && !TYPE_UNSIGNED (ctype)
+          && uniform_vector_p (@1)
+          && expand_vec_cmp_expr_p (ctype, ctype, { GT_EXPR }))
+       (with { tree cst = vector_cst_elt (@1, 0);
+              HOST_WIDE_INT bits = GET_MODE_UNIT_BITSIZE (TYPE_MODE (ctype)); }
+        (if (wi::eq_p (wi::to_wide (cst), bits - 1))
+        (convert:bt (gt:bt @0 { zeros; }))))))))))
+
 /* Fold (C1/X)*C2 into (C1*C2)/X.  */
 (simplify
  (mult (rdiv@3 REAL_CST@0 @1) REAL_CST@2)
diff --git a/gcc/testsuite/gcc.dg/signbit-2.c b/gcc/testsuite/gcc.dg/signbit-2.c
new file mode 100644
index 
0000000000000000000000000000000000000000..fc0157cbc5c7996b481f2998bc30176c96a669bb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/signbit-2.c
@@ -0,0 +1,19 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 --save-temps -fdump-tree-optimized" } */
+
+#include <stdint.h>
+
+void fun1(int32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (-x[i]) >> 31;
+}
+
+void fun2(int32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (-x[i]) >> 30;
+}
+
+/* { dg-final { scan-tree-dump-times {\s+>\s+\{ 0, 0, 0, 0 \}} 1 optimized } } 
*/
+/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */
diff --git a/gcc/testsuite/gcc.dg/signbit-3.c b/gcc/testsuite/gcc.dg/signbit-3.c
new file mode 100644
index 
0000000000000000000000000000000000000000..19e9c06c349b3287610f817628f00938ece60bf7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/signbit-3.c
@@ -0,0 +1,13 @@
+/* { dg-do assemble } */
+/* { dg-options "-O1 --save-temps -fdump-tree-optimized" } */
+
+#include <stdint.h>
+
+void fun1(int32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (-x[i]) >> 31;
+}
+
+/* { dg-final { scan-tree-dump-times {\s+>\s+0;} 1 optimized } } */
+/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/signbit-1.c 
b/gcc/testsuite/gcc.target/aarch64/signbit-1.c
new file mode 100644
index 
0000000000000000000000000000000000000000..3ebfb0586f37de29cf58635b27fe48503714447e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/signbit-1.c
@@ -0,0 +1,18 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <stdint.h>
+
+void fun1(int32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (-x[i]) >> 31;
+}
+
+void fun2(int32_t *x, int n)
+{
+    for (int i = 0; i < (n & -16); i++)
+      x[i] = (-x[i]) >> 30;
+}
+
+/* { dg-final { scan-assembler-times {\tcmgt\t} 1 } } */

Attachment: rb14918.patch
Description: rb14918.patch

Reply via email to