[PATCH]middle-end match.pd: optimize fneg (fabs (x)) to x | (1 << signbit(x)) [PR109154]

Tamar Christina Tue, 26 Sep 2023 17:50:48 -0700

Hi All,

For targets that allow conversion between int and float modes this adds a new
optimization transforming fneg (fabs (x)) into x | (1 << signbit(x)).  Such
sequences are common in scientific code working with gradients.


The transformed instruction, if the target has an inclusive-OR that takes an
immediate, is both shorter and faster.  For those that don't, the immediate has
to be constructed separately, but this still ends up being faster as the
immediate construction is not on the critical path.

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

        PR tree-optimization/109154
        * match.pd: Add new neg+abs rule.

gcc/testsuite/ChangeLog:

        PR tree-optimization/109154
        * gcc.target/aarch64/fneg-abs_1.c: New test.
        * gcc.target/aarch64/fneg-abs_2.c: New test.
        * gcc.target/aarch64/fneg-abs_3.c: New test.
        * gcc.target/aarch64/fneg-abs_4.c: New test.
        * gcc.target/aarch64/sve/fneg-abs_1.c: New test.
        * gcc.target/aarch64/sve/fneg-abs_2.c: New test.
        * gcc.target/aarch64/sve/fneg-abs_3.c: New test.
        * gcc.target/aarch64/sve/fneg-abs_4.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/match.pd b/gcc/match.pd
index 
39c7ea1088f25538ed8bd26ee89711566141a71f..8ebde06dcd4b26d694826cffad0fb17e1136600a
 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -9476,3 +9476,57 @@ and,
       }
       (if (full_perm_p)
        (vec_perm (op@3 @0 @1) @3 @2))))))
+
+/* Transform fneg (fabs (X)) -> X | 1 << signbit (X).  */
+
+(simplify
+ (negate (abs @0))
+ (if (FLOAT_TYPE_P (type)
+      /* We have to delay this rewriting till after forward prop because 
otherwise
+        it's harder to do trigonometry optimizations. e.g. cos(-fabs(x)) is not
+        matched in one go.  Instead cos (-x) is matched first followed by 
cos(|x|).
+        The bottom op approach makes this rule match first and it's not until
+        fwdprop that we match top down.  There are many such simplifications so 
we
+        delay this optimization till later on.  */
+      && canonicalize_math_after_vectorization_p ())
+  (with {
+    tree itype = unsigned_type_for (type);
+    machine_mode mode = TYPE_MODE (type);
+    const struct real_format *float_fmt = FLOAT_MODE_FORMAT (mode);
+    auto optab = VECTOR_TYPE_P (type) ? optab_vector : optab_default; }
+   (if (float_fmt
+       && float_fmt->signbit_rw >= 0
+       && targetm.can_change_mode_class (TYPE_MODE (itype),
+                                         TYPE_MODE (type), ALL_REGS)
+        && target_supports_op_p (itype, BIT_IOR_EXPR, optab))
+    (with { wide_int wone = wi::one (element_precision (type));
+           int sbit = float_fmt->signbit_rw;
+           auto stype = VECTOR_TYPE_P (type) ? TREE_TYPE (itype) : itype;
+           tree sign_bit = wide_int_to_tree (stype, wi::lshift (wone, sbit));}
+     (view_convert:type
+      (bit_ior (view_convert:itype @0)
+              { build_uniform_cst (itype, sign_bit); } )))))))
+
+/* Repeat the same but for conditional negate.  */
+
+(simplify
+ (IFN_COND_NEG @1 (abs @0) @2)
+ (if (FLOAT_TYPE_P (type))
+  (with {
+    tree itype = unsigned_type_for (type);
+    machine_mode mode = TYPE_MODE (type);
+    const struct real_format *float_fmt = FLOAT_MODE_FORMAT (mode);
+    auto optab = VECTOR_TYPE_P (type) ? optab_vector : optab_default; }
+   (if (float_fmt
+       && float_fmt->signbit_rw >= 0
+       && targetm.can_change_mode_class (TYPE_MODE (itype),
+                                         TYPE_MODE (type), ALL_REGS)
+        && target_supports_op_p (itype, BIT_IOR_EXPR, optab))
+    (with { wide_int wone = wi::one (element_precision (type));
+           int sbit = float_fmt->signbit_rw;
+           auto stype = VECTOR_TYPE_P (type) ? TREE_TYPE (itype) : itype;
+           tree sign_bit = wide_int_to_tree (stype, wi::lshift (wone, sbit));}
+     (view_convert:type
+      (IFN_COND_IOR @1 (view_convert:itype @0)
+              { build_uniform_cst (itype, sign_bit); }
+              (view_convert:itype @2) )))))))
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c 
b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
new file mode 100644
index 
0000000000000000000000000000000000000000..f823013c3ddf6b3a266c3abfcbf2642fc2a75fa6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <arm_neon.h>
+
+/*
+** t1:
+**     orr     v[0-9]+.2s, #128, lsl #24
+**     ret
+*/
+float32x2_t t1 (float32x2_t a)
+{
+  return vneg_f32 (vabs_f32 (a));
+}
+
+/*
+** t2:
+**     orr     v[0-9]+.4s, #128, lsl #24
+**     ret
+*/
+float32x4_t t2 (float32x4_t a)
+{
+  return vnegq_f32 (vabsq_f32 (a));
+}
+
+/*
+** t3:
+**     adrp    x0, .LC[0-9]+
+**     ldr     q[0-9]+, \[x0, #:lo12:.LC0\]
+**     orr     v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**     ret
+*/
+float64x2_t t3 (float64x2_t a)
+{
+  return vnegq_f64 (vabsq_f64 (a));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c 
b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
new file mode 100644
index 
0000000000000000000000000000000000000000..141121176b309e4b2aa413dc55271a6e3c93d5e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**     movi    v[0-9]+.2s, 0x80, lsl 24
+**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**     ret
+*/
+float32_t f1 (float32_t a)
+{
+  return -fabsf (a);
+}
+
+/*
+** f2:
+**     mov     x0, -9223372036854775808
+**     fmov    d[0-9]+, x0
+**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**     ret
+*/
+float64_t f2 (float64_t a)
+{
+  return -fabs (a);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c 
b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
new file mode 100644
index 
0000000000000000000000000000000000000000..b4652173a95d104ddfa70c497f0627a61ea89d3b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**     ...
+**     ldr     q[0-9]+, \[x0\]
+**     orr     v[0-9]+.4s, #128, lsl #24
+**     str     q[0-9]+, \[x0\], 16
+**     ...
+*/
+void f1 (float32_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   a[i] = -fabsf (a[i]);
+}
+
+/*
+** f2:
+**     ...
+**     ldr     q[0-9]+, \[x0\]
+**     orr     v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**     str     q[0-9]+, \[x0\], 16
+**     ...
+*/
+void f2 (float64_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   a[i] = -fabs (a[i]);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c 
b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
new file mode 100644
index 
0000000000000000000000000000000000000000..10879dea74462d34b26160eeb0bd54ead063166b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <string.h>
+
+/*
+** negabs:
+**     mov     x0, -9223372036854775808
+**     fmov    d[0-9]+, x0
+**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**     ret
+*/
+double negabs (double x)
+{
+   unsigned long long y;
+   memcpy (&y, &x, sizeof(double));
+   y = y | (1UL << 63);
+   memcpy (&x, &y, sizeof(double));
+   return x;
+}
+
+/*
+** negabsf:
+**     movi    v[0-9]+.2s, 0x80, lsl 24
+**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**     ret
+*/
+float negabsf (float x)
+{
+   unsigned int y;
+   memcpy (&y, &x, sizeof(float));
+   y = y | (1U << 31);
+   memcpy (&x, &y, sizeof(float));
+   return x;
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
new file mode 100644
index 
0000000000000000000000000000000000000000..0c7664e6de77a497682952653ffd417453854d52
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+
+/*
+** t1:
+**     orr     v[0-9]+.2s, #128, lsl #24
+**     ret
+*/
+float32x2_t t1 (float32x2_t a)
+{
+  return vneg_f32 (vabs_f32 (a));
+}
+
+/*
+** t2:
+**     orr     v[0-9]+.4s, #128, lsl #24
+**     ret
+*/
+float32x4_t t2 (float32x4_t a)
+{
+  return vnegq_f32 (vabsq_f32 (a));
+}
+
+/*
+** t3:
+**     adrp    x0, .LC[0-9]+
+**     ldr     q[0-9]+, \[x0, #:lo12:.LC0\]
+**     orr     v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**     ret
+*/
+float64x2_t t3 (float64x2_t a)
+{
+  return vnegq_f64 (vabsq_f64 (a));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
new file mode 100644
index 
0000000000000000000000000000000000000000..a60cd31b9294af2dac69eed1c93f899bd5c78fca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**     movi    v[0-9]+.2s, 0x80, lsl 24
+**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**     ret
+*/
+float32_t f1 (float32_t a)
+{
+  return -fabsf (a);
+}
+
+/*
+** f2:
+**     mov     x0, -9223372036854775808
+**     fmov    d[0-9]+, x0
+**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**     ret
+*/
+float64_t f2 (float64_t a)
+{
+  return -fabs (a);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c 
b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
new file mode 100644
index 
0000000000000000000000000000000000000000..1bf34328d8841de8e6b0a5458562a9f00e31c275
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**     ...
+**     ld1w    z[0-9]+.s, p[0-9]+/z, \[x0, x2, lsl 2\]
+**     orr     z[0-9]+.s, z[0-9]+.s, #0x80000000
+**     st1w    z[0-9]+.s, p[0-9]+, \[x0, x2, lsl 2\]
+**     ...
+*/
+void f1 (float32_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   a[i] = -fabsf (a[i]);
+}
+
+/*
+** f2:
+**     ...
+**     ld1d    z[0-9]+.d, p[0-9]+/z, \[x0, x2, lsl 3\]
+**     orr     z[0-9]+.d, z[0-9]+.d, #0x8000000000000000
+**     st1d    z[0-9]+.d, p[0-9]+, \[x0, x2, lsl 3\]
+**     ...
+*/
+void f2 (float64_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   a[i] = -fabs (a[i]);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c 
b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
new file mode 100644
index 
0000000000000000000000000000000000000000..21f2a8da2a5d44e3d01f6604ca7be87e3744d494
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <string.h>
+
+/*
+** negabs:
+**     mov     x0, -9223372036854775808
+**     fmov    d[0-9]+, x0
+**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**     ret
+*/
+double negabs (double x)
+{
+   unsigned long long y;
+   memcpy (&y, &x, sizeof(double));
+   y = y | (1UL << 63);
+   memcpy (&x, &y, sizeof(double));
+   return x;
+}
+
+/*
+** negabsf:
+**     movi    v[0-9]+.2s, 0x80, lsl 24
+**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**     ret
+*/
+float negabsf (float x)
+{
+   unsigned int y;
+   memcpy (&y, &x, sizeof(float));
+   y = y | (1U << 31);
+   memcpy (&x, &y, sizeof(float));
+   return x;
+}
+




--

diff --git a/gcc/match.pd b/gcc/match.pd
index 
39c7ea1088f25538ed8bd26ee89711566141a71f..8ebde06dcd4b26d694826cffad0fb17e1136600a
 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -9476,3 +9476,57 @@ and,
       }
       (if (full_perm_p)
        (vec_perm (op@3 @0 @1) @3 @2))))))
+
+/* Transform fneg (fabs (X)) -> X | 1 << signbit (X).  */
+
+(simplify
+ (negate (abs @0))
+ (if (FLOAT_TYPE_P (type)
+      /* We have to delay this rewriting till after forward prop because 
otherwise
+        it's harder to do trigonometry optimizations. e.g. cos(-fabs(x)) is not
+        matched in one go.  Instead cos (-x) is matched first followed by 
cos(|x|).
+        The bottom op approach makes this rule match first and it's not until
+        fwdprop that we match top down.  There are many such simplifications so 
we
+        delay this optimization till later on.  */
+      && canonicalize_math_after_vectorization_p ())
+  (with {
+    tree itype = unsigned_type_for (type);
+    machine_mode mode = TYPE_MODE (type);
+    const struct real_format *float_fmt = FLOAT_MODE_FORMAT (mode);
+    auto optab = VECTOR_TYPE_P (type) ? optab_vector : optab_default; }
+   (if (float_fmt
+       && float_fmt->signbit_rw >= 0
+       && targetm.can_change_mode_class (TYPE_MODE (itype),
+                                         TYPE_MODE (type), ALL_REGS)
+        && target_supports_op_p (itype, BIT_IOR_EXPR, optab))
+    (with { wide_int wone = wi::one (element_precision (type));
+           int sbit = float_fmt->signbit_rw;
+           auto stype = VECTOR_TYPE_P (type) ? TREE_TYPE (itype) : itype;
+           tree sign_bit = wide_int_to_tree (stype, wi::lshift (wone, sbit));}
+     (view_convert:type
+      (bit_ior (view_convert:itype @0)
+              { build_uniform_cst (itype, sign_bit); } )))))))
+
+/* Repeat the same but for conditional negate.  */
+
+(simplify
+ (IFN_COND_NEG @1 (abs @0) @2)
+ (if (FLOAT_TYPE_P (type))
+  (with {
+    tree itype = unsigned_type_for (type);
+    machine_mode mode = TYPE_MODE (type);
+    const struct real_format *float_fmt = FLOAT_MODE_FORMAT (mode);
+    auto optab = VECTOR_TYPE_P (type) ? optab_vector : optab_default; }
+   (if (float_fmt
+       && float_fmt->signbit_rw >= 0
+       && targetm.can_change_mode_class (TYPE_MODE (itype),
+                                         TYPE_MODE (type), ALL_REGS)
+        && target_supports_op_p (itype, BIT_IOR_EXPR, optab))
+    (with { wide_int wone = wi::one (element_precision (type));
+           int sbit = float_fmt->signbit_rw;
+           auto stype = VECTOR_TYPE_P (type) ? TREE_TYPE (itype) : itype;
+           tree sign_bit = wide_int_to_tree (stype, wi::lshift (wone, sbit));}
+     (view_convert:type
+      (IFN_COND_IOR @1 (view_convert:itype @0)
+              { build_uniform_cst (itype, sign_bit); }
+              (view_convert:itype @2) )))))))
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c 
b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
new file mode 100644
index 
0000000000000000000000000000000000000000..f823013c3ddf6b3a266c3abfcbf2642fc2a75fa6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <arm_neon.h>
+
+/*
+** t1:
+**     orr     v[0-9]+.2s, #128, lsl #24
+**     ret
+*/
+float32x2_t t1 (float32x2_t a)
+{
+  return vneg_f32 (vabs_f32 (a));
+}
+
+/*
+** t2:
+**     orr     v[0-9]+.4s, #128, lsl #24
+**     ret
+*/
+float32x4_t t2 (float32x4_t a)
+{
+  return vnegq_f32 (vabsq_f32 (a));
+}
+
+/*
+** t3:
+**     adrp    x0, .LC[0-9]+
+**     ldr     q[0-9]+, \[x0, #:lo12:.LC0\]
+**     orr     v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**     ret
+*/
+float64x2_t t3 (float64x2_t a)
+{
+  return vnegq_f64 (vabsq_f64 (a));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c 
b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
new file mode 100644
index 
0000000000000000000000000000000000000000..141121176b309e4b2aa413dc55271a6e3c93d5e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**     movi    v[0-9]+.2s, 0x80, lsl 24
+**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**     ret
+*/
+float32_t f1 (float32_t a)
+{
+  return -fabsf (a);
+}
+
+/*
+** f2:
+**     mov     x0, -9223372036854775808
+**     fmov    d[0-9]+, x0
+**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**     ret
+*/
+float64_t f2 (float64_t a)
+{
+  return -fabs (a);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c 
b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
new file mode 100644
index 
0000000000000000000000000000000000000000..b4652173a95d104ddfa70c497f0627a61ea89d3b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**     ...
+**     ldr     q[0-9]+, \[x0\]
+**     orr     v[0-9]+.4s, #128, lsl #24
+**     str     q[0-9]+, \[x0\], 16
+**     ...
+*/
+void f1 (float32_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   a[i] = -fabsf (a[i]);
+}
+
+/*
+** f2:
+**     ...
+**     ldr     q[0-9]+, \[x0\]
+**     orr     v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**     str     q[0-9]+, \[x0\], 16
+**     ...
+*/
+void f2 (float64_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   a[i] = -fabs (a[i]);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c 
b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
new file mode 100644
index 
0000000000000000000000000000000000000000..10879dea74462d34b26160eeb0bd54ead063166b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <string.h>
+
+/*
+** negabs:
+**     mov     x0, -9223372036854775808
+**     fmov    d[0-9]+, x0
+**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**     ret
+*/
+double negabs (double x)
+{
+   unsigned long long y;
+   memcpy (&y, &x, sizeof(double));
+   y = y | (1UL << 63);
+   memcpy (&x, &y, sizeof(double));
+   return x;
+}
+
+/*
+** negabsf:
+**     movi    v[0-9]+.2s, 0x80, lsl 24
+**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**     ret
+*/
+float negabsf (float x)
+{
+   unsigned int y;
+   memcpy (&y, &x, sizeof(float));
+   y = y | (1U << 31);
+   memcpy (&x, &y, sizeof(float));
+   return x;
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
new file mode 100644
index 
0000000000000000000000000000000000000000..0c7664e6de77a497682952653ffd417453854d52
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+
+/*
+** t1:
+**     orr     v[0-9]+.2s, #128, lsl #24
+**     ret
+*/
+float32x2_t t1 (float32x2_t a)
+{
+  return vneg_f32 (vabs_f32 (a));
+}
+
+/*
+** t2:
+**     orr     v[0-9]+.4s, #128, lsl #24
+**     ret
+*/
+float32x4_t t2 (float32x4_t a)
+{
+  return vnegq_f32 (vabsq_f32 (a));
+}
+
+/*
+** t3:
+**     adrp    x0, .LC[0-9]+
+**     ldr     q[0-9]+, \[x0, #:lo12:.LC0\]
+**     orr     v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**     ret
+*/
+float64x2_t t3 (float64x2_t a)
+{
+  return vnegq_f64 (vabsq_f64 (a));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
new file mode 100644
index 
0000000000000000000000000000000000000000..a60cd31b9294af2dac69eed1c93f899bd5c78fca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**     movi    v[0-9]+.2s, 0x80, lsl 24
+**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**     ret
+*/
+float32_t f1 (float32_t a)
+{
+  return -fabsf (a);
+}
+
+/*
+** f2:
+**     mov     x0, -9223372036854775808
+**     fmov    d[0-9]+, x0
+**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**     ret
+*/
+float64_t f2 (float64_t a)
+{
+  return -fabs (a);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c 
b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
new file mode 100644
index 
0000000000000000000000000000000000000000..1bf34328d8841de8e6b0a5458562a9f00e31c275
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**     ...
+**     ld1w    z[0-9]+.s, p[0-9]+/z, \[x0, x2, lsl 2\]
+**     orr     z[0-9]+.s, z[0-9]+.s, #0x80000000
+**     st1w    z[0-9]+.s, p[0-9]+, \[x0, x2, lsl 2\]
+**     ...
+*/
+void f1 (float32_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   a[i] = -fabsf (a[i]);
+}
+
+/*
+** f2:
+**     ...
+**     ld1d    z[0-9]+.d, p[0-9]+/z, \[x0, x2, lsl 3\]
+**     orr     z[0-9]+.d, z[0-9]+.d, #0x8000000000000000
+**     st1d    z[0-9]+.d, p[0-9]+, \[x0, x2, lsl 3\]
+**     ...
+*/
+void f2 (float64_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   a[i] = -fabs (a[i]);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c 
b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
new file mode 100644
index 
0000000000000000000000000000000000000000..21f2a8da2a5d44e3d01f6604ca7be87e3744d494
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <string.h>
+
+/*
+** negabs:
+**     mov     x0, -9223372036854775808
+**     fmov    d[0-9]+, x0
+**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**     ret
+*/
+double negabs (double x)
+{
+   unsigned long long y;
+   memcpy (&y, &x, sizeof(double));
+   y = y | (1UL << 63);
+   memcpy (&x, &y, sizeof(double));
+   return x;
+}
+
+/*
+** negabsf:
+**     movi    v[0-9]+.2s, 0x80, lsl 24
+**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**     ret
+*/
+float negabsf (float x)
+{
+   unsigned int y;
+   memcpy (&y, &x, sizeof(float));
+   y = y | (1U << 31);
+   memcpy (&x, &y, sizeof(float));
+   return x;
+}
+

[PATCH]middle-end match.pd: optimize fneg (fabs (x)) to x | (1 << signbit(x)) [PR109154]

Reply via email to