Hi All, For targets that allow conversion between int and float modes this adds a new optimization transforming fneg (fabs (x)) into x | (1 << signbit(x)). Such sequences are common in scientific code working with gradients.
The transformed instruction if the target has an inclusive-OR that takes an immediate is both shorter an faster. For those that don't the immediate has to be seperate constructed but this still ends up being faster as the immediate construction is not on the critical path. Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. Ok for master? Thanks, Tamar gcc/ChangeLog: PR tree-optimization/109154 * match.pd: Add new neg+abs rule. gcc/testsuite/ChangeLog: PR tree-optimization/109154 * gcc.target/aarch64/fneg-abs_1.c: New test. * gcc.target/aarch64/fneg-abs_2.c: New test. * gcc.target/aarch64/fneg-abs_3.c: New test. * gcc.target/aarch64/fneg-abs_4.c: New test. * gcc.target/aarch64/sve/fneg-abs_1.c: New test. * gcc.target/aarch64/sve/fneg-abs_2.c: New test. * gcc.target/aarch64/sve/fneg-abs_3.c: New test. * gcc.target/aarch64/sve/fneg-abs_4.c: New test. --- inline copy of patch -- diff --git a/gcc/match.pd b/gcc/match.pd index 39c7ea1088f25538ed8bd26ee89711566141a71f..8ebde06dcd4b26d694826cffad0fb17e1136600a 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -9476,3 +9476,57 @@ and, } (if (full_perm_p) (vec_perm (op@3 @0 @1) @3 @2)))))) + +/* Transform fneg (fabs (X)) -> X | 1 << signbit (X). */ + +(simplify + (negate (abs @0)) + (if (FLOAT_TYPE_P (type) + /* We have to delay this rewriting till after forward prop because otherwise + it's harder to do trigonometry optimizations. e.g. cos(-fabs(x)) is not + matched in one go. Instead cos (-x) is matched first followed by cos(|x|). + The bottom op approach makes this rule match first and it's not untill + fwdprop that we match top down. There are manu such simplications so we + delay this optimization till later on. */ + && canonicalize_math_after_vectorization_p ()) + (with { + tree itype = unsigned_type_for (type); + machine_mode mode = TYPE_MODE (type); + const struct real_format *float_fmt = FLOAT_MODE_FORMAT (mode); + auto optab = VECTOR_TYPE_P (type) ? optab_vector : optab_default; } + (if (float_fmt + && float_fmt->signbit_rw >= 0 + && targetm.can_change_mode_class (TYPE_MODE (itype), + TYPE_MODE (type), ALL_REGS) + && target_supports_op_p (itype, BIT_IOR_EXPR, optab)) + (with { wide_int wone = wi::one (element_precision (type)); + int sbit = float_fmt->signbit_rw; + auto stype = VECTOR_TYPE_P (type) ? TREE_TYPE (itype) : itype; + tree sign_bit = wide_int_to_tree (stype, wi::lshift (wone, sbit));} + (view_convert:type + (bit_ior (view_convert:itype @0) + { build_uniform_cst (itype, sign_bit); } ))))))) + +/* Repeat the same but for conditional negate. */ + +(simplify + (IFN_COND_NEG @1 (abs @0) @2) + (if (FLOAT_TYPE_P (type)) + (with { + tree itype = unsigned_type_for (type); + machine_mode mode = TYPE_MODE (type); + const struct real_format *float_fmt = FLOAT_MODE_FORMAT (mode); + auto optab = VECTOR_TYPE_P (type) ? optab_vector : optab_default; } + (if (float_fmt + && float_fmt->signbit_rw >= 0 + && targetm.can_change_mode_class (TYPE_MODE (itype), + TYPE_MODE (type), ALL_REGS) + && target_supports_op_p (itype, BIT_IOR_EXPR, optab)) + (with { wide_int wone = wi::one (element_precision (type)); + int sbit = float_fmt->signbit_rw; + auto stype = VECTOR_TYPE_P (type) ? TREE_TYPE (itype) : itype; + tree sign_bit = wide_int_to_tree (stype, wi::lshift (wone, sbit));} + (view_convert:type + (IFN_COND_IOR @1 (view_convert:itype @0) + { build_uniform_cst (itype, sign_bit); } + (view_convert:itype @2) ))))))) \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c new file mode 100644 index 0000000000000000000000000000000000000000..f823013c3ddf6b3a266c3abfcbf2642fc2a75fa6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c @@ -0,0 +1,39 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#pragma GCC target "+nosve" + +#include <arm_neon.h> + +/* +** t1: +** orr v[0-9]+.2s, #128, lsl #24 +** ret +*/ +float32x2_t t1 (float32x2_t a) +{ + return vneg_f32 (vabs_f32 (a)); +} + +/* +** t2: +** orr v[0-9]+.4s, #128, lsl #24 +** ret +*/ +float32x4_t t2 (float32x4_t a) +{ + return vnegq_f32 (vabsq_f32 (a)); +} + +/* +** t3: +** adrp x0, .LC[0-9]+ +** ldr q[0-9]+, \[x0, #:lo12:.LC0\] +** orr v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b +** ret +*/ +float64x2_t t3 (float64x2_t a) +{ + return vnegq_f64 (vabsq_f64 (a)); +} diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c new file mode 100644 index 0000000000000000000000000000000000000000..141121176b309e4b2aa413dc55271a6e3c93d5e1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#pragma GCC target "+nosve" + +#include <arm_neon.h> +#include <math.h> + +/* +** f1: +** movi v[0-9]+.2s, 0x80, lsl 24 +** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b +** ret +*/ +float32_t f1 (float32_t a) +{ + return -fabsf (a); +} + +/* +** f2: +** mov x0, -9223372036854775808 +** fmov d[0-9]+, x0 +** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b +** ret +*/ +float64_t f2 (float64_t a) +{ + return -fabs (a); +} diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c new file mode 100644 index 0000000000000000000000000000000000000000..b4652173a95d104ddfa70c497f0627a61ea89d3b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c @@ -0,0 +1,36 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#pragma GCC target "+nosve" + +#include <arm_neon.h> +#include <math.h> + +/* +** f1: +** ... +** ldr q[0-9]+, \[x0\] +** orr v[0-9]+.4s, #128, lsl #24 +** str q[0-9]+, \[x0\], 16 +** ... +*/ +void f1 (float32_t *a, int n) +{ + for (int i = 0; i < (n & -8); i++) + a[i] = -fabsf (a[i]); +} + +/* +** f2: +** ... +** ldr q[0-9]+, \[x0\] +** orr v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b +** str q[0-9]+, \[x0\], 16 +** ... +*/ +void f2 (float64_t *a, int n) +{ + for (int i = 0; i < (n & -8); i++) + a[i] = -fabs (a[i]); +} diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c new file mode 100644 index 0000000000000000000000000000000000000000..10879dea74462d34b26160eeb0bd54ead063166b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c @@ -0,0 +1,39 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#pragma GCC target "+nosve" + +#include <string.h> + +/* +** negabs: +** mov x0, -9223372036854775808 +** fmov d[0-9]+, x0 +** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b +** ret +*/ +double negabs (double x) +{ + unsigned long long y; + memcpy (&y, &x, sizeof(double)); + y = y | (1UL << 63); + memcpy (&x, &y, sizeof(double)); + return x; +} + +/* +** negabsf: +** movi v[0-9]+.2s, 0x80, lsl 24 +** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b +** ret +*/ +float negabsf (float x) +{ + unsigned int y; + memcpy (&y, &x, sizeof(float)); + y = y | (1U << 31); + memcpy (&x, &y, sizeof(float)); + return x; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c new file mode 100644 index 0000000000000000000000000000000000000000..0c7664e6de77a497682952653ffd417453854d52 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#include <arm_neon.h> + +/* +** t1: +** orr v[0-9]+.2s, #128, lsl #24 +** ret +*/ +float32x2_t t1 (float32x2_t a) +{ + return vneg_f32 (vabs_f32 (a)); +} + +/* +** t2: +** orr v[0-9]+.4s, #128, lsl #24 +** ret +*/ +float32x4_t t2 (float32x4_t a) +{ + return vnegq_f32 (vabsq_f32 (a)); +} + +/* +** t3: +** adrp x0, .LC[0-9]+ +** ldr q[0-9]+, \[x0, #:lo12:.LC0\] +** orr v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b +** ret +*/ +float64x2_t t3 (float64x2_t a) +{ + return vnegq_f64 (vabsq_f64 (a)); +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c new file mode 100644 index 0000000000000000000000000000000000000000..a60cd31b9294af2dac69eed1c93f899bd5c78fca --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#include <arm_neon.h> +#include <math.h> + +/* +** f1: +** movi v[0-9]+.2s, 0x80, lsl 24 +** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b +** ret +*/ +float32_t f1 (float32_t a) +{ + return -fabsf (a); +} + +/* +** f2: +** mov x0, -9223372036854775808 +** fmov d[0-9]+, x0 +** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b +** ret +*/ +float64_t f2 (float64_t a) +{ + return -fabs (a); +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c new file mode 100644 index 0000000000000000000000000000000000000000..1bf34328d8841de8e6b0a5458562a9f00e31c275 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#include <arm_neon.h> +#include <math.h> + +/* +** f1: +** ... +** ld1w z[0-9]+.s, p[0-9]+/z, \[x0, x2, lsl 2\] +** orr z[0-9]+.s, z[0-9]+.s, #0x80000000 +** st1w z[0-9]+.s, p[0-9]+, \[x0, x2, lsl 2\] +** ... +*/ +void f1 (float32_t *a, int n) +{ + for (int i = 0; i < (n & -8); i++) + a[i] = -fabsf (a[i]); +} + +/* +** f2: +** ... +** ld1d z[0-9]+.d, p[0-9]+/z, \[x0, x2, lsl 3\] +** orr z[0-9]+.d, z[0-9]+.d, #0x8000000000000000 +** st1d z[0-9]+.d, p[0-9]+, \[x0, x2, lsl 3\] +** ... +*/ +void f2 (float64_t *a, int n) +{ + for (int i = 0; i < (n & -8); i++) + a[i] = -fabs (a[i]); +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c new file mode 100644 index 0000000000000000000000000000000000000000..21f2a8da2a5d44e3d01f6604ca7be87e3744d494 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#include <string.h> + +/* +** negabs: +** mov x0, -9223372036854775808 +** fmov d[0-9]+, x0 +** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b +** ret +*/ +double negabs (double x) +{ + unsigned long long y; + memcpy (&y, &x, sizeof(double)); + y = y | (1UL << 63); + memcpy (&x, &y, sizeof(double)); + return x; +} + +/* +** negabsf: +** movi v[0-9]+.2s, 0x80, lsl 24 +** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b +** ret +*/ +float negabsf (float x) +{ + unsigned int y; + memcpy (&y, &x, sizeof(float)); + y = y | (1U << 31); + memcpy (&x, &y, sizeof(float)); + return x; +} + --
diff --git a/gcc/match.pd b/gcc/match.pd index 39c7ea1088f25538ed8bd26ee89711566141a71f..8ebde06dcd4b26d694826cffad0fb17e1136600a 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -9476,3 +9476,57 @@ and, } (if (full_perm_p) (vec_perm (op@3 @0 @1) @3 @2)))))) + +/* Transform fneg (fabs (X)) -> X | 1 << signbit (X). */ + +(simplify + (negate (abs @0)) + (if (FLOAT_TYPE_P (type) + /* We have to delay this rewriting till after forward prop because otherwise + it's harder to do trigonometry optimizations. e.g. cos(-fabs(x)) is not + matched in one go. Instead cos (-x) is matched first followed by cos(|x|). + The bottom op approach makes this rule match first and it's not untill + fwdprop that we match top down. There are manu such simplications so we + delay this optimization till later on. */ + && canonicalize_math_after_vectorization_p ()) + (with { + tree itype = unsigned_type_for (type); + machine_mode mode = TYPE_MODE (type); + const struct real_format *float_fmt = FLOAT_MODE_FORMAT (mode); + auto optab = VECTOR_TYPE_P (type) ? optab_vector : optab_default; } + (if (float_fmt + && float_fmt->signbit_rw >= 0 + && targetm.can_change_mode_class (TYPE_MODE (itype), + TYPE_MODE (type), ALL_REGS) + && target_supports_op_p (itype, BIT_IOR_EXPR, optab)) + (with { wide_int wone = wi::one (element_precision (type)); + int sbit = float_fmt->signbit_rw; + auto stype = VECTOR_TYPE_P (type) ? TREE_TYPE (itype) : itype; + tree sign_bit = wide_int_to_tree (stype, wi::lshift (wone, sbit));} + (view_convert:type + (bit_ior (view_convert:itype @0) + { build_uniform_cst (itype, sign_bit); } ))))))) + +/* Repeat the same but for conditional negate. */ + +(simplify + (IFN_COND_NEG @1 (abs @0) @2) + (if (FLOAT_TYPE_P (type)) + (with { + tree itype = unsigned_type_for (type); + machine_mode mode = TYPE_MODE (type); + const struct real_format *float_fmt = FLOAT_MODE_FORMAT (mode); + auto optab = VECTOR_TYPE_P (type) ? optab_vector : optab_default; } + (if (float_fmt + && float_fmt->signbit_rw >= 0 + && targetm.can_change_mode_class (TYPE_MODE (itype), + TYPE_MODE (type), ALL_REGS) + && target_supports_op_p (itype, BIT_IOR_EXPR, optab)) + (with { wide_int wone = wi::one (element_precision (type)); + int sbit = float_fmt->signbit_rw; + auto stype = VECTOR_TYPE_P (type) ? TREE_TYPE (itype) : itype; + tree sign_bit = wide_int_to_tree (stype, wi::lshift (wone, sbit));} + (view_convert:type + (IFN_COND_IOR @1 (view_convert:itype @0) + { build_uniform_cst (itype, sign_bit); } + (view_convert:itype @2) ))))))) \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c new file mode 100644 index 0000000000000000000000000000000000000000..f823013c3ddf6b3a266c3abfcbf2642fc2a75fa6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c @@ -0,0 +1,39 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#pragma GCC target "+nosve" + +#include <arm_neon.h> + +/* +** t1: +** orr v[0-9]+.2s, #128, lsl #24 +** ret +*/ +float32x2_t t1 (float32x2_t a) +{ + return vneg_f32 (vabs_f32 (a)); +} + +/* +** t2: +** orr v[0-9]+.4s, #128, lsl #24 +** ret +*/ +float32x4_t t2 (float32x4_t a) +{ + return vnegq_f32 (vabsq_f32 (a)); +} + +/* +** t3: +** adrp x0, .LC[0-9]+ +** ldr q[0-9]+, \[x0, #:lo12:.LC0\] +** orr v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b +** ret +*/ +float64x2_t t3 (float64x2_t a) +{ + return vnegq_f64 (vabsq_f64 (a)); +} diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c new file mode 100644 index 0000000000000000000000000000000000000000..141121176b309e4b2aa413dc55271a6e3c93d5e1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#pragma GCC target "+nosve" + +#include <arm_neon.h> +#include <math.h> + +/* +** f1: +** movi v[0-9]+.2s, 0x80, lsl 24 +** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b +** ret +*/ +float32_t f1 (float32_t a) +{ + return -fabsf (a); +} + +/* +** f2: +** mov x0, -9223372036854775808 +** fmov d[0-9]+, x0 +** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b +** ret +*/ +float64_t f2 (float64_t a) +{ + return -fabs (a); +} diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c new file mode 100644 index 0000000000000000000000000000000000000000..b4652173a95d104ddfa70c497f0627a61ea89d3b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c @@ -0,0 +1,36 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#pragma GCC target "+nosve" + +#include <arm_neon.h> +#include <math.h> + +/* +** f1: +** ... +** ldr q[0-9]+, \[x0\] +** orr v[0-9]+.4s, #128, lsl #24 +** str q[0-9]+, \[x0\], 16 +** ... +*/ +void f1 (float32_t *a, int n) +{ + for (int i = 0; i < (n & -8); i++) + a[i] = -fabsf (a[i]); +} + +/* +** f2: +** ... +** ldr q[0-9]+, \[x0\] +** orr v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b +** str q[0-9]+, \[x0\], 16 +** ... +*/ +void f2 (float64_t *a, int n) +{ + for (int i = 0; i < (n & -8); i++) + a[i] = -fabs (a[i]); +} diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c new file mode 100644 index 0000000000000000000000000000000000000000..10879dea74462d34b26160eeb0bd54ead063166b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c @@ -0,0 +1,39 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#pragma GCC target "+nosve" + +#include <string.h> + +/* +** negabs: +** mov x0, -9223372036854775808 +** fmov d[0-9]+, x0 +** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b +** ret +*/ +double negabs (double x) +{ + unsigned long long y; + memcpy (&y, &x, sizeof(double)); + y = y | (1UL << 63); + memcpy (&x, &y, sizeof(double)); + return x; +} + +/* +** negabsf: +** movi v[0-9]+.2s, 0x80, lsl 24 +** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b +** ret +*/ +float negabsf (float x) +{ + unsigned int y; + memcpy (&y, &x, sizeof(float)); + y = y | (1U << 31); + memcpy (&x, &y, sizeof(float)); + return x; +} + diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c new file mode 100644 index 0000000000000000000000000000000000000000..0c7664e6de77a497682952653ffd417453854d52 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#include <arm_neon.h> + +/* +** t1: +** orr v[0-9]+.2s, #128, lsl #24 +** ret +*/ +float32x2_t t1 (float32x2_t a) +{ + return vneg_f32 (vabs_f32 (a)); +} + +/* +** t2: +** orr v[0-9]+.4s, #128, lsl #24 +** ret +*/ +float32x4_t t2 (float32x4_t a) +{ + return vnegq_f32 (vabsq_f32 (a)); +} + +/* +** t3: +** adrp x0, .LC[0-9]+ +** ldr q[0-9]+, \[x0, #:lo12:.LC0\] +** orr v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b +** ret +*/ +float64x2_t t3 (float64x2_t a) +{ + return vnegq_f64 (vabsq_f64 (a)); +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c new file mode 100644 index 0000000000000000000000000000000000000000..a60cd31b9294af2dac69eed1c93f899bd5c78fca --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#include <arm_neon.h> +#include <math.h> + +/* +** f1: +** movi v[0-9]+.2s, 0x80, lsl 24 +** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b +** ret +*/ +float32_t f1 (float32_t a) +{ + return -fabsf (a); +} + +/* +** f2: +** mov x0, -9223372036854775808 +** fmov d[0-9]+, x0 +** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b +** ret +*/ +float64_t f2 (float64_t a) +{ + return -fabs (a); +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c new file mode 100644 index 0000000000000000000000000000000000000000..1bf34328d8841de8e6b0a5458562a9f00e31c275 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#include <arm_neon.h> +#include <math.h> + +/* +** f1: +** ... +** ld1w z[0-9]+.s, p[0-9]+/z, \[x0, x2, lsl 2\] +** orr z[0-9]+.s, z[0-9]+.s, #0x80000000 +** st1w z[0-9]+.s, p[0-9]+, \[x0, x2, lsl 2\] +** ... +*/ +void f1 (float32_t *a, int n) +{ + for (int i = 0; i < (n & -8); i++) + a[i] = -fabsf (a[i]); +} + +/* +** f2: +** ... +** ld1d z[0-9]+.d, p[0-9]+/z, \[x0, x2, lsl 3\] +** orr z[0-9]+.d, z[0-9]+.d, #0x8000000000000000 +** st1d z[0-9]+.d, p[0-9]+, \[x0, x2, lsl 3\] +** ... +*/ +void f2 (float64_t *a, int n) +{ + for (int i = 0; i < (n & -8); i++) + a[i] = -fabs (a[i]); +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c new file mode 100644 index 0000000000000000000000000000000000000000..21f2a8da2a5d44e3d01f6604ca7be87e3744d494 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#include <string.h> + +/* +** negabs: +** mov x0, -9223372036854775808 +** fmov d[0-9]+, x0 +** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b +** ret +*/ +double negabs (double x) +{ + unsigned long long y; + memcpy (&y, &x, sizeof(double)); + y = y | (1UL << 63); + memcpy (&x, &y, sizeof(double)); + return x; +} + +/* +** negabsf: +** movi v[0-9]+.2s, 0x80, lsl 24 +** orr v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b +** ret +*/ +float negabsf (float x) +{ + unsigned int y; + memcpy (&y, &x, sizeof(float)); + y = y | (1U << 31); + memcpy (&x, &y, sizeof(float)); + return x; +} +