Hi,
This patch implements the vadd_f64 and vsub_f64 NEON intrinsics and adds
regression tests for them. The regression suite was run for aarch64-none-elf
with no regressions.

OK?

Thanks,
Alex

gcc/testsuite/

2013-10-08  Alex Velenko  <alex.vele...@arm.com>

	* gcc.target/aarch64/vadd_f64.c: New testcase.
	* gcc.target/aarch64/vsub_f64.c: New testcase.

gcc/

2013-10-08  Alex Velenko  <alex.vele...@arm.com>

	* config/aarch64/arm_neon.h (vadd_f64): Implementation added.
	(vsub_f64): Likewise.
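For context, here is a minimal stand-alone sketch (not part of the patch) of how the new intrinsics are meant to be used; like the tests below, it assumes float64x1_t holds a single double value that can be assigned and compared directly:

#include <arm_neon.h>

extern void abort (void);

int
main (void)
{
  /* float64x1_t carries one double in a D register; vadd_f64 and
     vsub_f64 are expected to compile to scalar fadd/fsub.  */
  float64x1_t a = 1.5;
  float64x1_t b = 0.25;

  if (vadd_f64 (a, b) != 1.75)   /* 1.5 + 0.25, exactly representable.  */
    abort ();
  if (vsub_f64 (a, b) != 1.25)   /* 1.5 - 0.25, exactly representable.  */
    abort ();

  return 0;
}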
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 1bd098d2a9c3a204c0fb57ee3ef31cbb5f328d8e..b8791b7b5dd7123b6d708aeb2321986673a0c0cd 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -634,6 +634,12 @@ vadd_f32 (float32x2_t __a, float32x2_t __b)
   return __a + __b;
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vadd_f64 (float64x1_t __a, float64x1_t __b)
+{
+  return __a + __b;
+}
+
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vadd_u8 (uint8x8_t __a, uint8x8_t __b)
 {
@@ -1824,6 +1830,12 @@ vsub_f32 (float32x2_t __a, float32x2_t __b)
   return __a - __b;
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vsub_f64 (float64x1_t __a, float64x1_t __b)
+{
+  return __a - __b;
+}
+
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vsub_u8 (uint8x8_t __a, uint8x8_t __b)
 {
diff --git a/gcc/testsuite/gcc.target/aarch64/vadd_f64.c b/gcc/testsuite/gcc.target/aarch64/vadd_f64.c
new file mode 100644
index 0000000000000000000000000000000000000000..c3bf7349597aa9b75e0bc34cfd4cde4dc16b95f3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vadd_f64.c
@@ -0,0 +1,114 @@
+/* Test vadd works correctly.  */
+/* { dg-do run } */
+/* { dg-options "--save-temps" } */
+
+#include <arm_neon.h>
+
+#define FLT_EPSILON __FLT_EPSILON__
+#define DBL_EPSILON __DBL_EPSILON__
+
+#define TESTA0 0.33333
+#define TESTA1 -1.7777
+#define TESTA2 0
+#define TESTA3 1.23456
+/* 2^54: a double has only 53 significand bits (IEEE double precision),
+   so adding 1.0 to this value leaves it unchanged after rounding.  */
+#define TESTA4 18014398509481984
+#define TESTA5 (1.0 / TESTA4)
+
+#define TESTB0 0.66667
+#define TESTB1 2
+#define TESTB2 0
+#define TESTB3 -2
+#define TESTB4 1.0
+#define TESTB5 2.0
+
+#define ANSW0 1
+#define ANSW1 0.2223
+#define ANSW2 0
+#define ANSW3 -0.76544
+#define ANSW4 TESTA4
+#define ANSW5 2.0
+
+extern void abort (void);
+
+#define EPSILON __DBL_EPSILON__
+#define ABS(a) __builtin_fabs (a)
+#define ISNAN(a) __builtin_isnan (a)
+#define FP_equals(a, b, epsilon)		\
+  (						\
+    ((a) == (b))				\
+    || (ISNAN (a) && ISNAN (b))			\
+    || (ABS (a - b) < epsilon)			\
+  )
+
+int
+test_vadd_f64 ()
+{
+  float64x1_t a;
+  float64x1_t b;
+  float64x1_t c;
+
+  a = TESTA0;
+  b = TESTB0;
+  c = ANSW0;
+
+  a = vadd_f64 (a, b);
+  if (!FP_equals (a, c, EPSILON))
+    return 1;
+
+  a = TESTA1;
+  b = TESTB1;
+  c = ANSW1;
+
+  a = vadd_f64 (a, b);
+  if (!FP_equals (a, c, EPSILON))
+    return 1;
+
+  a = TESTA2;
+  b = TESTB2;
+  c = ANSW2;
+
+  a = vadd_f64 (a, b);
+  if (!FP_equals (a, c, EPSILON))
+    return 1;
+
+  a = TESTA3;
+  b = TESTB3;
+  c = ANSW3;
+
+  a = vadd_f64 (a, b);
+  if (!FP_equals (a, c, EPSILON))
+    return 1;
+
+  a = TESTA4;
+  b = TESTB4;
+  c = ANSW4;
+
+  a = vadd_f64 (a, b);
+  if (!FP_equals (a, c, EPSILON))
+    return 1;
+
+  a = TESTA5;
+  b = TESTB5;
+  c = ANSW5;
+
+  a = vadd_f64 (a, b);
+  if (!FP_equals (a, c, EPSILON))
+    return 1;
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "fadd\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 6 } } */
+
+int
+main (int argc, char **argv)
+{
+  if (test_vadd_f64 ())
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vsub_f64.c b/gcc/testsuite/gcc.target/aarch64/vsub_f64.c
new file mode 100644
index 0000000000000000000000000000000000000000..abf4fc42d49dc695f435b1e0f331737c8e9367b0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vsub_f64.c
@@ -0,0 +1,116 @@
+/* Test vsub works correctly.  */
+/* { dg-do run } */
+/* { dg-options "--save-temps" } */
+
+#include <arm_neon.h>
+
+#define FLT_EPSILON __FLT_EPSILON__
+#define DBL_EPSILON __DBL_EPSILON__
+
+#define TESTA0 1
+#define TESTA1 0.2223
+#define TESTA2 0
+#define TESTA3 -0.76544
+/* 2^54: a double has only 53 significand bits (IEEE double precision),
+   so subtracting 1.0 from this value leaves it unchanged after rounding.  */
+#define TESTA4 18014398509481984
+#define TESTA5 2.0
+
+#define TESTB0 0.66667
+#define TESTB1 2
+#define TESTB2 0
+#define TESTB3 -2
+#define TESTB4 1.0
+#define TESTB5 (1.0 / TESTA4)
+
+#define ANSW0 0.33333
+#define ANSW1 -1.7777
+#define ANSW2 0
+#define ANSW3 1.23456
+#define ANSW4 TESTA4
+#define ANSW5 2.0
+
+extern void abort (void);
+
+#define EPSILON __DBL_EPSILON__
+#define ISNAN(a) __builtin_isnan (a)
+/* FP_equals avoids computing a - b, so each test case executes exactly
+   one subtraction: the one performed by vsub_f64 itself.  */
+#define FP_equals(a, b, epsilon)		\
+(						\
+  ((a) == (b))					\
+  || (ISNAN (a) && ISNAN (b))			\
+  || (((a > b) && (a < (b + epsilon)))		\
+      || ((b > a) && (b < (a + epsilon))))	\
+)
+
+int
+test_vsub_f64 ()
+{
+  float64x1_t a;
+  float64x1_t b;
+  float64x1_t c;
+
+  a = TESTA0;
+  b = TESTB0;
+  c = ANSW0;
+
+  a = vsub_f64 (a, b);
+  if (!FP_equals (a, c, EPSILON))
+    return 1;
+
+  a = TESTA1;
+  b = TESTB1;
+  c = ANSW1;
+
+  a = vsub_f64 (a, b);
+  if (!FP_equals (a, c, EPSILON))
+    return 1;
+
+  a = TESTA2;
+  b = TESTB2;
+  c = ANSW2;
+
+  a = vsub_f64 (a, b);
+  if (!FP_equals (a, c, EPSILON))
+    return 1;
+
+  a = TESTA3;
+  b = TESTB3;
+  c = ANSW3;
+
+  a = vsub_f64 (a, b);
+  if (!FP_equals (a, c, EPSILON))
+    return 1;
+
+  a = TESTA4;
+  b = TESTB4;
+  c = ANSW4;
+
+  a = vsub_f64 (a, b);
+  if (!FP_equals (a, c, EPSILON))
+    return 1;
+
+  a = TESTA5;
+  b = TESTB5;
+  c = ANSW5;
+
+  a = vsub_f64 (a, b);
+  if (!FP_equals (a, c, EPSILON))
+    return 1;
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "fsub\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 6 } } */
+
+int
+main (int argc, char **argv)
+{
+  if (test_vsub_f64 ())
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */