Hi,

As the subject says, config/arm/arm_neon.h currently tests __FAST_MATH, but:
$ gcc -E -dM - -ffast-math < /dev/null | grep FAST_MATH
#define __FAST_MATH__ 1

It should be spelled __FAST_MATH__. I've made that change and confirmed that it causes the preprocessor to do what was intended for these intrinsics under -ffast-math.

Bootstrapped on arm-none-linux-gnueabihf with no issues.

This could also be backported to the release branches. I think Ramana's patch went in for GCC 5.0, so backports to gcc-5-branch and gcc-6-branch would be feasible.

Thanks,
James

---

2016-06-16  James Greenhalgh  <james.greenha...@arm.com>

	* config/arm/arm_neon.h (vadd_f32): Replace __FAST_MATH
	with __FAST_MATH__.
	(vaddq_f32): Likewise.
	(vmul_f32): Likewise.
	(vmulq_f32): Likewise.
	(vsub_f32): Likewise.
	(vsubq_f32): Likewise.
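For reference, a minimal example (not part of the patch; the file name, function name, and flags below are illustrative) showing the path the fix enables. With the corrected spelling, vadd_f32 takes the plain C addition branch when -ffast-math is given:

/* test-fast-math.c: illustrative only.  */
#include <arm_neon.h>

float32x2_t
my_vadd (float32x2_t a, float32x2_t b)
{
  /* With -ffast-math, __FAST_MATH__ is defined, so after this patch
     vadd_f32 expands to "return __a + __b;" rather than the
     __builtin_neon_vaddv2sf call.  */
  return vadd_f32 (a, b);
}

/* Compile with, e.g.:
   arm-none-linux-gnueabihf-gcc -O2 -mfpu=neon -ffast-math -S test-fast-math.c  */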
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index 7997cb4..32ee06c 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -530,7 +530,7 @@ vadd_s32 (int32x2_t __a, int32x2_t __b)
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vadd_f32 (float32x2_t __a, float32x2_t __b)
 {
-#ifdef __FAST_MATH
+#ifdef __FAST_MATH__
   return __a + __b;
 #else
   return (float32x2_t) __builtin_neon_vaddv2sf (__a, __b);
@@ -594,7 +594,7 @@ vaddq_s64 (int64x2_t __a, int64x2_t __b)
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vaddq_f32 (float32x4_t __a, float32x4_t __b)
 {
-#ifdef __FAST_MATH
+#ifdef __FAST_MATH__
   return __a + __b;
 #else
   return (float32x4_t) __builtin_neon_vaddv4sf (__a, __b);
@@ -1030,7 +1030,7 @@ vmul_s32 (int32x2_t __a, int32x2_t __b)
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vmul_f32 (float32x2_t __a, float32x2_t __b)
 {
-#ifdef __FAST_MATH
+#ifdef __FAST_MATH__
   return __a * __b;
 #else
   return (float32x2_t) __builtin_neon_vmulfv2sf (__a, __b);
@@ -1077,7 +1077,7 @@ vmulq_s32 (int32x4_t __a, int32x4_t __b)
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vmulq_f32 (float32x4_t __a, float32x4_t __b)
 {
-#ifdef __FAST_MATH
+#ifdef __FAST_MATH__
   return __a * __b;
 #else
   return (float32x4_t) __builtin_neon_vmulfv4sf (__a, __b);
@@ -1678,7 +1678,7 @@ vsub_s32 (int32x2_t __a, int32x2_t __b)
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vsub_f32 (float32x2_t __a, float32x2_t __b)
 {
-#ifdef __FAST_MATH
+#ifdef __FAST_MATH__
   return __a - __b;
 #else
   return (float32x2_t) __builtin_neon_vsubv2sf (__a, __b);
@@ -1742,7 +1742,7 @@ vsubq_s64 (int64x2_t __a, int64x2_t __b)
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vsubq_f32 (float32x4_t __a, float32x4_t __b)
 {
-#ifdef __FAST_MATH
+#ifdef __FAST_MATH__
   return __a - __b;
 #else
   return (float32x4_t) __builtin_neon_vsubv4sf (__a, __b);