This patch introduces inline definitions for the __fma and __fmaf functions in arm_acle.h for arm targets. These definitions rely on __builtin_fma and __builtin_fmaf to ensure proper inlining and to meet the ACLE requirements [1].
The patch has been tested locally using a crosstool-NG sysroot for arm-cortexa9_neon-linux-gnueabihf, confirming that the generated code uses the expected fused multiply-accumulate instructions: vfma.f32 for single precision and vfma.f64 for double precision. Signed-off-by: Ayan Shafqat <ayan.x.shaf...@gmail.com> [1] https://arm-software.github.io/acle/main/acle.html#fused-multiply-accumulate-fma --- gcc/config/arm/arm_acle.h | 18 ++++++++++++++++++ gcc/testsuite/gcc.target/arm/acle/acle_fma.c | 17 +++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 gcc/testsuite/gcc.target/arm/acle/acle_fma.c diff --git a/gcc/config/arm/arm_acle.h b/gcc/config/arm/arm_acle.h index c6c03fdce27..14c28f11b9c 100644 --- a/gcc/config/arm/arm_acle.h +++ b/gcc/config/arm/arm_acle.h @@ -829,6 +829,24 @@ __crc32cd (uint32_t __a, uint64_t __b) #endif /* __ARM_FEATURE_CRC32 */ #pragma GCC pop_options +#ifdef __ARM_FEATURE_FMA +__extension__ extern __inline double +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__fma (double __x, double __y, double __z) +{ + return __builtin_fma (__x, __y, __z); +} +#endif + +#ifdef __ARM_FEATURE_FMA +__extension__ extern __inline float +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__fmaf (float __x, float __y, float __z) +{ + return __builtin_fmaf (__x, __y, __z); +} +#endif + #ifdef __cplusplus } #endif diff --git a/gcc/testsuite/gcc.target/arm/acle/acle_fma.c b/gcc/testsuite/gcc.target/arm/acle/acle_fma.c new file mode 100644 index 00000000000..4177ac81f07 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/acle/acle_fma.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=armv7-a -mfpu=neon-vfpv4 -mfloat-abi=hard" } */ + +#include "arm_acle.h" + +double test_acle_fma (double x, double y, double z) +{ + return __fma (x, y, z); +} + +float test_acle_fmaf (float x, float y, float z) +{ + return __fmaf (x, y, z); +} + +/* { dg-final { scan-assembler-times "vfma.f64\td\[0-9\]," 1 } } 
*/ +/* { dg-final { scan-assembler-times "vfma.f32\ts\[0-9\]" 1 } } */ -- 2.43.0