Hi, this patch replaces the builtins with __a * __b for the signed variants of the vmul_n intrinsics. As discussed earlier, the patch has an issue if __a * __b overflows (signed overflow is undefined behaviour in C), and the open question is whether we wish to leave that as UB.
Thanks, Prathamesh
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index 41b596b5fc6..5928c25318b 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -8370,14 +8370,14 @@ __extension__ extern __inline int16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmul_n_s16 (int16x4_t __a, int16_t __b)
 {
-  return (int16x4_t)__builtin_neon_vmul_nv4hi (__a, (__builtin_neon_hi) __b);
+  return __a * __b;
 }
 
 __extension__ extern __inline int32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmul_n_s32 (int32x2_t __a, int32_t __b)
 {
-  return (int32x2_t)__builtin_neon_vmul_nv2si (__a, (__builtin_neon_si) __b);
+  return __a * __b;
 }
 
 __extension__ extern __inline float32x2_t
@@ -8409,14 +8409,14 @@ __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmulq_n_s16 (int16x8_t __a, int16_t __b)
 {
-  return (int16x8_t)__builtin_neon_vmul_nv8hi (__a, (__builtin_neon_hi) __b);
+  return __a * __b;
 }
 
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmulq_n_s32 (int32x4_t __a, int32_t __b)
 {
-  return (int32x4_t)__builtin_neon_vmul_nv4si (__a, (__builtin_neon_si) __b);
+  return __a * __b;
 }
 
 __extension__ extern __inline float32x4_t