https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116564
--- Comment #2 from Alex Coplan <acoplan at gcc dot gnu.org> ---

Here's a preprocessed testcase (not for the testsuite, just to make it easier to reproduce using only cc1):

#pragma GCC aarch64 "arm_neon.h"
typedef double float64_t;
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vst2_f64 (float64_t * __a, float64x1x2_t __val)
{
  __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __val);
}
void test() {
  for (int L = 0; L < 4; ++L) {
    float64_t ResData[1 * 2];
    float64x1x2_t Src1;
    vst2_f64(ResData, Src1);
  }
}