Jonathan Wright <jonathan.wri...@arm.com> writes:
> Hi,
>
> This patch declares unsigned and polynomial type-qualified builtins and
> uses them to implement the LD1/ST1 Neon intrinsics. This removes the
> need for many casts in arm_neon.h.
>
> The new type-qualified builtins are also lowered to gimple - as the
> unqualified builtins are already.
>
> Regression tested and bootstrapped on aarch64-none-linux-gnu - no
> issues.
>
> Ok for master?
>
> Thanks,
> Jonathan
>
> ---
>
> gcc/ChangeLog:
>
> 2021-11-10  Jonathan Wright  <jonathan.wri...@arm.com>
>
> 	* config/aarch64/aarch64-builtins.c (TYPES_LOAD1_U): Define.
> 	(TYPES_LOAD1_P): Define.
> 	(TYPES_STORE1_U): Define.
> 	(TYPES_STORE1P): Rename to...
> 	(TYPES_STORE1_P): This.
> 	(get_mem_type_for_load_store): Add unsigned and poly types.
> 	(aarch64_general_gimple_fold_builtin): Add unsigned and poly
> 	type-qualified builtin declarations.
> 	* config/aarch64/aarch64-simd-builtins.def: Declare type-
> 	qualified builtins for LD1/ST1.
> 	* config/aarch64/arm_neon.h (vld1_p8): Use type-qualified
> 	builtin and remove cast.
> 	(vld1_p16): Likewise.
> 	(vld1_u8): Likewise.
> 	(vld1_u16): Likewise.
> 	(vld1_u32): Likewise.
> 	(vld1q_p8): Likewise.
> 	(vld1q_p16): Likewise.
> 	(vld1q_p64): Likewise.
> 	(vld1q_u8): Likewise.
> 	(vld1q_u16): Likewise.
> 	(vld1q_u32): Likewise.
> 	(vld1q_u64): Likewise.
> 	(vst1_p8): Likewise.
> 	(vst1_p16): Likewise.
> 	(vst1_u8): Likewise.
> 	(vst1_u16): Likewise.
> 	(vst1_u32): Likewise.
> 	(vst1q_p8): Likewise.
> 	(vst1q_p16): Likewise.
> 	(vst1q_p64): Likewise.
> 	(vst1q_u8): Likewise.
> 	(vst1q_u16): Likewise.
> 	(vst1q_u32): Likewise.
> 	(vst1q_u64): Likewise.
> 	* config/aarch64/iterators.md (VALLP_NO_DI): New iterator.
>
> diff --git a/gcc/config/aarch64/aarch64-builtins.c
> b/gcc/config/aarch64/aarch64-builtins.c
> index
> 5053bf0f8fd6638bf84a6df06c0987a0216b69e7..f286401ff3ab01dd860ae22858ca07e364247414
> 100644
> --- a/gcc/config/aarch64/aarch64-builtins.c
> +++ b/gcc/config/aarch64/aarch64-builtins.c
> @@ -372,10 +372,12 @@ aarch64_types_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> static enum aarch64_type_qualifiers
> aarch64_types_load1_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> = { qualifier_unsigned, qualifier_const_pointer_map_mode };
> +#define TYPES_LOAD1_U (aarch64_types_load1_u_qualifiers)
> #define TYPES_LOADSTRUCT_U (aarch64_types_load1_u_qualifiers)
> static enum aarch64_type_qualifiers
> aarch64_types_load1_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> = { qualifier_poly, qualifier_const_pointer_map_mode };
> +#define TYPES_LOAD1_P (aarch64_types_load1_p_qualifiers)
> #define TYPES_LOADSTRUCT_P (aarch64_types_load1_p_qualifiers)
>
> static enum aarch64_type_qualifiers
> @@ -423,11 +425,12 @@ aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> static enum aarch64_type_qualifiers
> aarch64_types_store1_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> = { qualifier_void, qualifier_pointer_map_mode, qualifier_unsigned };
> +#define TYPES_STORE1_U (aarch64_types_store1_u_qualifiers)
> #define TYPES_STORESTRUCT_U (aarch64_types_store1_u_qualifiers)
> static enum aarch64_type_qualifiers
> aarch64_types_store1_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> = { qualifier_void, qualifier_pointer_map_mode, qualifier_poly };
> -#define TYPES_STORE1P (aarch64_types_store1_p_qualifiers)
> +#define TYPES_STORE1_P (aarch64_types_store1_p_qualifiers)
> #define TYPES_STORESTRUCT_P (aarch64_types_store1_p_qualifiers)
>
> static enum aarch64_type_qualifiers
> @@ -2590,47 +2593,83 @@ get_mem_type_for_load_store (unsigned int fcode)
> {
> switch (fcode)
> {
> - VAR1 (LOAD1, ld1 , 0, LOAD, v8qi)
> - VAR1 (STORE1, st1 , 0, STORE, v8qi)
> + VAR1 (LOAD1, ld1, 0, LOAD, v8qi)
> + VAR1 (STORE1, st1, 0, STORE, v8qi)
> return Int8x8_t;
> - VAR1 (LOAD1, ld1 , 0, LOAD, v16qi)
> - VAR1 (STORE1, st1 , 0, STORE, v16qi)
> + VAR1 (LOAD1, ld1, 0, LOAD, v16qi)
> + VAR1 (STORE1, st1, 0, STORE, v16qi)
> return Int8x16_t;
> - VAR1 (LOAD1, ld1 , 0, LOAD, v4hi)
> - VAR1 (STORE1, st1 , 0, STORE, v4hi)
> + VAR1 (LOAD1, ld1, 0, LOAD, v4hi)
> + VAR1 (STORE1, st1, 0, STORE, v4hi)
> return Int16x4_t;
> - VAR1 (LOAD1, ld1 , 0, LOAD, v8hi)
> - VAR1 (STORE1, st1 , 0, STORE, v8hi)
> + VAR1 (LOAD1, ld1, 0, LOAD, v8hi)
> + VAR1 (STORE1, st1, 0, STORE, v8hi)
> return Int16x8_t;
> - VAR1 (LOAD1, ld1 , 0, LOAD, v2si)
> - VAR1 (STORE1, st1 , 0, STORE, v2si)
> + VAR1 (LOAD1, ld1, 0, LOAD, v2si)
> + VAR1 (STORE1, st1, 0, STORE, v2si)
> return Int32x2_t;
> - VAR1 (LOAD1, ld1 , 0, LOAD, v4si)
> - VAR1 (STORE1, st1 , 0, STORE, v4si)
> + VAR1 (LOAD1, ld1, 0, LOAD, v4si)
> + VAR1 (STORE1, st1, 0, STORE, v4si)
> return Int32x4_t;
> - VAR1 (LOAD1, ld1 , 0, LOAD, v2di)
> - VAR1 (STORE1, st1 , 0, STORE, v2di)
> + VAR1 (LOAD1, ld1, 0, LOAD, v2di)
> + VAR1 (STORE1, st1, 0, STORE, v2di)
> return Int64x2_t;
> - VAR1 (LOAD1, ld1 , 0, LOAD, v4hf)
> - VAR1 (STORE1, st1 , 0, STORE, v4hf)
> + VAR1 (LOAD1_U, ld1, 0, LOAD, v8qi)
> + VAR1 (STORE1_U, st1, 0, STORE, v8qi)
> + return Uint8x8_t;
> + VAR1 (LOAD1_U, ld1, 0, LOAD, v16qi)
> + VAR1 (STORE1_U, st1, 0, STORE, v16qi)
> + return Uint8x16_t;
> + VAR1 (LOAD1_U, ld1, 0, LOAD, v4hi)
> + VAR1 (STORE1_U, st1, 0, STORE, v4hi)
> + return Uint16x4_t;
> + VAR1 (LOAD1_U, ld1, 0, LOAD, v8hi)
> + VAR1 (STORE1_U, st1, 0, STORE, v8hi)
> + return Uint16x8_t;
> + VAR1 (LOAD1_U, ld1, 0, LOAD, v2si)
> + VAR1 (STORE1_U, st1, 0, STORE, v2si)
> + return Uint32x2_t;
> + VAR1 (LOAD1_U, ld1, 0, LOAD, v4si)
> + VAR1 (STORE1_U, st1, 0, STORE, v4si)
> + return Uint32x4_t;
> + VAR1 (LOAD1_U, ld1, 0, LOAD, v2di)
> + VAR1 (STORE1_U, st1, 0, STORE, v2di)
> + return Uint64x2_t;
> + VAR1 (LOAD1_P, ld1, 0, LOAD, v8qi)
> + VAR1 (STORE1_P, st1, 0, STORE, v8qi)
> + return Poly8x8_t;
> + VAR1 (LOAD1_P, ld1, 0, LOAD, v16qi)
> + VAR1 (STORE1_P, st1, 0, STORE, v16qi)
> + return Poly8x16_t;
> + VAR1 (LOAD1_P, ld1, 0, LOAD, v4hi)
> + VAR1 (STORE1_P, st1, 0, STORE, v4hi)
> + return Poly16x4_t;
> + VAR1 (LOAD1_P, ld1, 0, LOAD, v8hi)
> + VAR1 (STORE1_P, st1, 0, STORE, v8hi)
> + return Poly16x8_t;
> + VAR1 (LOAD1_P, ld1, 0, LOAD, v2di)
> + VAR1 (STORE1_P, st1, 0, STORE, v2di)
> + return Poly64x2_t;
> + VAR1 (LOAD1, ld1, 0, LOAD, v4hf)
> + VAR1 (STORE1, st1, 0, STORE, v4hf)
> return Float16x4_t;
> - VAR1 (LOAD1, ld1 , 0, LOAD, v8hf)
> - VAR1 (STORE1, st1 , 0, STORE, v8hf)
> + VAR1 (LOAD1, ld1, 0, LOAD, v8hf)
> + VAR1 (STORE1, st1, 0, STORE, v8hf)
> return Float16x8_t;
> - VAR1 (LOAD1, ld1 , 0, LOAD, v4bf)
> - VAR1 (STORE1, st1 , 0, STORE, v4bf)
> + VAR1 (LOAD1, ld1, 0, LOAD, v4bf)
> + VAR1 (STORE1, st1, 0, STORE, v4bf)
> return Bfloat16x4_t;
> - VAR1 (LOAD1, ld1 , 0, LOAD, v8bf)
> - VAR1 (STORE1, st1 , 0, STORE, v8bf)
> + VAR1 (LOAD1, ld1, 0, LOAD, v8bf)
> + VAR1 (STORE1, st1, 0, STORE, v8bf)
> return Bfloat16x8_t;
> - VAR1 (LOAD1, ld1 , 0, LOAD, v2sf)
> - VAR1 (STORE1, st1 , 0, STORE, v2sf)
> + VAR1 (LOAD1, ld1, 0, LOAD, v2sf)
> + VAR1 (STORE1, st1, 0, STORE, v2sf)
> return Float32x2_t;
> - VAR1 (LOAD1, ld1 , 0, LOAD, v4sf)
> - VAR1 (STORE1, st1 , 0, STORE, v4sf)
> + VAR1 (LOAD1, ld1, 0, LOAD, v4sf)
> + VAR1 (STORE1, st1, 0, STORE, v4sf)
> return Float32x4_t;
> - VAR1 (LOAD1, ld1 , 0, LOAD, v2df)
> - VAR1 (STORE1, st1 , 0, STORE, v2df)
> + VAR1 (LOAD1, ld1, 0, LOAD, v2df)
> + VAR1 (STORE1, st1, 0, STORE, v2df)
> return Float64x2_t;
> default:
> gcc_unreachable ();

I think we could probably get rid of this and get the type directly
from the function decl.  That's a separate clean-up though, so the
patch is OK as-is.

For the record: it would be nice to get rid of the pointer casts too
in future, but that would require using an exact copy of the <stdint.h>
types for the pointer target (using INT8_TYPE etc.).

Thanks,
Richard

> @@ -2664,6 +2703,8 @@ aarch64_general_gimple_fold_builtin (unsigned int
> fcode, gcall *stmt,
>
> /*lower store and load neon builtins to gimple. */
> BUILTIN_VALL_F16 (LOAD1, ld1, 0, LOAD)
> + BUILTIN_VDQ_I (LOAD1_U, ld1, 0, LOAD)
> + BUILTIN_VALLP_NO_DI (LOAD1_P, ld1, 0, LOAD)
> if (!BYTES_BIG_ENDIAN)
> {
> enum aarch64_simd_type mem_type
> @@ -2686,6 +2727,8 @@ aarch64_general_gimple_fold_builtin (unsigned int
> fcode, gcall *stmt,
> break;
>
> BUILTIN_VALL_F16 (STORE1, st1, 0, STORE)
> + BUILTIN_VDQ_I (STORE1_U, st1, 0, STORE)
> + BUILTIN_VALLP_NO_DI (STORE1_P, st1, 0, STORE)
> if (!BYTES_BIG_ENDIAN)
> {
> enum aarch64_simd_type mem_type
> diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def
> b/gcc/config/aarch64/aarch64-simd-builtins.def
> index
> 35a099e1fb8dd1acb9e35583d1267df257d961b0..404696a71e0c1fc37cdf53fc42439a28bc9a745a
> 100644
> --- a/gcc/config/aarch64/aarch64-simd-builtins.def
> +++ b/gcc/config/aarch64/aarch64-simd-builtins.def
> @@ -699,11 +699,13 @@
>
> /* Implemented by aarch64_ld1<VALL_F16:mode>. */
> BUILTIN_VALL_F16 (LOAD1, ld1, 0, LOAD)
> - VAR1(STORE1P, ld1, 0, ALL, v2di)
> + BUILTIN_VDQ_I (LOAD1_U, ld1, 0, LOAD)
> + BUILTIN_VALLP_NO_DI (LOAD1_P, ld1, 0, LOAD)
>
> /* Implemented by aarch64_st1<VALL_F16:mode>. */
> BUILTIN_VALL_F16 (STORE1, st1, 0, STORE)
> - VAR1 (STORE1P, st1, 0, STORE, v2di)
> + BUILTIN_VDQ_I (STORE1_U, st1, 0, STORE)
> + BUILTIN_VALLP_NO_DI (STORE1_P, st1, 0, STORE)
>
> /* Implemented by fma<mode>4. */
> BUILTIN_VHSDF (TERNOP, fma, 4, FP)
> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index
> 3c03432b5b6c6cd0f349671366615925d38121e5..7abd1821840f84a79c37c40a33214294b06edbc6
> 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -14960,16 +14960,16 @@ __extension__ extern __inline poly8x8_t
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> vld1_p8 (const poly8_t *__a)
> {
> - return (poly8x8_t)
> - __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) __a);
> + return __builtin_aarch64_ld1v8qi_ps (
> + (const __builtin_aarch64_simd_qi *) __a);
> }
>
> __extension__ extern __inline poly16x4_t
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> vld1_p16 (const poly16_t *__a)
> {
> - return (poly16x4_t)
> - __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) __a);
> + return __builtin_aarch64_ld1v4hi_ps (
> + (const __builtin_aarch64_simd_hi *) __a);
> }
>
> __extension__ extern __inline poly64x1_t
> @@ -15011,24 +15011,24 @@ __extension__ extern __inline uint8x8_t
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> vld1_u8 (const uint8_t *__a)
> {
> - return (uint8x8_t)
> - __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) __a);
> + return __builtin_aarch64_ld1v8qi_us (
> + (const __builtin_aarch64_simd_qi *) __a);
> }
>
> __extension__ extern __inline uint16x4_t
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> vld1_u16 (const uint16_t *__a)
> {
> - return (uint16x4_t)
> - __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) __a);
> + return __builtin_aarch64_ld1v4hi_us (
> + (const __builtin_aarch64_simd_hi *) __a);
> }
>
> __extension__ extern __inline uint32x2_t
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> vld1_u32 (const uint32_t *__a)
> {
> - return (uint32x2_t)
> - __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) __a);
> + return __builtin_aarch64_ld1v2si_us (
> + (const __builtin_aarch64_simd_si *) __a);
> }
>
> __extension__ extern __inline uint64x1_t
> @@ -15278,24 +15278,24 @@ __extension__ extern __inline poly8x16_t
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> vld1q_p8 (const poly8_t *__a)
> {
> - return (poly8x16_t)
> - __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) __a);
> + return __builtin_aarch64_ld1v16qi_ps (
> + (const __builtin_aarch64_simd_qi *) __a);
> }
>
> __extension__ extern __inline poly16x8_t
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> vld1q_p16 (const poly16_t *__a)
> {
> - return (poly16x8_t)
> - __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) __a);
> + return __builtin_aarch64_ld1v8hi_ps (
> + (const __builtin_aarch64_simd_hi *) __a);
> }
>
> __extension__ extern __inline poly64x2_t
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> vld1q_p64 (const poly64_t *__a)
> {
> - return (poly64x2_t)
> - __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) __a);
> + return __builtin_aarch64_ld1v2di_ps (
> + (const __builtin_aarch64_simd_di *) __a);
> }
>
> __extension__ extern __inline int8x16_t
> @@ -15330,8 +15330,8 @@ __extension__ extern __inline uint8x16_t
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> vld1q_u8 (const uint8_t *__a)
> {
> - return (uint8x16_t)
> - __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) __a);
> + return __builtin_aarch64_ld1v16qi_us (
> + (const __builtin_aarch64_simd_qi *) __a);
> }
>
> __extension__ extern __inline uint8x8x2_t
> @@ -15549,24 +15549,24 @@ __extension__ extern __inline uint16x8_t
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> vld1q_u16 (const uint16_t *__a)
> {
> - return (uint16x8_t)
> - __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) __a);
> + return __builtin_aarch64_ld1v8hi_us (
> + (const __builtin_aarch64_simd_hi *) __a);
> }
>
> __extension__ extern __inline uint32x4_t
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> vld1q_u32 (const uint32_t *__a)
> {
> - return (uint32x4_t)
> - __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) __a);
> + return __builtin_aarch64_ld1v4si_us (
> + (const __builtin_aarch64_simd_si *) __a);
> }
>
> __extension__ extern __inline uint64x2_t
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> vld1q_u64 (const uint64_t *__a)
> {
> - return (uint64x2_t)
> - __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) __a);
> + return __builtin_aarch64_ld1v2di_us (
> + (const __builtin_aarch64_simd_di *) __a);
> }
>
> /* vld1(q)_x4. */
> @@ -24624,16 +24624,14 @@ __extension__ extern __inline void
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> vst1_p8 (poly8_t *__a, poly8x8_t __b)
> {
> - __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) __a,
> - (int8x8_t) __b);
> + __builtin_aarch64_st1v8qi_sp ((__builtin_aarch64_simd_qi *) __a, __b);
> }
>
> __extension__ extern __inline void
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> vst1_p16 (poly16_t *__a, poly16x4_t __b)
> {
> - __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) __a,
> - (int16x4_t) __b);
> + __builtin_aarch64_st1v4hi_sp ((__builtin_aarch64_simd_hi *) __a, __b);
> }
>
> __extension__ extern __inline void
> @@ -24675,24 +24673,21 @@ __extension__ extern __inline void
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> vst1_u8 (uint8_t *__a, uint8x8_t __b)
> {
> - __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) __a,
> - (int8x8_t) __b);
> + __builtin_aarch64_st1v8qi_su ((__builtin_aarch64_simd_qi *) __a, __b);
> }
>
> __extension__ extern __inline void
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> vst1_u16 (uint16_t *__a, uint16x4_t __b)
> {
> - __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) __a,
> - (int16x4_t) __b);
> + __builtin_aarch64_st1v4hi_su ((__builtin_aarch64_simd_hi *) __a, __b);
> }
>
> __extension__ extern __inline void
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> vst1_u32 (uint32_t *__a, uint32x2_t __b)
> {
> - __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) __a,
> - (int32x2_t) __b);
> + __builtin_aarch64_st1v2si_su ((__builtin_aarch64_simd_si *) __a, __b);
> }
>
> __extension__ extern __inline void
> @@ -24729,24 +24724,21 @@ __extension__ extern __inline void
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> vst1q_p8 (poly8_t *__a, poly8x16_t __b)
> {
> - __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) __a,
> - (int8x16_t) __b);
> + __builtin_aarch64_st1v16qi_sp ((__builtin_aarch64_simd_qi *) __a, __b);
> }
>
> __extension__ extern __inline void
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> vst1q_p16 (poly16_t *__a, poly16x8_t __b)
> {
> - __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) __a,
> - (int16x8_t) __b);
> + __builtin_aarch64_st1v8hi_sp ((__builtin_aarch64_simd_hi *) __a, __b);
> }
>
> __extension__ extern __inline void
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> vst1q_p64 (poly64_t *__a, poly64x2_t __b)
> {
> - __builtin_aarch64_st1v2di_sp ((__builtin_aarch64_simd_di *) __a,
> - (poly64x2_t) __b);
> + __builtin_aarch64_st1v2di_sp ((__builtin_aarch64_simd_di *) __a, __b);
> }
>
> __extension__ extern __inline void
> @@ -24781,32 +24773,28 @@ __extension__ extern __inline void
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> vst1q_u8 (uint8_t *__a, uint8x16_t __b)
> {
> - __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) __a,
> - (int8x16_t) __b);
> + __builtin_aarch64_st1v16qi_su ((__builtin_aarch64_simd_qi *) __a, __b);
> }
>
> __extension__ extern __inline void
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> vst1q_u16 (uint16_t *__a, uint16x8_t __b)
> {
> - __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) __a,
> - (int16x8_t) __b);
> + __builtin_aarch64_st1v8hi_su ((__builtin_aarch64_simd_hi *) __a, __b);
> }
>
> __extension__ extern __inline void
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> vst1q_u32 (uint32_t *__a, uint32x4_t __b)
> {
> - __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) __a,
> - (int32x4_t) __b);
> + __builtin_aarch64_st1v4si_su ((__builtin_aarch64_simd_si *) __a, __b);
> }
>
> __extension__ extern __inline void
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> vst1q_u64 (uint64_t *__a, uint64x2_t __b)
> {
> - __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) __a,
> - (int64x2_t) __b);
> + __builtin_aarch64_st1v2di_su ((__builtin_aarch64_simd_di *) __a, __b);
> }
>
> /* vst1_lane */
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index
> bdc8ba3576cf2c9b4ae96b45a382234e4e25b13f..9389242a1c269cf3b108ef4abbcc3d3f5bf08842
> 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -207,6 +207,9 @@
> ;; All Advanced SIMD polynomial modes and DI.
> (define_mode_iterator VALLP [V8QI V16QI V4HI V8HI V2DI DI])
>
> +;; All Advanced SIMD polynomial modes.
> +(define_mode_iterator VALLP_NO_DI [V8QI V16QI V4HI V8HI V2DI])
> +
> ;; Advanced SIMD modes for Integer reduction across lanes.
> (define_mode_iterator VDQV [V8QI V16QI V4HI V8HI V4SI V2DI])
>
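
For reference, a minimal, untested sketch of the kind of clean-up suggested
above: read the vector type from the gimple call itself instead of mapping
fcode through get_mem_type_for_load_store. The helper name and its exact
shape are illustrative only; they are not part of the patch or of any
committed change.

/* Hypothetical helper (assumes the tree/gimple headers that
   aarch64-builtins.c already includes): pull the vector type straight
   from the call statement.  For an ld1 the type is on the LHS; for an
   st1 it is the type of the stored value.  */
static tree
aarch64_vector_type_for_load_store (gcall *stmt, bool is_load)
{
  if (is_load)
    {
      /* The LHS can be missing if the result is unused, so callers
         would need to punt in that case.  */
      tree lhs = gimple_call_lhs (stmt);
      return lhs ? TREE_TYPE (lhs) : NULL_TREE;
    }
  /* st1 takes (pointer, value); the stored value is operand 1.  */
  return TREE_TYPE (gimple_call_arg (stmt, 1));
}

With something along those lines, the fcode switch would no longer be
needed for the ld1/st1 folds.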