Jonathan Wright <jonathan.wri...@arm.com> writes:
> Hi,
>
> This patch declares unsigned and polynomial type-qualified builtins for
> vcombine_* Neon intrinsics. Using these builtins removes the need for
> many casts in arm_neon.h.
>
> Bootstrapped and regression tested on aarch64-none-linux-gnu - no
> issues.
>
> Ok for master?
>
> Thanks,
> Jonathan
>
> ---
>
> gcc/ChangeLog:
>
> 2021-11-10 Jonathan Wright <jonathan.wri...@arm.com>
>
> * config/aarch64/aarch64-builtins.c (TYPES_COMBINE): Delete.
> (TYPES_COMBINEP): Delete.
> * config/aarch64/aarch64-simd-builtins.def: Declare type-
> qualified builtins for vcombine_* intrinsics.
> * config/aarch64/arm_neon.h (vcombine_s8): Remove unnecessary
> cast.
> (vcombine_s16): Likewise.
> (vcombine_s32): Likewise.
> (vcombine_f32): Likewise.
> (vcombine_u8): Use type-qualified builtin and remove casts.
> (vcombine_u16): Likewise.
> (vcombine_u32): Likewise.
> (vcombine_u64): Likewise.
> (vcombine_p8): Likewise.
> (vcombine_p16): Likewise.
> (vcombine_p64): Likewise.
> (vcombine_bf16): Remove unnecessary cast.
> * config/aarch64/iterators.md (VDC_I): New mode iterator.
> (VDC_P): New mode iterator.
> > diff --git a/gcc/config/aarch64/aarch64-builtins.c > b/gcc/config/aarch64/aarch64-builtins.c > index > f286401ff3ab01dd860ae22858ca07e364247414..7abf8747b69591815068709af42598c47d73269e > 100644 > --- a/gcc/config/aarch64/aarch64-builtins.c > +++ b/gcc/config/aarch64/aarch64-builtins.c > @@ -353,17 +353,6 @@ > aarch64_types_unsigned_shiftacc_qualifiers[SIMD_MAX_BUILTIN_ARGS] > qualifier_immediate }; > #define TYPES_USHIFTACC (aarch64_types_unsigned_shiftacc_qualifiers) > > - > -static enum aarch64_type_qualifiers > -aarch64_types_combine_qualifiers[SIMD_MAX_BUILTIN_ARGS] > - = { qualifier_none, qualifier_none, qualifier_none }; > -#define TYPES_COMBINE (aarch64_types_combine_qualifiers) > - > -static enum aarch64_type_qualifiers > -aarch64_types_combine_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] > - = { qualifier_poly, qualifier_poly, qualifier_poly }; > -#define TYPES_COMBINEP (aarch64_types_combine_p_qualifiers) > - > static enum aarch64_type_qualifiers > aarch64_types_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS] > = { qualifier_none, qualifier_const_pointer_map_mode }; > diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def > b/gcc/config/aarch64/aarch64-simd-builtins.def > index > 404696a71e0c1fc37cdf53fc42439a28bc9a745a..ab5f3a098f2047d0f1ba933f4418609678102c3d > 100644 > --- a/gcc/config/aarch64/aarch64-simd-builtins.def > +++ b/gcc/config/aarch64/aarch64-simd-builtins.def > @@ -43,8 +43,9 @@ > help describe the attributes (for example, pure) for the intrinsic > function. 
*/ > > - BUILTIN_VDC (COMBINE, combine, 0, AUTO_FP) > - VAR1 (COMBINEP, combine, 0, NONE, di) > + BUILTIN_VDC (BINOP, combine, 0, AUTO_FP) > + BUILTIN_VDC_I (BINOPU, combine, 0, NONE) > + BUILTIN_VDC_P (BINOPP, combine, 0, NONE) > BUILTIN_VB (BINOPP, pmul, 0, NONE) > VAR1 (BINOPP, pmull, 0, NONE, v8qi) > VAR1 (BINOPP, pmull_hi, 0, NONE, v16qi) > diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h > index > 7abd1821840f84a79c37c40a33214294b06edbc6..c374e90f31546886a519ba270113ccedd4ca7abf > 100644 > --- a/gcc/config/aarch64/arm_neon.h > +++ b/gcc/config/aarch64/arm_neon.h > @@ -5975,21 +5975,21 @@ __extension__ extern __inline int8x16_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > vcombine_s8 (int8x8_t __a, int8x8_t __b) > { > - return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b); > + return __builtin_aarch64_combinev8qi (__a, __b); > } > > __extension__ extern __inline int16x8_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > vcombine_s16 (int16x4_t __a, int16x4_t __b) > { > - return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b); > + return __builtin_aarch64_combinev4hi (__a, __b); > } > > __extension__ extern __inline int32x4_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > vcombine_s32 (int32x2_t __a, int32x2_t __b) > { > - return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b); > + return __builtin_aarch64_combinev2si (__a, __b); > } > > __extension__ extern __inline int64x2_t > @@ -6010,38 +6010,35 @@ __extension__ extern __inline float32x4_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > vcombine_f32 (float32x2_t __a, float32x2_t __b) > { > - return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b); > + return __builtin_aarch64_combinev2sf (__a, __b); > } > > __extension__ extern __inline uint8x16_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > vcombine_u8 (uint8x8_t __a, uint8x8_t __b) > 
{ > - return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a, > - (int8x8_t) __b); > + return __builtin_aarch64_combinev8qi_uuu (__a, __b); > } > > __extension__ extern __inline uint16x8_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > vcombine_u16 (uint16x4_t __a, uint16x4_t __b) > { > - return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a, > - (int16x4_t) __b); > + return __builtin_aarch64_combinev4hi_uuu (__a, __b); > } > > __extension__ extern __inline uint32x4_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > vcombine_u32 (uint32x2_t __a, uint32x2_t __b) > { > - return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a, > - (int32x2_t) __b); > + return __builtin_aarch64_combinev2si_uuu (__a, __b); > } > > __extension__ extern __inline uint64x2_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > vcombine_u64 (uint64x1_t __a, uint64x1_t __b) > { > - return (uint64x2_t) __builtin_aarch64_combinedi (__a[0], __b[0]); > + return __builtin_aarch64_combinedi_uuu (__a[0], __b[0]); > } > > __extension__ extern __inline float64x2_t > @@ -6055,23 +6052,21 @@ __extension__ extern __inline poly8x16_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > vcombine_p8 (poly8x8_t __a, poly8x8_t __b) > { > - return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a, > - (int8x8_t) __b); > + return __builtin_aarch64_combinev8qi_ppp (__a, __b); > } > > __extension__ extern __inline poly16x8_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > vcombine_p16 (poly16x4_t __a, poly16x4_t __b) > { > - return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a, > - (int16x4_t) __b); > + return __builtin_aarch64_combinev4hi_ppp (__a, __b); > } > > __extension__ extern __inline poly64x2_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > vcombine_p64 (poly64x1_t __a, poly64x1_t __b) > { > - return (poly64x2_t) 
__builtin_aarch64_combinedi_ppp (__a[0], __b[0]); > + return __builtin_aarch64_combinedi_ppp (__a[0], __b[0]); > } > > /* Start of temporary inline asm implementations. */ > @@ -30648,7 +30643,7 @@ __extension__ extern __inline bfloat16x8_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > vcombine_bf16 (bfloat16x4_t __a, bfloat16x4_t __b) > { > - return (bfloat16x8_t)__builtin_aarch64_combinev4bf (__a, __b); > + return __builtin_aarch64_combinev4bf (__a, __b); > } > > /* vdup */ > diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md > index > 9389242a1c269cf3b108ef4abbcc3d3f5bf08842..1598e19751ff5112a072118a629755272f48e83b > 100644 > --- a/gcc/config/aarch64/iterators.md > +++ b/gcc/config/aarch64/iterators.md > @@ -238,6 +238,12 @@ > ;; Double vector modes for combines. > (define_mode_iterator VDC [V8QI V4HI V4BF V4HF V2SI V2SF DI DF]) > > +;; Integer modes for vector combines. > +(define_mode_iterator VDC_I [V8QI V4HI V2SI DI])

I think just VD_I would be better for this, placed near VQ_I.
(Surprised to see that we didn't have it already.)

OK with that change, thanks.

Richard

> +
> +;; Polynomial modes for vector combines.
> +(define_mode_iterator VDC_P [V8QI V4HI DI])
> +
> ;; Advanced SIMD modes except double int.
> (define_mode_iterator VDQIF [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DF])
> (define_mode_iterator VDQIF_F16 [V8QI V16QI V4HI V8HI V2SI V4SI