> > 2013-07-02 James Greenhalgh <james.greenha...@arm.com> > > > > * config/aarch64/aarch64-builtins.c > > (aarch64_simd_expand_builtin): Handle AARCH64_SIMD_STORE1. > > * config/aarch64/aarch64-simd-builtins.def (ld1): New. > > (st1): Likewise. > > * config/aarch64/aarch64-simd.md > > (aarch64_ld1<VALL:mode>): New. > > (aarch64_st1<VALL:mode>): Likewise. > > * config/aarch64/arm_neon.h > > (vld1<q>_<fpsu><8, 16, 32, 64>): Convert to RTL builtins. > > > > OK > /Marcus
Thanks Marcus, I've committed this as r200634. As this is a bug-fix I'd like to backport it to the 4.8 branch. I've attached a copy of the patch that applies to 4.8 and run it through testing on aarch64-none-elf with no issues. Is this OK to commit to gcc-4_8-branch? Thanks, James --- gcc/ 2013-07-04 James Greenhalgh <james.greenha...@arm.com> Backport From mainline: 2013-07-03 James Greenhalgh <james.greenha...@arm.com> * config/aarch64/aarch64-builtins.c (aarch64_simd_expand_builtin): Handle AARCH64_SIMD_STORE1. * config/aarch64/aarch64-simd-builtins.def (ld1): New. (st1): Likewise. * config/aarch64/aarch64-simd.md (aarch64_ld1<VALL:mode>): New. (aarch64_st1<VALL:mode>): Likewise. * config/aarch64/arm_neon.h (vld1<q>_<fpsu><8, 16, 32, 64>): Convert to RTL builtins.
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index 1ea55a8..b2901db 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -1154,6 +1154,7 @@ aarch64_simd_expand_builtin (int fcode, tree exp, rtx target) return aarch64_simd_expand_args (target, icode, 1, exp, SIMD_ARG_COPY_TO_REG, SIMD_ARG_STOP); + case AARCH64_SIMD_STORE1: case AARCH64_SIMD_STORESTRUCT: return aarch64_simd_expand_args (target, icode, 0, exp, SIMD_ARG_COPY_TO_REG, diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index a6a5e12..955da26 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -256,3 +256,10 @@ BUILTIN_VALL (BINOP, uzp2) BUILTIN_VALL (BINOP, trn1) BUILTIN_VALL (BINOP, trn2) + + /* Implemented by aarch64_ld1<VALL:mode>. */ + BUILTIN_VALL (LOAD1, ld1) + + /* Implemented by aarch64_st1<VALL:mode>. */ + BUILTIN_VALL (STORE1, st1) + diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 79c3093..00f3c31 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -3457,6 +3457,17 @@ DONE; }) +(define_expand "aarch64_ld1<VALL:mode>" + [(match_operand:VALL 0 "register_operand") + (match_operand:DI 1 "register_operand")] + "TARGET_SIMD" +{ + enum machine_mode mode = <VALL:MODE>mode; + rtx mem = gen_rtx_MEM (mode, operands[1]); + emit_move_insn (operands[0], mem); + DONE; +}) + (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>" [(match_operand:VSTRUCT 0 "register_operand" "=w") (match_operand:DI 1 "register_operand" "r") @@ -3673,6 +3684,17 @@ DONE; }) +(define_expand "aarch64_st1<VALL:mode>" + [(match_operand:DI 0 "register_operand") + (match_operand:VALL 1 "register_operand")] + "TARGET_SIMD" +{ + enum machine_mode mode = <VALL:MODE>mode; + rtx mem = gen_rtx_MEM (mode, operands[0]); + emit_move_insn (mem, operands[1]); + DONE; +}) + ;; Expander for builtins to insert vector registers into large ;; opaque integer modes. diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 669217e..60e1f7d 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -8518,28 +8518,6 @@ vld1_dup_u64 (const uint64_t * a) return result; } -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vld1_f32 (const float32_t * a) -{ - float32x2_t result; - __asm__ ("ld1 {%0.2s}, %1" - : "=w"(result) - : "Utv"(({const float32x2_t *_a = (float32x2_t *) a; *_a;})) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -vld1_f64 (const float64_t * a) -{ - float64x1_t result; - __asm__ ("ld1 {%0.1d}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - #define vld1_lane_f32(a, b, c) \ __extension__ \ ({ \ @@ -8696,116 +8674,6 @@ vld1_f64 (const float64_t * a) result; \ }) -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vld1_p8 (const poly8_t * a) -{ - poly8x8_t result; - __asm__ ("ld1 {%0.8b}, %1" - : "=w"(result) - : "Utv"(({const poly8x8_t *_a = (poly8x8_t *) a; *_a;})) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vld1_p16 (const poly16_t * a) -{ - poly16x4_t result; - __asm__ ("ld1 {%0.4h}, %1" - : "=w"(result) - : "Utv"(({const poly16x4_t *_a = (poly16x4_t *) a; *_a;})) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vld1_s8 (const int8_t * a) -{ - int8x8_t result; - __asm__ ("ld1 {%0.8b}, %1" - : "=w"(result) - : "Utv"(({const int8x8_t *_a = (int8x8_t *) a; *_a;})) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vld1_s16 (const int16_t * a) -{ - int16x4_t result; - __asm__ ("ld1 {%0.4h}, %1" - : "=w"(result) - : "Utv"(({const int16x4_t *_a = (int16x4_t *) a; *_a;})) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vld1_s32 (const int32_t * a) -{ - int32x2_t result; - __asm__ ("ld1 {%0.2s}, %1" - : "=w"(result) - : "Utv"(({const int32x2_t *_a = (int32x2_t *) a; *_a;})) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vld1_s64 (const int64_t * a) -{ - int64x1_t result; - __asm__ ("ld1 {%0.1d}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vld1_u8 (const uint8_t * a) -{ - uint8x8_t result; - __asm__ ("ld1 {%0.8b}, %1" - : "=w"(result) - : "Utv"(({const uint8x8_t *_a = (uint8x8_t *) a; *_a;})) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vld1_u16 (const uint16_t * a) -{ - uint16x4_t result; - __asm__ ("ld1 {%0.4h}, %1" - : "=w"(result) - : "Utv"(({const uint16x4_t *_a = (uint16x4_t *) a; *_a;})) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vld1_u32 (const uint32_t * a) -{ - uint32x2_t result; - __asm__ ("ld1 {%0.2s}, %1" - : "=w"(result) - : "Utv"(({const uint32x2_t *_a = (uint32x2_t *) a; *_a;})) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vld1_u64 (const uint64_t * a) -{ - uint64x1_t result; - __asm__ ("ld1 {%0.1d}, %1" - : "=w"(result) - : "Utv"(*a) - : /* No clobbers */); - return result; -} - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vld1q_dup_f32 (const float32_t * a) { @@ -8938,28 +8806,6 @@ vld1q_dup_u64 (const uint64_t * a) return result; } -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vld1q_f32 (const float32_t * a) -{ - float32x4_t result; - __asm__ ("ld1 {%0.4s}, %1" - : "=w"(result) - : "Utv"(({const float32x4_t *_a = (float32x4_t *) a; *_a;})) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vld1q_f64 (const float64_t * a) -{ - float64x2_t result; - __asm__ ("ld1 {%0.2d}, %1" - : "=w"(result) - : "Utv"(({const float64x2_t *_a = (float64x2_t *) a; *_a;})) - : /* No clobbers */); - return result; -} - #define vld1q_lane_f32(a, b, c) \ __extension__ \ ({ \ @@ -9116,116 +8962,6 @@ vld1q_f64 (const float64_t * a) result; \ }) -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vld1q_p8 (const poly8_t * a) -{ - poly8x16_t result; - __asm__ ("ld1 {%0.16b}, %1" - : "=w"(result) - : "Utv"(({const poly8x16_t *_a = (poly8x16_t *) a; *_a;})) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vld1q_p16 (const poly16_t * a) -{ - poly16x8_t result; - __asm__ ("ld1 {%0.16b}, %1" - : "=w"(result) - : "Utv"(({const poly16x8_t *_a = (poly16x8_t *) a; *_a;})) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vld1q_s8 (const int8_t * a) -{ - int8x16_t result; - __asm__ ("ld1 {%0.16b}, %1" - : "=w"(result) - : "Utv"(({const int8x16_t *_a = (int8x16_t *) a; *_a;})) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vld1q_s16 (const int16_t * a) -{ - int16x8_t result; - __asm__ ("ld1 {%0.8h}, %1" - : "=w"(result) - : "Utv"(({const int16x8_t *_a = (int16x8_t *) a; *_a;})) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vld1q_s32 (const int32_t * a) -{ - int32x4_t result; - __asm__ ("ld1 {%0.4s}, %1" - : "=w"(result) - : "Utv"(({const int32x4_t *_a = (int32x4_t *) a; *_a;})) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vld1q_s64 (const int64_t * a) -{ - int64x2_t result; - __asm__ ("ld1 {%0.2d}, %1" - : "=w"(result) - : "Utv"(({const int64x2_t *_a = (int64x2_t *) a; *_a;})) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vld1q_u8 (const uint8_t * a) -{ - uint8x16_t result; - __asm__ ("ld1 {%0.16b}, %1" - : "=w"(result) - : "Utv"(({const uint8x16_t *_a = (uint8x16_t *) a; *_a;})) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vld1q_u16 (const uint16_t * a) -{ - uint16x8_t result; - __asm__ ("ld1 {%0.8h}, %1" - : "=w"(result) - : "Utv"(({const uint16x8_t *_a = (uint16x8_t *) a; *_a;})) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vld1q_u32 (const uint32_t * a) -{ - uint32x4_t result; - __asm__ ("ld1 {%0.4s}, %1" - : "=w"(result) - : "Utv"(({const uint32x4_t *_a = (uint32x4_t *) a; *_a;})) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vld1q_u64 (const uint64_t * a) -{ - uint64x2_t result; - __asm__ ("ld1 {%0.2d}, %1" - : "=w"(result) - : "Utv"(({const uint64x2_t *_a = (uint64x2_t *) a; *_a;})) - : /* No clobbers */); - return result; -} - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vmaxnm_f32 (float32x2_t a, float32x2_t b) { @@ -16285,24 +16021,6 @@ vrsubhn_u64 (uint64x2_t a, uint64x2_t b) result; \ }) -__extension__ static __inline void __attribute__ ((__always_inline__)) -vst1_f32 (float32_t * a, float32x2_t b) -{ - __asm__ ("st1 {%1.2s},[%0]" - : - : "r"(a), "w"(b) - : "memory"); -} - -__extension__ static __inline void __attribute__ ((__always_inline__)) -vst1_f64 (float64_t * a, float64x1_t b) -{ - __asm__ ("st1 {%1.1d},[%0]" - : - : "r"(a), "w"(b) - : "memory"); -} - #define vst1_lane_f32(a, b, c) \ __extension__ \ ({ \ @@ -16435,113 +16153,6 @@ vst1_f64 (float64_t * a, float64x1_t b) : "memory"); \ }) -__extension__ static __inline void __attribute__ ((__always_inline__)) -vst1_p8 (poly8_t * a, poly8x8_t b) -{ - __asm__ ("st1 {%1.8b},[%0]" - : - : "r"(a), "w"(b) - : "memory"); -} - -__extension__ static __inline void __attribute__ ((__always_inline__)) -vst1_p16 (poly16_t * a, poly16x4_t b) -{ - __asm__ ("st1 {%1.4h},[%0]" - : - : "r"(a), "w"(b) - : "memory"); -} - -__extension__ static __inline void __attribute__ ((__always_inline__)) -vst1_s8 (int8_t * a, int8x8_t b) -{ - __asm__ ("st1 {%1.8b},[%0]" - : - : "r"(a), "w"(b) - : "memory"); -} - -__extension__ static __inline void __attribute__ ((__always_inline__)) -vst1_s16 (int16_t * a, int16x4_t b) -{ - __asm__ ("st1 {%1.4h},[%0]" - : - : "r"(a), "w"(b) - : "memory"); -} - -__extension__ static __inline void __attribute__ ((__always_inline__)) -vst1_s32 (int32_t * a, int32x2_t b) -{ - __asm__ ("st1 {%1.2s},[%0]" - : - : "r"(a), "w"(b) - : "memory"); -} - -__extension__ static __inline void __attribute__ ((__always_inline__)) -vst1_s64 (int64_t * a, int64x1_t b) -{ - __asm__ ("st1 {%1.1d},[%0]" - : - : "r"(a), "w"(b) - : "memory"); -} - -__extension__ static __inline void __attribute__ ((__always_inline__)) -vst1_u8 (uint8_t * a, uint8x8_t b) -{ - __asm__ ("st1 {%1.8b},[%0]" - : - : "r"(a), "w"(b) - : "memory"); -} - -__extension__ static __inline void __attribute__ ((__always_inline__)) -vst1_u16 (uint16_t * a, uint16x4_t b) -{ - __asm__ ("st1 {%1.4h},[%0]" - : - : "r"(a), "w"(b) - : "memory"); -} - -__extension__ static __inline void __attribute__ ((__always_inline__)) -vst1_u32 (uint32_t * a, uint32x2_t b) -{ - __asm__ ("st1 {%1.2s},[%0]" - : - : "r"(a), "w"(b) - : "memory"); -} - -__extension__ static __inline void __attribute__ ((__always_inline__)) -vst1_u64 (uint64_t * a, uint64x1_t b) -{ - __asm__ ("st1 {%1.1d},[%0]" - : - : "r"(a), "w"(b) - : "memory"); -} - -__extension__ static __inline void __attribute__ ((__always_inline__)) -vst1q_f32 (float32_t * a, float32x4_t b) -{ - __asm__ ("st1 {%1.4s},[%0]" - : - : "r"(a), "w"(b) - : "memory"); -} - -__extension__ static __inline void __attribute__ ((__always_inline__)) -vst1q_f64 (float64_t * a, float64x2_t b) -{ - __asm__ ("st1 {%1.2d},[%0]" - : - : "r"(a), "w"(b) - : "memory"); -} #define vst1q_lane_f32(a, b, c) \ __extension__ \ @@ -16675,96 +16286,6 @@ vst1q_f64 (float64_t * a, float64x2_t b) : "memory"); \ }) -__extension__ static __inline void __attribute__ ((__always_inline__)) -vst1q_p8 (poly8_t * a, poly8x16_t b) -{ - __asm__ ("st1 {%1.16b},[%0]" - : - : "r"(a), "w"(b) - : "memory"); -} - -__extension__ static __inline void __attribute__ ((__always_inline__)) -vst1q_p16 (poly16_t * a, poly16x8_t b) -{ - __asm__ ("st1 {%1.8h},[%0]" - : - : "r"(a), "w"(b) - : "memory"); -} - -__extension__ static __inline void __attribute__ ((__always_inline__)) -vst1q_s8 (int8_t * a, int8x16_t b) -{ - __asm__ ("st1 {%1.16b},[%0]" - : - : "r"(a), "w"(b) - : "memory"); -} - -__extension__ static __inline void __attribute__ ((__always_inline__)) -vst1q_s16 (int16_t * a, int16x8_t b) -{ - __asm__ ("st1 {%1.8h},[%0]" - : - : "r"(a), "w"(b) - : "memory"); -} - -__extension__ static __inline void __attribute__ ((__always_inline__)) -vst1q_s32 (int32_t * a, int32x4_t b) -{ - __asm__ ("st1 {%1.4s},[%0]" - : - : "r"(a), "w"(b) - : "memory"); -} - -__extension__ static __inline void __attribute__ ((__always_inline__)) -vst1q_s64 (int64_t * a, int64x2_t b) -{ - __asm__ ("st1 {%1.2d},[%0]" - : - : "r"(a), "w"(b) - : "memory"); -} - -__extension__ static __inline void __attribute__ ((__always_inline__)) -vst1q_u8 (uint8_t * a, uint8x16_t b) -{ - __asm__ ("st1 {%1.16b},[%0]" - : - : "r"(a), "w"(b) - : "memory"); -} - -__extension__ static __inline void __attribute__ ((__always_inline__)) -vst1q_u16 (uint16_t * a, uint16x8_t b) -{ - __asm__ ("st1 {%1.8h},[%0]" - : - : "r"(a), "w"(b) - : "memory"); -} - -__extension__ static __inline void __attribute__ ((__always_inline__)) -vst1q_u32 (uint32_t * a, uint32x4_t b) -{ - __asm__ ("st1 {%1.4s},[%0]" - : - : "r"(a), "w"(b) - : "memory"); -} - -__extension__ static __inline void __attribute__ ((__always_inline__)) -vst1q_u64 (uint64_t * a, uint64x2_t b) -{ - __asm__ ("st1 {%1.2d},[%0]" - : - : "r"(a), "w"(b) - : "memory"); -} - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c) { @@ -20537,6 +20058,165 @@ vdupd_lane_u64 (uint64x2_t a, int const b) return (uint64x1_t) __builtin_aarch64_dup_lanedi ((int64x2_t) a, b); } +/* vld1 */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vld1_f32 (const float32_t *a) +{ + return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vld1_f64 (const float64_t *a) +{ + return *a; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vld1_p8 (const poly8_t *a) +{ + return (poly8x8_t) + __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vld1_p16 (const poly16_t *a) +{ + return (poly16x4_t) + __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vld1_s8 (const int8_t *a) +{ + return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vld1_s16 (const int16_t *a) +{ + return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vld1_s32 (const int32_t *a) +{ + return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vld1_s64 (const int64_t *a) +{ + return *a; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vld1_u8 (const uint8_t *a) +{ + return (uint8x8_t) + __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vld1_u16 (const uint16_t *a) +{ + return (uint16x4_t) + __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vld1_u32 (const uint32_t *a) +{ + return (uint32x2_t) + __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vld1_u64 (const uint64_t *a) +{ + return *a; +} + +/* vld1q */ + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vld1q_f32 (const float32_t *a) +{ + return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vld1q_f64 (const float64_t *a) +{ + return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vld1q_p8 (const poly8_t *a) +{ + return (poly8x16_t) + __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vld1q_p16 (const poly16_t *a) +{ + return (poly16x8_t) + __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vld1q_s8 (const int8_t *a) +{ + return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vld1q_s16 (const int16_t *a) +{ + return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vld1q_s32 (const int32_t *a) +{ + return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vld1q_s64 (const int64_t *a) +{ + return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vld1q_u8 (const uint8_t *a) +{ + return (uint8x16_t) + __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vld1q_u16 (const uint16_t *a) +{ + return (uint16x8_t) + __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vld1q_u32 (const uint32_t *a) +{ + return (uint32x4_t) + __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vld1q_u64 (const uint64_t *a) +{ + return (uint64x2_t) + __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a); +} + /* vldn */ __extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__)) @@ -24307,6 +23987,165 @@ vsrid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) return (uint64x1_t) __builtin_aarch64_usri_ndi (__a, __b, __c); } +/* vst1 */ + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_f32 (float32_t *a, float32x2_t b) +{ + __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_f64 (float64_t *a, float64x1_t b) +{ + *a = b; +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_p8 (poly8_t *a, poly8x8_t b) +{ + __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, + (int8x8_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_p16 (poly16_t *a, poly16x4_t b) +{ + __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, + (int16x4_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_s8 (int8_t *a, int8x8_t b) +{ + __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_s16 (int16_t *a, int16x4_t b) +{ + __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_s32 (int32_t *a, int32x2_t b) +{ + __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_s64 (int64_t *a, int64x1_t b) +{ + *a = b; +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_u8 (uint8_t *a, uint8x8_t b) +{ + __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, + (int8x8_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_u16 (uint16_t *a, uint16x4_t b) +{ + __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, + (int16x4_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_u32 (uint32_t *a, uint32x2_t b) +{ + __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, + (int32x2_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_u64 (uint64_t *a, uint64x1_t b) +{ + *a = b; +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_f32 (float32_t *a, float32x4_t b) +{ + __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_f64 (float64_t *a, float64x2_t b) +{ + __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b); +} + +/* vst1q */ + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_p8 (poly8_t *a, poly8x16_t b) +{ + __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, + (int8x16_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_p16 (poly16_t *a, poly16x8_t b) +{ + __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, + (int16x8_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_s8 (int8_t *a, int8x16_t b) +{ + __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_s16 (int16_t *a, int16x8_t b) +{ + __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_s32 (int32_t *a, int32x4_t b) +{ + __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_s64 (int64_t *a, int64x2_t b) +{ + __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_u8 (uint8_t *a, uint8x16_t b) +{ + __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, + (int8x16_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_u16 (uint16_t *a, uint16x8_t b) +{ + __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, + (int16x8_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_u32 (uint32_t *a, uint32x4_t b) +{ + __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, + (int32x4_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_u64 (uint64_t *a, uint64x2_t b) +{ + __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, + (int64x2_t) b); +} + /* vstn */ __extension__ static __inline void