Hi all,
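The vmlal_high_lane_* and vmlsl_high_lane_* intrinsics are implemented as
macros that map down to inline asm, but the type they use for the lane vector
argument (c) is inconsistent with the ACLE spec: it is declared as a 128-bit
vector (e.g. int16x8_t) where the spec requires a 64-bit vector (e.g.
int16x4_t). This patch fixes the types, although the intrinsics should be
reimplemented properly in C in the future.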
This is a bugfix and it applies cleanly to trunk, 4.9 and 4.8.
I know we're close to the 4.9.1 release, but this is not an ABI-breaking
change so it's the aarch64 maintainers' call on whether it should be
backported.
Tested on aarch64-none-elf.
Ok?
Thanks,
Kyrill
2014-07-09 Kyrylo Tkachov <kyrylo.tkac...@arm.com>
* config/aarch64/arm_neon.h (vmlal_high_lane_s16): Fix type.
(vmlal_high_lane_s32): Likewise.
(vmlal_high_lane_u16): Likewise.
(vmlal_high_lane_u32): Likewise.
(vmlsl_high_lane_s16): Likewise.
(vmlsl_high_lane_s32): Likewise.
(vmlsl_high_lane_u16): Likewise.
(vmlsl_high_lane_u32): Likewise.
commit 991893519ceea282bfaf696b88d5c9291ce2e3a0
Author: Kyrylo Tkachov <kyrylo.tkac...@arm.com>
Date: Thu Jun 26 13:59:19 2014 +0100
[AArch64] Fix types for some assembly intrinsics
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 7807181..9e8d15a 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -6735,7 +6735,7 @@ vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
#define vmlal_high_lane_s16(a, b, c, d) \
__extension__ \
({ \
- int16x8_t c_ = (c); \
+ int16x4_t c_ = (c); \
int16x8_t b_ = (b); \
int32x4_t a_ = (a); \
int32x4_t result; \
@@ -6749,7 +6749,7 @@ vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
#define vmlal_high_lane_s32(a, b, c, d) \
__extension__ \
({ \
- int32x4_t c_ = (c); \
+ int32x2_t c_ = (c); \
int32x4_t b_ = (b); \
int64x2_t a_ = (a); \
int64x2_t result; \
@@ -6763,7 +6763,7 @@ vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
#define vmlal_high_lane_u16(a, b, c, d) \
__extension__ \
({ \
- uint16x8_t c_ = (c); \
+ uint16x4_t c_ = (c); \
uint16x8_t b_ = (b); \
uint32x4_t a_ = (a); \
uint32x4_t result; \
@@ -6777,7 +6777,7 @@ vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
#define vmlal_high_lane_u32(a, b, c, d) \
__extension__ \
({ \
- uint32x4_t c_ = (c); \
+ uint32x2_t c_ = (c); \
uint32x4_t b_ = (b); \
uint64x2_t a_ = (a); \
uint64x2_t result; \
@@ -7423,7 +7423,7 @@ vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
#define vmlsl_high_lane_s16(a, b, c, d) \
__extension__ \
({ \
- int16x8_t c_ = (c); \
+ int16x4_t c_ = (c); \
int16x8_t b_ = (b); \
int32x4_t a_ = (a); \
int32x4_t result; \
@@ -7437,7 +7437,7 @@ vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
#define vmlsl_high_lane_s32(a, b, c, d) \
__extension__ \
({ \
- int32x4_t c_ = (c); \
+ int32x2_t c_ = (c); \
int32x4_t b_ = (b); \
int64x2_t a_ = (a); \
int64x2_t result; \
@@ -7451,7 +7451,7 @@ vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
#define vmlsl_high_lane_u16(a, b, c, d) \
__extension__ \
({ \
- uint16x8_t c_ = (c); \
+ uint16x4_t c_ = (c); \
uint16x8_t b_ = (b); \
uint32x4_t a_ = (a); \
uint32x4_t result; \
@@ -7465,7 +7465,7 @@ vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
#define vmlsl_high_lane_u32(a, b, c, d) \
__extension__ \
({ \
- uint32x4_t c_ = (c); \
+ uint32x2_t c_ = (c); \
uint32x4_t b_ = (b); \
uint64x2_t a_ = (a); \
uint64x2_t result; \