[PATCH][AArch64] Fix argument types for some high_lane* intrinsics implemented in assembly

Kyrill Tkachov Wed, 09 Jul 2014 07:38:39 -0700

Hi all,

These intrinsics are implemented as macros that map down to asms but thetypes they accept are inconsistent with the ACLE spec. This patch fixesthem, although they should be reimplemented properly in C in the future.


This is a bugfix and it applies cleanly to trunk, 4.9 and 4.8.

I know we're close to the 4.9.1 release, but this is not an ABI-breakingchange so it's the aarch64 maintainers' call on whether it should bebackported.


Tested aarch64-none-elf

Ok?

Thanks,
Kyrill

2014-07-09  Kyrylo Tkachov  <[email protected]>

    * config/aarch64/arm_neon.h (vmlal_high_lane_s16): Fix type.
    (vmlal_high_lane_s32): Likewise.
    (vmlal_high_lane_u16): Likewise.
    (vmlal_high_lane_u32): Likewise.
    (vmlsl_high_lane_s16): Likewise.
    (vmlsl_high_lane_s32): Likewise.
    (vmlsl_high_lane_u16): Likewise.
    (vmlsl_high_lane_u32): Likewise.

commit 991893519ceea282bfaf696b88d5c9291ce2e3a0
Author: Kyrylo Tkachov <[email protected]>
Date:   Thu Jun 26 13:59:19 2014 +0100

    [AArch64] Fix types for some assembly intrinsics

diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 7807181..9e8d15a 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -6735,7 +6735,7 @@ vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
 #define vmlal_high_lane_s16(a, b, c, d)                                 \
   __extension__                                                         \
     ({                                                                  \
-       int16x8_t c_ = (c);                                              \
+       int16x4_t c_ = (c);                                              \
        int16x8_t b_ = (b);                                              \
        int32x4_t a_ = (a);                                              \
        int32x4_t result;                                                \
@@ -6749,7 +6749,7 @@ vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
 #define vmlal_high_lane_s32(a, b, c, d)                                 \
   __extension__                                                         \
     ({                                                                  \
-       int32x4_t c_ = (c);                                              \
+       int32x2_t c_ = (c);                                              \
        int32x4_t b_ = (b);                                              \
        int64x2_t a_ = (a);                                              \
        int64x2_t result;                                                \
@@ -6763,7 +6763,7 @@ vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
 #define vmlal_high_lane_u16(a, b, c, d)                                 \
   __extension__                                                         \
     ({                                                                  \
-       uint16x8_t c_ = (c);                                             \
+       uint16x4_t c_ = (c);                                             \
        uint16x8_t b_ = (b);                                             \
        uint32x4_t a_ = (a);                                             \
        uint32x4_t result;                                               \
@@ -6777,7 +6777,7 @@ vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
 #define vmlal_high_lane_u32(a, b, c, d)                                 \
   __extension__                                                         \
     ({                                                                  \
-       uint32x4_t c_ = (c);                                             \
+       uint32x2_t c_ = (c);                                             \
        uint32x4_t b_ = (b);                                             \
        uint64x2_t a_ = (a);                                             \
        uint64x2_t result;                                               \
@@ -7423,7 +7423,7 @@ vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
 #define vmlsl_high_lane_s16(a, b, c, d)                                 \
   __extension__                                                         \
     ({                                                                  \
-       int16x8_t c_ = (c);                                              \
+       int16x4_t c_ = (c);                                              \
        int16x8_t b_ = (b);                                              \
        int32x4_t a_ = (a);                                              \
        int32x4_t result;                                                \
@@ -7437,7 +7437,7 @@ vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
 #define vmlsl_high_lane_s32(a, b, c, d)                                 \
   __extension__                                                         \
     ({                                                                  \
-       int32x4_t c_ = (c);                                              \
+       int32x2_t c_ = (c);                                              \
        int32x4_t b_ = (b);                                              \
        int64x2_t a_ = (a);                                              \
        int64x2_t result;                                                \
@@ -7451,7 +7451,7 @@ vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
 #define vmlsl_high_lane_u16(a, b, c, d)                                 \
   __extension__                                                         \
     ({                                                                  \
-       uint16x8_t c_ = (c);                                             \
+       uint16x4_t c_ = (c);                                             \
        uint16x8_t b_ = (b);                                             \
        uint32x4_t a_ = (a);                                             \
        uint32x4_t result;                                               \
@@ -7465,7 +7465,7 @@ vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
 #define vmlsl_high_lane_u32(a, b, c, d)                                 \
   __extension__                                                         \
     ({                                                                  \
-       uint32x4_t c_ = (c);                                             \
+       uint32x2_t c_ = (c);                                             \
        uint32x4_t b_ = (b);                                             \
        uint64x2_t a_ = (a);                                             \
        uint64x2_t result;                                               \

[PATCH][AArch64] Fix argument types for some high_lane* intrinsics implemented in assembly

Reply via email to