On Thu, 22 Jul 2021 at 16:03, Richard Earnshaw <richard.earns...@foss.arm.com> wrote: > > > > On 22/07/2021 08:45, Prathamesh Kulkarni via Gcc-patches wrote: > > Hi, > > The attached patch removes calls to builtins from vshl_n intrinsics, > > and replacing them > > with left shift operator. The patch passes bootstrap+test on > > arm-linux-gnueabihf. > > > > Altho, I noticed, that the patch causes 3 extra registers to spill > > using << instead > > of the builtin for vshl_n.c. Could that be perhaps due to inlining of > > intrinsics ? > > Before patch, the shift operation was performed by call to > > __builtin_neon_vshl<type> (__a, __b) > > and now it's inlined to __a << __b, which might result in increased > > register pressure ? > > > > Thanks, > > Prathamesh > > > > > You're missing a ChangeLog for the patch. Sorry, updated in this patch. > > However, I'm not sure about this. The register shift form of VSHL > performs a right shift if the value is negative, which is UB if you > write `<<` instead. > > Have I missed something here? Hi Richard, According to this article: https://developer.arm.com/documentation/den0018/a/NEON-Intrinsics-Reference/Shift/VSHL-N For vshl_n, the shift amount is always in the non-negative range for all types.
I tried using vshl_n_s32 (a, -1), and the compiler emitted following diagnostic: foo.c: In function ‘main’: foo.c:17:1: error: constant -1 out of range 0 - 31 17 | } | ^ So, is the attached patch OK ? Thanks, Prathamesh > > R.
2021-22-07 Prathamesh Kulkarni <prathamesh.kulka...@linaro.org> PR target/66791 * config/arm/arm_neon.h (vshl_n_s8): Replace call to builtin with left shift operator. (vshl_n_s16): Likewise. (vshl_n_s32): Likewise. (vshl_n_s64): Likewise. (vshl_n_u8): Likewise. (vshl_n_u16): Likewise. (vshl_n_u32): Likewise. (vshl_n_u64): Likewise. (vshlq_n_s8): Likewise. (vshlq_n_s16): Likewise. (vshlq_n_s32): Likewise. (vshlq_n_s64): Likewise. (vshlq_n_u8): Likewise. (vshlq_n_u16): Likewise. (vshlq_n_u32): Likewise. (vshlq_n_u64): Likewise. * config/arm/arm_neon_builtins.def (vshl_n): Remove entry. diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 41b596b5fc6..f5c85eb43e7 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -4887,112 +4887,112 @@ __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vshl_n_s8 (int8x8_t __a, const int __b) { - return (int8x8_t)__builtin_neon_vshl_nv8qi (__a, __b); + return __a << __b; } __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vshl_n_s16 (int16x4_t __a, const int __b) { - return (int16x4_t)__builtin_neon_vshl_nv4hi (__a, __b); + return __a << __b; } __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vshl_n_s32 (int32x2_t __a, const int __b) { - return (int32x2_t)__builtin_neon_vshl_nv2si (__a, __b); + return __a << __b; } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vshl_n_s64 (int64x1_t __a, const int __b) { - return (int64x1_t)__builtin_neon_vshl_ndi (__a, __b); + return __a << __b; } __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vshl_n_u8 (uint8x8_t __a, const int __b) { - return (uint8x8_t)__builtin_neon_vshl_nv8qi ((int8x8_t) __a, __b); + return __a << __b; } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vshl_n_u16 (uint16x4_t __a, const int __b) { - return (uint16x4_t)__builtin_neon_vshl_nv4hi ((int16x4_t) __a, __b); + return __a << __b; } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vshl_n_u32 (uint32x2_t __a, const int __b) { - return (uint32x2_t)__builtin_neon_vshl_nv2si ((int32x2_t) __a, __b); + return __a << __b; } __extension__ extern __inline uint64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vshl_n_u64 (uint64x1_t __a, const int __b) { - return (uint64x1_t)__builtin_neon_vshl_ndi ((int64x1_t) __a, __b); + return __a << __b; } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vshlq_n_s8 (int8x16_t __a, const int __b) { - return (int8x16_t)__builtin_neon_vshl_nv16qi (__a, __b); + return __a << __b; } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vshlq_n_s16 (int16x8_t __a, const int __b) { - return (int16x8_t)__builtin_neon_vshl_nv8hi (__a, __b); + return __a << __b; } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vshlq_n_s32 (int32x4_t __a, const int __b) { - return (int32x4_t)__builtin_neon_vshl_nv4si (__a, __b); + return __a << __b; } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vshlq_n_s64 (int64x2_t __a, const int __b) { - return (int64x2_t)__builtin_neon_vshl_nv2di (__a, __b); + return __a << __b; } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vshlq_n_u8 (uint8x16_t __a, const int __b) { - return (uint8x16_t)__builtin_neon_vshl_nv16qi ((int8x16_t) __a, __b); + return __a << __b; } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vshlq_n_u16 (uint16x8_t __a, const int __b) { - return (uint16x8_t)__builtin_neon_vshl_nv8hi ((int16x8_t) __a, __b); + return __a << __b; } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vshlq_n_u32 (uint32x4_t __a, const int __b) { - return (uint32x4_t)__builtin_neon_vshl_nv4si ((int32x4_t) __a, __b); + return __a << __b; } __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vshlq_n_u64 (uint64x2_t __a, const int __b) { - return (uint64x2_t)__builtin_neon_vshl_nv2di ((int64x2_t) __a, __b); + return __a << __b; } __extension__ extern __inline int8x8_t diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 70438ac1848..ea6bd43a035 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -103,7 +103,6 @@ VAR3 (BINOP_IMM, vqrshrns_n, v8hi, v4si, v2di) VAR3 (BINOP_IMM, vqrshrnu_n, v8hi, v4si, v2di) VAR3 (BINOP_IMM, vqshrun_n, v8hi, v4si, v2di) VAR3 (BINOP_IMM, vqrshrun_n, v8hi, v4si, v2di) -VAR8 (BINOP_IMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) VAR8 (BINOP_IMM, vqshl_s_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) VAR8 (BINOP_IMM, vqshl_u_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) VAR8 (BINOP_IMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)