On Thu, 22 Jul 2021 at 16:03, Richard Earnshaw
<richard.earns...@foss.arm.com> wrote:
>
>
>
> On 22/07/2021 08:45, Prathamesh Kulkarni via Gcc-patches wrote:
> > Hi,
> > The attached patch removes calls to builtins from the vshl_n intrinsics,
> > replacing them with the left shift operator.  The patch passes
> > bootstrap+test on arm-linux-gnueabihf.
> >
> > However, I noticed that the patch causes 3 extra register spills in
> > vshl_n.c when using << instead of the builtin.  Could that perhaps be
> > due to inlining of the intrinsics?  Before the patch, the shift was
> > performed by a call to __builtin_neon_vshl<type> (__a, __b); now it is
> > inlined to __a << __b, which might result in increased register
> > pressure?
> >
> > Thanks,
> > Prathamesh
> >
>
>
> You're missing a ChangeLog for the patch.
Sorry, I have included it in this updated patch.
>
> However, I'm not sure about this.  The register shift form of VSHL
> performs a right shift if the value is negative, which is UB if you
> write `<<` instead.
>
> Have I missed something here?
Hi Richard,
According to the Arm documentation:
https://developer.arm.com/documentation/den0018/a/NEON-Intrinsics-Reference/Shift/VSHL-N
for vshl_n the shift amount is a constant that must lie in the non-negative
range (0 to element_bits - 1) for all types.

I tried using vshl_n_s32 (a, -1), and the compiler emitted the following diagnostic:
foo.c: In function ‘main’:
foo.c:17:1: error: constant -1 out of range 0 - 31
   17 | }
      | ^
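
A minimal test along these lines reproduces it (this is not the exact foo.c,
just an illustrative sketch):

#include <arm_neon.h>

int
main (void)
{
  int32x2_t a = vdup_n_s32 (42);
  /* The immediate form only accepts shift counts in 0 .. element_bits - 1,
     so -1 is rejected at compile time.  */
  int32x2_t b = vshl_n_s32 (a, -1);
  return vget_lane_s32 (b, 0);
}

Since the shift count for the _n forms must be a constant in that range, the
plain << never sees a negative shift, so the UB concern for the register-shift
form shouldn't apply here.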

So, is the attached patch OK?

Thanks,
Prathamesh
>
> R.
2021-07-22  Prathamesh Kulkarni  <prathamesh.kulka...@linaro.org>

        PR target/66791
        * config/arm/arm_neon.h (vshl_n_s8): Replace call to builtin with
        left shift operator.
        (vshl_n_s16): Likewise.
        (vshl_n_s32): Likewise.
        (vshl_n_s64): Likewise.
        (vshl_n_u8): Likewise.
        (vshl_n_u16): Likewise.
        (vshl_n_u32): Likewise.
        (vshl_n_u64): Likewise.
        (vshlq_n_s8): Likewise.
        (vshlq_n_s16): Likewise.
        (vshlq_n_s32): Likewise.
        (vshlq_n_s64): Likewise.
        (vshlq_n_u8): Likewise.
        (vshlq_n_u16): Likewise.
        (vshlq_n_u32): Likewise.
        (vshlq_n_u64): Likewise.
        * config/arm/arm_neon_builtins.def (vshl_n): Remove entry.

diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index 41b596b5fc6..f5c85eb43e7 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -4887,112 +4887,112 @@ __extension__ extern __inline int8x8_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vshl_n_s8 (int8x8_t __a, const int __b)
 {
-  return (int8x8_t)__builtin_neon_vshl_nv8qi (__a, __b);
+  return __a << __b;
 }
 
 __extension__ extern __inline int16x4_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vshl_n_s16 (int16x4_t __a, const int __b)
 {
-  return (int16x4_t)__builtin_neon_vshl_nv4hi (__a, __b);
+  return __a << __b;
 }
 
 __extension__ extern __inline int32x2_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vshl_n_s32 (int32x2_t __a, const int __b)
 {
-  return (int32x2_t)__builtin_neon_vshl_nv2si (__a, __b);
+  return __a << __b;
 }
 
 __extension__ extern __inline int64x1_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vshl_n_s64 (int64x1_t __a, const int __b)
 {
-  return (int64x1_t)__builtin_neon_vshl_ndi (__a, __b);
+  return __a << __b;
 }
 
 __extension__ extern __inline uint8x8_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vshl_n_u8 (uint8x8_t __a, const int __b)
 {
-  return (uint8x8_t)__builtin_neon_vshl_nv8qi ((int8x8_t) __a, __b);
+  return __a << __b;
 }
 
 __extension__ extern __inline uint16x4_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vshl_n_u16 (uint16x4_t __a, const int __b)
 {
-  return (uint16x4_t)__builtin_neon_vshl_nv4hi ((int16x4_t) __a, __b);
+  return __a << __b;
 }
 
 __extension__ extern __inline uint32x2_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vshl_n_u32 (uint32x2_t __a, const int __b)
 {
-  return (uint32x2_t)__builtin_neon_vshl_nv2si ((int32x2_t) __a, __b);
+  return __a << __b;
 }
 
 __extension__ extern __inline uint64x1_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vshl_n_u64 (uint64x1_t __a, const int __b)
 {
-  return (uint64x1_t)__builtin_neon_vshl_ndi ((int64x1_t) __a, __b);
+  return __a << __b;
 }
 
 __extension__ extern __inline int8x16_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vshlq_n_s8 (int8x16_t __a, const int __b)
 {
-  return (int8x16_t)__builtin_neon_vshl_nv16qi (__a, __b);
+  return __a << __b;
 }
 
 __extension__ extern __inline int16x8_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vshlq_n_s16 (int16x8_t __a, const int __b)
 {
-  return (int16x8_t)__builtin_neon_vshl_nv8hi (__a, __b);
+  return __a << __b;
 }
 
 __extension__ extern __inline int32x4_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vshlq_n_s32 (int32x4_t __a, const int __b)
 {
-  return (int32x4_t)__builtin_neon_vshl_nv4si (__a, __b);
+  return __a << __b;
 }
 
 __extension__ extern __inline int64x2_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vshlq_n_s64 (int64x2_t __a, const int __b)
 {
-  return (int64x2_t)__builtin_neon_vshl_nv2di (__a, __b);
+  return __a << __b;
 }
 
 __extension__ extern __inline uint8x16_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vshlq_n_u8 (uint8x16_t __a, const int __b)
 {
-  return (uint8x16_t)__builtin_neon_vshl_nv16qi ((int8x16_t) __a, __b);
+  return __a << __b;
 }
 
 __extension__ extern __inline uint16x8_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vshlq_n_u16 (uint16x8_t __a, const int __b)
 {
-  return (uint16x8_t)__builtin_neon_vshl_nv8hi ((int16x8_t) __a, __b);
+  return __a << __b;
 }
 
 __extension__ extern __inline uint32x4_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vshlq_n_u32 (uint32x4_t __a, const int __b)
 {
-  return (uint32x4_t)__builtin_neon_vshl_nv4si ((int32x4_t) __a, __b);
+  return __a << __b;
 }
 
 __extension__ extern __inline uint64x2_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vshlq_n_u64 (uint64x2_t __a, const int __b)
 {
-  return (uint64x2_t)__builtin_neon_vshl_nv2di ((int64x2_t) __a, __b);
+  return __a << __b;
 }
 
 __extension__ extern __inline int8x8_t
diff --git a/gcc/config/arm/arm_neon_builtins.def 
b/gcc/config/arm/arm_neon_builtins.def
index 70438ac1848..ea6bd43a035 100644
--- a/gcc/config/arm/arm_neon_builtins.def
+++ b/gcc/config/arm/arm_neon_builtins.def
@@ -103,7 +103,6 @@ VAR3 (BINOP_IMM, vqrshrns_n, v8hi, v4si, v2di)
 VAR3 (BINOP_IMM, vqrshrnu_n, v8hi, v4si, v2di)
 VAR3 (BINOP_IMM, vqshrun_n, v8hi, v4si, v2di)
 VAR3 (BINOP_IMM, vqrshrun_n, v8hi, v4si, v2di)
-VAR8 (BINOP_IMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
 VAR8 (BINOP_IMM, vqshl_s_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
 VAR8 (BINOP_IMM, vqshl_u_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
 VAR8 (BINOP_IMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
