Hi Damjan,

There is a compile error when building the debug image, triggered by the code segment below. The error message is pasted as follows.
>> static_always_inline u8x16
>> u8x16_word_shift_left (u8x16 x, const int n)
>> { return vextq_u8 (u8x16_splat (0), x, 16 - n); }

In function 'vextq_u8',
    inlined from 'u8x16_word_shift_left' at vpp/src/vppinfra/vector_neon.h:188:10:
/usr/lib/gcc/aarch64-linux-gnu/8/include/arm_neon.h:16646:3: error: lane index must be a constant immediate
   __AARCH64_LANE_CHECK (__a, __c);
   ^~~~~~~~~~~~~~~~~~~~

The root cause is that the third parameter of vextq_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) requires a compile-time immediate value rather than a const variable, because the intrinsic eventually maps down to the instruction

  EXT Vd.16B, Vn.16B, Vm.16B, #n

where #n is the operand __c. Unfortunately, there is no good way to express that requirement in the C language, so "const int" is the best the compiler team can do in the current implementation, and the __AARCH64_LANE_CHECK macro is there to enforce the constraint.

This failure is observed when building the debug image but not the release image, because in the release build GCC can obtain the immediate value for operand __c at compile time through the inlined functions. If the __always_inline attribute is removed, GCC can no longer derive the immediate value and reports the error. That is why the VPP release image builds fine while the debug image fails. The example below illustrates this with GCC:

#include "arm_neon.h"

static inline __attribute__ ((__always_inline__)) uint8x16_t
shift_left (uint8x16_t x, const int n)
{
  return vextq_u8 (x, x, n + 1);
}

uint8x16_t
shift_left2 (uint8x16_t x)
{
  return shift_left (x, 1);
}

const int a = 3;

uint8x16_t
shift_left3 (uint8x16_t x)
{
  return shift_left (x, a);
}

However, this inline-based approach is not accepted by Clang: Clang reports an error for the code above regardless of whether the function is inlined.

For the debug image, I tried adding the __always_inline attribute to u8x16_word_shift_left (), but that alone does not work. It seems the callers of u8x16_word_shift_left (), and even callers several layers further up, would all need the __always_inline attribute as well. I did not manage to resolve the errors this way, as too many places would require such a change.

One solution I have in mind is to wrap vextq_u8 () with an array of function pointers, _vextq_u8[], where each entry calls vextq_u8 with a distinct immediate value, so that every call site of the intrinsic sees a constant. Below is some pseudo code; the complete patch is attached at the end of the email. Could you please share your opinion on this solution, or suggest another way to address the issue?

static u8x16
u8x16_word_shift_left (u8x16 x, const int n)
{ return (u8x16) _vextq_u8 [16 - n] (vdupq_n_u8 (0), x); }

static u8x16
_vextq_u8_0 (u8x16 a, u8x16 b)
{ return (u8x16) vextq_u8 (a, b, 0); }

static u8x16
_vextq_u8_1 (u8x16 a, u8x16 b)
{ return (u8x16) vextq_u8 (a, b, 1); }

....

static u8x16
_vextq_u8_15 (u8x16 a, u8x16 b)
{ return (u8x16) vextq_u8 (a, b, 15); }

static u8x16 (* _vextq_u8 [16]) (u8x16, u8x16) = {
  _vextq_u8_0,
  _vextq_u8_1,
  _vextq_u8_2,
  _vextq_u8_3,
  _vextq_u8_4,
  _vextq_u8_5,
  _vextq_u8_6,
  _vextq_u8_7,
  _vextq_u8_8,
  _vextq_u8_9,
  _vextq_u8_10,
  _vextq_u8_11,
  _vextq_u8_12,
  _vextq_u8_13,
  _vextq_u8_14,
  _vextq_u8_15,
};
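To make the idea concrete, here is a minimal standalone sketch of the u8x16 case only (the demo_ names are illustrative placeholders, not the names used in the patch). Every vextq_u8 call site passes a literal immediate, so the lane check should be satisfied even at -O0, under both GCC and Clang:

#include <arm_neon.h>

/* One helper per immediate; each vextq_u8 call site passes a literal
   constant, so __AARCH64_LANE_CHECK is satisfied independent of any
   inlining decisions. */
#define _(n) \
static uint8x16_t \
demo_vextq_u8_##n (uint8x16_t a, uint8x16_t b) \
{ return vextq_u8 (a, b, n); }
_(0) _(1) _(2) _(3) _(4) _(5) _(6) _(7)
_(8) _(9) _(10) _(11) _(12) _(13) _(14) _(15)
#undef _

/* Dispatch table indexed by the desired immediate. */
static uint8x16_t (*demo_vextq_u8[16]) (uint8x16_t, uint8x16_t) = {
#define _(n) demo_vextq_u8_##n,
  _(0) _(1) _(2) _(3) _(4) _(5) _(6) _(7)
  _(8) _(9) _(10) _(11) _(12) _(13) _(14) _(15)
#undef _
};

/* Byte-wise shift left by n (1 <= n <= 16); n may be a runtime value.
   For n == 16, entry 0 returns vextq_u8 (zero, x, 0), i.e. all zeros. */
uint8x16_t
demo_word_shift_left (uint8x16_t x, int n)
{
  return demo_vextq_u8[16 - n] (vdupq_n_u8 (0), x);
}

The trade-off to discuss is that each shift then goes through an indirect call rather than a single inline EXT instruction.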
Complete patch:

diff --git a/src/vppinfra/vector_neon.h b/src/vppinfra/vector_neon.h
index 3855f55ad..de77d0864 100644
--- a/src/vppinfra/vector_neon.h
+++ b/src/vppinfra/vector_neon.h
@@ -46,6 +46,81 @@ u8x16_compare_byte_mask (u8x16 v)
   return (u32) (vgetq_lane_u64 (x64, 0) + (vgetq_lane_u64 (x64, 1) << 8));
 }
 
+/* *INDENT-OFF* */
+#define foreach_neon_vec128i \
+ _(i,8,16,s8,0) _(i,8,16,s8,1) _(i,8,16,s8,2) _(i,8,16,s8,3) \
+ _(i,8,16,s8,4) _(i,8,16,s8,5) _(i,8,16,s8,6) _(i,8,16,s8,7) \
+ _(i,8,16,s8,8) _(i,8,16,s8,9) _(i,8,16,s8,10) _(i,8,16,s8,11) \
+ _(i,8,16,s8,12) _(i,8,16,s8,13) _(i,8,16,s8,14) _(i,8,16,s8,15) \
+ _(i,16,8,s16,0) _(i,16,8,s16,1) _(i,16,8,s16,2) _(i,16,8,s16,3) \
+ _(i,16,8,s16,4) _(i,16,8,s16,5) _(i,16,8,s16,6) _(i,16,8,s16,7) \
+ _(i,32,4,s32,0) _(i,32,4,s32,1) _(i,32,4,s32,2) _(i,32,4,s32,3) \
+ _(i,64,2,s64,0) _(i,64,2,s64,1)
+#define foreach_neon_vec128u \
+ _(u,8,16,u8,0) _(u,8,16,u8,1) _(u,8,16,u8,2) _(u,8,16,u8,3) \
+ _(u,8,16,u8,4) _(u,8,16,u8,5) _(u,8,16,u8,6) _(u,8,16,u8,7) \
+ _(u,8,16,u8,8) _(u,8,16,u8,9) _(u,8,16,u8,10) _(u,8,16,u8,11) \
+ _(u,8,16,u8,12) _(u,8,16,u8,13) _(u,8,16,u8,14) _(u,8,16,u8,15) \
+ _(u,16,8,u16,0) _(u,16,8,u16,1) _(u,16,8,u16,2) _(u,16,8,u16,3) \
+ _(u,16,8,u16,4) _(u,16,8,u16,5) _(u,16,8,u16,6) _(u,16,8,u16,7) \
+ _(u,32,4,u32,0) _(u,32,4,u32,1) _(u,32,4,u32,2) _(u,32,4,u32,3) \
+ _(u,64,2,u64,0) _(u,64,2,u64,1)
+
+#define _(t, s, c, i, n) \
+static t##s##x##c \
+_vextq_##i##_##n (t##s##x##c a, t##s##x##c b) \
+{ return (t##s##x##c) vextq_##i (a, b, n); }
+
+foreach_neon_vec128i foreach_neon_vec128u
+#undef _
+
+#define _(t, i) \
+static t##8x16 (* _vextq_##i##8 [16]) (t##8x16, t##8x16) = { \
+ _vextq_##i##8_0, \
+ _vextq_##i##8_1, \
+ _vextq_##i##8_2, \
+ _vextq_##i##8_3, \
+ _vextq_##i##8_4, \
+ _vextq_##i##8_5, \
+ _vextq_##i##8_6, \
+ _vextq_##i##8_7, \
+ _vextq_##i##8_8, \
+ _vextq_##i##8_9, \
+ _vextq_##i##8_10, \
+ _vextq_##i##8_11, \
+ _vextq_##i##8_12, \
+ _vextq_##i##8_13, \
+ _vextq_##i##8_14, \
+ _vextq_##i##8_15, \
+}; \
+static t##16x8 (* _vextq_##i##16 [8]) (t##16x8, t##16x8) = { \
+ _vextq_##i##16_0, \
+ _vextq_##i##16_1, \
+ _vextq_##i##16_2, \
+ _vextq_##i##16_3, \
+ _vextq_##i##16_4, \
+ _vextq_##i##16_5, \
+ _vextq_##i##16_6, \
+ _vextq_##i##16_7, \
+}; \
+static t##32x4 (* _vextq_##i##32 [4]) (t##32x4, t##32x4) = { \
+ _vextq_##i##32_0, \
+ _vextq_##i##32_1, \
+ _vextq_##i##32_2, \
+ _vextq_##i##32_3, \
+}; \
+static t##64x2 (* _vextq_##i##64 [2]) (t##64x2, t##64x2) = { \
+ (void *) _vextq_##i##64_0, \
+ (void *) _vextq_##i##64_1, \
+};
+_(i, s)
+_(u, u)
+#undef _
+#undef foreach_neon_vec128i
+#undef foreach_neon_vec128u
+/* *INDENT-ON* */
+
 /* *INDENT-OFF* */
 #define foreach_neon_vec128i \
 _(i,8,16,s8) _(i,16,8,s16) _(i,32,4,s32) _(i,64,2,s64)
@@ -90,7 +165,17 @@ t##s##x##c##_is_greater (t##s##x##c a, t##s##x##c b) \
 \
 static_always_inline t##s##x##c \
 t##s##x##c##_blend (t##s##x##c dst, t##s##x##c src, u##s##x##c mask) \
-{ return (t##s##x##c) vbslq_##i (mask, src, dst); }
+{ return (t##s##x##c) vbslq_##i (mask, src, dst); } \
+\
+static_always_inline u##s##x##c \
+t##s##x##c##_word_shift_left (t##s##x##c x, const int n) \
+{ /*ASSERT ((c >= n) && (0 < n));*/ \
+  return (u##s##x##c) _vextq_##i [c - n] (t##s##x##c##_splat (0), x); } \
+\
+static_always_inline u##s##x##c \
+t##s##x##c##_word_shift_right (t##s##x##c x, const int n) \
+{ /*ASSERT ((0 <= n) && (c > n));*/ \
+  return (u##s##x##c) _vextq_##i [n] (x, t##s##x##c##_splat (0)); }
 
 foreach_neon_vec128i foreach_neon_vec128u
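For comparison, a switch-based wrapper might achieve the same effect without function pointers (a hypothetical sketch, not part of the patch; demo_word_shift_left_sw is an illustrative name). Each case hands vextq_u8 an integer constant expression, so the lane check passes under both GCC and Clang, and the compiler can usually fold the switch away when n is itself a compile-time constant:

#include <arm_neon.h>

uint8x16_t
demo_word_shift_left_sw (uint8x16_t x, int n)
{
  uint8x16_t zero = vdupq_n_u8 (0);

  /* Every case passes the constant 16 - i, which folds at compile
     time, so the immediate requirement is met even at -O0. */
#define _(i) case i: return vextq_u8 (zero, x, 16 - i);
  switch (n)
    {
      _(1) _(2) _(3) _(4) _(5) _(6) _(7) _(8)
      _(9) _(10) _(11) _(12) _(13) _(14) _(15) _(16)
    }
#undef _
  return zero;  /* out-of-range n: treat as a full shift */
}

Compared with the function-pointer array, this keeps everything visible to the optimizer, though the table form in the patch is more compact once all the type and width combinations are generated.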
Thanks.