From: Matthias Kretz <kr...@kde.org> Intrinsics types for NEON differ from gnu::vector_size types now. This requires explicit specializations for __intrinsic_type and a new __is_intrinsic_type trait.
libstdc++-v3/ChangeLog: * include/experimental/bits/simd.h (__is_intrinsic_type): New internal type trait. Alias for __is_vector_type on x86. (_VectorTraitsImpl): Enable for __intrinsic_type in addition for __vector_type. (__intrin_bitcast): Allow casting to & from vector & intrinsic types. (__intrinsic_type): Explicitly specialize for NEON intrinsic vector types. --- libstdc++-v3/include/experimental/bits/simd.h | 70 +++++++++++++++++-- 1 file changed, 66 insertions(+), 4 deletions(-) diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/ include/experimental/bits/simd.h index 00eec50d64f..d56176210df 100644 --- a/libstdc++-v3/include/experimental/bits/simd.h +++ b/libstdc++-v3/include/experimental/bits/simd.h @@ -1379,13 +1379,35 @@ template <typename _Tp> template <typename _Tp> inline constexpr bool __is_vector_type_v = __is_vector_type<_Tp>::value; +// }}} +// __is_intrinsic_type {{{ +#if _GLIBCXX_SIMD_HAVE_SSE_ABI +template <typename _Tp> + using __is_intrinsic_type = __is_vector_type<_Tp>; +#else // not SSE (x86) +template <typename _Tp, typename = void_t<>> + struct __is_intrinsic_type : false_type {}; + +template <typename _Tp> + struct __is_intrinsic_type< + _Tp, void_t<typename __intrinsic_type< + remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>> + : is_same<_Tp, typename __intrinsic_type< + remove_reference_t<decltype(declval<_Tp>()[0])>, + sizeof(_Tp)>::type> {}; +#endif + +template <typename _Tp> + inline constexpr bool __is_intrinsic_type_v = __is_intrinsic_type<_Tp>::value; + // }}} // _VectorTraits{{{ template <typename _Tp, typename = void_t<>> struct _VectorTraitsImpl; template <typename _Tp> - struct _VectorTraitsImpl<_Tp, enable_if_t<__is_vector_type_v<_Tp>>> + struct _VectorTraitsImpl<_Tp, enable_if_t<__is_vector_type_v<_Tp> + || __is_intrinsic_type_v<_Tp>>> { using type = _Tp; using value_type = remove_reference_t<decltype(declval<_Tp>()[0])>; @@ -1457,7 +1479,8 @@ template <typename _To, typename _From> _GLIBCXX_SIMD_INTRINSIC constexpr _To __intrin_bitcast(_From __v) { - static_assert(__is_vector_type_v<_From> && __is_vector_type_v<_To>); + static_assert((__is_vector_type_v<_From> || __is_intrinsic_type_v<_From>) + && (__is_vector_type_v<_To> || __is_intrinsic_type_v<_To>)); if constexpr (sizeof(_To) == sizeof(_From)) return reinterpret_cast<_To>(__v); else if constexpr (sizeof(_From) > sizeof(_To)) @@ -2183,16 +2206,55 @@ template <typename _Tp, size_t _Bytes> #endif // _GLIBCXX_SIMD_HAVE_SSE_ABI // __intrinsic_type (ARM){{{ #if _GLIBCXX_SIMD_HAVE_NEON +template <> + struct __intrinsic_type<float, 8, void> + { using type = float32x2_t; }; + +template <> + struct __intrinsic_type<float, 16, void> + { using type = float32x4_t; }; + +#if _GLIBCXX_SIMD_HAVE_NEON_A64 +template <> + struct __intrinsic_type<double, 8, void> + { using type = float64x1_t; }; + +template <> + struct __intrinsic_type<double, 16, void> + { using type = float64x2_t; }; +#endif + +#define _GLIBCXX_SIMD_ARM_INTRIN(_Bits, _Np) \ +template <> \ + struct __intrinsic_type<__int_with_sizeof_t<_Bits / 8>, \ + _Np * _Bits / 8, void> \ + { using type = int##_Bits##x##_Np##_t; }; \ +template <> \ + struct __intrinsic_type<make_unsigned_t<__int_with_sizeof_t<_Bits / 8>>, \ + _Np * _Bits / 8, void> \ + { using type = uint##_Bits##x##_Np##_t; } +_GLIBCXX_SIMD_ARM_INTRIN(8, 8); +_GLIBCXX_SIMD_ARM_INTRIN(8, 16); +_GLIBCXX_SIMD_ARM_INTRIN(16, 4); +_GLIBCXX_SIMD_ARM_INTRIN(16, 8); +_GLIBCXX_SIMD_ARM_INTRIN(32, 2); +_GLIBCXX_SIMD_ARM_INTRIN(32, 4); +_GLIBCXX_SIMD_ARM_INTRIN(64, 1); +_GLIBCXX_SIMD_ARM_INTRIN(64, 2); +#undef _GLIBCXX_SIMD_ARM_INTRIN + template <typename _Tp, size_t _Bytes> struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>> { - static constexpr int _S_VBytes = _Bytes <= 8 ? 8 : 16; + static constexpr int _SVecBytes = _Bytes <= 8 ? 8 : 16; using _Ip = __int_for_sizeof_t<_Tp>; using _Up = conditional_t< is_floating_point_v<_Tp>, _Tp, conditional_t<is_unsigned_v<_Tp>, make_unsigned_t<_Ip>, _Ip>>; - using type [[__gnu__::__vector_size__(_S_VBytes)]] = _Up; + static_assert(!is_same_v<_Tp, _Up> || _SVecBytes != _Bytes, + "should use explicit specialization above"); + using type = typename __intrinsic_type<_Up, _SVecBytes>::type; }; #endif // _GLIBCXX_SIMD_HAVE_NEON -- ────────────────────────────────────────────────────────────────────────── Dr. Matthias Kretz https://mattkretz.github.io GSI Helmholtz Centre for Heavy Ion Research https://gsi.de std::experimental::simd https://github.com/VcDevel/std-simd ──────────────────────────────────────────────────────────────────────────