timshen created this revision.
timshen added a reviewer: mclow.lists.
Herald added subscribers: christof, sanjoy.
Herald added a reviewer: EricWF.
Simply use __attribute__((align_value(...))).

https://reviews.llvm.org/D44658

Files:
  libcxx/include/experimental/simd
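For context, align_value is a Clang attribute that asserts a minimum
alignment on a pointer. A minimal standalone sketch, not part of the
patch (the function and variable names are invented):

  // With align_value(32), Clang may vectorize this loop using aligned
  // SIMD loads and stores, since the pointer is promised to be at
  // least 32-byte aligned.
  void scale(float* __attribute__((align_value(32))) data, int n) {
    for (int i = 0; i < n; i++) {
      data[i] *= 2.0f;
    }
  }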
Index: libcxx/include/experimental/simd
===================================================================
--- libcxx/include/experimental/simd
+++ libcxx/include/experimental/simd
@@ -1238,25 +1238,36 @@
                 "Element type should be vectorizable");
 };
 
-template <class _SimdType, class _ValueType = typename _SimdType::value_type>
-struct memory_alignment;
+template <class _ValueType, class _Up, class _Flags>
+struct __memory_alignment_impl : std::integral_constant<size_t, alignof(_Up)> {
+};
 
-// TODO: May extend this after implementing vector_aligned.
 template <class _Tp, class _Abi, class _Up>
-struct memory_alignment<simd<_Tp, _Abi>, _Up>
+struct __memory_alignment_impl<simd<_Tp, _Abi>, _Up, vector_aligned_tag>
     : std::integral_constant<size_t, alignof(simd<_Tp, _Abi>)> {};
 
-template <class _Tp, class _Abi>
-struct memory_alignment<simd_mask<_Tp, _Abi>, bool>
-    : std::integral_constant<size_t, alignof(simd_mask<_Tp, _Abi>)> {};
+// TODO: Figure out a useful alignment based on simd_mask load and store
+// implementation. Currently, make sure that the buffer is suitable for aligned
+// SIMD load.
+template <class _Tp, class _Abi, class _Up>
+struct __memory_alignment_impl<simd_mask<_Tp, _Abi>, _Up, vector_aligned_tag>
+    : std::integral_constant<size_t, alignof(simd<uint8_t, _Abi>)> {};
+
+template <class _ValueType, class _Up, size_t __alignment>
+struct __memory_alignment_impl<_ValueType, _Up, overaligned_tag<__alignment>>
+    : std::integral_constant<size_t, __alignment> {};
+
+template <class _SimdType, class _Up = typename _SimdType::value_type>
+struct memory_alignment
+    : __memory_alignment_impl<_SimdType, _Up, vector_aligned_tag> {};
 
 #if _LIBCPP_STD_VER >= 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
 template <class _Tp, class _Abi = simd_abi::compatible<_Tp>>
 _LIBCPP_INLINE_VAR constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value;
 
-template <class _Tp, class _Up = typename _Tp::value_type>
+template <class _SimdType, class _Up = typename _SimdType::value_type>
 _LIBCPP_INLINE_VAR constexpr size_t memory_alignment_v =
-    memory_alignment<_Tp, _Up>::value;
+    memory_alignment<_SimdType, _Up>::value;
 #endif
 
 // class template simd [simd.class]
@@ -1932,6 +1943,22 @@
     (void)__unused;
   }
 
+  template <size_t __alignment, class _Up>
+  void __copy_from_impl(const _Up* __buffer
+                            __attribute__((align_value(__alignment)))) {
+    for (size_t __i = 0; __i < size(); __i++) {
+      (*this)[__i] = static_cast<_Tp>(__buffer[__i]);
+    }
+  }
+
+  template <size_t __alignment, class _Up>
+  void __copy_to_impl(_Up* __buffer
+                          __attribute__((align_value(__alignment)))) const {
+    for (size_t __i = 0; __i < size(); __i++) {
+      __buffer[__i] = static_cast<_Up>((*this)[__i]);
+    }
+  }
+
 public:
   // implicit type conversion constructor
   template <class _Up,
@@ -1989,10 +2016,8 @@
       class = typename std::enable_if<__vectorizable<_Up>()>::type,
      class = typename std::enable_if<is_simd_flag_type<_Flags>::value>::type>
   simd(const _Up* __buffer, _Flags) {
-    // TODO: optimize for overaligned flags
-    for (size_t __i = 0; __i < size(); __i++) {
-      (*this)[__i] = static_cast<_Tp>(__buffer[__i]);
-    }
+    __copy_from_impl<__memory_alignment_impl<simd, _Up, _Flags>::value>(
+        __buffer);
   }
 
   // loads [simd.load]
@@ -2008,10 +2033,7 @@
   typename std::enable_if<__vectorizable<_Up>() &&
                           is_simd_flag_type<_Flags>::value>::type
   copy_to(_Up* __buffer, _Flags) const {
-    // TODO: optimize for overaligned flags
-    for (size_t __i = 0; __i < size(); __i++) {
-      __buffer[__i] = static_cast<_Up>((*this)[__i]);
-    }
+    __copy_to_impl<__memory_alignment_impl<simd, _Up, _Flags>::value>(__buffer);
   }
 
   // scalar access [simd.subscr]
@@ -2265,6 +2287,24 @@
 
   friend struct __simd_mask_friend;
 
+  // Use a static helper, only because Clang 3.8 crashes on the non-static form.
+  template <size_t __alignment>
+  static void __copy_from_impl(simd_mask* __mask, const bool* __buffer
+                                   __attribute__((align_value(__alignment)))) {
+    for (size_t __i = 0; __i < size(); __i++) {
+      (*__mask)[__i] = __buffer[__i];
+    }
+  }
+
+  // Use a static helper, only because Clang 3.8 crashes on the non-static form.
+  template <size_t __alignment>
+  static void __copy_to_impl(const simd_mask* __mask, bool* __buffer
+                                 __attribute__((align_value(__alignment)))) {
+    for (size_t __i = 0; __i < size(); __i++) {
+      __buffer[__i] = (*__mask)[__i];
+    }
+  }
+
 public:
   using value_type = bool;
   using reference = __simd_reference<bool, __element_type, _Abi>;
@@ -2300,10 +2340,8 @@
   template <class _Flags, class = typename std::enable_if<
                               is_simd_flag_type<_Flags>::value>::type>
   simd_mask(const value_type* __buffer, _Flags) {
-    // TODO: optimize for overaligned flags
-    for (size_t __i = 0; __i < size(); __i++) {
-      (*this)[__i] = __buffer[__i];
-    }
+    __copy_from_impl<__memory_alignment_impl<simd_mask, bool, _Flags>::value>(
+        this, __buffer);
   }
 
   template <class _Up = _Tp>
@@ -2336,10 +2374,8 @@
   template <class _Flags>
   typename std::enable_if<is_simd_flag_type<_Flags>::value>::type
   copy_to(value_type* __buffer, _Flags) const {
-    // TODO: optimize for overaligned flags
-    for (size_t __i = 0; __i < size(); __i++) {
-      __buffer[__i] = (*this)[__i];
-    }
+    __copy_to_impl<__memory_alignment_impl<simd_mask, bool, _Flags>::value>(
+        this, __buffer);
   }
 
   // scalar access [simd.mask.subscr]
@@ -2401,60 +2437,66 @@
   }
 };
 
-template <class _Tp, class _Abi, class _Up, class _Flags>
+template <size_t __alignment, class _Tp, class _Abi, class _Up>
 void __mask_copy_to(const simd<_Tp, _Abi>& __v, const simd_mask<_Tp, _Abi>& __m,
-                    _Up* __buffer, _Flags) {
-  // TODO: optimize for overaligned flags
+                    _Up* __buffer __attribute__((align_value(__alignment)))) {
+  // TODO: optimize based on bool's bit pattern.
   for (size_t __i = 0; __i < __v.size(); __i++) {
     if (__m[__i]) {
      __buffer[__i] = static_cast<_Up>(__v[__i]);
     }
   }
 }
 
-template <class _Tp, class _Abi, class _Up, class _Flags>
+template <size_t __alignment, class _Tp, class _Abi, class _Up>
 void __mask_copy_to(const simd_mask<_Tp, _Abi>& __v,
-                    const simd_mask<_Tp, _Abi>& __m, _Up* __buffer, _Flags) {
+                    const simd_mask<_Tp, _Abi>& __m,
+                    _Up* __buffer __attribute__((align_value(__alignment)))) {
   // TODO: optimize based on bool's bit pattern.
   for (size_t __i = 0; __i < __v.size(); __i++) {
     if (__m[__i]) {
       __buffer[__i] = static_cast<_Up>(__v[__i]);
     }
   }
 }
 
-template <class _Tp, class _Up, class _Flags>
-void __mask_copy_to(_Tp __val, bool __m, _Up* __buffer, _Flags) {
+template <size_t __alignment, class _Tp, class _Up>
+void __mask_copy_to(_Tp __val, bool __m,
+                    _Up* __buffer __attribute__((align_value(__alignment)))) {
   if (__m) {
     *__buffer = static_cast<_Up>(__val);
   }
 }
 
-template <class _Tp, class _Abi, class _Up, class _Flags>
+template <size_t __alignment, class _Tp, class _Abi, class _Up>
 void __mask_copy_from(simd<_Tp, _Abi>& __v, const simd_mask<_Tp, _Abi>& __m,
-                      const _Up* __buffer, _Flags) {
-  // TODO: optimize for overaligned flags
+                      const _Up* __buffer
+                          __attribute__((align_value(__alignment)))) {
+  // TODO: optimize based on bool's bit pattern.
   for (size_t __i = 0; __i < __v.size(); __i++) {
     if (__m[__i]) {
       __v[__i] = static_cast<_Tp>(__buffer[__i]);
     }
   }
 }
 
-template <class _Tp, class _Abi, class _Up, class _Flags>
+template <size_t __alignment, class _Tp, class _Abi, class _Up>
 void __mask_copy_from(simd_mask<_Tp, _Abi>& __v,
-                      const simd_mask<_Tp, _Abi>& __m, const _Up* __buffer,
-                      _Flags) {
+                      const simd_mask<_Tp, _Abi>& __m,
+                      const _Up* __buffer
+                          __attribute__((align_value(__alignment)))) {
   // TODO: optimize based on bool's bit pattern.
   for (size_t __i = 0; __i < __v.size(); __i++) {
     if (__m[__i]) {
      __v[__i] = static_cast<bool>(__buffer[__i]);
     }
   }
 }
 
-template <class _Tp, class _Up, class _Flags>
-void __mask_copy_from(_Tp& __val, bool __m, const _Up* __buffer, _Flags) {
+template <size_t __alignment, class _Tp, class _Up>
+void __mask_copy_from(_Tp& __val, bool __m,
+                      const _Up* __buffer
+                          __attribute__((align_value(__alignment)))) {
   if (__m) {
     __val = static_cast<_Tp>(*__buffer);
   }
@@ -2545,7 +2587,8 @@
   typename std::enable_if<std::is_same<_Tp, _Up>::value ||
                           !std::is_same<_Tp, bool>::value>::type
   copy_to(_Up* __buffer, _Flags) const&& {
-    __mask_copy_to(__v_, __m_, __buffer, _Flags());
+    __mask_copy_to<__memory_alignment_impl<_ValueType, _Up, _Flags>::value>(
+        __v_, __m_, __buffer);
   }
 };
 
@@ -2664,7 +2707,8 @@
   typename std::enable_if<std::is_same<_Tp, _Up>::value ||
                           !std::is_same<_Tp, bool>::value>::type
   copy_from(const _Up* __buffer, _Flags) {
-    __mask_copy_from(this->__v_, this->__m_, __buffer, _Flags());
+    __mask_copy_from<__memory_alignment_impl<_ValueType, _Up, _Flags>::value>(
+        this->__v_, this->__m_, __buffer);
   }
 };
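To illustrate the intended effect from the caller's side, here is a
hypothetical usage sketch (the names buffer, v, and roundtrip are
invented; it assumes the vector_aligned_tag and memory_alignment
interfaces as defined in the patch above):

  #include <experimental/simd>
  namespace ex = std::experimental;

  using floatv = ex::simd<float, ex::simd_abi::compatible<float>>;

  // The buffer provides the alignment that memory_alignment reports, so
  // the tagged load and store below may compile to aligned SIMD memory
  // operations rather than element-wise copies of unknown alignment.
  alignas(ex::memory_alignment<floatv>::value) float buffer[floatv::size()];

  void roundtrip() {
    floatv v(buffer, ex::vector_aligned_tag());   // aligned load
    v.copy_to(buffer, ex::vector_aligned_tag());  // aligned store
  }

With overaligned_tag<N> the propagated alignment would instead be N, and
with any other flag the primary __memory_alignment_impl template falls
back to alignof(_Up).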