https://github.com/tru updated https://github.com/llvm/llvm-project/pull/101892
>From 01a49d21c757fa80b3d6cf5cb153840cc94f8830 Mon Sep 17 00:00:00 2001 From: Mark de Wever <ko...@xs4all.nl> Date: Sat, 3 Aug 2024 11:19:00 +0200 Subject: [PATCH] [libc++][bit] Improves rotate functions. (#98032) Investigating #96612 shows our implementation was different from the Standard and could cause UB. Testing the codegen showed quite a bit of assembly generated for these functions. The functions have been written differently which allows Clang to optimize the code to use simple CPU rotate instructions. Fixes: https://github.com/llvm/llvm-project/issues/96612 --- libcxx/include/__bit/rotate.h | 37 +++++++++++++------ .../std/numerics/bit/bitops.rot/rotl.pass.cpp | 4 ++ .../std/numerics/bit/bitops.rot/rotr.pass.cpp | 4 ++ 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/libcxx/include/__bit/rotate.h b/libcxx/include/__bit/rotate.h index d848056c3350d0..90e430e9d04256 100644 --- a/libcxx/include/__bit/rotate.h +++ b/libcxx/include/__bit/rotate.h @@ -20,24 +20,37 @@ _LIBCPP_BEGIN_NAMESPACE_STD +// Writing two full functions for rotl and rotr makes it easier for the compiler +// to optimize the code. On x86 this function becomes the ROL instruction and +// the rotr function becomes the ROR instruction. template <class _Tp> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotr(_Tp __t, int __cnt) _NOEXCEPT { - static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__rotr requires an unsigned integer type"); - const unsigned int __dig = numeric_limits<_Tp>::digits; - if ((__cnt % __dig) == 0) - return __t; +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotl(_Tp __x, int __s) _NOEXCEPT { + static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__rotl requires an unsigned integer type"); + const int __N = numeric_limits<_Tp>::digits; + int __r = __s % __N; + + if (__r == 0) + return __x; - if (__cnt < 0) { - __cnt *= -1; - return (__t << (__cnt % __dig)) | (__t >> (__dig - (__cnt % __dig))); // rotr with negative __cnt is similar to rotl - } + if (__r > 0) + return (__x << __r) | (__x >> (__N - __r)); - return (__t >> (__cnt % __dig)) | (__t << (__dig - (__cnt % __dig))); + return (__x >> -__r) | (__x << (__N + __r)); } template <class _Tp> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotl(_Tp __t, int __cnt) _NOEXCEPT { - return std::__rotr(__t, -__cnt); +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotr(_Tp __x, int __s) _NOEXCEPT { + static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__rotr requires an unsigned integer type"); + const int __N = numeric_limits<_Tp>::digits; + int __r = __s % __N; + + if (__r == 0) + return __x; + + if (__r > 0) + return (__x >> __r) | (__x << (__N - __r)); + + return (__x << -__r) | (__x >> (__N + __r)); } #if _LIBCPP_STD_VER >= 20 diff --git a/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp b/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp index 50e498b5761e54..16eabbd2a5a4de 100644 --- a/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp @@ -41,6 +41,8 @@ constexpr bool test() assert(std::rotl(T(max - 1), 5) == T(max - 32)); assert(std::rotl(T(max - 1), 6) == T(max - 64)); assert(std::rotl(T(max - 1), 7) == T(max - 128)); + assert(std::rotl(T(max - 1), std::numeric_limits<int>::max()) == + std::rotl(T(max - 1), std::numeric_limits<int>::max() % std::numeric_limits<T>::digits)); assert(std::rotl(T(max - 1), -1) == T(max - highbit)); assert(std::rotl(T(max - 1), -2) == T(max - (highbit >> 1))); @@ -49,6 +51,8 @@ constexpr bool test() assert(std::rotl(T(max - 1), -5) == T(max - (highbit >> 4))); assert(std::rotl(T(max - 1), -6) == T(max - (highbit >> 5))); assert(std::rotl(T(max - 1), -7) == T(max - (highbit >> 6))); + assert(std::rotl(T(max - 1), std::numeric_limits<int>::min()) == + std::rotl(T(max - 1), std::numeric_limits<int>::min() % std::numeric_limits<T>::digits)); assert(std::rotl(T(1), 0) == T(1)); assert(std::rotl(T(1), 1) == T(2)); diff --git a/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp b/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp index 00c9e617d2edf3..53405588266f74 100644 --- a/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp @@ -41,6 +41,8 @@ constexpr bool test() assert(std::rotr(T(max - 1), 5) == T(max - (highbit >> 4))); assert(std::rotr(T(max - 1), 6) == T(max - (highbit >> 5))); assert(std::rotr(T(max - 1), 7) == T(max - (highbit >> 6))); + assert(std::rotr(T(max - 1), std::numeric_limits<int>::max()) == + std::rotr(T(max - 1), std::numeric_limits<int>::max() % std::numeric_limits<T>::digits)); assert(std::rotr(T(max - 1), -1) == T(max - 2)); assert(std::rotr(T(max - 1), -2) == T(max - 4)); @@ -49,6 +51,8 @@ constexpr bool test() assert(std::rotr(T(max - 1), -5) == T(max - 32)); assert(std::rotr(T(max - 1), -6) == T(max - 64)); assert(std::rotr(T(max - 1), -7) == T(max - 128)); + assert(std::rotr(T(max - 1), std::numeric_limits<int>::min()) == + std::rotr(T(max - 1), std::numeric_limits<int>::min() % std::numeric_limits<T>::digits)); assert(std::rotr(T(128), 0) == T(128)); assert(std::rotr(T(128), 1) == T(64)); _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits