[llvm-branch-commits] [libcxx] [libc++][bit] Improves rotate functions. (#98032) (PR #101892)

Tobias Hieta via llvm-branch-commits Sat, 10 Aug 2024 02:52:51 -0700

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/101892


>From 01a49d21c757fa80b3d6cf5cb153840cc94f8830 Mon Sep 17 00:00:00 2001
From: Mark de Wever <ko...@xs4all.nl>
Date: Sat, 3 Aug 2024 11:19:00 +0200
Subject: [PATCH] [libc++][bit] Improves rotate functions. (#98032)

Investigating #96612 shows our implementation was different from the
Standard and could cause UB. Testing the codegen showed quite a bit of
assembly generated for these functions. The functions have been written
differently which allows Clang to optimize the code to use simple CPU
rotate instructions.

Fixes: https://github.com/llvm/llvm-project/issues/96612
---
 libcxx/include/__bit/rotate.h                 | 37 +++++++++++++------
 .../std/numerics/bit/bitops.rot/rotl.pass.cpp |  4 ++
 .../std/numerics/bit/bitops.rot/rotr.pass.cpp |  4 ++
 3 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/libcxx/include/__bit/rotate.h b/libcxx/include/__bit/rotate.h
index d848056c3350d0..90e430e9d04256 100644
--- a/libcxx/include/__bit/rotate.h
+++ b/libcxx/include/__bit/rotate.h
@@ -20,24 +20,37 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
+// Writing two full functions for rotl and rotr makes it easier for the 
compiler
+// to optimize the code. On x86 this function becomes the ROL instruction and
+// the rotr function becomes the ROR instruction.
 template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotr(_Tp __t, int 
__cnt) _NOEXCEPT {
-  static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__rotr requires an 
unsigned integer type");
-  const unsigned int __dig = numeric_limits<_Tp>::digits;
-  if ((__cnt % __dig) == 0)
-    return __t;
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotl(_Tp __x, int 
__s) _NOEXCEPT {
+  static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__rotl requires an 
unsigned integer type");
+  const int __N = numeric_limits<_Tp>::digits;
+  int __r       = __s % __N;
+
+  if (__r == 0)
+    return __x;
 
-  if (__cnt < 0) {
-    __cnt *= -1;
-    return (__t << (__cnt % __dig)) | (__t >> (__dig - (__cnt % __dig))); // 
rotr with negative __cnt is similar to rotl
-  }
+  if (__r > 0)
+    return (__x << __r) | (__x >> (__N - __r));
 
-  return (__t >> (__cnt % __dig)) | (__t << (__dig - (__cnt % __dig)));
+  return (__x >> -__r) | (__x << (__N + __r));
 }
 
 template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotl(_Tp __t, int 
__cnt) _NOEXCEPT {
-  return std::__rotr(__t, -__cnt);
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotr(_Tp __x, int 
__s) _NOEXCEPT {
+  static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__rotr requires an 
unsigned integer type");
+  const int __N = numeric_limits<_Tp>::digits;
+  int __r       = __s % __N;
+
+  if (__r == 0)
+    return __x;
+
+  if (__r > 0)
+    return (__x >> __r) | (__x << (__N - __r));
+
+  return (__x << -__r) | (__x >> (__N + __r));
 }
 
 #if _LIBCPP_STD_VER >= 20
diff --git a/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp 
b/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp
index 50e498b5761e54..16eabbd2a5a4de 100644
--- a/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp
+++ b/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp
@@ -41,6 +41,8 @@ constexpr bool test()
     assert(std::rotl(T(max - 1), 5) == T(max - 32));
     assert(std::rotl(T(max - 1), 6) == T(max - 64));
     assert(std::rotl(T(max - 1), 7) == T(max - 128));
+    assert(std::rotl(T(max - 1), std::numeric_limits<int>::max()) ==
+           std::rotl(T(max - 1), std::numeric_limits<int>::max() % 
std::numeric_limits<T>::digits));
 
     assert(std::rotl(T(max - 1), -1) == T(max - highbit));
     assert(std::rotl(T(max - 1), -2) == T(max - (highbit >> 1)));
@@ -49,6 +51,8 @@ constexpr bool test()
     assert(std::rotl(T(max - 1), -5) == T(max - (highbit >> 4)));
     assert(std::rotl(T(max - 1), -6) == T(max - (highbit >> 5)));
     assert(std::rotl(T(max - 1), -7) == T(max - (highbit >> 6)));
+    assert(std::rotl(T(max - 1), std::numeric_limits<int>::min()) ==
+           std::rotl(T(max - 1), std::numeric_limits<int>::min() % 
std::numeric_limits<T>::digits));
 
     assert(std::rotl(T(1), 0) == T(1));
     assert(std::rotl(T(1), 1) == T(2));
diff --git a/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp 
b/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp
index 00c9e617d2edf3..53405588266f74 100644
--- a/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp
+++ b/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp
@@ -41,6 +41,8 @@ constexpr bool test()
     assert(std::rotr(T(max - 1), 5) == T(max - (highbit >> 4)));
     assert(std::rotr(T(max - 1), 6) == T(max - (highbit >> 5)));
     assert(std::rotr(T(max - 1), 7) == T(max - (highbit >> 6)));
+    assert(std::rotr(T(max - 1), std::numeric_limits<int>::max()) ==
+           std::rotr(T(max - 1), std::numeric_limits<int>::max() % 
std::numeric_limits<T>::digits));
 
     assert(std::rotr(T(max - 1), -1) == T(max - 2));
     assert(std::rotr(T(max - 1), -2) == T(max - 4));
@@ -49,6 +51,8 @@ constexpr bool test()
     assert(std::rotr(T(max - 1), -5) == T(max - 32));
     assert(std::rotr(T(max - 1), -6) == T(max - 64));
     assert(std::rotr(T(max - 1), -7) == T(max - 128));
+    assert(std::rotr(T(max - 1), std::numeric_limits<int>::min()) ==
+           std::rotr(T(max - 1), std::numeric_limits<int>::min() % 
std::numeric_limits<T>::digits));
 
     assert(std::rotr(T(128), 0) == T(128));
     assert(std::rotr(T(128), 1) == T(64));

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [libcxx] [libc++][bit] Improves rotate functions. (#98032) (PR #101892)

Reply via email to