This implements a minimal integer class type that emulates 128-bit unsigned arithmetic using a pair of 64-bit integers, which the floating-point std::to_chars implementation then uses as a drop-in replacement for unsigned __int128 on targets that lack the latter. This allows us to fully support formatting of large long double types on targets that lack __int128.
Since Ryu performs 128-bit division/modulus only by 2, 5 and 10, the integer class type supports only these divisors rather than supporting general division/modulus. Tested on x86, x86_64, ppc64le, ppc64be and aarch64, with and without performing the equivalent of -U__SIZEOF_INT128__ in floating_to_chars.cc (so that we also test using the class type on targets when __int128 is available). libstdc++-v3/ChangeLog: * src/c++17/floating_to_chars.cc: Simplify the file as if __SIZEOF_INT128__ is always defined. [!defined __SIZEOF_INT128__]: Include "uint128_t.h". Define a to_chars overload for the uint128_t class type. * src/c++17/uint128_t.h: New file. * testsuite/20_util/to_chars/long_double.cc: No longer expect an execution FAIL on targets that have a large long double type but lack __int128. --- libstdc++-v3/src/c++17/floating_to_chars.cc | 58 ++-- libstdc++-v3/src/c++17/uint128_t.h | 297 ++++++++++++++++++ .../testsuite/20_util/to_chars/long_double.cc | 1 - 3 files changed, 332 insertions(+), 24 deletions(-) create mode 100644 libstdc++-v3/src/c++17/uint128_t.h diff --git a/libstdc++-v3/src/c++17/floating_to_chars.cc b/libstdc++-v3/src/c++17/floating_to_chars.cc index da3fbaa1ed1..86f4401e134 100644 --- a/libstdc++-v3/src/c++17/floating_to_chars.cc +++ b/libstdc++-v3/src/c++17/floating_to_chars.cc @@ -64,25 +64,19 @@ extern "C" int __sprintfieee128(char*, const char*, ...); #if __LDBL_MANT_DIG__ == __DBL_MANT_DIG__ # define LONG_DOUBLE_KIND LDK_BINARY64 -#elif defined(__SIZEOF_INT128__) -// The Ryu routines need a 128-bit integer type in order to do shortest -// formatting of types larger than 64-bit double, so without __int128 we can't -// support any large long double format. This is the case for e.g. i386. 
-# if __LDBL_MANT_DIG__ == 64 +#elif __LDBL_MANT_DIG__ == 64 # define LONG_DOUBLE_KIND LDK_FLOAT80 -# elif __LDBL_MANT_DIG__ == 113 -# define LONG_DOUBLE_KIND LDK_BINARY128 -# elif __LDBL_MANT_DIG__ == 106 -# define LONG_DOUBLE_KIND LDK_IBM128 -# endif -# if defined _GLIBCXX_USE_FLOAT128 && __FLT128_MANT_DIG__ == 113 -// Define overloads of std::to_chars for __float128. -# define FLOAT128_TO_CHARS 1 -# endif +#elif __LDBL_MANT_DIG__ == 113 +# define LONG_DOUBLE_KIND LDK_BINARY128 +#elif __LDBL_MANT_DIG__ == 106 +# define LONG_DOUBLE_KIND LDK_IBM128 +#else +# define LONG_DOUBLE_KIND LDK_UNSUPPORTED #endif -#if !defined(LONG_DOUBLE_KIND) -# define LONG_DOUBLE_KIND LDK_UNSUPPORTED +#if defined _GLIBCXX_USE_FLOAT128 && __FLT128_MANT_DIG__ == 113 +// Define overloads of std::to_chars for __float128. +# define FLOAT128_TO_CHARS 1 #endif // For now we only support __float128 when it's the powerpc64 __ieee128 type. @@ -100,6 +94,8 @@ namespace { #if defined __SIZEOF_INT128__ using uint128_t = unsigned __int128; +#else +# include "uint128_t.h" #endif namespace ryu @@ -114,7 +110,6 @@ namespace #include "ryu/d2fixed.c" #include "ryu/f2s.c" -#ifdef __SIZEOF_INT128__ namespace generic128 { // Put the generic Ryu bits in their own namespace to avoid name conflicts. 
@@ -129,7 +124,6 @@ namespace int to_chars(const floating_decimal_128 v, char* const result) { return generic128::generic_to_chars(v, result); } -#endif } // namespace ryu // A traits class that contains pertinent information about the binary @@ -407,10 +401,8 @@ namespace return uint32_t{}; else if constexpr (total_bits <= 64) return uint64_t{}; -#ifdef __SIZEOF_INT128__ else if constexpr (total_bits <= 128) return uint128_t{}; -#endif }; using uint_t = decltype(get_uint_t()); uint_t value_bits = 0; @@ -503,7 +495,6 @@ namespace return ryu::floating_to_fd32(value); else if constexpr (std::is_same_v<T, double>) return ryu::floating_to_fd64(value); -#ifdef __SIZEOF_INT128__ else if constexpr (std::is_same_v<T, long double> || std::is_same_v<T, F128_type>) { @@ -519,7 +510,6 @@ namespace mantissa_bits, exponent_bits, !has_implicit_leading_bit); } -#endif } // This subroutine returns true if the shortest scientific form fd is a @@ -558,10 +548,32 @@ namespace get_mantissa_length(const ryu::floating_decimal_64 fd) { return ryu::decimalLength17(fd.mantissa); } -#ifdef __SIZEOF_INT128__ int get_mantissa_length(const ryu::floating_decimal_128 fd) { return ryu::generic128::decimalLength(fd.mantissa); } + +#if !defined __SIZEOF_INT128__ + // An implementation of base-10 std::to_chars for uint128_t on targets that + // lack __int128. 
+ std::to_chars_result + to_chars(char* first, char* last, uint128_t x) + { + const int len = ryu::generic128::decimalLength(x); + if (last - first < len) + return {last, std::errc::value_too_large}; + if (x == 0) + { + *first++ = '0'; + return {first, std::errc{}}; + } + for (int i = 0; i < len; ++i) + { + first[len - 1 - i] = '0' + static_cast<char>(x % 10); + x /= 10; + } + __glibcxx_assert(x == 0); + return {first + len, std::errc{}}; + } #endif } // anon namespace diff --git a/libstdc++-v3/src/c++17/uint128_t.h b/libstdc++-v3/src/c++17/uint128_t.h new file mode 100644 index 00000000000..90ebae2ffd2 --- /dev/null +++ b/libstdc++-v3/src/c++17/uint128_t.h @@ -0,0 +1,297 @@ +// A relatively minimal unsigned 128-bit integer class type, used by the +// floating-point std::to_chars implementation on targets that lack __int128. + +// Copyright (C) 2021 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. 
+ +struct uint128_t +{ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + uint64_t lo, hi; +#else + uint64_t hi, lo; +#endif + + uint128_t() = default; + + constexpr + uint128_t(uint64_t lo, uint64_t hi = 0) +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + : lo(lo), hi(hi) +#else + : hi(hi), lo(lo) +#endif + { } + + constexpr explicit + operator bool() const + { return *this != 0; } + + template<typename T, typename = std::enable_if_t<std::is_integral_v<T>>> + constexpr explicit + operator T() const + { + static_assert(sizeof(T) <= sizeof(uint64_t)); + return static_cast<T>(lo); + } + + friend constexpr uint128_t + operator&(uint128_t x, const uint128_t y) + { + x.lo &= y.lo; + x.hi &= y.hi; + return x; + } + + friend constexpr uint128_t + operator|(uint128_t x, const uint128_t y) + { + x.lo |= y.lo; + x.hi |= y.hi; + return x; + } + + friend constexpr uint128_t + operator<<(uint128_t x, const uint128_t y) + { + __glibcxx_assert(y < 128); + // TODO: Convince GCC to use shldq on x86 here. + if (y.lo >= 64) + { + x.hi = x.lo << (y.lo - 64); + x.lo = 0; + } + else if (y.lo != 0) + { + x.hi <<= y.lo; + x.hi |= x.lo >> (64 - y.lo); + x.lo <<= y.lo; + } + return x; + } + + friend constexpr uint128_t + operator>>(uint128_t x, const uint128_t y) + { + __glibcxx_assert(y < 128); + // TODO: Convince GCC to use shrdq on x86 here. 
+ if (y.lo >= 64) + { + x.lo = x.hi >> (y.lo - 64); + x.hi = 0; + } + else if (y.lo != 0) + { + x.lo >>= y.lo; + x.lo |= x.hi << (64 - y.lo); + x.hi >>= y.lo; + } + return x; + } + + constexpr uint128_t + operator~() const + { return {~lo, ~hi}; } + + constexpr uint128_t + operator-() const + { return operator~() + 1; } + + friend constexpr uint128_t + operator+(uint128_t x, const uint128_t y) + { + x.hi += __builtin_add_overflow(x.lo, y.lo, &x.lo); + x.hi += y.hi; + return x; + } + + friend constexpr uint128_t + operator-(uint128_t x, const uint128_t y) + { + x.hi -= __builtin_sub_overflow(x.lo, y.lo, &x.lo); + x.hi -= y.hi; + return x; + } + + static constexpr uint128_t + umul64_64_128(const uint64_t x, const uint64_t y) + { + const uint64_t xl = x & 0xffffffff; + const uint64_t xh = x >> 32; + const uint64_t yl = y & 0xffffffff; + const uint64_t yh = y >> 32; + const uint64_t ll = xl * yl; + const uint64_t lh = xl * yh; + const uint64_t hl = xh * yl; + const uint64_t hh = xh * yh; + const uint64_t m = (ll >> 32) + lh + (hl & 0xffffffff); + const uint64_t l = (ll & 0xffffffff ) | (m << 32); + const uint64_t h = (m >> 32) + (hl >> 32) + hh; + return {l, h}; + } + + friend constexpr uint128_t + operator*(uint128_t x, const uint128_t y) + { + uint128_t z = umul64_64_128(x.lo, y.lo); + z.hi += x.hi*y.lo + y.hi*x.lo; + return z; + } + + friend constexpr uint128_t + operator/(const uint128_t x, const uint128_t y) + { + // Ryu performs 128-bit division only by 5 and 10, so that's what we + // implement. The strategy here is to relate division of x with that of + // x.hi and x.lo separately. + __glibcxx_assert(y == 5 || y == 10); + // The following implements division by 5 and 10. 
In either case, we + // first compute division by 5: + // x/5 = (x.hi*2^64 + x.lo)/5 + // = (x.hi*(2^64-1) + x.hi + x.lo)/5 + // = x.hi*((2^64-1)/5) + (x.hi + x.lo)/5 since CST=(2^64-1)/5 is exact + // = x.hi*CST + x.hi/5 + x.lo/5 + ((x.lo%5) + (x.hi%5) >= 5) + // We go a step further and replace the last adjustment term with a + // lookup table, which we encode as a binary literal. This seems to + // yield smaller code on x86 at least. + constexpr auto cst = ~uint64_t(0) / 5; + uint128_t q = uint128_t{x.hi}*cst + uint128_t{x.hi/5 + x.lo/5}; + constexpr auto lookup = 0b111100000u; + q += (lookup >> ((x.hi % 5) + (x.lo % 5))) & 1; + if (y == 10) + q >>= 1; + return q; + } + + friend constexpr uint128_t + operator%(uint128_t x, const uint128_t y) + { + // Ryu performs 128-bit modulus only by 2, 5 and 10, so that's what we + // implement. The strategy here is to relate modulus of x with that of + // x.hi and x.lo separately. + if (y == 2) + return x & 1; + __glibcxx_assert(y == 5 || y == 10); + // The following implements modulus by 5 and 10. In either case, + // we first compute modulus by 5: + // x (mod 5) = x.hi*2^64 + x.lo (mod 5) + // = x.hi + x.lo (mod 5) since 2^64 ≡ 1 (mod 5) + // So the straightforward implementation would be + // ((x.hi % 5) + (x.lo % 5)) % 5 + // But we go a step further and replace the outermost % with a + // lookup table: + // = {0,1,2,3,4,0,1,2,3}[(x.hi % 5) + (x.lo % 5)] (mod 5) + // which we encode as an octal literal. + constexpr auto lookup = 0321043210u; + auto r = (lookup >> 3*((x.hi % 5) + (x.lo % 5))) & 7; + if (y == 10) + // x % 10 = (x % 5) if x / 5 is even + // (x % 5) + 5 if x / 5 is odd + // The compiler should be able to CSE the below computation of x/5 and + // the above modulus operations with a nearby inlined computation of x/10. 
+ r += ((x / 5).lo & 1) * 5; + return r; + } + + friend constexpr bool + operator==(const uint128_t x, const uint128_t y) + { return x.hi == y.hi && x.lo == y.lo; } + + friend constexpr bool + operator<(const uint128_t x, const uint128_t y) + { return x.hi < y.hi || (x.hi == y.hi && x.lo < y.lo); } + + friend constexpr auto + __bit_width(const uint128_t x) + { + if (auto w = std::__bit_width(x.hi)) + return w + 64; + else + return std::__bit_width(x.lo); + } + + friend constexpr auto + __countr_zero(const uint128_t x) + { + auto c = std::__countr_zero(x.lo); + if (c == 64) + return 64 + std::__countr_zero(x.hi); + else + return c; + } + + constexpr uint128_t& + operator--() + { return *this -= 1; } + + constexpr uint128_t& + operator++() + { return *this += 1; } + + constexpr uint128_t& + operator+=(const uint128_t y) + { return *this = *this + y; } + + constexpr uint128_t& + operator-=(const uint128_t y) + { return *this = *this - y; } + + constexpr uint128_t& + operator*=(const uint128_t y) + { return *this = *this * y; } + + constexpr uint128_t& + operator<<=(const uint128_t y) + { return *this = *this << y; } + + constexpr uint128_t& + operator>>=(const uint128_t y) + { return *this = *this >> y; } + + constexpr uint128_t& + operator|=(const uint128_t y) + { return *this = *this | y; } + + constexpr uint128_t& + operator&=(const uint128_t y) + { return *this = *this & y; } + + constexpr uint128_t& + operator%=(const uint128_t y) + { return *this = *this % y; } + + constexpr uint128_t& + operator/=(const uint128_t y) + { return *this = *this / y; } + + friend constexpr bool + operator!=(const uint128_t x, const uint128_t y) + { return !(x == y); } + + friend constexpr bool + operator>(const uint128_t x, const uint128_t y) + { return y < x; } + + friend constexpr bool + operator>=(const uint128_t x, const uint128_t y) + { return !(x < y); } +}; diff --git a/libstdc++-v3/testsuite/20_util/to_chars/long_double.cc 
b/libstdc++-v3/testsuite/20_util/to_chars/long_double.cc index da847ae5401..5c1f7136f21 100644 --- a/libstdc++-v3/testsuite/20_util/to_chars/long_double.cc +++ b/libstdc++-v3/testsuite/20_util/to_chars/long_double.cc @@ -18,7 +18,6 @@ // <charconv> is supported in C++14 as a GNU extension, but this test uses C++17 // hexadecimal floating-point literals. // { dg-do run { target c++17 } } -// { dg-xfail-run-if "Ryu needs __int128" { large_long_double && { ! int128 } } } // { dg-require-effective-target ieee-floats } #include <charconv> -- 2.31.0.rc2