On Sun, 16 Jan 2022 at 18:07, Patrick Palka via Libstdc++ < libstd...@gcc.gnu.org> wrote:
> Series tested on x86_64, i686, ppc64, ppc64le, aarch64, does it look OK for > trunk? > OK, thanks. > libstdc++-v3/ChangeLog: > > * src/c++17/floating_from_chars.cc: Include <bit>. > (ascii_to_hexit, starts_with_ci): Define. > (__floating_from_chars_hex): Define. > (from_chars): Conditionally call __floating_from_chars_hex. > (testsuite/20_util/from_chars/7.cc): New test. > --- > libstdc++-v3/src/c++17/floating_from_chars.cc | 376 ++++++++++++++++++ > .../testsuite/20_util/from_chars/7.cc | 151 +++++++ > 2 files changed, 527 insertions(+) > create mode 100644 libstdc++-v3/testsuite/20_util/from_chars/7.cc > > diff --git a/libstdc++-v3/src/c++17/floating_from_chars.cc > b/libstdc++-v3/src/c++17/floating_from_chars.cc > index 479e042bb5f..b186da9a955 100644 > --- a/libstdc++-v3/src/c++17/floating_from_chars.cc > +++ b/libstdc++-v3/src/c++17/floating_from_chars.cc > @@ -31,6 +31,7 @@ > #define _GLIBCXX_USE_CXX11_ABI 1 > > #include <charconv> > +#include <bit> > #include <string> > #include <memory_resource> > #include <cfenv> > @@ -396,6 +397,371 @@ namespace > } > #endif > > +#if _GLIBCXX_FLOAT_IS_IEEE_BINARY32 && _GLIBCXX_DOUBLE_IS_IEEE_BINARY64 > + // If the given ASCII character represents a hexit, return that hexit. > + // Otherwise return -1. > + int > + ascii_to_hexit(char ch) > + { > + if (ch >= '0' && ch <= '9') > + return ch - '0'; > + if (ch >= 'a' && ch <= 'f') > + return ch - 'a' + 10; > + if (ch >= 'A' && ch <= 'F') > + return ch - 'A' + 10; > + return -1; > + } > + > + // Return true iff [FIRST,LAST) begins with PREFIX, ignoring case. > + bool > + starts_with_ci(const char* first, const char* last, string_view prefix) > + { > + __glibcxx_requires_valid_range(first, last); > + > + for (char ch : prefix) > + { > + __glibcxx_assert(ch >= 'a' && ch <= 'z'); > + if (first == last || (*first != ch && *first != ch - 32)) > + return false; > + ++first; > + } > + > + return true; > + } > + > + // An implementation of hexadecimal float parsing for binary32/64. 
> + template<typename T> > + from_chars_result > + __floating_from_chars_hex(const char* first, const char* last, T& value) > + { > + static_assert(is_same_v<T, float> || is_same_v<T, double>); > + > + using uint_t = conditional_t<is_same_v<T, float>, uint32_t, uint64_t>; > + constexpr int mantissa_width = is_same_v<T, float> ? 23 : 52; > + constexpr int exponent_width = is_same_v<T, float> ? 8 : 11; > + constexpr int exponent_bias = (1 << (exponent_width - 1)) - 1; > + > + __glibcxx_requires_valid_range(first, last); > + if (first == last) > + return {first, errc::invalid_argument}; > + > + // Consume the sign bit. > + const char* const orig_first = first; > + bool sign_bit = false; > + if (*first == '-') > + { > + sign_bit = true; > + ++first; > + } > + > + // Handle "inf", "infinity", "NaN" and variants thereof. > + if (first != last) > + if (*first == 'i' || *first == 'I' || *first == 'n' || *first == > 'N') [[unlikely]] > + { > + if (starts_with_ci(first, last, "inf"sv)) > + { > + first += 3; > + if (starts_with_ci(first, last, "inity"sv)) > + first += 5; > + > + uint_t result = 0; > + result |= sign_bit; > + result <<= exponent_width; > + result |= (1ull << exponent_width) - 1; > + result <<= mantissa_width; > + memcpy(&value, &result, sizeof(result)); > + > + return {first, errc{}}; > + } > + else if (starts_with_ci(first, last, "nan")) > + { > + first += 3; > + > + if (first != last && *first == '(') > + { > + // Tentatively consume the '(' as we look for an optional > + // n-char-sequence followed by a ')'. 
> + const char* const fallback_first = first; > + for (;;) > + { > + ++first; > + if (first == last) > + { > + first = fallback_first; > + break; > + } > + > + char ch = *first; > + if (ch == ')') > + { > + ++first; > + break; > + } > + else if ((ch >= '0' && ch <= '9') > + || (ch >= 'a' && ch <= 'z') > + || (ch >= 'A' && ch <= 'Z') > + || ch == '_') > + continue; > + else > + { > + first = fallback_first; > + break; > + } > + } > + } > + > + // We make the implementation-defined decision of ignoring > the > + // sign bit and the n-char-sequence when assembling the NaN. > + uint_t result = 0; > + result <<= exponent_width; > + result |= (1ull << exponent_width) - 1; > + result <<= mantissa_width; > + result |= (1ull << (mantissa_width - 1)) | 1; > + memcpy(&value, &result, sizeof(result)); > + > + return {first, errc{}}; > + } > + } > + > + // Consume all insignificant leading zeros in the whole part of the > + // mantissa. > + bool seen_digit = false; > + while (first != last && *first == '0') > + { > + seen_digit = true; > + ++first; > + } > + > + // Now consume the rest of the written mantissa, populating MANTISSA > with the > + // first MANTISSA_WIDTH+k significant bits of the written mantissa, > where 1 > + // <= k <= 4 is the bit width of the leading significant written > hexit. > + // > + // Examples: > + // After parsing "1.2f3", MANTISSA is 0x12f30000000000 > (bit_width=52+1). > + // After parsing ".0000f0e", MANTISSA is 0xf0e00000000000 > (bit_width=52+4). > + // After parsing ".1234567890abcd8", MANTISSA is 0x1234567890abcd > (bit_width=52+1) > + // and MIDPOINT_BIT is true and NONZERO_TAIL is false. > + uint_t mantissa = 0; > + int mantissa_idx = mantissa_width; // The current bit index into > MANTISSA > + // into which we'll write the next > hexit. > + int exponent_adjustment = 0; // How much we'd have to adjust the > written > + // exponent in order to represent the > mantissa > + // in scientific form h.hhhhhhhhhhhhh. 
> + bool midpoint_bit = false; // Whether the MANTISSA_WIDTH+k+1 > significant > + // bit is set in the written mantissa. > + bool nonzero_tail = false; // Whether some bit thereafter is set in > the > + // written mantissa. > + bool seen_decimal_point = false; > + for (; first != last; ++first) > + { > + char ch = *first; > + if (ch == '.' && !seen_decimal_point) > + { > + seen_decimal_point = true; > + continue; > + } > + > + int hexit = ascii_to_hexit(ch); > + if (hexit == -1) > + break; > + seen_digit = true; > + > + if (!seen_decimal_point && mantissa != 0) > + exponent_adjustment += 4; > + else if (seen_decimal_point && mantissa == 0) > + { > + exponent_adjustment -= 4; > + if (hexit == 0x0) > + continue; > + } > + > + if (mantissa_idx >= 0) > + mantissa |= uint_t(hexit) << mantissa_idx; > + else if (mantissa_idx >= -4) > + { > + if constexpr (is_same_v<T, float>) > + { > + __glibcxx_assert(mantissa_idx == -1); > + mantissa |= hexit >> 1; > + midpoint_bit = (hexit & 0b0001) != 0; > + } > + else > + { > + __glibcxx_assert(mantissa_idx == -4); > + midpoint_bit = (hexit & 0b1000) != 0; > + nonzero_tail = (hexit & 0b0111) != 0; > + } > + } > + else > + nonzero_tail |= (hexit != 0x0); > + > + mantissa_idx -= 4; > + } > + if (mantissa != 0) > + __glibcxx_assert(__bit_width(mantissa) >= mantissa_width + 1 > + && __bit_width(mantissa) <= mantissa_width + 4); > + else > + __glibcxx_assert(!midpoint_bit && !nonzero_tail); > + > + if (!seen_digit) > + { > + // If we haven't seen any digit at this point, the parse failed. > + first = orig_first; > + return {first, errc::invalid_argument}; > + } > + > + // Parse the written exponent. > + int written_exponent = 0; > + if (first != last && *first == 'p') > + { > + // Tentatively consume the 'p' and try to parse a decimal > number. 
> + const char* const fallback_first = first; > + ++first; > + if (first != last && *first == '+') > + ++first; > + from_chars_result fcr = from_chars(first, last, written_exponent, > 10); > + if (fcr.ptr == first) > + // The parse failed, so undo consuming the 'p' and carry on as > if the > + // exponent was omitted (i.e. is 0). > + first = fallback_first; > + else > + { > + first = fcr.ptr; > + if (mantissa != 0 && fcr.ec == errc::result_out_of_range) > + // FIXME: Punt on large exponents for now. > + return {first, errc::result_out_of_range}; > + } > + } > + int biased_exponent = written_exponent + exponent_bias; > + if (exponent_adjustment != 0) > + // The mantissa wasn't written in scientific form. Adjust the > exponent > + // so that we may assume scientific form. > + // > + // Examples: > + // For input "a.bcp5", EXPONENT_ADJUSTMENT would be 0 since this > + // written mantissa is already in scientific form. > + // For input "ab.cp5", EXPONENT_ADJUSTMENT would be 4 since the > + // scientific form is "a.bcp9". > + // For input "0.0abcp5", EXPONENT_ADJUSTMENT would be -8 since the > + // scientific form is "a.bcp-3". > + biased_exponent += exponent_adjustment; > + > + // Shifts the mantissa to the right by AMOUNT while updating > + // BIASED_EXPONENT, MIDPOINT_BIT and NONZERO_TAIL accordingly. > + auto shift_mantissa = [&] (int amount) { > + __glibcxx_assert(amount >= 0); > + if (amount > mantissa_width + 1) > + { > + // Shifting the mantissa by an amount greater than its precision. 
> + nonzero_tail |= midpoint_bit; > + nonzero_tail |= mantissa != 0; > + midpoint_bit = false; > + mantissa = 0; > + biased_exponent += amount; > + } > + else if (amount != 0) > + { > + nonzero_tail |= midpoint_bit; > + nonzero_tail |= (mantissa & ((1ull << (amount - 1)) - 1)) != 0; > + midpoint_bit = (mantissa & (1ull << (amount - 1))) != 0; > + mantissa >>= amount; > + biased_exponent += amount; > + } > + }; > + > + if (mantissa != 0) > + { > + // If the leading hexit is not '1', shift MANTISSA to make it so. > + // This normalizes input like "4.04p0" into "1.01p2". > + const int leading_hexit = mantissa >> mantissa_width; > + const int leading_hexit_width = __bit_width(leading_hexit); // > FIXME: optimize? > + __glibcxx_assert(leading_hexit_width >= 1 && leading_hexit_width > <= 4); > + shift_mantissa(leading_hexit_width - 1); > + // After this adjustment, we can assume the leading hexit is a '1'. > + __glibcxx_assert((mantissa >> mantissa_width) == 0x1); > + } > + > + if (biased_exponent <= 0) > + { > + // This number is too small to be represented as a normal number, > so > + // try for a subnormal number by shifting the mantissa > sufficiently. > + // We need to shift by 1 more than -BIASED_EXPONENT because the > leading > + // mantissa bit is omitted in the representation of a normal > number but > + // not in a subnormal number. > + shift_mantissa(-biased_exponent + 1); > + __glibcxx_assert(!(mantissa & (1ull << mantissa_width))); > + __glibcxx_assert(biased_exponent == 1); > + biased_exponent = 0; > + } > + > + // Perform round-to-nearest, tie-to-even rounding. > + if (midpoint_bit && (nonzero_tail || (mantissa % 2) != 0)) > + { > + // Rounding away from zero. > + ++mantissa; > + midpoint_bit = false; > + nonzero_tail = false; > + > + // Deal with a couple of corner cases after rounding. > + if (mantissa == (1ull << mantissa_width)) > + { > + // We rounded the subnormal number 1.fffffffffffff...p-1023 > + // up to the normal number 1p-1022. 
> + __glibcxx_assert(biased_exponent == 0); > + ++biased_exponent; > + } > + else if (mantissa & (1ull << (mantissa_width + 1))) > + { > + // We rounded the normal number 1.fffffffffffff8pN (with > maximal > + // mantissa) up to 1p(N+1). > + mantissa >>= 1; > + ++biased_exponent; > + } > + } > + else > + { > + // Rounding towards zero. > + > + if (mantissa == 0 && (midpoint_bit || nonzero_tail)) > + { > + // A nonzero number that rounds to zero is unrepresentable. > + __glibcxx_assert(biased_exponent == 0); > + return {first, errc::result_out_of_range}; > + } > + > + midpoint_bit = false; > + nonzero_tail = false; > + } > + > + if (mantissa != 0 && biased_exponent >= (1 << exponent_width) - 1) > + // The exponent of this number is too large to be representable. > + return {first, errc::result_out_of_range}; > + > + uint_t result = 0; > + if (mantissa == 0) > + { > + // Assemble a (possibly signed) zero. > + if (sign_bit) > + result |= 1ull << (exponent_width + mantissa_width); > + } > + else > + { > + // Assemble a nonzero normal or subnormal value. > + result |= sign_bit; > + result <<= exponent_width; > + result |= biased_exponent; > + result <<= mantissa_width; > + result |= mantissa & ((1ull << mantissa_width) - 1); > + // The implicit leading mantissa bit is set iff the number is > normal. > + __glibcxx_assert(((mantissa & (1ull << mantissa_width)) != 0) > + == (biased_exponent != 0)); > + } > + memcpy(&value, &result, sizeof(result)); > + > + return {first, errc{}}; > + } > +#endif > + > } // namespace > > // FIXME: This should be reimplemented so it doesn't use strtod and > newlocale. 
> @@ -406,6 +772,11 @@ from_chars_result > from_chars(const char* first, const char* last, float& value, > chars_format fmt) noexcept > { > +#if _GLIBCXX_FLOAT_IS_IEEE_BINARY32 && _GLIBCXX_DOUBLE_IS_IEEE_BINARY64 > + if (fmt == chars_format::hex) > + return __floating_from_chars_hex(first, last, value); > +#endif > + > errc ec = errc::invalid_argument; > #if _GLIBCXX_USE_CXX11_ABI > buffer_resource mr; > @@ -432,6 +803,11 @@ from_chars_result > from_chars(const char* first, const char* last, double& value, > chars_format fmt) noexcept > { > +#if _GLIBCXX_FLOAT_IS_IEEE_BINARY32 && _GLIBCXX_DOUBLE_IS_IEEE_BINARY64 > + if (fmt == chars_format::hex) > + return __floating_from_chars_hex(first, last, value); > +#endif > + > errc ec = errc::invalid_argument; > #if _GLIBCXX_USE_CXX11_ABI > buffer_resource mr; > diff --git a/libstdc++-v3/testsuite/20_util/from_chars/7.cc > b/libstdc++-v3/testsuite/20_util/from_chars/7.cc > new file mode 100644 > index 00000000000..090ad7a87bb > --- /dev/null > +++ b/libstdc++-v3/testsuite/20_util/from_chars/7.cc > @@ -0,0 +1,151 @@ > +// Various testcases for binary64 hexfloat std::from_chars. 
> +// { dg-do run { target c++17 } } > +// { dg-require-effective-target ieee_floats } > + > +#include <charconv> > + > +#include <cfenv> > +#include <cmath> > +#include <cstring> > +#include <cstdio> > +#include <limits> > +#include <testsuite_hooks.h> > + > +struct testcase { > + const char* input; > + size_t correct_idx; > + std::errc correct_ec; > + double correct_value; > +}; > + > +constexpr testcase testcases[] = { > + { "1.fffffffffffff8p0", 18, {}, 0x1.fffffffffffff8p0 }, > + { "0.fffffffffffff8p-1022", 22, std::errc{}, 0x0.fffffffffffffep-1022 }, > + { "inf", 3, {}, std::numeric_limits<double>::infinity() }, > + { "inff", 3, {}, std::numeric_limits<double>::infinity() }, > + { "-inf", 4, {}, -std::numeric_limits<double>::infinity() }, > + { "-inff", 4, {}, -std::numeric_limits<double>::infinity() }, > + { "NAN", 3, {}, std::numeric_limits<double>::quiet_NaN() }, > + { "-NAN", 4, {}, std::numeric_limits<double>::quiet_NaN() }, > + { "NAN()", 5, {}, std::numeric_limits<double>::quiet_NaN() }, > + { "-NAN()", 6, {}, std::numeric_limits<double>::quiet_NaN() }, > + { "-NAN(test)", 10, {}, std::numeric_limits<double>::quiet_NaN() }, > + { "-NAN(test", 4, {}, std::numeric_limits<double>::quiet_NaN() }, > + { "-NAN(", 4, {}, std::numeric_limits<double>::quiet_NaN() }, > + { "0.000000000000001p-100000000000000000", 37, > std::errc::result_out_of_range, 0 }, > + { "-lol", 0, std::errc::invalid_argument, 1 }, > + { " 0", 0, std::errc::invalid_argument, 1 }, > + { "", 0, std::errc::invalid_argument, 0 }, > + { "1", 1, {}, 1 }, > + { "2", 1, {}, 2 }, > + { "3", 1, {}, 3 }, > + { "4", 1, {}, 4 }, > + { "5", 1, {}, 5 }, > + { "6", 1, {}, 6 }, > + { "7", 1, {}, 7 }, > + { "8", 1, {}, 8 }, > + { "9", 1, {}, 9 }, > + { "a", 1, {}, 0xa }, > + { "b", 1, {}, 0xb }, > + { "c", 1, {}, 0xc }, > + { "d", 1, {}, 0xd }, > + { "e", 1, {}, 0xe }, > + { "f", 1, {}, 0xf }, > + { "0.000000000000000000000000000000000000000000001p-1022", 53, > + std::errc::result_out_of_range, 0 }, > + { 
"0.00000000000000p-1022", 22, {}, 0 }, > + { "0.00000000000009", 16, {}, 0x0.00000000000009p0 }, > + { "0.0", 3, {}, 0 }, > + { "1p10000000000000000000000", 25, std::errc::result_out_of_range, 0 }, > + { "-0.0", 4, {}, -0.0 }, > + { "0.00000000000000", 16, {}, 0 }, > + { "0.0000000000000p-1022", 21, {}, 0 }, > + { ".", 0, std::errc::invalid_argument, 0 }, > + { "-.", 0, std::errc::invalid_argument, 0 }, > + { "0", 1, {}, 0 }, > + { "00", 2, {}, 0 }, > + { "00.", 3, {}, 0 }, > + { "0.", 2, {}, 0 }, > + { "1.ffffFFFFFFFFFF", 16, {}, 0x2 }, > + { "1.ffffffffffffff", 16, {}, 0x2 }, > + { "1.00000000000029", 16, {}, 0x1.0000000000003p0 }, > + { "0.00000000000008p-1022", 22, std::errc::result_out_of_range, 0 }, > + { "1.fffffffffffffp-1023", 21, {}, 0x1p-1022 }, > + { "1.fffffffffffff8p+1023", 22, std::errc::result_out_of_range, 0 }, > + { "0.ffffffffffffe8p-1022", 22, {}, 0x0.ffffffffffffep-1022 }, > + { "2.11111111111111", 16, {}, 0x1.0888888888889p+1 }, > + { "1.1111111111111", 15, {}, 0x1.1111111111111p0 }, > + { "1.11111111111111", 16, {}, 0x1.1111111111111p0 }, > + { "1.11111111111118", 16, {}, 0x1.1111111111112p0 }, > + { "1.11111111111128", 16, {}, 0x1.1111111111112p0 }, > + { "1.1111111111112801", 18, {}, 0x1.1111111111113p0 }, > + { "1.08888888888888", 16, {}, 0x1.0888888888888p0 }, > + { "1.088888888888888", 17, {}, 0x1.0888888888889p0 }, > + { "2.00000000000029", 16, {}, 0x2.0000000000002p0 }, > + { "0.ffffffffffffep-1022", 21, {}, 0x0.ffffffffffffep-1022 }, > + { "3.ffffffffffffep-1024", 21, {}, 0x1p-1022 }, > + { "1.00000000000008p+0", 19, {}, 1 }, > + { "1p-1023", 7, {}, 0x0.8p-1022 }, > + { "1p-1022", 7, {}, 0x1p-1022 }, > + { "1.1p-1033", 9, {}, 0x1.1p-1033 }, // 0.0022p-1022 > + { "22p-1038", 8, {}, 0x1.1p-1033 }, > + { "5", 1, {}, 0x5 }, > + { "a", 1, {}, 0xa }, > + { "1", 1, {}, 1.0 }, > + { "1p1", 3, {}, 0x1p1 }, > + { "1p-1", 4, {}, 0x1p-1 }, > + { "0", 1, {}, 0.0 }, > + { "A", 1, {}, 0xA }, > + { "-1", 2, {}, -1.0 }, > + { "-0", 2, {}, -0.0 }, > + 
{ "42", 2, {}, 0x42p0 }, > + { "-42", 3, {}, -0x42p0 }, > + { ".1", 2, {}, 0x0.1p0 }, > + { "22p-1000", 8, {}, 0x22p-1000 }, > + { ".0000008", 8, {}, 0x.0000008p0 }, > + { ".0000008p-1022", 14, {}, 0x.0000008p-1022 }, > + { "1p-1074", 7, {}, 0x.0000000000001p-1022 }, > + { "9999999999999", 13, {}, 0x9999999999999p0 }, > + { "1.000000000000a000", 18, {}, 0x1.000000000000ap0 }, > + { "1.000000000000a001", 18, {}, 0x1.000000000000ap0 }, > + { "1.000000000000a800", 18, {}, 0x1.000000000000ap0 }, > + { "1.000000000000a801", 18, {}, 0x1.000000000000bp0 }, > + { "1.000000000000b800", 18, {}, 0x1.000000000000cp0 }, > + { "000000", 6, {}, 0x0 }, > + { "1p", 1, {}, 0x1 }, > + { "0p99999999999999999999", 22, {}, 0 }, > + { "1p99999999999999999999", 22, std::errc::result_out_of_range, 0 }, > + { "0p-99999999999999999999", 23, {}, 0 }, > + { "1p-99999999999999999999", 23, std::errc::result_out_of_range, 0 }, > + { "99999999999999999999999", 23, {}, 0x99999999999999999999999p0 }, > + { "-1.fffffffffffffp-1023", 22, {}, -0x1p-1022 }, > + { "1.337", 5, {}, 0x1.337p0 }, > +}; > + > +void > +test01() > +{ > + for (auto [input,correct_idx,correct_ec,correct_value] : testcases) > + { > + double value; > + auto [ptr,ec] = std::from_chars(input, input+strlen(input), > + value, std::chars_format::hex); > + VERIFY( ptr == input + correct_idx ); > + VERIFY( ec == correct_ec ); > + if (ec == std::errc{}) > + { > + if (std::isnan(value) && std::isnan(value)) > + ; > + else > + { > + VERIFY( value == correct_value ); > + VERIFY( !memcmp(&value, &correct_value, sizeof(value)) ); > + } > + } > + } > +} > + > +int main() > +{ > + test01(); > +} > -- > 2.35.0.rc1 > >