Hi Honza, please CC the libstdc++ list on patches, thanks.
On Sun, 24 Nov 2024 at 00:43, Jan Hubicka <hubi...@ucw.cz> wrote:
>
> Hi,
> another common source of unnecesary throw_bad_alloc calls is
> basic_string::_M_create.
> basic_string<_CharT, _Traits, _Alloc>::
> _M_create(size_type& __capacity, size_type __old_capacity)
> {
> // _GLIBCXX_RESOLVE_LIB_DEFECTS
> // 83. String::npos vs. string::max_size()
> if (__capacity > max_size())
> std::__throw_length_error(__N("basic_string::_M_create"));
>
> // The below implements an exponential growth policy, necessary to
> // meet amortized linear time requirements of the library: see
> // http://gcc.gnu.org/ml/libstdc++/2001-07/msg00085.html.
> if (__capacity > __old_capacity && __capacity < 2 * __old_capacity)
> {
> __capacity = 2 * __old_capacity;
> // Never allocate a string bigger than max_size.
> if (__capacity > max_size())
> __capacity = max_size();
> }
>
> // NB: Need an array of char_type[__capacity], plus a terminating
> // null char_type() element.
> return _S_allocate(_M_get_allocator(), __capacity + 1);
> }
>
> The code makes it visible that string is never bigger then max_size, however
> + 1
> makes it one byte bigger than maximal size allowed by allocator. I believe
> this is by miscounting the 0 at the end of string in max_size function:
>
> - { return (_Alloc_traits::max_size(_M_get_allocator()) - 1) / 2; }
> + { return _Alloc_traits::max_size(_M_get_allocator()) / 2 - 1; }
>
> Path also adds __builtin_unreachable to express value ranges of capacity and
> length.
> This makes it possible to optimize out throw when copying strings as in the
> testcase.
>
> std::string
> test (std::string &a)
> {
> return a;
> }
>
> This now yields the following:
>
> struct string test (struct string & a)
> {
> size_type __siz;
> struct string & _2(D);
> char[16] * _4;
> char * _5;
> char * _7;
> char * prephitmp_10;
> char * pretmp_11;
> char _15;
> char * _18;
> char * _27;
> long unsigned int _32;
>
> <bb 2> [local count: 1073741824]:
> _4 = &MEM[(struct basic_string *)_2(D)].D.32970._M_local_buf;
> MEM[(struct _Alloc_hider *)_2(D)]._M_p = _4;
> _5 = MEM[(const struct basic_string *)a_3(D)]._M_dataplus._M_p;
> __siz_6 = MEM[(const struct basic_string *)a_3(D)]._M_string_length;
> if (__siz_6 > 15)
> goto <bb 3>; [33.00%]
> else
> goto <bb 4>; [67.00%]
>
> <bb 3> [local count: 354334800]:
> _32 = __siz_6 + 1;
> _27 = operator new (_32);
> MEM[(struct basic_string *)_2(D)]._M_dataplus._M_p = _27;
> MEM[(struct basic_string *)_2(D)].D.32970._M_allocated_capacity = __siz_6;
> goto <bb 8>; [100.00%]
>
> <bb 4> [local count: 719407024]:
> if (__siz_6 == 1)
> goto <bb 5>; [34.00%]
> else
> goto <bb 7>; [66.00%]
>
> <bb 5> [local count: 353024841]:
> _15 = MEM[(const char_type &)_5];
> MEM[(char_type &)_2(D) + 16] = _15;
>
> <bb 6> [local count: 350316931]:
> goto <bb 9>; [100.00%]
>
> <bb 7> [local count: 474808633]:
> if (__siz_6 == 0)
> goto <bb 6>; [73.78%]
> else
> goto <bb 8>; [26.22%]
>
> <bb 8> [local count: 334966574]:
> # _7 = PHI <_4(7), _27(3)>
> __builtin_memcpy (_7, _5, __siz_6);
> pretmp_11 = MEM[(const struct basic_string *)_2(D)]._M_dataplus._M_p;
>
> <bb 9> [local count: 1038308344]:
> # prephitmp_10 = PHI <_4(6), pretmp_11(8)>
> MEM[(struct basic_string *)_2(D)]._M_string_length = __siz_6;
> _18 = prephitmp_10 + __siz_6;
> MEM[(char_type &)_18] = 0;
> return _2(D);
>
> }
>
> .text segment is reduced from 213 bytes to 192 (saving 2 conditionals
> and 2 calls to throw)
>
> I find it funny that the code special cases a.size () == 1 to copy single byte
> and a.size () == 0 to bypass memcpy call. I would say it is job of middle-end
> to optimize memcpy reasonably even for small blocks. Was this based on some
> data
> showing that many strings have size of 0 and 1 which is not visible to
> compiler?
>
> regtested x86_64-linux, OK?
>
> Honza
>
> libstdc++-v3/ChangeLog:
>
> * include/bits/basic_string.h:
>
> gcc/testsuite/ChangeLog:
>
> * g++.dg/tree-ssa/string-1.C: New test.
>
> diff --git a/gcc/testsuite/g++.dg/tree-ssa/string-1.C
> b/gcc/testsuite/g++.dg/tree-ssa/string-1.C
> new file mode 100644
> index 00000000000..d38c23a7628
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/tree-ssa/string-1.C
> @@ -0,0 +1,9 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -std=c++20 -fdump-tree-optimized" } */
> +#include <string>
> +std::string
> +test (std::string &a)
> +{
> + return a;
> +}
> +/* { dg-final { scan-tree-dump-not "throw" "optimized" } } */
> diff --git a/libstdc++-v3/include/bits/basic_string.h
> b/libstdc++-v3/include/bits/basic_string.h
> index f5b320099b1..d754deb28ed 100644
> --- a/libstdc++-v3/include/bits/basic_string.h
> +++ b/libstdc++-v3/include/bits/basic_string.h
> @@ -1079,20 +1079,30 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
> _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
> size_type
> size() const _GLIBCXX_NOEXCEPT
> - { return _M_string_length; }
> + {
> + size_type __siz = _M_string_length;
> + if (__siz > max_size ())
> + __builtin_unreachable ();
> + return __siz;
> + }
>
> /// Returns the number of characters in the string, not including any
> /// null-termination.
> _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
> size_type
> length() const _GLIBCXX_NOEXCEPT
> - { return _M_string_length; }
> + {
> + size_type __siz = _M_string_length;
> + if (__siz > max_size ())
> + __builtin_unreachable ();
> + return __siz;
> + }
>
> /// Returns the size() of the largest possible %string.
> _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
> size_type
> max_size() const _GLIBCXX_NOEXCEPT
> - { return (_Alloc_traits::max_size(_M_get_allocator()) - 1) / 2; }
> + { return _Alloc_traits::max_size(_M_get_allocator()) / 2 - 1; }
>
> /**
> * @brief Resizes the %string to the specified number of characters.
> @@ -1184,8 +1194,11 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
> size_type
> capacity() const _GLIBCXX_NOEXCEPT
> {
> - return _M_is_local() ? size_type(_S_local_capacity)
> - : _M_allocated_capacity;
> + size_t __siz = _M_is_local() ? size_type(_S_local_capacity)
> + : _M_allocated_capacity;
> + if (__siz < _S_local_capacity || __siz > max_size () + 1)
> + __builtin_unreachable ();
> + return __siz;
> }
>
> /**
>