Hi Honza, please CC the libstdc++ list on patches, thanks.

On Sun, 24 Nov 2024 at 00:43, Jan Hubicka <hubi...@ucw.cz> wrote:
>
> Hi,
> another common source of unnecesary throw_bad_alloc calls is 
> basic_string::_M_create.
>     basic_string<_CharT, _Traits, _Alloc>::
>     _M_create(size_type& __capacity, size_type __old_capacity)
>     {
>       // _GLIBCXX_RESOLVE_LIB_DEFECTS
>       // 83.  String::npos vs. string::max_size()
>       if (__capacity > max_size())
>         std::__throw_length_error(__N("basic_string::_M_create"));
>
>       // The below implements an exponential growth policy, necessary to
>       // meet amortized linear time requirements of the library: see
>       // http://gcc.gnu.org/ml/libstdc++/2001-07/msg00085.html.
>       if (__capacity > __old_capacity && __capacity < 2 * __old_capacity)
>         {
>           __capacity = 2 * __old_capacity;
>           // Never allocate a string bigger than max_size.
>           if (__capacity > max_size())
>             __capacity = max_size();
>         }
>
>       // NB: Need an array of char_type[__capacity], plus a terminating
>       // null char_type() element.
>       return _S_allocate(_M_get_allocator(), __capacity + 1);
>     }
>
> The code makes it visible that string is never bigger then max_size, however 
> + 1
> makes it one byte bigger than maximal size allowed by allocator.  I believe
> this is by miscounting the 0 at the end of string in max_size function:
>
> -      { return (_Alloc_traits::max_size(_M_get_allocator()) - 1) / 2; }
> +      { return _Alloc_traits::max_size(_M_get_allocator()) / 2 - 1; }
>
> Path also adds __builtin_unreachable to express value ranges of capacity and 
> length.
> This makes it possible to optimize out throw when copying strings as in the 
> testcase.
>
> std::string
> test (std::string &a)
> {
>         return a;
> }
>
> This now yields the following:
>
> struct string test (struct string & a)
> {
>   size_type __siz;
>   struct string & _2(D);
>   char[16] * _4;
>   char * _5;
>   char * _7;
>   char * prephitmp_10;
>   char * pretmp_11;
>   char _15;
>   char * _18;
>   char * _27;
>   long unsigned int _32;
>
>   <bb 2> [local count: 1073741824]:
>   _4 = &MEM[(struct basic_string *)_2(D)].D.32970._M_local_buf;
>   MEM[(struct _Alloc_hider *)_2(D)]._M_p = _4;
>   _5 = MEM[(const struct basic_string *)a_3(D)]._M_dataplus._M_p;
>   __siz_6 = MEM[(const struct basic_string *)a_3(D)]._M_string_length;
>   if (__siz_6 > 15)
>     goto <bb 3>; [33.00%]
>   else
>     goto <bb 4>; [67.00%]
>
>   <bb 3> [local count: 354334800]:
>   _32 = __siz_6 + 1;
>   _27 = operator new (_32);
>   MEM[(struct basic_string *)_2(D)]._M_dataplus._M_p = _27;
>   MEM[(struct basic_string *)_2(D)].D.32970._M_allocated_capacity = __siz_6;
>   goto <bb 8>; [100.00%]
>
>   <bb 4> [local count: 719407024]:
>   if (__siz_6 == 1)
>     goto <bb 5>; [34.00%]
>   else
>     goto <bb 7>; [66.00%]
>
>   <bb 5> [local count: 353024841]:
>   _15 = MEM[(const char_type &)_5];
>   MEM[(char_type &)_2(D) + 16] = _15;
>
>   <bb 6> [local count: 350316931]:
>   goto <bb 9>; [100.00%]
>
>   <bb 7> [local count: 474808633]:
>   if (__siz_6 == 0)
>     goto <bb 6>; [73.78%]
>   else
>     goto <bb 8>; [26.22%]
>
>   <bb 8> [local count: 334966574]:
>   # _7 = PHI <_4(7), _27(3)>
>   __builtin_memcpy (_7, _5, __siz_6);
>   pretmp_11 = MEM[(const struct basic_string *)_2(D)]._M_dataplus._M_p;
>
>   <bb 9> [local count: 1038308344]:
>   # prephitmp_10 = PHI <_4(6), pretmp_11(8)>
>   MEM[(struct basic_string *)_2(D)]._M_string_length = __siz_6;
>   _18 = prephitmp_10 + __siz_6;
>   MEM[(char_type &)_18] = 0;
>   return _2(D);
>
> }
>
> .text segment is reduced from 213 bytes to 192 (saving 2 conditionals
> and 2 calls to throw)
>
> I find it funny that the code special cases a.size () == 1 to copy single byte
> and a.size () == 0 to bypass memcpy call.  I would say it is job of middle-end
> to optimize memcpy reasonably even for small blocks.  Was this based on some 
> data
> showing that many strings have size of 0 and 1 which is not visible to
> compiler?
>
> regtested x86_64-linux, OK?
>
> Honza
>
> libstdc++-v3/ChangeLog:
>
>         * include/bits/basic_string.h:
>
> gcc/testsuite/ChangeLog:
>
>         * g++.dg/tree-ssa/string-1.C: New test.
>
> diff --git a/gcc/testsuite/g++.dg/tree-ssa/string-1.C 
> b/gcc/testsuite/g++.dg/tree-ssa/string-1.C
> new file mode 100644
> index 00000000000..d38c23a7628
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/tree-ssa/string-1.C
> @@ -0,0 +1,9 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -std=c++20 -fdump-tree-optimized" } */
> +#include <string>
> +std::string
> +test (std::string &a)
> +{
> +       return a;
> +}
> +/* { dg-final { scan-tree-dump-not "throw" "optimized" } } */
> diff --git a/libstdc++-v3/include/bits/basic_string.h 
> b/libstdc++-v3/include/bits/basic_string.h
> index f5b320099b1..d754deb28ed 100644
> --- a/libstdc++-v3/include/bits/basic_string.h
> +++ b/libstdc++-v3/include/bits/basic_string.h
> @@ -1079,20 +1079,30 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
>        _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
>        size_type
>        size() const _GLIBCXX_NOEXCEPT
> -      { return _M_string_length; }
> +      {
> +       size_type __siz = _M_string_length;
> +       if (__siz > max_size ())
> +         __builtin_unreachable ();
> +       return __siz;
> +      }
>
>        ///  Returns the number of characters in the string, not including any
>        ///  null-termination.
>        _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
>        size_type
>        length() const _GLIBCXX_NOEXCEPT
> -      { return _M_string_length; }
> +      {
> +       size_type __siz = _M_string_length;
> +       if (__siz > max_size ())
> +         __builtin_unreachable ();
> +       return __siz;
> +      }
>
>        ///  Returns the size() of the largest possible %string.
>        _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR
>        size_type
>        max_size() const _GLIBCXX_NOEXCEPT
> -      { return (_Alloc_traits::max_size(_M_get_allocator()) - 1) / 2; }
> +      { return _Alloc_traits::max_size(_M_get_allocator()) / 2 - 1; }
>
>        /**
>         *  @brief  Resizes the %string to the specified number of characters.
> @@ -1184,8 +1194,11 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
>        size_type
>        capacity() const _GLIBCXX_NOEXCEPT
>        {
> -       return _M_is_local() ? size_type(_S_local_capacity)
> -                            : _M_allocated_capacity;
> +       size_t __siz = _M_is_local() ? size_type(_S_local_capacity)
> +                                    : _M_allocated_capacity;
> +       if (__siz < _S_local_capacity || __siz > max_size () + 1)
> +         __builtin_unreachable ();
> +       return __siz;
>        }
>
>        /**
>

Reply via email to