https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98465

--- Comment #25 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
(In reply to Jonathan Wakely from comment #22)
> I think it is rare for _M_disjunct to return false, most strings being
> appended/inserted are disjunct from the string itself.

If it is really rare, then moving it out of line might be beneficial.  It will
slow down the non-disjunct case, not only because of a (possibly PLT) call, but
also because when it isn't inlined, e.g. for constant __len1 and/or __len2 the
various comparisons can't be constant folded.  But it will make code smaller.

As a proof of concept, I've hacked up the preprocessed source:
--- pr98465.ii  2021-02-04 14:09:29.230049287 +0100
+++ pr98465-5.ii        2021-02-04 18:08:19.784739149 +0100
@@ -26229,6 +26229,10 @@ namespace __cxx11 {
       basic_string&
       _M_append(const _CharT* __s, size_type __n);

+      [[gnu::noinline, gnu::cold]] void
+      _M_replace_cold(size_type __pos, size_type __len1, const _CharT* __s,
+   const size_type __len2);
+
     public:
 # 2285
"/opt/compiler-explorer/gcc-trunk-20210204/include/c++/11.0.0/bits/basic_string.h"
3
       size_type
@@ -28220,31 +28224,15 @@ namespace std __attribute__ ((__visibili
     }

   template<typename _CharT, typename _Traits, typename _Alloc>
-    basic_string<_CharT, _Traits, _Alloc>&
+    void
     basic_string<_CharT, _Traits, _Alloc>::
-    _M_replace(size_type __pos, size_type __len1, const _CharT* __s,
+    _M_replace_cold(size_type __pos, size_type __len1, const _CharT* __s,
         const size_type __len2)
     {
-      _M_check_length(__len1, __len2, "basic_string::_M_replace");
+      pointer __p = this->_M_data() + __pos;

       const size_type __old_size = this->size();
-      const size_type __new_size = __old_size + __len2 - __len1;
-
-      if (__new_size <= this->capacity())
- {
-   pointer __p = this->_M_data() + __pos;
-
-   const size_type __how_much = __old_size - __pos - __len1;
-   if (_M_disjunct(__s))
-     {
-       if (__how_much && __len1 != __len2)
-  this->_S_move(__p + __len2, __p + __len1, __how_much);
-       if (__len2)
-  this->_S_copy(__p, __s, __len2);
-     }
-   else
-     {
-
+      const size_type __how_much = __old_size - __pos - __len1;
        if (__len2 && __len2 <= __len1)
   this->_S_move(__p, __s, __len2);
        if (__how_much && __len1 != __len2)
@@ -28263,7 +28251,34 @@ namespace std __attribute__ ((__visibili
         __len2 - __nleft);
       }
   }
+  }
+
+  template<typename _CharT, typename _Traits, typename _Alloc>
+    basic_string<_CharT, _Traits, _Alloc>&
+    basic_string<_CharT, _Traits, _Alloc>::
+    _M_replace(size_type __pos, size_type __len1, const _CharT* __s,
+        const size_type __len2)
+    {
+      _M_check_length(__len1, __len2, "basic_string::_M_replace");
+
+      const size_type __old_size = this->size();
+      const size_type __new_size = __old_size + __len2 - __len1;
+
+      pointer __p = this->_M_data() + __pos;
+
+      const size_type __how_much = __old_size - __pos - __len1;
+
+      if (__new_size <= this->capacity())
+ {
+   if (_M_disjunct(__s))
+     {
+       if (__how_much && __len1 != __len2)
+  this->_S_move(__p + __len2, __p + __len1, __how_much);
+       if (__len2)
+  this->_S_copy(__p, __s, __len2);
      }
+   else
+       _M_replace_cold(__pos, __len1, __s, __len2);
  }
       else
  this->_M_mutate(__pos, __len1, __s, __len2);

Without this patch, _Z1fRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
has 614 bytes,
with the patch there is:
399 bytes of _Z1fRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE + 31 of
_Z1fRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.cold (caller of
_M_replace_cold moved into cold section) and 317 bytes
_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE15_M_replace_coldEmmPKcm. 
But that one can be moved to libstdc++ and shared there.

Reply via email to