https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87502

Jan Hubicka <hubicka at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |hubicka at gcc dot gnu.org
     Ever confirmed|0                           |1
   Last reconfirmed|                            |2024-12-08
             Status|UNCONFIRMED                 |NEW

--- Comment #9 from Jan Hubicka <hubicka at gcc dot gnu.org> ---
We currently do:

{
  <bb 2> [local count: 1073741824]:
  MEM[(struct basic_string *)&D.35505] ={v} {CLOBBER(bob)};
  MEM[(struct _Alloc_hider *)&D.35505] ={v} {CLOBBER(bob)};
  MEM[(struct _Alloc_hider *)&D.35505]._M_p = &D.35505.D.25336._M_local_buf;
  __builtin_memcpy (&D.35505.D.25336._M_local_buf, "abc", 3);
  D.35505._M_string_length = 3;
  MEM[(char_type &)&D.35505 + 11] = 0;
  foo (&D.35505);

  <bb 3> [local count: 1073741824]:
  _33 = D.35505._M_dataplus._M_p;
  if (&D.35505.D.25336._M_local_buf == _33)
    goto <bb 5>; [18.09%]
  else
    goto <bb 4>; [81.91%]

  <bb 4> [local count: 879501928]:
  _35 = D.35505.D.25336._M_allocated_capacity;
  _36 = _35 + 1;
  operator delete (_33, _36);

  <bb 5> [local count: 1073741824]:
  D.35505 ={v} {CLOBBER(eob)};
  D.35505 ={v} {CLOBBER(eos)};
  MEM[(struct basic_string *)&D.35539] ={v} {CLOBBER(bob)};
  MEM[(struct _Alloc_hider *)&D.35539] ={v} {CLOBBER(bob)};
  MEM[(struct _Alloc_hider *)&D.35539]._M_p = &D.35539.D.25336._M_local_buf;
  __builtin_memcpy (&D.35539.D.25336._M_local_buf, "abc", 3);
  D.35539._M_string_length = 3;
  MEM[(char_type &)&D.35539 + 11] = 0;
  foo (&D.35539);

  <bb 6> [local count: 1073741824]:
  _48 = D.35539._M_dataplus._M_p;
  if (&D.35539.D.25336._M_local_buf == _48)
    goto <bb 8>; [18.09%]
  else
    goto <bb 7>; [81.91%]

  <bb 7> [local count: 879501928]:
  _50 = D.35539.D.25336._M_allocated_capacity;
  _51 = _50 + 1;
  operator delete (_48, _51);

  <bb 8> [local count: 1073741824]:
  D.35539 ={v} {CLOBBER(eob)};
  D.35539 ={v} {CLOBBER(eos)};
  return;

  <bb 9> [count: 0]:
<L0>:
  std::__cxx11::basic_string<char>::_M_dispose (&D.35505);
  D.35505 ={v} {CLOBBER(eob)};
  _52 = __builtin_eh_pointer (4);
  __builtin_unwind_resume (_52);

  <bb 10> [count: 0]:
<L4>:
  std::__cxx11::basic_string<char>::_M_dispose (&D.35539);
  D.35539 ={v} {CLOBBER(eob)};
  _37 = __builtin_eh_pointer (12);
  __builtin_unwind_resume (_37);
}

so the memcpy calls are still not updated to also copy the zero terminator. I
think store merging may do this, but it is also quite a specific pattern, so
perhaps handling it on the libstdc++ side makes sense. The string constructor
knows the source is zero-terminated.

The reason why the full destructor is inlined is that we do not know what foo
is doing, and it may make the string bigger. "const" does not promise that the
callee does not modify the object.

So I think all we can hope for is merging the memcpy with the extra write of
the terminating 0.

Updating testcase:
#include <string>

__attribute__ ((pure))
extern int foo (const std::string &);

int
bar ()
{
  return foo ("abc") + foo (std::string("abc"));
}

Here GCC knows the object is read-only and the destructor is optimized away:

int bar ()
{
  void * D.39810;
  struct string D.35539;
  const struct string D.35505;
  int _5;
  int _8;
  int _9;
  void * _14;

  <bb 2> [local count: 1073741824]:
  MEM[(struct basic_string *)&D.35505] ={v} {CLOBBER(bob)};
  MEM[(struct _Alloc_hider *)&D.35505] ={v} {CLOBBER(bob)};
  MEM[(struct _Alloc_hider *)&D.35505]._M_p = &D.35505.D.25336._M_local_buf;
  __builtin_memcpy (&D.35505.D.25336._M_local_buf, "abc", 3);
  D.35505._M_string_length = 3;
  MEM[(char_type &)&D.35505 + 11] = 0;
  _5 = foo (&D.35505);

  <bb 3> [local count: 1073741824]:
  MEM[(struct basic_string *)&D.35539] ={v} {CLOBBER(bob)};
  MEM[(struct _Alloc_hider *)&D.35539] ={v} {CLOBBER(bob)};
  MEM[(struct _Alloc_hider *)&D.35539]._M_p = &D.35539.D.25336._M_local_buf;
  __builtin_memcpy (&D.35539.D.25336._M_local_buf, "abc", 3);
  D.35539._M_string_length = 3;
  MEM[(char_type &)&D.35539 + 11] = 0;
  _8 = foo (&D.35539);

  <bb 4> [local count: 1073741824]:
  _9 = _5 + _8;
  D.35539 ={v} {CLOBBER(eob)};
  D.35539 ={v} {CLOBBER(eos)};
  D.35505 ={v} {CLOBBER(eob)};
  D.35505 ={v} {CLOBBER(eos)};
  return _9;

  <bb 5> [count: 0]:
<L9>:
  std::__cxx11::basic_string<char>::_M_dispose (&D.35539);
  D.35539 ={v} {CLOBBER(eob)};
  __builtin_eh_copy_values (4, 8);

  <bb 6> [count: 0]:
<L13>:
  D.35539 ={v} {CLOBBER(eos)};
  std::__cxx11::basic_string<char>::_M_dispose (&D.35505);
  D.35505 ={v} {CLOBBER(eob)};
  _14 = __builtin_eh_pointer (4);
  __builtin_unwind_resume (_14);

}
_Z3barv:
.LFB1393:
        .cfi_startproc
        .cfi_personality 0,__gxx_personality_v0
        .cfi_lsda 0,.LLSDA1393
        pushl   %ebp
        .cfi_def_cfa_offset 8
        .cfi_offset 5, -8
        movl    $25185, %edx
        movl    %esp, %ebp
        .cfi_def_cfa_register 5
        pushl   %edi
        pushl   %esi
        leal    -64(%ebp), %eax
        pushl   %ebx
        .cfi_offset 7, -12
        .cfi_offset 6, -16
        .cfi_offset 3, -20
        leal    -72(%ebp), %ebx
        subl    $88, %esp
        movl    %eax, -72(%ebp)
        movw    %dx, -64(%ebp)
        movb    $99, -62(%ebp)
        movl    $3, -68(%ebp)
        movb    $0, -61(%ebp)
        pushl   %ebx
.LEHB0:
        .cfi_escape 0x2e,0x10
        call    _Z3fooRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
.LEHE0:
        movl    %eax, %edi
        leal    -40(%ebp), %eax
        leal    -48(%ebp), %esi
        movb    $99, -38(%ebp)
        movl    %eax, -48(%ebp)
        movl    $25185, %eax
        movw    %ax, -40(%ebp)
        movl    $3, -44(%ebp)
        movb    $0, -37(%ebp)
        movl    %esi, (%esp)
.LEHB1:
        call    _Z3fooRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
.LEHE1:
        addl    $16, %esp
        leal    -12(%ebp), %esp
        addl    %edi, %eax
        popl    %ebx
        .cfi_remember_state
        .cfi_restore 3
        popl    %esi
        .cfi_restore 6
        popl    %edi
        .cfi_restore 7
        popl    %ebp
        .cfi_restore 5
        .cfi_def_cfa 4, 4
        ret

Moving 25185 through a register is done to keep the instruction encoding from
exceeding 8 bytes, since that is slow on some (quite old) CPUs and we still
have this flag enabled in generic tuning. With -mtune=znver5 it produces:

_Z3barv:
.LFB1393:
        .cfi_startproc
        .cfi_personality 0,__gxx_personality_v0
        .cfi_lsda 0,.LLSDA1393
        pushl   %ebp
        .cfi_def_cfa_offset 8
        .cfi_offset 5, -8
        movl    %esp, %ebp
        .cfi_def_cfa_register 5
        pushl   %edi
        pushl   %esi
        pushl   %ebx
        subl    $88, %esp
        .cfi_offset 7, -12
        .cfi_offset 6, -16
        .cfi_offset 3, -20
        leal    -72(%ebp), %ebx
        leal    -64(%ebp), %eax
        movw    $25185, -64(%ebp)
        movb    $99, -62(%ebp)
        movl    $3, -68(%ebp)
        movl    %eax, -72(%ebp)
        movb    $0, -61(%ebp)
        pushl   %ebx
.LEHB0:
        .cfi_escape 0x2e,0x10
        call    _Z3fooRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
.LEHE0:
        movl    %eax, %edi
        leal    -48(%ebp), %esi
        leal    -40(%ebp), %eax
        movw    $25185, -40(%ebp)
        movb    $99, -38(%ebp)
        movl    $3, -44(%ebp)
        movl    %eax, -48(%ebp)
        movb    $0, -37(%ebp)
        movl    %esi, (%esp)
.LEHB1:
        call    _Z3fooRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
.LEHE1:
        addl    $16, %esp
        leal    -12(%ebp), %esp
        addl    %edi, %eax
        popl    %ebx
        .cfi_remember_state
        .cfi_restore 3
        popl    %esi
        .cfi_restore 6
        popl    %edi
        .cfi_restore 7
        popl    %ebp
        .cfi_restore 5
        .cfi_def_cfa 4, 4
        ret

Reply via email to