https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87502
Jan Hubicka <hubicka at gcc dot gnu.org> changed: What |Removed |Added ---------------------------------------------------------------------------- CC| |hubicka at gcc dot gnu.org Ever confirmed|0 |1 Last reconfirmed| |2024-12-08 Status|UNCONFIRMED |NEW --- Comment #9 from Jan Hubicka <hubicka at gcc dot gnu.org> --- We currently do: { <bb 2> [local count: 1073741824]: MEM[(struct basic_string *)&D.35505] ={v} {CLOBBER(bob)}; MEM[(struct _Alloc_hider *)&D.35505] ={v} {CLOBBER(bob)}; MEM[(struct _Alloc_hider *)&D.35505]._M_p = &D.35505.D.25336._M_local_buf; __builtin_memcpy (&D.35505.D.25336._M_local_buf, "abc", 3); D.35505._M_string_length = 3; MEM[(char_type &)&D.35505 + 11] = 0; foo (&D.35505); <bb 3> [local count: 1073741824]: _33 = D.35505._M_dataplus._M_p; if (&D.35505.D.25336._M_local_buf == _33) goto <bb 5>; [18.09%] else goto <bb 4>; [81.91%] <bb 4> [local count: 879501928]: _35 = D.35505.D.25336._M_allocated_capacity; _36 = _35 + 1; operator delete (_33, _36); <bb 5> [local count: 1073741824]: D.35505 ={v} {CLOBBER(eob)}; D.35505 ={v} {CLOBBER(eos)}; MEM[(struct basic_string *)&D.35539] ={v} {CLOBBER(bob)}; MEM[(struct _Alloc_hider *)&D.35539] ={v} {CLOBBER(bob)}; MEM[(struct _Alloc_hider *)&D.35539]._M_p = &D.35539.D.25336._M_local_buf; __builtin_memcpy (&D.35539.D.25336._M_local_buf, "abc", 3); D.35539._M_string_length = 3; MEM[(char_type &)&D.35539 + 11] = 0; foo (&D.35539); <bb 6> [local count: 1073741824]: _48 = D.35539._M_dataplus._M_p; if (&D.35539.D.25336._M_local_buf == _48) goto <bb 8>; [18.09%] else goto <bb 7>; [81.91%] <bb 7> [local count: 879501928]: _50 = D.35539.D.25336._M_allocated_capacity; _51 = _50 + 1; operator delete (_48, _51); <bb 8> [local count: 1073741824]: D.35539 ={v} {CLOBBER(eob)}; D.35539 ={v} {CLOBBER(eos)}; return; <bb 9> [count: 0]: <L0>: std::__cxx11::basic_string<char>::_M_dispose (&D.35505); D.35505 ={v} {CLOBBER(eob)}; _52 = __builtin_eh_pointer (4); __builtin_unwind_resume (_52); <bb 10> [count: 0]: <L4>: 
std::__cxx11::basic_string<char>::_M_dispose (&D.35539); D.35539 ={v} {CLOBBER(eob)}; _37 = __builtin_eh_pointer (12); __builtin_unwind_resume (_37); } so the memcpy calls are still not updated to also copy the terminating zero. I think store merging may do this, but it is also quite a specific pattern, so perhaps handling it on the libstdc++ side makes sense. The string constructor knows the source is zero-terminated. The reason the full destructor is inlined is that we do not know what foo is doing, and it may make the string bigger. "const" does not promise that the callee does not modify the object. So I think all we can hope for is merging the memcpy with the extra write of 0. Updating testcase: #include <string> __attribute__ ((pure)) extern int foo (const std::string &); int bar () { return foo ("abc") + foo (std::string("abc")); } here GCC knows the object is read-only and the destructor is optimized away: int bar () { void * D.39810; struct string D.35539; const struct string D.35505; int _5; int _8; int _9; void * _14; <bb 2> [local count: 1073741824]: MEM[(struct basic_string *)&D.35505] ={v} {CLOBBER(bob)}; MEM[(struct _Alloc_hider *)&D.35505] ={v} {CLOBBER(bob)}; MEM[(struct _Alloc_hider *)&D.35505]._M_p = &D.35505.D.25336._M_local_buf; __builtin_memcpy (&D.35505.D.25336._M_local_buf, "abc", 3); D.35505._M_string_length = 3; MEM[(char_type &)&D.35505 + 11] = 0; _5 = foo (&D.35505); <bb 3> [local count: 1073741824]: MEM[(struct basic_string *)&D.35539] ={v} {CLOBBER(bob)}; MEM[(struct _Alloc_hider *)&D.35539] ={v} {CLOBBER(bob)}; MEM[(struct _Alloc_hider *)&D.35539]._M_p = &D.35539.D.25336._M_local_buf; __builtin_memcpy (&D.35539.D.25336._M_local_buf, "abc", 3); D.35539._M_string_length = 3; MEM[(char_type &)&D.35539 + 11] = 0; _8 = foo (&D.35539); <bb 4> [local count: 1073741824]: _9 = _5 + _8; D.35539 ={v} {CLOBBER(eob)}; D.35539 ={v} {CLOBBER(eos)}; D.35505 ={v} {CLOBBER(eob)}; D.35505 ={v} {CLOBBER(eos)}; return _9; <bb 5> [count: 0]: <L9>: 
std::__cxx11::basic_string<char>::_M_dispose (&D.35539); D.35539 ={v} {CLOBBER(eob)}; __builtin_eh_copy_values (4, 8); <bb 6> [count: 0]: <L13>: D.35539 ={v} {CLOBBER(eos)}; std::__cxx11::basic_string<char>::_M_dispose (&D.35505); D.35505 ={v} {CLOBBER(eob)}; _14 = __builtin_eh_pointer (4); __builtin_unwind_resume (_14); } _Z3barv: .LFB1393: .cfi_startproc .cfi_personality 0,__gxx_personality_v0 .cfi_lsda 0,.LLSDA1393 pushl %ebp .cfi_def_cfa_offset 8 .cfi_offset 5, -8 movl $25185, %edx movl %esp, %ebp .cfi_def_cfa_register 5 pushl %edi pushl %esi leal -64(%ebp), %eax pushl %ebx .cfi_offset 7, -12 .cfi_offset 6, -16 .cfi_offset 3, -20 leal -72(%ebp), %ebx subl $88, %esp movl %eax, -72(%ebp) movw %dx, -64(%ebp) movb $99, -62(%ebp) movl $3, -68(%ebp) movb $0, -61(%ebp) pushl %ebx .LEHB0: .cfi_escape 0x2e,0x10 call _Z3fooRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE .LEHE0: movl %eax, %edi leal -40(%ebp), %eax leal -48(%ebp), %esi movb $99, -38(%ebp) movl %eax, -48(%ebp) movl $25185, %eax movw %ax, -40(%ebp) movl $3, -44(%ebp) movb $0, -37(%ebp) movl %esi, (%esp) .LEHB1: call _Z3fooRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE .LEHE1: addl $16, %esp leal -12(%ebp), %esp addl %edi, %eax popl %ebx .cfi_remember_state .cfi_restore 3 popl %esi .cfi_restore 6 popl %edi .cfi_restore 7 popl %ebp .cfi_restore 5 .cfi_def_cfa 4, 4 ret The constant 25185 is moved through a register to keep the instruction encoding from exceeding 8 bytes, since longer encodings are slow on some (quite old) CPUs and we still have this flag enabled in generic tuning. 
With -mtune=znver5 it produces: _Z3barv: .LFB1393: .cfi_startproc .cfi_personality 0,__gxx_personality_v0 .cfi_lsda 0,.LLSDA1393 pushl %ebp .cfi_def_cfa_offset 8 .cfi_offset 5, -8 movl %esp, %ebp .cfi_def_cfa_register 5 pushl %edi pushl %esi pushl %ebx subl $88, %esp .cfi_offset 7, -12 .cfi_offset 6, -16 .cfi_offset 3, -20 leal -72(%ebp), %ebx leal -64(%ebp), %eax movw $25185, -64(%ebp) movb $99, -62(%ebp) movl $3, -68(%ebp) movl %eax, -72(%ebp) movb $0, -61(%ebp) pushl %ebx .LEHB0: .cfi_escape 0x2e,0x10 call _Z3fooRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE .LEHE0: movl %eax, %edi leal -48(%ebp), %esi leal -40(%ebp), %eax movw $25185, -40(%ebp) movb $99, -38(%ebp) movl $3, -44(%ebp) movl %eax, -48(%ebp) movb $0, -37(%ebp) movl %esi, (%esp) .LEHB1: call _Z3fooRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE .LEHE1: addl $16, %esp leal -12(%ebp), %esp addl %edi, %eax popl %ebx .cfi_remember_state .cfi_restore 3 popl %esi .cfi_restore 6 popl %edi .cfi_restore 7 popl %ebp .cfi_restore 5 .cfi_def_cfa 4, 4 ret