The problem happens when I try to optimize this procedure: ------------------------------------------------ inline void* bcAtomCompareExchange(void **destination, void *exchange, void *compare) { void* old = *destination; if (old == compare) *destination = exchange;
return old; } ------------------------------------------------- when old == compare *destination changes, but the optimizer keeps using the old value. Here is the source code (it includes just stdio.h), and yes, I know the "Atom" names are irrelevant here, it is not thread-safe, but that's not the issue, there are no threads here, straight C code ------------- test.c --------------------- #include <stdio.h> inline void* bcAtomCompareExchange(void **destination, void *exchange, void *compare) { void* old = *destination; if (old == compare) *destination = exchange; return old; } inline void* bcAtomExchange(void **target, void *exchange) { void* old = *target; *target = exchange; return old; } int main() { const char* first = "uno"; const char* second = "duo"; const char* third = "tre"; char *px = (char*) first, *py = (char*) second, *pz = (char*) third; fprintf(stdout, "px= %s py= %s pz= %s\n", px, py, pz); pz = px; char* pw = (char*) bcAtomCompareExchange((void**) &px, py, pz); fprintf(stdout, "pz= px; pw= bcAtomCompareExchange(&px, py, pz); px= %s py= %s pz= %s pw= %s\n", px, py, pz, pw); py = (char*) third; py = (char*) bcAtomExchange((void**) &px, py); fprintf(stdout, "py= tre; py= bcAtomExchange(&px, py); px= %s py= %s pz= %s pw= %s\n", px, py, pz, pw); return 0; } --------------------end of test.c ---------------------- without optimization the output is correct: px= uno py= duo pz= tre pz= px; pw= bcAtomCompareExchange(&px, py, pz); px= duo py= duo pz= uno pw= uno py= tre; py= bcAtomExchange(&px, py); px= tre py= duo pz= uno pw= uno but with optimization (-O3) the output is WRONG! 
px= uno py= duo pz= tre pz= px; pw= bcAtomCompareExchange(&px, py, pz); px= uno py= duo pz= uno pw= uno py= tre; py= bcAtomExchange(&px, py); px= duo py= duo pz= uno pw= uno to help, I looked at the generated object code, and spotted the source of the problem, as explained in the following: ----------------- relevant parts of the disassembly using objdump ---- Contents of section .rodata: 400818 ....uno.duo.tre. # uno is 40081c (initial px) 400828 px= %s py= %s p # duo is 400820 (initial py) 400838 z= %s...pz= px; # tre is 400824 (initial pz) 400848 pw= bcAtomCompar # msg1 is 400828 400858 eExchange(&px, p # msg2 is 400840 400868 y, pz); px= %s # msg3 is 400890 400878 py= %s pz= %s pw 400888 = %s....py= tre; 400898 py= bcAtomExcha 4008a8 nge(&px, py); px 4008b8 = %s py= %s pz= 4008c8 %s pw= %s...... 0000000000400660 <main>: main(): 400660: mov %rbx,-0x18(%rsp) 400665: mov %rbp,-0x10(%rsp) 40066a: xor %eax,%eax 40066c: mov %r12,-0x8(%rsp) 400671: sub $0x28,%rsp 400675: mov 0x2009b4(%rip),%rdi # 601030 <stdout@@GLIBC_2.2.5> 40067c: mov $0x400824,%r8d # %r8 gets pz (5th arg ABI) 400682: mov $0x400820,%ecx # %rcx gets py (4th arg ABI) 400687: mov $0x40081c,%edx # %rdx gets px (3rd arg ABI) 40068c: mov $0x400828,%esi # %esi gets msg1 (2nd arg ABI) 400691: movq $0x40081c,0x8(%rsp) # 0x8(%rsp) is &px 400698: 40069a: lea 0x8(%rsp),%r12 40069f: callq 400560 <fprintf@plt> # print first message fine 4006a4: mov 0x8(%rsp),%rbx # %rbx gets px (old = *dest) 4006a9: cmp %rbx,%rbx # inline the comparison old == comp 4006ac: mov %rbx,%rbp # inline return pw = old 4006af: je 400718 <main+0xb8> # jump executed, back next line 4006b1: mov 0x200978(%rip),%rdi # 601030 <stdout@@GLIBC_2.2.5> 4006b8: mov %rbp,%r9 # %r9 gets pw (6th arg ABI) 4006bb: mov %rbx,%r8 # %r8 gets pz (5th arg ABI) 4006be: mov %rbx,%rdx # << ERROR >> (3rd arg ABI) mov 0x8(%rsp),%rdx <<<<<<<< should be that with updated px <<<<<<<< but instead it ignored the fact <<<<<<<< that *destination changed !! 
4006c1: mov $0x400820,%ecx # %rcx untouched py (4th arg ABI) 4006c6: mov $0x400840,%esi # %esi gets msg2 (2nd arg ABI) 4006cb: xor %eax,%eax 4006cd: callq 400560 <fprintf@plt> # message prints WRONG!!!!! 4006d2: mov (%r12),%rcx 4006d6: mov 0x8(%rsp),%rdx 4006db: mov %rbp,%r9 4006de: mov 0x20094b(%rip),%rdi # 601030 <stdout@@GLIBC_2.2.5> 4006e5: mov %rbx,%r8 4006e8: mov $0x400890,%esi 4006ed: xor %eax,%eax 4006ef: movq $0x400824,(%r12) 4006f6: 4006f7: callq 400560 <fprintf@plt> 4006fc: xor %eax,%eax 4006fe: mov 0x10(%rsp),%rbx 400703: mov 0x18(%rsp),%rbp 400708: mov 0x20(%rsp),%r12 40070d: add $0x28,%rsp 400711: retq 400712: nopw 0x0(%rax,%rax,1) 400718: movq $0x400820,0x8(%rsp) # px reference is correct but not used 40071f: 400721: jmp 4006b1 <main+0x51> 400723: nop ------------------------------------------------------------------------------ I think I have a workaround, instead of using void* as generic pointer, using a typedef like typedef char * addr_t like in the old days when void did not exist, but not sure under what conditions the compiler will generate wrong code. Thanks, Ricardo -- Summary: inline code optimized wrong with -O3 Product: gcc Version: 4.3.2 Status: UNCONFIRMED Severity: major Priority: P3 Component: c AssignedTo: unassigned at gcc dot gnu dot org ReportedBy: ricardo at teli dot org GCC build triplet: gcc version 4.3.2 [gcc-4_3-branch revision 141291] (SUSE Linux) GCC target triplet: x86_64-suse-linux http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39093