On Fri, Mar 24, 2017 at 06:51:15PM +0100, Dmitry Vyukov wrote: > On Fri, Mar 24, 2017 at 6:23 PM, Peter Zijlstra <pet...@infradead.org> wrote: > > On Fri, Mar 24, 2017 at 09:54:46AM -0700, Andy Lutomirski wrote: > >> > So the first snipped I tested regressed like so: > >> > > >> > > >> > 0000000000000000 <T_refcount_inc>: > >> > 0000000000000000 <T_refcount_inc>: > >> > 0: 8b 07 mov (%rdi),%eax 0: > >> > 8b 17 mov (%rdi),%edx > >> > 2: 83 f8 ff cmp $0xffffffff,%eax 2: > >> > 83 fa ff cmp $0xffffffff,%edx > >> > 5: 74 13 je 1a <T_refcount_inc+0x1a> 5: > >> > 74 1a je 21 <T_refcount_inc+0x21> > >> > 7: 85 c0 test %eax,%eax 7: > >> > 85 d2 test %edx,%edx > >> > 9: 74 0d je 18 <T_refcount_inc+0x18> 9: > >> > 74 13 je 1e <T_refcount_inc+0x1e> > >> > b: 8d 50 01 lea 0x1(%rax),%edx b: > >> > 8d 4a 01 lea 0x1(%rdx),%ecx > >> > e: f0 0f b1 17 lock cmpxchg %edx,(%rdi) e: > >> > 89 d0 mov %edx,%eax > >> > 12: 75 ee jne 2 <T_refcount_inc+0x2> 10: > >> > f0 0f b1 0f lock cmpxchg %ecx,(%rdi) > >> > 14: ff c2 inc %edx 14: > >> > 74 04 je 1a <T_refcount_inc+0x1a> > >> > 16: 75 02 jne 1a <T_refcount_inc+0x1a> 16: > >> > 89 c2 mov %eax,%edx > >> > 18: 0f 0b ud2 18: > >> > eb e8 jmp 2 <T_refcount_inc+0x2> > >> > 1a: c3 retq 1a: > >> > ff c1 inc %ecx > >> > 1c: > >> > 75 03 jne 21 <T_refcount_inc+0x21> > >> > 1e: > >> > 0f 0b ud2 > >> > 20: > >> > c3 retq > >> > 21: > >> > c3 retq > >>
> This seems to help ;) > > #define try_cmpxchg(ptr, pold, new) __atomic_compare_exchange_n(ptr, pold, > new, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) That gets me: 0000000000000000 <T_refcount_inc>: 0: 8b 07 mov (%rdi),%eax 2: 89 44 24 fc mov %eax,-0x4(%rsp) 6: 8b 44 24 fc mov -0x4(%rsp),%eax a: 83 f8 ff cmp $0xffffffff,%eax d: 74 1c je 2b <T_refcount_inc+0x2b> f: 85 c0 test %eax,%eax 11: 75 07 jne 1a <T_refcount_inc+0x1a> 13: 8b 44 24 fc mov -0x4(%rsp),%eax 17: 0f 0b ud2 19: c3 retq 1a: 8d 50 01 lea 0x1(%rax),%edx 1d: 8b 44 24 fc mov -0x4(%rsp),%eax 21: f0 0f b1 17 lock cmpxchg %edx,(%rdi) 25: 75 db jne 2 <T_refcount_inc+0x2> 27: ff c2 inc %edx 29: 74 e8 je 13 <T_refcount_inc+0x13> 2b: c3 retq Which is even worse... (I did double check it actually compiled)