I am trying to track down a bug that I only see on Fedora 21 when building x86_64 code with GCC 4.9.2. It may have first appeared in an earlier compiler release, but code built with GCC 4.8 does not have this problem.
I am building the c-ares library as part of a larger project and getting malloc failures. Valgrind claims that the code is writing outside its allocated blocks. I traced it to the call memcpy(query->tcpbuf + 2, qbuf, qlen); in that call, qlen == 35. I checked the malloc, and it allocates 37 bytes for tcpbuf. This code has worked with older compilers for a long time. As best I can tell, the builtin memcpy that is being used here (and it is definitely the builtin, because turning off builtins builds working code) is writing way past the end of the buffer. But for whatever reason I can't seem to build a stand-alone example. I am looking for some ideas. Maybe someone could audit the ASM code for the memcpy builtin and see if anything jumps out. I haven't tried that yet. Is it all one piece, or is it multiple chunks? Could it have bad ASM specifications that allow the optimizer to write into a register that should be preserved? Here's the asm for the function along with some commentary: 00000000001b1a57 <ares_send>: 1b1a57: 41 57 push %r15 1b1a59: 41 56 push %r14 1b1a5b: 41 55 push %r13 1b1a5d: 41 54 push %r12 1b1a5f: 55 push %rbp 1b1a60: 53 push %rbx 1b1a61: 48 83 ec 28 sub $0x28,%rsp 1b1a65: 89 d5 mov %edx,%ebp 1b1a67: 49 89 ce mov %rcx,%r14 1b1a6a: 4d 89 c5 mov %r8,%r13 1b1a6d: 8d 42 f4 lea -0xc(%rdx),%eax 1b1a70: 3d f3 ff 00 00 cmp $0xfff3,%eax 1b1a75: 76 21 jbe 1b1a98 <ares_send+0x41> 1b1a77: 45 31 c0 xor %r8d,%r8d 1b1a7a: 31 c9 xor %ecx,%ecx 1b1a7c: 31 d2 xor %edx,%edx 1b1a7e: be 07 00 00 00 mov $0x7,%esi 1b1a83: 4c 89 ef mov %r13,%rdi 1b1a86: 41 ff d6 callq *%r14 1b1a89: 48 83 c4 28 add $0x28,%rsp 1b1a8d: 5b pop %rbx 1b1a8e: 5d pop %rbp 1b1a8f: 41 5c pop %r12 1b1a91: 41 5d pop %r13 1b1a93: 41 5e pop %r14 1b1a95: 41 5f pop %r15 1b1a97: c3 retq 1b1a98: 49 89 fc mov %rdi,%r12 1b1a9b: 49 89 f7 mov %rsi,%r15 1b1a9e: bf c8 00 00 00 mov $0xc8,%edi 1b1aa3: e8 78 f4 ea ff callq 60f20 <malloc@plt> 1b1aa8: 48 89 c3 mov %rax,%rbx 1b1aab: 48 85 c0 test %rax,%rax 1b1aae: 0f 84 
a0 02 00 00 je 1b1d54 <ares_send+0x2fd> 1b1ab4: 8d 45 02 lea 0x2(%rbp),%eax 1b1ab7: 89 44 24 0c mov %eax,0xc(%rsp) 1b1abb: 48 63 f8 movslq %eax,%rdi 1b1abe: e8 5d f4 ea ff callq 60f20 <malloc@plt> 1b1ac3: 48 89 43 78 mov %rax,0x78(%rbx) 1b1ac7: 48 85 c0 test %rax,%rax 1b1aca: 0f 84 7c 02 00 00 je 1b1d4c <ares_send+0x2f5> 1b1ad0: 48 89 04 24 mov %rax,(%rsp) 1b1ad4: 49 63 bc 24 98 00 00 movslq 0x98(%r12),%rdi 1b1adb: 00 1b1adc: 89 7c 24 08 mov %edi,0x8(%rsp) 1b1ae0: 48 c1 e7 03 shl $0x3,%rdi 1b1ae4: e8 37 f4 ea ff callq 60f20 <malloc@plt> 1b1ae9: 48 89 c7 mov %rax,%rdi 1b1aec: 48 89 83 b0 00 00 00 mov %rax,0xb0(%rbx) 1b1af3: 48 85 c0 test %rax,%rax 1b1af6: 8b 4c 24 08 mov 0x8(%rsp),%ecx 1b1afa: 48 8b 14 24 mov (%rsp),%rdx 1b1afe: 0f 84 40 02 00 00 je 1b1d44 <ares_send+0x2ed> 1b1b04: 41 0f b6 07 movzbl (%r15),%eax 1b1b08: c1 e0 08 shl $0x8,%eax 1b1b0b: 45 0f b6 47 01 movzbl 0x1(%r15),%r8d 1b1b10: 44 09 c0 or %r8d,%eax 1b1b13: 66 89 03 mov %ax,(%rbx) 1b1b16: 48 c7 43 08 00 00 00 movq $0x0,0x8(%rbx) 1b1b1d: 00 1b1b1e: 48 c7 43 10 00 00 00 movq $0x0,0x10(%rbx) 1b1b25: 00 1b1b26: 89 e8 mov %ebp,%eax 1b1b28: c1 f8 08 sar $0x8,%eax 1b1b2b: 88 02 mov %al,(%rdx) 1b1b2d: 40 88 6a 01 mov %bpl,0x1(%rdx) *** HERE IS WHERE IT STARTS: Getting query->tcpbuf + 2 *** *** ebp has the length value 35 *** *** memcpy(query->tcpbuf + 2, qbuf, qlen); *** 1b1b31: 4c 8d 4a 02 lea 0x2(%rdx),%r9 1b1b35: 89 e8 mov %ebp,%eax 1b1b37: 4d 89 c8 mov %r9,%r8 1b1b3a: 4c 89 fe mov %r15,%rsi 1b1b3d: 83 fd 20 cmp $0x20,%ebp 1b1b40: 0f 83 96 01 00 00 jae 1b1cdc <ares_send+0x285> *** That jae took the jump down to 1b1cdc *** 1b1b46: 83 e0 1f and $0x1f,%eax 1b1b49: 74 15 je 1b1b60 <ares_send+0x109> 1b1b4b: 31 d2 xor %edx,%edx 1b1b4d: 41 89 d2 mov %edx,%r10d 1b1b50: 46 0f b6 1c 16 movzbl (%rsi,%r10,1),%r11d 1b1b55: 47 88 1c 10 mov %r11b,(%r8,%r10,1) 1b1b59: 83 c2 01 add $0x1,%edx 1b1b5c: 39 c2 cmp %eax,%edx 1b1b5e: 72 ed jb 1b1b4d <ares_send+0xf6> 1b1b60: 8b 44 24 0c mov 0xc(%rsp),%eax 1b1b64: 89 83 80 00 00 
00 mov %eax,0x80(%rbx) 1b1b6a: 4c 89 8b 88 00 00 00 mov %r9,0x88(%rbx) 1b1b71: 89 ab 90 00 00 00 mov %ebp,0x90(%rbx) 1b1b77: 4c 89 b3 98 00 00 00 mov %r14,0x98(%rbx) 1b1b7e: 4c 89 ab a0 00 00 00 mov %r13,0xa0(%rbx) 1b1b85: c7 83 a8 00 00 00 00 movl $0x0,0xa8(%rbx) 1b1b8c: 00 00 00 1b1b8f: 41 8b 84 24 b0 01 00 mov 0x1b0(%r12),%eax 1b1b96: 00 1b1b97: 89 83 ac 00 00 00 mov %eax,0xac(%rbx) 1b1b9d: 41 83 7c 24 10 01 cmpl $0x1,0x10(%r12) 1b1ba3: 0f 84 cc 01 00 00 je 1b1d75 <ares_send+0x31e> 1b1ba9: 85 c9 test %ecx,%ecx 1b1bab: 7e 64 jle 1b1c11 <ares_send+0x1ba> 1b1bad: 8d 51 fe lea -0x2(%rcx),%edx 1b1bb0: d1 ea shr %edx 1b1bb2: 83 c2 01 add $0x1,%edx 1b1bb5: 8d 04 12 lea (%rdx,%rdx,1),%eax 1b1bb8: 8d 71 ff lea -0x1(%rcx),%esi 1b1bbb: 83 fe 01 cmp $0x1,%esi 1b1bbe: 0f 86 db 01 00 00 jbe 1b1d9f <ares_send+0x348> 1b1bc4: 31 f6 xor %esi,%esi 1b1bc6: 66 0f ef c0 pxor %xmm0,%xmm0 1b1bca: 49 89 f0 mov %rsi,%r8 1b1bcd: 49 c1 e0 04 shl $0x4,%r8 1b1bd1: f3 42 0f 7f 04 07 movdqu %xmm0,(%rdi,%r8,1) 1b1bd7: 48 83 c6 01 add $0x1,%rsi 1b1bdb: 39 f2 cmp %esi,%edx 1b1bdd: 77 eb ja 1b1bca <ares_send+0x173> 1b1bdf: 39 c8 cmp %ecx,%eax 1b1be1: 74 2e je 1b1c11 <ares_send+0x1ba> 1b1be3: 48 63 d0 movslq %eax,%rdx 1b1be6: 48 8d 14 d7 lea (%rdi,%rdx,8),%rdx 1b1bea: c7 02 00 00 00 00 movl $0x0,(%rdx) 1b1bf0: c7 42 04 00 00 00 00 movl $0x0,0x4(%rdx) 1b1bf7: 83 c0 01 add $0x1,%eax 1b1bfa: 39 c1 cmp %eax,%ecx 1b1bfc: 7e 13 jle 1b1c11 <ares_send+0x1ba> 1b1bfe: 48 98 cltq 1b1c00: 48 8d 04 c7 lea (%rdi,%rax,8),%rax 1b1c04: c7 00 00 00 00 00 movl $0x0,(%rax) 1b1c0a: c7 40 04 00 00 00 00 movl $0x0,0x4(%rax) 1b1c11: 41 8b 04 24 mov (%r12),%eax 1b1c15: f6 c4 01 test $0x1,%ah 1b1c18: 0f 85 6a 01 00 00 jne 1b1d88 <ares_send+0x331> 1b1c1e: ba 00 02 00 00 mov $0x200,%edx 1b1c23: a8 01 test $0x1,%al 1b1c25: 0f 85 6a 01 00 00 jne 1b1d95 <ares_send+0x33e> 1b1c2b: 31 c0 xor %eax,%eax 1b1c2d: 39 ea cmp %ebp,%edx 1b1c2f: 0f 9c c0 setl %al 1b1c32: 89 83 b8 00 00 00 mov %eax,0xb8(%rbx) 1b1c38: c7 83 bc 00 00 00 0b 
movl $0xb,0xbc(%rbx) 1b1c3f: 00 00 00 1b1c42: c7 83 c0 00 00 00 00 movl $0x0,0xc0(%rbx) 1b1c49: 00 00 00 1b1c4c: 48 8d 6b 18 lea 0x18(%rbx),%rbp 1b1c50: 48 89 de mov %rbx,%rsi 1b1c53: 48 89 ef mov %rbp,%rdi 1b1c56: e8 b9 ca ff ff callq 1ae714 <ares__init_list_node> 1b1c5b: 48 8d 7b 30 lea 0x30(%rbx),%rdi 1b1c5f: 48 89 de mov %rbx,%rsi 1b1c62: e8 ad ca ff ff callq 1ae714 <ares__init_list_node> 1b1c67: 48 8d 7b 48 lea 0x48(%rbx),%rdi 1b1c6b: 48 89 de mov %rbx,%rsi 1b1c6e: e8 a1 ca ff ff callq 1ae714 <ares__init_list_node> 1b1c73: 4c 8d 6b 60 lea 0x60(%rbx),%r13 1b1c77: 48 89 de mov %rbx,%rsi 1b1c7a: 4c 89 ef mov %r13,%rdi 1b1c7d: e8 92 ca ff ff callq 1ae714 <ares__init_list_node> 1b1c82: 49 8d b4 24 b8 01 00 lea 0x1b8(%r12),%rsi 1b1c89: 00 1b1c8a: 4c 89 ef mov %r13,%rdi 1b1c8d: e8 ac ca ff ff callq 1ae73e <ares__insert_in_list> 1b1c92: 0f b7 03 movzwl (%rbx),%eax 1b1c95: 25 ff 07 00 00 and $0x7ff,%eax 1b1c9a: 48 8d 04 40 lea (%rax,%rax,2),%rax 1b1c9e: 49 8d b4 c4 d0 01 00 lea 0x1d0(%r12,%rax,8),%rsi 1b1ca5: 00 1b1ca6: 48 89 ef mov %rbp,%rdi 1b1ca9: e8 90 ca ff ff callq 1ae73e <ares__insert_in_list> 1b1cae: e8 78 0e 00 00 callq 1b2b2b <ares__tvnow> 1b1cb3: 48 89 44 24 10 mov %rax,0x10(%rsp) 1b1cb8: 48 89 54 24 18 mov %rdx,0x18(%rsp) 1b1cbd: 48 8d 54 24 10 lea 0x10(%rsp),%rdx 1b1cc2: 48 89 de mov %rbx,%rsi 1b1cc5: 4c 89 e7 mov %r12,%rdi 1b1cc8: e8 c1 de ff ff callq 1afb8e <ares__send_query> 1b1ccd: 48 83 c4 28 add $0x28,%rsp 1b1cd1: 5b pop %rbx 1b1cd2: 5d pop %rbp 1b1cd3: 41 5c pop %r12 1b1cd5: 41 5d pop %r13 1b1cd7: 41 5e pop %r14 1b1cd9: 41 5f pop %r15 1b1cdb: c3 retq *** Jumped here from the jae at 1b1b40 *** *** I assume this tests the destination alignment *** 1b1cdc: 41 f6 c1 02 test $0x2,%r9b 1b1ce0: 0f 85 d5 00 00 00 jne 1b1dbb <ares_send+0x364> *** It takes the above jump to 1b1dbb *** *** Code at 1b1dbb jumps back to here *** 1b1ce6: 41 f6 c0 04 test $0x4,%r8b 1b1cea: 0f 85 b6 00 00 00 jne 1b1da6 <ares_send+0x34f> *** It takes the above jump *** *** Jumped 
here from 1b1db6 *** 1b1cf0: 89 c2 mov %eax,%edx 1b1cf2: 83 e2 e0 and $0xffffffe0,%edx *** Here is where I think it actually goes wrong. eax (just copied into edx) was 0x1d. The and makes edx == 0. The loop that follows, though, tests its count only at the bottom, so it runs once and copies 32 (0x20) bytes. That is more than the 0x1d bytes that remain. *** 1b1cf5: 89 54 24 08 mov %edx,0x8(%rsp) 1b1cf9: 45 31 d2 xor %r10d,%r10d 1b1cfc: 89 04 24 mov %eax,(%rsp) 1b1cff: 44 89 d2 mov %r10d,%edx 1b1d02: 4c 8b 7c 16 08 mov 0x8(%rsi,%rdx,1),%r15 1b1d07: 4c 8b 5c 16 10 mov 0x10(%rsi,%rdx,1),%r11 1b1d0c: 48 8b 44 16 18 mov 0x18(%rsi,%rdx,1),%rax 1b1d11: 49 89 44 10 18 mov %rax,0x18(%r8,%rdx,1) 1b1d16: 48 8b 04 16 mov (%rsi,%rdx,1),%rax 1b1d1a: 49 89 04 10 mov %rax,(%r8,%rdx,1) 1b1d1e: 4d 89 7c 10 08 mov %r15,0x8(%r8,%rdx,1) 1b1d23: 4d 89 5c 10 10 mov %r11,0x10(%r8,%rdx,1) 1b1d28: 41 83 c2 20 add $0x20,%r10d 1b1d2c: 44 3b 54 24 08 cmp 0x8(%rsp),%r10d 1b1d31: 72 cc jb 1b1cff <ares_send+0x2a8> 1b1d33: 8b 04 24 mov (%rsp),%eax 1b1d36: 44 89 d2 mov %r10d,%edx 1b1d39: 49 01 d0 add %rdx,%r8 1b1d3c: 48 01 d6 add %rdx,%rsi 1b1d3f: e9 02 fe ff ff jmpq 1b1b46 <ares_send+0xef> 1b1d44: 48 89 d7 mov %rdx,%rdi 1b1d47: e8 24 f0 ea ff callq 60d70 <free@plt> 1b1d4c: 48 89 df mov %rbx,%rdi 1b1d4f: e8 1c f0 ea ff callq 60d70 <free@plt> 1b1d54: 45 31 c0 xor %r8d,%r8d 1b1d57: 31 c9 xor %ecx,%ecx 1b1d59: 31 d2 xor %edx,%edx 1b1d5b: be 0f 00 00 00 mov $0xf,%esi 1b1d60: 4c 89 ef mov %r13,%rdi 1b1d63: 41 ff d6 callq *%r14 1b1d66: 48 83 c4 28 add $0x28,%rsp 1b1d6a: 5b pop %rbx 1b1d6b: 5d pop %rbp 1b1d6c: 41 5c pop %r12 1b1d6e: 41 5d pop %r13 1b1d70: 41 5e pop %r14 1b1d72: 41 5f pop %r15 1b1d74: c3 retq 1b1d75: 83 c0 01 add $0x1,%eax 1b1d78: 99 cltd 1b1d79: f7 f9 idiv %ecx 1b1d7b: 41 89 94 24 b0 01 00 mov %edx,0x1b0(%r12) 1b1d82: 00 1b1d83: e9 21 fe ff ff jmpq 1b1ba9 <ares_send+0x152> 1b1d88: 41 8b 54 24 50 mov 0x50(%r12),%edx 1b1d8d: a8 01 test $0x1,%al 1b1d8f: 0f 84 96 fe ff ff je 1b1c2b <ares_send+0x1d4> 1b1d95: b8 01 00 00 00 mov $0x1,%eax 1b1d9a: e9 93 fe ff ff jmpq 1b1c32 <ares_send+0x1db> 1b1d9f: 
31 c0 xor %eax,%eax 1b1da1: e9 3d fe ff ff jmpq 1b1be3 <ares_send+0x18c> *** Jumped here from 1b1cea *** 1b1da6: 8b 16 mov (%rsi),%edx 1b1da8: 41 89 10 mov %edx,(%r8) 1b1dab: 49 83 c0 04 add $0x4,%r8 1b1daf: 48 83 c6 04 add $0x4,%rsi 1b1db3: 83 e8 04 sub $0x4,%eax 1b1db6: e9 35 ff ff ff jmpq 1b1cf0 <ares_send+0x299> *** Jumped here from 1b1ce0 *** 1b1dbb: 41 0f b7 07 movzwl (%r15),%eax 1b1dbf: 66 89 42 02 mov %ax,0x2(%rdx) 1b1dc3: 4c 8d 42 04 lea 0x4(%rdx),%r8 1b1dc7: 48 83 c6 02 add $0x2,%rsi 1b1dcb: 8d 45 fe lea -0x2(%rbp),%eax 1b1dce: e9 13 ff ff ff jmpq 1b1ce6 <ares_send+0x28f>