http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46551
Summary: Generate complex addressing mode CMP instruction in x86-64 Product: gcc Version: 4.6.0 Status: UNCONFIRMED Severity: minor Priority: P3 Component: tree-optimization AssignedTo: unassig...@gcc.gnu.org ReportedBy: a...@consulting.net.nz Thanks for the improvements in the GCC snapshot. Here's a simplified example where GCC does not emit the instruction cmp (%rsi,%rax,8),%rdx. Instead it generates mov (%rsi,%rax,8),%rcx; cmp %rdx,%rcx: #include <stdint.h> typedef struct { int64_t index[2]; uint64_t cell[16]; } vm_t; typedef void (*inst_t)(uint32_t *inst, vm_t *vm, uint64_t a); void branch_upon_complex_compare(uint32_t *inst, vm_t *vm, uint64_t a) { if (vm->cell[vm->index[0] - 2] != a) { uint64_t dispatch = inst[-1]; inst -= 1; ((inst_t) dispatch)(inst, vm, a); } else { uint64_t dispatch = inst[1]; inst += 1; ((inst_t) dispatch)(inst, vm, a); } } int main() { return 0; } 0000000000400480 <branch_upon_complex_compare>: 400480: 48 8b 06 mov (%rsi),%rax 400483: 48 8b 0c c6 mov (%rsi,%rax,8),%rcx 400487: 48 39 d1 cmp %rdx,%rcx 40048a: 74 0c je 400498 <branch_upon_complex_compare+0x18> 40048c: 8b 47 fc mov -0x4(%rdi),%eax 40048f: 48 83 ef 04 sub $0x4,%rdi 400493: ff e0 jmpq *%rax 400495: 0f 1f 00 nopl (%rax) 400498: 8b 47 04 mov 0x4(%rdi),%eax 40049b: 48 89 ca mov %rcx,%rdx 40049e: 48 83 c7 04 add $0x4,%rdi 4004a2: ff e0 jmpq *%rax 4004a4: 66 66 66 2e 0f 1f 84 nopw %cs:0x0(%rax,%rax,1) 4004ab: 00 00 00 00 00 [I found this inefficient instruction generation while trying to pinpoint an increase in runtime. I reduced the runtime using asm goto to replace the pair of instructions with a single complex addressing mode CMP instruction.]