http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46551

           Summary: Generate complex addressing mode CMP instruction in
                    x86-64
           Product: gcc
           Version: 4.6.0
            Status: UNCONFIRMED
          Severity: minor
          Priority: P3
         Component: tree-optimization
        AssignedTo: unassig...@gcc.gnu.org
        ReportedBy: a...@consulting.net.nz


Thanks for the improvements in the GCC snapshot. Here's a simplified example
where GCC does not emit the single instruction cmp (%rsi,%rax,8),%rdx. Instead
it generates the pair mov (%rsi,%rax,8),%rcx; cmp %rdx,%rcx:


#include <stdint.h>

/* State record passed by pointer to every inst_t handler. */
typedef struct {
  /* Two signed 64-bit values; branch_upon_complex_compare reads index[0]. */
  int64_t index[2];
  /* Sixteen unsigned 64-bit cells, addressed as cell[index[0] - 2] below. */
  uint64_t cell[16];
} vm_t;

/*
 * Handler signature: receives a pointer into an array of 32-bit words, the
 * vm_t state, and one 64-bit value; returns nothing.
 */
typedef void (*inst_t)(uint32_t *inst, vm_t *vm, uint64_t a);

/*
 * Compare vm->cell[vm->index[0] - 2] with `a` and hand control to another
 * handler chosen from the words adjacent to `inst`.
 *
 *   inst  pointer into an array of 32-bit words; inst[-1] and inst[1] are read
 *   vm    state record supplying the index and the cell being compared
 *   a     64-bit value compared against the selected cell
 *
 * When the cell differs from `a`, the word at inst[-1] is used and `inst` is
 * stepped back by one; otherwise the word at inst[1] is used and `inst` is
 * stepped forward by one. In both cases the word is widened to uint64_t, cast
 * to inst_t and called with the updated `inst`, `vm` and `a`.
 *
 * No range check is applied to vm->index[0] - 2 before it indexes cell[].
 */
void branch_upon_complex_compare(uint32_t *inst, vm_t *vm, uint64_t a) {
  /* This memory-operand comparison is the expression the report is about. */
  if (vm->cell[vm->index[0] - 2] != a) {
    /* 32-bit word preceding inst, widened to 64 bits. */
    uint64_t dispatch = inst[-1];
    inst -= 1;
    /* The integer is reinterpreted as a handler address and called. */
    ((inst_t) dispatch)(inst, vm, a);
  } else {
    /* 32-bit word following inst, widened to 64 bits. */
    uint64_t dispatch = inst[1];
    inst += 1;
    /* The integer is reinterpreted as a handler address and called. */
    ((inst_t) dispatch)(inst, vm, a);
  }
}

/*
 * Empty entry point: returns 0 without calling branch_upon_complex_compare.
 */
int main() {
  return 0;
}


0000000000400480 <branch_upon_complex_compare>:
  400480:       48 8b 06                mov    (%rsi),%rax
  400483:       48 8b 0c c6             mov    (%rsi,%rax,8),%rcx
  400487:       48 39 d1                cmp    %rdx,%rcx
  40048a:       74 0c                   je     400498 <branch_upon_complex_compare+0x18>
  40048c:       8b 47 fc                mov    -0x4(%rdi),%eax
  40048f:       48 83 ef 04             sub    $0x4,%rdi
  400493:       ff e0                   jmpq   *%rax
  400495:       0f 1f 00                nopl   (%rax)
  400498:       8b 47 04                mov    0x4(%rdi),%eax
  40049b:       48 89 ca                mov    %rcx,%rdx
  40049e:       48 83 c7 04             add    $0x4,%rdi
  4004a2:       ff e0                   jmpq   *%rax
  4004a4:       66 66 66 2e 0f 1f 84    nopw   %cs:0x0(%rax,%rax,1)
  4004ab:       00 00 00 00 00 


[I found this inefficient instruction generation while trying to pinpoint an
increase in runtime. I reduced the runtime using asm goto to replace the pair
of instructions with a single complex addressing mode CMP instruction.]

Reply via email to