https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92151
Bug ID: 92151 Summary: Spurious register copying Product: gcc Version: unknown Status: UNCONFIRMED Severity: normal Priority: P3 Component: inline-asm Assignee: unassigned at gcc dot gnu.org Reporter: gcc at gmch dot uk Target Milestone: --- Created attachment 47066 --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=47066&action=edit Code to demonstrate the issue. The attached code is a fragment of a larger function. The larger function is a string copy which takes string 's', destination-buffer 'd' and end-of-destination-buffer 'e'. The body of the function is inline asm. The problem I have tripped over is that the compiler makes spurious copies of registers. In some cases I have seen it PUSH %RBX in order to do so. Compiling the enclosed with gcc 9.1: 1) with "#define TWITCH 1" produces spurious copying of %rdi 2) with "#define TWITCH 0" produces no spurious copying The results for the two cases are given below, marked up to try to show what is going on. The extra shuffling of registers is ugly as sin, but not, I suppose, a big overhead, at least until PUSH/POP get added to make a register available for this nonsense :-( For completeness, I tried gcc 8.1 which does some similar (but different) spurious copying -- see Result 3, below. 
_____________________________________________________________ Result 1 -- gcc 9.1 -O3 -- #define TWITCH 1 qstpxcpy_asm0: // Arguments: d -- %rdi, s -- %rsi, e -- %rdx // Locals: w, t movq %rdi, %rcx // inserted by compiler -- gcc 9.1 mov (%rsi), %rax // w = *s lea -8(%rdx), %rdx // e -= 8 mov %eax, %r11d // t = w .L2: movq %rcx, %rdi // inserted by compiler mov %rax, (%rcx) // *d = w lea 8(%rdi), %rdi // d += 8 movq %rdi, %rcx // inserted by compiler cmp %rdx, %rdi // "d - e" -- __asm__ goto jae .L3 // quit if d >= e mov %eax, %r11d // t = w cmp $-1, %r11 // check 't' -- __asm__ goto jnz .L4 // j if at end mov %rax, (%rdi) // *d = w lea 8(%rdi), %rdi // d += 8 movq %rdi, %rcx // inserted by compiler cmp %rdx, %rdi // "d - e" -- __asm__ goto jae .L3 // quit if d >= e mov %eax, %r11d // t = w cmp $-1, %r11 // check 't' -- __asm__ goto jz .L2 // j if not at end .L4: lea (%rcx, %r11), %rax // return d + t ret .L3: lea (%rdx, %r11), %rax // return e + t ret _____________________________________________________________ Result 2 -- gcc 9.1 -O3 -- #define TWITCH 0 qstpxcpy_asm0: // Arguments: d -- %rdi, s -- %rsi, e -- %rdx // Locals: w, t mov (%rsi), %rax // w = *s lea -8(%rdx), %rdx // e -= 8 mov %eax, %r11d // t = w .L2: mov %rax, (%rdi) // *d = w lea 8(%rdi), %rdi // d += 8 cmp %rdx, %rdi // "d - e" -- __asm__ goto jae .L5 // quit if d >= e mov %eax, %r11d // t = w cmp $-1, %r11 // check 't' -- __asm__ goto jnz .L5 // j if at end mov %rax, (%rdi) // *d = w lea 8(%rdi), %rdi // d += 8 cmp %rdx, %rdi // "d - e" -- __asm__ goto jae .L5 // quit if d >= e mov %eax, %r11d // t = w cmp $-1, %r11 // check 't' -- __asm__ goto jz .L5 // j if not at end .L5: lea (%rdx, %r11), %rax // return e + t ret _____________________________________________________________ Result 3 -- gcc 8.1 -O3 -- #define TWITCH 1 qstpxcpy_asm0: // Arguments: d -- %rdi, s -- %rsi, e -- %rdx // Locals: w, t mov (%rsi), %rax // w = *s lea -8(%rdx), %rdx // e -= 8 mov %eax, %r11d // t = w .L2: movq %rdi, %rcx 
// inserted by compiler mov %rax, (%rdi) // *d = w lea 8(%rcx), %rcx // d += 8 movq %rcx, %rdi // inserted by compiler cmp %rdx, %rcx // "d - e" -- __asm__ goto jae .L3 // quit if d >= e mov %eax, %r11d // t = w cmp $-1, %r11 // check 't' -- __asm__ goto jnz .L4 // j if at end mov %rax, (%rcx) // *d = w lea 8(%rcx), %rcx // d += 8 movq %rcx, %rdi // inserted by compiler cmp %rdx, %rcx // "d - e" -- __asm__ goto jae .L3 // quit if d >= e mov %eax, %r11d // t = w cmp $-1, %r11 // check 't' -- __asm__ goto jz .L2 // j if not at end .L4: lea (%rdi, %r11), %rax // return d + t ret .L3: lea (%rdx, %r11), %rax // return e + t ret