https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92151

            Bug ID: 92151
           Summary: Spurious register copying
           Product: gcc
           Version: unknown
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: inline-asm
          Assignee: unassigned at gcc dot gnu.org
          Reporter: gcc at gmch dot uk
  Target Milestone: ---

Created attachment 47066
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=47066&action=edit
Code to demonstrate the issue.

The attached code is a fragment of a larger function.  The larger function is a
string copy which takes string 's', destination-buffer 'd' and
end-of-destination-buffer 'e'.  The body of the function is inline asm.

The problem I have tripped over is that the compiler makes spurious copies of
registers.  In some cases I have seen it PUSH %RBX in order to do so.

Compiling the enclosed with gcc 9.1:

  1) with "#define TWITCH 1" produces spurious copying of %rdi

  2) with "#define TWITCH 0" produces no spurious copying

The results for the two cases are given below, marked up to try to show what is
going on.

The extra shuffling of registers is ugly as sin but not, I suppose, a big
overhead -- at least until PUSH/POP get added to make a register available for
this nonsense :-(

For completeness, I tried gcc 8.1 which does some similar (but different)
spurious copying -- see Result 3, below.

_____________________________________________________________
Result 1 -- gcc 9.1 -O3 -- #define TWITCH 1

qstpxcpy_asm0:
// Arguments: d -- %rdi, s -- %rsi, e -- %rdx
//    Locals: w, t

              movq    %rdi, %rcx    // inserted by compiler -- gcc 9.1

    mov  (%rsi), %rax   // w  = *s
    lea  -8(%rdx), %rdx // e -= 8
    mov  %eax, %r11d    // t  = w
.L2:
              movq    %rcx, %rdi    // inserted by compiler

    mov  %rax, (%rcx)   // *d = w
    lea  8(%rdi), %rdi  // d += 8

              movq    %rdi, %rcx    // inserted by compiler

    cmp   %rdx, %rdi    // "d - e"   -- __asm__ goto
    jae   .L3           // quit if d >= e

    mov   %eax, %r11d   // t = w

    cmp   $-1, %r11     // check 't' -- __asm__ goto
    jnz   .L4           // j if at end

    mov   %rax, (%rdi)  // *d = w
    lea   8(%rdi), %rdi // d += 8

              movq    %rdi, %rcx    // inserted by compiler

    cmp   %rdx, %rdi    // "d - e"   -- __asm__ goto
    jae   .L3           // quit if d >= e

    mov   %eax, %r11d   // t = w

    cmp   $-1, %r11     // check 't' -- __asm__ goto
    jz    .L2           // j if not at end

.L4:
    lea   (%rcx, %r11), %rax    // return d + t
    ret
.L3:
    lea   (%rdx, %r11), %rax    // return e + t
    ret

_____________________________________________________________
Result 2 -- gcc 9.1 -O3 -- #define TWITCH 0

qstpxcpy_asm0:
// Arguments: d -- %rdi, s -- %rsi, e -- %rdx
//    Locals: w, t

    mov  (%rsi), %rax   // w  = *s
    lea  -8(%rdx), %rdx // e -= 8
    mov  %eax, %r11d    // t  = w
.L2:
    mov  %rax, (%rdi)   // *d = w
    lea  8(%rdi), %rdi  // d += 8

    cmp   %rdx, %rdi    // "d - e"   -- __asm__ goto
    jae   .L5           // quit if d >= e

    mov   %eax, %r11d   // t = w

    cmp   $-1, %r11     // check 't' -- __asm__ goto
    jnz   .L5           // j if at end

    mov   %rax, (%rdi)  // *d = w
    lea   8(%rdi), %rdi // d += 8

    cmp   %rdx, %rdi    // "d - e"   -- __asm__ goto
    jae   .L5           // quit if d >= e

    mov   %eax, %r11d   // t = w

    cmp   $-1, %r11     // check 't' -- __asm__ goto
    jz    .L5           // j if not at end

.L5:
    lea   (%rdx, %r11), %rax    // return e + t
    ret

_____________________________________________________________
Result 3 -- gcc 8.1 -O3 -- #define TWITCH 1

qstpxcpy_asm0:
// Arguments: d -- %rdi, s -- %rsi, e -- %rdx
//    Locals: w, t

    mov  (%rsi), %rax   // w  = *s
    lea  -8(%rdx), %rdx // e -= 8
    mov  %eax, %r11d    // t  = w
.L2:
              movq    %rdi, %rcx    // inserted by compiler

    mov  %rax, (%rdi)   // *d = w
    lea  8(%rcx), %rcx  // d += 8

              movq    %rcx, %rdi    // inserted by compiler

    cmp   %rdx, %rcx    // "d - e"   -- __asm__ goto
    jae   .L3           // quit if d >= e

    mov   %eax, %r11d   // t = w

    cmp   $-1, %r11     // check 't' -- __asm__ goto
    jnz   .L4           // j if at end

    mov   %rax, (%rcx)  // *d = w
    lea   8(%rcx), %rcx // d += 8

              movq    %rcx, %rdi    // inserted by compiler

    cmp   %rdx, %rcx        // "d - e"   -- __asm__ goto
    jae   .L3               // quit if d >= e

    mov   %eax, %r11d   // t = w

    cmp   $-1, %r11     // check 't' -- __asm__ goto
    jz    .L2           // j if not at end

.L4:
    lea   (%rdi, %r11), %rax    // return d + t
    ret
.L3:
    lea   (%rdx, %r11), %rax    // return e + t
    ret

Reply via email to