The following code

// compilation options: -march=armv5te -mthumb -Os
// Pair of long long fields. test() below returns a, b, or b - a.
// In both assembly listings each field is accessed as two 32-bit words:
// a at offsets 0 and 4, b at offsets 8 and 12.
struct node
{
 long long a;
 long long b;
};

// Declared only; no definition appears in this report. test() passes it the
// address of its local struct node (presumably func fills the struct in).
void func (struct node *n);

// Reduced test case for the register-allocation regression.
// Hands a local struct node to func(), then returns:
//   n.a         when from == 1
//   n.b         when from != 1 and to == 2
//   n.b - n.a   otherwise
long long test (int from, int to)
{
 struct node n;
 // n is not initialized before this call; every field read below depends on
 // whatever func stores through the pointer.
 func(&n);
 if (from == 1)
   return n.a;
 else
   {
     // Both remaining paths need n.b; only the last one also needs n.a.
     if (to == 2)
       return n.b;
     else
       return n.b - n.a;
   }
}

is compiled by gcc 4.2.1 to 44 bytes and by gcc 4.4.0 to 60 bytes. The assembly
output shows 6 unneeded movs in the else clause and a duplication of sp in r4:

gcc 4.2.1 output:
@ gcc 4.2.1 code for test(): r4 holds `from`, r5 holds `to`, and the struct n
@ sits at the bottom of the frame ([sp] .. [sp, #12]). The 64-bit result is
@ built directly in r0 (word at offset +0) and r1 (word at offset +4).
test:
       push    {r4, r5, lr}
       sub     sp, sp, #20
       mov     r4, r0          @ r4 = from (compared with 1 below)
       mov     r0, sp          @ r0 = &n, the argument for func
       mov     r5, r1          @ r5 = to (compared with 2 below)
       bl      func
       cmp     r4, #1
       bne     .L2
       ldr     r0, [sp]        @ from == 1: r0:r1 = n.a
       ldr     r1, [sp, #4]
       b       .L4
.L2:
       ldr     r0, [sp, #8]    @ from != 1: r0:r1 = n.b
       ldr     r1, [sp, #12]
       cmp     r5, #2
       beq     .L4             @ to == 2: n.b is already the result
       ldr     r3, [sp]        @ r3:r4 = n.a (r4 is free now; `from` is dead)
       ldr     r4, [sp, #4]
       sub     r0, r0, r3      @ 64-bit n.b - n.a: subtract the first words,
       sbc     r1, r1, r4      @ then the second words with borrow
.L4:
       add     sp, sp, #20
       @ sp needed for prologue
       pop     {r4, r5, pc}

gcc 4.4 output:
// gcc 4.4 code for test(): r5 holds `from`, r6 holds `to`, r4 holds a copy of
// sp (= &n). Every path first builds the 64-bit result in r3 (word at offset
// +0) and r4 (word at offset +4); .L3 then copies that pair into r0:r1.
test:
       push    {r4, r5, r6, lr}
       sub     sp, sp, #16
       mov     r5, r0
       mov     r0, sp
       mov     r6, r1
       mov     r4, sp    // why duplicate sp in r4? sp is never modified in the function
       bl      func
       cmp     r5, #1
       bne     .L2
       ldr     r3, [sp]          // from == 1: r3:r4 = n.a
       ldr     r4, [sp, #4]
       b       .L3
.L2:
       ldr     r2, [r4, #8]      // from != 1: r2:r3 = n.b, addressed through r4
       ldr     r3, [r4, #12]
       cmp     r6, #2
       bne     .L4
       mov     r4, r3  // the magic dance of the registers
       mov     r3, r2            // now r3:r4 = n.b
       b       .L3
.L4:
       ldr     r0, [r4]          // r0:r1 = n.a
       ldr     r1, [r4, #4]
       mov     r4, r3  // another magic dance
       mov     r3, r2            // now r3:r4 = n.b
       sub     r3, r3, r0        // r3:r4 = n.b - n.a (sub, then sbc with borrow)
       sbc     r4, r4, r1
.L3:
       add     sp, sp, #16
       mov     r0, r3   // again, unneeded movs.
       mov     r1, r4   // result can be calculated in r0,r1 directly (see gcc 4.2.1 version)
       @ sp needed for prologue
       pop     {r4, r5, r6, pc}

The code was extracted from the GCC SPEC benchmark.


-- 
           Summary: Bad register allocation in a very simple code
           Product: gcc
           Version: 4.4.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: rtl-optimization
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: sliao at google dot com
 GCC build triplet: i686-linux
  GCC host triplet: i686-linux
GCC target triplet: arm-eabi


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=42502

Reply via email to