This regression was caused by loop canonicalization. The following example:
struct A {
  int f1;
  int f2;
};

int func(int c);

int test(struct A* src, struct A* dst, int count)
{
  while (count--) {
    if (!func(src->f2))
      return 0;
    *dst++ = *src++;
  }
  return 1;
}

gcc 4.2.1 compiles this to 40 bytes, gcc 4.4.0 to 48 bytes.

gcc 4.2.1 output:

test:
        push    {r4, r5, r6, lr}
        mov     r4, r0
        mov     r5, r1
        mov     r6, r2
        b       .L2
.L3:
        ldr     r0, [r4, #4]
        bl      func
        cmp     r0, #0
        beq     .L6
        mov     r3, r5
        mov     r2, r4
        ldmia   r2!, {r0, r1}
        stmia   r3!, {r0, r1}
        mov     r5, r3
        mov     r4, r2
.L2:
        sub     r6, r6, #1
        bcs     .L3
        mov     r0, #1
.L6:
        @ sp needed for prologue
        pop     {r4, r5, r6, pc}

gcc 4.4.0 output:

        push    {r4, r5, r6, r7, lr}   // note r7 is clobbered
        sub     sp, sp, #12            // why does something need to be stored on the stack?
        mov     r7, r0
        str     r1, [sp, #4]           // why store r1 onto the stack?
        mov     r6, r2
        mov     r5, #0
        b       .L2
.L5:
        add     r4, r7, r5
        ldr     r0, [r4, #4]
        bl      func
        sub     r6, r6, #1
        cmp     r0, #0
        beq     .L4
        ldr     r1, [sp, #4]           // load from stack
        add     r3, r1, r5
        add     r5, r5, #8
        ldmia   r4!, {r1, r2}
        stmia   r3!, {r1, r2}
.L2:
        cmp     r6, #0
        bne     .L5
        mov     r0, #1
.L4:
        add     sp, sp, #12
        @ sp needed for prologue
        pop     {r4, r5, r6, r7, pc}

This is caused by the loop canonicalization pass (pass_iv_optimize) that was added in gcc 4.4.
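The difference in code shape can be sketched at the C level. This is a hand-written illustration, not compiler output; the stub `func` is a hypothetical stand-in for the external function in the report:

```c
#include <assert.h>
#include <string.h>

struct A { int f1; int f2; };

/* Hypothetical stand-in for the external func(): 0 means "stop". */
static int func(int c) { return c != 0; }

/* Shape kept by gcc 4.2.1: both pointers are bumped in lock-step,
   so only src, dst and the counter are live around the call. */
static int test_ptr(struct A *src, struct A *dst, int count)
{
    while (count--) {
        if (!func(src->f2))
            return 0;
        *dst++ = *src++;
    }
    return 1;
}

/* Shape after pass_iv_optimize in gcc 4.4: a byte offset (ivtmp.19)
   is added to the unchanged base pointers, and a counter copy
   (ivtmp.22) replaces count.  Both bases, the offset and the counter
   copy are live across the call to func(), so one of them ends up
   spilled to the stack on ARM. */
static int test_iv(struct A *src, struct A *dst, int count)
{
    unsigned int ivtmp22 = (unsigned int) count;  /* counter copy */
    unsigned int ivtmp19 = 0;                     /* byte offset  */
    while (ivtmp22 != 0) {
        struct A *s = (struct A *)((char *)src + ivtmp19);
        if (!func(s->f2))
            return 0;
        *(struct A *)((char *)dst + ivtmp19) = *s;
        ivtmp19 += sizeof(struct A);  /* 8 bytes on this target */
        ivtmp22--;
    }
    return 1;
}
```

Both functions compute the same result; the second merely carries one more loop-invariant-plus-IV pair, which is exactly the extra register pressure visible in the 4.4.0 assembly.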
Final GIMPLE form from the gcc 4.2.1 compiler:

test (src, dst, count)
{
  int a;
  int D.1545;

<bb 2>:
  goto <bb 6> (<L3>);

<L0>:;
  a = func (MEM[base: src, offset: 4]);
  if (a == 0) goto <L8>; else goto <L2>;

<L8>:;
  D.1545 = 0;
  goto <bb 8> (<L5>);

<L2>:;
  MEM[base: dst] = MEM[base: src];
  dst = dst + 8B;
  src = src + 8B;

<L3>:;
  count = count - 1;
  if (count != -1) goto <L0>; else goto <L9>;

<L9>:;
  D.1545 = 1;

<L5>:;
  return D.1545;
}

The final GIMPLE in gcc 4.4:

test (struct A * src, struct A * dst, int count)
{
  unsigned int ivtmp.22;  // induction variables introduced by pass_iv_optimize
  unsigned int ivtmp.19;
  int a;
  int D.1274;

<bb 2>:
  ivtmp.22 = (unsigned int) count;  // copy of count; count itself is not used anymore
  ivtmp.19 = 0;
  goto <bb 6>;

<bb 3>:
  a = func (MEM[base: src + ivtmp.19, offset: 4]);
  ivtmp.22 = ivtmp.22 - 1;
  if (a == 0) goto <bb 4>; else goto <bb 5>;

<bb 4>:
  D.1274 = 0;
  goto <bb 8>;

<bb 5>:
  MEM[base: dst, index: ivtmp.19] = MEM[base: src, index: ivtmp.19];
  ivtmp.19 = ivtmp.19 + 8;

<bb 6>:
  if (ivtmp.22 != 0) goto <bb 3>; else goto <bb 7>;

<bb 7>:
  D.1274 = 1;

<bb 8>:
  return D.1274;
}

The subsequent RTL passes could not optimize away these temporary induction variables, so they are spilled onto the stack, which causes a number of other inefficiencies.

The main question: there are three ways to fix this:
1) turn off loop canonicalization for -Os
2) optimize away the extra variable in the GIMPLE passes
3) optimize away the extra variable in the RTL passes


-- 
           Summary: loop canonicalization causes a lot of unnecessary
                    temporary variables
           Product: gcc
           Version: 4.4.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: middle-end
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: sliao at google dot com
 GCC build triplet: i686-linux
  GCC host triplet: i686-linux
GCC target triplet: arm-eabi

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=42505