http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55769



             Bug #: 55769

           Summary: unnecessary spill/reload to compose register pair

    Classification: Unclassified

           Product: gcc

           Version: 4.8.0

            Status: UNCONFIRMED

          Severity: enhancement

          Priority: P3

         Component: target

        AssignedTo: unassig...@gcc.gnu.org

        ReportedBy: car...@google.com

            Target: arm-linux-gnueabi





Created attachment 29018

  --> http://gcc.gnu.org/bugzilla/attachment.cgi?id=29018

testcase



Compile the attached source code with options: -march=armv7-a -mthumb -O2



Trunk gcc generates:



sum_ror_mem:

    @ args = 0, pretend = 0, frame = 40

    @ frame_needed = 0, uses_anonymous_args = 0

    push    {r4, r5, r6, r7, r8, r9, r10, fp, lr}

    add    r8, r1, r2

    cmp    r1, r8

    sub    sp, sp, #44

    mov    r4, r0

    mov    r5, #0

    bcs    .L2

    mov    r9, r1

.L3:

    add    r0, r9, #1024

    add    r9, r9, #64

    bl    prefetch

    ldrd    r2, [r9, #-64]

    adds    r2, r2, r4

    adc    r3, r3, r5

    lsrs    r1, r2, #8

    orr    r1, r1, r3, lsl #24

    lsrs    r3, r3, #8

    str    r1, [sp]                    // A

    orr    r3, r3, r2, lsl #24

    str    r3, [sp, #4]                // B

    ldrd    r0, [r9, #-56]

    ldrd    r2, [sp]                    // C

    adds    r2, r2, r0

    adc    r3, r3, r1

    lsrs    r1, r2, #8

    orr    r1, r1, r3, lsl #24

    lsrs    r3, r3, #8

    str    r1, [sp, #8]

    orr    r3, r3, r2, lsl #24

    str    r3, [sp, #12]

    ldrd    r0, [r9, #-48]

    ldrd    r2, [sp, #8]

    adds    r2, r2, r0

    adc    r3, r3, r1

    lsrs    r1, r2, #8

    orr    r1, r1, r3, lsl #24

    lsrs    r3, r3, #8

    str    r1, [sp, #16]

    orr    r3, r3, r2, lsl #24

    str    r3, [sp, #20]

    ldrd    r0, [r9, #-40]

    ldrd    r2, [sp, #16]

    adds    r2, r2, r0

    adc    r3, r3, r1

    lsrs    r1, r2, #8

    orr    r1, r1, r3, lsl #24

    lsrs    r3, r3, #8

    str    r1, [sp, #24]

    orr    r3, r3, r2, lsl #24

    str    r3, [sp, #28]

    ldrd    r0, [r9, #-32]

    ldrd    r2, [sp, #24]

    adds    r2, r2, r0

    adc    r3, r3, r1

    lsrs    r1, r2, #8

    orr    r10, r1, r3, lsl #24

    lsrs    r3, r3, #8

    orr    fp, r3, r2, lsl #24

    ldrd    r2, [r9, #-24]

    adds    r2, r2, r10

    adc    r3, r3, fp

    lsrs    r1, r2, #8

    orr    r1, r1, r3, lsl #24

    lsrs    r3, r3, #8

    str    r1, [sp, #32]

    orr    r3, r3, r2, lsl #24

    str    r3, [sp, #36]

    ldrd    r0, [r9, #-16]

    ldrd    r2, [sp, #32]

    adds    r2, r2, r0

    adc    r3, r3, r1

    lsr    ip, r2, #8

    ldrd    r0, [r9, #-8]

    orr    r6, ip, r3, lsl #24

    lsrs    r3, r3, #8

    adds    r0, r0, r6

    orr    r7, r3, r2, lsl #24

    adc    r1, r1, r7

    cmp    r8, r9

    lsr    r2, r0, #8

    lsr    r3, r1, #8

    orr    r4, r2, r1, lsl #24

    orr    r5, r3, r0, lsl #24

    bhi    .L3

.L2:

    adds    r0, r5, r4

    add    sp, sp, #44

    @ sp needed

    pop    {r4, r5, r6, r7, r8, r9, r10, fp, pc}



Note that instructions A and B spill two values onto the stack, and instruction C reads

them back to form a 64-bit register pair. If we swap the register usage of r1

and r2, then we can avoid these 3 instructions. There are also many similar

patterns in the following instructions that can be avoided.

Reply via email to