http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55769
Bug #: 55769 Summary: unnecessary spill/reload to compose register pair Classification: Unclassified Product: gcc Version: 4.8.0 Status: UNCONFIRMED Severity: enhancement Priority: P3 Component: target AssignedTo: unassig...@gcc.gnu.org ReportedBy: car...@google.com Target: arm-linux-gnueabi Created attachment 29018 --> http://gcc.gnu.org/bugzilla/attachment.cgi?id=29018 testcase Compile the attached source code with options: -march=armv7-a -mthumb -O2 Trunk gcc generates: sum_ror_mem: @ args = 0, pretend = 0, frame = 40 @ frame_needed = 0, uses_anonymous_args = 0 push {r4, r5, r6, r7, r8, r9, r10, fp, lr} add r8, r1, r2 cmp r1, r8 sub sp, sp, #44 mov r4, r0 mov r5, #0 bcs .L2 mov r9, r1 .L3: add r0, r9, #1024 add r9, r9, #64 bl prefetch ldrd r2, [r9, #-64] adds r2, r2, r4 adc r3, r3, r5 lsrs r1, r2, #8 orr r1, r1, r3, lsl #24 lsrs r3, r3, #8 str r1, [sp] // A orr r3, r3, r2, lsl #24 str r3, [sp, #4] // B ldrd r0, [r9, #-56] ldrd r2, [sp] // C adds r2, r2, r0 adc r3, r3, r1 lsrs r1, r2, #8 orr r1, r1, r3, lsl #24 lsrs r3, r3, #8 str r1, [sp, #8] orr r3, r3, r2, lsl #24 str r3, [sp, #12] ldrd r0, [r9, #-48] ldrd r2, [sp, #8] adds r2, r2, r0 adc r3, r3, r1 lsrs r1, r2, #8 orr r1, r1, r3, lsl #24 lsrs r3, r3, #8 str r1, [sp, #16] orr r3, r3, r2, lsl #24 str r3, [sp, #20] ldrd r0, [r9, #-40] ldrd r2, [sp, #16] adds r2, r2, r0 adc r3, r3, r1 lsrs r1, r2, #8 orr r1, r1, r3, lsl #24 lsrs r3, r3, #8 str r1, [sp, #24] orr r3, r3, r2, lsl #24 str r3, [sp, #28] ldrd r0, [r9, #-32] ldrd r2, [sp, #24] adds r2, r2, r0 adc r3, r3, r1 lsrs r1, r2, #8 orr r10, r1, r3, lsl #24 lsrs r3, r3, #8 orr fp, r3, r2, lsl #24 ldrd r2, [r9, #-24] adds r2, r2, r10 adc r3, r3, fp lsrs r1, r2, #8 orr r1, r1, r3, lsl #24 lsrs r3, r3, #8 str r1, [sp, #32] orr r3, r3, r2, lsl #24 str r3, [sp, #36] ldrd r0, [r9, #-16] ldrd r2, [sp, #32] adds r2, r2, r0 adc r3, r3, r1 lsr ip, r2, #8 ldrd r0, [r9, #-8] orr r6, ip, r3, lsl #24 lsrs r3, r3, #8 adds r0, r0, r6 orr r7, r3, r2, lsl #24 adc r1, r1, r7 cmp 
r8, r9 lsr r2, r0, #8 lsr r3, r1, #8 orr r4, r2, r1, lsl #24 orr r5, r3, r0, lsl #24 bhi .L3 .L2: adds r0, r5, r4 add sp, sp, #44 @ sp needed pop {r4, r5, r6, r7, r8, r9, r10, fp, pc} Note that instructions A and B spill two values onto the stack, and instruction C reads them back to form a 64-bit register pair. If we swap the register usage of r1 and r2, then we can avoid these 3 instructions. There are also many similar patterns in the following instructions that can be avoided.