------- Comment #19 from bonzini at gnu dot org 2008-10-24 10:11 -------

Left = old, right = IRA.
It seems to me that the better register allocation of IRA gives the post-regalloc scheduling pass much less freedom. Intel guys, could you run SPEC with -O2 -fschedule-insns and -O2, both of them using IRA?

.L4:
    movsd   (%esi,%eax,8), %xmm3        movl    12(%ebp), %edx
    movsd   (%ebx,%eax,8), %xmm4        movsd   (%edx,%eax,8), %xmm7
    movsd   (%ecx,%eax,8), %xmm6        movl    -44(%ebp), %edx
    movl    12(%ebp), %edx              movsd   %xmm7, -40(%ebp)
    movsd   (%edx,%eax,8), %xmm1        movsd   (%edx,%eax,8), %xmm7
    movl    16(%ebp), %edx              movsd   %xmm7, -56(%ebp)
    movapd  %xmm1, %xmm0                movsd   -40(%ebp), %xmm7
    movsd   (%edx,%eax,8), %xmm2        mulsd   (%ebx,%eax,8), %xmm7
    mulsd   %xmm3, %xmm0                addsd   %xmm7, %xmm6
    movl    20(%ebp), %edx              movsd   -40(%ebp), %xmm7
    addsd   -80(%ebp), %xmm0            mulsd   (%esi,%eax,8), %xmm7
    movsd   (%edx,%eax,8), %xmm5        addsd   %xmm7, %xmm5
    movsd   %xmm0, -80(%ebp)            movsd   -40(%ebp), %xmm7
    incl    %eax                        mulsd   (%edi,%eax,8), %xmm7
    movapd  %xmm1, %xmm0                addsd   %xmm7, %xmm4
    cmpl    %eax, %edi                  movsd   -56(%ebp), %xmm7
    mulsd   %xmm4, %xmm0                mulsd   (%ebx,%eax,8), %xmm7
    mulsd   %xmm6, %xmm1                addsd   %xmm7, %xmm3
    addsd   -72(%ebp), %xmm0            movsd   -56(%ebp), %xmm7
    addsd   -64(%ebp), %xmm1            mulsd   (%esi,%eax,8), %xmm7
    movsd   %xmm0, -72(%ebp)            addsd   %xmm7, %xmm2
    movsd   %xmm1, -64(%ebp)            movsd   -56(%ebp), %xmm7
    movapd  %xmm2, %xmm0                mulsd   (%edi,%eax,8), %xmm7
    mulsd   %xmm3, %xmm0                addsd   %xmm7, %xmm1
    mulsd   %xmm5, %xmm3                movsd   (%ecx,%eax,8), %xmm7
    addsd   -56(%ebp), %xmm0            mulsd   (%ebx,%eax,8), %xmm7
    addsd   -32(%ebp), %xmm3            addsd   -32(%ebp), %xmm7
    movsd   %xmm0, -56(%ebp)            movsd   %xmm7, -32(%ebp)
    movsd   %xmm3, -32(%ebp)            movsd   (%ecx,%eax,8), %xmm7
    movapd  %xmm2, %xmm0                mulsd   (%esi,%eax,8), %xmm7
    mulsd   %xmm6, %xmm2                addsd   -24(%ebp), %xmm7
    mulsd   %xmm4, %xmm0                movsd   %xmm7, -24(%ebp)
    addsd   -40(%ebp), %xmm2            movsd   (%ecx,%eax,8), %xmm7
    mulsd   %xmm5, %xmm4                mulsd   (%edi,%eax,8), %xmm7
    addsd   -48(%ebp), %xmm0            incl    %eax
    addsd   -24(%ebp), %xmm4            addsd   %xmm7, %xmm0
    mulsd   %xmm6, %xmm5                cmpl    %eax, 8(%ebp)
    movsd   %xmm0, -48(%ebp)            jg      .L4
    movsd   %xmm2, -40(%ebp)
    movsd   %xmm4, -24(%ebp)
    addsd   %xmm5, %xmm7
    jg      .L4

-- 
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=37364