------- Comment #9 from nbenoit at tuxfamily dot org 2009-12-16 11:06 ------- Here is a unified diff which focuses on the inner-loop exit conditions.
--- 442/convol.s
+++ r155286/convol.s
 .L3:
     movl    (%edx), %ebx
-    imull   (%esi,%eax,4), %ebx
+    imull   H(,%eax,4), %ebx
     addl    %ebx, %ecx
     addl    $1, %eax
     subl    $4, %edx
     cmpl    $511, %eax
-    jg      .L10
+    setle   %bl
     cmpl    %edi, %eax
-    jle     .L3
-.L10:
+    setle   -21(%ebp)
+    testb   %bl, -21(%ebp)
+    jne     .L3
     movl    -16(%ebp), %eax
     movl    %ecx, vH(,%eax,4)
-.L6:
+.L5:

Label .L3 corresponds to the inner loop body.

-- 
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=42027