------- Comment #4 from pinskia at gcc dot gnu dot org 2005-11-01 16:17 -------
If we change (*p1)[d-1] to (*p1)[d], we get:

.L2:
        movl    8(%edi), %eax
        movswl  (%eax),%edx
        movl    4(%edi), %eax
        movswl  (%eax),%eax
        subl    %edx, %eax
        movl    %eax, %ecx
        sarl    $31, %ecx
        xorl    %ecx, %eax
        subl    %ecx, %eax
        movl    (%edi), %ecx
        movswl  (%ecx),%ecx
        subl    %ecx, %edx
        movl    %edx, %ecx
        sarl    $31, %ecx
        xorl    %ecx, %edx
        subl    %ecx, %edx
        cmpl    %edx, %eax
        movl    8(%ebp), %edx
        setge   %al
        movzbl  %al, %eax
        incl    %eax
        movl    %eax, -4(%edx,%esi,4)
        incl    %esi
        movl    %eax, 8(%esp)
        movsbl  (%edi,%eax),%eax
        movl    %ebx, (%esp)
        addl    $32, %ebx
        movl    %eax, 4(%esp)
        call    bar
        addl    %eax, -16(%ebp)
        cmpl    $7, %esi
        jne     .L2

for the loop, which seems very good, as there are no branches, only:

        setge   %al
        movzbl  %al, %eax
        incl    %eax
-- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=24609