/*
 * Ekin - fill a 3-D array with a scaled sum of squared neighbour sums.
 *
 * For every (i, j, k) with 0 <= i < sz[0], 0 <= j < sz[1], 0 <= k < sz[2]:
 *
 *   e(i,j,k) = 0.125 * ( (vx(i,j,k) + vx(i+1,j,k))^2
 *                      + (vy(i,j,k) + vy(i,j+1,k))^2
 *                      + (vz(i,j,k) + vz(i,j,k+1))^2 )
 *
 * The factor 0.125 is 1/2 * (1/2)^2: each pair of neighbouring samples is
 * averaged (1/2) before being squared, and the sum of squares is halved.
 * NOTE(review): the name suggests kinetic energy per unit mass on a
 * staggered grid -- confirm against the caller.
 *
 * Parameters:
 *   e         output array, written at i + j*stridee[1] + k*stridee[2].
 *   stridee   element strides of e; only entries [1] and [2] are read.
 *   vx,vy,vz  input arrays; vx is also read at i+1, vy at j+1 and vz at
 *             k+1, so each must hold one extra sample along its own axis.
 *   stridev*  element strides of the matching input; only entries [1] and
 *             [2] are read.
 *   sz        loop extents: sz[0] for i, sz[1] for j, sz[2] for k.
 *
 * The i index is always used with an implicit stride of 1.
 */
void Ekin(double *e, int *stridee,
          double *vx, int *stridevx,
          double *vy, int *stridevy,
          double *vz, int *stridevz,
          int *sz)
{
    int i1 = sz[0];
    int j1 = sz[1];
    int k1 = sz[2];
    int i, j, k;

    for (k = 0; k < k1; ++k) {
        for (j = 0; j < j1; ++j) {
            for (i = 0; i < i1; ++i) {
                /* Sum of the two samples straddling the cell along x. */
                double sum_x = vx[i + j * stridevx[1] + k * stridevx[2]]
                             + vx[i+1 + j * stridevx[1] + k * stridevx[2]];
                /* Same along y: neighbour is one j-stride away. */
                double sum_y = vy[i + j * stridevy[1] + k * stridevy[2]]
                             + vy[i + (j+1) * stridevy[1] + k * stridevy[2]];
                /* Same along z: neighbour is one k-stride away. */
                double sum_z = vz[i + j * stridevz[1] + k * stridevz[2]]
                             + vz[i + j * stridevz[1] + (k+1) * stridevz[2]];

                e[i + j * stridee[1] + k * stridee[2]]
                    = 0.125 * (sum_x * sum_x
                               + sum_y * sum_y
                               + sum_z * sum_z);
            }
        }
    }
}
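The loop-invariant motion pass (lim) hoists all of the j*stridev?[1] and
k*stridev?[2] computations out of the i-loop into the j-loop, but it does
not hoist the k*stridev?[2] computations, which are also invariant in j,
further out into the k-loop. This results in the following asm.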
# Compiler output for Ekin (32-bit x86, AT&T syntax).
# Stack arguments as used below: 8(%ebp)=e, 12(%ebp)=stridee, 16(%ebp)=vx,
# 20(%ebp)=stridevx, 24(%ebp)=vy, 28(%ebp)=stridevy, 32(%ebp)=vz,
# 36(%ebp)=stridevz, 40(%ebp)=sz.
# Local slots: -24=k, -48=k+1, -16=j, -28=j+1, -56=i counter,
# -40=sz[0], -36=sz[1], -32=sz[2], -20=byte offset into e,
# -52=byte offset into vz, -44=j*stridevz[1].
Ekin:
pushl %ebp
movl %esp, %ebp
pushl %edi
pushl %esi
pushl %ebx
subl $44, %esp
# Load the three loop extents from sz and initialise k = 0.
movl 40(%ebp), %eax
movl $0, -24(%ebp)
movl (%eax), %edx
movl 4(%eax), %ecx
movl 8(%eax), %eax
movl %edx, -40(%ebp)
movl %ecx, -36(%ebp)
testl %eax, %eax
movl %eax, -32(%ebp)
# Nothing to do when sz[2] <= 0.
jle .L13
# k-loop header: compute k+1, skip the j-loop when sz[1] <= 0.
.L4:
movl -24(%ebp), %edx
movl -36(%ebp), %eax
incl %edx
testl %eax, %eax
movl %edx, -48(%ebp)
jle .L7
movl -40(%ebp), %ecx
movl $0, -16(%ebp)
testl %ecx, %ecx
jle .L17
# j-loop body / i-loop preheader. All stride loads and every j*stride[1]
# and k*stride[2] product are (re)computed here on each j iteration,
# including the k*stride[2] products that do not depend on j.
.L10:
movl 28(%ebp), %eax
movl -24(%ebp), %ebx
movl -16(%ebp), %edi
movl $0, -56(%ebp)
# Push the scale constant; it stays on the x87 stack for the whole i-loop.
fldl .LC0
movl 8(%eax), %edx
movl 4(%eax), %ecx
incl %edi
movl %edi, -28(%ebp)
movl -16(%ebp), %edi
# %ebx = k * stridevy[2]
imull %edx, %ebx
movl 36(%ebp), %edx
movl 4(%edx), %eax
movl 8(%edx), %esi
movl 12(%ebp), %edx
# %edi = j * stridevz[1], saved in -44
imull %eax, %edi
movl -16(%ebp), %eax
movl %edi, -44(%ebp)
movl 4(%edx), %edi
movl -24(%ebp), %edx
# %eax = j * stridee[1] + k * stridee[2], scaled by 8 to a byte offset
imull %edi, %eax
movl 12(%ebp), %edi
imull 8(%edi), %edx
addl %edx, %eax
movl -16(%ebp), %edx
sall $3, %eax
movl %eax, -20(%ebp)
movl 20(%ebp), %eax
movl 4(%eax), %edi
movl -24(%ebp), %eax
# %edx = j * stridevx[1] + k * stridevx[2] (element index into vx)
imull %edi, %edx
movl 20(%ebp), %edi
imull 8(%edi), %eax
addl %eax, %edx
movl -16(%ebp), %eax
# %edi = 8 * (j * stridevy[1] + k * stridevy[2])
imull %ecx, %eax
addl %ebx, %eax
leal 0(,%eax,8), %edi
movl -28(%ebp), %eax
# %ebx = 8 * ((j+1) * stridevy[1] + k * stridevy[2])
imull %eax, %ecx
movl -24(%ebp), %eax
addl %ecx, %ebx
movl -44(%ebp), %ecx
imull %esi, %eax
sall $3, %ebx
# -52 = 8 * (j * stridevz[1] + k * stridevz[2])
addl %ecx, %eax
movl -48(%ebp), %ecx
sall $3, %eax
movl %eax, -52(%ebp)
# %esi = 8 * (j * stridevz[1] + (k+1) * stridevz[2])
imull %ecx, %esi
movl -44(%ebp), %ecx
addl %esi, %ecx
leal 0(,%ecx,8), %esi
.p2align 4,,15
# Innermost i-loop: each offset advances by one double per iteration.
.L5:
movl 16(%ebp), %eax
movl 24(%ebp), %ecx
incl -56(%ebp)
# vx[i] + vx[i+1]
fldl (%eax,%edx,8)
faddl 8(%eax,%edx,8)
incl %edx
movl -52(%ebp), %eax
addl $8, -52(%ebp)
# vy at j plus vy at j+1
fldl (%edi,%ecx)
addl $8, %edi
faddl (%ecx,%ebx)
addl $8, %ebx
movl 32(%ebp), %ecx
# vz at k plus vz at k+1
fldl (%eax,%ecx)
faddl (%ecx,%esi)
fxch %st(2)
addl $8, %esi
movl -20(%ebp), %eax
movl 8(%ebp), %ecx
# Square each of the three sums and add them together.
fmul %st(0), %st
fxch %st(1)
fmul %st(0), %st
faddp %st, %st(1)
fxch %st(1)
fmul %st(0), %st
faddp %st, %st(1)
# Multiply by the constant still held in %st(1) and store into e.
fmul %st(1), %st
fstpl (%eax,%ecx)
addl $8, %eax
movl %eax, -20(%ebp)
movl -56(%ebp), %eax
cmpl %eax, -40(%ebp)
jne .L5
# i-loop done: pop the constant, then test whether j+1 reached sz[1].
fstp %st(0)
movl -28(%ebp), %edx
cmpl %edx, -36(%ebp)
jle .L7
# Advance j and re-enter the j-loop body if sz[0] > 0.
.L18:
movl -40(%ebp), %ecx
movl %edx, -16(%ebp)
testl %ecx, %ecx
jg .L10
# Path taken when sz[0] <= 0: step j without running the i-loop.
.L17:
movl -16(%ebp), %ecx
incl %ecx
movl %ecx, -28(%ebp)
movl -28(%ebp), %edx
cmpl %edx, -36(%ebp)
jg .L18
# k-loop latch: k = k+1, repeat until it equals sz[2].
.L7:
movl -48(%ebp), %eax
cmpl %eax, -32(%ebp)
movl %eax, -24(%ebp)
jne .L4
# Epilogue: release locals and restore callee-saved registers.
.L13:
addl $44, %esp
popl %ebx
popl %esi
popl %edi
popl %ebp
ret
--
Summary: loop-invariant-motion is not doing its work
Product: gcc
Version: 4.1.0
Status: UNCONFIRMED
Keywords: missed-optimization
Severity: normal
Priority: P2
Component: tree-optimization
AssignedTo: unassigned at gcc dot gnu dot org
ReportedBy: rguenth at gcc dot gnu dot org
CC: gcc-bugs at gcc dot gnu dot org
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23970