------- Additional Comments From steven at gcc dot gnu dot org  2005-02-09 23:35 -------
The entire diff of the .optimized dumps and .s output for twolf on AMD64 is
really small; in fact, the asm output differs for only one file:
 
 config1.c.t65.optimized   |  120 ++++++++++++++++++++++++++++++----------------
 configure.c.t65.optimized |   78 +++++++++++++++++++---------- 
 outpins.c.t65.optimized   |    6 +- 
 outpins.s                 |   36 ++++++------- 
 qsorte.c.t65.optimized    |    3 - 
 qsortg.c.t65.optimized    |    3 - 
 qsortgdx.c.t65.optimized  |    3 - 
 qsortx.c.t65.optimized    |    3 - 
 readcell.c.t65.optimized  |    3 - 
 readseg.c.t65.optimized   |    6 +- 
 ucgxp.c.t65.optimized     |    3 - 
 uloop.c.t65.optimized     |    6 +- 
 12 files changed, 174 insertions(+), 96 deletions(-) 
 
The file with the assembler difference is outpins.c.  The relevant diff is 
below.  There is nothing in the diff that explains the ~4% slowdown I see in 
my SPEC benchmarks (3 runs, so the slowdown is consistent).  The same 
instructions are there, just ordered differently and using different 
registers.  So I'm not sure how to proceed... 
 
diff -u base/outpins.c.t65.optimized patched/outpins.c.t65.optimized
--- base/outpins.c.t65.optimized        2005-02-10 00:19:20.950581229 +0100 
+++ patched/outpins.c.t65.optimized      2005-02-10 00:16:19.436444879 +0100 
@@ -99,8 +99,9 @@ 
   pairArray.39 = pairArray; 
   carray.40 = carray; 
   D.3698 = *((struct cellbox * *) ((long unsigned int) *(*((int * *) D.3712 + pairArray.39 - 8B) + 4B) * 8) + carray.40);
+  end.81 = D.3698->cxcenter + (int) D.3698->tileptr->left; 
   temp.59 = *(carray.40 + (struct cellbox * *) ((long unsigned int) *(*(pairArray.39 + (int * *) D.3712) + 4B) * 8));
-  end = MAX_EXPR <D.3698->cxcenter + (int) D.3698->tileptr->left, temp.59->cxcenter + (int) temp.59->tileptr->left>;
+  end = MAX_EXPR <end.81, temp.59->cxcenter + (int) temp.59->tileptr->left>; 
 
 <L4>:; 
   return end; 
@@ -228,9 +229,10 @@ 
   D.3668 = *((int * *) D.3664 + pairArray.36 - 8B); 
   carray.37 = carray; 
   D.3646 = *((struct cellbox * *) ((long unsigned int) *(D.3668 + (int *) ((long unsigned int) *D.3668 * 4)) * 8) + carray.37);
+  end.121 = D.3646->cxcenter + (int) D.3646->tileptr->right; 
   D.3676 = *(pairArray.36 + (int * *) D.3664); 
   temp.99 = *(carray.37 + (struct cellbox * *) ((long unsigned int) *(D.3676 + (int *) ((long unsigned int) *D.3676 * 4)) * 8));
-  end = MIN_EXPR <D.3646->cxcenter + (int) D.3646->tileptr->right, temp.99->cxcenter + (int) temp.99->tileptr->right>;
+  end = MIN_EXPR <end.121, temp.99->cxcenter + (int) temp.99->tileptr->right>;
 
 <L4>:; 
   return end; 
diff -u base/outpins.s patched/outpins.s
--- base/outpins.s      2005-02-10 00:19:21.064543028 +0100 
+++ patched/outpins.s    2005-02-10 00:16:19.551406289 +0100 
@@ -18,18 +18,18 @@ 
        movq    -8(%rdx,%rcx), %rax 
        movslq  4(%rax),%rax 
        movq    (%rsi,%rax,8), %rdi 
+       movq    40(%rdi), %rax 
+       movswl  (%rax),%r8d 
        movq    (%rcx,%rdx), %rax 
+       addl    12(%rdi), %r8d 
        movslq  4(%rax),%rax 
        movq    (%rsi,%rax,8), %rdx 
-       movq    40(%rdi), %rax 
-       movswl  (%rax),%ecx 
        movq    40(%rdx), %rax 
-       addl    12(%rdi), %ecx 
        movswl  (%rax),%eax 
        addl    12(%rdx), %eax 
-       cmpl    %eax, %ecx 
-       cmovl   %eax, %ecx 
-       movl    %ecx, %eax 
+       cmpl    %eax, %r8d 
+       cmovl   %eax, %r8d 
+       movl    %r8d, %eax 
        ret 
        .p2align 4,,7 
 .L11: 
@@ -40,9 +40,9 @@ 
        movq    carray(%rip), %rax 
        movq    (%rax,%rdx,8), %rdx 
        movq    40(%rdx), %rax 
-       movswl  (%rax),%ecx 
-       addl    12(%rdx), %ecx 
-       movl    %ecx, %eax 
+       movswl  (%rax),%r8d 
+       addl    12(%rdx), %r8d 
+       movl    %r8d, %eax 
        ret 
        .p2align 4,,7 
 .L12: 
@@ -72,18 +72,18 @@ 
        movslq  (%rcx),%rax 
        movslq  (%rcx,%rax,4),%rax 
        movq    (%rdi,%rax,8), %rcx 
+       movq    40(%rcx), %rax 
+       movswl  2(%rax),%r8d 
        movslq  (%rdx),%rax 
+       addl    12(%rcx), %r8d 
        movslq  (%rdx,%rax,4),%rax 
        movq    (%rdi,%rax,8), %rdx 
-       movq    40(%rcx), %rax 
-       movswl  2(%rax),%esi 
        movq    40(%rdx), %rax 
-       addl    12(%rcx), %esi 
        movswl  2(%rax),%eax 
        addl    12(%rdx), %eax 
-       cmpl    %eax, %esi 
-       cmovg   %eax, %esi 
-       movl    %esi, %eax 
+       cmpl    %eax, %r8d 
+       cmovg   %eax, %r8d 
+       movl    %r8d, %eax 
        ret 
        .p2align 4,,7 
 .L22: 
@@ -95,9 +95,9 @@ 
        movq    carray(%rip), %rax 
        movq    (%rax,%rdx,8), %rdx 
        movq    40(%rdx), %rax 
-       movswl  2(%rax),%esi 
-       addl    12(%rdx), %esi 
-       movl    %esi, %eax 
+       movswl  2(%rax),%r8d 
+       addl    12(%rdx), %r8d 
+       movl    %r8d, %eax 
        ret 
        .p2align 4,,7 
 .L23: 
 
 

-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17549

Reply via email to