------- Additional Comments From giovannibajo at libero dot it  2005-06-01 
22:55 -------
Confirmed. The regression appears only with -fPIC, and it's pretty evident. The 
hot spot is the inner loop of md2_block:

GCC 3.4
=============================================================
.L29:
        xorl    %edx, %edx
        .p2align 2,,3
.L28:
        movl    S@GOTOFF(%ebx,%eax,4), %esi
        xorl    -216(%ebp,%edx,4), %esi
        movl    S@GOTOFF(%ebx,%esi,4), %eax
        xorl    -212(%ebp,%edx,4), %eax
        movl    S@GOTOFF(%ebx,%eax,4), %edi
        xorl    -208(%ebp,%edx,4), %edi
        movl    %esi, -216(%ebp,%edx,4)
        movl    S@GOTOFF(%ebx,%edi,4), %esi
        xorl    -204(%ebp,%edx,4), %esi
        movl    %eax, -212(%ebp,%edx,4)
        movl    S@GOTOFF(%ebx,%esi,4), %eax
        xorl    -200(%ebp,%edx,4), %eax
        movl    %edi, -208(%ebp,%edx,4)
        movl    S@GOTOFF(%ebx,%eax,4), %edi
        xorl    -196(%ebp,%edx,4), %edi
        movl    %esi, -204(%ebp,%edx,4)
        movl    S@GOTOFF(%ebx,%edi,4), %esi
        xorl    -192(%ebp,%edx,4), %esi
        movl    %eax, -200(%ebp,%edx,4)
        movl    S@GOTOFF(%ebx,%esi,4), %eax
        xorl    -188(%ebp,%edx,4), %eax
        movl    %edi, -196(%ebp,%edx,4)
        movl    %esi, -192(%ebp,%edx,4)
        movl    %eax, -188(%ebp,%edx,4)
        addl    $8, %edx
        cmpl    $47, %edx
        jle     .L28
        addl    %ecx, %eax
        incl    %ecx
        andl    $255, %eax
        cmpl    $17, %ecx
        jle     .L29
=============================================================



GCC 4.0
=============================================================
.L16:
        movl    -384(%ebp), %eax
        movl    -208(%ebp), %esi
        incl    -384(%ebp)
        addl    %esi, %eax
        movl    -456(%ebp), %esi
        andl    $255, %eax
        movl    (%edi,%eax,4), %ecx
        movl    -464(%ebp), %eax
        xorl    %ecx, %esi
        movl    (%edi,%esi,4), %edx
        movl    %esi, -368(%ebp)
        movl    %esi, -456(%ebp)
        movl    -488(%ebp), %esi
        xorl    %edx, %eax
        movl    -472(%ebp), %edx
        movl    (%edi,%eax,4), %ecx
        movl    %eax, -364(%ebp)
        movl    %eax, -464(%ebp)
        xorl    %ecx, %edx
        movl    -480(%ebp), %ecx
        movl    (%edi,%edx,4), %eax
        movl    %edx, -360(%ebp)
        movl    %edx, -472(%ebp)
        xorl    %eax, %ecx
        movl    (%edi,%ecx,4), %eax
        movl    %ecx, -356(%ebp)
        movl    %ecx, -480(%ebp)
        xorl    %eax, %esi
        movl    -496(%ebp), %eax
        movl    (%edi,%esi,4), %edx
        movl    %esi, -352(%ebp)
        movl    %esi, -488(%ebp)
        xorl    %edx, %eax
        movl    -504(%ebp), %edx
        movl    (%edi,%eax,4), %ecx
        movl    %eax, -348(%ebp)
        movl    %eax, -496(%ebp)
        xorl    %ecx, %edx
        movl    -512(%ebp), %ecx
        movl    (%edi,%edx,4), %eax
        movl    %edx, -344(%ebp)
        movl    %edx, -504(%ebp)
        xorl    %eax, %ecx
        movl    %ecx, -340(%ebp)
        movl    (%edi,%ecx,4), %eax
        movl    -520(%ebp), %esi
        movl    %ecx, -512(%ebp)
        xorl    %eax, %esi
        movl    -528(%ebp), %eax
        movl    (%edi,%esi,4), %edx
        movl    %esi, -336(%ebp)
        movl    %esi, -520(%ebp)
        movl    -552(%ebp), %esi
        xorl    %edx, %eax
        movl    -536(%ebp), %edx
        movl    (%edi,%eax,4), %ecx
        movl    %eax, -332(%ebp)
        movl    %eax, -528(%ebp)
        xorl    %ecx, %edx
        movl    -544(%ebp), %ecx
        movl    (%edi,%edx,4), %eax
        movl    %edx, -328(%ebp)
        movl    %edx, -536(%ebp)
        xorl    %eax, %ecx
        movl    (%edi,%ecx,4), %eax
        movl    %ecx, -324(%ebp)
        movl    %ecx, -544(%ebp)
        xorl    %eax, %esi
        movl    -556(%ebp), %eax
        movl    (%edi,%esi,4), %edx
        movl    %esi, -320(%ebp)
        movl    %esi, -552(%ebp)
        movl    -568(%ebp), %esi
        xorl    %edx, %eax
        movl    -560(%ebp), %edx
        movl    (%edi,%eax,4), %ecx
        movl    %eax, -316(%ebp)
        movl    %eax, -556(%ebp)
        xorl    %ecx, %edx
        movl    -564(%ebp), %ecx
        movl    (%edi,%edx,4), %eax
        movl    %edx, -312(%ebp)
        movl    %edx, -560(%ebp)
        xorl    %eax, %ecx
        movl    (%edi,%ecx,4), %eax
        movl    %ecx, -308(%ebp)
        movl    %ecx, -564(%ebp)
        xorl    %eax, %esi
        movl    %esi, -304(%ebp)
        movl    (%edi,%esi,4), %edx
        movl    -572(%ebp), %eax
        movl    %esi, -568(%ebp)
        movl    -396(%ebp), %esi
        xorl    %edx, %eax
        movl    -576(%ebp), %edx
        movl    (%edi,%eax,4), %ecx
        movl    %eax, -300(%ebp)
        movl    %eax, -572(%ebp)
        xorl    %ecx, %edx
        movl    -580(%ebp), %ecx
        movl    (%edi,%edx,4), %eax
        movl    %edx, -296(%ebp)
        movl    %edx, -576(%ebp)
        xorl    %eax, %ecx
        movl    (%edi,%ecx,4), %eax
        movl    %ecx, -292(%ebp)
        movl    %ecx, -580(%ebp)
        xorl    %eax, %esi
        movl    -400(%ebp), %eax
        movl    (%edi,%esi,4), %edx
        movl    %esi, -288(%ebp)
        movl    %esi, -396(%ebp)
        movl    -412(%ebp), %esi
        xorl    %edx, %eax
        movl    -404(%ebp), %edx
        movl    (%edi,%eax,4), %ecx
        movl    %eax, -284(%ebp)
        movl    %eax, -400(%ebp)
        xorl    %ecx, %edx
        movl    -408(%ebp), %ecx
        movl    (%edi,%edx,4), %eax
        movl    %edx, -280(%ebp)
        movl    %edx, -404(%ebp)
        xorl    %eax, %ecx
        movl    (%edi,%ecx,4), %eax
        movl    %ecx, -276(%ebp)
        movl    %ecx, -408(%ebp)
        xorl    %eax, %esi
        movl    -416(%ebp), %eax
        movl    (%edi,%esi,4), %edx
        movl    %esi, -272(%ebp)
        movl    %esi, -412(%ebp)
        xorl    %edx, %eax
        movl    %eax, -268(%ebp)
        movl    (%edi,%eax,4), %ecx
        movl    -420(%ebp), %edx
        movl    %eax, -416(%ebp)
        movl    -428(%ebp), %esi
        xorl    %ecx, %edx
        movl    -424(%ebp), %ecx
        movl    (%edi,%edx,4), %eax
        movl    %edx, -264(%ebp)
        movl    %edx, -420(%ebp)
        xorl    %eax, %ecx
        movl    (%edi,%ecx,4), %eax
        movl    %ecx, -260(%ebp)
        movl    %ecx, -424(%ebp)
        xorl    %eax, %esi
        movl    -432(%ebp), %eax
        movl    (%edi,%esi,4), %edx
        movl    %esi, -256(%ebp)
        movl    %esi, -428(%ebp)
        movl    -444(%ebp), %esi
        xorl    %edx, %eax
        movl    -436(%ebp), %edx
        movl    (%edi,%eax,4), %ecx
        movl    %eax, -252(%ebp)
        movl    %eax, -432(%ebp)
        xorl    %ecx, %edx
        movl    -440(%ebp), %ecx
        movl    (%edi,%edx,4), %eax
        movl    %edx, -248(%ebp)
        movl    %edx, -436(%ebp)
        xorl    %eax, %ecx
        movl    (%edi,%ecx,4), %eax
        movl    %ecx, -244(%ebp)
        movl    %ecx, -440(%ebp)
        xorl    %eax, %esi
        movl    -448(%ebp), %eax
        movl    (%edi,%esi,4), %edx
        movl    %esi, -240(%ebp)
        movl    %esi, -444(%ebp)
        xorl    %edx, %eax
        movl    -452(%ebp), %edx
        movl    (%edi,%eax,4), %ecx
        movl    %eax, -236(%ebp)
        movl    %eax, -448(%ebp)
        xorl    %ecx, %edx
        movl    %edx, -232(%ebp)
        movl    (%edi,%edx,4), %eax
        movl    -460(%ebp), %ecx
        movl    -468(%ebp), %esi
        movl    %edx, -452(%ebp)
        xorl    %eax, %ecx
        movl    (%edi,%ecx,4), %eax
        movl    %ecx, -228(%ebp)
        movl    %ecx, -460(%ebp)
        xorl    %eax, %esi
        movl    -476(%ebp), %eax
        movl    (%edi,%esi,4), %edx
        movl    %esi, -224(%ebp)
        movl    %esi, -468(%ebp)
        movl    -500(%ebp), %esi
        xorl    %edx, %eax
        movl    -484(%ebp), %edx
        movl    (%edi,%eax,4), %ecx
        movl    %eax, -220(%ebp)
        movl    %eax, -476(%ebp)
        xorl    %ecx, %edx
        movl    -492(%ebp), %ecx
        movl    (%edi,%edx,4), %eax
        movl    %edx, -216(%ebp)
        movl    %edx, -484(%ebp)
        xorl    %eax, %ecx
        movl    (%edi,%ecx,4), %eax
        movl    %ecx, -212(%ebp)
        movl    %ecx, -492(%ebp)
        xorl    %eax, %esi
        movl    -508(%ebp), %eax
        movl    (%edi,%esi,4), %edx
        movl    %esi, -380(%ebp)
        movl    %esi, -500(%ebp)
        xorl    %edx, %eax
        movl    -516(%ebp), %edx
        movl    (%edi,%eax,4), %esi
        movl    %eax, -376(%ebp)
        movl    %eax, -508(%ebp)
        xorl    %esi, %edx
        movl    -524(%ebp), %esi
        movl    (%edi,%edx,4), %ecx
        movl    %edx, -372(%ebp)
        movl    %edx, -516(%ebp)
        xorl    %ecx, %esi
        movl    %esi, -524(%ebp)
        movl    -532(%ebp), %ecx
        movl    (%edi,%esi,4), %edx
        xorl    %edx, %ecx
        movl    -540(%ebp), %edx
        movl    (%edi,%ecx,4), %eax
        movl    %ecx, -532(%ebp)
        xorl    %eax, %edx
        movl    -548(%ebp), %eax
        xorl    (%edi,%edx,4), %eax
        movl    %edx, -540(%ebp)
        movl    %eax, -584(%ebp)
        movl    %eax, -548(%ebp)
        movl    (%edi,%eax,4), %eax
        xorl    %eax, -208(%ebp)
        cmpl    $17, -384(%ebp)
        jne     .L16
=============================================================


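The inner loop was fully unrolled, but it's clear that the address mode
selection is worse: GCC 3.4 XORs each state word straight from an indexed
memory operand, while GCC 4.0 loads every word from its own stack slot and
stores most results twice.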

-- 
           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|UNCONFIRMED                 |NEW
     Ever Confirmed|                            |1
   Last reconfirmed|0000-00-00 00:00:00         |2005-06-01 22:55:36
               date|                            |


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19923

Reply via email to