------- Additional Comments From giovannibajo at libero dot it 2005-06-01 22:55 ------- Confirmed. The regression appears only with -fPIC, and it is pretty evident. The core is md2_block; its inner loop is shown below for both compilers:
GCC 3.4 ============================================================= .L29: xorl %edx, %edx .p2align 2,,3 .L28: movl S@GOTOFF(%ebx,%eax,4), %esi xorl -216(%ebp,%edx,4), %esi movl S@GOTOFF(%ebx,%esi,4), %eax xorl -212(%ebp,%edx,4), %eax movl S@GOTOFF(%ebx,%eax,4), %edi xorl -208(%ebp,%edx,4), %edi movl %esi, -216(%ebp,%edx,4) movl S@GOTOFF(%ebx,%edi,4), %esi xorl -204(%ebp,%edx,4), %esi movl %eax, -212(%ebp,%edx,4) movl S@GOTOFF(%ebx,%esi,4), %eax xorl -200(%ebp,%edx,4), %eax movl %edi, -208(%ebp,%edx,4) movl S@GOTOFF(%ebx,%eax,4), %edi xorl -196(%ebp,%edx,4), %edi movl %esi, -204(%ebp,%edx,4) movl S@GOTOFF(%ebx,%edi,4), %esi xorl -192(%ebp,%edx,4), %esi movl %eax, -200(%ebp,%edx,4) movl S@GOTOFF(%ebx,%esi,4), %eax xorl -188(%ebp,%edx,4), %eax movl %edi, -196(%ebp,%edx,4) movl %esi, -192(%ebp,%edx,4) movl %eax, -188(%ebp,%edx,4) addl $8, %edx cmpl $47, %edx jle .L28 addl %ecx, %eax incl %ecx andl $255, %eax cmpl $17, %ecx jle .L29 ============================================================= GCC 4.0 ============================================================= .L16: movl -384(%ebp), %eax movl -208(%ebp), %esi incl -384(%ebp) addl %esi, %eax movl -456(%ebp), %esi andl $255, %eax movl (%edi,%eax,4), %ecx movl -464(%ebp), %eax xorl %ecx, %esi movl (%edi,%esi,4), %edx movl %esi, -368(%ebp) movl %esi, -456(%ebp) movl -488(%ebp), %esi xorl %edx, %eax movl -472(%ebp), %edx movl (%edi,%eax,4), %ecx movl %eax, -364(%ebp) movl %eax, -464(%ebp) xorl %ecx, %edx movl -480(%ebp), %ecx movl (%edi,%edx,4), %eax movl %edx, -360(%ebp) movl %edx, -472(%ebp) xorl %eax, %ecx movl (%edi,%ecx,4), %eax movl %ecx, -356(%ebp) movl %ecx, -480(%ebp) xorl %eax, %esi movl -496(%ebp), %eax movl (%edi,%esi,4), %edx movl %esi, -352(%ebp) movl %esi, -488(%ebp) xorl %edx, %eax movl -504(%ebp), %edx movl (%edi,%eax,4), %ecx movl %eax, -348(%ebp) movl %eax, -496(%ebp) xorl %ecx, %edx movl -512(%ebp), %ecx 
movl (%edi,%edx,4), %eax movl %edx, -344(%ebp) movl %edx, -504(%ebp) xorl %eax, %ecx movl %ecx, -340(%ebp) movl (%edi,%ecx,4), %eax movl -520(%ebp), %esi movl %ecx, -512(%ebp) xorl %eax, %esi movl -528(%ebp), %eax movl (%edi,%esi,4), %edx movl %esi, -336(%ebp) movl %esi, -520(%ebp) movl -552(%ebp), %esi xorl %edx, %eax movl -536(%ebp), %edx movl (%edi,%eax,4), %ecx movl %eax, -332(%ebp) movl %eax, -528(%ebp) xorl %ecx, %edx movl -544(%ebp), %ecx movl (%edi,%edx,4), %eax movl %edx, -328(%ebp) movl %edx, -536(%ebp) xorl %eax, %ecx movl (%edi,%ecx,4), %eax movl %ecx, -324(%ebp) movl %ecx, -544(%ebp) xorl %eax, %esi movl -556(%ebp), %eax movl (%edi,%esi,4), %edx movl %esi, -320(%ebp) movl %esi, -552(%ebp) movl -568(%ebp), %esi xorl %edx, %eax movl -560(%ebp), %edx movl (%edi,%eax,4), %ecx movl %eax, -316(%ebp) movl %eax, -556(%ebp) xorl %ecx, %edx movl -564(%ebp), %ecx movl (%edi,%edx,4), %eax movl %edx, -312(%ebp) movl %edx, -560(%ebp) xorl %eax, %ecx movl (%edi,%ecx,4), %eax movl %ecx, -308(%ebp) movl %ecx, -564(%ebp) xorl %eax, %esi movl %esi, -304(%ebp) movl (%edi,%esi,4), %edx movl -572(%ebp), %eax movl %esi, -568(%ebp) movl -396(%ebp), %esi xorl %edx, %eax movl -576(%ebp), %edx movl (%edi,%eax,4), %ecx movl %eax, -300(%ebp) movl %eax, -572(%ebp) xorl %ecx, %edx movl -580(%ebp), %ecx movl (%edi,%edx,4), %eax movl %edx, -296(%ebp) movl %edx, -576(%ebp) xorl %eax, %ecx movl (%edi,%ecx,4), %eax movl %ecx, -292(%ebp) movl %ecx, -580(%ebp) xorl %eax, %esi movl -400(%ebp), %eax movl (%edi,%esi,4), %edx movl %esi, -288(%ebp) movl %esi, -396(%ebp) movl -412(%ebp), %esi xorl %edx, %eax movl -404(%ebp), %edx movl (%edi,%eax,4), %ecx movl %eax, -284(%ebp) movl %eax, -400(%ebp) xorl %ecx, %edx movl -408(%ebp), %ecx movl (%edi,%edx,4), %eax movl %edx, -280(%ebp) movl %edx, -404(%ebp) xorl %eax, %ecx movl (%edi,%ecx,4), %eax movl %ecx, -276(%ebp) movl %ecx, -408(%ebp) xorl %eax, %esi movl -416(%ebp), %eax movl (%edi,%esi,4), %edx movl %esi, -272(%ebp) movl %esi, -412(%ebp) xorl 
%edx, %eax movl %eax, -268(%ebp) movl (%edi,%eax,4), %ecx movl -420(%ebp), %edx movl %eax, -416(%ebp) movl -428(%ebp), %esi xorl %ecx, %edx movl -424(%ebp), %ecx movl (%edi,%edx,4), %eax movl %edx, -264(%ebp) movl %edx, -420(%ebp) xorl %eax, %ecx movl (%edi,%ecx,4), %eax movl %ecx, -260(%ebp) movl %ecx, -424(%ebp) xorl %eax, %esi movl -432(%ebp), %eax movl (%edi,%esi,4), %edx movl %esi, -256(%ebp) movl %esi, -428(%ebp) movl -444(%ebp), %esi xorl %edx, %eax movl -436(%ebp), %edx movl (%edi,%eax,4), %ecx movl %eax, -252(%ebp) movl %eax, -432(%ebp) xorl %ecx, %edx movl -440(%ebp), %ecx movl (%edi,%edx,4), %eax movl %edx, -248(%ebp) movl %edx, -436(%ebp) xorl %eax, %ecx movl (%edi,%ecx,4), %eax movl %ecx, -244(%ebp) movl %ecx, -440(%ebp) xorl %eax, %esi movl -448(%ebp), %eax movl (%edi,%esi,4), %edx movl %esi, -240(%ebp) movl %esi, -444(%ebp) xorl %edx, %eax movl -452(%ebp), %edx movl (%edi,%eax,4), %ecx movl %eax, -236(%ebp) movl %eax, -448(%ebp) xorl %ecx, %edx movl %edx, -232(%ebp) movl (%edi,%edx,4), %eax movl -460(%ebp), %ecx movl -468(%ebp), %esi movl %edx, -452(%ebp) xorl %eax, %ecx movl (%edi,%ecx,4), %eax movl %ecx, -228(%ebp) movl %ecx, -460(%ebp) xorl %eax, %esi movl -476(%ebp), %eax movl (%edi,%esi,4), %edx movl %esi, -224(%ebp) movl %esi, -468(%ebp) movl -500(%ebp), %esi xorl %edx, %eax movl -484(%ebp), %edx movl (%edi,%eax,4), %ecx movl %eax, -220(%ebp) movl %eax, -476(%ebp) xorl %ecx, %edx movl -492(%ebp), %ecx movl (%edi,%edx,4), %eax movl %edx, -216(%ebp) movl %edx, -484(%ebp) xorl %eax, %ecx movl (%edi,%ecx,4), %eax movl %ecx, -212(%ebp) movl %ecx, -492(%ebp) xorl %eax, %esi movl -508(%ebp), %eax movl (%edi,%esi,4), %edx movl %esi, -380(%ebp) movl %esi, -500(%ebp) xorl %edx, %eax movl -516(%ebp), %edx movl (%edi,%eax,4), %esi movl %eax, -376(%ebp) movl %eax, -508(%ebp) xorl %esi, %edx movl -524(%ebp), %esi movl (%edi,%edx,4), %ecx movl %edx, -372(%ebp) movl %edx, 
-516(%ebp) xorl %ecx, %esi movl %esi, -524(%ebp) movl -532(%ebp), %ecx movl (%edi,%esi,4), %edx xorl %edx, %ecx movl -540(%ebp), %edx movl (%edi,%ecx,4), %eax movl %ecx, -532(%ebp) xorl %eax, %edx movl -548(%ebp), %eax xorl (%edi,%edx,4), %eax movl %edx, -540(%ebp) movl %eax, -584(%ebp) movl %eax, -548(%ebp) movl (%edi,%eax,4), %eax xorl %eax, -208(%ebp) cmpl $17, -384(%ebp) jne .L16 ============================================================= The loop was unrolled, but it is clear that the addressing-mode selection is worse. -- What |Removed |Added ---------------------------------------------------------------------------- Status|UNCONFIRMED |NEW Ever Confirmed| |1 Last reconfirmed date|0000-00-00 00:00:00 |2005-06-01 22:55:36 http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19923