------- Additional Comments From rakdver at atrey dot karlin dot mff dot cuni 
dot cz  2005-06-06 15:00 -------
Subject: Re:  openssl is slower when compiled with gcc 4.0 than 3.3

> Looks like the culprit is this:
> 
> =========================================================================
> static unsigned int S[256];
> unsigned
> md2_block (unsigned int *sp1, unsigned int *sp2, const unsigned char *d)
> {
>       register unsigned int t;
>       register int i, j;
>       static unsigned int state[48];
> 
>       j = sp2[16 - 1];
>       for (i = 0; i < 16; i++)
>       {
>               state[i] = sp1[i];
>               state[i + 16] = t = d[i];
>               state[i + 32] = (t ^ sp1[i]);
>               j = sp2[i] ^= S[t ^ j];
>       }
> }
> =========================================================================

With the TARGET_MEM_REFs patch the result is much better.  At
least we avoid the multiplication by 4:

>       leal    0(,%edi,4), %ecx

and other results of the DOM misoptimization of addressing modes, which was
one of the main motivations for TARGET_MEM_REFs.

We still use one more induction variable (iv) than in the 3.4 case, and as a
result we need one more register.

.L2:
        movl    8(%ebp), %edi
        movl    -4(%edi,%ecx,4), %eax
        movl    %eax, (%esi)
        movl    16(%ebp), %edx
        movzbl  -1(%ecx,%edx), %eax
        movl    %eax, 64(%esi)
        movl    -4(%edi,%ecx,4), %edx
        xorl    %eax, %edx
        movl    %edx, 128(%esi)
        xorl    -20(%ebp), %eax
        movl    -16(%ebp), %edi
        movl    (%edi,%eax,4), %eax
        movl    12(%ebp), %edx
        xorl    -4(%edx,%ecx,4), %eax
        movl    %eax, -4(%edx,%ecx,4)
        movl    %eax, -20(%ebp)
        incl    %ecx
        addl    $4, %esi
        cmpl    $17, %ecx
        jne     .L2


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19923

Reply via email to