On Thursday 21 April 2005 06:16, James E Wilson wrote:
> Denis Vlasenko wrote:
> > Yes. wp512_process_buffer() was using 3k of stack if compiled with -O2.
> > The wp512.c I appended (sans table at top) is instrumented to show it.
> > Use "make crypto/wp512.s".
> 
> See
>      http://gcc.gnu.org/bugs.html
> for info on reporting gcc bugs.
> 
> We need a testcase that we can use to reproduce the problem.  The code 
> sample you gave can't be compiled, because it relies on types that 
> aren't defined in the code sample.

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=21141

Testcase:

/* 
Excessive stack usage: the prologue reserves 3516 bytes with -O2 but only 588 bytes with -Os.

gcc3.4.3 -O2: 
wp512_process_buffer:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %edi
        pushl   %esi
        pushl   %ebx
        subl    $3516, %esp
                ^^^^^
gcc3.4.3 -Os: 
wp512_process_buffer:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %edi
        pushl   %esi
        pushl   %ebx
        subl    $588, %esp
*/

/*
 * Size constants for the testcase.
 *
 * WP512_DIGEST_SIZE/8 gives the number of u64 words in wp512_ctx.hash,
 * WP512_BLOCK_SIZE sizes wp512_ctx.buffer and WP512_LENGTHBYTES sizes
 * wp512_ctx.bitLength (both u8 arrays). WHIRLPOOL_ROUNDS is the upper
 * bound of the round loop in wp512_process_buffer().
 *
 * WP384_DIGEST_SIZE and WP256_DIGEST_SIZE are not referenced by the
 * code visible in this testcase.
 */
#define WP512_DIGEST_SIZE 64
#define WP384_DIGEST_SIZE 48
#define WP256_DIGEST_SIZE 32
#define WP512_BLOCK_SIZE  64
#define WP512_LENGTHBYTES 32
#define WHIRLPOOL_ROUNDS 10

/*
 * Local stand-ins for the integer types the original code relied on, so
 * that the testcase compiles on its own.
 *
 * NOTE(review): the names suggest 8/32/64-bit widths, but the C standard
 * only guarantees minimum widths for these base types; the shifts in
 * wp512_process_buffer() assume u64 is exactly 64 bits wide.
 * u32 is not used by the code visible here.
 */
typedef unsigned char u8;
typedef unsigned u32;
typedef unsigned long long u64;

/*
 * Hashing context passed to wp512_process_buffer().
 *
 * Only the hash[] member is touched by the code in this testcase; the
 * other members are declared but never read or written here.
 */
struct wp512_ctx {
        u8  bitLength[WP512_LENGTHBYTES];   /* not accessed in this testcase */
        u8  buffer[WP512_BLOCK_SIZE];       /* not accessed in this testcase */
        int bufferBits;                     /* not accessed in this testcase */
        int bufferPos;                      /* not accessed in this testcase */
        u64 hash[WP512_DIGEST_SIZE/8];      /* 8 words, XOR-updated by wp512_process_buffer() */
};

/*
 * Lookup table used by wp512_process_buffer().
 *
 * It is declared without an initializer, so as a file-scope object it is
 * zero-initialized; the actual table contents are not part of this
 * testcase.
 */
static const u64 C0[256];
/*
 * C1..C7 and rc are all aliased to C0 here, so every table lookup in
 * wp512_process_buffer() reads from the same array.
 * NOTE(review): presumably these are distinct tables in the full source
 * and were collapsed to keep the testcase small -- confirm against the
 * original wp512.c.
 */
#define C1 C0
#define C2 C0
#define C3 C0
#define C4 C0
#define C5 C0
#define C6 C0
#define C7 C0
#define rc C0

/*
 * Run WHIRLPOOL_ROUNDS rounds of table-driven mixing over the local
 * arrays K[] and state[], then fold the result into wctx->hash[].
 *
 * Each round does two passes with the same lookup pattern:
 *   1. L[] is derived from K[] (with rc[r] XORed into L[0] only) and
 *      copied back into K[].
 *   2. L[] is derived from state[], XORed word-by-word with the new
 *      K[], and copied back into state[].
 *
 * Lookup pattern: output word i XORs eight table entries; table Cj is
 * indexed by the byte obtained by shifting input word (i - j) mod 8
 * right by (56 - 8*j) bits.
 *
 * After the loop, hash[i] ^= state[i] ^ block[i] for i = 0..7.
 *
 * wctx: context whose hash[] words are updated in place; no other
 *       member is accessed.
 *
 * NOTE(review): in this reduced testcase K[], block[] and state[] are
 * read without ever being assigned an initial value, so the values
 * folded into wctx->hash[] are indeterminate. The function is only
 * meant to be compiled (to inspect the generated stack frame), not run.
 * The statements are kept fully unrolled because that exact shape is
 * what the stack-usage report is about.
 */
void wp512_process_buffer(struct wp512_ctx *wctx) {
        int r;
        u64 K[8];        /* the round key */
        u64 block[8];    /* mu(buffer) */
        u64 state[8];    /* the cipher state */
        u64 L[8];        /* scratch: holds the new K[] / state[] words until all 8 are computed */

        for (r = 1; r <= WHIRLPOOL_ROUNDS; r++) {

                /*
                 * Pass 1: compute the next K[] into L[].
                 * The first lookup of each word needs no mask: shifting a
                 * 64-bit value right by 56 leaves only the top byte.
                 * rc[r] is XORed into word 0 only.
                 */
                L[0] = C0[(int)(K[0] >> 56)       ] ^
                           C1[(int)(K[7] >> 48) & 0xff] ^
                           C2[(int)(K[6] >> 40) & 0xff] ^
                           C3[(int)(K[5] >> 32) & 0xff] ^
                           C4[(int)(K[4] >> 24) & 0xff] ^
                           C5[(int)(K[3] >> 16) & 0xff] ^
                           C6[(int)(K[2] >>  8) & 0xff] ^
                           C7[(int)(K[1]      ) & 0xff] ^
                           rc[r];

                L[1] = C0[(int)(K[1] >> 56)       ] ^
                           C1[(int)(K[0] >> 48) & 0xff] ^
                           C2[(int)(K[7] >> 40) & 0xff] ^
                           C3[(int)(K[6] >> 32) & 0xff] ^
                           C4[(int)(K[5] >> 24) & 0xff] ^
                           C5[(int)(K[4] >> 16) & 0xff] ^
                           C6[(int)(K[3] >>  8) & 0xff] ^
                           C7[(int)(K[2]      ) & 0xff];

                L[2] = C0[(int)(K[2] >> 56)       ] ^
                           C1[(int)(K[1] >> 48) & 0xff] ^
                           C2[(int)(K[0] >> 40) & 0xff] ^
                           C3[(int)(K[7] >> 32) & 0xff] ^
                           C4[(int)(K[6] >> 24) & 0xff] ^
                           C5[(int)(K[5] >> 16) & 0xff] ^
                           C6[(int)(K[4] >>  8) & 0xff] ^
                           C7[(int)(K[3]      ) & 0xff];

                L[3] = C0[(int)(K[3] >> 56)       ] ^
                           C1[(int)(K[2] >> 48) & 0xff] ^
                           C2[(int)(K[1] >> 40) & 0xff] ^
                           C3[(int)(K[0] >> 32) & 0xff] ^
                           C4[(int)(K[7] >> 24) & 0xff] ^
                           C5[(int)(K[6] >> 16) & 0xff] ^
                           C6[(int)(K[5] >>  8) & 0xff] ^
                           C7[(int)(K[4]      ) & 0xff];

                L[4] = C0[(int)(K[4] >> 56)       ] ^
                           C1[(int)(K[3] >> 48) & 0xff] ^
                           C2[(int)(K[2] >> 40) & 0xff] ^
                           C3[(int)(K[1] >> 32) & 0xff] ^
                           C4[(int)(K[0] >> 24) & 0xff] ^
                           C5[(int)(K[7] >> 16) & 0xff] ^
                           C6[(int)(K[6] >>  8) & 0xff] ^
                           C7[(int)(K[5]      ) & 0xff];

                L[5] = C0[(int)(K[5] >> 56)       ] ^
                           C1[(int)(K[4] >> 48) & 0xff] ^
                           C2[(int)(K[3] >> 40) & 0xff] ^
                           C3[(int)(K[2] >> 32) & 0xff] ^
                           C4[(int)(K[1] >> 24) & 0xff] ^
                           C5[(int)(K[0] >> 16) & 0xff] ^
                           C6[(int)(K[7] >>  8) & 0xff] ^
                           C7[(int)(K[6]      ) & 0xff];

                L[6] = C0[(int)(K[6] >> 56)       ] ^
                           C1[(int)(K[5] >> 48) & 0xff] ^
                           C2[(int)(K[4] >> 40) & 0xff] ^
                           C3[(int)(K[3] >> 32) & 0xff] ^
                           C4[(int)(K[2] >> 24) & 0xff] ^
                           C5[(int)(K[1] >> 16) & 0xff] ^
                           C6[(int)(K[0] >>  8) & 0xff] ^
                           C7[(int)(K[7]      ) & 0xff];

                L[7] = C0[(int)(K[7] >> 56)       ] ^
                           C1[(int)(K[6] >> 48) & 0xff] ^
                           C2[(int)(K[5] >> 40) & 0xff] ^
                           C3[(int)(K[4] >> 32) & 0xff] ^
                           C4[(int)(K[3] >> 24) & 0xff] ^
                           C5[(int)(K[2] >> 16) & 0xff] ^
                           C6[(int)(K[1] >>  8) & 0xff] ^
                           C7[(int)(K[0]      ) & 0xff];

                /* Commit the new round key only after all 8 words were derived from the old one. */
                K[0] = L[0];
                K[1] = L[1];
                K[2] = L[2];
                K[3] = L[3];
                K[4] = L[4];
                K[5] = L[5];
                K[6] = L[6];
                K[7] = L[7];

                /*
                 * Pass 2: same lookup pattern applied to state[], with the
                 * freshly updated K[i] XORed into word i.
                 */
                L[0] = C0[(int)(state[0] >> 56)       ] ^
                           C1[(int)(state[7] >> 48) & 0xff] ^
                           C2[(int)(state[6] >> 40) & 0xff] ^
                           C3[(int)(state[5] >> 32) & 0xff] ^
                           C4[(int)(state[4] >> 24) & 0xff] ^
                           C5[(int)(state[3] >> 16) & 0xff] ^
                           C6[(int)(state[2] >>  8) & 0xff] ^
                           C7[(int)(state[1]      ) & 0xff] ^
                           K[0];

                L[1] = C0[(int)(state[1] >> 56)       ] ^
                           C1[(int)(state[0] >> 48) & 0xff] ^
                           C2[(int)(state[7] >> 40) & 0xff] ^
                           C3[(int)(state[6] >> 32) & 0xff] ^
                           C4[(int)(state[5] >> 24) & 0xff] ^
                           C5[(int)(state[4] >> 16) & 0xff] ^
                           C6[(int)(state[3] >>  8) & 0xff] ^
                           C7[(int)(state[2]      ) & 0xff] ^
                           K[1];

                L[2] = C0[(int)(state[2] >> 56)       ] ^
                           C1[(int)(state[1] >> 48) & 0xff] ^
                           C2[(int)(state[0] >> 40) & 0xff] ^
                           C3[(int)(state[7] >> 32) & 0xff] ^
                           C4[(int)(state[6] >> 24) & 0xff] ^
                           C5[(int)(state[5] >> 16) & 0xff] ^
                           C6[(int)(state[4] >>  8) & 0xff] ^
                           C7[(int)(state[3]      ) & 0xff] ^
                           K[2];

                L[3] = C0[(int)(state[3] >> 56)       ] ^
                           C1[(int)(state[2] >> 48) & 0xff] ^
                           C2[(int)(state[1] >> 40) & 0xff] ^
                           C3[(int)(state[0] >> 32) & 0xff] ^
                           C4[(int)(state[7] >> 24) & 0xff] ^
                           C5[(int)(state[6] >> 16) & 0xff] ^
                           C6[(int)(state[5] >>  8) & 0xff] ^
                           C7[(int)(state[4]      ) & 0xff] ^
                           K[3];

                L[4] = C0[(int)(state[4] >> 56)       ] ^
                           C1[(int)(state[3] >> 48) & 0xff] ^
                           C2[(int)(state[2] >> 40) & 0xff] ^
                           C3[(int)(state[1] >> 32) & 0xff] ^
                           C4[(int)(state[0] >> 24) & 0xff] ^
                           C5[(int)(state[7] >> 16) & 0xff] ^
                           C6[(int)(state[6] >>  8) & 0xff] ^
                           C7[(int)(state[5]      ) & 0xff] ^
                           K[4];

                L[5] = C0[(int)(state[5] >> 56)       ] ^
                           C1[(int)(state[4] >> 48) & 0xff] ^
                           C2[(int)(state[3] >> 40) & 0xff] ^
                           C3[(int)(state[2] >> 32) & 0xff] ^
                           C4[(int)(state[1] >> 24) & 0xff] ^
                           C5[(int)(state[0] >> 16) & 0xff] ^
                           C6[(int)(state[7] >>  8) & 0xff] ^
                           C7[(int)(state[6]      ) & 0xff] ^
                           K[5];

                L[6] = C0[(int)(state[6] >> 56)       ] ^
                           C1[(int)(state[5] >> 48) & 0xff] ^
                           C2[(int)(state[4] >> 40) & 0xff] ^
                           C3[(int)(state[3] >> 32) & 0xff] ^
                           C4[(int)(state[2] >> 24) & 0xff] ^
                           C5[(int)(state[1] >> 16) & 0xff] ^
                           C6[(int)(state[0] >>  8) & 0xff] ^
                           C7[(int)(state[7]      ) & 0xff] ^
                           K[6];

                L[7] = C0[(int)(state[7] >> 56)       ] ^
                           C1[(int)(state[6] >> 48) & 0xff] ^
                           C2[(int)(state[5] >> 40) & 0xff] ^
                           C3[(int)(state[4] >> 32) & 0xff] ^
                           C4[(int)(state[3] >> 24) & 0xff] ^
                           C5[(int)(state[2] >> 16) & 0xff] ^
                           C6[(int)(state[1] >>  8) & 0xff] ^
                           C7[(int)(state[0]      ) & 0xff] ^
                           K[7];

                /* Commit the new state only after all 8 words were derived from the old one. */
                state[0] = L[0];
                state[1] = L[1];
                state[2] = L[2];
                state[3] = L[3];
                state[4] = L[4];
                state[5] = L[5];
                state[6] = L[6];
                state[7] = L[7];
        }
        /* Fold the final state and the block words into the running hash. */
        wctx->hash[0] ^= state[0] ^ block[0];
        wctx->hash[1] ^= state[1] ^ block[1];
        wctx->hash[2] ^= state[2] ^ block[2];
        wctx->hash[3] ^= state[3] ^ block[3];
        wctx->hash[4] ^= state[4] ^ block[4];
        wctx->hash[5] ^= state[5] ^ block[5];
        wctx->hash[6] ^= state[6] ^ block[6];
        wctx->hash[7] ^= state[7] ^ block[7];
}

Reply via email to