On Thursday 21 April 2005 06:16, James E Wilson wrote: > Denis Vlasenko wrote: > > Yes. wp512_process_buffer() was using 3k of stack if compiled with -O2. > > The wp512.c I appended (sans table at top) is instrumented to show it. > > Use "make crypto/wp512.s". > > See > http://gcc.gnu.org/bugs.html > for info on reporting gcc bugs. > > We need a testcase that we can use to reproduce the problem. The code > sample you gave can't be compiled, because it relies on types that > aren't defined in the code sample.
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=21141 Testcase: /* Excessive stack usage: gcc3.4.3 -O2: wp512_process_buffer: pushl %ebp movl %esp, %ebp pushl %edi pushl %esi pushl %ebx subl $3516, %esp ^^^^^ gcc3.4.3 -Os: wp512_process_buffer: pushl %ebp movl %esp, %ebp pushl %edi pushl %esi pushl %ebx subl $588, %esp */ #define WP512_DIGEST_SIZE 64 #define WP384_DIGEST_SIZE 48 #define WP256_DIGEST_SIZE 32 #define WP512_BLOCK_SIZE 64 #define WP512_LENGTHBYTES 32 #define WHIRLPOOL_ROUNDS 10 typedef unsigned char u8; typedef unsigned u32; typedef unsigned long long u64; struct wp512_ctx { u8 bitLength[WP512_LENGTHBYTES]; u8 buffer[WP512_BLOCK_SIZE]; int bufferBits; int bufferPos; u64 hash[WP512_DIGEST_SIZE/8]; }; static const u64 C0[256]; #define C1 C0 #define C2 C0 #define C3 C0 #define C4 C0 #define C5 C0 #define C6 C0 #define C7 C0 #define rc C0 void wp512_process_buffer(struct wp512_ctx *wctx) { int r; u64 K[8]; /* the round key */ u64 block[8]; /* mu(buffer) */ u64 state[8]; /* the cipher state */ u64 L[8]; for (r = 1; r <= WHIRLPOOL_ROUNDS; r++) { L[0] = C0[(int)(K[0] >> 56) ] ^ C1[(int)(K[7] >> 48) & 0xff] ^ C2[(int)(K[6] >> 40) & 0xff] ^ C3[(int)(K[5] >> 32) & 0xff] ^ C4[(int)(K[4] >> 24) & 0xff] ^ C5[(int)(K[3] >> 16) & 0xff] ^ C6[(int)(K[2] >> 8) & 0xff] ^ C7[(int)(K[1] ) & 0xff] ^ rc[r]; L[1] = C0[(int)(K[1] >> 56) ] ^ C1[(int)(K[0] >> 48) & 0xff] ^ C2[(int)(K[7] >> 40) & 0xff] ^ C3[(int)(K[6] >> 32) & 0xff] ^ C4[(int)(K[5] >> 24) & 0xff] ^ C5[(int)(K[4] >> 16) & 0xff] ^ C6[(int)(K[3] >> 8) & 0xff] ^ C7[(int)(K[2] ) & 0xff]; L[2] = C0[(int)(K[2] >> 56) ] ^ C1[(int)(K[1] >> 48) & 0xff] ^ C2[(int)(K[0] >> 40) & 0xff] ^ C3[(int)(K[7] >> 32) & 0xff] ^ C4[(int)(K[6] >> 24) & 0xff] ^ C5[(int)(K[5] >> 16) & 0xff] ^ C6[(int)(K[4] >> 8) & 0xff] ^ C7[(int)(K[3] ) & 0xff]; L[3] = C0[(int)(K[3] >> 56) ] ^ C1[(int)(K[2] >> 48) & 0xff] ^ C2[(int)(K[1] >> 40) & 0xff] ^ C3[(int)(K[0] >> 32) & 0xff] ^ C4[(int)(K[7] >> 24) & 0xff] ^ C5[(int)(K[6] >> 16) & 0xff] ^ C6[(int)(K[5] >> 8) & 0xff] ^ C7[(int)(K[4] ) & 0xff]; L[4] = C0[(int)(K[4] >> 56) ] ^ C1[(int)(K[3] >> 48) & 0xff] ^ C2[(int)(K[2] >> 40) & 0xff] ^ C3[(int)(K[1] >> 32) & 0xff] ^ C4[(int)(K[0] >> 24) & 0xff] ^ C5[(int)(K[7] >> 16) & 0xff] ^ C6[(int)(K[6] >> 8) & 0xff] ^ C7[(int)(K[5] ) & 0xff]; L[5] = C0[(int)(K[5] >> 56) ] ^ C1[(int)(K[4] >> 48) & 0xff] ^ C2[(int)(K[3] >> 40) & 0xff] ^ C3[(int)(K[2] >> 32) & 0xff] ^ C4[(int)(K[1] >> 24) & 0xff] ^ C5[(int)(K[0] >> 16) & 0xff] ^ C6[(int)(K[7] >> 8) & 0xff] ^ C7[(int)(K[6] ) & 0xff]; L[6] = C0[(int)(K[6] >> 56) ] ^ C1[(int)(K[5] >> 48) & 0xff] ^ C2[(int)(K[4] >> 40) & 0xff] ^ C3[(int)(K[3] >> 32) & 0xff] ^ C4[(int)(K[2] >> 24) & 0xff] ^ C5[(int)(K[1] >> 16) & 0xff] ^ C6[(int)(K[0] >> 8) & 0xff] ^ C7[(int)(K[7] ) & 0xff]; L[7] = C0[(int)(K[7] >> 56) ] ^ C1[(int)(K[6] >> 48) & 0xff] ^ C2[(int)(K[5] >> 40) & 0xff] ^ C3[(int)(K[4] >> 32) & 0xff] ^ C4[(int)(K[3] >> 24) & 0xff] ^ C5[(int)(K[2] >> 16) & 0xff] ^ C6[(int)(K[1] >> 8) & 0xff] ^ C7[(int)(K[0] ) & 0xff]; K[0] = L[0]; K[1] = L[1]; K[2] = L[2]; K[3] = L[3]; K[4] = L[4]; K[5] = L[5]; K[6] = L[6]; K[7] = L[7]; L[0] = C0[(int)(state[0] >> 56) ] ^ C1[(int)(state[7] >> 48) & 0xff] ^ C2[(int)(state[6] >> 40) & 0xff] ^ C3[(int)(state[5] >> 32) & 0xff] ^ C4[(int)(state[4] >> 24) & 0xff] ^ C5[(int)(state[3] >> 16) & 0xff] ^ C6[(int)(state[2] >> 8) & 0xff] ^ C7[(int)(state[1] ) & 0xff] ^ K[0]; L[1] = C0[(int)(state[1] >> 56) ] ^ C1[(int)(state[0] >> 48) & 0xff] ^ C2[(int)(state[7] >> 40) & 0xff] ^ C3[(int)(state[6] >> 32) & 0xff] ^ C4[(int)(state[5] >> 24) & 0xff] ^ C5[(int)(state[4] >> 16) & 0xff] ^ C6[(int)(state[3] >> 8) & 0xff] ^ C7[(int)(state[2] ) & 0xff] ^ K[1]; L[2] = C0[(int)(state[2] >> 56) ] ^ C1[(int)(state[1] >> 48) & 0xff] ^ C2[(int)(state[0] >> 40) & 0xff] ^ C3[(int)(state[7] >> 32) & 0xff] ^ C4[(int)(state[6] >> 24) & 0xff] ^ C5[(int)(state[5] >> 16) & 0xff] ^ C6[(int)(state[4] >> 8) & 0xff] ^ C7[(int)(state[3] ) & 0xff] ^ K[2]; L[3] = C0[(int)(state[3] >> 56) ] ^ C1[(int)(state[2] >> 48) & 0xff] ^ C2[(int)(state[1] >> 40) & 0xff] ^ C3[(int)(state[0] >> 32) & 0xff] ^ C4[(int)(state[7] >> 24) & 0xff] ^ C5[(int)(state[6] >> 16) & 0xff] ^ C6[(int)(state[5] >> 8) & 0xff] ^ C7[(int)(state[4] ) & 0xff] ^ K[3]; L[4] = C0[(int)(state[4] >> 56) ] ^ C1[(int)(state[3] >> 48) & 0xff] ^ C2[(int)(state[2] >> 40) & 0xff] ^ C3[(int)(state[1] >> 32) & 0xff] ^ C4[(int)(state[0] >> 24) & 0xff] ^ C5[(int)(state[7] >> 16) & 0xff] ^ C6[(int)(state[6] >> 8) & 0xff] ^ C7[(int)(state[5] ) & 0xff] ^ K[4]; L[5] = C0[(int)(state[5] >> 56) ] ^ C1[(int)(state[4] >> 48) & 0xff] ^ C2[(int)(state[3] >> 40) & 0xff] ^ C3[(int)(state[2] >> 32) & 0xff] ^ C4[(int)(state[1] >> 24) & 0xff] ^ C5[(int)(state[0] >> 16) & 0xff] ^ C6[(int)(state[7] >> 8) & 0xff] ^ C7[(int)(state[6] ) & 0xff] ^ K[5]; L[6] = C0[(int)(state[6] >> 56) ] ^ C1[(int)(state[5] >> 48) & 0xff] ^ C2[(int)(state[4] >> 40) & 0xff] ^ C3[(int)(state[3] >> 32) & 0xff] ^ C4[(int)(state[2] >> 24) & 0xff] ^ C5[(int)(state[1] >> 16) & 0xff] ^ C6[(int)(state[0] >> 8) & 0xff] ^ C7[(int)(state[7] ) & 0xff] ^ K[6]; L[7] = C0[(int)(state[7] >> 56) ] ^ C1[(int)(state[6] >> 48) & 0xff] ^ C2[(int)(state[5] >> 40) & 0xff] ^ C3[(int)(state[4] >> 32) & 0xff] ^ C4[(int)(state[3] >> 24) & 0xff] ^ C5[(int)(state[2] >> 16) & 0xff] ^ C6[(int)(state[1] >> 8) & 0xff] ^ C7[(int)(state[0] ) & 0xff] ^ K[7]; state[0] = L[0]; state[1] = L[1]; state[2] = L[2]; state[3] = L[3]; state[4] = L[4]; state[5] = L[5]; state[6] = L[6]; state[7] = L[7]; } wctx->hash[0] ^= state[0] ^ block[0]; wctx->hash[1] ^= state[1] ^ block[1]; wctx->hash[2] ^= state[2] ^ block[2]; wctx->hash[3] ^= state[3] ^ block[3]; wctx->hash[4] ^= state[4] ^ block[4]; wctx->hash[5] ^= state[5] ^ block[5]; wctx->hash[6] ^= state[6] ^ block[6]; wctx->hash[7] ^= state[7] ^ block[7]; }