------- Additional Comments From tkoenig at gcc dot gnu dot org 2005-07-17 19:45 ------- I don't think the timing issue is valid.
Look at these benchmarks: The first one simulates copying 12-byte values to 10-byte values, the second one a compact memcpy of a larger field. $ cat foo.c #include <string.h> #define NELEM 10000000 #define SA 12 #define SB 10 void foo(char *a, char *b, int n); int main() { char *a, *b; int n; a = malloc(NELEM*SA); b = malloc(NELEM*SB); for (n=0; n<10; n++) foo(a,b,NELEM); return 0; } void foo(char *a, char *b, int n) { int i; for (i=0; i<n; i++) { memcpy(a, b, 10); a += SA; b += SB; } } $ gcc -O3 foo.c $ time ./a.out real 0m2.628s user 0m2.523s sys 0m0.096s $ cat foo2.c #include <string.h> #define NELEM 10000000 #define SA 12 void foo(char *a, char *b, int n); int main() { char *a, *b; int n; a = malloc(NELEM*SA); b = malloc(NELEM*SA); for (n=0; n<10; n++) memcpy(a, b, NELEM*SA); return 0; } $ gcc foo2.c foo2.c: In function 'main': foo2.c:13: warning: incompatible implicit declaration of built-in function 'malloc' $ time ./a.out real 0m2.876s user 0m2.777s sys 0m0.093s We also have slow disk I/O to deal with. This is on i686-pc-linux-gnu. Timings on other systems may differ, of course. -- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=22519