On Fri, Sep 29, 2006 at 05:34:30PM -0400, Tom Lane wrote: > [EMAIL PROTECTED] writes: > > If anybody is curious, here are my numbers for an AMD X2 3800+: > You did not show your C code, so no one else can reproduce the test on > other hardware. However, it looks like your compiler has unrolled the > memcpy into straight-line 8-byte moves, which makes it pretty hard for > anything operating byte-wise to compete, and is a bit dubious for the > general case anyway (since it requires assuming that the size and > alignment are known at compile time).
I did show the .s code. I call into x_memcpy(a, b), meaning that the compiler can't assume anything. It may happen to be aligned. Here are results over 64 Mbytes of memory, to ensure that every call is a cache miss: $ gcc -O3 -std=c99 -DSTRING='"This is a very long sentence that is expected to be very slow."' -DN="(1024*1024)" -o x x.c y.c strlcpy.c ; ./x NONE: 767243 us MEMCPY: 6044137 us STRNCPY: 10741759 us STRLCPY: 12061630 us LENCPY: 9459099 us $ gcc -O3 -std=c99 -DSTRING='"Short sentence."' -DN="(1024*1024)" -o x x.c y.c strlcpy.c ; ./x NONE: 712193 us MEMCPY: 6072312 us STRNCPY: 9982983 us STRLCPY: 6605052 us LENCPY: 7128258 us $ gcc -O3 -std=c99 -DSTRING='""' -DN="(1024*1024)" -o x x.c y.c strlcpy.c ; ./x NONE: 708164 us MEMCPY: 6042817 us STRNCPY: 8885791 us STRLCPY: 5592477 us LENCPY: 6135550 us At least on my machine, memcpy() still comes out on top. Yes, assuming that it is aligned correctly for the machine. Here is unaligned (all arrays are stored +1 offset in memory): $ gcc -O3 -std=c99 -DSTRING='"This is a very long sentence that is expected to be very slow."' -DN="(1024*1024)" -DALIGN=1 -o x x.c y.c strlcpy.c ; ./x NONE: 790932 us MEMCPY: 6591559 us STRNCPY: 10622291 us STRLCPY: 12070007 us LENCPY: 10322541 us $ gcc -O3 -std=c99 -DSTRING='"Short sentence."' -DN="(1024*1024)" -DALIGN=1 -o x x.c y.c strlcpy.c ; ./x NONE: 764577 us MEMCPY: 6631731 us STRNCPY: 9513540 us STRLCPY: 6615345 us LENCPY: 7263392 us $ gcc -O3 -std=c99 -DSTRING='""' -DN="(1024*1024)" -DALIGN=1 -o x x.c y.c strlcpy.c ; ./x NONE: 825689 us MEMCPY: 6607777 us STRNCPY: 8976487 us STRLCPY: 5878088 us LENCPY: 6180358 us Alignment looks like it does impact the results for memcpy(). memcpy() changes from around 6.0 seconds to 6.6 seconds. Overall, though, it is still the winner in all cases except for strlcpy(), which beats it on very short strings (""). 
Here is the cache hit case including your strlen+memcpy as 'LENCPY': $ gcc -O3 -std=c99 -DSTRING='"This is a very long sentence that is expected to be very slow."' -DN=1 -o x x.c y.c strlcpy.c ; ./x NONE: 696157 us MEMCPY: 825118 us STRNCPY: 7983159 us STRLCPY: 10787462 us LENCPY: 6048339 us $ gcc -O3 -std=c99 -DSTRING='"Short sentence."' -DN=1 -o x x.c y.c strlcpy.c ; ./x NONE: 700201 us MEMCPY: 593701 us STRNCPY: 7577380 us STRLCPY: 3727801 us LENCPY: 3169783 us $ gcc -O3 -std=c99 -DSTRING='""' -DN=1 -o x x.c y.c strlcpy.c ; ./x NONE: 706283 us MEMCPY: 792719 us STRNCPY: 7870425 us STRLCPY: 681334 us LENCPY: 2062983 us First call was every call being a cache hit. With this one, every one is a cache miss, and the 64-byte blocks are spread equally over 64 Mbytes of memory. I've attached the code for your consideration. x.c is the routines I used to perform the tests. y.c is the main program. strlcpy.c is copied from the online reference as is without change. The compilation steps are described above. STRING is the string to try out. N is the number of 64-byte blocks to allocate. ALIGN is the number of bytes to offset the array by when storing / reading / writing. ALIGN should be >= 0. At N=1, it's all in cache. At N=1024*1024 it is taking up 64 Mbytes of RAM. Cheers, mark -- [EMAIL PROTECTED] / [EMAIL PROTECTED] / [EMAIL PROTECTED] __________________________ . . _ ._ . . .__ . . ._. .__ . . . .__ | Neighbourhood Coder |\/| |_| |_| |/ |_ |\/| | |_ | |/ |_ | | | | | | \ | \ |__ . | | .|. |__ |__ | \ |__ | Ottawa, Ontario, Canada One ring to rule them all, one ring to find them, one ring to bring them all and in the darkness bind them... http://mark.mielke.cc/
/*
 * x.c -- one thin wrapper per copy strategy being benchmarked.
 *
 * Every wrapper copies from the 64-byte block `b` into the 64-byte block
 * `a`.  The wrappers are built as their own source file (see the gcc
 * command lines in the accompanying message), presumably so the timing
 * loop in y.c has to make a real call for each copy.
 */
#include <string.h>
#include <sys/types.h>

/* Provided by strlcpy.c. */
size_t strlcpy(char *dst, const char *src, size_t siz);

/* Size in bytes of each source/destination block. */
enum { X_BLOCK_SIZE = 64 };

/*
 * Baseline: performs no copy at all, so its timing reflects only the
 * cost of the call and the surrounding loop.
 */
void x_none(char * restrict a, const char * restrict b)
{
    /* Do nothing. */
    (void) a;
    (void) b;
}

/* Copies the whole 64-byte block regardless of the string it holds. */
void x_memcpy(char * restrict a, const char * restrict b)
{
    memcpy(a, b, X_BLOCK_SIZE);
}

/*
 * Copies the string in `b` with strncpy(), which zero-pads the rest of
 * the block and leaves `a` unterminated if `b` holds 64 or more
 * characters.
 */
void x_strncpy(char * restrict a, const char * restrict b)
{
    strncpy(a, b, X_BLOCK_SIZE);
}

/* Copies the string in `b` with strlcpy(); `a` is always terminated. */
void x_strlcpy(char * restrict a, const char * restrict b)
{
    strlcpy(a, b, X_BLOCK_SIZE);
}

/*
 * Copies the string in `b` using strlen() followed by memcpy().
 *
 * When the string (including its terminator) fits in the block it is
 * copied in a single memcpy().  Otherwise the first 63 bytes are copied
 * and a terminator is stored, so that `a` always ends up NUL-terminated
 * exactly as it does with x_strlcpy().
 */
void x_strlenmemcpy(char * restrict a, const char * restrict b)
{
    size_t len = strlen(b) + 1;     /* bytes needed, terminator included */

    if (len <= X_BLOCK_SIZE) {
        memcpy(a, b, len);
    } else {
        memcpy(a, b, X_BLOCK_SIZE - 1);
        a[X_BLOCK_SIZE - 1] = '\0';
    }
}
/*
 * y.c -- benchmark driver.
 *
 * Allocates two buffers of N 64-byte blocks, fills every source block
 * with STRING, then times 100000000 calls of each copy wrapper from x.c,
 * cycling through the blocks in order.
 *
 * Compile-time knobs (normally passed with -D):
 *   STRING  string literal stored in every source block; must be shorter
 *           than 64 bytes so each block stays NUL-terminated.
 *   N       number of 64-byte blocks per buffer.
 *   ALIGN   byte offset applied to every block address (>= 0).
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

void x_none(char * restrict a, const char * restrict b);
void x_memcpy(char * restrict a, const char * restrict b);
void x_strncpy(char * restrict a, const char * restrict b);
void x_strlcpy(char * restrict a, const char * restrict b);
void x_strlenmemcpy(char * restrict a, const char * restrict b);

#ifndef ALIGN
# define ALIGN 0
#endif

/* Defaults so the driver still builds when a knob is omitted. */
#ifndef N
# define N 1
#endif

#ifndef STRING
# define STRING "Short sentence."
#endif

enum {
    BLOCK_SIZE = 64,            /* bytes per source/destination block */
    ITERATIONS = 100000000      /* calls timed per copy strategy */
};

/* Signature shared by every wrapper under test. */
typedef void (*copy_fn)(char * restrict a, const char * restrict b);

/* Returns the time from *start to *end in microseconds. */
static long long elapsed_us(const struct timeval *start,
                            const struct timeval *end)
{
    return (long long) (end->tv_sec - start->tv_sec) * 1000000LL
         + (long long) (end->tv_usec - start->tv_usec);
}

/*
 * Prints `label`, calls `fn` ITERATIONS times over successive blocks of
 * `a` and `b` (wrapping around after N blocks), then prints the elapsed
 * wall-clock time in microseconds.
 */
static void run_benchmark(const char *label, copy_fn fn,
                          char *a, const char *b)
{
    struct timeval start, end;
    long i;

    printf("%s", label);
    gettimeofday(&start, NULL);
    for (i = 0; i < ITERATIONS; i++) {
        size_t off = (size_t) (i % N) * BLOCK_SIZE + ALIGN;

        fn(a + off, b + off);
    }
    gettimeofday(&end, NULL);
    printf("%10lld us\n", elapsed_us(&start, &end));
}

/*
 * Returns 0 after all five timings have been printed, or 1 if STRING is
 * too long for a block or the buffers cannot be allocated.
 */
int main(void)
{
    /* ALIGN is counted twice, matching the original allocation size. */
    const size_t bytes = (size_t) N * BLOCK_SIZE + 2 * (size_t) ALIGN;
    char *a;
    char *b;
    size_t i;

    /*
     * A longer STRING would leave the blocks unterminated, letting the
     * strlen()-based copies read past the end of the buffer.
     */
    if (strlen(STRING) >= (size_t) BLOCK_SIZE) {
        fprintf(stderr, "STRING must be shorter than %d bytes.\n", BLOCK_SIZE);
        return 1;
    }

    a = malloc(bytes);
    b = malloc(bytes);
    if (a == NULL || b == NULL) {
        printf("malloc failed.\n");
        free(a);
        free(b);
        return 1;
    }

    /* strncpy() zero-pads, so each source block is fully initialized. */
    for (i = 0; i < (size_t) N; i++) {
        strncpy(&b[i * BLOCK_SIZE + ALIGN], STRING, BLOCK_SIZE);
    }

    run_benchmark("NONE: ", x_none, a, b);
    run_benchmark("MEMCPY: ", x_memcpy, a, b);
    run_benchmark("STRNCPY: ", x_strncpy, a, b);
    run_benchmark("STRLCPY: ", x_strlcpy, a, b);
    run_benchmark("LENCPY: ", x_strlenmemcpy, a, b);

    free(a);
    free(b);
    return 0;
}
/*	$OpenBSD: strlcpy.c,v 1.11 2006/05/05 15:27:38 millert Exp $	*/

/*
 * Copyright (c) 1998 Todd C. Miller <[EMAIL PROTECTED]>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/types.h>
#include <string.h>

/*
 * Bounded string copy: writes at most siz-1 characters of src into dst
 * and always NUL-terminates dst, except when siz is 0, in which case dst
 * is left untouched.
 *
 * Returns strlen(src).  A return value >= siz tells the caller that the
 * copy was truncated.
 */
size_t
strlcpy(char *dst, const char *src, size_t siz)
{
	const char *in = src;
	char *out = dst;
	size_t room;

	/* Copy while there is space for a character plus the terminator. */
	for (room = siz; room > 1; room--) {
		char c = *in++;

		*out++ = c;
		if (c == '\0')
			return (size_t)(in - src - 1);	/* whole string fit */
	}

	/* Out of space: terminate dst if it has any room at all ... */
	if (siz != 0)
		*out = '\0';

	/* ... and walk the remainder of src to measure its full length. */
	while (*in != '\0')
		in++;

	return (size_t)(in - src);	/* count does not include NUL */
}
---------------------------(end of broadcast)--------------------------- TIP 6: explain analyze is your friend