On Mon, Mar 18, 2002 at 01:52:36PM +0200, Malcolm Kavalsky wrote:
> Eureka!
>
> Nadav Har'El wrote:
>
> > On Mon, Mar 18, 2002, Malcolm Kavalsky wrote about "Re: pthreads question":
> >
> >> I asked one of the top Unix hackers that I know, and he said:
> >>
> >> "I would guess that if you do large af_unix transfers that are page
> >> aligned then the system doesn't have to actually copy the data rather it
> >> can share the page and do a copy on write. This preserves the socket
> >> semantics and can be faster than memcpy. This was done many years ago in
> >> Solaris."
> >>
> >> I wonder if digging deep enough in the kernel sources, will reveal
> >> this ...
> >
> >
> > You can try to check if this is the case, by following each send or memcpy
> > by a memset() of the buffer. If the memcpy method suddenly becomes quicker,
> > this explanation might be true.
> > Strange though - how come malloc() returns page-aligned buffers? Does the
> > Linux code really check for this rare and rather esoteric case (if you
> > write to the buffer after sending it, and the kernel can't know you're
> > writing whole pages, it will have to do a copy-on-write and do the copy
> > anyway).
>
>
> This is exactly what happened! I added in memset after memcpy, and also
> after sending the buffer, the results are:
>
> Memcpy'ed and memsetted 1000 blocks of size 1048576 in 18 seconds => 55
> Mbytes/second
>
> Started receiving at Mon Mar 18 13:41:13 2002
> Received 1048576000 bytes in 17 seconds over unix socket => 59
> Mbytes/second
>
> Started sending at Mon Mar 18 13:41:13 2002
> Sent and memsetted 1000 blocks of size 1048576 in 17 seconds over unix
> socket => 58 Mbytes/second
/*
 * i decided to play too. i took your code and modified it, so that the
 * tests are run separately (since i didn't want the after effects from
 * fork's COW behaviour to affect the memcpy case). i also modified it to
 * use getrusage(). here are my results:
 *
 * [mulix@alhambra tmp]$ for arg in 1 2 3; do ./b memcpy ; done ;
 * memcpy'ed 1000 blocks of size 1048576. user time: 16.070000 secs, system time: 0.060000 secs
 * memcpy'ed 1000 blocks of size 1048576. user time: 15.960000 secs, system time: 0.040000 secs
 * memcpy'ed 1000 blocks of size 1048576. user time: 15.920000 secs, system time: 0.060000 secs
 *
 * [mulix@alhambra tmp]$ for arg in 1 2 3; do ./b send ; done ;
 * sent 1000 blocks of size 1048576. user time: 6.990000 secs, system time: 10.020000 secs
 * sent 1000 blocks of size 1048576. user time: 7.420000 secs, system time: 10.330000 secs
 * sent 1000 blocks of size 1048576. user time: 7.110000 secs, system time: 10.380000 secs
 *
 * kernel is 2.4.18rc1, and here's the modified code:
 *
 * NOTE(review): the program below has been cleaned up since the message was
 * posted (error checking, short-write handling, child reaping, type fixes).
 * The timings quoted above were taken with the code as originally posted.
 */

#include <stdio.h>
#include <stdlib.h>     /* malloc(), calloc(), free(), exit() */
#include <malloc.h>     /* kept from the original; <stdlib.h> already declares malloc() */
#include <string.h>
#include <errno.h>
#include <time.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/un.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <assert.h>

#define BUFSIZE 0x100000 /* 1 Megabyte */
#define NBLOCKS 1000
#define PORT_NAME "/tmp/foo"

void server(void), client(void);

/* Fill *addr with the AF_UNIX address of the benchmark socket (PORT_NAME). */
static void fill_addr(struct sockaddr_un *addr)
{
    memset(addr, 0, sizeof(*addr));
    addr->sun_family = AF_UNIX;
    snprintf(addr->sun_path, sizeof(addr->sun_path), "%s", PORT_NAME);
}

/*
 * Write exactly len bytes from buf to fd, retrying after short writes and
 * after EINTR.  Returns 0 on success, -1 on failure with errno set.
 */
static int write_all(int fd, const char *buf, size_t len)
{
    while (len > 0) {
        ssize_t n = write(fd, buf, len);

        if (n < 0) {
            if (errno == EINTR)
                continue;
            return -1;
        }
        if (n == 0) {
            /* No progress and no error: bail out instead of spinning. */
            errno = EIO;
            return -1;
        }
        buf += n;
        len -= (size_t)n;
    }
    return 0;
}

/*
 * Print the calling process's user and system CPU time, as reported by
 * getrusage(RUSAGE_SELF), prefixed by label and the number of blocks
 * processed.  Returns 0 on success, -1 if getrusage() fails.
 */
static int report_rusage(const char *label, int blocks)
{
    struct rusage r;

    if (getrusage(RUSAGE_SELF, &r) < 0) {
        perror("getrusage");
        return -1;
    }

    /* tv_sec/tv_usec are not necessarily long; cast to match %ld. */
    printf("%s%d blocks of size %d. user time: %ld.%06ld secs, "
           "system time: %ld.%06ld secs\n",
           label, blocks, BUFSIZE,
           (long)r.ru_utime.tv_sec, (long)r.ru_utime.tv_usec,
           (long)r.ru_stime.tv_sec, (long)r.ru_stime.tv_usec);
    return 0;
}

/*
 * Sending side of the socket benchmark: connect to PORT_NAME, send NBLOCKS
 * blocks of BUFSIZE bytes (overwriting the buffer with memset() after each
 * send), then report CPU usage.
 *
 * Returns 0 if a connection was established (even if a later write failed),
 * -1 if the client never connected.
 */
static int run_client(void)
{
    struct sockaddr_un sin;
    char *buf;
    int s;
    int i;

    /* calloc() so the first block sent has defined contents. */
    buf = calloc(1, BUFSIZE);
    if (buf == NULL) {
        perror("calloc");
        return -1;
    }

    s = socket(PF_UNIX, SOCK_STREAM, 0);
    if (s < 0) {
        perror("socket");
        free(buf);
        return -1;
    }

    fill_addr(&sin);
    if (connect(s, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
        perror("connect");
        close(s);
        free(buf);
        return -1;
    }

    for (i = 0; i < NBLOCKS; i++) {
        if (write_all(s, buf, BUFSIZE) < 0) {
            perror("write");
            break;
        }
        /* Dirty the buffer after sending it, as discussed in the thread. */
        memset(buf, 'A', BUFSIZE);
    }
    close(s);

    report_rusage("sent\t\t", i);
    free(buf);
    return 0;
}

/*
 * Run the unix-socket benchmark: the forked child acts as the receiving
 * server, the parent as the sending client whose CPU usage is reported.
 */
void socket_benchmark(void)
{
    pid_t pid;
    int connected;

    pid = fork();
    if (pid < 0) {
        perror("fork");
        return;
    }

    if (pid == 0) {
        server();
        _exit(0);
    }

    sleep(1); /* Dirty, but ensures client runs after server is ready */
    connected = (run_client() == 0);

    /*
     * Once connected, the server sees EOF when the client closes its socket
     * and exits, so a blocking wait is safe.  If the client never connected
     * the server may still be sitting in accept(); only reap it if it has
     * already exited rather than hanging here.
     */
    if (waitpid(pid, NULL, connected ? 0 : WNOHANG) < 0)
        perror("waitpid");
}

/*
 * Receiving side of the socket benchmark: bind a stream socket to PORT_NAME,
 * accept a single connection and discard everything read from it until EOF
 * or a read error.  The socket file is removed again after a successful bind.
 */
void server(void)
{
    struct sockaddr_un sin, from;
    socklen_t len = sizeof(from);
    char *buf;
    int s, g;

    buf = malloc(BUFSIZE);
    if (buf == NULL) {
        perror("malloc");
        return;
    }

    /* Create an unbound socket */
    s = socket(PF_UNIX, SOCK_STREAM, 0);
    if (s < 0) {
        perror("socket");
        goto out_free;
    }

    fill_addr(&sin);
    if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
        perror("bind");
        goto out_close;
    }

    if (listen(s, 5) < 0) {
        perror("listen");
        goto out_unlink;
    }

    g = accept(s, (struct sockaddr *)&from, &len);
    if (g < 0) {
        perror("accept");
        goto out_unlink;
    }

    while (read(g, buf, BUFSIZE) > 0)
        ; /* sink all data received */
    close(g);

out_unlink:
    unlink(PORT_NAME);
out_close:
    close(s);
out_free:
    free(buf);
}

/*
 * Sending side of the socket benchmark; see run_client().  Kept as a void
 * entry point matching the forward declaration above.
 */
void client(void)
{
    (void)run_client();
}

/*
 * Baseline benchmark: memcpy() a BUFSIZE block NBLOCKS times, overwriting the
 * destination with memset() after every copy, then report CPU usage.
 *
 * NOTE(review): dst is never read back, so an optimising compiler may be able
 * to drop some of this work; check the build flags if the numbers look
 * implausibly good.
 */
void memcpy_benchmark(void)
{
    char *src;
    char *dst;
    int i;

    src = calloc(1, BUFSIZE); /* defined contents for the copy source */
    dst = malloc(BUFSIZE);
    if (src == NULL || dst == NULL) {
        perror("malloc");
        free(src);
        free(dst);
        return;
    }

    for (i = 0; i < NBLOCKS; i++) {
        memcpy(dst, src, BUFSIZE);
        memset(dst, 'A', BUFSIZE);
    }

    report_rusage("memcpy'ed\t", i);

    free(src);
    free(dst);
}

/* Print a one-line usage message to stderr and exit with status 1. */
void usage(const char *prog)
{
    fprintf(stderr, "%s memcpy|send\n", prog);
    exit(1);
}

/*
 * Entry point: "memcpy" runs memcpy_benchmark(), "send" runs
 * socket_benchmark(); anything else prints the usage message.
 */
int main(int argc, char *argv[])
{
    if (argc < 2)
        usage(argv[0]);

    if (strcmp(argv[1], "memcpy") == 0)
        memcpy_benchmark();
    else if (strcmp(argv[1], "send") == 0)
        socket_benchmark();
    else
        usage(argv[0]);

    return 0;
}

/*
 * --
 * The ill-formed Orange
 * Fails to satisfy the eye:     http://vipe.technion.ac.il/~mulix/
 * Segmentation fault.           http://syscalltrack.sf.net/
 *
 * =================================================================
 * To unsubscribe, send mail to [EMAIL PROTECTED] with
 * the word "unsubscribe" in the message body, e.g., run the command
 * echo unsubscribe | mail [EMAIL PROTECTED]
 */