On Mon, Mar 18, 2002 at 01:52:36PM +0200, Malcolm Kavalsky wrote:
> Eureka!
> 
> Nadav Har'El wrote:
> 
> > On Mon, Mar 18, 2002, Malcolm Kavalsky wrote about "Re: pthreads 
> > question":
> >
> >> I asked one of the top Unix hackers that I know, and he said:
> >>
> >> "I would guess that if you do large af_unix transfers that are page
> >> aligned then the system doesn't have to actually copy the data rather it
> >> can share the page and do a copy on write. This preserves the socket
> >> semantics and can be faster than memcpy. This was done many years ago in
> >> Solaris."
> >>
> >> I wonder if digging deep enough in the kernel sources, will reveal 
> >> this ...
> >
> >
> > You can try to check if this is the case, by following each send or 
> > memcpy
> > by a memset() of the buffer. If the memcpy method suddenly becomes 
> > quicker,
> > this explanation might be true.
> > Strange though - how come malloc() returns page-aligned buffers? Does the
> > Linux code really checks for this rare and rather esoteric case (if you
> > write to the buffer after sending it, and the kernel can't know you're
> > writing whole pages, it will have to do a copy-on- write and do the copy
> > anyway).
> >
> This is exactly what happened! I added in memset after memcpy, and also 
> after sending the buffer, the results are:
> 
> Memcpy'ed and memsetted 1000 blocks of size 1048576 in 18 seconds => 55 
> Mbytes/second
> 
> Started receiving at Mon Mar 18 13:41:13 2002
> Received 1048576000 bytes in 17 seconds over unix socket =>   59 
> Mbytes/second
> 
> Started sending at Mon Mar 18 13:41:13 2002
> Sent and memsetted 1000 blocks of size 1048576 in 17 seconds over unix 
> socket => 58 Mbytes/second

i decided to play too. i took your code and modified it, so that the
tests are run separately (since i didn't want the after effects from
fork's COW behaviour to affect the memcpy case). i also modified it to
use getrusage().

here are my results:

[mulix@alhambra tmp]$ for arg in 1 2 3; do ./b memcpy ; done ;
memcpy'ed       1000 blocks of size 1048576. user time: 16.070000 secs, system time: 
0.060000 secs
memcpy'ed       1000 blocks of size 1048576. user time: 15.960000 secs, system time: 
0.040000 secs
memcpy'ed       1000 blocks of size 1048576. user time: 15.920000 secs, system time: 
0.060000 secs
[mulix@alhambra tmp]$ for arg in 1 2 3; do ./b send ; done ;
sent            1000 blocks of size 1048576. user time: 6.990000 secs, system time: 
10.020000 secs
sent            1000 blocks of size 1048576. user time: 7.420000 secs, system time: 
10.330000 secs 
sent            1000 blocks of size 1048576. user time: 7.110000 secs, system time: 
10.380000 secs

kernel is 2.4.18rc1, and here's the modified code:

#include <assert.h>
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <sys/types.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/un.h>
#include <sys/wait.h>
#include <unistd.h>

/* Size in bytes of each block that is written, read or copied per iteration. */
#define BUFSIZE 0x100000  /* 1 Megabyte */
/* Number of BUFSIZE-sized blocks each benchmark loop processes. */
#define NBLOCKS   1000
/* Filesystem path of the UNIX-domain socket used by server() and client(). */
#define PORT_NAME    "/tmp/foo"

/* Both halves of the socket benchmark are defined further down. */
void server(void);
void client(void);

/*
 * Run the UNIX-domain socket benchmark.
 *
 * Forks once: the child runs server(), the parent runs client().  If the
 * fork fails the error is reported and nothing is run.
 */
void socket_benchmark()
{
        pid_t child = fork();

        if (child < 0) {
                /* No server process exists, so the client has nothing to talk to. */
                perror("fork");
                return;
        }

        if (child == 0) {
                server();
                /* End the child here so it never continues into the parent's code path. */
                _exit(0);
        }

        sleep(1); /* Dirty: gives the server time to bind and listen before we connect */
        client();

        /*
         * Only the parent has a child to wait for.  This blocks until the
         * server process has terminated, so if client() could not connect
         * and the server is still sitting in accept(), this waits with it.
         */
        waitpid(child, NULL, 0);
}

/*
 * Server half of the socket benchmark.
 *
 * Binds a stream socket to PORT_NAME, accepts one connection, reads and
 * discards everything the peer sends until read() stops returning data,
 * then removes the socket file.  Any failure is reported with perror() and
 * ends the function early; every descriptor and the buffer are released on
 * all paths.
 */
void server()
{
        struct sockaddr_un sin, from;
        socklen_t len = sizeof(from); /* accept() takes a socklen_t *, not an int * */
        int s = -1;
        int g = -1;
        int bound = 0;
        char *buf;

        buf = malloc(BUFSIZE);
        if (buf == NULL) {
                perror("malloc");
                return;
        }

        /* Create an unbound socket */
        s = socket(PF_UNIX, SOCK_STREAM, 0);
        if (s < 0) {
                perror("socket");
                goto out;
        }

        /* Zero the whole address so no stack garbage is handed to bind(). */
        memset(&sin, 0, sizeof(sin));
        sin.sun_family = AF_UNIX;
        snprintf(sin.sun_path, sizeof(sin.sun_path), "%s", PORT_NAME);
        if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
                perror("bind");
                goto out;
        }
        bound = 1; /* from here on the socket file is ours to remove */

        if (listen(s, 5) < 0) {
                perror("listen");
                goto out;
        }

        g = accept(s, (struct sockaddr *)&from, &len);
        if (g < 0) {
                perror("accept");
                goto out;
        }

        while (read(g, buf, BUFSIZE) > 0)
                ; /* sink all data received */

out:
        if (g >= 0)
                close(g);
        if (s >= 0)
                close(s);
        if (bound)
                unlink(PORT_NAME);
        free(buf);
}

/*
 * Client half of the socket benchmark.
 *
 * Connects to the UNIX-domain socket at PORT_NAME and writes up to NBLOCKS
 * blocks of BUFSIZE bytes, overwriting the buffer with memset() after every
 * successful write.  The loop stops early on the first write() that does not
 * transfer a full block.  Afterwards the number of blocks sent and this
 * process's user/system CPU time (from getrusage()) are printed to stdout.
 *
 * Returns to the caller in every case; failures are reported with perror().
 */
void client()
{
        struct rusage r;
        struct sockaddr_un sin;
        char *buf;
        int s;
        int i;

        buf = malloc(BUFSIZE);
        if (buf == NULL) {
                perror("malloc");
                return;
        }
        /* Give the first write() defined bytes instead of raw heap contents. */
        memset(buf, 'A', BUFSIZE);

        s = socket(PF_UNIX, SOCK_STREAM, 0);
        if (s < 0) {
                perror("socket");
                goto out;
        }

        memset(&sin, 0, sizeof(sin));
        sin.sun_family = AF_UNIX;
        snprintf(sin.sun_path, sizeof(sin.sun_path), "%s", PORT_NAME);
        if (connect(s, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
                perror("connect");
                goto out;
        }

        /* The memset after each send dirties the pages that were just written. */
        for (i = 0; i < NBLOCKS && write(s, buf, BUFSIZE) == BUFSIZE; i++)
                memset(buf, 'A', BUFSIZE);

        /* Close before sampling usage, so the peer sees EOF as early as possible. */
        close(s);
        s = -1;

        if (getrusage(RUSAGE_SELF, &r) < 0) {
                perror("getrusage");
                goto out;
        }
        /* time_t / suseconds_t widths vary by platform; cast to match %ld. */
        printf("sent\t\t%d blocks of size %d. user time: %ld.%06ld secs, "
               "system time: %ld.%06ld secs\n", i, BUFSIZE,
               (long)r.ru_utime.tv_sec, (long)r.ru_utime.tv_usec,
               (long)r.ru_stime.tv_sec, (long)r.ru_stime.tv_usec);

out:
        if (s >= 0)
                close(s);
        free(buf);
}

/*
 * In-process copy benchmark.
 *
 * Allocates two BUFSIZE buffers and, NBLOCKS times, copies one into the
 * other with memcpy() and then overwrites the destination with memset().
 * Afterwards the block count and this process's user/system CPU time (from
 * getrusage()) are printed to stdout.  Failures are reported with perror().
 */
void memcpy_benchmark()
{
        struct rusage r;
        char *src, *dst;
        int i;

        src = malloc(BUFSIZE);
        dst = malloc(BUFSIZE);
        if (src == NULL || dst == NULL) {
                perror("malloc");
                goto out;
        }

        /*
         * Freshly malloc()ed memory holds indeterminate bytes; fill the
         * source once so every memcpy() reads data that was really written.
         */
        memset(src, 'A', BUFSIZE);

        for (i = 0; i < NBLOCKS; i++) {
                memcpy(dst, src, BUFSIZE);
                memset(dst, 'A', BUFSIZE);
        }

        if (getrusage(RUSAGE_SELF, &r) < 0) {
                perror("getrusage");
                goto out;
        }
        /* time_t / suseconds_t widths vary by platform; cast to match %ld. */
        printf("memcpy'ed\t%d blocks of size %d. user time: %ld.%06ld secs, "
               "system time: %ld.%06ld secs\n", i, BUFSIZE,
               (long)r.ru_utime.tv_sec, (long)r.ru_utime.tv_usec,
               (long)r.ru_stime.tv_sec, (long)r.ru_stime.tv_usec);

out:
        /* free(NULL) is a no-op, so a partial allocation is handled too. */
        free(dst);
        free(src);
}

/*
 * Print the one-line synopsis of the accepted sub-commands on standard
 * output and terminate the process with exit status 1.
 *
 * prog: text shown as the program name in front of the synopsis.
 */
void usage(const char* prog)
{
        fprintf(stdout, "%s memcpy|send\n", prog);
        exit(1);
}

/*
 * Entry point: dispatch on the first command-line argument.
 *
 *   "memcpy" -> memcpy_benchmark()
 *   "send"   -> socket_benchmark()
 *
 * A missing or unrecognised argument prints the synopsis via usage(),
 * which terminates the process.  Returns 0 otherwise.
 */
int main(int argc, char *argv[])
{
        /* argv[0] may be absent when argc is 0; never hand usage() a NULL name. */
        const char *prog = (argc > 0 && argv[0] != NULL) ? argv[0] : "benchmark";

        if (argc < 2) {
                usage(prog); /* does not return */
        }

        if (strcmp(argv[1], "memcpy") == 0) {
                memcpy_benchmark();
        } else if (strcmp(argv[1], "send") == 0) {
                socket_benchmark();
        } else {
                usage(prog);
        }

        return 0;
}
-- 
The ill-formed Orange
Fails to satisfy the eye:       http://vipe.technion.ac.il/~mulix/
Segmentation fault.             http://syscalltrack.sf.net/

=================================================================
To unsubscribe, send mail to [EMAIL PROTECTED] with
the word "unsubscribe" in the message body, e.g., run the command
echo unsubscribe | mail [EMAIL PROTECTED]

Reply via email to