when i measure chan send performance with the attached program, using
both the semaphore locks that have been made the default for sources and
the old locks, the old locks surprisingly outperform the new ones
by a large margin.
the test is as follows: let O be the number of buffers in the channel
and M be the number of sending procs; then cycles is the number of
machine cycles required to send 1<<21 messages per proc and receive
them on a single listener.
on my machine, i get the following raw numbers (averaged over a few tries):
new 1.84e9 cycles O=10 M=1
old 1.10e9
new 4.61e9 O=0 M=1
old 4.38e9
new 1.55e10 O=10 M=8
old 2.74e10
new 3.64e10 O=0 M=8
old 5.14e10
am i doing something fundamentally wrong, or are the new locks
substantially slower than the old ones?
- erik
#include <u.h>
#include <libc.h>
#include <thread.h>
/* Default benchmark parameters. */
enum {
	Ndflt = 1<<21,	/* messages sent by each sending proc */
	Mdflt = 1,	/* number of sending procs */
	Odflt = 10,	/* buffer count of the data channel */
};

/*
 * Run-time parameters.  They need an explicit type: a file-scope
 * declaration with no type specifier is not valid C.
 */
int N = Ndflt;	/* messages per sender */
int M = Mdflt;	/* sending procs; set by -M */
int O = Odflt;	/* buffers in c; set by -O */

/* c carries the messages; endc carries the receiver's completion signal. */
Channel *c, *endc;
/*
 * Sender proc: pushes the values 0 through N-1 onto channel c, one
 * sendul per value, then terminates the calling thread.  The argument
 * is ignored.
 */
void
sendthread(void*)
{
	ulong seq;

	seq = 0;
	while(seq < N){
		sendul(c, seq);
		seq++;
	}
	threadexits("");
}
void
receivethread(void*)
{
int i;
for(i = 0; i <N*M; i++)
recvul(c);
sendul(endc, 1);
threadexits("");
}
/*
 * Writes the command synopsis to file descriptor 2 and exits with
 * the status string "usage".  Does not return.
 */
void
usage(void)
{
	static char synopsis[] = "usage: chantest [-O nbuf] [-M nproc]\n";

	fprint(2, "%s", synopsis);
	exits("usage");
}
void
threadmain(int argc, char **argv)
{
int i;
uvlong t;
ARGBEGIN{
default:
usage();
case 'M':
M = atoi(EARGF(usage()));
break;
case 'O':
O = atoi(EARGF(usage()));
break;
}ARGEND
if(argc > 0)
usage();
t = -nsec();
c = chancreate(sizeof(ulong), O);
endc = chancreate(sizeof(ulong), 0);
proccreate(receivethread, nil, 4*1024);
for(i = 0; i < M; i++)
proccreate(sendthread, (void*)(uintptr)i, 4*1024);
recvul(endc);
t += nsec();
print("%lld\n", t);
threadexits("");
}