> do you have numbers to back up this claim?
>
> you are claiming that the locked XCHGL
> in tas (pc/l.s) called from lock (port/taslock.c)
> called from incref (port/chan.c) is "much faster"
> than the locked INCL in _xinc (pc/l.s).
> it seems to me that a locked memory bus
> is a locked memory bus.
yes, i do. on most modern intel processors xinc is a real
loss, and on amd it is a moderate loss. my atom 330
is the exception.
intel core i7 2.4ghz
loop 0 nsec/call
loopxinc 20 nsec/call
looplock 11 nsec/call
intel 5000 1.6ghz
loop 0 nsec/call
loopxinc 44 nsec/call
looplock 25 nsec/call
intel atom 330 1.6ghz (exception!)
loop 2 nsec/call
loopxinc 14 nsec/call
looplock 22 nsec/call
amd k10 2.0ghz
loop 2 nsec/call
loopxinc 30 nsec/call
looplock 20 nsec/call
intel p4 xeon 3.0ghz
loop 1 nsec/call
loopxinc 76 nsec/call
looplock 42 nsec/call
- erik
/*
 * _xinc: increment, with a LOCK-prefixed INCL, the long that the
 * first argument points to.  Nothing is returned.
 * C signature, per the comment on the TEXT line: void _xinc(long*).
 */
TEXT _xinc(SB), 1, $0 /* void _xinc(long*); */
/* fetch the pointer argument l into AX */
MOVL l+0(FP), AX
/* locked read-modify-write on the word AX points to */
LOCK; INCL 0(AX)
RET
/*
 * _xdec: decrement, with a LOCK-prefixed DECL, the long that the first
 * argument points to, and leave -1, 0 or 1 in AX according to whether
 * the conditional jumps see the decremented value as less than, equal
 * to, or greater than zero.
 * C signature, per the comment on the TEXT line: long _xdec(long*).
 */
TEXT _xdec(SB), 1, $0 /* long _xdec(long*); */
/* fetch the pointer argument l into BX, keeping AX free for the result */
MOVL l+0(FP), BX
/* zero the result before the decrement: XORL itself rewrites the flags */
XORL AX, AX
/* locked decrement; the two jumps below test the flags it leaves behind */
LOCK; DECL 0(BX)
JLT _xdeclt
JGT _xdecgt
/* neither jump taken: the result was zero and AX is still 0 */
RET
_xdecgt:
/* result greater than zero: AX goes from 0 to 1 */
INCL AX
RET
_xdeclt:
/* result less than zero: AX goes from 0 to -1 */
DECL AX
RET
#include <u.h>
#include <libc.h>
/*
 * Increment/decrement routines called by loopxinc(); they are not
 * defined in this C file.
 * NOTE(review): the assembly listing that accompanies this benchmark
 * comments them as void _xinc(long*) and long _xdec(long*).  These
 * declarations use uint* and declare _xdec as returning nothing, so
 * its result is never visible to C callers here -- confirm intended.
 */
void _xinc(uint*);
void _xdec(uint*);
/* Number of iterations every benchmark loop runs: 2^30. */
enum { N = 1024*1024*1024 };
/*
 * Baseline benchmark body: count from 0 up to N and do nothing else,
 * so the cost of the bare loop can be read next to the other results.
 */
void
loop(void)
{
	uint count;

	count = 0;
	while(count < N)
		count++;
}
/*
 * Benchmark body: N iterations, each calling _xinc() and then _xdec()
 * on the same local word, so the word returns to its starting value
 * after every iteration.
 */
void
loopxinc(void)
{
	uint i, x;

	/*
	 * Give the counter a defined starting value; it was previously
	 * handed to _xinc/_xdec without ever being set.
	 */
	x = 0;
	for(i = 0; i < N; i++){
		_xinc(&x);
		_xdec(&x);
	}
}
/*
 * Benchmark body: take and release one Lock N times in a row.
 * The Lock is a function-local static, so nothing else in this
 * program can name it.
 */
void
looplock(void)
{
	static Lock l;
	uint count;

	count = 0;
	while(count < N){
		lock(&l);
		unlock(&l);
		count++;
	}
}
/*
 * Run benchmark f once and report its cost on file descriptor 2.
 *
 * s labels the output line; f is the benchmark body.  The elapsed
 * nsec() time is divided by N with integer division, so the figure
 * printed as "nsec/call" is whole nanoseconds per loop iteration
 * (fractions are dropped, and an iteration may contain more than one
 * call).
 */
void
timing(char *s, void (*f)(void))
{
	uvlong start, stop;

	start = nsec();
	f();
	stop = nsec();
	fprint(2, "%s\t%llud nsec/call\n", s, (stop - start)/(uvlong)N);
}
/*
 * Time each benchmark body in turn -- loop, loopxinc, looplock --
 * and exit with an empty status string.
 */
void
main(void)
{
	static struct {
		char *name;
		void (*fn)(void);
	} bench[] = {
		{ "loop", loop },
		{ "loopxinc", loopxinc },
		{ "looplock", looplock }
	};
	int i;

	/*
	 * Result deliberately discarded; presumably this gets any
	 * first-call setup inside nsec() out of the way before the
	 * timed runs -- TODO confirm.
	 */
	nsec();
	for(i = 0; i < sizeof bench/sizeof bench[0]; i++)
		timing(bench[i].name, bench[i].fn);
	exits("");
}