Ken Fox wrote:
>
> I was noodling around with a couple different VM implementations
> and thought I would ask other people if they were doing the same.
> It would be nice to divide and conquer the exploration of different
> implementations.
>
> I looked into 3 different dispatching techniques. Here's the list
> with timings on a 600 MHz Pentium III running Linux and gcc 2.95:
>
> -g -O
> Switch/case 6.0 sec 2.0 sec
> Computed goto (gcc feature) 6.1 sec 1.4 sec
> Function call 11.9 sec 11.8 sec
Cool!
I have the same machine (600MHz PIII) running Linux but using a
different gcc:
% gcc -v
Reading specs from /usr/lib/gcc-lib/i386-redhat-linux/egcs-2.91.66/specs
gcc version egcs-2.91.66 19990314/Linux (egcs-1.1.2 release)
and I get the same numbers for -O (I used -O3) but different numbers
without optimization. Maybe we should assume optimization?
I implemented a few variants of the function calling version.
FUNCALL_PREDICTABLE is exactly the same as your USE_FUNCALL except
instead of jumping directly through a function pointer, it does a nested
else-if statement. I had a vague idea that this would help branch
prediction, without losing the flexibility because you could always have
a final default 'else' that used a function pointer for unexpected
functions. It also allows the compiler to inline the function bodies.
(For completeness, I should really put the bodies in a separate
compilation unit; I'm not sure whether it inlined them or not. Asking
them to be inlined showed a small effect, so probably not? Or I could
just disassemble.) That got a big speedup: 10.2sec -> 4.9sec. So maybe
keeping things as separate functions isn't so harmful after all, though
that computed goto is very tempting. (But before you believe this branch
prediction BS, see below.)
Since function addresses aren't constant in gcc, that requires a nested
else-if construct. I took a shot at comparing the linear search with a
switch, but this is probably a ridiculous test because of the tiny
number of opcodes. The comparison is between FUNCALL_HYBRID and
FUNCALL_HYBRID_LINEAR.
FUNCALL_NAIVE looks at what happens if you don't rewrite opcode ids with
function pointers, and instead do a lookup for every opcode. It slows it
down some, though the whole thing is so slow it's not a big deal
percentage-wise.
FUNCALL_HYBRID is an attempt to use a switch with separate functions,
avoiding function pointers. It gives the same bottom line: on the PIII,
(1) function pointers are expensive, and (2) the compiler doesn't do a
very good job of inlining (with inlining on, FUNCALL_HYBRID should be
the same as SWITCH, but it's half the speed.)
I'm not sure I really buy the branch prediction argument, so I tried
another version FUNCALL_PREDICTABLE_HALF with half of the opcodes
replaced with function pointers. So branch prediction should work almost
as well since there's still always a single destination for every IP
value (although there's still an extra level of indirection -- major
handwaving here), but things go through a function pointer half the
time. It slows things down some, but much less than half the difference
between function pointers and no function pointers, so it seems to
support the branch prediction argument. But I'm way too far out on a
limb to believe anything I'm saying any more. :-)
Algorithm Inline? Optimization Time (sec)
GOTO - -O3 1.35
GOTO - none 5.87
SWITCH - -O3 2.19
SWITCH - none 5.72
FUNCALL_ORIG no -O3 10.15
FUNCALL_ORIG no none 12.37
FUNCALL_ORIG yes -O3 10.16
FUNCALL_ORIG yes none 12.37
FUNCALL_NAIVE no -O3 11.66
FUNCALL_NAIVE no none 13.51
FUNCALL_NAIVE yes -O3 11.67
FUNCALL_NAIVE yes none 13.51
FUNCALL_HYBRID no -O3 4.21
FUNCALL_HYBRID no none 10.08
FUNCALL_HYBRID yes -O3 4.20
FUNCALL_HYBRID yes none 10.05
FUNCALL_HYBRID_LINEAR no -O3 4.67
FUNCALL_HYBRID_LINEAR no none 10.06
FUNCALL_HYBRID_LINEAR yes -O3 4.67
FUNCALL_HYBRID_LINEAR yes none 10.07
FUNCALL_PREDICTABLE no -O3 4.92
FUNCALL_PREDICTABLE no none 9.85
FUNCALL_PREDICTABLE yes -O3 4.87
FUNCALL_PREDICTABLE yes none 9.84
FUNCALL_PREDICTABLE_HALFno -O3 5.81
FUNCALL_PREDICTABLE_HALFno none 9.66
FUNCALL_PREDICTABLE_HALFyes -O3 5.79
FUNCALL_PREDICTABLE_HALFyes none 9.65
/* This code is in the public domain. Ken Fox, Oct 2000 */
/* #define USE_FUNCALL 1 */
/* #define FUNCALL_ORIG 1 */
/* #define INLINE inline */
/* //#define INLINE */
#include <stdlib.h>
#include <stdio.h>
typedef union {
int i;
void *p;
} cell;
void load(char *filename, cell *code)
{
FILE *input;
cell *pc = code;
if (input = fopen(filename, "r")) {
int i;
while (fscanf(input, " %d ", &i) != EOF) {
pc->i = i;
++pc;
}
fclose(input);
}
pc->i = 0;
}
void dump(cell *code)
{
cell *pc = code;
printf("byte code:\n");
while (pc->i) {
printf(" %5d\n", pc->i);
++pc;
}
}
enum {
STOP = 0,
PUSH = 1,
ADD = 2,
PRINT = 3
};
int count_ops(cell *code)
{
int icount = 0;
cell *pc = code;
op: switch (pc->i) {
case STOP:
return icount;
case PUSH:
++icount;
pc += 2;
goto op;
case ADD:
++icount;
pc += 1;
goto op;
case PRINT:
++icount;
pc += 1;
goto op;
}
}
void fixup(cell *code, void *ops[])
{
cell *pc = code;
op: switch (pc->i) {
case STOP:
pc->p = ops[pc->i];
return;
case PUSH:
pc->p = ops[pc->i];
pc += 2;
goto op;
case ADD:
pc->p = ops[pc->i];
pc += 1;
goto op;
case PRINT:
pc->p = ops[pc->i];
pc += 1;
goto op;
}
}
#if USE_SWITCH
void execute(cell *code)
{
static int stack[1000];
cell *pc = code;
int *sp = stack;
int x, y;
op: switch (pc++->i) {
case STOP:
return;
case PUSH:
*sp++ = pc++->i;
goto op;
case ADD:
y = *--sp;
x = *--sp;
*sp++ = x + y;
goto op;
case PRINT:
printf("--> %d\n", *--sp);
goto op;
}
}
#elif USE_FUNCALL
typedef void *(*op)();
cell *pc;
int *sp;
INLINE void *op_stop()
{
return 0;
}
INLINE void *op_push()
{
*sp++ = pc[1].i;
return pc + 2;
}
INLINE void *op_add()
{
int y = *--sp;
int x = *--sp;
*sp++ = x + y;
return pc + 1;
}
INLINE void *op_print()
{
printf("--> %d\n", *--sp);
return pc + 1;
}
#if FUNCALL_ORIG
void execute(cell *code)
{
static void *ops[] = { op_stop, op_push, op_add, op_print };
static int stack[1000];
if (code->i > 0 && code->i < 255) {
fixup(code, ops);
}
pc = code;
sp = stack;
while (pc = ((op)(pc->p))())
;
}
#elif FUNCALL_NAIVE
void execute(cell *code)
{
static void *ops[] = { op_stop, op_push, op_add, op_print };
static int stack[1000];
pc = code;
sp = stack;
while (pc = ((op)(ops[pc->i]))())
;
}
#elif FUNCALL_HYBRID
void execute(cell *code)
{
static void *ops[] = { op_stop, op_push, op_add, op_print };
static int stack[1000];
pc = code;
sp = stack;
while (pc) {
switch (pc->i) {
case STOP: pc = op_stop(); break;
case PUSH: pc = op_push(); break;
case ADD: pc = op_add(); break;
case PRINT: pc = op_print(); break;
}
}
}
#elif FUNCALL_HYBRID_LINEAR
void execute(cell *code)
{
static void *ops[] = { op_stop, op_push, op_add, op_print };
static int stack[1000];
pc = code;
sp = stack;
while (pc) {
if (pc->i == STOP) pc = op_stop();
else if (pc->i == PUSH) pc = op_push();
else if (pc->i == ADD) pc = op_add();
else if (pc->i == PRINT) pc = op_print();
}
}
#elif FUNCALL_PREDICTABLE
void execute(cell *code)
{
static void *ops[] = { op_stop, op_push, op_add, op_print };
static int stack[1000];
pc = code;
sp = stack;
if (code->i > 0 && code->i < 255) {
fixup(code, ops);
}
while (pc) {
if (pc->p == op_stop) pc = op_stop();
else if (pc->p == op_push) pc = op_push();
else if (pc->p == op_add) pc = op_add();
else if (pc->p == op_print) pc = op_print();
}
}
#endif
#elif USE_GOTO
void execute(cell *code)
{
static void *ops[] = { &&op_stop, &&op_push, &&op_add, &&op_print };
static int stack[1000];
cell *pc = code;
int *sp = stack;
int x, y;
if (code->i > 0 && code->i < 255) {
fixup(code, ops);
}
goto *((pc++)->p);
op_stop:
return;
op_push:
*sp++ = pc++->i;
goto *((pc++)->p);
op_add:
y = *--sp;
x = *--sp;
*sp++ = x + y;
goto *((pc++)->p);
op_print:
printf("--> %d\n", *--sp);
goto *((pc++)->p);
}
#endif
int main(int argc, char *argv[])
{
static cell code[10000];
int n;
if (argc != 2) {
printf("vm-test [byte-code-file]\n");
return 1;
}
load(argv[1], code);
printf("executing %d ops.\n", 100000 * count_ops(code));
for (n = 0; n < 100000; ++n) {
execute(code);
}
return 0;
}
#!/bin/sh
for algo in "-DUSE_GOTO" "-DUSE_SWITCH" \
"-DUSE_FUNCALL -DFUNCALL_ORIG" \
"-DUSE_FUNCALL -DFUNCALL_NAIVE" \
"-DUSE_FUNCALL -DFUNCALL_HYBRID" \
"-DUSE_FUNCALL -DFUNCALL_HYBRID_LINEAR" \
"-DUSE_FUNCALL -DFUNCALL_PREDICTABLE" \
"-DUSE_FUNCALL -DFUNCALL_PREDICTABLE_HALF"
do
for inline in "-DINLINE=" "-DINLINE=inline"; do
for opt in "-O3" ""; do
echo "ALGO: $algo"
echo "INLINE: $inline"
echo "OPT: $opt"
gcc -o vm-test vm.c $opt $algo $inline
time ./vm-test add.dat
echo ""
done
done
done