Ken Fox wrote: > > I was noodling around with a couple different VM implementations > and thought I would ask other people if they were doing the same. > It would be nice to divide and conquer the exploration of different > implementations. > > I looked into 3 different dispatching techniques. Here's the list > with timings on a 600 MHz Pentium III running Linux and gcc 2.95: > > -g -O > Switch/case 6.0 sec 2.0 sec > Computed goto (gcc feature) 6.1 sec 1.4 sec > Function call 11.9 sec 11.8 sec Cool! I have the same machine (600MHz PIII) running Linux but using a different gcc: % gcc -v Reading specs from /usr/lib/gcc-lib/i386-redhat-linux/egcs-2.91.66/specs gcc version egcs-2.91.66 19990314/Linux (egcs-1.1.2 release) and I get the same numbers for -O (I used -O3) but different numbers without optimization. Maybe we should assume optimization? I implemented a few variants of the function calling version. FUNCALL_PREDICTABLE is exactly the same as your USE_FUNCALL except instead of jumping directly through a function pointer, it does a nested else-if statement. I had a vague idea that this would help branch prediction, without losing the flexibility because you could always have a final default 'else' that used a function pointer for unexpected functions. It also allows the compiler to inline the function bodies. (For completeness, I should really put the bodies in a separate compilation unit; I'm not sure whether it inlined them or not. Asking them to be inlined showed a small effect, so probably not? Or I could just disassemble.) That got a big speedup: 10.2sec -> 4.9sec. So maybe keeping things as separate functions isn't so harmful after all, though that computed goto is very tempting. (But before you believe this branch prediction BS, see below.) Since function addresses aren't constant in gcc, that requires a nested else-if construct. I took a shot at comparing the linear search with a switch, but this is probably a ridiculous test because of the tiny number of opcodes. The comparison is between FUNCALL_HYBRID and FUNCALL_HYBRID_LINEAR. FUNCALL_NAIVE looks at what happens if you don't rewrite opcode ids with function pointers, and instead do a lookup for every opcode. It slows it down some, though the whole thing is so slow it's not a big deal percentage-wise. FUNCALL_HYBRID is an attempt to use a switch with separate functions, avoiding function pointers. It gives the same bottom line: on the PIII, (1) function pointers are expensive, and (2) the compiler doesn't do a very good job of inlining (with inlining on, FUNCALL_HYBRID should be the same as SWITCH, but it's half the speed.) I'm not sure I really buy the branch prediction argument, so I tried another version FUNCALL_PREDICTABLE_HALF with half of the opcodes replaced with function pointers. So branch prediction should work almost as well since there's still always a single destination for every IP value (although there's still an extra level of indirection -- major handwaving here), but things go through a function pointer half the time. It slows things down some, but much less than half the difference between function pointers and no function pointers, so it seems to support the branch prediction argument. But I'm way too far out on a limb to believe anything I'm saying any more. :-) Algorithm Inline? Optimization Time (sec) GOTO - -O3 1.35 GOTO - none 5.87 SWITCH - -O3 2.19 SWITCH - none 5.72 FUNCALL_ORIG no -O3 10.15 FUNCALL_ORIG no none 12.37 FUNCALL_ORIG yes -O3 10.16 FUNCALL_ORIG yes none 12.37 FUNCALL_NAIVE no -O3 11.66 FUNCALL_NAIVE no none 13.51 FUNCALL_NAIVE yes -O3 11.67 FUNCALL_NAIVE yes none 13.51 FUNCALL_HYBRID no -O3 4.21 FUNCALL_HYBRID no none 10.08 FUNCALL_HYBRID yes -O3 4.20 FUNCALL_HYBRID yes none 10.05 FUNCALL_HYBRID_LINEAR no -O3 4.67 FUNCALL_HYBRID_LINEAR no none 10.06 FUNCALL_HYBRID_LINEAR yes -O3 4.67 FUNCALL_HYBRID_LINEAR yes none 10.07 FUNCALL_PREDICTABLE no -O3 4.92 FUNCALL_PREDICTABLE no none 9.85 FUNCALL_PREDICTABLE yes -O3 4.87 FUNCALL_PREDICTABLE yes none 9.84 FUNCALL_PREDICTABLE_HALFno -O3 5.81 FUNCALL_PREDICTABLE_HALFno none 9.66 FUNCALL_PREDICTABLE_HALFyes -O3 5.79 FUNCALL_PREDICTABLE_HALFyes none 9.65
/* This code is in the public domain. Ken Fox, Oct 2000 */ /* #define USE_FUNCALL 1 */ /* #define FUNCALL_ORIG 1 */ /* #define INLINE inline */ /* //#define INLINE */ #include <stdlib.h> #include <stdio.h> typedef union { int i; void *p; } cell; void load(char *filename, cell *code) { FILE *input; cell *pc = code; if (input = fopen(filename, "r")) { int i; while (fscanf(input, " %d ", &i) != EOF) { pc->i = i; ++pc; } fclose(input); } pc->i = 0; } void dump(cell *code) { cell *pc = code; printf("byte code:\n"); while (pc->i) { printf(" %5d\n", pc->i); ++pc; } } enum { STOP = 0, PUSH = 1, ADD = 2, PRINT = 3 }; int count_ops(cell *code) { int icount = 0; cell *pc = code; op: switch (pc->i) { case STOP: return icount; case PUSH: ++icount; pc += 2; goto op; case ADD: ++icount; pc += 1; goto op; case PRINT: ++icount; pc += 1; goto op; } } void fixup(cell *code, void *ops[]) { cell *pc = code; op: switch (pc->i) { case STOP: pc->p = ops[pc->i]; return; case PUSH: pc->p = ops[pc->i]; pc += 2; goto op; case ADD: pc->p = ops[pc->i]; pc += 1; goto op; case PRINT: pc->p = ops[pc->i]; pc += 1; goto op; } } #if USE_SWITCH void execute(cell *code) { static int stack[1000]; cell *pc = code; int *sp = stack; int x, y; op: switch (pc++->i) { case STOP: return; case PUSH: *sp++ = pc++->i; goto op; case ADD: y = *--sp; x = *--sp; *sp++ = x + y; goto op; case PRINT: printf("--> %d\n", *--sp); goto op; } } #elif USE_FUNCALL typedef void *(*op)(); cell *pc; int *sp; INLINE void *op_stop() { return 0; } INLINE void *op_push() { *sp++ = pc[1].i; return pc + 2; } INLINE void *op_add() { int y = *--sp; int x = *--sp; *sp++ = x + y; return pc + 1; } INLINE void *op_print() { printf("--> %d\n", *--sp); return pc + 1; } #if FUNCALL_ORIG void execute(cell *code) { static void *ops[] = { op_stop, op_push, op_add, op_print }; static int stack[1000]; if (code->i > 0 && code->i < 255) { fixup(code, ops); } pc = code; sp = stack; while (pc = ((op)(pc->p))()) ; } #elif FUNCALL_NAIVE void execute(cell *code) { static void *ops[] = { op_stop, op_push, op_add, op_print }; static int stack[1000]; pc = code; sp = stack; while (pc = ((op)(ops[pc->i]))()) ; } #elif FUNCALL_HYBRID void execute(cell *code) { static void *ops[] = { op_stop, op_push, op_add, op_print }; static int stack[1000]; pc = code; sp = stack; while (pc) { switch (pc->i) { case STOP: pc = op_stop(); break; case PUSH: pc = op_push(); break; case ADD: pc = op_add(); break; case PRINT: pc = op_print(); break; } } } #elif FUNCALL_HYBRID_LINEAR void execute(cell *code) { static void *ops[] = { op_stop, op_push, op_add, op_print }; static int stack[1000]; pc = code; sp = stack; while (pc) { if (pc->i == STOP) pc = op_stop(); else if (pc->i == PUSH) pc = op_push(); else if (pc->i == ADD) pc = op_add(); else if (pc->i == PRINT) pc = op_print(); } } #elif FUNCALL_PREDICTABLE void execute(cell *code) { static void *ops[] = { op_stop, op_push, op_add, op_print }; static int stack[1000]; pc = code; sp = stack; if (code->i > 0 && code->i < 255) { fixup(code, ops); } while (pc) { if (pc->p == op_stop) pc = op_stop(); else if (pc->p == op_push) pc = op_push(); else if (pc->p == op_add) pc = op_add(); else if (pc->p == op_print) pc = op_print(); } } #endif #elif USE_GOTO void execute(cell *code) { static void *ops[] = { &&op_stop, &&op_push, &&op_add, &&op_print }; static int stack[1000]; cell *pc = code; int *sp = stack; int x, y; if (code->i > 0 && code->i < 255) { fixup(code, ops); } goto *((pc++)->p); op_stop: return; op_push: *sp++ = pc++->i; goto *((pc++)->p); op_add: y = *--sp; x = *--sp; *sp++ = x + y; goto *((pc++)->p); op_print: printf("--> %d\n", *--sp); goto *((pc++)->p); } #endif int main(int argc, char *argv[]) { static cell code[10000]; int n; if (argc != 2) { printf("vm-test [byte-code-file]\n"); return 1; } load(argv[1], code); printf("executing %d ops.\n", 100000 * count_ops(code)); for (n = 0; n < 100000; ++n) { execute(code); } return 0; }
#!/bin/sh for algo in "-DUSE_GOTO" "-DUSE_SWITCH" \ "-DUSE_FUNCALL -DFUNCALL_ORIG" \ "-DUSE_FUNCALL -DFUNCALL_NAIVE" \ "-DUSE_FUNCALL -DFUNCALL_HYBRID" \ "-DUSE_FUNCALL -DFUNCALL_HYBRID_LINEAR" \ "-DUSE_FUNCALL -DFUNCALL_PREDICTABLE" \ "-DUSE_FUNCALL -DFUNCALL_PREDICTABLE_HALF" do for inline in "-DINLINE=" "-DINLINE=inline"; do for opt in "-O3" ""; do echo "ALGO: $algo" echo "INLINE: $inline" echo "OPT: $opt" gcc -o vm-test vm.c $opt $algo $inline time ./vm-test add.dat echo "" done done done