Ken Fox wrote:
> 
> I was noodling around with a couple different VM implementations
> and thought I would ask other people if they were doing the same.
> It would be nice to divide and conquer the exploration of different
> implementations.
> 
> I looked into 3 different dispatching techniques. Here's the list
> with timings on a 600 MHz Pentium III running Linux and gcc 2.95:
> 
>                                  -g            -O
> Switch/case                    6.0 sec       2.0 sec
> Computed goto (gcc feature)    6.1 sec       1.4 sec
> Function call                 11.9 sec      11.8 sec

Cool!

I have the same machine (600MHz PIII) running Linux but using a
different gcc:

% gcc -v
Reading specs from /usr/lib/gcc-lib/i386-redhat-linux/egcs-2.91.66/specs
gcc version egcs-2.91.66 19990314/Linux (egcs-1.1.2 release)

and I get the same numbers for -O (I used -O3) but different numbers
without optimization. Maybe we should assume optimization?

I implemented a few variants of the function calling version.
FUNCALL_PREDICTABLE is exactly the same as your USE_FUNCALL except
instead of jumping directly through a function pointer, it does a nested
else-if statement. I had a vague idea that this would help branch
prediction, without losing the flexibility because you could always have
a final default 'else' that used a function pointer for unexpected
functions. It also allows the compiler to inline the function bodies.
(For completeness, I should really put the bodies in a separate
compilation unit; I'm not sure whether it inlined them or not. Asking
them to be inlined showed a small effect, so probably not? Or I could
just disassemble.) That got a big speedup: 10.2sec -> 4.9sec. So maybe
keeping things as separate functions isn't so harmful after all, though
that computed goto is very tempting. (But before you believe this branch
prediction BS, see below.)

Since function addresses aren't constant in gcc, that requires a nested
else-if construct. I took a shot at comparing the linear search with a
switch, but this is probably a ridiculous test because of the tiny
number of opcodes. The comparison is between FUNCALL_HYBRID and
FUNCALL_HYBRID_LINEAR.

FUNCALL_NAIVE looks at what happens if you don't rewrite opcode ids with
function pointers, and instead do a lookup for every opcode. It slows it
down some, though the whole thing is so slow it's not a big deal
percentage-wise.

FUNCALL_HYBRID is an attempt to use a switch with separate functions,
avoiding function pointers. It gives the same bottom line: on the PIII,
(1) function pointers are expensive, and (2) the compiler doesn't do a
very good job of inlining (with inlining on, FUNCALL_HYBRID should be
the same as SWITCH, but it's half the speed.)

I'm not sure I really buy the branch prediction argument, so I tried
another version FUNCALL_PREDICTABLE_HALF with half of the opcodes
replaced with function pointers. So branch prediction should work almost
as well since there's still always a single destination for every IP
value (although there's still an extra level of indirection -- major
handwaving here), but things go through a function pointer half the
time. It slows things down some, but much less than half the difference
between function pointers and no function pointers, so it seems to
support the branch prediction argument. But I'm way too far out on a
limb to believe anything I'm saying any more. :-)

Algorithm               Inline?         Optimization    Time (sec)
GOTO                    -               -O3             1.35
GOTO                    -               none            5.87
SWITCH                  -               -O3             2.19
SWITCH                  -               none            5.72
FUNCALL_ORIG            no              -O3             10.15
FUNCALL_ORIG            no              none            12.37
FUNCALL_ORIG            yes             -O3             10.16
FUNCALL_ORIG            yes             none            12.37
FUNCALL_NAIVE           no              -O3             11.66
FUNCALL_NAIVE           no              none            13.51
FUNCALL_NAIVE           yes             -O3             11.67
FUNCALL_NAIVE           yes             none            13.51
FUNCALL_HYBRID          no              -O3             4.21
FUNCALL_HYBRID          no              none            10.08
FUNCALL_HYBRID          yes             -O3             4.20
FUNCALL_HYBRID          yes             none            10.05
FUNCALL_HYBRID_LINEAR   no              -O3             4.67
FUNCALL_HYBRID_LINEAR   no              none            10.06
FUNCALL_HYBRID_LINEAR   yes             -O3             4.67
FUNCALL_HYBRID_LINEAR   yes             none            10.07
FUNCALL_PREDICTABLE     no              -O3             4.92
FUNCALL_PREDICTABLE     no              none            9.85
FUNCALL_PREDICTABLE     yes             -O3             4.87
FUNCALL_PREDICTABLE     yes             none            9.84
FUNCALL_PREDICTABLE_HALFno              -O3             5.81
FUNCALL_PREDICTABLE_HALFno              none            9.66
FUNCALL_PREDICTABLE_HALFyes             -O3             5.79
FUNCALL_PREDICTABLE_HALFyes             none            9.65

/* This code is in the public domain. Ken Fox, Oct 2000 */

/*  #define USE_FUNCALL 1 */
/*  #define FUNCALL_ORIG 1 */
/*  #define INLINE inline */
/*  //#define INLINE */

#include <stdlib.h>
#include <stdio.h>

typedef union {
    int i;
    void *p;
} cell;

void load(char *filename, cell *code)
{
    FILE *input;
    cell *pc = code;

    if (input = fopen(filename, "r")) {
        int i;
        while (fscanf(input, " %d ", &i) != EOF) {
            pc->i = i;
            ++pc;
        }
        fclose(input);
    }

    pc->i = 0;
}

void dump(cell *code)
{
    cell *pc = code;

    printf("byte code:\n");
    while (pc->i) {
        printf("  %5d\n", pc->i);
        ++pc;
    }
}

enum {
    STOP = 0,
    PUSH = 1,
    ADD = 2,
    PRINT = 3
};

int count_ops(cell *code)
{
    int icount = 0;

    cell *pc = code;

op: switch (pc->i) {
        case STOP:
            return icount;

        case PUSH:
            ++icount;
            pc += 2;
            goto op;

        case ADD:
            ++icount;
            pc += 1;
            goto op;

        case PRINT:
            ++icount;
            pc += 1;
            goto op;
    }
}

void fixup(cell *code, void *ops[])
{
    cell *pc = code;

op: switch (pc->i) {
        case STOP:
            pc->p = ops[pc->i];
            return;

        case PUSH:
            pc->p = ops[pc->i];
            pc += 2;
            goto op;

        case ADD:
            pc->p = ops[pc->i];
            pc += 1;
            goto op;

        case PRINT:
            pc->p = ops[pc->i];
            pc += 1;
            goto op;
    }
}

#if USE_SWITCH

void execute(cell *code)
{
    static int stack[1000];
    cell *pc = code;
    int *sp = stack;
    int x, y;

op: switch (pc++->i) {
        case STOP:
            return;

        case PUSH:
            *sp++ = pc++->i;
            goto op;

        case ADD:
            y = *--sp;
            x = *--sp;
            *sp++ = x + y;
            goto op;

        case PRINT:
            printf("--> %d\n", *--sp);
            goto op;
    }
}

#elif USE_FUNCALL

typedef void *(*op)();

cell *pc;
int *sp;

INLINE void *op_stop()
{
    return 0;
}

INLINE void *op_push()
{
    *sp++ = pc[1].i;
    return pc + 2;
}

INLINE void *op_add()
{
    int y = *--sp;
    int x = *--sp;
    *sp++ = x + y;
    return pc + 1;
}

INLINE void *op_print()
{
    printf("--> %d\n", *--sp);
    return pc + 1;
}

#if FUNCALL_ORIG

void execute(cell *code)
{
    static void *ops[] = { op_stop, op_push, op_add, op_print };
    static int stack[1000];

    if (code->i > 0 && code->i < 255) {
        fixup(code, ops);
    }

    pc = code;
    sp = stack;

    while (pc = ((op)(pc->p))())
        ;
}

#elif FUNCALL_NAIVE

void execute(cell *code)
{
    static void *ops[] = { op_stop, op_push, op_add, op_print };
    static int stack[1000];

    pc = code;
    sp = stack;

    while (pc = ((op)(ops[pc->i]))())
        ;
}

#elif FUNCALL_HYBRID

void execute(cell *code)
{
    static void *ops[] = { op_stop, op_push, op_add, op_print };
    static int stack[1000];

    pc = code;
    sp = stack;

    while (pc) {
        switch (pc->i) {
        case STOP: pc = op_stop(); break;
        case PUSH: pc = op_push(); break;
        case ADD: pc = op_add(); break;
        case PRINT: pc = op_print(); break;
        }
    }
}

#elif FUNCALL_HYBRID_LINEAR

void execute(cell *code)
{
    static void *ops[] = { op_stop, op_push, op_add, op_print };
    static int stack[1000];

    pc = code;
    sp = stack;

    while (pc) {
        if (pc->i == STOP) pc = op_stop();
        else if (pc->i == PUSH) pc = op_push();
        else if (pc->i == ADD) pc = op_add();
        else if (pc->i == PRINT) pc = op_print();
    }
}

#elif FUNCALL_PREDICTABLE

void execute(cell *code)
{
    static void *ops[] = { op_stop, op_push, op_add, op_print };
    static int stack[1000];

    pc = code;
    sp = stack;

    if (code->i > 0 && code->i < 255) {
        fixup(code, ops);
    }

    while (pc) {
        if (pc->p == op_stop) pc = op_stop();
        else if (pc->p == op_push) pc = op_push();
        else if (pc->p == op_add) pc = op_add();
        else if (pc->p == op_print) pc = op_print();
    }
}

#endif

#elif USE_GOTO

void execute(cell *code)
{
    static void *ops[] = { &&op_stop, &&op_push, &&op_add, &&op_print };
    static int stack[1000];
    cell *pc = code;
    int *sp = stack;
    int x, y;

    if (code->i > 0 && code->i < 255) {
        fixup(code, ops);
    }

    goto *((pc++)->p);

op_stop:
    return;

op_push:
    *sp++ = pc++->i;
    goto *((pc++)->p);

op_add:
    y = *--sp;
    x = *--sp;
    *sp++ = x + y;
    goto *((pc++)->p);

op_print:
    printf("--> %d\n", *--sp);
    goto *((pc++)->p);
}

#endif

int main(int argc, char *argv[])
{
    static cell code[10000];
    int n;

    if (argc != 2) {
        printf("vm-test [byte-code-file]\n");
        return 1;
    }

    load(argv[1], code);

    printf("executing %d ops.\n", 100000 * count_ops(code));
    for (n = 0; n < 100000; ++n) {
        execute(code);
    }

    return 0;
}
#!/bin/sh

for algo in "-DUSE_GOTO" "-DUSE_SWITCH" \
            "-DUSE_FUNCALL -DFUNCALL_ORIG" \
            "-DUSE_FUNCALL -DFUNCALL_NAIVE" \
            "-DUSE_FUNCALL -DFUNCALL_HYBRID" \
            "-DUSE_FUNCALL -DFUNCALL_HYBRID_LINEAR" \
            "-DUSE_FUNCALL -DFUNCALL_PREDICTABLE" \
            "-DUSE_FUNCALL -DFUNCALL_PREDICTABLE_HALF"
do
for inline in "-DINLINE=" "-DINLINE=inline"; do
for opt in "-O3" ""; do
        echo "ALGO: $algo"
        echo "INLINE: $inline"
        echo "OPT: $opt"
        gcc -o vm-test vm.c $opt $algo $inline
        time ./vm-test add.dat
        echo ""
done
done
done

Reply via email to