> Regarding the latter, Plan 9 does not allow floating point
> instructions to be executed within note handling, but erring on the
> side of caution also forbids instructions such as MOVOU (don't ask me)
> which is part of the SSE(2?) extension, but hardly qualifies as a
> floating point instruction.

movou (movdqu in the manual) is an sse2 data movement instruction.
not all sse2 instructions require that sse be turned on (pause, for example),
but movou uses at least one xmm register so is clearly using the sse
unit, thus requiring that it be turned on.

the go runtime memmove uses movou for memmoves between 33 and 128
bytes.  i only see a 10 cycle difference for these cases on my atom machine
(maximum 13%), so we're not missing out on much here by not using sse.

the real win, or loss for the plan 9 memmove, is in the short memmoves.
but this is a µbenchmark, and it would be more convincing with a real
world test.

- erik

harness; 8.memmovetest
memmove
1       92.42578 cycles/op
2       81.28125 cycles/op
4       56.47266 cycles/op
8       58.32422 cycles/op
16      62.28516 cycles/op
32      70.26563 cycles/op
64      86.32031 cycles/op
128     118.3125 cycles/op
512     323.5078 cycles/op
1024    587.1094 cycles/op
4096    2119.242 cycles/op
131072  133058.5 cycles/op

rt·memmove
1       20.60156 cycles/op
2       20.34375 cycles/op
4       24.46875 cycles/op
8       22.42969 cycles/op
16      27.45703 cycles/op
32      52.82813 cycles/op
64      79.19531 cycles/op
128     129.1289 cycles/op
512     314.4492 cycles/op
1024    569.9648 cycles/op
4096    2132.297 cycles/op
131072  135378.3 cycles/op
#include <u.h>
#include <libc.h>

typedef struct Movtab Movtab;
/* signature shared by every routine under test; same shape as memmove */
typedef void* (*Movfn)(void*, void*, ulong);

/*
 * NOTE(review): not referenced anywhere else in this file; presumably
 * read by runtimememmove to decide which instructions it may use.
 * 0x4000000 is bit 26, which is the SSE2 feature flag in the EDX
 * result of CPUID function 1 -- confirm against the assembly source.
 */
        u32int  runtimecpuid_edx = 0x4000000;
/* defined outside this file; the second routine timed by main */
extern  void*   runtimememmove(void*, void*, ulong);

/*
 * One routine to be timed.  The field order matters: movtab is
 * initialized positionally as function, name.
 */
struct Movtab {
        Movfn   f;		/* routine to call */
        char    *name;		/* label printed above its results */
};

/* value returned by gethz(); assigned in main and not read elsewhere in this file */
uvlong  hz;

/* transfer sizes, in bytes, tried for every entry of movtab */
int     sztab[] = {
        1, 2, 4, 8, 16, 32, 64, 128, 512, 1024, 4096, 128*1024,
};

/*
 * Buffers handed to the routines under test: buf1 is passed first and
 * buf0 second.  Each is as large as the biggest entry in sztab.
 */
uchar   buf0[128*1024];
uchar   buf1[128*1024];

/* routines to time, each paired with the heading printed for it */
Movtab  movtab[] = {
        {memmove, "memmove"},
        {runtimememmove, "rt·memmove"},
};

/*
 * gethz reads /dev/time and returns its fourth whitespace-separated
 * field, converted with strtoull (base 0).  Any failure to open, read
 * or split the file is fatal.
 *
 * NOTE(review): the fourth field of /dev/time is documented in cons(3)
 * as the frequency of the fast clock -- confirm on the target kernel.
 */
uvlong
gethz(void)
{
        char buf[1024], *f[5];
        int n, fd;

        /*
         * sysfatal already prefixes its output with the program name
         * (argv0), so the messages below do not repeat it.
         */
        fd = open("/dev/time", OREAD);
        if(fd == -1)
                sysfatal("open /dev/time: %r");
        n = pread(fd, buf, sizeof buf-1, 0);
        if(n <= 0)
                sysfatal("read /dev/time: %r");
        /* everything needed is in buf now; do not leak the descriptor */
        close(fd);

        buf[n] = 0;
        n = tokenize(buf, f, nelem(f));
        if(n < 4)
                sysfatal("/dev/time: unexpected fmt");

        return strtoull(f[3], 0, 0);
}

/*
 * inner is the timed region: it calls f 1024 times in a row, each time
 * with buf1 as the first argument, buf0 as the second and sz as the
 * length (destination, source and byte count for memmove).
 * main divides the elapsed cycle count by the same 1024.
 */
void
inner(Movfn f, ulong sz)
{
        int left;

        left = 1024;
        while(left-- > 0)
                f(buf1, buf0, sz);
}

/*
 * main times every routine in movtab at every size in sztab.
 * For each routine it prints the routine's name, then one line per
 * size giving the size and the average cycles per call, then a blank
 * line.  Each measurement brackets one call to inner() with cycles().
 */
void
main(int argc, char **argv)
{
        int i, j;
        uvlong t[2], c[nelem(movtab)][nelem(sztab)];
        Movfn f;

        /* no options are accepted; ARGBEGIN is kept because it sets argv0 */
        ARGBEGIN{
        }ARGEND

        /* hz is recorded but not used in the output below */
        hz = gethz();

        /* bound by the table itself so added routines are picked up */
        for(i = 0; i < nelem(movtab); i++){
                print("%s\n", movtab[i].name);
                f = movtab[i].f;
                for(j = 0; j < nelem(sztab); j++){
                        cycles(t + 0);
                        inner(f, sztab[j]);
                        cycles(t + 1);
                        c[i][j] = t[1] - t[0];

                        /* 1024 is the number of calls inner() makes per measurement */
                        print("%d       %g cycles/op\n", sztab[j], c[i][j]/1024.);
                        /* give up the processor between measurements */
                        sleep(0);
                }
                print("\n");
        }

        exits("");
}
from postmaster@kw:
The following attachment had content that we can't
prove to be harmless.  To avoid possible automatic
execution, we changed the content headers.
The original header was:

        Content-Disposition: attachment; filename=memmove_386.s
        Content-Type: text/plain; charset="UTF-8"
        Content-Transfer-Encoding: 8bit

Attachment: memmove_386.s.suspect
Description: Binary data

Reply via email to