I was looking at Jens' FIO stuff, and I decided to cook up a quick patch for FIO to support GUASI (Generic Userspace Asynchronous Syscall Interface):
http://www.xmailserver.org/guasi-lib.html I then ran a few tests on my Dual Opteron 252 with SATA drives (sata_nv) and 8GB of RAM. Mind that I'm not a FIO expert, like at all, but I got some interesting results when comparing GUASI with libaio at 8/1000/10000 depths. If I read those results correctly (Jens may help), GUASI output is more than double the libaio one. Lots of context switches, yes. But the throughput looks like 2+ times. Can someone try to repeat the measurements and/or spot the error? Or tell me which other tests to run? This is kinda a surprise for me ... PS: FIO patch to support GUASI is attached. You also need to fetch GUASI and (configure && make install) - Davide >> fio --name=global --rw=randread --size=64m --ioengine=guasi --name=job1 >> --iodepth=8 --thread job1: (g=0): rw=randread, bs=4K-4K/4K-4K, ioengine=guasi, iodepth=8 Starting 1 thread Jobs: 1: [r] [100.0% done] [ 3135/ 0 kb/s] [eta 00m:00s] job1: (groupid=0, jobs=1): err= 0: pid=29298 read : io=65,536KiB, bw=1,576KiB/s, iops=384, runt= 42557msec slat (msec): min= 0, max= 0, avg= 0.00, stdev= 0.00 clat (msec): min= 0, max= 212, avg=20.26, stdev=18.83 bw (KiB/s) : min= 1166, max= 3376, per=98.51%, avg=1552.50, stdev=317.42 cpu : usr=7.69%, sys=92.99%, ctx=97648 IO depths : 1=0.0%, 2=0.0%, 4=0.1%, 8=99.9%, 16=0.0%, 32=0.0%, >=64=0.0% lat (msec): 2=1.4%, 4=3.6%, 10=25.3%, 20=34.0%, 50=28.1%, 100=6.8% lat (msec): 250=0.8%, 500=0.0%, 750=0.0%, 1000=0.0%, >=2000=0.0% Run status group 0 (all jobs): READ: io=65,536KiB, aggrb=1,576KiB/s, minb=1,576KiB/s, maxb=1,576KiB/s, mint=42557msec, maxt=42557msec Disk stats (read/write): sda: ios=16376/98, merge=8/135, ticks=339481/2810, in_queue=342290, util=99.17% >> fio --name=global --rw=randread --size=64m --ioengine=libaio --name=job1 >> --iodepth=8 --thread job1: (g=0): rw=randread, bs=4K-4K/4K-4K, ioengine=libaio, iodepth=8 Starting 1 thread Jobs: 1: [r] [95.9% done] [ 2423/ 0 kb/s] [eta 00m:03s] job1: (groupid=0, jobs=1): err= 0: pid=29332 read : 
io=65,536KiB, bw=929KiB/s, iops=226, runt= 72181msec slat (msec): min= 0, max= 98, avg=31.30, stdev=15.53 clat (msec): min= 0, max= 0, avg= 0.00, stdev= 0.00 bw (KiB/s) : min= 592, max= 2835, per=98.56%, avg=915.58, stdev=325.29 cpu : usr=0.02%, sys=0.34%, ctx=23023 IO depths : 1=22.2%, 2=22.2%, 4=44.4%, 8=11.1%, 16=0.0%, 32=0.0%, >=64=0.0% lat (msec): 2=100.0%, 4=0.0%, 10=0.0%, 20=0.0%, 50=0.0%, 100=0.0% lat (msec): 250=0.0%, 500=0.0%, 750=0.0%, 1000=0.0%, >=2000=0.0% Run status group 0 (all jobs): READ: io=65,536KiB, aggrb=929KiB/s, minb=929KiB/s, maxb=929KiB/s, mint=72181msec, maxt=72181msec Disk stats (read/write): sda: ios=16384/43, merge=0/42, ticks=71889/20573, in_queue=92461, util=99.57% >> fio --name=global --rw=randread --size=64m --ioengine=guasi --name=job1 >> --iodepth=1000 --thread job1: (g=0): rw=randread, bs=4K-4K/4K-4K, ioengine=guasi, iodepth=1000 Starting 1 thread Jobs: 1: [r] [93.9% done] [ 815/ 0 kb/s] [eta 00m:02s] job1: (groupid=0, jobs=1): err= 0: pid=29343 read : io=65,536KiB, bw=2,130KiB/s, iops=520, runt= 31500msec slat (msec): min= 0, max= 26, avg= 1.02, stdev= 4.19 clat (msec): min= 12, max=28024, avg=1920.73, stdev=764.20 bw (KiB/s) : min= 1139, max= 3376, per=95.21%, avg=2027.87, stdev=354.38 cpu : usr=7.35%, sys=93.77%, ctx=104637 IO depths : 1=0.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.1%, 32=0.2%, >=64=99.6% lat (msec): 2=0.0%, 4=0.0%, 10=0.0%, 20=0.0%, 50=0.1%, 100=0.4% lat (msec): 250=1.2%, 500=1.0%, 750=0.8%, 1000=0.7%, >=2000=45.5% Run status group 0 (all jobs): READ: io=65,536KiB, aggrb=2,130KiB/s, minb=2,130KiB/s, maxb=2,130KiB/s, mint=31500msec, maxt=31500msec Disk stats (read/write): sda: ios=16267/31, merge=115/28, ticks=4019824/313471, in_queue=4333625, util=98.84% >> fio --name=global --rw=randread --size=64m --ioengine=libaio --name=job1 >> --iodepth=1000 --thread job1: (g=0): rw=randread, bs=4K-4K/4K-4K, ioengine=libaio, iodepth=1000 Starting 1 thread Jobs: 1: [r] [98.6% done] [ 4083/ 0 kb/s] [eta 00m:01s]] job1: (groupid=0, 
jobs=1): err= 0: pid=30346 read : io=65,536KiB, bw=920KiB/s, iops=224, runt= 72925msec slat (msec): min= 0, max= 5539, avg=4431.27, stdev=1268.03 clat (msec): min= 0, max= 0, avg= 0.00, stdev= 0.00 bw (KiB/s) : min= 0, max= 2361, per=103.56%, avg=952.75, stdev=499.54 cpu : usr=0.02%, sys=0.39%, ctx=23089 IO depths : 1=0.2%, 2=0.2%, 4=0.4%, 8=0.8%, 16=1.7%, 32=3.3%, >=64=93.4% lat (msec): 2=100.0%, 4=0.0%, 10=0.0%, 20=0.0%, 50=0.0%, 100=0.0% lat (msec): 250=0.0%, 500=0.0%, 750=0.0%, 1000=0.0%, >=2000=0.0% Run status group 0 (all jobs): READ: io=65,536KiB, aggrb=920KiB/s, minb=920KiB/s, maxb=920KiB/s, mint=72925msec, maxt=72925msec Disk stats (read/write): sda: ios=16384/70, merge=0/54, ticks=72644/31038, in_queue=103682, util=99.61% >> fio --name=global --rw=randread --size=64m --ioengine=guasi --name=job1 >> --iodepth=10000 --thread job1: (g=0): rw=randread, bs=4K-4K/4K-4K, ioengine=guasi, iodepth=10000 Starting 1 thread Jobs: 1: [r] [100.0% done] [ 40752/ 0 kb/s] [eta 00m:00s] job1: (groupid=0, jobs=1): err= 0: pid=32203 read : io=65,536KiB, bw=1,965KiB/s, iops=479, runt= 34148msec slat (msec): min= 0, max= 323, avg=124.06, stdev=112.39 clat (msec): min= 0, max=33982, avg=20686.86, stdev=13689.22 bw (KiB/s) : min= 1, max= 2187, per=94.75%, avg=1861.75, stdev=392.89 cpu : usr=0.35%, sys=2.42%, ctx=166667 IO depths : 1=0.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.1%, 32=0.2%, >=64=99.6% lat (msec): 2=0.0%, 4=0.0%, 10=0.0%, 20=0.1%, 50=0.5%, 100=1.5% lat (msec): 250=5.0%, 500=5.6%, 750=1.8%, 1000=0.8%, >=2000=2.3% Run status group 0 (all jobs): READ: io=65,536KiB, aggrb=1,965KiB/s, minb=1,965KiB/s, maxb=1,965KiB/s, mint=34148msec, maxt=34148msec Disk stats (read/write): sda: ios=16064/122, merge=319/73, ticks=4350268/172548, in_queue=4521657, util=98.95% >> fio --name=global --rw=randread --size=64m --ioengine=libaio --name=job1 >> --iodepth=10000 --thread job1: (g=0): rw=randread, bs=4K-4K/4K-4K, ioengine=libaio, iodepth=10000 Starting 1 thread Jobs: 1: [r] [61.3% done] [ 0/ 
0 kb/s] [eta 00m:46s]] job1: (groupid=0, jobs=1): err= 0: pid=9791 read : io=65,536KiB, bw=917KiB/s, iops=224, runt= 73118msec slat (msec): min= 1, max=52656, avg=40082.23, stdev=15703.83 clat (msec): min= 0, max= 3, avg= 2.61, stdev= 0.49 bw (KiB/s) : min= 0, max= 2002, per=109.16%, avg=1001.00, stdev=1415.63 cpu : usr=0.02%, sys=0.40%, ctx=23095 IO depths : 1=0.0%, 2=0.0%, 4=0.0%, 8=0.1%, 16=0.2%, 32=0.4%, >=64=99.2% lat (msec): 2=0.0%, 4=100.0%, 10=0.0%, 20=0.0%, 50=0.0%, 100=0.0% lat (msec): 250=0.0%, 500=0.0%, 750=0.0%, 1000=0.0%, >=2000=0.0% Run status group 0 (all jobs): READ: io=65,536KiB, aggrb=917KiB/s, minb=917KiB/s, maxb=917KiB/s, mint=73118msec, maxt=73118msec Disk stats (read/write): sda: ios=16384/82, merge=0/86, ticks=72720/36477, in_queue=109197, util=99.44%
diff -Nru fio-1.14/engines/guasi.c fio-1.14.guasi/engines/guasi.c --- fio-1.14/engines/guasi.c 1969-12-31 16:00:00.000000000 -0800 +++ fio-1.14.guasi/engines/guasi.c 2007-03-20 21:26:58.000000000 -0700 @@ -0,0 +1,256 @@ +/* + * guasi engine + * + * IO engine using the GUASI library. + * + */ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <assert.h> + +#include "../fio.h" +#include "../os.h" + +#ifdef FIO_HAVE_GUASI + +#define GFIO_MIN_THREADS 32 + +#include <guasi.h> +#include <guasi_syscalls.h> + +#ifdef GFIO_DEBUG +#define GDBG_PRINT(a) printf a +#else +#define GDBG_PRINT(a) (void) 0 +#endif + +#define STFU_GCC(a) a = a + + +struct guasi_data { + guasi_t hctx; + int max_reqs; + guasi_req_t *reqs; + struct io_u **io_us; + int reqs_nr; +}; + +static int fio_guasi_prep(struct thread_data fio_unused *td, struct io_u *io_u) +{ + STFU_GCC(io_u); + + GDBG_PRINT(("fio_guasi_prep(%p)\n", io_u)); + + return 0; +} + +static struct io_u *fio_guasi_event(struct thread_data *td, int event) +{ + struct guasi_data *ld = td->io_ops->data; + struct io_u *io_u; + struct guasi_reqinfo rinf; + + GDBG_PRINT(("fio_guasi_event(%d)\n", event)); + if (guasi_req_info(ld->reqs[event], &rinf) < 0) { + fprintf(stderr, "guasi_req_info(%d) FAILED!\n", event); + return NULL; + } + io_u = rinf.asid; + GDBG_PRINT(("fio_guasi_event(%d) -> %p\n", event, io_u)); + + if (io_u->ddir == DDIR_READ || + io_u->ddir == DDIR_WRITE) { + if (rinf.result != (long) io_u->xfer_buflen) { + if (rinf.result < 0) + io_u->error = rinf.error; + else + io_u->resid = io_u->xfer_buflen - rinf.result; + } else + io_u->error = 0; + } else + io_u->error = rinf.result; + + return io_u; +} + +static int fio_guasi_getevents(struct thread_data *td, int min, int max, + struct timespec *t) +{ + struct guasi_data *ld = td->io_ops->data; + int n = 0, r; + long timeo = -1; + + GDBG_PRINT(("fio_guasi_getevents(%d, %d)\n", min, max)); + if (min > ld->max_reqs) + min = ld->max_reqs; + if (max 
> ld->max_reqs) + max = ld->max_reqs; + if (t) + timeo = t->tv_sec * 1000L + t->tv_nsec / 1000000L; + do { + r = guasi_fetch(ld->hctx, ld->reqs + n, max - n, timeo); + if (r < 0) + break; + n += r; + if (n >= min) + break; + } while (1); + GDBG_PRINT(("fio_guasi_getevents() -> %d\n", n)); + + return n; +} + +static int fio_guasi_queue(struct thread_data *td, struct io_u *io_u) +{ + struct guasi_data *ld = td->io_ops->data; + + GDBG_PRINT(("fio_guasi_queue(%p)\n", io_u)); + if (ld->reqs_nr == (int) td->iodepth) + return FIO_Q_BUSY; + + ld->io_us[ld->reqs_nr] = io_u; + ld->reqs_nr++; + return FIO_Q_QUEUED; +} + +static void fio_guasi_queued(struct thread_data *td, struct io_u **io_us, + unsigned int nr) +{ + struct timeval now; + struct io_u *io_u = io_us[nr]; + + fio_gettime(&now, NULL); + memcpy(&io_u->issue_time, &now, sizeof(now)); + io_u_queued(td, io_u); +} + +static int fio_guasi_commit(struct thread_data *td) +{ + struct guasi_data *ld = td->io_ops->data; + int i; + struct io_u *io_u; + struct fio_file *f; + + GDBG_PRINT(("fio_guasi_commit()\n")); + for (i = 0; i < ld->reqs_nr; i++) { + io_u = ld->io_us[i]; + f = io_u->file; + io_u->greq = NULL; + if (io_u->ddir == DDIR_READ) + io_u->greq = guasi__pread(ld->hctx, ld, io_u, 0, + f->fd, io_u->xfer_buf, io_u->xfer_buflen, + io_u->offset); + else if (io_u->ddir == DDIR_WRITE) + io_u->greq = guasi__pwrite(ld->hctx, ld, io_u, 0, + f->fd, io_u->xfer_buf, io_u->xfer_buflen, + io_u->offset); + else if (io_u->ddir == DDIR_SYNC) + io_u->greq = guasi__fsync(ld->hctx, ld, io_u, 0, f->fd); + else { + fprintf(stderr, "fio_guasi_commit() FAILED: %d\n", io_u->ddir); + } + if (io_u->greq != NULL) + fio_guasi_queued(td, ld->io_us, i); + } + ld->reqs_nr = 0; + GDBG_PRINT(("fio_guasi_commit() -> %d\n", i)); + + return 0; +} + +static int fio_guasi_cancel(struct thread_data *td, struct io_u *io_u) +{ + struct guasi_data *ld = td->io_ops->data; + + STFU_GCC(ld); + GDBG_PRINT(("fio_guasi_cancel(%p)\n", io_u)); + + return 
guasi_req_cancel(io_u->greq); +} + +static void fio_guasi_cleanup(struct thread_data *td) +{ + struct guasi_data *ld = td->io_ops->data; + + if (ld) { + guasi_free(ld->hctx); + free(ld->reqs); + free(ld->io_us); + free(ld); + td->io_ops->data = NULL; + } +} + +static int fio_guasi_init(struct thread_data *td) +{ + int maxthr; + struct guasi_data *ld = malloc(sizeof(*ld)); + + GDBG_PRINT(("fio_guasi_init(): depth=%d\n", td->iodepth)); + memset(ld, 0, sizeof(*ld)); + maxthr = td->iodepth > GFIO_MIN_THREADS ? td->iodepth: GFIO_MIN_THREADS; + if ((ld->hctx = guasi_create(GFIO_MIN_THREADS, maxthr, 1)) == NULL) { + td_verror(td, errno, "guasi_create"); + free(ld); + return 1; + } + ld->max_reqs = td->iodepth; + ld->reqs = malloc(ld->max_reqs * sizeof(guasi_req_t)); + ld->io_us = malloc(ld->max_reqs * sizeof(struct io_u *)); + memset(ld->io_us, 0, ld->max_reqs * sizeof(struct io_u *)); + ld->reqs_nr = 0; + + td->io_ops->data = ld; + GDBG_PRINT(("fio_guasi_init(): depth=%d -> %p\n", td->iodepth, ld)); + + return 0; +} + +static struct ioengine_ops ioengine = { + .name = "guasi", + .version = FIO_IOOPS_VERSION, + .init = fio_guasi_init, + .prep = fio_guasi_prep, + .queue = fio_guasi_queue, + .commit = fio_guasi_commit, + .cancel = fio_guasi_cancel, + .getevents = fio_guasi_getevents, + .event = fio_guasi_event, + .cleanup = fio_guasi_cleanup, + .open_file = generic_open_file, + .close_file = generic_close_file, +}; + +#else /* FIO_HAVE_GUASI */ + +/* + * When we have a proper configure system in place, we simply wont build + * and install this io engine. For now install a crippled version that + * just complains and fails to load. 
+ */ +static int fio_guasi_init(struct thread_data fio_unused *td) +{ + fprintf(stderr, "fio: guasi not available\n"); + return 1; +} + +static struct ioengine_ops ioengine = { + .name = "guasi", + .version = FIO_IOOPS_VERSION, + .init = fio_guasi_init, +}; + +#endif + +static void fio_init fio_guasi_register(void) +{ + register_ioengine(&ioengine); +} + +static void fio_exit fio_guasi_unregister(void) +{ + unregister_ioengine(&ioengine); +} + diff -Nru fio-1.14/fio.h fio-1.14.guasi/fio.h --- fio-1.14/fio.h 2007-03-14 06:24:42.000000000 -0700 +++ fio-1.14.guasi/fio.h 2007-03-20 20:06:48.000000000 -0700 @@ -23,6 +23,10 @@ #include "syslet.h" #endif +#ifdef FIO_HAVE_GUASI +#include <guasi.h> +#endif + enum fio_ddir { DDIR_READ = 0, DDIR_WRITE, @@ -110,6 +114,9 @@ #ifdef FIO_HAVE_SYSLET struct syslet_req req; #endif +#ifdef FIO_HAVE_GUASI + guasi_req_t greq; +#endif }; struct timeval start_time; struct timeval issue_time; Binary files fio-1.14/job1.1.0 and fio-1.14.guasi/job1.1.0 differ diff -Nru fio-1.14/Makefile fio-1.14.guasi/Makefile --- fio-1.14/Makefile 2007-03-14 06:24:42.000000000 -0700 +++ fio-1.14.guasi/Makefile 2007-03-20 21:13:42.000000000 -0700 @@ -18,6 +18,7 @@ OBJS += engines/null.o OBJS += engines/net.o OBJS += engines/syslet-rw.o +OBJS += engines/guasi.o INSTALL = install prefix = /usr/local @@ -26,7 +27,7 @@ all: $(PROGS) $(SCRIPTS) fio: $(OBJS) - $(CC) $(CFLAGS) -o $@ $(filter %.o,$^) -lpthread -lm -ldl -laio -lrt + $(CC) $(CFLAGS) -o $@ $(filter %.o,$^) -lguasi -lpthread -lm -ldl -laio -lrt clean: -rm -f *.o .depend cscope.out $(PROGS) engines/*.o core.* core diff -Nru fio-1.14/os-linux.h fio-1.14.guasi/os-linux.h --- fio-1.14/os-linux.h 2007-03-14 06:24:42.000000000 -0700 +++ fio-1.14.guasi/os-linux.h 2007-03-20 21:13:58.000000000 -0700 @@ -10,6 +10,7 @@ #define FIO_HAVE_LIBAIO #define FIO_HAVE_POSIXAIO +#define FIO_HAVE_GUASI #define FIO_HAVE_FADVISE #define FIO_HAVE_CPU_AFFINITY #define FIO_HAVE_DISK_UTIL