This improves the performance of requests because an ACB doesn't need to be allocated on the heap any more. It also makes the code nicer and smaller.
As a side effect, the codepath taken by aio=threads is changed to use paio_submit_co(). This doesn't change the performance at this point. Results of qemu-img bench -t none -c 10000000 [-n] /dev/loop0: | aio=native | aio=threads | before | with patch | before | with patch ------+----------+------------+----------+------------ run 1 | 29.921s | 26.932s | 35.286s | 35.447s run 2 | 29.793s | 26.252s | 35.276s | 35.111s run 3 | 30.186s | 27.114s | 35.042s | 34.921s run 4 | 30.425s | 26.600s | 35.169s | 34.968s run 5 | 30.041s | 26.263s | 35.224s | 35.000s TODO: Do some more serious benchmarking in VMs with less variance. Results of a quick fio run are vaguely positive. Signed-off-by: Kevin Wolf <kw...@redhat.com> --- block/linux-aio.c | 70 +++++++++++++++++-------------------------------------- block/raw-aio.h | 5 ++-- block/raw-posix.c | 62 ++++++++++++++++++++++-------------------------- 3 files changed, 52 insertions(+), 85 deletions(-) diff --git a/block/linux-aio.c b/block/linux-aio.c index d92513b..99b259d 100644 --- a/block/linux-aio.c +++ b/block/linux-aio.c @@ -12,6 +12,7 @@ #include "qemu/queue.h" #include "block/raw-aio.h" #include "qemu/event_notifier.h" +#include "block/coroutine.h" #include <libaio.h> @@ -28,7 +29,7 @@ #define MAX_QUEUED_IO 128 struct qemu_laiocb { - BlockAIOCB common; + Coroutine *co; struct qemu_laio_state *ctx; struct iocb iocb; ssize_t ret; @@ -86,9 +87,9 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s, } } } - laiocb->common.cb(laiocb->common.opaque, ret); - qemu_aio_unref(laiocb); + laiocb->ret = ret; + qemu_coroutine_enter(laiocb->co, NULL); } /* The completion BH fetches completed I/O requests and invokes their @@ -146,30 +147,6 @@ static void qemu_laio_completion_cb(EventNotifier *e) } } -static void laio_cancel(BlockAIOCB *blockacb) -{ - struct qemu_laiocb *laiocb = (struct qemu_laiocb *)blockacb; - struct io_event event; - int ret; - - if (laiocb->ret != -EINPROGRESS) { - return; - } - ret = io_cancel(laiocb->ctx->ctx, &laiocb->iocb, &event); - laiocb->ret = -ECANCELED; - if (ret != 0) { - /* iocb is not cancelled, cb will be called by the event loop later */ - return; - } - - laiocb->common.cb(laiocb->common.opaque, laiocb->ret); -} - -static const AIOCBInfo laio_aiocb_info = { - .aiocb_size = sizeof(struct qemu_laiocb), - .cancel_async = laio_cancel, -}; - static void ioq_init(LaioQueue *io_q) { io_q->size = MAX_QUEUED_IO; @@ -243,23 +220,21 @@ int laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug) return ret; } -BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque, int type) +int laio_submit_co(BlockDriverState *bs, void *aio_ctx, int fd, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, int type) { struct qemu_laio_state *s = aio_ctx; - struct qemu_laiocb *laiocb; - struct iocb *iocbs; off_t offset = sector_num * 512; - laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque); - laiocb->nbytes = nb_sectors * 512; - laiocb->ctx = s; - laiocb->ret = -EINPROGRESS; - laiocb->is_read = (type == QEMU_AIO_READ); - laiocb->qiov = qiov; - - iocbs = &laiocb->iocb; + struct qemu_laiocb laiocb = { + .co = qemu_coroutine_self(), + .nbytes = nb_sectors * 512, + .ctx = s, + .is_read = (type == QEMU_AIO_READ), + .qiov = qiov, + }; + struct iocb *iocbs = &laiocb.iocb; + int ret; switch (type) { case QEMU_AIO_WRITE: @@ -272,22 +247,21 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd, default: fprintf(stderr, "%s: invalid AIO request type 0x%x.\n", __func__, type); - goto out_free_aiocb; + return -EIO; } - io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e)); + io_set_eventfd(&laiocb.iocb, event_notifier_get_fd(&s->e)); if (!s->io_q.plugged) { - if (io_submit(s->ctx, 1, &iocbs) < 0) { - goto out_free_aiocb; + ret = io_submit(s->ctx, 1, &iocbs); + if (ret < 0) { + return ret; } } else { ioq_enqueue(s, iocbs); } - return &laiocb->common; -out_free_aiocb: - qemu_aio_unref(laiocb); - return NULL; + qemu_coroutine_yield(); + return laiocb.ret; } void laio_detach_aio_context(void *s_, AioContext *old_context) diff --git a/block/raw-aio.h b/block/raw-aio.h index 80681ce..b83c8af 100644 --- a/block/raw-aio.h +++ b/block/raw-aio.h @@ -35,9 +35,8 @@ #ifdef CONFIG_LINUX_AIO void *laio_init(void); void laio_cleanup(void *s); -BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque, int type); +int laio_submit_co(BlockDriverState *bs, void *aio_ctx, int fd, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, int type); void laio_detach_aio_context(void *s, AioContext *old_context); void laio_attach_aio_context(void *s, AioContext *new_context); void laio_io_plug(BlockDriverState *bs, void *aio_ctx); diff --git a/block/raw-posix.c b/block/raw-posix.c index b1af77e..aa16b53 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -1075,14 +1075,13 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd, return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque); } -static BlockAIOCB *raw_aio_submit(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque, int type) +static int coroutine_fn raw_co_rw(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov, int type) { BDRVRawState *s = bs->opaque; if (fd_open(bs) < 0) - return NULL; + return -EIO; /* * Check if the underlying device requires requests to be aligned, @@ -1095,14 +1094,25 @@ static BlockAIOCB *raw_aio_submit(BlockDriverState *bs, type |= QEMU_AIO_MISALIGNED; #ifdef CONFIG_LINUX_AIO } else if (s->use_aio) { - return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov, - nb_sectors, cb, opaque, type); + return laio_submit_co(bs, s->aio_ctx, s->fd, sector_num, qiov, + nb_sectors, type); #endif } } - return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors, - cb, opaque, type); + return paio_submit_co(bs, s->fd, sector_num, qiov, nb_sectors, type); +} + +static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov) +{ + return raw_co_rw(bs, sector_num, nb_sectors, qiov, QEMU_AIO_READ); +} + +static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov) +{ + return raw_co_rw(bs, sector_num, nb_sectors, qiov, QEMU_AIO_WRITE); } static void raw_aio_plug(BlockDriverState *bs) @@ -1135,22 +1145,6 @@ static void raw_aio_flush_io_queue(BlockDriverState *bs) #endif } -static BlockAIOCB *raw_aio_readv(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque) -{ - return raw_aio_submit(bs, sector_num, qiov, nb_sectors, - cb, opaque, QEMU_AIO_READ); -} - -static BlockAIOCB *raw_aio_writev(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque) -{ - return raw_aio_submit(bs, sector_num, qiov, nb_sectors, - cb, opaque, QEMU_AIO_WRITE); -} - static BlockAIOCB *raw_aio_flush(BlockDriverState *bs, BlockCompletionFunc *cb, void *opaque) { @@ -1701,8 +1695,8 @@ static BlockDriver bdrv_file = { .bdrv_co_get_block_status = raw_co_get_block_status, .bdrv_co_write_zeroes = raw_co_write_zeroes, - .bdrv_aio_readv = raw_aio_readv, - .bdrv_aio_writev = raw_aio_writev, + .bdrv_co_readv = raw_co_readv, + .bdrv_co_writev = raw_co_writev, .bdrv_aio_flush = raw_aio_flush, .bdrv_aio_discard = raw_aio_discard, .bdrv_refresh_limits = raw_refresh_limits, @@ -2103,8 +2097,8 @@ static BlockDriver bdrv_host_device = { .create_opts = &raw_create_opts, .bdrv_co_write_zeroes = hdev_co_write_zeroes, - .bdrv_aio_readv = raw_aio_readv, - .bdrv_aio_writev = raw_aio_writev, + .bdrv_co_readv = raw_co_readv, + .bdrv_co_writev = raw_co_writev, .bdrv_aio_flush = raw_aio_flush, .bdrv_aio_discard = hdev_aio_discard, .bdrv_refresh_limits = raw_refresh_limits, @@ -2252,8 +2246,8 @@ static BlockDriver bdrv_host_floppy = { .bdrv_create = hdev_create, .create_opts = &raw_create_opts, - .bdrv_aio_readv = raw_aio_readv, - .bdrv_aio_writev = raw_aio_writev, + .bdrv_co_readv = raw_co_readv, + .bdrv_co_writev = raw_co_writev, .bdrv_aio_flush = raw_aio_flush, .bdrv_refresh_limits = raw_refresh_limits, .bdrv_io_plug = raw_aio_plug, @@ -2383,8 +2377,8 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_create = hdev_create, .create_opts = &raw_create_opts, - .bdrv_aio_readv = raw_aio_readv, - .bdrv_aio_writev = raw_aio_writev, + .bdrv_co_readv = raw_co_readv, + .bdrv_co_writev = raw_co_writev, .bdrv_aio_flush = raw_aio_flush, .bdrv_refresh_limits = raw_refresh_limits, .bdrv_io_plug = raw_aio_plug, @@ -2520,8 +2514,8 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_create = hdev_create, .create_opts = &raw_create_opts, - .bdrv_aio_readv = raw_aio_readv, - .bdrv_aio_writev = raw_aio_writev, + .bdrv_co_readv = raw_co_readv, + .bdrv_co_writev = raw_co_writev, .bdrv_aio_flush = raw_aio_flush, .bdrv_refresh_limits = raw_refresh_limits, .bdrv_io_plug = raw_aio_plug, -- 1.8.3.1