virtio-blk and virtio-scsi invoke virtio_notify_irqfd() to send Used Buffer Notifications from an IOThread. This involves an eventfd write(2) syscall. Calling this repeatedly when completing multiple I/O requests in a row is wasteful.
Use the blk_io_plug_call() API to batch together virtio_notify_irqfd() calls made during Linux AIO (aio=native) or io_uring (aio=io_uring) completion processing. Do not modify the thread pool (aio=threads) to avoid introducing a dependency from util/ onto the block layer. Behavior is unchanged for emulated devices that do not use blk_io_plug() since blk_io_plug_call() immediately invokes the callback when called outside a blk_io_plug()/blk_io_unplug() region. fio rw=randread bs=4k iodepth=64 numjobs=8 IOPS increases by ~9% with a single IOThread and 8 vCPUs. iodepth=1 decreases by ~1% but this could be noise. Detailed performance data and configuration specifics are available here: https://gitlab.com/stefanha/virt-playbooks/-/tree/blk_io_plug-irqfd This duplicates the BH that virtio-blk uses for batching. The next commit will remove it. Signed-off-by: Stefan Hajnoczi <stefa...@redhat.com> --- block/io_uring.c | 6 ++++++ block/linux-aio.c | 4 ++++ hw/virtio/virtio.c | 10 +++++++++- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/block/io_uring.c b/block/io_uring.c index 69d9820928..749cf83934 100644 --- a/block/io_uring.c +++ b/block/io_uring.c @@ -124,6 +124,9 @@ static void luring_process_completions(LuringState *s) { struct io_uring_cqe *cqes; int total_bytes; + + blk_io_plug(); + /* * Request completion callbacks can run the nested event loop. 
* Schedule ourselves so the nested event loop will "see" remaining @@ -216,7 +219,10 @@ end: aio_co_wake(luringcb->co); } } + qemu_bh_cancel(s->completion_bh); + + blk_io_unplug(); } static int ioq_submit(LuringState *s) diff --git a/block/linux-aio.c b/block/linux-aio.c index 561c71a9ae..cef3d6b1c7 100644 --- a/block/linux-aio.c +++ b/block/linux-aio.c @@ -204,6 +204,8 @@ static void qemu_laio_process_completions(LinuxAioState *s) { struct io_event *events; + blk_io_plug(); + /* Reschedule so nested event loops see currently pending completions */ qemu_bh_schedule(s->completion_bh); @@ -230,6 +232,8 @@ static void qemu_laio_process_completions(LinuxAioState *s) * own `for` loop. If we are the last all counters droped to zero. */ s->event_max = 0; s->event_idx = 0; + + blk_io_unplug(); } static void qemu_laio_process_completions_and_submit(LinuxAioState *s) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 309038fd46..a691e8526b 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -28,6 +28,7 @@ #include "hw/virtio/virtio-bus.h" #include "hw/qdev-properties.h" #include "hw/virtio/virtio-access.h" +#include "sysemu/block-backend.h" #include "sysemu/dma.h" #include "sysemu/runstate.h" #include "virtio-qmp.h" @@ -2426,6 +2427,13 @@ static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) } } +/* Batch irqs while inside a blk_io_plug()/blk_io_unplug() section */ +static void virtio_notify_irqfd_unplug_fn(void *opaque) +{ + EventNotifier *notifier = opaque; + event_notifier_set(notifier); +} + void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq) { WITH_RCU_READ_LOCK_GUARD() { @@ -2452,7 +2460,7 @@ void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq) * to an atomic operation. */ virtio_set_isr(vq->vdev, 0x1); - event_notifier_set(&vq->guest_notifier); + blk_io_plug_call(virtio_notify_irqfd_unplug_fn, &vq->guest_notifier); } static void virtio_irq(VirtQueue *vq) -- 2.41.0