Allow virtio-scsi virtqueues to be assigned to different IOThreads. This makes it possible to take advantage of host multi-queue block layer scalability by assigning virtqueues that have affinity with vCPUs to different IOThreads that have affinity with host CPUs. The same feature was introduced for virtio-blk in the past: https://developers.redhat.com/articles/2024/09/05/scaling-virtio-blk-disk-io-iothread-virtqueue-mapping
Here are fio randread 4k iodepth=64 results from a 4 vCPU guest with an Intel P4800X SSD: iothreads IOPS ------------------------------ 1 189576 2 312698 4 346744 Signed-off-by: Stefan Hajnoczi <stefa...@redhat.com> --- include/hw/virtio/virtio-scsi.h | 5 +- hw/scsi/virtio-scsi-dataplane.c | 90 ++++++++++++++++++++++++--------- hw/scsi/virtio-scsi.c | 63 ++++++++++++++--------- 3 files changed, 107 insertions(+), 51 deletions(-) diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h index 7b7e3ced7a..086201efa2 100644 --- a/include/hw/virtio/virtio-scsi.h +++ b/include/hw/virtio/virtio-scsi.h @@ -22,6 +22,7 @@ #include "hw/virtio/virtio.h" #include "hw/scsi/scsi.h" #include "chardev/char-fe.h" +#include "qapi/qapi-types-virtio.h" #include "system/iothread.h" #define TYPE_VIRTIO_SCSI_COMMON "virtio-scsi-common" @@ -60,6 +61,7 @@ struct VirtIOSCSIConf { CharBackend chardev; uint32_t boot_tpgt; IOThread *iothread; + IOThreadVirtQueueMappingList *iothread_vq_mapping_list; }; struct VirtIOSCSI; @@ -97,7 +99,7 @@ struct VirtIOSCSI { QTAILQ_HEAD(, VirtIOSCSIReq) tmf_bh_list; /* Fields for dataplane below */ - AioContext *ctx; /* one iothread per virtio-scsi-pci for now */ + AioContext **vq_aio_context; /* per-virtqueue AioContext pointer */ bool dataplane_started; bool dataplane_starting; @@ -115,6 +117,7 @@ void virtio_scsi_common_realize(DeviceState *dev, void virtio_scsi_common_unrealize(DeviceState *dev); void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp); +void virtio_scsi_dataplane_cleanup(VirtIOSCSI *s); int virtio_scsi_dataplane_start(VirtIODevice *s); void virtio_scsi_dataplane_stop(VirtIODevice *s); diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c index f49ab98ecc..6bb368c8a5 100644 --- a/hw/scsi/virtio-scsi-dataplane.c +++ b/hw/scsi/virtio-scsi-dataplane.c @@ -18,6 +18,7 @@ #include "system/block-backend.h" #include "hw/scsi/scsi.h" #include "scsi/constants.h" +#include "hw/virtio/iothread-vq-mapping.h" #include "hw/virtio/virtio-bus.h" /* Context: BQL held */ @@ -27,8 +28,16 @@ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp) VirtIODevice *vdev = VIRTIO_DEVICE(s); BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + uint16_t num_vqs = vs->conf.num_queues + VIRTIO_SCSI_VQ_NUM_FIXED; - if (vs->conf.iothread) { + if (vs->conf.iothread && vs->conf.iothread_vq_mapping_list) { + error_setg(errp, + "iothread and iothread-vq-mapping properties cannot be set " + "at the same time"); + return; + } + + if (vs->conf.iothread || vs->conf.iothread_vq_mapping_list) { if (!k->set_guest_notifiers || !k->ioeventfd_assign) { error_setg(errp, "device is incompatible with iothread " @@ -39,15 +48,50 @@ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp) error_setg(errp, "ioeventfd is required for iothread"); return; } - s->ctx = iothread_get_aio_context(vs->conf.iothread); + } + + s->vq_aio_context = g_new(AioContext *, num_vqs); + + if (vs->conf.iothread_vq_mapping_list) { + if (!iothread_vq_mapping_apply(vs->conf.iothread_vq_mapping_list, + s->vq_aio_context, num_vqs, errp)) { + g_free(s->vq_aio_context); + s->vq_aio_context = NULL; + return; + } + } else if (vs->conf.iothread) { + AioContext *ctx = iothread_get_aio_context(vs->conf.iothread); + for (uint16_t i = 0; i < num_vqs; i++) { + s->vq_aio_context[i] = ctx; + } + + /* Released in virtio_scsi_dataplane_cleanup() */ + object_ref(OBJECT(vs->conf.iothread)); } else { - if (!virtio_device_ioeventfd_enabled(vdev)) { - return; + AioContext *ctx = qemu_get_aio_context(); + for (unsigned i = 0; i < num_vqs; i++) { + s->vq_aio_context[i] = ctx; } - s->ctx = qemu_get_aio_context(); } } +/* Context: BQL held */ +void virtio_scsi_dataplane_cleanup(VirtIOSCSI *s) +{ + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); + + if (vs->conf.iothread_vq_mapping_list) { + iothread_vq_mapping_cleanup(vs->conf.iothread_vq_mapping_list); + } + + if (vs->conf.iothread) { + object_unref(OBJECT(vs->conf.iothread)); + } + + g_free(s->vq_aio_context); + s->vq_aio_context = NULL; +} + static int virtio_scsi_set_host_notifier(VirtIOSCSI *s, VirtQueue *vq, int n) { BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); @@ -66,31 +110,20 @@ static int virtio_scsi_set_host_notifier(VirtIOSCSI *s, VirtQueue *vq, int n) } /* Context: BH in IOThread */ -static void virtio_scsi_dataplane_stop_bh(void *opaque) +static void virtio_scsi_dataplane_stop_vq_bh(void *opaque) { - VirtIOSCSI *s = opaque; - VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); + AioContext *ctx = qemu_get_current_aio_context(); + VirtQueue *vq = opaque; EventNotifier *host_notifier; - int i; - virtio_queue_aio_detach_host_notifier(vs->ctrl_vq, s->ctx); - host_notifier = virtio_queue_get_host_notifier(vs->ctrl_vq); + virtio_queue_aio_detach_host_notifier(vq, ctx); + host_notifier = virtio_queue_get_host_notifier(vq); /* * Test and clear notifier after disabling event, in case poll callback * didn't have time to run. */ virtio_queue_host_notifier_read(host_notifier); - - virtio_queue_aio_detach_host_notifier(vs->event_vq, s->ctx); - host_notifier = virtio_queue_get_host_notifier(vs->event_vq); - virtio_queue_host_notifier_read(host_notifier); - - for (i = 0; i < vs->conf.num_queues; i++) { - virtio_queue_aio_detach_host_notifier(vs->cmd_vqs[i], s->ctx); - host_notifier = virtio_queue_get_host_notifier(vs->cmd_vqs[i]); - virtio_queue_host_notifier_read(host_notifier); - } } /* Context: BQL held */ @@ -154,11 +187,14 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) smp_wmb(); /* paired with aio_notify_accept() */ if (s->bus.drain_count == 0) { - virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx); - virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx); + virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, + s->vq_aio_context[0]); + virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, + s->vq_aio_context[1]); for (i = 0; i < vs->conf.num_queues; i++) { - virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx); + AioContext *ctx = s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i]; + virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], ctx); } } return 0; @@ -207,7 +243,11 @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev) s->dataplane_stopping = true; if (s->bus.drain_count == 0) { - aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s); + for (i = 0; i < vs->conf.num_queues + VIRTIO_SCSI_VQ_NUM_FIXED; i++) { + VirtQueue *vq = virtio_get_queue(&vs->parent_obj, i); + AioContext *ctx = s->vq_aio_context[i]; + aio_wait_bh_oneshot(ctx, virtio_scsi_dataplane_stop_vq_bh, vq); + } } blk_drain_all(); /* ensure there are no in-flight requests */ diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 2045d27289..dabf8ace23 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -27,6 +27,7 @@ #include "hw/qdev-properties.h" #include "hw/scsi/scsi.h" #include "scsi/constants.h" +#include "hw/virtio/iothread-vq-mapping.h" #include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-access.h" #include "trace.h" @@ -318,13 +319,6 @@ static void virtio_scsi_cancel_notify(Notifier *notifier, void *data) g_free(n); } -static inline void virtio_scsi_ctx_check(VirtIOSCSI *s, SCSIDevice *d) -{ - if (s->dataplane_started && d && blk_is_available(d->conf.blk)) { - assert(blk_get_aio_context(d->conf.blk) == s->ctx); - } -} - static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req) { VirtIOSCSI *s = req->dev; @@ -517,9 +511,11 @@ static void virtio_scsi_flush_defer_tmf_to_aio_context(VirtIOSCSI *s) assert(!s->dataplane_started); - if (s->ctx) { + for (uint32_t i = 0; i < s->parent_obj.conf.num_queues; i++) { + AioContext *ctx = s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i]; + /* Our BH only runs after previously scheduled BHs */ - aio_wait_bh_oneshot(s->ctx, dummy_bh, NULL); + aio_wait_bh_oneshot(ctx, dummy_bh, NULL); } } @@ -575,7 +571,6 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) AioContext *ctx; int ret = 0; - virtio_scsi_ctx_check(s, d); /* Here VIRTIO_SCSI_S_OK means "FUNCTION COMPLETE". */ req->resp.tmf.response = VIRTIO_SCSI_S_OK; @@ -639,6 +634,8 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET: case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET: { + g_autoptr(GHashTable) aio_contexts = g_hash_table_new(NULL, NULL); + if (!d) { goto fail; } @@ -648,8 +645,15 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) qatomic_inc(&req->remaining); - ctx = s->ctx ?: qemu_get_aio_context(); - virtio_scsi_defer_tmf_to_aio_context(req, ctx); + for (uint32_t i = 0; i < s->parent_obj.conf.num_queues; i++) { + ctx = s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i]; + + if (!g_hash_table_add(aio_contexts, ctx)) { + continue; /* skip previously added AioContext */ + } + + virtio_scsi_defer_tmf_to_aio_context(req, ctx); + } virtio_scsi_tmf_dec_remaining(req); ret = -EINPROGRESS; @@ -770,9 +774,12 @@ static void virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq) */ static bool virtio_scsi_defer_to_dataplane(VirtIOSCSI *s) { - if (!s->ctx || s->dataplane_started) { + if (s->dataplane_started) { return false; } + if (s->vq_aio_context[0] == qemu_get_aio_context()) { + return false; /* not using IOThreads */ + } virtio_device_start_ioeventfd(&s->parent_obj.parent_obj); return !s->dataplane_fenced; @@ -946,7 +953,6 @@ static int virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, VirtIOSCSIReq *req) virtio_scsi_complete_cmd_req(req); return -ENOENT; } - virtio_scsi_ctx_check(s, d); req->sreq = scsi_req_new(d, req->req.cmd.tag, virtio_scsi_get_lun(req->req.cmd.lun), req->req.cmd.cdb, vs->cdb_size, req); @@ -1218,14 +1224,16 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, { VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev); VirtIOSCSI *s = VIRTIO_SCSI(vdev); + AioContext *ctx = s->vq_aio_context[0]; SCSIDevice *sd = SCSI_DEVICE(dev); - int ret; - if (s->ctx && !s->dataplane_fenced) { - ret = blk_set_aio_context(sd->conf.blk, s->ctx, errp); - if (ret < 0) { - return; - } + if (ctx != qemu_get_aio_context() && !s->dataplane_fenced) { + /* + * Try to make the BlockBackend's AioContext match ours. Ignore failure + * because I/O will still work although block jobs and other users + * might be slower when multiple AioContexts use a BlockBackend. + */ + blk_set_aio_context(sd->conf.blk, ctx, errp); } if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) { @@ -1260,7 +1268,7 @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev, qdev_simple_device_unplug_cb(hotplug_dev, dev, errp); - if (s->ctx) { + if (s->vq_aio_context[0] != qemu_get_aio_context()) { /* If other users keep the BlockBackend in the iothread, that's ok */ blk_set_aio_context(sd->conf.blk, qemu_get_aio_context(), NULL); } @@ -1294,7 +1302,7 @@ static void virtio_scsi_drained_begin(SCSIBus *bus) for (uint32_t i = 0; i < total_queues; i++) { VirtQueue *vq = virtio_get_queue(vdev, i); - virtio_queue_aio_detach_host_notifier(vq, s->ctx); + virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]); } } @@ -1320,10 +1328,12 @@ static void virtio_scsi_drained_end(SCSIBus *bus) for (uint32_t i = 0; i < total_queues; i++) { VirtQueue *vq = virtio_get_queue(vdev, i); + AioContext *ctx = s->vq_aio_context[i]; + if (vq == vs->event_vq) { - virtio_queue_aio_attach_host_notifier_no_poll(vq, s->ctx); + virtio_queue_aio_attach_host_notifier_no_poll(vq, ctx); } else { - virtio_queue_aio_attach_host_notifier(vq, s->ctx); + virtio_queue_aio_attach_host_notifier(vq, ctx); } } } @@ -1430,12 +1440,13 @@ void virtio_scsi_common_unrealize(DeviceState *dev) virtio_cleanup(vdev); } +/* main loop */ static void virtio_scsi_device_unrealize(DeviceState *dev) { VirtIOSCSI *s = VIRTIO_SCSI(dev); virtio_scsi_reset_tmf_bh(s); - + virtio_scsi_dataplane_cleanup(s); qbus_set_hotplug_handler(BUS(&s->bus), NULL); virtio_scsi_common_unrealize(dev); qemu_mutex_destroy(&s->tmf_bh_lock); @@ -1460,6 +1471,8 @@ static const Property virtio_scsi_properties[] = { VIRTIO_SCSI_F_CHANGE, true), DEFINE_PROP_LINK("iothread", VirtIOSCSI, parent_obj.conf.iothread, TYPE_IOTHREAD, IOThread *), + DEFINE_PROP_IOTHREAD_VQ_MAPPING_LIST("iothread-vq-mapping", VirtIOSCSI, + parent_obj.conf.iothread_vq_mapping_list), }; static const VMStateDescription vmstate_virtio_scsi = { -- 2.48.1