Automatically size the number of virtio-blk-pci request virtqueues to match the number of vCPUs. Other transports continue to default to 1 request virtqueue.
A 1:1 virtqueue:vCPU mapping ensures that completion interrupts are handled on the same vCPU that submitted the request. No IPI is necessary to complete an I/O request and performance is improved. Performance improves from 78k to 104k IOPS on a 32 vCPU guest with 101 virtio-blk-pci devices (ioengine=libaio, iodepth=1, bs=4k, rw=randread with NVMe storage). Signed-off-by: Stefan Hajnoczi <stefa...@redhat.com> Reviewed-by: Cornelia Huck <coh...@redhat.com> --- hw/block/virtio-blk.c | 6 +++++- hw/core/machine.c | 1 + hw/virtio/virtio-blk-pci.c | 9 ++++++++- include/hw/virtio/virtio-blk.h | 2 ++ 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 142863a3b2..ed5df71779 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -1135,6 +1135,9 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) error_setg(errp, "Device needs media, but drive is empty"); return; } + if (conf->num_queues == VIRTIO_BLK_AUTO_NUM_QUEUES) { + conf->num_queues = 1; + } if (!conf->num_queues) { error_setg(errp, "num-queues property must be larger than 0"); return; @@ -1271,7 +1274,8 @@ static Property virtio_blk_properties[] = { #endif DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0, true), - DEFINE_PROP_UINT16("num-queues", VirtIOBlock, conf.num_queues, 1), + DEFINE_PROP_UINT16("num-queues", VirtIOBlock, conf.num_queues, + VIRTIO_BLK_AUTO_NUM_QUEUES), DEFINE_PROP_UINT16("queue-size", VirtIOBlock, conf.queue_size, 256), DEFINE_PROP_BOOL("seg-max-adjust", VirtIOBlock, conf.seg_max_adjust, true), DEFINE_PROP_LINK("iothread", VirtIOBlock, conf.iothread, TYPE_IOTHREAD, diff --git a/hw/core/machine.c b/hw/core/machine.c index 4bbcec8fbd..c993b8f489 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -31,6 +31,7 @@ GlobalProperty hw_compat_4_2[] = { { "virtio-blk-device", "queue-size", "128"}, { "virtio-scsi-device", "virtqueue_size", "128"}, + { "virtio-blk-device", "num-queues", "1"}, { "virtio-blk-device", "x-enable-wce-if-config-wce", "off" }, { "virtio-blk-device", "seg-max-adjust", "off"}, { "virtio-scsi-device", "num_queues", "1"}, diff --git a/hw/virtio/virtio-blk-pci.c b/hw/virtio/virtio-blk-pci.c index efb2c22a1d..bf628de675 100644 --- a/hw/virtio/virtio-blk-pci.c +++ b/hw/virtio/virtio-blk-pci.c @@ -17,6 +17,7 @@ #include "qemu/osdep.h" +#include "hw/boards.h" #include "hw/qdev-properties.h" #include "hw/virtio/virtio-blk.h" #include "virtio-pci.h" @@ -50,9 +51,15 @@ static void virtio_blk_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) { VirtIOBlkPCI *dev = VIRTIO_BLK_PCI(vpci_dev); DeviceState *vdev = DEVICE(&dev->vdev); + VirtIOBlkConf *conf = &dev->vdev.conf; + + /* 1:1 vq to vcpu mapping is ideal because it avoids IPIs */ + if (conf->num_queues == VIRTIO_BLK_AUTO_NUM_QUEUES) { + conf->num_queues = current_machine->smp.cpus; + } if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) { - vpci_dev->nvectors = dev->vdev.conf.num_queues + 1; + vpci_dev->nvectors = conf->num_queues + 1; } qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index 1e62f869b2..4e5e903f4a 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -30,6 +30,8 @@ struct virtio_blk_inhdr unsigned char status; }; +#define VIRTIO_BLK_AUTO_NUM_QUEUES UINT16_MAX + struct VirtIOBlkConf { BlockConf conf; -- 2.24.1