[PATCH] vhost: configure all host notifiers in a single MR transaction
From: Longpeng This allows the vhost device to batch the setup of all its host notifiers. This significantly reduces the device starting time, e.g. the vhost-vDPA generic device [1] start time reduce from 376ms to 9.1ms for a VM with 64 vCPUs and 3 vDPA device(64vq per device). [1] https://www.mail-archive.com/qemu-devel@nongnu.org/msg921541.html Signed-off-by: Longpeng --- hw/virtio/vhost.c | 39 ++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index d1c4c20b8c..bf82d9b176 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1507,6 +1507,7 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) { BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); +int vq_init_count = 0; int i, r, e; /* We will pass the notifiers to the kernel, make sure that QEMU @@ -1518,6 +1519,12 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) goto fail; } +/* + * Batch all the host notifiers in a single transaction to avoid + * quadratic time complexity in address_space_update_ioeventfds(). + */ +memory_region_transaction_begin(); + for (i = 0; i < hdev->nvqs; ++i) { r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, true); @@ -1525,19 +1532,33 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) error_report("vhost VQ %d notifier binding failed: %d", i, -r); goto fail_vq; } + +vq_init_count++; } +memory_region_transaction_commit(); + return 0; fail_vq: -while (--i >= 0) { +for (i = 0; i < vq_init_count; i++) { e = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, false); if (e < 0) { error_report("vhost VQ %d notifier cleanup error: %d", i, -r); } assert (e >= 0); +} + +/* + * The transaction expects the ioeventfds to be open when it + * commits. Do it now, before the cleanup loop. 
+ */ +memory_region_transaction_commit(); + +for (i = 0; i < vq_init_count; i++) { virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i); } + virtio_device_release_ioeventfd(vdev); fail: return r; @@ -1553,6 +1574,12 @@ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); int i, r; +/* + * Batch all the host notifiers in a single transaction to avoid + * quadratic time complexity in address_space_update_ioeventfds(). + */ +memory_region_transaction_begin(); + for (i = 0; i < hdev->nvqs; ++i) { r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, false); @@ -1560,8 +1587,18 @@ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) error_report("vhost VQ %d notifier cleanup failed: %d", i, -r); } assert (r >= 0); +} + +/* + * The transaction expects the ioeventfds to be open when it + * commits. Do it now, before the cleanup loop. + */ +memory_region_transaction_commit(); + +for (i = 0; i < hdev->nvqs; ++i) { virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i); } + virtio_device_release_ioeventfd(vdev); } -- 2.23.0
[PATCH v10 1/5] virtio: get class_id and pci device id by the virtio id
From: Longpeng Add helpers to get the "Transitional PCI Device ID" and "class_id" of the device specified by the "Virtio Device ID". These helpers will be used to build the generic vDPA device later. Acked-by: Jason Wang Signed-off-by: Longpeng --- hw/virtio/virtio-pci.c | 88 ++ include/hw/virtio/virtio-pci.h | 5 ++ 2 files changed, 93 insertions(+) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index a1c9dfa7bb..a602f670ca 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -19,6 +19,7 @@ #include "exec/memop.h" #include "standard-headers/linux/virtio_pci.h" +#include "standard-headers/linux/virtio_ids.h" #include "hw/boards.h" #include "hw/virtio/virtio.h" #include "migration/qemu-file-types.h" @@ -224,6 +225,90 @@ static int virtio_pci_load_queue(DeviceState *d, int n, QEMUFile *f) return 0; } +typedef struct VirtIOPCIIDInfo { +/* virtio id */ +uint16_t vdev_id; +/* pci device id for the transitional device */ +uint16_t trans_devid; +uint16_t class_id; +} VirtIOPCIIDInfo; + +static const VirtIOPCIIDInfo virtio_pci_id_info[] = { +{ +.vdev_id = VIRTIO_ID_CRYPTO, +.class_id = PCI_CLASS_OTHERS, +}, { +.vdev_id = VIRTIO_ID_FS, +.class_id = PCI_CLASS_STORAGE_OTHER, +}, { +.vdev_id = VIRTIO_ID_NET, +.trans_devid = PCI_DEVICE_ID_VIRTIO_NET, +.class_id = PCI_CLASS_NETWORK_ETHERNET, +}, { +.vdev_id = VIRTIO_ID_BLOCK, +.trans_devid = PCI_DEVICE_ID_VIRTIO_BLOCK, +.class_id = PCI_CLASS_STORAGE_SCSI, +}, { +.vdev_id = VIRTIO_ID_CONSOLE, +.trans_devid = PCI_DEVICE_ID_VIRTIO_CONSOLE, +.class_id = PCI_CLASS_COMMUNICATION_OTHER, +}, { +.vdev_id = VIRTIO_ID_SCSI, +.trans_devid = PCI_DEVICE_ID_VIRTIO_SCSI, +.class_id = PCI_CLASS_STORAGE_SCSI +}, { +.vdev_id = VIRTIO_ID_9P, +.trans_devid = PCI_DEVICE_ID_VIRTIO_9P, +.class_id = PCI_BASE_CLASS_NETWORK, +}, { +.vdev_id = VIRTIO_ID_BALLOON, +.trans_devid = PCI_DEVICE_ID_VIRTIO_BALLOON, +.class_id = PCI_CLASS_OTHERS, +}, { +.vdev_id = VIRTIO_ID_RNG, +.trans_devid = PCI_DEVICE_ID_VIRTIO_RNG, +.class_id = 
PCI_CLASS_OTHERS, +}, +}; + +static const VirtIOPCIIDInfo *virtio_pci_get_id_info(uint16_t vdev_id) +{ +const VirtIOPCIIDInfo *info = NULL; +int i; + +for (i = 0; i < ARRAY_SIZE(virtio_pci_id_info); i++) { +if (virtio_pci_id_info[i].vdev_id == vdev_id) { +info = &virtio_pci_id_info[i]; +break; +} +} + +if (!info) { +/* The device id is invalid or not added to the id_info yet. */ +error_report("Invalid virtio device(id %u)", vdev_id); +abort(); +} + +return info; +} + +/* + * Get the Transitional Device ID for the specific device, return + * zero if the device is non-transitional. + */ +uint16_t virtio_pci_get_trans_devid(uint16_t device_id) +{ +return virtio_pci_get_id_info(device_id)->trans_devid; +} + +/* + * Get the Class ID for the specific device. + */ +uint16_t virtio_pci_get_class_id(uint16_t device_id) +{ +return virtio_pci_get_id_info(device_id)->class_id; +} + static bool virtio_pci_ioeventfd_enabled(DeviceState *d) { VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); @@ -1729,6 +1814,9 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) * is set to PCI_SUBVENDOR_ID_REDHAT_QUMRANET by default. 
*/ pci_set_word(config + PCI_SUBSYSTEM_ID, virtio_bus_get_vdev_id(bus)); +if (proxy->trans_devid) { +pci_config_set_device_id(config, proxy->trans_devid); +} } else { /* pure virtio-1.0 */ pci_set_word(config + PCI_VENDOR_ID, diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h index 938799e8f6..24fba1604b 100644 --- a/include/hw/virtio/virtio-pci.h +++ b/include/hw/virtio/virtio-pci.h @@ -151,6 +151,8 @@ struct VirtIOPCIProxy { bool disable_modern; bool ignore_backend_features; OnOffAuto disable_legacy; +/* Transitional device id */ +uint16_t trans_devid; uint32_t class_code; uint32_t nvectors; uint32_t dfselect; @@ -184,6 +186,9 @@ static inline void virtio_pci_disable_modern(VirtIOPCIProxy *proxy) proxy->disable_modern = true; } +uint16_t virtio_pci_get_trans_devid(uint16_t device_id); +uint16_t virtio_pci_get_class_id(uint16_t device_id); + /* * virtio-input-pci: This extends VirtioPCIProxy. */ -- 2.23.0
[PATCH v10 0/5] add generic vDPA device support
From: Longpeng Hi guys, With the generic vDPA device, QEMU won't need to touch the device types any more, just like vfio. We can use the generic vDPA device as follows: -device vhost-vdpa-device-pci,vhostdev=/dev/vhost-vdpa-X Or -M microvm -m 512m -smp 2 -kernel ... -initrd ... -device \ vhost-vdpa-device,vhostdev=/dev/vhost-vdpa-x Changes v10 -> v9: - optimize the doc [Jason] Changes v9 -> v8: - rename vhost-vdpa-device.rst to vhost-vdpa-generic-device.rst [Jason, Stefano] - emphasize the vhost-vDPA generic device in doc [Jason] Changes v8 -> v7: - add migration blocker. [Michael] Changes v6 -> v7: (v6: https://mail.gnu.org/archive/html/qemu-devel/2022-05/msg02821.html) - rebase. [Jason] - add documentation. [Stefan] Changes v5 -> v6: Patch 2: - Turn to the original approach in the RFC to initialize the virtio_pci_id_info array. [Michael] https://lore.kernel.org/all/20220105005900.860-2-longpe...@huawei.com/ Patch 3: - Fix logical error of exception handler around the post_init. [Stefano] - Fix some coding style warnings. [Stefano] Patch 4: - Fix some coding style warnings. [Stefano] Changes v4 -> v5: Patch 3: - remove vhostfd [Jason] - support virtio-mmio [Jason] Changes v3 -> v4: v3: https://www.mail-archive.com/qemu-devel@nongnu.org/msg877015.html - reorganize the series [Stefano] - fix some typos [Stefano] - fix logical error in vhost_vdpa_device_realize [Stefano] Changes v2 -> v3 Patch 4 & 5: - only call vdpa ioctls in vdpa-dev.c [Stefano, Longpeng] - s/VQS_NUM/VQS_COUNT [Stefano] - check both vdpa_dev_fd and vdpa_dev [Stefano] Patch 6: - move all steps into vhost_vdpa_device_unrealize.
[Stefano] Changes RFC -> v2 Patch 1: - rename 'pdev_id' to 'trans_devid' [Michael] - only use transitional device id for the devices listed in the spec [Michael] - use macros to make the id_info table clearer [Longpeng] - add some modern devices in the id_info table [Longpeng] Patch 2: - remove the GET_VECTORS_NUM command [Jason] Patch 4: - expose vdpa_dev_fd as a QOM property [Stefan] - introduce vhost_vdpa_device_get_u32 as a common function to make the code clearer [Stefan] - fix the misleading description of 'dc->desc' [Stefano] Patch 5: - check returned number of virtqueues [Stefan] Patch 6: - init s->num_queues [Stefano] - free s->dev.vqs [Stefano] Longpeng (Mike) (5): virtio: get class_id and pci device id by the virtio id vdpa: add vdpa-dev support vdpa: add vdpa-dev-pci support vdpa-dev: mark the device as unmigratable docs: Add generic vhost-vdpa device documentation .../devices/vhost-vdpa-generic-device.rst | 66 +++ hw/virtio/Kconfig | 5 + hw/virtio/meson.build | 2 + hw/virtio/vdpa-dev-pci.c | 102 + hw/virtio/vdpa-dev.c | 377 ++ hw/virtio/virtio-pci.c| 88 include/hw/virtio/vdpa-dev.h | 43 ++ include/hw/virtio/virtio-pci.h| 5 + 8 files changed, 688 insertions(+) create mode 100644 docs/system/devices/vhost-vdpa-generic-device.rst create mode 100644 hw/virtio/vdpa-dev-pci.c create mode 100644 hw/virtio/vdpa-dev.c create mode 100644 include/hw/virtio/vdpa-dev.h -- 2.23.0
[PATCH v10 5/5] docs: Add generic vhost-vdpa device documentation
From: Longpeng Signed-off-by: Longpeng --- .../devices/vhost-vdpa-generic-device.rst | 66 +++ 1 file changed, 66 insertions(+) create mode 100644 docs/system/devices/vhost-vdpa-generic-device.rst diff --git a/docs/system/devices/vhost-vdpa-generic-device.rst b/docs/system/devices/vhost-vdpa-generic-device.rst new file mode 100644 index 00..7d13359ea1 --- /dev/null +++ b/docs/system/devices/vhost-vdpa-generic-device.rst @@ -0,0 +1,66 @@ + += +vhost-vDPA generic device += + +This document explains the usage of the vhost-vDPA generic device. + +Description +--- + +vDPA (virtio data path acceleration) device is a device that uses a datapath +which complies with the virtio specifications with vendor-specific control +path. + +QEMU provides two types of vhost-vDPA devices to enable the vDPA device, one +is type sensitive which means QEMU needs to know the actual device type +(e.g. net, blk, scsi) and another is called "vhost-vDPA generic device" which +is type insensitive. + +The vhost-vDPA generic device builds on the vhost-vdpa subsystem and virtio +subsystem. It is quite small, but it can support any type of virtio device. + +Examples + + +1. Please make sure the modules listed below are installed: +vhost.ko +vhost_iotlb.ko +vdpa.ko +vhost_vdpa.ko + + +2. Prepare the vhost-vDPA backends, here is an example using vdpa_sim_blk + device: + +:: + host# modprobe vdpa_sim_blk + host# vdpa dev add mgmtdev vdpasim_blk name blk0 + (...you can see the vhost-vDPA device under /dev directory now...) + host# ls -l /dev/vhost-vdpa-* + crw--- 1 root root 236, 0 Nov 2 00:49 /dev/vhost-vdpa-0 + +Note: +It needs some vendor-specific steps to provision the vDPA device if you're +using real HW devices, such as installing the vendor-specific vDPA driver +and binding the device to the driver. + + +3. Start the virtual machine: + +Start QEMU with virtio-mmio bus: + +:: + host# qemu-system \ + -M microvm -m 512 -smp 2 -kernel ... -initrd ...
\ + -device vhost-vdpa-device,vhostdev=/dev/vhost-vdpa-0 \ + ... + + +Start QEMU with virtio-pci bus: + +:: + host# qemu-system \ + -M pc -m 512 -smp 2\ + -device vhost-vdpa-device-pci,vhostdev=/dev/vhost-vdpa-0 \ + ... -- 2.23.0
[PATCH v10 2/5] vdpa: add vdpa-dev support
From: Longpeng Support vdpa-dev, so that the device can be used directly: -M microvm -m 512m -smp 2 -kernel ... -initrd ... -device \ vhost-vdpa-device,vhostdev=/dev/vhost-vdpa-x Reviewed-by: Stefano Garzarella Acked-by: Jason Wang Signed-off-by: Longpeng --- hw/virtio/Kconfig| 5 + hw/virtio/meson.build| 1 + hw/virtio/vdpa-dev.c | 376 +++ include/hw/virtio/vdpa-dev.h | 43 4 files changed, 425 insertions(+) create mode 100644 hw/virtio/vdpa-dev.c create mode 100644 include/hw/virtio/vdpa-dev.h diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig index cbfd8c7173..89e9e426d8 100644 --- a/hw/virtio/Kconfig +++ b/hw/virtio/Kconfig @@ -85,3 +85,8 @@ config VHOST_USER_GPIO bool default y depends on VIRTIO && VHOST_USER + +config VHOST_VDPA_DEV +bool +default y +depends on VIRTIO && VHOST_VDPA && LINUX diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index dfed1e7af5..54d6d29af7 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -31,6 +31,7 @@ virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) virtio_ss.add(when: 'CONFIG_VHOST_USER_GPIO', if_true: files('vhost-user-gpio.c')) virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_GPIO'], if_true: files('vhost-user-gpio-pci.c')) +virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c')) virtio_pci_ss = ss.source_set() virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c')) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c new file mode 100644 index 00..dbc4f8001d --- /dev/null +++ b/hw/virtio/vdpa-dev.c @@ -0,0 +1,376 @@ +/* + * Vhost Vdpa Device + * + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All Rights Reserved.
+ * + * Authors: + * Longpeng + * + * Largely based on the "vhost-user-blk-pci.c" and "vhost-user-blk.c" + * implemented by: + * Changpeng Liu + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + */ +#include "qemu/osdep.h" +#include +#include +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/cutils.h" +#include "hw/qdev-core.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/virtio-access.h" +#include "hw/virtio/vdpa-dev.h" +#include "sysemu/sysemu.h" +#include "sysemu/runstate.h" + +static void +vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ +/* Nothing to do */ +} + +static uint32_t +vhost_vdpa_device_get_u32(int fd, unsigned long int cmd, Error **errp) +{ +uint32_t val = (uint32_t)-1; + +if (ioctl(fd, cmd, &val) < 0) { +error_setg(errp, "vhost-vdpa-device: cmd 0x%lx failed: %s", + cmd, strerror(errno)); +} + +return val; +} + +static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) +{ +VirtIODevice *vdev = VIRTIO_DEVICE(dev); +VhostVdpaDevice *v = VHOST_VDPA_DEVICE(vdev); +uint16_t max_queue_size; +struct vhost_virtqueue *vqs; +int i, ret; + +if (!v->vhostdev) { +error_setg(errp, "vhost-vdpa-device: vhostdev are missing"); +return; +} + +v->vhostfd = qemu_open(v->vhostdev, O_RDWR, errp); +if (*errp) { +return; +} +v->vdpa.device_fd = v->vhostfd; + +v->vdev_id = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_DEVICE_ID, errp); +if (*errp) { +goto out; +} + +max_queue_size = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_VRING_NUM, errp); +if (*errp) { +goto out; +} + +if (v->queue_size > max_queue_size) { +error_setg(errp, "vhost-vdpa-device: invalid queue_size: %u (max:%u)", + v->queue_size, max_queue_size); +goto out; +} else if (!v->queue_size) { 
+v->queue_size = max_queue_size; +} + +v->num_queues = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_VQS_COUNT, errp); +if (*errp) { +goto out; +} + +if (!v->num_queues || v->num_queues > VIRTIO_QUEUE_MAX) { +error_setg(errp, "invalid number of virtqueues: %u (max:%u)", + v->num_queues, VIRTIO_QUEUE_MAX); +goto out; +} + +v->dev.nvqs = v->num_queues; +vqs = g_new0(struct vhost_virtqueue, v->dev.nvqs); +v->dev.vqs = vqs; +v->dev.vq_index = 0; +v->dev.vq_index_end = v->dev.nvqs; +v->dev.backend_features = 0; +v->started = false; + +ret = vhost_dev_init(&v->dev, &v->vdpa, VHOST_BACKEND_TYPE_VDPA, 0, NULL); +if (ret < 0) { +error_s
[PATCH v10 4/5] vdpa-dev: mark the device as unmigratable
From: Longpeng The generic vDPA device doesn't support migration currently, so mark it as unmigratable temporarily. Reviewed-by: Stefano Garzarella Acked-by: Jason Wang Signed-off-by: Longpeng --- hw/virtio/vdpa-dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index dbc4f8001d..db6ba61152 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -327,6 +327,7 @@ static Property vhost_vdpa_device_properties[] = { static const VMStateDescription vmstate_vhost_vdpa_device = { .name = "vhost-vdpa-device", +.unmigratable = 1, .minimum_version_id = 1, .version_id = 1, .fields = (VMStateField[]) { -- 2.23.0
[PATCH v10 3/5] vdpa: add vdpa-dev-pci support
From: Longpeng Supports vdpa-dev-pci, we can use the device as follow: -device vhost-vdpa-device-pci,vhostdev=/dev/vhost-vdpa-X Reviewed-by: Stefano Garzarella Acked-by: Jason Wang Signed-off-by: Longpeng --- hw/virtio/meson.build| 1 + hw/virtio/vdpa-dev-pci.c | 102 +++ 2 files changed, 103 insertions(+) create mode 100644 hw/virtio/vdpa-dev-pci.c diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 54d6d29af7..559b80cb28 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -57,6 +57,7 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_SERIAL', if_true: files('virtio-serial-pc virtio_pci_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem-pci.c')) +virtio_pci_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev-pci.c')) virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) diff --git a/hw/virtio/vdpa-dev-pci.c b/hw/virtio/vdpa-dev-pci.c new file mode 100644 index 00..5446e6b393 --- /dev/null +++ b/hw/virtio/vdpa-dev-pci.c @@ -0,0 +1,102 @@ +/* + * Vhost Vdpa Device PCI Bindings + * + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All Rights Reserved. + * + * Authors: + * Longpeng + * + * Largely based on the "vhost-user-blk-pci.c" and "vhost-user-blk.c" + * implemented by: + * Changpeng Liu + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. 
+ */ +#include "qemu/osdep.h" +#include +#include +#include "hw/virtio/virtio.h" +#include "hw/virtio/vdpa-dev.h" +#include "hw/pci/pci.h" +#include "hw/qdev-properties.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/module.h" +#include "hw/virtio/virtio-pci.h" +#include "qom/object.h" + + +typedef struct VhostVdpaDevicePCI VhostVdpaDevicePCI; + +#define TYPE_VHOST_VDPA_DEVICE_PCI "vhost-vdpa-device-pci-base" +DECLARE_INSTANCE_CHECKER(VhostVdpaDevicePCI, VHOST_VDPA_DEVICE_PCI, + TYPE_VHOST_VDPA_DEVICE_PCI) + +struct VhostVdpaDevicePCI { +VirtIOPCIProxy parent_obj; +VhostVdpaDevice vdev; +}; + +static void vhost_vdpa_device_pci_instance_init(Object *obj) +{ +VhostVdpaDevicePCI *dev = VHOST_VDPA_DEVICE_PCI(obj); + +virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), +TYPE_VHOST_VDPA_DEVICE); +object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev), + "bootindex"); +} + +static Property vhost_vdpa_device_pci_properties[] = { +DEFINE_PROP_END_OF_LIST(), +}; + +static int vhost_vdpa_device_pci_post_init(VhostVdpaDevice *v, Error **errp) +{ +VhostVdpaDevicePCI *dev = container_of(v, VhostVdpaDevicePCI, vdev); +VirtIOPCIProxy *vpci_dev = &dev->parent_obj; + +vpci_dev->class_code = virtio_pci_get_class_id(v->vdev_id); +vpci_dev->trans_devid = virtio_pci_get_trans_devid(v->vdev_id); +/* one for config vector */ +vpci_dev->nvectors = v->num_queues + 1; + +return 0; +} + +static void +vhost_vdpa_device_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ +VhostVdpaDevicePCI *dev = VHOST_VDPA_DEVICE_PCI(vpci_dev); + +dev->vdev.post_init = vhost_vdpa_device_pci_post_init; +qdev_realize(DEVICE(&dev->vdev), BUS(&vpci_dev->bus), errp); +} + +static void vhost_vdpa_device_pci_class_init(ObjectClass *klass, void *data) +{ +DeviceClass *dc = DEVICE_CLASS(klass); +VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + +set_bit(DEVICE_CATEGORY_MISC, dc->categories); +device_class_set_props(dc, vhost_vdpa_device_pci_properties); +k->realize = 
vhost_vdpa_device_pci_realize; +} + +static const VirtioPCIDeviceTypeInfo vhost_vdpa_device_pci_info = { +.base_name = TYPE_VHOST_VDPA_DEVICE_PCI, +.generic_name= "vhost-vdpa-device-pci", +.transitional_name = "vhost-vdpa-device-pci-transitional", +.non_transitional_name = "vhost-vdpa-device-pci-non-transitional", +.instance_size = sizeof(VhostVdpaDevicePCI), +.instance_init = vhost_vdpa_device_pci_instance_init, +.class_init = vhost_vdpa_device_pci_class_init, +}; + +static void vhost_vdpa_device_pci_register(void) +{ +virtio_pci_types_register(&vhost_vdpa_device_pci_info); +} + +type_init(vhost_vdpa_device_pci_register); -- 2.23.0
[PATCH v2 0/2] two optimizations to speed up the start time
From: Longpeng Changes v2->v1: Patch-1: - remove vq_init_count [Jason] Patch-2: - new added. [Jason] v1: https://www.mail-archive.com/qemu-devel@nongnu.org/msg922499.html Longpeng (Mike) (2): vhost: configure all host notifiers in a single MR transaction vdpa: commit all host notifier MRs in a single MR transaction hw/virtio/vhost-vdpa.c | 18 ++ hw/virtio/vhost.c | 40 ++-- 2 files changed, 56 insertions(+), 2 deletions(-) -- 2.23.0
[PATCH v2 1/2] vhost: configure all host notifiers in a single MR transaction
From: Longpeng This allows the vhost device to batch the setup of all its host notifiers. This significantly reduces the device starting time, e.g. the time spend on enabling notifiers reduce from 376ms to 9.1ms for a VM with 64 vCPUs and 3 vhost-vDPA generic devices[1] (64vq per device) [1] https://www.mail-archive.com/qemu-devel@nongnu.org/msg921541.html Signed-off-by: Longpeng --- hw/virtio/vhost.c | 40 ++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 7fb008bc9e..16f8391d86 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1507,7 +1507,7 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) { BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); -int i, r, e; +int i, n, r, e; /* We will pass the notifiers to the kernel, make sure that QEMU * doesn't interfere. @@ -1518,6 +1518,12 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) goto fail; } +/* + * Batch all the host notifiers in a single transaction to avoid + * quadratic time complexity in address_space_update_ioeventfds(). + */ +memory_region_transaction_begin(); + for (i = 0; i < hdev->nvqs; ++i) { r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, true); @@ -1527,8 +1533,12 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) } } +memory_region_transaction_commit(); + return 0; fail_vq: +/* save i for a second iteration after transaction is committed. */ +n = i; while (--i >= 0) { e = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, false); @@ -1536,8 +1546,18 @@ fail_vq: error_report("vhost VQ %d notifier cleanup error: %d", i, -r); } assert (e >= 0); -virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i); } + +/* + * The transaction expects the ioeventfds to be open when it + * commits. Do it now, before the cleanup loop. 
+ */ +memory_region_transaction_commit(); + +while (--n >= 0) { +virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + n); +} + virtio_device_release_ioeventfd(vdev); fail: return r; @@ -1553,6 +1573,12 @@ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); int i, r; +/* + * Batch all the host notifiers in a single transaction to avoid + * quadratic time complexity in address_space_update_ioeventfds(). + */ +memory_region_transaction_begin(); + for (i = 0; i < hdev->nvqs; ++i) { r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, false); @@ -1560,8 +1586,18 @@ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) error_report("vhost VQ %d notifier cleanup failed: %d", i, -r); } assert (r >= 0); +} + +/* + * The transaction expects the ioeventfds to be open when it + * commits. Do it now, before the cleanup loop. + */ +memory_region_transaction_commit(); + +for (i = 0; i < hdev->nvqs; ++i) { virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i); } + virtio_device_release_ioeventfd(vdev); } -- 2.23.0
[PATCH v2 2/2] vdpa: commit all host notifier MRs in a single MR transaction
From: Longpeng This allows the vhost-vdpa device to batch the setup of all its MRs of host notifiers. This significantly reduces the device starting time, e.g. the time spend on setup the host notifier MRs reduce from 423ms to 32ms for a VM with 64 vCPUs and 3 vhost-vDPA generic devices[1] (64vq per device). [1] https://www.mail-archive.com/qemu-devel@nongnu.org/msg921541.html Signed-off-by: Longpeng --- hw/virtio/vhost-vdpa.c | 18 ++ 1 file changed, 18 insertions(+) diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 7468e44b87..eb233cf08a 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -547,9 +547,18 @@ static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n) { int i; +/* + * Pack all the changes to the memory regions in a single + * transaction to avoid a few updating of the address space + * topology. + */ +memory_region_transaction_begin(); + for (i = dev->vq_index; i < dev->vq_index + n; i++) { vhost_vdpa_host_notifier_uninit(dev, i); } + +memory_region_transaction_commit(); } static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev) @@ -562,16 +571,25 @@ static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev) return; } +/* + * Pack all the changes to the memory regions in a single + * transaction to avoid a few updating of the address space + * topology. + */ +memory_region_transaction_begin(); + for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) { if (vhost_vdpa_host_notifier_init(dev, i)) { goto err; } } +memory_region_transaction_commit(); return; err: vhost_vdpa_host_notifiers_uninit(dev, i - dev->vq_index); +memory_region_transaction_commit(); return; } -- 2.23.0
vfio_pin_map_dma cause synchronize_sched wait too long
Hi guys, Suppose there are two VMs: VM1 is bound to node-0 and is calling vfio_pin_map_dma(); VM2 is an incoming migration VM which is bound to node-1. We found that vm_start() (a QEMU function) of VM2 occasionally takes too long; the reason is as follows. - VM2 - qemu: vm_start vm_start_notify virtio_vmstate_change virtio_pci_vmstate_change virtio_pci_start_ioeventfd virtio_device_start_ioeventfd_impl event_notifier_init eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC) <-- too long kern: sys_eventfd2 get_unused_fd_flags __alloc_fd expand_files expand_fdtable synchronize_sched <-- too long - VM1 - VM1 is doing vfio_pin_map_dma at the same time. The CPU must finish vfio_pin_map_dma before the rcu-sched grace period can elapse, so synchronize_sched waits for a long time. Is there any solution to this? Any suggestion would be greatly appreciated, thanks! -- Regards, Longpeng(Mike)
Re: vfio_pin_map_dma cause synchronize_sched wait too long
在 2019/12/2 17:31, Paolo Bonzini 写道: > On 02/12/19 10:10, Longpeng (Mike) wrote: >> >> Suppose there're two VMs: VM1 is bind to node-0 and calling >> vfio_pin_map_dma(), >> VM2 is a migrate incoming VM which bind to node-1. We found the vm_start( >> QEMU >> function) of VM2 will take too long occasionally, the reason is as follow. > > Which part of vfio_pin_map_dma is running? There is already a I need more analysis to find which part. > cond_resched in vfio_iommu_map. Perhaps you could add one to > vfio_pin_pages_remote and/or use vfio_pgsize_bitmap to cap the number of > pages that it returns. Um ... There's only one running task (qemu-kvm of the VM1) on that CPU, so maybe the cond_resched() is ineffective ? > > Paolo > > > -- Regards, Longpeng(Mike)
Re: vfio_pin_map_dma cause synchronize_sched wait too long
在 2019/12/2 18:06, Paolo Bonzini 写道: > On 02/12/19 10:42, Longpeng (Mike) wrote: >>> cond_resched in vfio_iommu_map. Perhaps you could add one to >>> vfio_pin_pages_remote and/or use vfio_pgsize_bitmap to cap the >>> number of pages that it returns. >> Um ... There's only one running task (qemu-kvm of the VM1) on that >> CPU, so maybe the cond_resched() is ineffective ? > > Note that synchronize_sched() these days is just a synonym of > synchronize_rcu, so this makes me wonder if you're running on an older > kernel and whether you are missing this commit: > Yep. I'm running on an older kernel and I've missed this patchset. Thanks a lot :) > > commit 92aa39e9dc77481b90cbef25e547d66cab901496 > Author: Paul E. McKenney > Date: Mon Jul 9 13:47:30 2018 -0700 > > rcu: Make need_resched() respond to urgent RCU-QS needs > > The per-CPU rcu_dynticks.rcu_urgent_qs variable communicates an urgent > need for an RCU quiescent state from the force-quiescent-state > processing > within the grace-period kthread to context switches and to > cond_resched(). > Unfortunately, such urgent needs are not communicated to need_resched(), > which is sometimes used to decide when to invoke cond_resched(), for > but one example, within the KVM vcpu_run() function. As of v4.15, this > can result in synchronize_sched() being delayed by up to ten seconds, > which can be problematic, to say nothing of annoying. > > This commit therefore checks rcu_dynticks.rcu_urgent_qs from within > rcu_check_callbacks(), which is invoked from the scheduling-clock > interrupt handler. If the current task is not an idle task and is > not executing in usermode, a context switch is forced, and either way, > the rcu_dynticks.rcu_urgent_qs variable is set to false. If the current > task is an idle task, then RCU's dyntick-idle code will detect the > quiescent state, so no further action is required. 
Similarly, if the > task is executing in usermode, other code in rcu_check_callbacks() and > its called functions will report the corresponding quiescent state. > > Reported-by: Marius Hillenbrand > Reported-by: David Woodhouse > Suggested-by: Peter Zijlstra > Signed-off-by: Paul E. McKenney > > > Thanks, > > Paolo > > > . > -- Regards, Longpeng(Mike)
Re: [Qemu-devel] [BUG] VM abort after migration
在 2019/7/10 11:57, Jason Wang 写道: > > On 2019/7/10 上午11:36, Longpeng (Mike) wrote: >> 在 2019/7/10 11:25, Jason Wang 写道: >>> On 2019/7/8 下午5:47, Dr. David Alan Gilbert wrote: >>>> * longpeng (longpe...@huawei.com) wrote: >>>>> Hi guys, >>>>> >>>>> We found a qemu core in our testing environment, the assertion >>>>> 'assert(bus->irq_count[i] == 0)' in pcibus_reset() was triggered and >>>>> the bus->irq_count[i] is '-1'. >>>>> >>>>> Through analysis, it was happened after VM migration and we think >>>>> it was caused by the following sequence: >>>>> >>>>> *Migration Source* >>>>> 1. save bus pci.0 state, including irq_count[x] ( =0 , old ) >>>>> 2. save E1000: >>>>> e1000_pre_save >>>>> e1000_mit_timer >>>>> set_interrupt_cause >>>>> pci_set_irq --> update pci_dev->irq_state to 1 and >>>>> update bus->irq_count[x] to 1 ( new ) >>>>> the irq_state sent to dest. >>>>> >>>>> *Migration Dest* >>>>> 1. Receive the irq_count[x] of pci.0 is 0 , but the irq_state of e1000 is >>>>> 1. >>>>> 2. If the e1000 need change irqline , it would call to pci_irq_handler(), >>>>> the irq_state maybe change to 0 and bus->irq_count[x] will become >>>>> -1 in this situation. >>>>> 3. do VM reboot then the assertion will be triggered. >>>>> >>>>> We also found some guys faced the similar problem: >>>>> [1] https://lists.gnu.org/archive/html/qemu-devel/2016-11/msg02525.html >>>>> [2] https://bugs.launchpad.net/qemu/+bug/1702621 >>>>> >>>>> Is there some patches to fix this problem ? >>>> I don't remember any. >>>> >>>>> Can we save pcibus state after all the pci devs are saved ? >>>> Does this problem only happen with e1000? I think so. >>>> If it's only e1000 I think we should fix it - I think once the VM is >>>> stopped for doing the device migration it shouldn't be raising >>>> interrupts. >>> >>> I wonder maybe we can simply fix this by no setting ICS on pre_save() but >>> scheduling mit timer unconditionally in post_load(). 
>>> >> I also think this is a bug of e1000 because we find more cores with the same >> frame thease days. >> >> I'm not familiar with e1000 so hope someone could fix it, thanks. :) >> > > Draft a path in attachment, please test. > Hi Jason, We've tested the patch for about two weeks, everything went well, thanks! Feel free to add my: Reported-and-tested-by: Longpeng > Thanks > > >>> Thanks >>> >>> >>>> Dave >>>> >>>>> Thanks, >>>>> Longpeng(Mike) >>>> -- >>>> Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK >>> . >>> -- Regards, Longpeng(Mike)
Re: [PATCH] configure: Fix typo of the have_afalg variable
在 2020/2/4 0:00, Thomas Huth 写道: > The variable is called 'have_afalg' and not 'hava_afalg'. > > Fixes: f0d92b56d88 ('introduce some common functions for af_alg backend') > Signed-off-by: Thomas Huth > --- > configure | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/configure b/configure > index 5095f01728..115dc38085 100755 > --- a/configure > +++ b/configure > @@ -5843,7 +5843,7 @@ fi > > ## > # check for usable AF_ALG environment > -hava_afalg=no > +have_afalg=no > cat > $TMPC << EOF > #include > #include > Reviewed-by: Longpeng(Mike) -- Regards, Longpeng(Mike)
[PATCH v2 00/10] add generic vDPA device support
From: Longpeng Hi guys, This patchset tries to support the generic vDPA device, the previous discussion can be found here [1]. With the generic vDPA device, QEMU won't need to touch the device types any more, much like vfio. We can use the generic vDPA device as follows: -device vhost-vdpa-device-pci,vdpa-dev=/dev/vhost-vdpa-X I've done some simple tests on Huawei's offloading card (net, 0.95) and vdpa_sim_blk (1.0); The kernel part: https://lkml.org/lkml/2022/1/17/239 Changes RFC -> v1 Patch 1: - rename 'pdev_id' to 'trans_devid' [Michael] - only use transitional device id for the devices listed in the spec [Michael] - use macros to make the id_info table clearer [Longpeng] - add some modern devices in the id_info table [Longpeng] Patch 2: - remove the GET_VECTORS_NUM command [Jason] Patch 4: - expose vdpa_dev_fd as a QOM property [Stefan] - introduce vhost_vdpa_device_get_u32 as a common function to make the code clearer [Stefan] - fix the misleading description of 'dc->desc' [Stefano] Patch 5: - check returned number of virtqueues [Stefan] Patch 6: - init s->num_queues [Stefano] - free s->dev.vqs [Stefano] Longpeng (Mike) (10): virtio: get class_id and pci device id by the virtio id update linux headers vdpa: add the infrastructure of vdpa-dev vdpa-dev: implement the instance_init/class_init interface vdpa-dev: implement the realize interface vdpa-dev: implement the unrealize interface vdpa-dev: implement the get_config/set_config interface vdpa-dev: implement the get_features interface vdpa-dev: implement the set_status interface vdpa-dev: mark the device as unmigratable hw/virtio/Kconfig| 5 + hw/virtio/meson.build| 2 + hw/virtio/vdpa-dev-pci.c | 99 ++ hw/virtio/vdpa-dev.c | 357 +++ hw/virtio/virtio-pci.c | 77 hw/virtio/virtio-pci.h | 5 + include/hw/virtio/vdpa-dev.h | 29 +++ linux-headers/linux/vhost.h | 7 + 8 files changed, 581 insertions(+) create mode 100644 hw/virtio/vdpa-dev-pci.c create mode 100644 hw/virtio/vdpa-dev.c create mode 100644
include/hw/virtio/vdpa-dev.h -- 2.23.0
[PATCH v2 02/10] update linux headers
From: Longpeng Update linux headers to 5.xxx (kernel part is not merged yet) To support the generic vdpa device, we need to add the following ioctls: - VHOST_VDPA_GET_CONFIG_SIZE: get the configuration size. - VHOST_VDPA_GET_VQS_COUNT: get the count of supported virtqueues. Signed-off-by: Longpeng --- linux-headers/linux/vhost.h | 7 +++ 1 file changed, 7 insertions(+) diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h index c998860d7b..5d99e7c242 100644 --- a/linux-headers/linux/vhost.h +++ b/linux-headers/linux/vhost.h @@ -150,4 +150,11 @@ /* Get the valid iova range */ #define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ struct vhost_vdpa_iova_range) + +/* Get the config size */ +#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) + +/* Get the count of all virtqueues */ +#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) + #endif -- 2.23.0
[PATCH v2 03/10] vdpa: add the infrastructure of vdpa-dev
From: Longpeng Add the infrastructure of vdpa-dev (the generic vDPA device), we can add a generic vDPA device as follow: -device vhost-vdpa-device-pci,vdpa-dev=/dev/vhost-vdpa-X Signed-off-by: Longpeng --- hw/virtio/Kconfig| 5 hw/virtio/meson.build| 2 ++ hw/virtio/vdpa-dev-pci.c | 51 hw/virtio/vdpa-dev.c | 41 + include/hw/virtio/vdpa-dev.h | 16 +++ 5 files changed, 115 insertions(+) create mode 100644 hw/virtio/vdpa-dev-pci.c create mode 100644 hw/virtio/vdpa-dev.c create mode 100644 include/hw/virtio/vdpa-dev.h diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig index c144d42f9b..2723283382 100644 --- a/hw/virtio/Kconfig +++ b/hw/virtio/Kconfig @@ -68,3 +68,8 @@ config VHOST_USER_RNG bool default y depends on VIRTIO && VHOST_USER + +config VHOST_VDPA_DEV +bool +default y if VIRTIO_PCI +depends on VIRTIO && VHOST_VDPA && LINUX diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 521f7d64a8..8e8943e20b 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -29,6 +29,7 @@ virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_I2C'], if_true: files('vhost-user-i2c-pci.c')) virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) virtio_ss.add(when: ['CONFIG_VHOST_USER_RNG', 'CONFIG_VIRTIO_PCI'], if_true: files('vhost-user-rng-pci.c')) +virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c')) virtio_pci_ss = ss.source_set() virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c')) @@ -49,6 +50,7 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_SERIAL', if_true: files('virtio-serial-pc virtio_pci_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem-pci.c')) +virtio_pci_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: 
files('vdpa-dev-pci.c')) virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) diff --git a/hw/virtio/vdpa-dev-pci.c b/hw/virtio/vdpa-dev-pci.c new file mode 100644 index 00..a5a7b528a9 --- /dev/null +++ b/hw/virtio/vdpa-dev-pci.c @@ -0,0 +1,51 @@ +#include "qemu/osdep.h" +#include +#include +#include "hw/virtio/virtio.h" +#include "hw/virtio/vdpa-dev.h" +#include "hw/pci/pci.h" +#include "hw/qdev-properties.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/module.h" +#include "virtio-pci.h" +#include "qom/object.h" + + +typedef struct VhostVdpaDevicePCI VhostVdpaDevicePCI; + +#define TYPE_VHOST_VDPA_DEVICE_PCI "vhost-vdpa-device-pci-base" +DECLARE_INSTANCE_CHECKER(VhostVdpaDevicePCI, VHOST_VDPA_DEVICE_PCI, + TYPE_VHOST_VDPA_DEVICE_PCI) + +struct VhostVdpaDevicePCI { +VirtIOPCIProxy parent_obj; +VhostVdpaDevice vdev; +}; + +static void vhost_vdpa_device_pci_instance_init(Object *obj) +{ +return; +} + +static void vhost_vdpa_device_pci_class_init(ObjectClass *klass, void *data) +{ +return; +} + +static const VirtioPCIDeviceTypeInfo vhost_vdpa_device_pci_info = { +.base_name = TYPE_VHOST_VDPA_DEVICE_PCI, +.generic_name= "vhost-vdpa-device-pci", +.transitional_name = "vhost-vdpa-device-pci-transitional", +.non_transitional_name = "vhost-vdpa-device-pci-non-transitional", +.instance_size = sizeof(VhostVdpaDevicePCI), +.instance_init = vhost_vdpa_device_pci_instance_init, +.class_init = vhost_vdpa_device_pci_class_init, +}; + +static void vhost_vdpa_device_pci_register(void) +{ +virtio_pci_types_register(&vhost_vdpa_device_pci_info); +} + +type_init(vhost_vdpa_device_pci_register); diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c new file mode 100644 index 00..f4f92b90b0 --- /dev/null +++ b/hw/virtio/vdpa-dev.c @@ -0,0 +1,41 @@ +#include "qemu/osdep.h" +#include +#include +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/cutils.h" +#include "hw/qdev-core.h" +#include "hw/qdev-properties.h" +#include 
"hw/qdev-properties-system.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/virtio-access.h" +#include "hw/virtio/vdpa-dev.h" +#include "sysemu/sysemu.h" +#include "sysemu/runstate.h" + +static void vhost_vdpa_device_class_init(ObjectClass *klass, void *data) +{ +return; +} + +static void vhost_vdpa_device_instance_init(Object *obj) +{ +return; +} + +static const TypeInfo vhost_vdpa_device_info = { +.name = TYPE_VHOST_VDPA_DEVICE, +.parent = TYPE_VIRTIO_DEVICE, +.instance_size = sizeof(VhostVdpaDevice), +.class_init = vhost_vdpa_device_class_init, +.instance_init = vhost_vdpa_device_instance_init,
[PATCH v2 04/10] vdpa-dev: implement the instance_init/class_init interface
From: Longpeng Implements the .instance_init and the .class_init interface. Signed-off-by: Longpeng --- hw/virtio/vdpa-dev-pci.c | 52 ++- hw/virtio/vdpa-dev.c | 81 +++- include/hw/virtio/vdpa-dev.h | 5 +++ 3 files changed, 134 insertions(+), 4 deletions(-) diff --git a/hw/virtio/vdpa-dev-pci.c b/hw/virtio/vdpa-dev-pci.c index a5a7b528a9..257538dbdd 100644 --- a/hw/virtio/vdpa-dev-pci.c +++ b/hw/virtio/vdpa-dev-pci.c @@ -25,12 +25,60 @@ struct VhostVdpaDevicePCI { static void vhost_vdpa_device_pci_instance_init(Object *obj) { -return; +VhostVdpaDevicePCI *dev = VHOST_VDPA_DEVICE_PCI(obj); + +virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), +TYPE_VHOST_VDPA_DEVICE); +object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev), + "bootindex"); +} + +static Property vhost_vdpa_device_pci_properties[] = { +DEFINE_PROP_END_OF_LIST(), +}; + +static void +vhost_vdpa_device_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ +VhostVdpaDevicePCI *dev = VHOST_VDPA_DEVICE_PCI(vpci_dev); +DeviceState *vdev = DEVICE(&dev->vdev); +uint32_t vdev_id; +uint32_t num_queues; +int fd; + +fd = qemu_open(dev->vdev.vdpa_dev, O_RDWR, errp); +if (*errp) { +return; +} + +vdev_id = vhost_vdpa_device_get_u32(fd, VHOST_VDPA_GET_DEVICE_ID, errp); +if (*errp) { +qemu_close(fd); +return; +} + +num_queues = vhost_vdpa_device_get_u32(fd, VHOST_VDPA_GET_VQS_NUM, errp); +if (*errp) { +qemu_close(fd); +return; +} + +dev->vdev.vdpa_dev_fd = fd; +vpci_dev->class_code = virtio_pci_get_class_id(vdev_id); +vpci_dev->trans_devid = virtio_pci_get_trans_devid(vdev_id); +/* one for config interrupt, one per vq */ +vpci_dev->nvectors = num_queues + 1; +qdev_realize(vdev, BUS(&vpci_dev->bus), errp); } static void vhost_vdpa_device_pci_class_init(ObjectClass *klass, void *data) { -return; +DeviceClass *dc = DEVICE_CLASS(klass); +VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + +set_bit(DEVICE_CATEGORY_MISC, dc->categories); +device_class_set_props(dc, vhost_vdpa_device_pci_properties); 
+k->realize = vhost_vdpa_device_pci_realize; } static const VirtioPCIDeviceTypeInfo vhost_vdpa_device_pci_info = { diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index f4f92b90b0..b103768f33 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -15,16 +15,93 @@ #include "sysemu/sysemu.h" #include "sysemu/runstate.h" -static void vhost_vdpa_device_class_init(ObjectClass *klass, void *data) +uint32_t vhost_vdpa_device_get_u32(int fd, unsigned long int cmd, Error **errp) +{ +uint32_t val = (uint32_t)-1; + +if (ioctl(fd, cmd, &val) < 0) { +error_setg(errp, "vhost-vdpa-device: cmd 0x%lx failed: %s", + cmd, strerror(errno)); +} + +return val; +} + +static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) { return; } -static void vhost_vdpa_device_instance_init(Object *obj) +static void vhost_vdpa_device_unrealize(DeviceState *dev) +{ +return; +} + +static void +vhost_vdpa_device_get_config(VirtIODevice *vdev, uint8_t *config) +{ +return; +} + +static void +vhost_vdpa_device_set_config(VirtIODevice *vdev, const uint8_t *config) +{ +return; +} + +static uint64_t vhost_vdpa_device_get_features(VirtIODevice *vdev, + uint64_t features, + Error **errp) +{ +return (uint64_t)-1; +} + +static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) { return; } +static Property vhost_vdpa_device_properties[] = { +DEFINE_PROP_STRING("vdpa-dev", VhostVdpaDevice, vdpa_dev), +DEFINE_PROP_INT32("vdpa-dev-fd", VhostVdpaDevice, vdpa_dev_fd, -1), +DEFINE_PROP_END_OF_LIST(), +}; + +static const VMStateDescription vmstate_vhost_vdpa_device = { +.name = "vhost-vdpa-device", +.minimum_version_id = 1, +.version_id = 1, +.fields = (VMStateField[]) { +VMSTATE_VIRTIO_DEVICE, +VMSTATE_END_OF_LIST() +}, +}; + +static void vhost_vdpa_device_class_init(ObjectClass *klass, void *data) +{ +DeviceClass *dc = DEVICE_CLASS(klass); +VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + +device_class_set_props(dc, vhost_vdpa_device_properties); +dc->desc = 
"VDPA-based generic device assignment"; +dc->vmsd = &vmstate_vhost_vdpa_device; +set_bit(DEVICE_CATEGORY_MISC, dc->categories); +vdc->realize = vhost_vdpa_device_realize; +vdc->unrealize = vhost_vdpa_device_unrealize; +vdc->get_config = vhost_vdpa_device_get_config; +vdc->set_config = vhost_vdpa_device_set_config; +vdc->get_features = vhost_vdpa_device_get_features; +vdc->set_status = vhost_vdpa_device_set_status; +} + +static voi
[PATCH v2 06/10] vdpa-dev: implement the unrealize interface
From: Longpeng Implements the .unrealize interface. Signed-off-by: Longpeng --- hw/virtio/vdpa-dev.c | 24 +++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index bd28cf7a15..e5691d02bb 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -132,9 +132,31 @@ out: s->vdpa_dev_fd = -1; } +static void vhost_vdpa_vdev_unrealize(VhostVdpaDevice *s) +{ +VirtIODevice *vdev = VIRTIO_DEVICE(s); +int i; + +for (i = 0; i < s->num_queues; i++) { +virtio_delete_queue(s->virtqs[i]); +} +g_free(s->virtqs); +virtio_cleanup(vdev); + +g_free(s->config); +} + static void vhost_vdpa_device_unrealize(DeviceState *dev) { -return; +VirtIODevice *vdev = VIRTIO_DEVICE(dev); +VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); + +virtio_set_status(vdev, 0); +vhost_vdpa_vdev_unrealize(s); +g_free(s->dev.vqs); +vhost_dev_cleanup(&s->dev); +qemu_close(s->vdpa_dev_fd); +s->vdpa_dev_fd = -1; } static void -- 2.23.0
[PATCH v2 10/10] vdpa-dev: mark the device as unmigratable
From: Longpeng The generic vDPA device doesn't support migration currently, so mark it as unmigratable temporarily. Signed-off-by: Longpeng --- hw/virtio/vdpa-dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index 99722c88a1..65511243f9 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -307,6 +307,7 @@ static Property vhost_vdpa_device_properties[] = { static const VMStateDescription vmstate_vhost_vdpa_device = { .name = "vhost-vdpa-device", +.unmigratable = 1, .minimum_version_id = 1, .version_id = 1, .fields = (VMStateField[]) { -- 2.23.0
[PATCH v2 07/10] vdpa-dev: implement the get_config/set_config interface
From: Longpeng Implements the .get_config and .set_config interface. Signed-off-by: Longpeng --- hw/virtio/vdpa-dev.c | 14 -- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index e5691d02bb..cef0a58012 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -162,13 +162,23 @@ static void vhost_vdpa_device_unrealize(DeviceState *dev) static void vhost_vdpa_device_get_config(VirtIODevice *vdev, uint8_t *config) { -return; +VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); + +memcpy(config, s->config, s->config_size); } static void vhost_vdpa_device_set_config(VirtIODevice *vdev, const uint8_t *config) { -return; +VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); +int ret; + +ret = vhost_dev_set_config(&s->dev, s->config, 0, s->config_size, + VHOST_SET_CONFIG_TYPE_MASTER); +if (ret) { +error_report("set device config space failed"); +return; +} } static uint64_t vhost_vdpa_device_get_features(VirtIODevice *vdev, -- 2.23.0
[PATCH v2 08/10] vdpa-dev: implement the get_features interface
From: Longpeng Implements the .get_features interface. Signed-off-by: Longpeng --- hw/virtio/vdpa-dev.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index cef0a58012..7bf07fef9b 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -185,7 +185,14 @@ static uint64_t vhost_vdpa_device_get_features(VirtIODevice *vdev, uint64_t features, Error **errp) { -return (uint64_t)-1; +VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); +uint64_t backend_features = s->dev.features; + +if (!virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM)) { +virtio_clear_feature(&backend_features, VIRTIO_F_IOMMU_PLATFORM); +} + +return backend_features; } static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) -- 2.23.0
[PATCH v2 05/10] vdpa-dev: implement the realize interface
From: Longpeng Implements the .realize interface. Signed-off-by: Longpeng --- hw/virtio/vdpa-dev.c | 101 +++ include/hw/virtio/vdpa-dev.h | 8 +++ 2 files changed, 109 insertions(+) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index b103768f33..bd28cf7a15 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -27,9 +27,109 @@ uint32_t vhost_vdpa_device_get_u32(int fd, unsigned long int cmd, Error **errp) return val; } +static void +vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ +/* Nothing to do */ +} + static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) { +VirtIODevice *vdev = VIRTIO_DEVICE(dev); +VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); +uint32_t vdev_id, max_queue_size; +struct vhost_virtqueue *vqs; +int i, ret; + +if (s->vdpa_dev_fd == -1) { +s->vdpa_dev_fd = qemu_open(s->vdpa_dev, O_RDWR, errp); +if (*errp) { +return; +} +} +s->vdpa.device_fd = s->vdpa_dev_fd; + +max_queue_size = vhost_vdpa_device_get_u32(s->vdpa_dev_fd, + VHOST_VDPA_GET_VRING_NUM, errp); +if (*errp) { +goto out; +} + +if (s->queue_size > max_queue_size) { +error_setg(errp, "vhost-vdpa-device: invalid queue_size: %d (max:%d)", + s->queue_size, max_queue_size); +goto out; +} else if (!s->queue_size) { +s->queue_size = max_queue_size; +} + +s->num_queues = vhost_vdpa_device_get_u32(s->vdpa_dev_fd, + VHOST_VDPA_GET_VQS_NUM, errp); +if (*errp) { +goto out; +} + +if (!s->num_queues || s->num_queues > VIRTIO_QUEUE_MAX) { +error_setg(errp, "invalid number of virtqueues: %u (max:%u)", + s->num_queues, VIRTIO_QUEUE_MAX); +goto out; +} + +s->dev.nvqs = s->num_queues; +vqs = g_new0(struct vhost_virtqueue, s->dev.nvqs); +s->dev.vqs = vqs; +s->dev.vq_index = 0; +s->dev.vq_index_end = s->dev.nvqs; +s->dev.backend_features = 0; +s->started = false; + +ret = vhost_dev_init(&s->dev, &s->vdpa, VHOST_BACKEND_TYPE_VDPA, 0, NULL); +if (ret < 0) { +error_setg(errp, "vhost-vdpa-device: vhost initialization failed: %s", + strerror(-ret)); +goto 
free_vqs; +} + +vdev_id = vhost_vdpa_device_get_u32(s->vdpa_dev_fd, +VHOST_VDPA_GET_DEVICE_ID, errp); +if (ret < 0) { +error_setg(errp, "vhost-vdpa-device: vhost get device id failed: %s", + strerror(-ret)); +goto vhost_cleanup; +} + +s->config_size = vhost_vdpa_device_get_u32(s->vdpa_dev_fd, + VHOST_VDPA_GET_CONFIG_SIZE, errp); +if (*errp) { +goto vhost_cleanup; +} +s->config = g_malloc0(s->config_size); + +ret = vhost_dev_get_config(&s->dev, s->config, s->config_size, NULL); +if (ret < 0) { +error_setg(errp, "vhost-vdpa-device: get config failed"); +goto free_config; +} + +virtio_init(vdev, "vhost-vdpa", vdev_id, s->config_size); + +s->virtqs = g_new0(VirtQueue *, s->dev.nvqs); +for (i = 0; i < s->dev.nvqs; i++) { +s->virtqs[i] = virtio_add_queue(vdev, s->queue_size, +vhost_vdpa_device_dummy_handle_output); +} + return; + +free_config: +g_free(s->config); +vhost_cleanup: +vhost_dev_cleanup(&s->dev); +free_vqs: +g_free(vqs); +out: +qemu_close(s->vdpa_dev_fd); +s->vdpa_dev_fd = -1; } static void vhost_vdpa_device_unrealize(DeviceState *dev) @@ -64,6 +164,7 @@ static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) static Property vhost_vdpa_device_properties[] = { DEFINE_PROP_STRING("vdpa-dev", VhostVdpaDevice, vdpa_dev), DEFINE_PROP_INT32("vdpa-dev-fd", VhostVdpaDevice, vdpa_dev_fd, -1), +DEFINE_PROP_UINT16("queue-size", VhostVdpaDevice, queue_size, 0), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/hw/virtio/vdpa-dev.h b/include/hw/virtio/vdpa-dev.h index e7ad349113..e0482035cf 100644 --- a/include/hw/virtio/vdpa-dev.h +++ b/include/hw/virtio/vdpa-dev.h @@ -14,6 +14,14 @@ struct VhostVdpaDevice { char *vdpa_dev; int vdpa_dev_fd; int32_t bootindex; +struct vhost_dev dev; +struct vhost_vdpa vdpa; +VirtQueue **virtqs; +uint8_t *config; +int config_size; +uint32_t num_queues; +uint16_t queue_size; +bool started; }; uint32_t vhost_vdpa_device_get_u32(int fd, unsigned long int cmd, Error **errp); -- 2.23.0
[PATCH v2 01/10] virtio: get class_id and pci device id by the virtio id
From: Longpeng Add helpers to get the "Transitional PCI Device ID" and "class_id" of the device specified by the "Virtio Device ID". These helpers will be used to build the generic vDPA device later. Signed-off-by: Longpeng --- hw/virtio/virtio-pci.c | 77 ++ hw/virtio/virtio-pci.h | 5 +++ 2 files changed, 82 insertions(+) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 750aa47ec1..373e26d7c3 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -19,6 +19,7 @@ #include "exec/memop.h" #include "standard-headers/linux/virtio_pci.h" +#include "standard-headers/linux/virtio_ids.h" #include "hw/boards.h" #include "hw/virtio/virtio.h" #include "migration/qemu-file-types.h" @@ -213,6 +214,79 @@ static int virtio_pci_load_queue(DeviceState *d, int n, QEMUFile *f) return 0; } +typedef struct VirtIOPCIIDInfo { +/* virtio id */ +uint16_t vdev_id; +/* pci device id for the transitional device */ +uint16_t trans_devid; +uint16_t class_id; +} VirtIOPCIIDInfo; + +#define VIRTIO_TRANS_DEV_ID_INFO(name, class) \ +{ \ +.vdev_id = VIRTIO_ID_##name,\ +.trans_devid = PCI_DEVICE_ID_VIRTIO_##name, \ +.class_id = class, \ +} + +#define VIRTIO_MODERN_DEV_ID_NFO(name, class) \ +{ \ +.vdev_id = VIRTIO_ID_##name,\ +.class_id = class, \ +} + +static const VirtIOPCIIDInfo virtio_pci_id_info[] = { +/* Non-transitional devices */ +VIRTIO_MODERN_DEV_ID_NFO(CRYPTO,PCI_CLASS_OTHERS), +VIRTIO_MODERN_DEV_ID_NFO(FS,PCI_CLASS_STORAGE_OTHER), +/* Transitional devices */ +VIRTIO_TRANS_DEV_ID_INFO(NET, PCI_CLASS_NETWORK_ETHERNET), +VIRTIO_TRANS_DEV_ID_INFO(BLOCK, PCI_CLASS_STORAGE_SCSI), +VIRTIO_TRANS_DEV_ID_INFO(CONSOLE, PCI_CLASS_COMMUNICATION_OTHER), +VIRTIO_TRANS_DEV_ID_INFO(SCSI, PCI_CLASS_STORAGE_SCSI), +VIRTIO_TRANS_DEV_ID_INFO(9P,PCI_BASE_CLASS_NETWORK), +VIRTIO_TRANS_DEV_ID_INFO(BALLOON, PCI_CLASS_OTHERS), +VIRTIO_TRANS_DEV_ID_INFO(RNG, PCI_CLASS_OTHERS), +}; + +static const VirtIOPCIIDInfo *virtio_pci_get_id_info(uint16_t vdev_id) +{ +const VirtIOPCIIDInfo *info = 
NULL; +int i; + +for (i = 0; i < ARRAY_SIZE(virtio_pci_id_info); i++) { +if (virtio_pci_id_info[i].vdev_id == vdev_id) { +info = &virtio_pci_id_info[i]; +break; +} +} + +if (!info) { +/* The device id is invalid or not added to the id_info yet. */ +error_report("Invalid virtio device(id %u)", vdev_id); +abort(); +} + +return info; +} + +/* + * Get the Transitional Device ID for the specific device, return + * zero if the device is non-transitional. + */ +uint16_t virtio_pci_get_trans_devid(uint16_t device_id) +{ +return virtio_pci_get_id_info(device_id)->trans_devid; +} + +/* + * Get the Class ID for the specific device. + */ +uint16_t virtio_pci_get_class_id(uint16_t device_id) +{ +return virtio_pci_get_id_info(device_id)->class_id; +} + static bool virtio_pci_ioeventfd_enabled(DeviceState *d) { VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); @@ -1674,6 +1748,9 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) * is set to PCI_SUBVENDOR_ID_REDHAT_QUMRANET by default. */ pci_set_word(config + PCI_SUBSYSTEM_ID, virtio_bus_get_vdev_id(bus)); +if (proxy->trans_devid) { +pci_config_set_device_id(config, proxy->trans_devid); +} } else { /* pure virtio-1.0 */ pci_set_word(config + PCI_VENDOR_ID, diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h index 2446dcd9ae..f08665cd1b 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h @@ -146,6 +146,8 @@ struct VirtIOPCIProxy { bool disable_modern; bool ignore_backend_features; OnOffAuto disable_legacy; +/* Transitional device id */ +uint16_t trans_devid; uint32_t class_code; uint32_t nvectors; uint32_t dfselect; @@ -158,6 +160,9 @@ struct VirtIOPCIProxy { VirtioBusState bus; }; +uint16_t virtio_pci_get_trans_devid(uint16_t device_id); +uint16_t virtio_pci_get_class_id(uint16_t device_id); + static inline bool virtio_pci_modern(VirtIOPCIProxy *proxy) { return !proxy->disable_modern; -- 2.23.0
[PATCH v2 09/10] vdpa-dev: implement the set_status interface
From: Longpeng Implements the .set_status interface. Signed-off-by: Longpeng --- hw/virtio/vdpa-dev.c | 100 ++- 1 file changed, 99 insertions(+), 1 deletion(-) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index 7bf07fef9b..99722c88a1 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -195,9 +195,107 @@ static uint64_t vhost_vdpa_device_get_features(VirtIODevice *vdev, return backend_features; } +static int vhost_vdpa_device_start(VirtIODevice *vdev, Error **errp) +{ +VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); +BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); +VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); +int i, ret; + +if (!k->set_guest_notifiers) { +error_setg(errp, "binding does not support guest notifiers"); +return -ENOSYS; +} + +ret = vhost_dev_enable_notifiers(&s->dev, vdev); +if (ret < 0) { +error_setg_errno(errp, -ret, "Error enabling host notifiers"); +return ret; +} + +ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true); +if (ret < 0) { +error_setg_errno(errp, -ret, "Error binding guest notifier"); +goto err_host_notifiers; +} + +s->dev.acked_features = vdev->guest_features; + +ret = vhost_dev_start(&s->dev, vdev); +if (ret < 0) { +error_setg_errno(errp, -ret, "Error starting vhost"); +goto err_guest_notifiers; +} +s->started = true; + +/* + * guest_notifier_mask/pending not used yet, so just unmask + * everything here. virtio-pci will do the right thing by + * enabling/disabling irqfd. 
+ */ +for (i = 0; i < s->dev.nvqs; i++) { +vhost_virtqueue_mask(&s->dev, vdev, i, false); +} + +return ret; + +err_guest_notifiers: +k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); +err_host_notifiers: +vhost_dev_disable_notifiers(&s->dev, vdev); +return ret; +} + +static void vhost_vdpa_device_stop(VirtIODevice *vdev) +{ +VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); +BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); +VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); +int ret; + +if (!s->started) { +return; +} +s->started = false; + +if (!k->set_guest_notifiers) { +return; +} + +vhost_dev_stop(&s->dev, vdev); + +ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); +if (ret < 0) { +error_report("vhost guest notifier cleanup failed: %d", ret); +return; +} + +vhost_dev_disable_notifiers(&s->dev, vdev); +} + static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) { -return; +VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); +bool should_start = virtio_device_started(vdev, status); +Error *local_err = NULL; +int ret; + +if (!vdev->vm_running) { +should_start = false; +} + +if (s->started == should_start) { +return; +} + +if (should_start) { +ret = vhost_vdpa_device_start(vdev, &local_err); +if (ret < 0) { +error_reportf_err(local_err, "vhost-vdpa-device: start failed: "); +} +} else { +vhost_vdpa_device_stop(vdev); +} } static Property vhost_vdpa_device_properties[] = { -- 2.23.0
[PATCH v5 2/6] vfio: move re-enabling INTX out of the common helper
Move re-enabling INTX out of the common helper vfio_msi_disable_common(), so that each caller decides whether to re-enable it. Signed-off-by: Longpeng(Mike) --- hw/vfio/pci.c | 17 +++-- 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index dd30806..d5e542b 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -690,7 +690,6 @@ retry: static void vfio_msi_disable_common(VFIOPCIDevice *vdev) { -Error *err = NULL; int i; for (i = 0; i < vdev->nr_vectors; i++) { @@ -709,15 +708,11 @@ static void vfio_msi_disable_common(VFIOPCIDevice *vdev) vdev->msi_vectors = NULL; vdev->nr_vectors = 0; vdev->interrupt = VFIO_INT_NONE; - -vfio_intx_enable(vdev, &err); -if (err) { -error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); -} } static void vfio_msix_disable(VFIOPCIDevice *vdev) { +Error *err = NULL; int i; msix_unset_vector_notifiers(&vdev->pdev); @@ -738,6 +733,10 @@ static void vfio_msix_disable(VFIOPCIDevice *vdev) } vfio_msi_disable_common(vdev); +vfio_intx_enable(vdev, &err); +if (err) { +error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); +} memset(vdev->msix->pending, 0, BITS_TO_LONGS(vdev->msix->entries) * sizeof(unsigned long)); @@ -747,8 +746,14 @@ static void vfio_msix_disable(VFIOPCIDevice *vdev) static void vfio_msi_disable(VFIOPCIDevice *vdev) { +Error *err = NULL; + vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSI_IRQ_INDEX); vfio_msi_disable_common(vdev); +vfio_intx_enable(vdev, &err); +if (err) { +error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); +} trace_vfio_msi_disable(vdev->vbasedev.name); } -- 1.8.3.1
[PATCH v5 4/6] kvm: irqchip: extract kvm_irqchip_add_deferred_msi_route
Extract a common helper that adds an MSI route for a specific vector but does not commit it immediately. Signed-off-by: Longpeng(Mike) --- accel/kvm/kvm-all.c | 15 +-- include/sysemu/kvm.h | 6 ++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index db8d83b..8627f7c 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -1953,7 +1953,7 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg) return kvm_set_irq(s, route->kroute.gsi, 1); } -int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev) +int kvm_irqchip_add_deferred_msi_route(KVMState *s, int vector, PCIDevice *dev) { struct kvm_irq_routing_entry kroute = {}; int virq; @@ -1996,7 +1996,18 @@ int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev) kvm_add_routing_entry(s, &kroute); kvm_arch_add_msi_route_post(&kroute, vector, dev); -kvm_irqchip_commit_routes(s); + +return virq; +} + +int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev) +{ +int virq; + +virq = kvm_irqchip_add_deferred_msi_route(s, vector, dev); +if (virq >= 0) { +kvm_irqchip_commit_routes(s); +} return virq; } diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index a1ab1ee..8de0d9a 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -476,6 +476,12 @@ void kvm_init_cpu_signals(CPUState *cpu); * @return: virq (>=0) when success, errno (<0) when failed. */ int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev); +/** + * Add MSI route for specific vector but does not commit to KVM + * immediately + */ +int kvm_irqchip_add_deferred_msi_route(KVMState *s, int vector, + PCIDevice *dev); int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, PCIDevice *dev); void kvm_irqchip_commit_routes(KVMState *s); -- 1.8.3.1
[PATCH v5 5/6] Revert "vfio: Avoid disabling and enabling vectors repeatedly in VFIO migration"
Commit ecebe53fe993 ("vfio: Avoid disabling and enabling vectors repeatedly in VFIO migration") avoids inefficiently disabling and enabling vectors repeatedly and lets the unmasked vectors be enabled one by one. But we want to batch multiple routes and defer the commit, and only commit once outside the loop of setting vector notifiers, so we cannot enable the vectors one by one in the loop now. Revert that commit; the next patch takes another approach, which not only avoids disabling/enabling vectors repeatedly, but also satisfies our requirement of deferring the commit. Signed-off-by: Longpeng(Mike) --- hw/vfio/pci.c | 20 +++- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 1ff84e6..69ad081 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -569,9 +569,6 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr) static void vfio_msix_enable(VFIOPCIDevice *vdev) { -PCIDevice *pdev = &vdev->pdev; -unsigned int nr, max_vec = 0; - vfio_disable_interrupts(vdev); vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->msix->entries); @@ -590,22 +587,11 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev) * triggering to userspace, then immediately release the vector, leaving * the physical device with no vectors enabled, but MSI-X enabled, just * like the guest view. - * If there are already unmasked vectors (in migration resume phase and - * some guest startups) which will be enabled soon, we can allocate all - * of them here to avoid inefficiently disabling and enabling vectors - * repeatedly later.
*/ -if (!pdev->msix_function_masked) { -for (nr = 0; nr < msix_nr_vectors_allocated(pdev); nr++) { -if (!msix_is_masked(pdev, nr)) { -max_vec = nr; -} -} -} -vfio_msix_vector_do_use(pdev, max_vec, NULL, NULL); -vfio_msix_vector_release(pdev, max_vec); +vfio_msix_vector_do_use(&vdev->pdev, 0, NULL, NULL); +vfio_msix_vector_release(&vdev->pdev, 0); -if (msix_set_vector_notifiers(pdev, vfio_msix_vector_use, +if (msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use, vfio_msix_vector_release, NULL)) { error_report("vfio: msix_set_vector_notifiers failed"); } -- 1.8.3.1
[PATCH v5 0/6] optimize the downtime for vfio migration
Hi guys, In the vfio migration resume phase, the cost increases with the number of unmasked vectors the vfio device has. We try to optimize it in this series. You can see the commit message in PATCH 6 for details. Patches 1-3 are simple cleanups and fixups. Patches 4-5 are the preparations for the optimization. Patch 6 optimizes the vfio msix setup path. Changes v4->v5: - set up the notifier and irqfd in the same function to make the code neater. [Alex] Changes v3->v4: - fix several typos and grammatical errors [Alex] - remove the patches that fix and clean the MSIX common part from this series [Alex] - Patch 6: - use vector->use directly and fill it with -1 on error paths [Alex] - add a comment before enabling deferred commit [Alex] - move the code that calls do_use/release on vector 0 into an "else" branch [Alex] - introduce vfio_prepare_kvm_msi_virq_batch() that enables the 'defer_kvm_irq_routing' flag [Alex] - introduce vfio_commit_kvm_msi_virq_batch() that clears the 'defer_kvm_irq_routing' flag and does further work [Alex] Changes v2->v3: - fix two errors [Longpeng] Changes v1->v2: - fix several typos and grammatical errors [Alex, Philippe] - split fixups and cleanups into separate patches [Alex, Philippe] - introduce kvm_irqchip_add_deferred_msi_route to minimize code changes [Alex] - enable the optimization in the msi setup path [Alex] Longpeng (Mike) (6): vfio: simplify the conditional statements in vfio_msi_enable vfio: move re-enabling INTX out of the common helper vfio: simplify the failure path in vfio_msi_enable kvm: irqchip: extract kvm_irqchip_add_deferred_msi_route Revert "vfio: Avoid disabling and enabling vectors repeatedly in VFIO migration" vfio: defer to commit kvm irq routing when enable msi/msix accel/kvm/kvm-all.c | 15 - hw/vfio/pci.c| 176 --- hw/vfio/pci.h| 1 + include/sysemu/kvm.h | 6 ++ 4 files changed, 130 insertions(+), 68 deletions(-) -- 1.8.3.1
[PATCH v5 1/6] vfio: simplify the conditional statements in vfio_msi_enable
It's unnecessary to test against the specific return value of VFIO_DEVICE_SET_IRQS, since any positive return is an error indicating the number of vectors we should retry with. Signed-off-by: Longpeng(Mike) --- hw/vfio/pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 5cdf1d4..dd30806 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -650,7 +650,7 @@ retry: if (ret) { if (ret < 0) { error_report("vfio: Error: Failed to setup MSI fds: %m"); -} else if (ret != vdev->nr_vectors) { +} else { error_report("vfio: Error: Failed to enable %d " "MSI vectors, retry with %d", vdev->nr_vectors, ret); } @@ -668,7 +668,7 @@ retry: g_free(vdev->msi_vectors); vdev->msi_vectors = NULL; -if (ret > 0 && ret != vdev->nr_vectors) { +if (ret > 0) { vdev->nr_vectors = ret; goto retry; } -- 1.8.3.1
[PATCH v5 3/6] vfio: simplify the failure path in vfio_msi_enable
Use vfio_msi_disable_common to simplify the error handling in vfio_msi_enable. Signed-off-by: Longpeng(Mike) --- hw/vfio/pci.c | 16 ++-- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index d5e542b..1ff84e6 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -47,6 +47,7 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); +static void vfio_msi_disable_common(VFIOPCIDevice *vdev); /* * Disabling BAR mmaping can be slow, but toggling it around INTx can @@ -655,24 +656,12 @@ retry: "MSI vectors, retry with %d", vdev->nr_vectors, ret); } -for (i = 0; i < vdev->nr_vectors; i++) { -VFIOMSIVector *vector = &vdev->msi_vectors[i]; -if (vector->virq >= 0) { -vfio_remove_kvm_msi_virq(vector); -} -qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt), -NULL, NULL, NULL); -event_notifier_cleanup(&vector->interrupt); -} - -g_free(vdev->msi_vectors); -vdev->msi_vectors = NULL; +vfio_msi_disable_common(vdev); if (ret > 0) { vdev->nr_vectors = ret; goto retry; } -vdev->nr_vectors = 0; /* * Failing to setup MSI doesn't really fall within any specification. @@ -680,7 +669,6 @@ retry: * out to fall back to INTx for this device. */ error_report("vfio: Error: Failed to enable MSI"); -vdev->interrupt = VFIO_INT_NONE; return; } -- 1.8.3.1
[PATCH v5 6/6] vfio: defer to commit kvm irq routing when enable msi/msix
In the migration resume phase, all unmasked msix vectors need to be set up when loading the VF state. However, the setup operation takes longer if the VM has more VFs and each VF has more unmasked vectors.

The hot spot is kvm_irqchip_commit_routes: on each invocation it scans and updates all irqfds that are already assigned, so more vectors mean more time is needed to process them.

    vfio_pci_load_config
      vfio_msix_enable
        msix_set_vector_notifiers
          for (vector = 0; vector < dev->msix_entries_nr; vector++) {
            vfio_msix_vector_do_use
              vfio_add_kvm_msi_virq
                kvm_irqchip_commit_routes <-- expensive
          }

We can reduce the cost by committing only once, outside the loop. The routes are cached in kvm_state; we commit them first and then bind the irqfd for each vector.

The test VM has 128 vcpus and 8 VFs (each one has 65 vectors). We measured the cost of vfio_msix_enable for each VF, and more than 90% of the cost can be eliminated.

    VF      Count of irqfds[*]  Original    With this patch

    1st     65                  8           2
    2nd     130                 15          2
    3rd     195                 22          2
    4th     260                 24          3
    5th     325                 36          2
    6th     390                 44          3
    7th     455                 51          3
    8th     520                 58          4
    Total                       258ms       21ms

[*] Count of irqfds: the number of irqfds that are already assigned and need to be processed in this round.

The optimization can be applied to the msi type too.
Signed-off-by: Longpeng(Mike) --- hw/vfio/pci.c | 123 -- hw/vfio/pci.h | 1 + 2 files changed, 95 insertions(+), 29 deletions(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 69ad081..5b3a86d 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -413,30 +413,37 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, int vector_n, bool msix) { -int virq; - if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi)) { return; } -if (event_notifier_init(&vector->kvm_interrupt, 0)) { +vector->virq = kvm_irqchip_add_deferred_msi_route(kvm_state, vector_n, + &vdev->pdev); +} + +static void vfio_connect_kvm_msi_virq(VFIOMSIVector *vector) +{ +if (vector->virq < 0) { return; } -virq = kvm_irqchip_add_msi_route(kvm_state, vector_n, &vdev->pdev); -if (virq < 0) { -event_notifier_cleanup(&vector->kvm_interrupt); -return; +if (event_notifier_init(&vector->kvm_interrupt, 0)) { +goto fail_notifier; } if (kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt, - NULL, virq) < 0) { -kvm_irqchip_release_virq(kvm_state, virq); -event_notifier_cleanup(&vector->kvm_interrupt); -return; + NULL, vector->virq) < 0) { +goto fail_kvm; } -vector->virq = virq; +return; + +fail_kvm: +event_notifier_cleanup(&vector->kvm_interrupt); +fail_notifier: +kvm_irqchip_release_virq(kvm_state, vector->virq); +vector->virq = -1; +return; } static void vfio_remove_kvm_msi_virq(VFIOMSIVector *vector) @@ -492,6 +499,10 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, } else { if (msg) { vfio_add_kvm_msi_virq(vdev, vector, nr, true); +if (!vdev->defer_kvm_irq_routing) { +kvm_irqchip_commit_routes(kvm_state); +vfio_connect_kvm_msi_virq(vector); +} } } @@ -501,11 +512,13 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, * increase them as needed. 
*/ if (vdev->nr_vectors < nr + 1) { -vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX); vdev->nr_vectors = nr + 1; -ret = vfio_enable_vectors(vdev, true); -if (ret) { -error_report("vfio: failed to enable vectors, %d", ret); +if (!vdev->defer_kvm_irq_routing) { +vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX); +ret = vfio_enable_vectors(vdev, true); +if (ret) { +error_report("vfio: failed to enable vectors, %d", ret); +} } } else { Error *err = NULL; @@ -567,6 +580,30 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr) } } +static void vfio_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev) +{ +assert(!vdev->defer_kvm_irq_rou
[PATCH v4 1/4] linux-headers: Update headers to Linux 5.18-rc6
From: Longpeng Update headers to 5.18-rc6. I need latest vhost changes. Signed-off-by: Longpeng --- linux-headers/linux/vhost.h | 7 +++ 1 file changed, 7 insertions(+) diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h index c998860d7b..5d99e7c242 100644 --- a/linux-headers/linux/vhost.h +++ b/linux-headers/linux/vhost.h @@ -150,4 +150,11 @@ /* Get the valid iova range */ #define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ struct vhost_vdpa_iova_range) + +/* Get the config size */ +#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) + +/* Get the count of all virtqueues */ +#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) + #endif -- 2.23.0
[PATCH v4 0/4] add generic vDPA device support
From: Longpeng Hi guys, With the generic vDPA device, QEMU won't need to touch the device types any more, much like vfio. We can use the generic vDPA device as follows: -device vhost-vdpa-device-pci,vhostdev=/dev/vhost-vdpa-X I've done some simple tests on Huawei's offloading card (net, 0.95). Changes v3 -> v4: v3: https://www.mail-archive.com/qemu-devel@nongnu.org/msg877015.html - reorganize the series [Stefano] - fix some typos [Stefano] - fix logical error in vhost_vdpa_device_realize [Stefano] Changes v2 -> v3: Patch 4 & 5: - only call vdpa ioctls in vdpa-dev.c [Stefano, Longpeng] - s/VQS_NUM/VQS_COUNT [Stefano] - check both vdpa_dev_fd and vdpa_dev [Stefano] Patch 6: - move all steps into vhost_vdpa_device_unrealize. [Stefano] Changes RFC -> v2: Patch 1: - rename 'pdev_id' to 'trans_devid' [Michael] - only use transitional device id for the devices listed in the spec [Michael] - use macros to make the id_info table clearer [Longpeng] - add some modern devices in the id_info table [Longpeng] Patch 2: - remove the GET_VECTORS_NUM command [Jason] Patch 4: - expose vdpa_dev_fd as a QOM property [Stefan] - introduce vhost_vdpa_device_get_u32 as a common function to make the code clearer [Stefan] - fix the misleading description of 'dc->desc' [Stefano] Patch 5: - check returned number of virtqueues [Stefan] Patch 6: - init s->num_queues [Stefano] - free s->dev.vqs [Stefano] Longpeng (Mike) (4): linux-headers: Update headers to Linux 5.18-rc6 virtio: get class_id and pci device id by the virtio id vdpa: add vdpa-dev support vdpa: add vdpa-dev-pci support hw/virtio/Kconfig| 5 + hw/virtio/meson.build| 2 + hw/virtio/vdpa-dev-pci.c | 101 + hw/virtio/vdpa-dev.c | 385 +++ hw/virtio/virtio-pci.c | 77 +++ hw/virtio/virtio-pci.h | 5 + include/hw/virtio/vdpa-dev.h | 43 linux-headers/linux/vhost.h | 7 + 8 files changed, 625 insertions(+) create mode 100644 hw/virtio/vdpa-dev-pci.c create mode 100644 hw/virtio/vdpa-dev.c create mode 100644 include/hw/virtio/vdpa-dev.h -- 2.23.0
[PATCH v4 2/4] virtio: get class_id and pci device id by the virtio id
From: Longpeng Add helpers to get the "Transitional PCI Device ID" and "class_id" of the device specified by the "Virtio Device ID". These helpers will be used to build the generic vDPA device later. Signed-off-by: Longpeng --- hw/virtio/virtio-pci.c | 77 ++ hw/virtio/virtio-pci.h | 5 +++ 2 files changed, 82 insertions(+) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 7cf1231c1c..fdfa205cee 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -19,6 +19,7 @@ #include "exec/memop.h" #include "standard-headers/linux/virtio_pci.h" +#include "standard-headers/linux/virtio_ids.h" #include "hw/boards.h" #include "hw/virtio/virtio.h" #include "migration/qemu-file-types.h" @@ -212,6 +213,79 @@ static int virtio_pci_load_queue(DeviceState *d, int n, QEMUFile *f) return 0; } +typedef struct VirtIOPCIIDInfo { +/* virtio id */ +uint16_t vdev_id; +/* pci device id for the transitional device */ +uint16_t trans_devid; +uint16_t class_id; +} VirtIOPCIIDInfo; + +#define VIRTIO_TRANS_DEV_ID_INFO(name, class) \ +{ \ +.vdev_id = VIRTIO_ID_##name,\ +.trans_devid = PCI_DEVICE_ID_VIRTIO_##name, \ +.class_id = class, \ +} + +#define VIRTIO_MODERN_DEV_ID_NFO(name, class) \ +{ \ +.vdev_id = VIRTIO_ID_##name,\ +.class_id = class, \ +} + +static const VirtIOPCIIDInfo virtio_pci_id_info[] = { +/* Non-transitional devices */ +VIRTIO_MODERN_DEV_ID_NFO(CRYPTO,PCI_CLASS_OTHERS), +VIRTIO_MODERN_DEV_ID_NFO(FS,PCI_CLASS_STORAGE_OTHER), +/* Transitional devices */ +VIRTIO_TRANS_DEV_ID_INFO(NET, PCI_CLASS_NETWORK_ETHERNET), +VIRTIO_TRANS_DEV_ID_INFO(BLOCK, PCI_CLASS_STORAGE_SCSI), +VIRTIO_TRANS_DEV_ID_INFO(CONSOLE, PCI_CLASS_COMMUNICATION_OTHER), +VIRTIO_TRANS_DEV_ID_INFO(SCSI, PCI_CLASS_STORAGE_SCSI), +VIRTIO_TRANS_DEV_ID_INFO(9P,PCI_BASE_CLASS_NETWORK), +VIRTIO_TRANS_DEV_ID_INFO(BALLOON, PCI_CLASS_OTHERS), +VIRTIO_TRANS_DEV_ID_INFO(RNG, PCI_CLASS_OTHERS), +}; + +static const VirtIOPCIIDInfo *virtio_pci_get_id_info(uint16_t vdev_id) +{ +const VirtIOPCIIDInfo *info = 
NULL; +int i; + +for (i = 0; i < ARRAY_SIZE(virtio_pci_id_info); i++) { +if (virtio_pci_id_info[i].vdev_id == vdev_id) { +info = &virtio_pci_id_info[i]; +break; +} +} + +if (!info) { +/* The device id is invalid or not added to the id_info yet. */ +error_report("Invalid virtio device(id %u)", vdev_id); +abort(); +} + +return info; +} + +/* + * Get the Transitional Device ID for the specific device, return + * zero if the device is non-transitional. + */ +uint16_t virtio_pci_get_trans_devid(uint16_t device_id) +{ +return virtio_pci_get_id_info(device_id)->trans_devid; +} + +/* + * Get the Class ID for the specific device. + */ +uint16_t virtio_pci_get_class_id(uint16_t device_id) +{ +return virtio_pci_get_id_info(device_id)->class_id; +} + static bool virtio_pci_ioeventfd_enabled(DeviceState *d) { VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); @@ -1675,6 +1749,9 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) * is set to PCI_SUBVENDOR_ID_REDHAT_QUMRANET by default. */ pci_set_word(config + PCI_SUBSYSTEM_ID, virtio_bus_get_vdev_id(bus)); +if (proxy->trans_devid) { +pci_config_set_device_id(config, proxy->trans_devid); +} } else { /* pure virtio-1.0 */ pci_set_word(config + PCI_VENDOR_ID, diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h index 2446dcd9ae..f08665cd1b 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h @@ -146,6 +146,8 @@ struct VirtIOPCIProxy { bool disable_modern; bool ignore_backend_features; OnOffAuto disable_legacy; +/* Transitional device id */ +uint16_t trans_devid; uint32_t class_code; uint32_t nvectors; uint32_t dfselect; @@ -158,6 +160,9 @@ struct VirtIOPCIProxy { VirtioBusState bus; }; +uint16_t virtio_pci_get_trans_devid(uint16_t device_id); +uint16_t virtio_pci_get_class_id(uint16_t device_id); + static inline bool virtio_pci_modern(VirtIOPCIProxy *proxy) { return !proxy->disable_modern; -- 2.23.0
[PATCH v4 4/4] vdpa: add vdpa-dev-pci support
From: Longpeng Supports vdpa-dev-pci, we can use the device as follow: -device vhost-vdpa-device-pci,vhostdev=/dev/vhost-vdpa-X Signed-off-by: Longpeng --- hw/virtio/meson.build| 1 + hw/virtio/vdpa-dev-pci.c | 101 +++ 2 files changed, 102 insertions(+) create mode 100644 hw/virtio/vdpa-dev-pci.c diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 8f6f86db71..c2da69616f 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -50,6 +50,7 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_SERIAL', if_true: files('virtio-serial-pc virtio_pci_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem-pci.c')) +virtio_pci_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev-pci.c')) virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) diff --git a/hw/virtio/vdpa-dev-pci.c b/hw/virtio/vdpa-dev-pci.c new file mode 100644 index 00..31bd17353a --- /dev/null +++ b/hw/virtio/vdpa-dev-pci.c @@ -0,0 +1,101 @@ +/* + * Vhost Vdpa Device PCI Bindings + * + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All Rights Reserved. + * + * Authors: + * Longpeng + * + * Largely based on the "vhost-user-blk-pci.c" and "vhost-user-blk.c" implemented by: + * Changpeng Liu + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. 
+ */ +#include "qemu/osdep.h" +#include +#include +#include "hw/virtio/virtio.h" +#include "hw/virtio/vdpa-dev.h" +#include "hw/pci/pci.h" +#include "hw/qdev-properties.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/module.h" +#include "virtio-pci.h" +#include "qom/object.h" + + +typedef struct VhostVdpaDevicePCI VhostVdpaDevicePCI; + +#define TYPE_VHOST_VDPA_DEVICE_PCI "vhost-vdpa-device-pci-base" +DECLARE_INSTANCE_CHECKER(VhostVdpaDevicePCI, VHOST_VDPA_DEVICE_PCI, + TYPE_VHOST_VDPA_DEVICE_PCI) + +struct VhostVdpaDevicePCI { +VirtIOPCIProxy parent_obj; +VhostVdpaDevice vdev; +}; + +static void vhost_vdpa_device_pci_instance_init(Object *obj) +{ +VhostVdpaDevicePCI *dev = VHOST_VDPA_DEVICE_PCI(obj); + +virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), +TYPE_VHOST_VDPA_DEVICE); +object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev), + "bootindex"); +} + +static Property vhost_vdpa_device_pci_properties[] = { +DEFINE_PROP_END_OF_LIST(), +}; + +static int vhost_vdpa_device_pci_post_init(VhostVdpaDevice *v, Error **errp) +{ +VhostVdpaDevicePCI *dev = container_of(v, VhostVdpaDevicePCI, vdev); +VirtIOPCIProxy *vpci_dev = &dev->parent_obj; + +vpci_dev->class_code = virtio_pci_get_class_id(v->vdev_id); +vpci_dev->trans_devid = virtio_pci_get_trans_devid(v->vdev_id); +/* one for config vector */ +vpci_dev->nvectors = v->num_queues + 1; + +return 0; +} + +static void +vhost_vdpa_device_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ +VhostVdpaDevicePCI *dev = VHOST_VDPA_DEVICE_PCI(vpci_dev); + +dev->vdev.post_init = vhost_vdpa_device_pci_post_init; +qdev_realize(DEVICE(&dev->vdev), BUS(&vpci_dev->bus), errp); +} + +static void vhost_vdpa_device_pci_class_init(ObjectClass *klass, void *data) +{ +DeviceClass *dc = DEVICE_CLASS(klass); +VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + +set_bit(DEVICE_CATEGORY_MISC, dc->categories); +device_class_set_props(dc, vhost_vdpa_device_pci_properties); +k->realize = 
vhost_vdpa_device_pci_realize; +} + +static const VirtioPCIDeviceTypeInfo vhost_vdpa_device_pci_info = { +.base_name = TYPE_VHOST_VDPA_DEVICE_PCI, +.generic_name= "vhost-vdpa-device-pci", +.transitional_name = "vhost-vdpa-device-pci-transitional", +.non_transitional_name = "vhost-vdpa-device-pci-non-transitional", +.instance_size = sizeof(VhostVdpaDevicePCI), +.instance_init = vhost_vdpa_device_pci_instance_init, +.class_init = vhost_vdpa_device_pci_class_init, +}; + +static void vhost_vdpa_device_pci_register(void) +{ +virtio_pci_types_register(&vhost_vdpa_device_pci_info); +} + +type_init(vhost_vdpa_device_pci_register); -- 2.23.0
[PATCH v4 3/4] vdpa: add vdpa-dev support
From: Longpeng Supports vdpa-dev. Signed-off-by: Longpeng --- hw/virtio/Kconfig| 5 + hw/virtio/meson.build| 1 + hw/virtio/vdpa-dev.c | 385 +++ include/hw/virtio/vdpa-dev.h | 43 4 files changed, 434 insertions(+) create mode 100644 hw/virtio/vdpa-dev.c create mode 100644 include/hw/virtio/vdpa-dev.h diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig index c144d42f9b..2723283382 100644 --- a/hw/virtio/Kconfig +++ b/hw/virtio/Kconfig @@ -68,3 +68,8 @@ config VHOST_USER_RNG bool default y depends on VIRTIO && VHOST_USER + +config VHOST_VDPA_DEV +bool +default y if VIRTIO_PCI +depends on VIRTIO && VHOST_VDPA && LINUX diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 67dc77e00f..8f6f86db71 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -29,6 +29,7 @@ virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_I2C'], if_true: files('vhost-user-i2c-pci.c')) virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) virtio_ss.add(when: ['CONFIG_VHOST_USER_RNG', 'CONFIG_VIRTIO_PCI'], if_true: files('vhost-user-rng-pci.c')) +virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c')) virtio_pci_ss = ss.source_set() virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c')) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c new file mode 100644 index 00..543b5b4b81 --- /dev/null +++ b/hw/virtio/vdpa-dev.c @@ -0,0 +1,385 @@ +/* + * Vhost Vdpa Device + * + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All Rights Reserved. + * + * Authors: + * Longpeng + * + * Largely based on the "vhost-user-blk-pci.c" and "vhost-user-blk.c" implemented by: + * Changpeng Liu + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. 
+ */ +#include "qemu/osdep.h" +#include +#include +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/cutils.h" +#include "hw/qdev-core.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/virtio-access.h" +#include "hw/virtio/vdpa-dev.h" +#include "sysemu/sysemu.h" +#include "sysemu/runstate.h" + +static void +vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ +/* Nothing to do */ +} + +static uint32_t +vhost_vdpa_device_get_u32(int fd, unsigned long int cmd, Error **errp) +{ +uint32_t val = (uint32_t)-1; + +if (ioctl(fd, cmd, &val) < 0) { +error_setg(errp, "vhost-vdpa-device: cmd 0x%lx failed: %s", + cmd, strerror(errno)); +} + +return val; +} + +static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) +{ +VirtIODevice *vdev = VIRTIO_DEVICE(dev); +VhostVdpaDevice *v = VHOST_VDPA_DEVICE(vdev); +uint16_t max_queue_size; +struct vhost_virtqueue *vqs; +int i, ret; + +if (!v->vhostdev && v->vhostfd == -1) { +error_setg(errp, "both vhostdev and vhostfd are missing"); +return; +} + +if (v->vhostdev && v->vhostfd != -1) { +error_setg(errp, "both vhostdev and vhostfd are set"); +return; +} + +if (v->vhostfd == -1) { +v->vhostfd = qemu_open(v->vhostdev, O_RDWR, errp); +if (*errp) { +return; +} +} +v->vdpa.device_fd = v->vhostfd; + +v->vdev_id = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_DEVICE_ID, errp); +if (*errp) { +goto out; +} + +max_queue_size = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_VRING_NUM, errp); +if (*errp) { +goto out; +} + +if (v->queue_size > max_queue_size) { +error_setg(errp, "vhost-vdpa-device: invalid queue_size: %u (max:%u)", + v->queue_size, max_queue_size); +goto out; +} else if (!v->queue_size) { +v->queue_size = max_queue_size; +} + +v->num_queues = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_VQS_COUNT, errp); +if 
(*errp) { +goto out; +} + +if (!v->num_queues || v->num_queues > VIRTIO_QUEUE_MAX) { +error_setg(errp, "invalid number of virtqueues: %u (max:%u)", + v->num_queues, VIRTIO_QUEUE_MAX); +goto out; +} + +v->dev.nvqs = v->num_queues; +vqs = g_new0(struct vhost_virtqueue, v->dev.nvqs); +v->dev.vqs = vqs; +v->dev.vq_index = 0; +v->dev.vq_index_end = v->dev.nvqs; +v->dev.backend_features = 0; +v->started = false; + +ret = vhost_dev_init(&v->dev, &v->vdpa, VHOST_BACKEND_TYPE_VDPA, 0,
[PATCH v5 3/4] vdpa: add vdpa-dev support
From: Longpeng Support vdpa-dev; the device can be used directly: -M microvm -m 512m -smp 2 -kernel ... -initrd ... -device \ vhost-vdpa-device,vhostdev=/dev/vhost-vdpa-x Signed-off-by: Longpeng --- hw/virtio/Kconfig| 5 + hw/virtio/meson.build| 1 + hw/virtio/vdpa-dev.c | 377 +++ include/hw/virtio/vdpa-dev.h | 43 4 files changed, 426 insertions(+) create mode 100644 hw/virtio/vdpa-dev.c create mode 100644 include/hw/virtio/vdpa-dev.h diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig index c144d42f9b..724eb58a32 100644 --- a/hw/virtio/Kconfig +++ b/hw/virtio/Kconfig @@ -68,3 +68,8 @@ config VHOST_USER_RNG bool default y depends on VIRTIO && VHOST_USER + +config VHOST_VDPA_DEV +bool +default y +depends on VIRTIO && VHOST_VDPA && LINUX diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 67dc77e00f..8f6f86db71 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -29,6 +29,7 @@ virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_I2C'], if_true: files('vhost-user-i2c-pci.c')) virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) virtio_ss.add(when: ['CONFIG_VHOST_USER_RNG', 'CONFIG_VIRTIO_PCI'], if_true: files('vhost-user-rng-pci.c')) +virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c')) virtio_pci_ss = ss.source_set() virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c')) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c new file mode 100644 index 00..56597c881a --- /dev/null +++ b/hw/virtio/vdpa-dev.c @@ -0,0 +1,377 @@ +/* + * Vhost Vdpa Device + * + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All Rights Reserved. + * + * Authors: + * Longpeng + * + * Largely based on the "vhost-user-blk-pci.c" and "vhost-user-blk.c" implemented by: + * Changpeng Liu + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. 
+ * See the COPYING.LIB file in the top-level directory. + */ +#include "qemu/osdep.h" +#include +#include +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/cutils.h" +#include "hw/qdev-core.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/virtio-access.h" +#include "hw/virtio/vdpa-dev.h" +#include "sysemu/sysemu.h" +#include "sysemu/runstate.h" + +static void +vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ +/* Nothing to do */ +} + +static uint32_t +vhost_vdpa_device_get_u32(int fd, unsigned long int cmd, Error **errp) +{ +uint32_t val = (uint32_t)-1; + +if (ioctl(fd, cmd, &val) < 0) { +error_setg(errp, "vhost-vdpa-device: cmd 0x%lx failed: %s", + cmd, strerror(errno)); +} + +return val; +} + +static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) +{ +VirtIODevice *vdev = VIRTIO_DEVICE(dev); +VhostVdpaDevice *v = VHOST_VDPA_DEVICE(vdev); +uint16_t max_queue_size; +struct vhost_virtqueue *vqs; +int i, ret; + +if (!v->vhostdev) { +error_setg(errp, "vhost-vdpa-device: vhostdev are missing"); +return; +} + +v->vhostfd = qemu_open(v->vhostdev, O_RDWR, errp); +if (*errp) { +return; +} +v->vdpa.device_fd = v->vhostfd; + +v->vdev_id = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_DEVICE_ID, errp); +if (*errp) { +goto out; +} + +max_queue_size = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_VRING_NUM, errp); +if (*errp) { +goto out; +} + +if (v->queue_size > max_queue_size) { +error_setg(errp, "vhost-vdpa-device: invalid queue_size: %u (max:%u)", + v->queue_size, max_queue_size); +goto out; +} else if (!v->queue_size) { +v->queue_size = max_queue_size; +} + +v->num_queues = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_VQS_COUNT, errp); +if (*errp) { +goto out; +} + +if (!v->num_queues || v->num_queues > VIRTIO_QUEUE_MAX) { 
+error_setg(errp, "invalid number of virtqueues: %u (max:%u)", + v->num_queues, VIRTIO_QUEUE_MAX); +goto out; +} + +v->dev.nvqs = v->num_queues; +vqs = g_new0(struct vhost_virtqueue, v->dev.nvqs); +v->dev.vqs = vqs; +v->dev.vq_index = 0; +v->dev.vq_index_end = v->dev.nvqs; +v->dev.backend_features = 0; +v->started = false; + +ret = vhost_dev_init(&v->dev, &v->vdpa, VHOST_BACKEND_TYPE_VDPA, 0, NULL); +if (ret < 0) { +error_setg(errp, "vhost-vdpa-device: vhost
[PATCH v5 0/4] add generic vDPA device support
From: Longpeng Hi guys, With the generic vDPA device, QEMU won't need to touch the device types any more, much like vfio. We can use the generic vDPA device as follows: -device vhost-vdpa-device-pci,vhostdev=/dev/vhost-vdpa-X Or -M microvm -m 512m -smp 2 -kernel ... -initrd ... -device \ vhost-vdpa-device,vhostdev=/dev/vhost-vdpa-x I've done some simple tests on Huawei's offloading card (net, 0.95). Changes v4 -> v5: Patch 3: - remove vhostfd [Jason] - support virtio-mmio [Jason] Changes v3 -> v4: v3: https://www.mail-archive.com/qemu-devel@nongnu.org/msg877015.html - reorganize the series [Stefano] - fix some typos [Stefano] - fix logical error in vhost_vdpa_device_realize [Stefano] Changes v2 -> v3: Patch 4 & 5: - only call vdpa ioctls in vdpa-dev.c [Stefano, Longpeng] - s/VQS_NUM/VQS_COUNT [Stefano] - check both vdpa_dev_fd and vdpa_dev [Stefano] Patch 6: - move all steps into vhost_vdpa_device_unrealize. [Stefano] Changes RFC -> v2: Patch 1: - rename 'pdev_id' to 'trans_devid' [Michael] - only use transitional device id for the devices listed in the spec [Michael] - use macros to make the id_info table clearer [Longpeng] - add some modern devices in the id_info table [Longpeng] Patch 2: - remove the GET_VECTORS_NUM command [Jason] Patch 4: - expose vdpa_dev_fd as a QOM property [Stefan] - introduce vhost_vdpa_device_get_u32 as a common function to make the code clearer [Stefan] - fix the misleading description of 'dc->desc' [Stefano] Patch 5: - check returned number of virtqueues [Stefan] Patch 6: - init s->num_queues [Stefano] - free s->dev.vqs [Stefano] Longpeng (Mike) (4): linux-headers: Update headers to Linux 5.18-rc6 virtio: get class_id and pci device id by the virtio id vdpa: add vdpa-dev support vdpa: add vdpa-dev-pci support hw/virtio/Kconfig| 5 + hw/virtio/meson.build| 2 + hw/virtio/vdpa-dev-pci.c | 101 ++ hw/virtio/vdpa-dev.c | 377 +++ hw/virtio/virtio-pci.c | 77 +++ hw/virtio/virtio-pci.h | 5 + include/hw/virtio/vdpa-dev.h | 43 
linux-headers/linux/vhost.h | 7 + 8 files changed, 617 insertions(+) create mode 100644 hw/virtio/vdpa-dev-pci.c create mode 100644 hw/virtio/vdpa-dev.c create mode 100644 include/hw/virtio/vdpa-dev.h -- 2.23.0
[PATCH v5 1/4] linux-headers: Update headers to Linux 5.18-rc6
From: Longpeng Update headers to 5.18-rc6. I need latest vhost changes. Signed-off-by: Longpeng --- linux-headers/linux/vhost.h | 7 +++ 1 file changed, 7 insertions(+) diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h index c998860d7b..5d99e7c242 100644 --- a/linux-headers/linux/vhost.h +++ b/linux-headers/linux/vhost.h @@ -150,4 +150,11 @@ /* Get the valid iova range */ #define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ struct vhost_vdpa_iova_range) + +/* Get the config size */ +#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) + +/* Get the count of all virtqueues */ +#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) + #endif -- 2.23.0
[PATCH v5 4/4] vdpa: add vdpa-dev-pci support
From: Longpeng Supports vdpa-dev-pci, we can use the device as follow: -device vhost-vdpa-device-pci,vhostdev=/dev/vhost-vdpa-X Signed-off-by: Longpeng --- hw/virtio/meson.build| 1 + hw/virtio/vdpa-dev-pci.c | 101 +++ 2 files changed, 102 insertions(+) create mode 100644 hw/virtio/vdpa-dev-pci.c diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 8f6f86db71..c2da69616f 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -50,6 +50,7 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_SERIAL', if_true: files('virtio-serial-pc virtio_pci_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem-pci.c')) +virtio_pci_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev-pci.c')) virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) diff --git a/hw/virtio/vdpa-dev-pci.c b/hw/virtio/vdpa-dev-pci.c new file mode 100644 index 00..31bd17353a --- /dev/null +++ b/hw/virtio/vdpa-dev-pci.c @@ -0,0 +1,101 @@ +/* + * Vhost Vdpa Device PCI Bindings + * + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All Rights Reserved. + * + * Authors: + * Longpeng + * + * Largely based on the "vhost-user-blk-pci.c" and "vhost-user-blk.c" implemented by: + * Changpeng Liu + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. 
+ */ +#include "qemu/osdep.h" +#include +#include +#include "hw/virtio/virtio.h" +#include "hw/virtio/vdpa-dev.h" +#include "hw/pci/pci.h" +#include "hw/qdev-properties.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/module.h" +#include "virtio-pci.h" +#include "qom/object.h" + + +typedef struct VhostVdpaDevicePCI VhostVdpaDevicePCI; + +#define TYPE_VHOST_VDPA_DEVICE_PCI "vhost-vdpa-device-pci-base" +DECLARE_INSTANCE_CHECKER(VhostVdpaDevicePCI, VHOST_VDPA_DEVICE_PCI, + TYPE_VHOST_VDPA_DEVICE_PCI) + +struct VhostVdpaDevicePCI { +VirtIOPCIProxy parent_obj; +VhostVdpaDevice vdev; +}; + +static void vhost_vdpa_device_pci_instance_init(Object *obj) +{ +VhostVdpaDevicePCI *dev = VHOST_VDPA_DEVICE_PCI(obj); + +virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), +TYPE_VHOST_VDPA_DEVICE); +object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev), + "bootindex"); +} + +static Property vhost_vdpa_device_pci_properties[] = { +DEFINE_PROP_END_OF_LIST(), +}; + +static int vhost_vdpa_device_pci_post_init(VhostVdpaDevice *v, Error **errp) +{ +VhostVdpaDevicePCI *dev = container_of(v, VhostVdpaDevicePCI, vdev); +VirtIOPCIProxy *vpci_dev = &dev->parent_obj; + +vpci_dev->class_code = virtio_pci_get_class_id(v->vdev_id); +vpci_dev->trans_devid = virtio_pci_get_trans_devid(v->vdev_id); +/* one for config vector */ +vpci_dev->nvectors = v->num_queues + 1; + +return 0; +} + +static void +vhost_vdpa_device_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ +VhostVdpaDevicePCI *dev = VHOST_VDPA_DEVICE_PCI(vpci_dev); + +dev->vdev.post_init = vhost_vdpa_device_pci_post_init; +qdev_realize(DEVICE(&dev->vdev), BUS(&vpci_dev->bus), errp); +} + +static void vhost_vdpa_device_pci_class_init(ObjectClass *klass, void *data) +{ +DeviceClass *dc = DEVICE_CLASS(klass); +VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + +set_bit(DEVICE_CATEGORY_MISC, dc->categories); +device_class_set_props(dc, vhost_vdpa_device_pci_properties); +k->realize = 
vhost_vdpa_device_pci_realize; +} + +static const VirtioPCIDeviceTypeInfo vhost_vdpa_device_pci_info = { +.base_name = TYPE_VHOST_VDPA_DEVICE_PCI, +.generic_name= "vhost-vdpa-device-pci", +.transitional_name = "vhost-vdpa-device-pci-transitional", +.non_transitional_name = "vhost-vdpa-device-pci-non-transitional", +.instance_size = sizeof(VhostVdpaDevicePCI), +.instance_init = vhost_vdpa_device_pci_instance_init, +.class_init = vhost_vdpa_device_pci_class_init, +}; + +static void vhost_vdpa_device_pci_register(void) +{ +virtio_pci_types_register(&vhost_vdpa_device_pci_info); +} + +type_init(vhost_vdpa_device_pci_register); -- 2.23.0
[PATCH v5 2/4] virtio: get class_id and pci device id by the virtio id
From: Longpeng Add helpers to get the "Transitional PCI Device ID" and "class_id" of the device specified by the "Virtio Device ID". These helpers will be used to build the generic vDPA device later. Signed-off-by: Longpeng --- hw/virtio/virtio-pci.c | 77 ++ hw/virtio/virtio-pci.h | 5 +++ 2 files changed, 82 insertions(+) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 7cf1231c1c..fdfa205cee 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -19,6 +19,7 @@ #include "exec/memop.h" #include "standard-headers/linux/virtio_pci.h" +#include "standard-headers/linux/virtio_ids.h" #include "hw/boards.h" #include "hw/virtio/virtio.h" #include "migration/qemu-file-types.h" @@ -212,6 +213,79 @@ static int virtio_pci_load_queue(DeviceState *d, int n, QEMUFile *f) return 0; } +typedef struct VirtIOPCIIDInfo { +/* virtio id */ +uint16_t vdev_id; +/* pci device id for the transitional device */ +uint16_t trans_devid; +uint16_t class_id; +} VirtIOPCIIDInfo; + +#define VIRTIO_TRANS_DEV_ID_INFO(name, class) \ +{ \ +.vdev_id = VIRTIO_ID_##name,\ +.trans_devid = PCI_DEVICE_ID_VIRTIO_##name, \ +.class_id = class, \ +} + +#define VIRTIO_MODERN_DEV_ID_NFO(name, class) \ +{ \ +.vdev_id = VIRTIO_ID_##name,\ +.class_id = class, \ +} + +static const VirtIOPCIIDInfo virtio_pci_id_info[] = { +/* Non-transitional devices */ +VIRTIO_MODERN_DEV_ID_NFO(CRYPTO,PCI_CLASS_OTHERS), +VIRTIO_MODERN_DEV_ID_NFO(FS,PCI_CLASS_STORAGE_OTHER), +/* Transitional devices */ +VIRTIO_TRANS_DEV_ID_INFO(NET, PCI_CLASS_NETWORK_ETHERNET), +VIRTIO_TRANS_DEV_ID_INFO(BLOCK, PCI_CLASS_STORAGE_SCSI), +VIRTIO_TRANS_DEV_ID_INFO(CONSOLE, PCI_CLASS_COMMUNICATION_OTHER), +VIRTIO_TRANS_DEV_ID_INFO(SCSI, PCI_CLASS_STORAGE_SCSI), +VIRTIO_TRANS_DEV_ID_INFO(9P,PCI_BASE_CLASS_NETWORK), +VIRTIO_TRANS_DEV_ID_INFO(BALLOON, PCI_CLASS_OTHERS), +VIRTIO_TRANS_DEV_ID_INFO(RNG, PCI_CLASS_OTHERS), +}; + +static const VirtIOPCIIDInfo *virtio_pci_get_id_info(uint16_t vdev_id) +{ +const VirtIOPCIIDInfo *info = 
NULL; +int i; + +for (i = 0; i < ARRAY_SIZE(virtio_pci_id_info); i++) { +if (virtio_pci_id_info[i].vdev_id == vdev_id) { +info = &virtio_pci_id_info[i]; +break; +} +} + +if (!info) { +/* The device id is invalid or not added to the id_info yet. */ +error_report("Invalid virtio device(id %u)", vdev_id); +abort(); +} + +return info; +} + +/* + * Get the Transitional Device ID for the specific device, return + * zero if the device is non-transitional. + */ +uint16_t virtio_pci_get_trans_devid(uint16_t device_id) +{ +return virtio_pci_get_id_info(device_id)->trans_devid; +} + +/* + * Get the Class ID for the specific device. + */ +uint16_t virtio_pci_get_class_id(uint16_t device_id) +{ +return virtio_pci_get_id_info(device_id)->class_id; +} + static bool virtio_pci_ioeventfd_enabled(DeviceState *d) { VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); @@ -1675,6 +1749,9 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) * is set to PCI_SUBVENDOR_ID_REDHAT_QUMRANET by default. */ pci_set_word(config + PCI_SUBSYSTEM_ID, virtio_bus_get_vdev_id(bus)); +if (proxy->trans_devid) { +pci_config_set_device_id(config, proxy->trans_devid); +} } else { /* pure virtio-1.0 */ pci_set_word(config + PCI_VENDOR_ID, diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h index 2446dcd9ae..f08665cd1b 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h @@ -146,6 +146,8 @@ struct VirtIOPCIProxy { bool disable_modern; bool ignore_backend_features; OnOffAuto disable_legacy; +/* Transitional device id */ +uint16_t trans_devid; uint32_t class_code; uint32_t nvectors; uint32_t dfselect; @@ -158,6 +160,9 @@ struct VirtIOPCIProxy { VirtioBusState bus; }; +uint16_t virtio_pci_get_trans_devid(uint16_t device_id); +uint16_t virtio_pci_get_class_id(uint16_t device_id); + static inline bool virtio_pci_modern(VirtIOPCIProxy *proxy) { return !proxy->disable_modern; -- 2.23.0
[PATCH v6 0/4] add generic vDPA device support
From: Longpeng Hi guys, With the generic vDPA device, QEMU won't need to touch the device types any more, much like vfio. We can use the generic vDPA device as follows: -device vhost-vdpa-device-pci,vhostdev=/dev/vhost-vdpa-X Or -M microvm -m 512m -smp 2 -kernel ... -initrd ... -device \ vhost-vdpa-device,vhostdev=/dev/vhost-vdpa-x I've done some simple tests on Huawei's offloading card (net, 0.95). Changes v5 -> v6: Patch 2: - Turn to the original approach in the RFC to initialize the virtio_pci_id_info array. [Michael] https://lore.kernel.org/all/20220105005900.860-2-longpe...@huawei.com/ Patch 3: - Fix logical error of exception handler around the post_init. [Stefano] - Fix some coding style warnings. [Stefano] Patch 4: - Fix some coding style warnings. [Stefano] Changes v4 -> v5: Patch 3: - remove vhostfd [Jason] - support virtio-mmio [Jason] Changes v3 -> v4: v3: https://www.mail-archive.com/qemu-devel@nongnu.org/msg877015.html - reorganize the series [Stefano] - fix some typos [Stefano] - fix logical error in vhost_vdpa_device_realize [Stefano] Changes v2 -> v3: Patch 4 & 5: - only call vdpa ioctls in vdpa-dev.c [Stefano, Longpeng] - s/VQS_NUM/VQS_COUNT [Stefano] - check both vdpa_dev_fd and vdpa_dev [Stefano] Patch 6: - move all steps into vhost_vdpa_device_unrealize.
[Stefano] Changes RFC -> v2: Patch 1: - rename 'pdev_id' to 'trans_devid' [Michael] - only use transitional device id for the devices listed in the spec [Michael] - use macros to make the id_info table clearer [Longpeng] - add some modern devices in the id_info table [Longpeng] Patch 2: - remove the GET_VECTORS_NUM command [Jason] Patch 4: - expose vdpa_dev_fd as a QOM property [Stefan] - introduce vhost_vdpa_device_get_u32 as a common function to make the code clearer [Stefan] - fix the misleading description of 'dc->desc' [Stefano] Patch 5: - check returned number of virtqueues [Stefan] Patch 6: - init s->num_queues [Stefano] - free s->dev.vqs [Stefano] Longpeng (Mike) (4): linux-headers: Update headers to Linux 5.18-rc6 virtio: get class_id and pci device id by the virtio id vdpa: add vdpa-dev support vdpa: add vdpa-dev-pci support hw/virtio/Kconfig| 5 + hw/virtio/meson.build| 2 + hw/virtio/vdpa-dev-pci.c | 102 ++ hw/virtio/vdpa-dev.c | 377 +++ hw/virtio/virtio-pci.c | 101 ++ hw/virtio/virtio-pci.h | 5 + include/hw/virtio/vdpa-dev.h | 43 linux-headers/linux/vhost.h | 7 + 8 files changed, 642 insertions(+) create mode 100644 hw/virtio/vdpa-dev-pci.c create mode 100644 hw/virtio/vdpa-dev.c create mode 100644 include/hw/virtio/vdpa-dev.h -- 2.23.0
[PATCH v6 1/4] linux-headers: Update headers to Linux 5.18-rc6
From: Longpeng Update headers to Linux 5.18-rc6. I need the latest vhost changes (VHOST_VDPA_GET_CONFIG_SIZE and VHOST_VDPA_GET_VQS_COUNT). Signed-off-by: Longpeng --- linux-headers/linux/vhost.h | 7 +++ 1 file changed, 7 insertions(+) diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h index c998860d7b..5d99e7c242 100644 --- a/linux-headers/linux/vhost.h +++ b/linux-headers/linux/vhost.h @@ -150,4 +150,11 @@ /* Get the valid iova range */ #define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ struct vhost_vdpa_iova_range) + +/* Get the config size */ +#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) + +/* Get the count of all virtqueues */ +#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) + #endif -- 2.23.0
[PATCH v6 3/4] vdpa: add vdpa-dev support
From: Longpeng Supports vdpa-dev; we can use the device directly: -M microvm -m 512m -smp 2 -kernel ... -initrd ... -device \ vhost-vdpa-device,vhostdev=/dev/vhost-vdpa-x Signed-off-by: Longpeng --- hw/virtio/Kconfig| 5 + hw/virtio/meson.build| 1 + hw/virtio/vdpa-dev.c | 377 +++ include/hw/virtio/vdpa-dev.h | 43 4 files changed, 426 insertions(+) create mode 100644 hw/virtio/vdpa-dev.c create mode 100644 include/hw/virtio/vdpa-dev.h diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig index c144d42f9b..724eb58a32 100644 --- a/hw/virtio/Kconfig +++ b/hw/virtio/Kconfig @@ -68,3 +68,8 @@ config VHOST_USER_RNG bool default y depends on VIRTIO && VHOST_USER + +config VHOST_VDPA_DEV +bool +default y +depends on VIRTIO && VHOST_VDPA && LINUX diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 67dc77e00f..8f6f86db71 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -29,6 +29,7 @@ virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_I2C'], if_true: files('vhost-user-i2c-pci.c')) virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) virtio_ss.add(when: ['CONFIG_VHOST_USER_RNG', 'CONFIG_VIRTIO_PCI'], if_true: files('vhost-user-rng-pci.c')) +virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c')) virtio_pci_ss = ss.source_set() virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c')) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c new file mode 100644 index 00..1840f0e450 --- /dev/null +++ b/hw/virtio/vdpa-dev.c @@ -0,0 +1,377 @@ +/* + * Vhost Vdpa Device + * + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All Rights Reserved. + * + * Authors: + * Longpeng + * + * Largely based on the "vhost-user-blk-pci.c" and "vhost-user-blk.c" + * implemented by: + * Changpeng Liu + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. 
+ * See the COPYING.LIB file in the top-level directory. + */ +#include "qemu/osdep.h" +#include +#include +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/cutils.h" +#include "hw/qdev-core.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/virtio-access.h" +#include "hw/virtio/vdpa-dev.h" +#include "sysemu/sysemu.h" +#include "sysemu/runstate.h" + +static void +vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ +/* Nothing to do */ +} + +static uint32_t +vhost_vdpa_device_get_u32(int fd, unsigned long int cmd, Error **errp) +{ +uint32_t val = (uint32_t)-1; + +if (ioctl(fd, cmd, &val) < 0) { +error_setg(errp, "vhost-vdpa-device: cmd 0x%lx failed: %s", + cmd, strerror(errno)); +} + +return val; +} + +static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) +{ +VirtIODevice *vdev = VIRTIO_DEVICE(dev); +VhostVdpaDevice *v = VHOST_VDPA_DEVICE(vdev); +uint16_t max_queue_size; +struct vhost_virtqueue *vqs; +int i, ret; + +if (!v->vhostdev) { +error_setg(errp, "vhost-vdpa-device: vhostdev are missing"); +return; +} + +v->vhostfd = qemu_open(v->vhostdev, O_RDWR, errp); +if (*errp) { +return; +} +v->vdpa.device_fd = v->vhostfd; + +v->vdev_id = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_DEVICE_ID, errp); +if (*errp) { +goto out; +} + +max_queue_size = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_VRING_NUM, errp); +if (*errp) { +goto out; +} + +if (v->queue_size > max_queue_size) { +error_setg(errp, "vhost-vdpa-device: invalid queue_size: %u (max:%u)", + v->queue_size, max_queue_size); +goto out; +} else if (!v->queue_size) { +v->queue_size = max_queue_size; +} + +v->num_queues = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_VQS_COUNT, errp); +if (*errp) { +goto out; +} + +if (!v->num_queues || v->num_queues > VIRTIO_QUEUE_MAX) { 
+error_setg(errp, "invalid number of virtqueues: %u (max:%u)", + v->num_queues, VIRTIO_QUEUE_MAX); +goto out; +} + +v->dev.nvqs = v->num_queues; +vqs = g_new0(struct vhost_virtqueue, v->dev.nvqs); +v->dev.vqs = vqs; +v->dev.vq_index = 0; +v->dev.vq_index_end = v->dev.nvqs; +v->dev.backend_features = 0; +v->started = false; + +ret = vhost_dev_init(&v->dev, &v->vdpa, VHOST_BACKEND_TYPE_VDPA, 0, NULL); +if (ret < 0) { +error_setg(errp, "vhost-vdpa-device: vho
[PATCH v6 2/4] virtio: get class_id and pci device id by the virtio id
From: Longpeng Add helpers to get the "Transitional PCI Device ID" and "class_id" of the device specified by the "Virtio Device ID". These helpers will be used to build the generic vDPA device later. Signed-off-by: Longpeng --- hw/virtio/virtio-pci.c | 101 + hw/virtio/virtio-pci.h | 5 ++ 2 files changed, 106 insertions(+) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 7cf1231c1c..204388b967 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -19,6 +19,7 @@ #include "exec/memop.h" #include "standard-headers/linux/virtio_pci.h" +#include "standard-headers/linux/virtio_ids.h" #include "hw/boards.h" #include "hw/virtio/virtio.h" #include "migration/qemu-file-types.h" @@ -212,6 +213,103 @@ static int virtio_pci_load_queue(DeviceState *d, int n, QEMUFile *f) return 0; } +typedef struct VirtIOPCIIDInfo { +/* virtio id */ +uint16_t vdev_id; +/* pci device id for the transitional device */ +uint16_t trans_devid; +uint16_t class_id; +} VirtIOPCIIDInfo; + +#define VIRTIO_TRANS_DEV_ID_INFO(name, class) \ +{ \ +.vdev_id = VIRTIO_ID_##name,\ +.trans_devid = PCI_DEVICE_ID_VIRTIO_##name, \ +.class_id = class, \ +} + +#define VIRTIO_MODERN_DEV_ID_NFO(name, class) \ +{ \ +.vdev_id = VIRTIO_ID_##name,\ +.class_id = class, \ +} + +static const VirtIOPCIIDInfo virtio_pci_id_info[] = { +{ +.vdev_id = VIRTIO_ID_CRYPTO, +.class_id = PCI_CLASS_OTHERS, +}, { +.vdev_id = VIRTIO_ID_FS, +.class_id = PCI_CLASS_STORAGE_OTHER, +}, { +.vdev_id = VIRTIO_ID_NET, +.trans_devid = PCI_DEVICE_ID_VIRTIO_NET, +.class_id = PCI_CLASS_NETWORK_ETHERNET, +}, { +.vdev_id = VIRTIO_ID_BLOCK, +.trans_devid = PCI_DEVICE_ID_VIRTIO_BLOCK, +.class_id = PCI_CLASS_STORAGE_SCSI, +}, { +.vdev_id = VIRTIO_ID_CONSOLE, +.trans_devid = PCI_DEVICE_ID_VIRTIO_CONSOLE, +.class_id = PCI_CLASS_COMMUNICATION_OTHER, +}, { +.vdev_id = VIRTIO_ID_SCSI, +.trans_devid = PCI_DEVICE_ID_VIRTIO_SCSI, +.class_id = PCI_CLASS_STORAGE_SCSI +}, { +.vdev_id = VIRTIO_ID_9P, +.trans_devid = PCI_DEVICE_ID_VIRTIO_9P, 
+.class_id = PCI_BASE_CLASS_NETWORK, +}, { +.vdev_id = VIRTIO_ID_BALLOON, +.trans_devid = PCI_DEVICE_ID_VIRTIO_BALLOON, +.class_id = PCI_CLASS_OTHERS, +}, { +.vdev_id = VIRTIO_ID_RNG, +.trans_devid = PCI_DEVICE_ID_VIRTIO_RNG, +.class_id = PCI_CLASS_OTHERS, +}, +}; + +static const VirtIOPCIIDInfo *virtio_pci_get_id_info(uint16_t vdev_id) +{ +const VirtIOPCIIDInfo *info = NULL; +int i; + +for (i = 0; i < ARRAY_SIZE(virtio_pci_id_info); i++) { +if (virtio_pci_id_info[i].vdev_id == vdev_id) { +info = &virtio_pci_id_info[i]; +break; +} +} + +if (!info) { +/* The device id is invalid or not added to the id_info yet. */ +error_report("Invalid virtio device(id %u)", vdev_id); +abort(); +} + +return info; +} + +/* + * Get the Transitional Device ID for the specific device, return + * zero if the device is non-transitional. + */ +uint16_t virtio_pci_get_trans_devid(uint16_t device_id) +{ +return virtio_pci_get_id_info(device_id)->trans_devid; +} + +/* + * Get the Class ID for the specific device. + */ +uint16_t virtio_pci_get_class_id(uint16_t device_id) +{ +return virtio_pci_get_id_info(device_id)->class_id; +} + static bool virtio_pci_ioeventfd_enabled(DeviceState *d) { VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); @@ -1675,6 +1773,9 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) * is set to PCI_SUBVENDOR_ID_REDHAT_QUMRANET by default. 
*/ pci_set_word(config + PCI_SUBSYSTEM_ID, virtio_bus_get_vdev_id(bus)); +if (proxy->trans_devid) { +pci_config_set_device_id(config, proxy->trans_devid); +} } else { /* pure virtio-1.0 */ pci_set_word(config + PCI_VENDOR_ID, diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h index 2446dcd9ae..f08665cd1b 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h @@ -146,6 +146,8 @@ struct VirtIOPCIProxy { bool disable_modern; bool ignore_backend_features; OnOffAuto disable_legacy; +/* Transitional device id */ +uint16_t trans_devid; uint32_t class_code; uint32_t nvectors; uint32_t dfselect; @@ -158,6 +160,9 @@ struct VirtIOPCIProxy { VirtioBusState bus; }; +uint16_t virtio_pci_get_trans_devid(uint16_t device_id); +uint16_t virtio_pci_get_class_id(uint16_t device_id); + static inline bool virt
[PATCH v6 resend 4/4] vdpa: add vdpa-dev-pci support
From: Longpeng Supports vdpa-dev-pci, we can use the device as follow: -device vhost-vdpa-device-pci,vhostdev=/dev/vhost-vdpa-X Reviewed-by: Stefano Garzarella Signed-off-by: Longpeng --- hw/virtio/meson.build| 1 + hw/virtio/vdpa-dev-pci.c | 102 +++ 2 files changed, 103 insertions(+) create mode 100644 hw/virtio/vdpa-dev-pci.c diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 8f6f86db71..c2da69616f 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -50,6 +50,7 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_SERIAL', if_true: files('virtio-serial-pc virtio_pci_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem-pci.c')) +virtio_pci_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev-pci.c')) virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) diff --git a/hw/virtio/vdpa-dev-pci.c b/hw/virtio/vdpa-dev-pci.c new file mode 100644 index 00..fde35dfc92 --- /dev/null +++ b/hw/virtio/vdpa-dev-pci.c @@ -0,0 +1,102 @@ +/* + * Vhost Vdpa Device PCI Bindings + * + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All Rights Reserved. + * + * Authors: + * Longpeng + * + * Largely based on the "vhost-user-blk-pci.c" and "vhost-user-blk.c" + * implemented by: + * Changpeng Liu + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. 
+ */ +#include "qemu/osdep.h" +#include +#include +#include "hw/virtio/virtio.h" +#include "hw/virtio/vdpa-dev.h" +#include "hw/pci/pci.h" +#include "hw/qdev-properties.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/module.h" +#include "virtio-pci.h" +#include "qom/object.h" + + +typedef struct VhostVdpaDevicePCI VhostVdpaDevicePCI; + +#define TYPE_VHOST_VDPA_DEVICE_PCI "vhost-vdpa-device-pci-base" +DECLARE_INSTANCE_CHECKER(VhostVdpaDevicePCI, VHOST_VDPA_DEVICE_PCI, + TYPE_VHOST_VDPA_DEVICE_PCI) + +struct VhostVdpaDevicePCI { +VirtIOPCIProxy parent_obj; +VhostVdpaDevice vdev; +}; + +static void vhost_vdpa_device_pci_instance_init(Object *obj) +{ +VhostVdpaDevicePCI *dev = VHOST_VDPA_DEVICE_PCI(obj); + +virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), +TYPE_VHOST_VDPA_DEVICE); +object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev), + "bootindex"); +} + +static Property vhost_vdpa_device_pci_properties[] = { +DEFINE_PROP_END_OF_LIST(), +}; + +static int vhost_vdpa_device_pci_post_init(VhostVdpaDevice *v, Error **errp) +{ +VhostVdpaDevicePCI *dev = container_of(v, VhostVdpaDevicePCI, vdev); +VirtIOPCIProxy *vpci_dev = &dev->parent_obj; + +vpci_dev->class_code = virtio_pci_get_class_id(v->vdev_id); +vpci_dev->trans_devid = virtio_pci_get_trans_devid(v->vdev_id); +/* one for config vector */ +vpci_dev->nvectors = v->num_queues + 1; + +return 0; +} + +static void +vhost_vdpa_device_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ +VhostVdpaDevicePCI *dev = VHOST_VDPA_DEVICE_PCI(vpci_dev); + +dev->vdev.post_init = vhost_vdpa_device_pci_post_init; +qdev_realize(DEVICE(&dev->vdev), BUS(&vpci_dev->bus), errp); +} + +static void vhost_vdpa_device_pci_class_init(ObjectClass *klass, void *data) +{ +DeviceClass *dc = DEVICE_CLASS(klass); +VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + +set_bit(DEVICE_CATEGORY_MISC, dc->categories); +device_class_set_props(dc, vhost_vdpa_device_pci_properties); +k->realize = 
vhost_vdpa_device_pci_realize; +} + +static const VirtioPCIDeviceTypeInfo vhost_vdpa_device_pci_info = { +.base_name = TYPE_VHOST_VDPA_DEVICE_PCI, +.generic_name= "vhost-vdpa-device-pci", +.transitional_name = "vhost-vdpa-device-pci-transitional", +.non_transitional_name = "vhost-vdpa-device-pci-non-transitional", +.instance_size = sizeof(VhostVdpaDevicePCI), +.instance_init = vhost_vdpa_device_pci_instance_init, +.class_init = vhost_vdpa_device_pci_class_init, +}; + +static void vhost_vdpa_device_pci_register(void) +{ +virtio_pci_types_register(&vhost_vdpa_device_pci_info); +} + +type_init(vhost_vdpa_device_pci_register); -- 2.23.0
[PATCH v6 4/4] vdpa: add vdpa-dev-pci support
From: Longpeng Supports vdpa-dev-pci, we can use the device as follow: -device vhost-vdpa-device-pci,vhostdev=/dev/vhost-vdpa-X Reviewed-by: Stefano Garzarella Signed-off-by: Longpeng --- hw/virtio/meson.build| 1 + hw/virtio/vdpa-dev-pci.c | 102 +++ 2 files changed, 103 insertions(+) create mode 100644 hw/virtio/vdpa-dev-pci.c diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 8f6f86db71..c2da69616f 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -50,6 +50,7 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_SERIAL', if_true: files('virtio-serial-pc virtio_pci_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem-pci.c')) +virtio_pci_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev-pci.c')) virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) diff --git a/hw/virtio/vdpa-dev-pci.c b/hw/virtio/vdpa-dev-pci.c new file mode 100644 index 00..fde35dfc92 --- /dev/null +++ b/hw/virtio/vdpa-dev-pci.c @@ -0,0 +1,102 @@ +/* + * Vhost Vdpa Device PCI Bindings + * + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All Rights Reserved. + * + * Authors: + * Longpeng + * + * Largely based on the "vhost-user-blk-pci.c" and "vhost-user-blk.c" + * implemented by: + * Changpeng Liu + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. 
+ */ +#include "qemu/osdep.h" +#include +#include +#include "hw/virtio/virtio.h" +#include "hw/virtio/vdpa-dev.h" +#include "hw/pci/pci.h" +#include "hw/qdev-properties.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/module.h" +#include "virtio-pci.h" +#include "qom/object.h" + + +typedef struct VhostVdpaDevicePCI VhostVdpaDevicePCI; + +#define TYPE_VHOST_VDPA_DEVICE_PCI "vhost-vdpa-device-pci-base" +DECLARE_INSTANCE_CHECKER(VhostVdpaDevicePCI, VHOST_VDPA_DEVICE_PCI, + TYPE_VHOST_VDPA_DEVICE_PCI) + +struct VhostVdpaDevicePCI { +VirtIOPCIProxy parent_obj; +VhostVdpaDevice vdev; +}; + +static void vhost_vdpa_device_pci_instance_init(Object *obj) +{ +VhostVdpaDevicePCI *dev = VHOST_VDPA_DEVICE_PCI(obj); + +virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), +TYPE_VHOST_VDPA_DEVICE); +object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev), + "bootindex"); +} + +static Property vhost_vdpa_device_pci_properties[] = { +DEFINE_PROP_END_OF_LIST(), +}; + +static int vhost_vdpa_device_pci_post_init(VhostVdpaDevice *v, Error **errp) +{ +VhostVdpaDevicePCI *dev = container_of(v, VhostVdpaDevicePCI, vdev); +VirtIOPCIProxy *vpci_dev = &dev->parent_obj; + +vpci_dev->class_code = virtio_pci_get_class_id(v->vdev_id); +vpci_dev->trans_devid = virtio_pci_get_trans_devid(v->vdev_id); +/* one for config vector */ +vpci_dev->nvectors = v->num_queues + 1; + +return 0; +} + +static void +vhost_vdpa_device_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ +VhostVdpaDevicePCI *dev = VHOST_VDPA_DEVICE_PCI(vpci_dev); + +dev->vdev.post_init = vhost_vdpa_device_pci_post_init; +qdev_realize(DEVICE(&dev->vdev), BUS(&vpci_dev->bus), errp); +} + +static void vhost_vdpa_device_pci_class_init(ObjectClass *klass, void *data) +{ +DeviceClass *dc = DEVICE_CLASS(klass); +VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + +set_bit(DEVICE_CATEGORY_MISC, dc->categories); +device_class_set_props(dc, vhost_vdpa_device_pci_properties); +k->realize = 
vhost_vdpa_device_pci_realize; +} + +static const VirtioPCIDeviceTypeInfo vhost_vdpa_device_pci_info = { +.base_name = TYPE_VHOST_VDPA_DEVICE_PCI, +.generic_name= "vhost-vdpa-device-pci", +.transitional_name = "vhost-vdpa-device-pci-transitional", +.non_transitional_name = "vhost-vdpa-device-pci-non-transitional", +.instance_size = sizeof(VhostVdpaDevicePCI), +.instance_init = vhost_vdpa_device_pci_instance_init, +.class_init = vhost_vdpa_device_pci_class_init, +}; + +static void vhost_vdpa_device_pci_register(void) +{ +virtio_pci_types_register(&vhost_vdpa_device_pci_info); +} + +type_init(vhost_vdpa_device_pci_register); -- 2.23.0
[PATCH v6 resend 1/4] linux-headers: Update headers to Linux 5.18-rc6
From: Longpeng Update headers to Linux 5.18-rc6. I need the latest vhost changes (VHOST_VDPA_GET_CONFIG_SIZE and VHOST_VDPA_GET_VQS_COUNT). Signed-off-by: Longpeng --- linux-headers/linux/vhost.h | 7 +++ 1 file changed, 7 insertions(+) diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h index c998860d7b..5d99e7c242 100644 --- a/linux-headers/linux/vhost.h +++ b/linux-headers/linux/vhost.h @@ -150,4 +150,11 @@ /* Get the valid iova range */ #define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ struct vhost_vdpa_iova_range) + +/* Get the config size */ +#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) + +/* Get the count of all virtqueues */ +#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) + #endif -- 2.23.0
[PATCH v6 resend 3/4] vdpa: add vdpa-dev support
From: Longpeng Supports vdpa-dev; we can use the device directly: -M microvm -m 512m -smp 2 -kernel ... -initrd ... -device \ vhost-vdpa-device,vhostdev=/dev/vhost-vdpa-x Signed-off-by: Longpeng --- hw/virtio/Kconfig| 5 + hw/virtio/meson.build| 1 + hw/virtio/vdpa-dev.c | 377 +++ include/hw/virtio/vdpa-dev.h | 43 4 files changed, 426 insertions(+) create mode 100644 hw/virtio/vdpa-dev.c create mode 100644 include/hw/virtio/vdpa-dev.h diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig index c144d42f9b..724eb58a32 100644 --- a/hw/virtio/Kconfig +++ b/hw/virtio/Kconfig @@ -68,3 +68,8 @@ config VHOST_USER_RNG bool default y depends on VIRTIO && VHOST_USER + +config VHOST_VDPA_DEV +bool +default y +depends on VIRTIO && VHOST_VDPA && LINUX diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 67dc77e00f..8f6f86db71 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -29,6 +29,7 @@ virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_I2C'], if_true: files('vhost-user-i2c-pci.c')) virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) virtio_ss.add(when: ['CONFIG_VHOST_USER_RNG', 'CONFIG_VIRTIO_PCI'], if_true: files('vhost-user-rng-pci.c')) +virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c')) virtio_pci_ss = ss.source_set() virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c')) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c new file mode 100644 index 00..1840f0e450 --- /dev/null +++ b/hw/virtio/vdpa-dev.c @@ -0,0 +1,377 @@ +/* + * Vhost Vdpa Device + * + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All Rights Reserved. + * + * Authors: + * Longpeng + * + * Largely based on the "vhost-user-blk-pci.c" and "vhost-user-blk.c" + * implemented by: + * Changpeng Liu + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. 
+ * See the COPYING.LIB file in the top-level directory. + */ +#include "qemu/osdep.h" +#include +#include +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/cutils.h" +#include "hw/qdev-core.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/virtio-access.h" +#include "hw/virtio/vdpa-dev.h" +#include "sysemu/sysemu.h" +#include "sysemu/runstate.h" + +static void +vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ +/* Nothing to do */ +} + +static uint32_t +vhost_vdpa_device_get_u32(int fd, unsigned long int cmd, Error **errp) +{ +uint32_t val = (uint32_t)-1; + +if (ioctl(fd, cmd, &val) < 0) { +error_setg(errp, "vhost-vdpa-device: cmd 0x%lx failed: %s", + cmd, strerror(errno)); +} + +return val; +} + +static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) +{ +VirtIODevice *vdev = VIRTIO_DEVICE(dev); +VhostVdpaDevice *v = VHOST_VDPA_DEVICE(vdev); +uint16_t max_queue_size; +struct vhost_virtqueue *vqs; +int i, ret; + +if (!v->vhostdev) { +error_setg(errp, "vhost-vdpa-device: vhostdev are missing"); +return; +} + +v->vhostfd = qemu_open(v->vhostdev, O_RDWR, errp); +if (*errp) { +return; +} +v->vdpa.device_fd = v->vhostfd; + +v->vdev_id = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_DEVICE_ID, errp); +if (*errp) { +goto out; +} + +max_queue_size = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_VRING_NUM, errp); +if (*errp) { +goto out; +} + +if (v->queue_size > max_queue_size) { +error_setg(errp, "vhost-vdpa-device: invalid queue_size: %u (max:%u)", + v->queue_size, max_queue_size); +goto out; +} else if (!v->queue_size) { +v->queue_size = max_queue_size; +} + +v->num_queues = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_VQS_COUNT, errp); +if (*errp) { +goto out; +} + +if (!v->num_queues || v->num_queues > VIRTIO_QUEUE_MAX) { 
+error_setg(errp, "invalid number of virtqueues: %u (max:%u)", + v->num_queues, VIRTIO_QUEUE_MAX); +goto out; +} + +v->dev.nvqs = v->num_queues; +vqs = g_new0(struct vhost_virtqueue, v->dev.nvqs); +v->dev.vqs = vqs; +v->dev.vq_index = 0; +v->dev.vq_index_end = v->dev.nvqs; +v->dev.backend_features = 0; +v->started = false; + +ret = vhost_dev_init(&v->dev, &v->vdpa, VHOST_BACKEND_TYPE_VDPA, 0, NULL); +if (ret < 0) { +error_setg(errp, "vhost-vdpa-device: vho
[PATCH v6 resend 0/4] add generic vDPA device support
From: Longpeng Hi guys, With the generic vDPA device, QEMU won't need to touch the device types any more, much like vfio. We can use the generic vDPA device as follows: -device vhost-vdpa-device-pci,vhostdev=/dev/vhost-vdpa-X Or -M microvm -m 512m -smp 2 -kernel ... -initrd ... -device \ vhost-vdpa-device,vhostdev=/dev/vhost-vdpa-x I've done some simple tests on Huawei's offloading card (net, 0.95). Changes v5 -> v6: Patch 2: - Turn to the original approach in the RFC to initialize the virtio_pci_id_info array. [Michael] https://lore.kernel.org/all/20220105005900.860-2-longpe...@huawei.com/ Patch 3: - Fix logical error of exception handler around the post_init. [Stefano] - Fix some coding style warnings. [Stefano] Patch 4: - Fix some coding style warnings. [Stefano] Changes v4 -> v5: Patch 3: - remove vhostfd [Jason] - support virtio-mmio [Jason] Changes v3 -> v4: v3: https://www.mail-archive.com/qemu-devel@nongnu.org/msg877015.html - reorganize the series [Stefano] - fix some typos [Stefano] - fix logical error in vhost_vdpa_device_realize [Stefano] Changes v2 -> v3 Patch 4 & 5: - only call vdpa ioctls in vdpa-dev.c [Stefano, Longpeng] - s/VQS_NUM/VQS_COUNT [Stefano] - check both vdpa_dev_fd and vdpa_dev [Stefano] Patch 6: - move all steps into vhost_vdpa_device_unrealize. 
[Stefano] Changes RFC -> v2 Patch 1: - rename 'pdev_id' to 'trans_devid' [Michael] - only use transitional device id for the devices listed in the spec [Michael] - use macros to make the id_info table clearer [Longpeng] - add some modern devices in the id_info table [Longpeng] Patch 2: - remove the GET_VECTORS_NUM command [Jason] Patch 4: - expose vdpa_dev_fd as a QOM property [Stefan] - introduce vhost_vdpa_device_get_u32 as a common function to make the code clearer [Stefan] - fix the misleading description of 'dc->desc' [Stefano] Patch 5: - check returned number of virtqueues [Stefan] Patch 6: - init s->num_queues [Stefano] - free s->dev.vqs [Stefano] Longpeng (Mike) (4): linux-headers: Update headers to Linux 5.18-rc6 virtio: get class_id and pci device id by the virtio id vdpa: add vdpa-dev support vdpa: add vdpa-dev-pci support hw/virtio/Kconfig| 5 + hw/virtio/meson.build| 2 + hw/virtio/vdpa-dev-pci.c | 102 ++ hw/virtio/vdpa-dev.c | 377 +++ hw/virtio/virtio-pci.c | 88 hw/virtio/virtio-pci.h | 5 + include/hw/virtio/vdpa-dev.h | 43 linux-headers/linux/vhost.h | 7 + 8 files changed, 629 insertions(+) create mode 100644 hw/virtio/vdpa-dev-pci.c create mode 100644 hw/virtio/vdpa-dev.c create mode 100644 include/hw/virtio/vdpa-dev.h -- 2.23.0
[PATCH v6 resend 2/4] virtio: get class_id and pci device id by the virtio id
From: Longpeng Add helpers to get the "Transitional PCI Device ID" and "class_id" of the device specified by the "Virtio Device ID". These helpers will be used to build the generic vDPA device later. Signed-off-by: Longpeng --- hw/virtio/virtio-pci.c | 88 ++ hw/virtio/virtio-pci.h | 5 +++ 2 files changed, 93 insertions(+) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 7cf1231c1c..247d7a9002 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -19,6 +19,7 @@ #include "exec/memop.h" #include "standard-headers/linux/virtio_pci.h" +#include "standard-headers/linux/virtio_ids.h" #include "hw/boards.h" #include "hw/virtio/virtio.h" #include "migration/qemu-file-types.h" @@ -212,6 +213,90 @@ static int virtio_pci_load_queue(DeviceState *d, int n, QEMUFile *f) return 0; } +typedef struct VirtIOPCIIDInfo { +/* virtio id */ +uint16_t vdev_id; +/* pci device id for the transitional device */ +uint16_t trans_devid; +uint16_t class_id; +} VirtIOPCIIDInfo; + +static const VirtIOPCIIDInfo virtio_pci_id_info[] = { +{ +.vdev_id = VIRTIO_ID_CRYPTO, +.class_id = PCI_CLASS_OTHERS, +}, { +.vdev_id = VIRTIO_ID_FS, +.class_id = PCI_CLASS_STORAGE_OTHER, +}, { +.vdev_id = VIRTIO_ID_NET, +.trans_devid = PCI_DEVICE_ID_VIRTIO_NET, +.class_id = PCI_CLASS_NETWORK_ETHERNET, +}, { +.vdev_id = VIRTIO_ID_BLOCK, +.trans_devid = PCI_DEVICE_ID_VIRTIO_BLOCK, +.class_id = PCI_CLASS_STORAGE_SCSI, +}, { +.vdev_id = VIRTIO_ID_CONSOLE, +.trans_devid = PCI_DEVICE_ID_VIRTIO_CONSOLE, +.class_id = PCI_CLASS_COMMUNICATION_OTHER, +}, { +.vdev_id = VIRTIO_ID_SCSI, +.trans_devid = PCI_DEVICE_ID_VIRTIO_SCSI, +.class_id = PCI_CLASS_STORAGE_SCSI +}, { +.vdev_id = VIRTIO_ID_9P, +.trans_devid = PCI_DEVICE_ID_VIRTIO_9P, +.class_id = PCI_BASE_CLASS_NETWORK, +}, { +.vdev_id = VIRTIO_ID_BALLOON, +.trans_devid = PCI_DEVICE_ID_VIRTIO_BALLOON, +.class_id = PCI_CLASS_OTHERS, +}, { +.vdev_id = VIRTIO_ID_RNG, +.trans_devid = PCI_DEVICE_ID_VIRTIO_RNG, +.class_id = PCI_CLASS_OTHERS, +}, +}; + 
+static const VirtIOPCIIDInfo *virtio_pci_get_id_info(uint16_t vdev_id) +{ +const VirtIOPCIIDInfo *info = NULL; +int i; + +for (i = 0; i < ARRAY_SIZE(virtio_pci_id_info); i++) { +if (virtio_pci_id_info[i].vdev_id == vdev_id) { +info = &virtio_pci_id_info[i]; +break; +} +} + +if (!info) { +/* The device id is invalid or not added to the id_info yet. */ +error_report("Invalid virtio device(id %u)", vdev_id); +abort(); +} + +return info; +} + +/* + * Get the Transitional Device ID for the specific device, return + * zero if the device is non-transitional. + */ +uint16_t virtio_pci_get_trans_devid(uint16_t device_id) +{ +return virtio_pci_get_id_info(device_id)->trans_devid; +} + +/* + * Get the Class ID for the specific device. + */ +uint16_t virtio_pci_get_class_id(uint16_t device_id) +{ +return virtio_pci_get_id_info(device_id)->class_id; +} + static bool virtio_pci_ioeventfd_enabled(DeviceState *d) { VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); @@ -1675,6 +1760,9 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) * is set to PCI_SUBVENDOR_ID_REDHAT_QUMRANET by default. 
*/ pci_set_word(config + PCI_SUBSYSTEM_ID, virtio_bus_get_vdev_id(bus)); +if (proxy->trans_devid) { +pci_config_set_device_id(config, proxy->trans_devid); +} } else { /* pure virtio-1.0 */ pci_set_word(config + PCI_VENDOR_ID, diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h index 2446dcd9ae..f08665cd1b 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h @@ -146,6 +146,8 @@ struct VirtIOPCIProxy { bool disable_modern; bool ignore_backend_features; OnOffAuto disable_legacy; +/* Transitional device id */ +uint16_t trans_devid; uint32_t class_code; uint32_t nvectors; uint32_t dfselect; @@ -158,6 +160,9 @@ struct VirtIOPCIProxy { VirtioBusState bus; }; +uint16_t virtio_pci_get_trans_devid(uint16_t device_id); +uint16_t virtio_pci_get_class_id(uint16_t device_id); + static inline bool virtio_pci_modern(VirtIOPCIProxy *proxy) { return !proxy->disable_modern; -- 2.23.0
[RFC 01/10] virtio: get class_id and pci device id by the virtio id
From: Longpeng Add helpers to get the "Transitional PCI Device ID" and "class_id" of the device which is specified by the "Virtio Device ID". These helpers will be used to build the generic vDPA device later. Signed-off-by: Longpeng --- hw/virtio/virtio-pci.c | 93 ++ hw/virtio/virtio-pci.h | 4 ++ 2 files changed, 97 insertions(+) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 750aa47ec1..843085c4ea 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -19,6 +19,7 @@ #include "exec/memop.h" #include "standard-headers/linux/virtio_pci.h" +#include "standard-headers/linux/virtio_ids.h" #include "hw/boards.h" #include "hw/virtio/virtio.h" #include "migration/qemu-file-types.h" @@ -213,6 +214,95 @@ static int virtio_pci_load_queue(DeviceState *d, int n, QEMUFile *f) return 0; } +typedef struct VirtIOPCIIDInfo { +uint16_t vdev_id; /* virtio id */ +uint16_t pdev_id; /* pci device id */ +uint16_t class_id; +} VirtIOPCIIDInfo; + +static const VirtIOPCIIDInfo virtio_pci_id_info[] = { +{ +.vdev_id = VIRTIO_ID_NET, +.pdev_id = PCI_DEVICE_ID_VIRTIO_NET, +.class_id = PCI_CLASS_NETWORK_ETHERNET, +}, +{ +.vdev_id = VIRTIO_ID_BLOCK, +.pdev_id = PCI_DEVICE_ID_VIRTIO_BLOCK, +.class_id = PCI_CLASS_STORAGE_SCSI, +}, +{ +.vdev_id = VIRTIO_ID_CONSOLE, +.pdev_id = PCI_DEVICE_ID_VIRTIO_CONSOLE, +.class_id = PCI_CLASS_COMMUNICATION_OTHER, +}, +{ +.vdev_id = VIRTIO_ID_SCSI, +.pdev_id = PCI_DEVICE_ID_VIRTIO_SCSI, +.class_id = PCI_CLASS_STORAGE_SCSI, +}, +{ +.vdev_id = VIRTIO_ID_9P, +.pdev_id = PCI_DEVICE_ID_VIRTIO_9P, +.class_id = PCI_BASE_CLASS_NETWORK, +}, +{ +.vdev_id = VIRTIO_ID_VSOCK, +.pdev_id = PCI_DEVICE_ID_VIRTIO_VSOCK, +.class_id = PCI_CLASS_COMMUNICATION_OTHER, +}, +{ +.vdev_id = VIRTIO_ID_IOMMU, +.pdev_id = PCI_DEVICE_ID_VIRTIO_IOMMU, +.class_id = PCI_CLASS_OTHERS, +}, +{ +.vdev_id = VIRTIO_ID_MEM, +.pdev_id = PCI_DEVICE_ID_VIRTIO_MEM, +.class_id = PCI_CLASS_OTHERS, +}, +{ +.vdev_id = VIRTIO_ID_PMEM, +.pdev_id = PCI_DEVICE_ID_VIRTIO_PMEM, 
+.class_id = PCI_CLASS_OTHERS, +}, +{ +.vdev_id = VIRTIO_ID_RNG, +.pdev_id = PCI_DEVICE_ID_VIRTIO_RNG, +.class_id = PCI_CLASS_OTHERS, +}, +{ +.vdev_id = VIRTIO_ID_BALLOON, +.pdev_id = PCI_DEVICE_ID_VIRTIO_BALLOON, +.class_id = PCI_CLASS_OTHERS, +}, +}; + +static VirtIOPCIIDInfo virtio_pci_get_id_info(uint16_t vdev_id) +{ +VirtIOPCIIDInfo info = {}; +int i; + +for (i = 0; i < ARRAY_SIZE(virtio_pci_id_info); i++) { +if (virtio_pci_id_info[i].vdev_id == vdev_id) { +info = virtio_pci_id_info[i]; +break; +} +} + +return info; +} + +uint16_t virtio_pci_get_pci_devid(uint16_t device_id) +{ +return virtio_pci_get_id_info(device_id).pdev_id; +} + +uint16_t virtio_pci_get_class_id(uint16_t device_id) +{ +return virtio_pci_get_id_info(device_id).class_id; +} + static bool virtio_pci_ioeventfd_enabled(DeviceState *d) { VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); @@ -1674,6 +1764,9 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) * is set to PCI_SUBVENDOR_ID_REDHAT_QUMRANET by default. */ pci_set_word(config + PCI_SUBSYSTEM_ID, virtio_bus_get_vdev_id(bus)); +if (proxy->pdev_id) { +pci_config_set_device_id(config, proxy->pdev_id); +} } else { /* pure virtio-1.0 */ pci_set_word(config + PCI_VENDOR_ID, diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h index 2446dcd9ae..06aa59436e 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h @@ -146,6 +146,7 @@ struct VirtIOPCIProxy { bool disable_modern; bool ignore_backend_features; OnOffAuto disable_legacy; +uint16_t pdev_id; uint32_t class_code; uint32_t nvectors; uint32_t dfselect; @@ -158,6 +159,9 @@ struct VirtIOPCIProxy { VirtioBusState bus; }; +uint16_t virtio_pci_get_pci_devid(uint16_t device_id); +uint16_t virtio_pci_get_class_id(uint16_t device_id); + static inline bool virtio_pci_modern(VirtIOPCIProxy *proxy) { return !proxy->disable_modern; -- 2.23.0
[RFC 00/10] add generic vDPA device support
From: Longpeng Hi guys, This patchset tries to support the generic vDPA device; the previous discussion can be found here [1]. With the generic vDPA device, QEMU won't need to touch the device types any more, much like vfio. We can use the generic vDPA device as follows: -device vhost-vdpa-device-pci,vdpa-dev=/dev/vhost-vdpa-X I've done some simple tests on Huawei's offloading card (net, 0.95) and vdpa_sim_blk (1.0). Note: the kernel part has not been sent out yet; I'll send it as soon as possible. [1] https://lore.kernel.org/all/20211208052010.1719-1-longpe...@huawei.com/ Longpeng (Mike) (10): virtio: get class_id and pci device id by the virtio id vhost: add 3 commands for vhost-vdpa vdpa: add the infrastructure of vdpa-dev vdpa-dev: implement the instance_init/class_init interface vdpa-dev: implement the realize interface vdpa-dev: implement the unrealize interface vdpa-dev: implement the get_config/set_config interface vdpa-dev: implement the get_features interface vdpa-dev: implement the set_status interface vdpa-dev: mark the device as unmigratable hw/virtio/Kconfig| 5 + hw/virtio/meson.build| 2 + hw/virtio/vdpa-dev-pci.c | 127 + hw/virtio/vdpa-dev.c | 355 +++ hw/virtio/virtio-pci.c | 93 + hw/virtio/virtio-pci.h | 4 + include/hw/virtio/vdpa-dev.h | 26 +++ linux-headers/linux/vhost.h | 10 + 8 files changed, 622 insertions(+) create mode 100644 hw/virtio/vdpa-dev-pci.c create mode 100644 hw/virtio/vdpa-dev.c create mode 100644 include/hw/virtio/vdpa-dev.h -- 2.23.0
[RFC 02/10] vhost: add 3 commands for vhost-vdpa
From: Longpeng To support the generic vdpa device, we need to add the following ioctls: - GET_VECTORS_NUM: the count of vectors that are supported - GET_CONFIG_SIZE: the size of the virtio config space - GET_VQS_NUM: the count of virtqueues that are exported Signed-off-by: Longpeng --- linux-headers/linux/vhost.h | 10 ++ 1 file changed, 10 insertions(+) diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h index c998860d7b..c5edd75d15 100644 --- a/linux-headers/linux/vhost.h +++ b/linux-headers/linux/vhost.h @@ -150,4 +150,14 @@ /* Get the valid iova range */ #define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ struct vhost_vdpa_iova_range) + +/* Get the number of vectors */ +#define VHOST_VDPA_GET_VECTORS_NUM _IOR(VHOST_VIRTIO, 0x79, int) + +/* Get the virtio config size */ +#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x80, int) + +/* Get the number of virtqueues */ +#define VHOST_VDPA_GET_VQS_NUM _IOR(VHOST_VIRTIO, 0x81, int) + #endif -- 2.23.0
[RFC 04/10] vdpa-dev: implement the instance_init/class_init interface
From: Longpeng Implements the .instance_init and the .class_init interface. Signed-off-by: Longpeng --- hw/virtio/vdpa-dev-pci.c | 80 +++- hw/virtio/vdpa-dev.c | 68 +- include/hw/virtio/vdpa-dev.h | 2 + 3 files changed, 146 insertions(+), 4 deletions(-) diff --git a/hw/virtio/vdpa-dev-pci.c b/hw/virtio/vdpa-dev-pci.c index a5a7b528a9..0af54a26d4 100644 --- a/hw/virtio/vdpa-dev-pci.c +++ b/hw/virtio/vdpa-dev-pci.c @@ -23,14 +23,90 @@ struct VhostVdpaDevicePCI { VhostVdpaDevice vdev; }; +static uint32_t +vdpa_dev_pci_get_info(const char *name, uint64_t cmd, Error **errp) +{ +int device_fd; +uint32_t val; +int ret; + +device_fd = qemu_open(name, O_RDWR, errp); +if (device_fd == -1) { +return (uint32_t)-1; +} + +ret = ioctl(device_fd, cmd, &val); +if (ret < 0) { +error_setg(errp, "vhost-vdpa-device-pci: cmd 0x%lx failed: %s", + cmd, strerror(errno)); +goto out; +} + +out: +close(device_fd); +return val; +} + +static inline uint32_t +vdpa_dev_pci_get_devid(VhostVdpaDevicePCI *dev, Error **errp) +{ +return vdpa_dev_pci_get_info(dev->vdev.vdpa_dev, + VHOST_VDPA_GET_DEVICE_ID, errp); +} + +static inline uint32_t +vdpa_dev_pci_get_vectors_num(VhostVdpaDevicePCI *dev, Error **errp) +{ +return vdpa_dev_pci_get_info(dev->vdev.vdpa_dev, + VHOST_VDPA_GET_VECTORS_NUM, errp); +} + static void vhost_vdpa_device_pci_instance_init(Object *obj) { -return; +VhostVdpaDevicePCI *dev = VHOST_VDPA_DEVICE_PCI(obj); + +virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), +TYPE_VHOST_VDPA_DEVICE); +object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev), + "bootindex"); +} + +static Property vhost_vdpa_device_pci_properties[] = { +DEFINE_PROP_END_OF_LIST(), +}; + +static void +vhost_vdpa_device_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ +VhostVdpaDevicePCI *dev = VHOST_VDPA_DEVICE_PCI(vpci_dev); +DeviceState *vdev = DEVICE(&dev->vdev); +uint32_t devid; +uint32_t vectors; + +devid = vdpa_dev_pci_get_devid(dev, errp); +if (*errp) { +return; +} + +vectors = 
vdpa_dev_pci_get_vectors_num(dev, errp); +if (*errp) { +return; +} + +vpci_dev->class_code = virtio_pci_get_class_id(devid); +vpci_dev->pdev_id = virtio_pci_get_pci_devid(devid); +vpci_dev->nvectors = vectors; +qdev_realize(vdev, BUS(&vpci_dev->bus), errp); } static void vhost_vdpa_device_pci_class_init(ObjectClass *klass, void *data) { -return; +DeviceClass *dc = DEVICE_CLASS(klass); +VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + +set_bit(DEVICE_CATEGORY_MISC, dc->categories); +device_class_set_props(dc, vhost_vdpa_device_pci_properties); +k->realize = vhost_vdpa_device_pci_realize; } static const VirtioPCIDeviceTypeInfo vhost_vdpa_device_pci_info = { diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index f4f92b90b0..790117fb3b 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -15,16 +15,80 @@ #include "sysemu/sysemu.h" #include "sysemu/runstate.h" -static void vhost_vdpa_device_class_init(ObjectClass *klass, void *data) +static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) { return; } -static void vhost_vdpa_device_instance_init(Object *obj) +static void vhost_vdpa_device_unrealize(DeviceState *dev) +{ +return; +} + +static void +vhost_vdpa_device_get_config(VirtIODevice *vdev, uint8_t *config) +{ +return; +} + +static void +vhost_vdpa_device_set_config(VirtIODevice *vdev, const uint8_t *config) { return; } +static uint64_t vhost_vdpa_device_get_features(VirtIODevice *vdev, + uint64_t features, + Error **errp) +{ +return (uint64_t)-1; +} + +static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) +{ +return; +} + +static Property vhost_vdpa_device_properties[] = { +DEFINE_PROP_STRING("vdpa-dev", VhostVdpaDevice, vdpa_dev), +DEFINE_PROP_END_OF_LIST(), +}; + +static const VMStateDescription vmstate_vhost_vdpa_device = { +.name = "vhost-vdpa-device", +.minimum_version_id = 1, +.version_id = 1, +.fields = (VMStateField[]) { +VMSTATE_VIRTIO_DEVICE, +VMSTATE_END_OF_LIST() +}, +}; + +static void 
vhost_vdpa_device_class_init(ObjectClass *klass, void *data) +{ +DeviceClass *dc = DEVICE_CLASS(klass); +VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + +device_class_set_props(dc, vhost_vdpa_device_properties); +dc->desc = "VDPA-based generic PCI device assignment"; +dc->vmsd = &vmstate_vhost_vdpa_device; +set_bit(DEVICE_CATEGORY_MISC, dc->categories); +vdc->realize = vhost_v
[RFC 05/10] vdpa-dev: implement the realize interface
From: Longpeng Implements the .realize interface. Signed-off-by: Longpeng --- hw/virtio/vdpa-dev.c | 114 +++ include/hw/virtio/vdpa-dev.h | 8 +++ 2 files changed, 122 insertions(+) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index 790117fb3b..2d534d837a 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -15,9 +15,122 @@ #include "sysemu/sysemu.h" #include "sysemu/runstate.h" +static void +vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ +/* Nothing to do */ +} + +static int vdpa_dev_get_info_by_fd(int fd, uint64_t cmd, Error **errp) +{ +int val; + +if (ioctl(fd, cmd, &val) < 0) { +error_setg(errp, "vhost-vdpa-device: cmd 0x%lx failed: %s", + cmd, strerror(errno)); +return -1; +} + +return val; +} + +static inline int vdpa_dev_get_queue_size(int fd, Error **errp) +{ +return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_VRING_NUM, errp); +} + +static inline int vdpa_dev_get_vqs_num(int fd, Error **errp) +{ +return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_VQS_NUM, errp); +} + +static inline int vdpa_dev_get_config_size(int fd, Error **errp) +{ +return vdpa_dev_get_info_by_fd(fd, VHOST_VDPA_GET_CONFIG_SIZE, errp); +} + static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) { +VirtIODevice *vdev = VIRTIO_DEVICE(dev); +VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); +uint32_t device_id; +int max_queue_size; +int fd; +int i, ret; + +fd = qemu_open(s->vdpa_dev, O_RDWR, errp); +if (fd == -1) { +return; +} +s->vdpa.device_fd = fd; + +max_queue_size = vdpa_dev_get_queue_size(fd, errp); +if (*errp) { +goto out; +} + +if (s->queue_size > max_queue_size) { +error_setg(errp, "vhost-vdpa-device: invalid queue_size: %d (max:%d)", + s->queue_size, max_queue_size); +goto out; +} else if (!s->queue_size) { +s->queue_size = max_queue_size; +} + +ret = vdpa_dev_get_vqs_num(fd, errp); +if (*errp) { +goto out; +} + +s->dev.nvqs = ret; +s->dev.vqs = g_new0(struct vhost_virtqueue, s->dev.nvqs); +s->dev.vq_index = 0; 
+s->dev.vq_index_end = s->dev.nvqs; +s->dev.backend_features = 0; +s->started = false; + +ret = vhost_dev_init(&s->dev, &s->vdpa, VHOST_BACKEND_TYPE_VDPA, 0, NULL); +if (ret < 0) { +error_setg(errp, "vhost-vdpa-device: vhost initialization failed: %s", + strerror(-ret)); +goto out; +} + +ret = s->dev.vhost_ops->vhost_get_device_id(&s->dev, &device_id); +if (ret < 0) { +error_setg(errp, "vhost-vdpa-device: vhost get device id failed: %s", + strerror(-ret)); +goto vhost_cleanup; +} + +s->config_size = vdpa_dev_get_config_size(fd, errp); +if (*errp) { +goto vhost_cleanup; +} + +s->config = g_malloc0(s->config_size); + +ret = vhost_dev_get_config(&s->dev, s->config, s->config_size, NULL); +if (ret < 0) { +error_setg(errp, "vhost-vdpa-device: get config failed"); +goto config_err; +} + +virtio_init(vdev, "vhost-vdpa", device_id, s->config_size); + +s->virtqs = g_new0(VirtQueue *, s->dev.nvqs); +for (i = 0; i < s->dev.nvqs; i++) { +s->virtqs[i] = virtio_add_queue(vdev, s->queue_size, +vhost_vdpa_device_dummy_handle_output); +} + return; +config_err: +g_free(s->config); +vhost_cleanup: +vhost_dev_cleanup(&s->dev); +out: +close(fd); } static void vhost_vdpa_device_unrealize(DeviceState *dev) @@ -51,6 +164,7 @@ static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) static Property vhost_vdpa_device_properties[] = { DEFINE_PROP_STRING("vdpa-dev", VhostVdpaDevice, vdpa_dev), +DEFINE_PROP_UINT16("queue-size", VhostVdpaDevice, queue_size, 0), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/hw/virtio/vdpa-dev.h b/include/hw/virtio/vdpa-dev.h index 7a0e6bdcf8..49f8145d61 100644 --- a/include/hw/virtio/vdpa-dev.h +++ b/include/hw/virtio/vdpa-dev.h @@ -13,6 +13,14 @@ struct VhostVdpaDevice { VirtIODevice parent_obj; char *vdpa_dev; int32_t bootindex; +struct vhost_dev dev; +struct vhost_vdpa vdpa; +VirtQueue **virtqs; +uint8_t *config; +int config_size; +uint32_t num_queues; +uint16_t queue_size; +bool started; }; #endif -- 2.23.0
[RFC 03/10] vdpa: add the infrastructure of vdpa-dev
From: Longpeng Add the infrastructure of vdpa-dev (the generic vDPA device), we can add a generic vDPA device as follow: -device vhost-vdpa-device-pci,vdpa-dev=/dev/vhost-vdpa-X Signed-off-by: Longpeng --- hw/virtio/Kconfig| 5 hw/virtio/meson.build| 2 ++ hw/virtio/vdpa-dev-pci.c | 51 hw/virtio/vdpa-dev.c | 41 + include/hw/virtio/vdpa-dev.h | 16 +++ 5 files changed, 115 insertions(+) create mode 100644 hw/virtio/vdpa-dev-pci.c create mode 100644 hw/virtio/vdpa-dev.c create mode 100644 include/hw/virtio/vdpa-dev.h diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig index c144d42f9b..2723283382 100644 --- a/hw/virtio/Kconfig +++ b/hw/virtio/Kconfig @@ -68,3 +68,8 @@ config VHOST_USER_RNG bool default y depends on VIRTIO && VHOST_USER + +config VHOST_VDPA_DEV +bool +default y if VIRTIO_PCI +depends on VIRTIO && VHOST_VDPA && LINUX diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 521f7d64a8..8e8943e20b 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -29,6 +29,7 @@ virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_I2C'], if_true: files('vhost-user-i2c-pci.c')) virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) virtio_ss.add(when: ['CONFIG_VHOST_USER_RNG', 'CONFIG_VIRTIO_PCI'], if_true: files('vhost-user-rng-pci.c')) +virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c')) virtio_pci_ss = ss.source_set() virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c')) @@ -49,6 +50,7 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_SERIAL', if_true: files('virtio-serial-pc virtio_pci_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem-pci.c')) +virtio_pci_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: 
files('vdpa-dev-pci.c')) virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) diff --git a/hw/virtio/vdpa-dev-pci.c b/hw/virtio/vdpa-dev-pci.c new file mode 100644 index 00..a5a7b528a9 --- /dev/null +++ b/hw/virtio/vdpa-dev-pci.c @@ -0,0 +1,51 @@ +#include "qemu/osdep.h" +#include +#include +#include "hw/virtio/virtio.h" +#include "hw/virtio/vdpa-dev.h" +#include "hw/pci/pci.h" +#include "hw/qdev-properties.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/module.h" +#include "virtio-pci.h" +#include "qom/object.h" + + +typedef struct VhostVdpaDevicePCI VhostVdpaDevicePCI; + +#define TYPE_VHOST_VDPA_DEVICE_PCI "vhost-vdpa-device-pci-base" +DECLARE_INSTANCE_CHECKER(VhostVdpaDevicePCI, VHOST_VDPA_DEVICE_PCI, + TYPE_VHOST_VDPA_DEVICE_PCI) + +struct VhostVdpaDevicePCI { +VirtIOPCIProxy parent_obj; +VhostVdpaDevice vdev; +}; + +static void vhost_vdpa_device_pci_instance_init(Object *obj) +{ +return; +} + +static void vhost_vdpa_device_pci_class_init(ObjectClass *klass, void *data) +{ +return; +} + +static const VirtioPCIDeviceTypeInfo vhost_vdpa_device_pci_info = { +.base_name = TYPE_VHOST_VDPA_DEVICE_PCI, +.generic_name= "vhost-vdpa-device-pci", +.transitional_name = "vhost-vdpa-device-pci-transitional", +.non_transitional_name = "vhost-vdpa-device-pci-non-transitional", +.instance_size = sizeof(VhostVdpaDevicePCI), +.instance_init = vhost_vdpa_device_pci_instance_init, +.class_init = vhost_vdpa_device_pci_class_init, +}; + +static void vhost_vdpa_device_pci_register(void) +{ +virtio_pci_types_register(&vhost_vdpa_device_pci_info); +} + +type_init(vhost_vdpa_device_pci_register); diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c new file mode 100644 index 00..f4f92b90b0 --- /dev/null +++ b/hw/virtio/vdpa-dev.c @@ -0,0 +1,41 @@ +#include "qemu/osdep.h" +#include +#include +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/cutils.h" +#include "hw/qdev-core.h" +#include "hw/qdev-properties.h" +#include 
"hw/qdev-properties-system.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/virtio-access.h" +#include "hw/virtio/vdpa-dev.h" +#include "sysemu/sysemu.h" +#include "sysemu/runstate.h" + +static void vhost_vdpa_device_class_init(ObjectClass *klass, void *data) +{ +return; +} + +static void vhost_vdpa_device_instance_init(Object *obj) +{ +return; +} + +static const TypeInfo vhost_vdpa_device_info = { +.name = TYPE_VHOST_VDPA_DEVICE, +.parent = TYPE_VIRTIO_DEVICE, +.instance_size = sizeof(VhostVdpaDevice), +.class_init = vhost_vdpa_device_class_init, +.instance_init = vhost_vdpa_device_instance_init,
[RFC 07/10] vdpa-dev: implement the get_config/set_config interface
From: Longpeng Implements the .get_config and .set_config interface. Signed-off-by: Longpeng --- hw/virtio/vdpa-dev.c | 14 -- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index 4e4dd3d201..4f97a7521b 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -161,13 +161,23 @@ static void vhost_vdpa_device_unrealize(DeviceState *dev) static void vhost_vdpa_device_get_config(VirtIODevice *vdev, uint8_t *config) { -return; +VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); + +memcpy(config, s->config, s->config_size); } static void vhost_vdpa_device_set_config(VirtIODevice *vdev, const uint8_t *config) { -return; +VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); +int ret; + +ret = vhost_dev_set_config(&s->dev, s->config, 0, s->config_size, + VHOST_SET_CONFIG_TYPE_MASTER); +if (ret) { +error_report("set device config space failed"); +return; +} } static uint64_t vhost_vdpa_device_get_features(VirtIODevice *vdev, -- 2.23.0
[RFC 06/10] vdpa-dev: implement the unrealize interface
From: Longpeng Implements the .unrealize interface. Signed-off-by: Longpeng --- hw/virtio/vdpa-dev.c | 22 +- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index 2d534d837a..4e4dd3d201 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -133,9 +133,29 @@ out: close(fd); } +static void vhost_vdpa_vdev_unrealize(VhostVdpaDevice *s) +{ +VirtIODevice *vdev = VIRTIO_DEVICE(s); +int i; + +for (i = 0; i < s->num_queues; i++) { +virtio_delete_queue(s->virtqs[i]); +} +g_free(s->virtqs); +virtio_cleanup(vdev); + +g_free(s->config); +} + static void vhost_vdpa_device_unrealize(DeviceState *dev) { -return; +VirtIODevice *vdev = VIRTIO_DEVICE(dev); +VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); + +virtio_set_status(vdev, 0); +vhost_dev_cleanup(&s->dev); +vhost_vdpa_vdev_unrealize(s); +close(s->vdpa.device_fd); } static void -- 2.23.0
[RFC 10/10] vdpa-dev: mark the device as unmigratable
From: Longpeng The generic vDPA device doesn't support migration currently, so mark it as unmigratable temporarily. Signed-off-by: Longpeng --- hw/virtio/vdpa-dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index 64649bfb5a..0644aace22 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -305,6 +305,7 @@ static Property vhost_vdpa_device_properties[] = { static const VMStateDescription vmstate_vhost_vdpa_device = { .name = "vhost-vdpa-device", +.unmigratable = 1, .minimum_version_id = 1, .version_id = 1, .fields = (VMStateField[]) { -- 2.23.0
[RFC 09/10] vdpa-dev: implement the set_status interface
From: Longpeng Implements the .set_status interface. Signed-off-by: Longpeng --- hw/virtio/vdpa-dev.c | 100 ++- 1 file changed, 99 insertions(+), 1 deletion(-) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index 32b3117c4b..64649bfb5a 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -194,9 +194,107 @@ static uint64_t vhost_vdpa_device_get_features(VirtIODevice *vdev, return backend_features; } +static int vhost_vdpa_device_start(VirtIODevice *vdev, Error **errp) +{ +VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); +BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); +VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); +int i, ret; + +if (!k->set_guest_notifiers) { +error_setg(errp, "binding does not support guest notifiers"); +return -ENOSYS; +} + +ret = vhost_dev_enable_notifiers(&s->dev, vdev); +if (ret < 0) { +error_setg_errno(errp, -ret, "Error enabling host notifiers"); +return ret; +} + +ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true); +if (ret < 0) { +error_setg_errno(errp, -ret, "Error binding guest notifier"); +goto err_host_notifiers; +} + +s->dev.acked_features = vdev->guest_features; + +ret = vhost_dev_start(&s->dev, vdev); +if (ret < 0) { +error_setg_errno(errp, -ret, "Error starting vhost"); +goto err_guest_notifiers; +} +s->started = true; + +/* + * guest_notifier_mask/pending not used yet, so just unmask + * everything here. virtio-pci will do the right thing by + * enabling/disabling irqfd. 
+ */ +for (i = 0; i < s->dev.nvqs; i++) { +vhost_virtqueue_mask(&s->dev, vdev, i, false); +} + +return ret; + +err_guest_notifiers: +k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); +err_host_notifiers: +vhost_dev_disable_notifiers(&s->dev, vdev); +return ret; +} + +static void vhost_vdpa_device_stop(VirtIODevice *vdev) +{ +VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); +BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); +VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); +int ret; + +if (!s->started) { +return; +} +s->started = false; + +if (!k->set_guest_notifiers) { +return; +} + +vhost_dev_stop(&s->dev, vdev); + +ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); +if (ret < 0) { +error_report("vhost guest notifier cleanup failed: %d", ret); +return; +} + +vhost_dev_disable_notifiers(&s->dev, vdev); +} + static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) { -return; +VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); +bool should_start = virtio_device_started(vdev, status); +Error *local_err = NULL; +int ret; + +if (!vdev->vm_running) { +should_start = false; +} + +if (s->started == should_start) { +return; +} + +if (should_start) { +ret = vhost_vdpa_device_start(vdev, &local_err); +if (ret < 0) { +error_reportf_err(local_err, "vhost-vdpa-device: start failed: "); +} +} else { +vhost_vdpa_device_stop(vdev); +} } static Property vhost_vdpa_device_properties[] = { -- 2.23.0
[RFC 08/10] vdpa-dev: implement the get_features interface
From: Longpeng Implements the .get_features interface. Signed-off-by: Longpeng --- hw/virtio/vdpa-dev.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index 4f97a7521b..32b3117c4b 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -184,7 +184,14 @@ static uint64_t vhost_vdpa_device_get_features(VirtIODevice *vdev, uint64_t features, Error **errp) { -return (uint64_t)-1; +VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); +uint64_t backend_features = s->dev.features; + +if (!virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM)) { +virtio_clear_feature(&backend_features, VIRTIO_F_IOMMU_PLATFORM); +} + +return backend_features; } static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) -- 2.23.0
[RFC 1/2] sem-posix: remove the posix semaphore support
POSIX specifies an absolute time for sem_timedwait(), it would be affected if the system time is changing, but there is not a relative time or monotonic clock version of sem_timedwait, so we cannot gain from POSIX semaphore anymore. An alternative way is to use sem_trywait + usleep, maybe we can remove CONFIG_SEM_TIMEDWAIT in this way? No, because some systems (e.g. mac os) mark the sem_xxx API as deprecated. So maybe remove the usage of POSIX semaphore and turn to use the pthread variant for all systems looks better. Signed-off-by: Longpeng(Mike) --- include/qemu/thread-posix.h | 4 meson.build | 1 - util/qemu-thread-posix.c| 54 - 3 files changed, 59 deletions(-) diff --git a/include/qemu/thread-posix.h b/include/qemu/thread-posix.h index b792e6e..5466608 100644 --- a/include/qemu/thread-posix.h +++ b/include/qemu/thread-posix.h @@ -27,13 +27,9 @@ struct QemuCond { }; struct QemuSemaphore { -#ifndef CONFIG_SEM_TIMEDWAIT pthread_mutex_t lock; pthread_cond_t cond; unsigned int count; -#else -sem_t sem; -#endif bool initialized; }; diff --git a/meson.build b/meson.build index 762d7ce..3ccb110 100644 --- a/meson.build +++ b/meson.build @@ -1557,7 +1557,6 @@ config_host_data.set('CONFIG_POSIX_FALLOCATE', cc.has_function('posix_fallocate' config_host_data.set('CONFIG_POSIX_MEMALIGN', cc.has_function('posix_memalign')) config_host_data.set('CONFIG_PPOLL', cc.has_function('ppoll')) config_host_data.set('CONFIG_PREADV', cc.has_function('preadv', prefix: '#include ')) -config_host_data.set('CONFIG_SEM_TIMEDWAIT', cc.has_function('sem_timedwait', dependencies: threads)) config_host_data.set('CONFIG_SENDFILE', cc.has_function('sendfile')) config_host_data.set('CONFIG_SETNS', cc.has_function('setns') and cc.has_function('unshare')) config_host_data.set('CONFIG_SYNCFS', cc.has_function('syncfs')) diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c index e1225b6..1ad2503 100644 --- a/util/qemu-thread-posix.c +++ b/util/qemu-thread-posix.c @@ -219,7 +219,6 @@ void 
qemu_sem_init(QemuSemaphore *sem, int init) { int rc; -#ifndef CONFIG_SEM_TIMEDWAIT rc = pthread_mutex_init(&sem->lock, NULL); if (rc != 0) { error_exit(rc, __func__); @@ -232,12 +231,6 @@ void qemu_sem_init(QemuSemaphore *sem, int init) error_exit(EINVAL, __func__); } sem->count = init; -#else -rc = sem_init(&sem->sem, 0, init); -if (rc < 0) { -error_exit(errno, __func__); -} -#endif sem->initialized = true; } @@ -247,7 +240,6 @@ void qemu_sem_destroy(QemuSemaphore *sem) assert(sem->initialized); sem->initialized = false; -#ifndef CONFIG_SEM_TIMEDWAIT rc = pthread_cond_destroy(&sem->cond); if (rc < 0) { error_exit(rc, __func__); @@ -256,12 +248,6 @@ void qemu_sem_destroy(QemuSemaphore *sem) if (rc < 0) { error_exit(rc, __func__); } -#else -rc = sem_destroy(&sem->sem); -if (rc < 0) { -error_exit(errno, __func__); -} -#endif } void qemu_sem_post(QemuSemaphore *sem) @@ -269,7 +255,6 @@ void qemu_sem_post(QemuSemaphore *sem) int rc; assert(sem->initialized); -#ifndef CONFIG_SEM_TIMEDWAIT pthread_mutex_lock(&sem->lock); if (sem->count == UINT_MAX) { rc = EINVAL; @@ -281,12 +266,6 @@ void qemu_sem_post(QemuSemaphore *sem) if (rc != 0) { error_exit(rc, __func__); } -#else -rc = sem_post(&sem->sem); -if (rc < 0) { -error_exit(errno, __func__); -} -#endif } int qemu_sem_timedwait(QemuSemaphore *sem, int ms) @@ -295,7 +274,6 @@ int qemu_sem_timedwait(QemuSemaphore *sem, int ms) struct timespec ts; assert(sem->initialized); -#ifndef CONFIG_SEM_TIMEDWAIT rc = 0; compute_abs_deadline(&ts, ms); pthread_mutex_lock(&sem->lock); @@ -313,29 +291,6 @@ int qemu_sem_timedwait(QemuSemaphore *sem, int ms) } pthread_mutex_unlock(&sem->lock); return (rc == ETIMEDOUT ? -1 : 0); -#else -if (ms <= 0) { -/* This is cheaper than sem_timedwait. 
*/ -do { -rc = sem_trywait(&sem->sem); -} while (rc == -1 && errno == EINTR); -if (rc == -1 && errno == EAGAIN) { -return -1; -} -} else { -compute_abs_deadline(&ts, ms); -do { -rc = sem_timedwait(&sem->sem, &ts); -} while (rc == -1 && errno == EINTR); -if (rc == -1 && errno == ETIMEDOUT) { -return -1; -} -} -if (rc < 0) { -error_exit(errno, __func__); -} -return 0; -#
[RFC 2/2] sem-posix: use monotonic clock instead
Use CLOCK_MONOTONIC, so the timeout isn't affected by changes to the system time. It depends on the pthread_condattr_setclock(), while some systems(e.g. mac os) do not support it, the behavior won't change in these systems. Signed-off-by: Longpeng(Mike) --- include/qemu/thread-posix.h | 1 + meson.build | 11 +++ util/qemu-thread-posix.c| 32 +--- 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/include/qemu/thread-posix.h b/include/qemu/thread-posix.h index 5466608..cc77000 100644 --- a/include/qemu/thread-posix.h +++ b/include/qemu/thread-posix.h @@ -29,6 +29,7 @@ struct QemuCond { struct QemuSemaphore { pthread_mutex_t lock; pthread_cond_t cond; +pthread_condattr_t attr; unsigned int count; bool initialized; }; diff --git a/meson.build b/meson.build index 3ccb110..2bab94f 100644 --- a/meson.build +++ b/meson.build @@ -1688,6 +1688,17 @@ config_host_data.set('CONFIG_PTHREAD_SETNAME_NP_WO_TID', cc.links(gnu_source_pre pthread_create(&thread, 0, f, 0); return 0; }''', dependencies: threads)) +config_host_data.set('CONFIG_PTHREAD_CONDATTR_SETCLOCK', cc.links(gnu_source_prefix + ''' + #include + #include + + int main(void) + { +pthread_condattr_t attr +pthread_condattr_init(&attr); +pthread_condattr_setclock(&attr, CLOCK_MONOTONIC); +return 0; + }''', dependencies: threads)) config_host_data.set('CONFIG_SIGNALFD', cc.links(gnu_source_prefix + ''' #include diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c index 1ad2503..d3a7c54 100644 --- a/util/qemu-thread-posix.c +++ b/util/qemu-thread-posix.c @@ -40,10 +40,22 @@ static void error_exit(int err, const char *msg) static void compute_abs_deadline(struct timespec *ts, int ms) { +time_t now_sec; +long now_nsec; +#ifdef CONFIG_PTHREAD_CONDATTR_SETCLOCK +struct timespec now; +clock_gettime(CLOCK_MONOTONIC, &now); +now_sec = now.tv_sec; +now_nsec = now.tv_nsec; +#else struct timeval tv; gettimeofday(&tv, NULL); -ts->tv_nsec = tv.tv_usec * 1000 + (ms % 1000) * 100; -ts->tv_sec = tv.tv_sec + ms / 
1000; +now_sec = tv.tv_sec; +now_nsec = tv.tv_usec * 1000; +#endif + +ts->tv_nsec = now_nsec + (ms % 1000) * 100; +ts->tv_sec = now_sec + ms / 1000; if (ts->tv_nsec >= 10) { ts->tv_sec++; ts->tv_nsec -= 10; @@ -223,7 +235,17 @@ void qemu_sem_init(QemuSemaphore *sem, int init) if (rc != 0) { error_exit(rc, __func__); } -rc = pthread_cond_init(&sem->cond, NULL); +rc = pthread_condattr_init(&sem->attr); +if (rc != 0) { +error_exit(rc, __func__); +} +#ifdef CONFIG_PTHREAD_CONDATTR_SETCLOCK +rc = pthread_condattr_setclock(&sem->attr, CLOCK_MONOTONIC); +if (rc != 0) { +error_exit(rc, __func__); +} +#endif +rc = pthread_cond_init(&sem->cond, &sem->attr); if (rc != 0) { error_exit(rc, __func__); } @@ -248,6 +270,10 @@ void qemu_sem_destroy(QemuSemaphore *sem) if (rc < 0) { error_exit(rc, __func__); } +rc = pthread_condattr_destroy(&sem->attr); +if (rc < 0) { +error_exit(rc, __func__); +} } void qemu_sem_post(QemuSemaphore *sem) -- 1.8.3.1
[RFC 0/2] qemu-sem-posix: use monotonic clock instead
qemu_sem_timedwait() uses the system (realtime) clock by default, so its timeout is affected by changes to the system time. In practice, a system clock that runs fast or slow is a common case, and the NTP service periodically adjusts the system time to resynchronize it. This patchset uses the monotonic clock instead of the realtime clock, so that the timeout is no longer affected by changes to the system time. Longpeng (Mike) (2): sem-posix: remove the posix semaphore support sem-posix: use monotonic clock instead include/qemu/thread-posix.h | 5 +-- meson.build | 12 ++- util/qemu-thread-posix.c| 82 +++-- 3 files changed, 39 insertions(+), 60 deletions(-) -- 1.8.3.1
[PATCH v2 2/3] sem-posix: use monotonic clock instead
Use CLOCK_MONOTONIC, so the timeout isn't affected by changes to the system time. It depends on the pthread_condattr_setclock(), while some systems(e.g. mac os) does not support it, so the behavior won't change in these systems. Signed-off-by: Longpeng(Mike) --- meson.build | 11 +++ util/qemu-thread-posix.c | 49 +--- 2 files changed, 53 insertions(+), 7 deletions(-) diff --git a/meson.build b/meson.build index 3ccb110..2bab94f 100644 --- a/meson.build +++ b/meson.build @@ -1688,6 +1688,17 @@ config_host_data.set('CONFIG_PTHREAD_SETNAME_NP_WO_TID', cc.links(gnu_source_pre pthread_create(&thread, 0, f, 0); return 0; }''', dependencies: threads)) +config_host_data.set('CONFIG_PTHREAD_CONDATTR_SETCLOCK', cc.links(gnu_source_prefix + ''' + #include + #include + + int main(void) + { +pthread_condattr_t attr +pthread_condattr_init(&attr); +pthread_condattr_setclock(&attr, CLOCK_MONOTONIC); +return 0; + }''', dependencies: threads)) config_host_data.set('CONFIG_SIGNALFD', cc.links(gnu_source_prefix + ''' #include diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c index 1ad2503..6ce 100644 --- a/util/qemu-thread-posix.c +++ b/util/qemu-thread-posix.c @@ -38,12 +38,20 @@ static void error_exit(int err, const char *msg) abort(); } +static inline clockid_t qemu_timedwait_clockid(void) +{ +#ifdef CONFIG_PTHREAD_CONDATTR_SETCLOCK +return CLOCK_MONOTONIC; +#else +return CLOCK_REALTIME; +#endif +} + static void compute_abs_deadline(struct timespec *ts, int ms) { -struct timeval tv; -gettimeofday(&tv, NULL); -ts->tv_nsec = tv.tv_usec * 1000 + (ms % 1000) * 100; -ts->tv_sec = tv.tv_sec + ms / 1000; +clock_gettime(qemu_timedwait_clockid(), ts); +ts->tv_nsec += (ms % 1000) * 100; +ts->tv_sec += ms / 1000; if (ts->tv_nsec >= 10) { ts->tv_sec++; ts->tv_nsec -= 10; @@ -147,11 +155,25 @@ void qemu_rec_mutex_unlock_impl(QemuRecMutex *mutex, const char *file, int line) void qemu_cond_init(QemuCond *cond) { +pthread_condattr_t attr; int err; -err = 
pthread_cond_init(&cond->cond, NULL); -if (err) +err = pthread_condattr_init(&attr); +if (err) { +error_exit(err, __func__); +} +err = pthread_condattr_setclock(&attr, qemu_timedwait_clockid()); +if (err) { +error_exit(err, __func__); +} +err = pthread_cond_init(&cond->cond, &attr); +if (err) { error_exit(err, __func__); +} +err = pthread_condattr_destroy(&attr); +if (err) { +error_exit(err, __func__); +} cond->initialized = true; } @@ -217,16 +239,29 @@ bool qemu_cond_timedwait_impl(QemuCond *cond, QemuMutex *mutex, int ms, void qemu_sem_init(QemuSemaphore *sem, int init) { +pthread_condattr_t attr; int rc; rc = pthread_mutex_init(&sem->lock, NULL); if (rc != 0) { error_exit(rc, __func__); } -rc = pthread_cond_init(&sem->cond, NULL); +rc = pthread_condattr_init(&attr); +if (rc != 0) { +error_exit(rc, __func__); +} +rc = pthread_condattr_setclock(&attr, qemu_timedwait_clockid()); if (rc != 0) { error_exit(rc, __func__); } +rc = pthread_cond_init(&sem->cond, &attr); +if (rc != 0) { +error_exit(rc, __func__); +} +rc = pthread_condattr_destroy(&attr); +if (rc < 0) { +error_exit(rc, __func__); +} if (init < 0) { error_exit(EINVAL, __func__); } -- 1.8.3.1
[PATCH v2 1/3] sem-posix: remove the posix semaphore support
POSIX specifies an absolute time for sem_timedwait(), it would be affected if the system time is changing, but there is not a relative time or monotonic clock version of sem_timedwait, so we cannot gain from POSIX semaphore any more. An alternative way is to use sem_trywait + usleep, maybe we can remove CONFIG_SEM_TIMEDWAIT in this way? No, because some systems (e.g. mac os) mark the sem_xxx API as deprecated. So maybe remove the usage of POSIX semaphore and turn to use the pthread variant for all systems looks better. Signed-off-by: Longpeng(Mike) --- include/qemu/thread-posix.h | 4 meson.build | 1 - util/qemu-thread-posix.c| 54 - 3 files changed, 59 deletions(-) diff --git a/include/qemu/thread-posix.h b/include/qemu/thread-posix.h index b792e6e..5466608 100644 --- a/include/qemu/thread-posix.h +++ b/include/qemu/thread-posix.h @@ -27,13 +27,9 @@ struct QemuCond { }; struct QemuSemaphore { -#ifndef CONFIG_SEM_TIMEDWAIT pthread_mutex_t lock; pthread_cond_t cond; unsigned int count; -#else -sem_t sem; -#endif bool initialized; }; diff --git a/meson.build b/meson.build index 762d7ce..3ccb110 100644 --- a/meson.build +++ b/meson.build @@ -1557,7 +1557,6 @@ config_host_data.set('CONFIG_POSIX_FALLOCATE', cc.has_function('posix_fallocate' config_host_data.set('CONFIG_POSIX_MEMALIGN', cc.has_function('posix_memalign')) config_host_data.set('CONFIG_PPOLL', cc.has_function('ppoll')) config_host_data.set('CONFIG_PREADV', cc.has_function('preadv', prefix: '#include ')) -config_host_data.set('CONFIG_SEM_TIMEDWAIT', cc.has_function('sem_timedwait', dependencies: threads)) config_host_data.set('CONFIG_SENDFILE', cc.has_function('sendfile')) config_host_data.set('CONFIG_SETNS', cc.has_function('setns') and cc.has_function('unshare')) config_host_data.set('CONFIG_SYNCFS', cc.has_function('syncfs')) diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c index e1225b6..1ad2503 100644 --- a/util/qemu-thread-posix.c +++ b/util/qemu-thread-posix.c @@ -219,7 +219,6 @@ void 
qemu_sem_init(QemuSemaphore *sem, int init) { int rc; -#ifndef CONFIG_SEM_TIMEDWAIT rc = pthread_mutex_init(&sem->lock, NULL); if (rc != 0) { error_exit(rc, __func__); @@ -232,12 +231,6 @@ void qemu_sem_init(QemuSemaphore *sem, int init) error_exit(EINVAL, __func__); } sem->count = init; -#else -rc = sem_init(&sem->sem, 0, init); -if (rc < 0) { -error_exit(errno, __func__); -} -#endif sem->initialized = true; } @@ -247,7 +240,6 @@ void qemu_sem_destroy(QemuSemaphore *sem) assert(sem->initialized); sem->initialized = false; -#ifndef CONFIG_SEM_TIMEDWAIT rc = pthread_cond_destroy(&sem->cond); if (rc < 0) { error_exit(rc, __func__); @@ -256,12 +248,6 @@ void qemu_sem_destroy(QemuSemaphore *sem) if (rc < 0) { error_exit(rc, __func__); } -#else -rc = sem_destroy(&sem->sem); -if (rc < 0) { -error_exit(errno, __func__); -} -#endif } void qemu_sem_post(QemuSemaphore *sem) @@ -269,7 +255,6 @@ void qemu_sem_post(QemuSemaphore *sem) int rc; assert(sem->initialized); -#ifndef CONFIG_SEM_TIMEDWAIT pthread_mutex_lock(&sem->lock); if (sem->count == UINT_MAX) { rc = EINVAL; @@ -281,12 +266,6 @@ void qemu_sem_post(QemuSemaphore *sem) if (rc != 0) { error_exit(rc, __func__); } -#else -rc = sem_post(&sem->sem); -if (rc < 0) { -error_exit(errno, __func__); -} -#endif } int qemu_sem_timedwait(QemuSemaphore *sem, int ms) @@ -295,7 +274,6 @@ int qemu_sem_timedwait(QemuSemaphore *sem, int ms) struct timespec ts; assert(sem->initialized); -#ifndef CONFIG_SEM_TIMEDWAIT rc = 0; compute_abs_deadline(&ts, ms); pthread_mutex_lock(&sem->lock); @@ -313,29 +291,6 @@ int qemu_sem_timedwait(QemuSemaphore *sem, int ms) } pthread_mutex_unlock(&sem->lock); return (rc == ETIMEDOUT ? -1 : 0); -#else -if (ms <= 0) { -/* This is cheaper than sem_timedwait. 
*/ -do { -rc = sem_trywait(&sem->sem); -} while (rc == -1 && errno == EINTR); -if (rc == -1 && errno == EAGAIN) { -return -1; -} -} else { -compute_abs_deadline(&ts, ms); -do { -rc = sem_timedwait(&sem->sem, &ts); -} while (rc == -1 && errno == EINTR); -if (rc == -1 && errno == ETIMEDOUT) { -return -1; -} -} -if (rc < 0) { -error_exit(errno, __func__); -} -return 0; -#
[PATCH v2 0/3] qemu-sem-posix: use monotonic clock instead
qemu_sem_timedwait() uses the system (realtime) clock by default, so its timeout is affected by changes to the system time. In practice, a system clock that runs fast or slow is a common case, and the NTP service periodically adjusts the system time to resynchronize it. This patchset uses the monotonic clock instead of the realtime clock, so that the timeout is no longer affected by changes to the system time. Changes v1(RFC) -> v2: Patch 2: - clean up the code [Paolo] - use pthread_condattr_setclock when initializing qemu-cond. [Paolo] Patch 3: - newly added; makes the qemu-sem code neater. [Longpeng] Longpeng (Mike) (3): sem-posix: remove the posix semaphore support sem-posix: use monotonic clock instead sem-posix: refactor qemu-sem with qemu-cond and qemu-mutex include/qemu/thread-posix.h | 9 +-- meson.build | 12 ++- util/qemu-thread-posix.c| 178 +++- 3 files changed, 73 insertions(+), 126 deletions(-) -- 1.8.3.1
[PATCH v2 3/3] sem-posix: refactor qemu-sem with qemu-cond and qemu-mutex
Now, qemu-sem is based on the pthread_cond only, we can use qemu-cond and qemu-mutex to make the code neater and the mutex trace can be supported in qemu-sem naturally. Signed-off-by: Longpeng(Mike) --- include/qemu/thread-posix.h | 5 +-- util/qemu-thread-posix.c| 103 ++-- 2 files changed, 34 insertions(+), 74 deletions(-) diff --git a/include/qemu/thread-posix.h b/include/qemu/thread-posix.h index 5466608..5f2f3d1 100644 --- a/include/qemu/thread-posix.h +++ b/include/qemu/thread-posix.h @@ -27,10 +27,9 @@ struct QemuCond { }; struct QemuSemaphore { -pthread_mutex_t lock; -pthread_cond_t cond; +QemuMutex mutex; +QemuCond cond; unsigned int count; -bool initialized; }; struct QemuEvent { diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c index 6ce..f2ce47d 100644 --- a/util/qemu-thread-posix.c +++ b/util/qemu-thread-posix.c @@ -220,16 +220,15 @@ void qemu_cond_wait_impl(QemuCond *cond, QemuMutex *mutex, const char *file, con error_exit(err, __func__); } -bool qemu_cond_timedwait_impl(QemuCond *cond, QemuMutex *mutex, int ms, - const char *file, const int line) +static bool +qemu_cond_timedwait_ts(QemuCond *cond, QemuMutex *mutex, struct timespec *ts, + const char *file, const int line) { int err; -struct timespec ts; assert(cond->initialized); trace_qemu_mutex_unlock(mutex, file, line); -compute_abs_deadline(&ts, ms); -err = pthread_cond_timedwait(&cond->cond, &mutex->lock, &ts); +err = pthread_cond_timedwait(&cond->cond, &mutex->lock, ts); trace_qemu_mutex_locked(mutex, file, line); if (err && err != ETIMEDOUT) { error_exit(err, __func__); @@ -237,111 +236,73 @@ bool qemu_cond_timedwait_impl(QemuCond *cond, QemuMutex *mutex, int ms, return err != ETIMEDOUT; } +bool qemu_cond_timedwait_impl(QemuCond *cond, QemuMutex *mutex, int ms, + const char *file, const int line) +{ +struct timespec ts; + +compute_abs_deadline(&ts, ms); +return qemu_cond_timedwait_ts(cond, mutex, &ts, file, line); +} + void qemu_sem_init(QemuSemaphore *sem, int init) { 
-pthread_condattr_t attr; -int rc; +qemu_mutex_init(&sem->mutex); +qemu_cond_init(&sem->cond); -rc = pthread_mutex_init(&sem->lock, NULL); -if (rc != 0) { -error_exit(rc, __func__); -} -rc = pthread_condattr_init(&attr); -if (rc != 0) { -error_exit(rc, __func__); -} -rc = pthread_condattr_setclock(&attr, qemu_timedwait_clockid()); -if (rc != 0) { -error_exit(rc, __func__); -} -rc = pthread_cond_init(&sem->cond, &attr); -if (rc != 0) { -error_exit(rc, __func__); -} -rc = pthread_condattr_destroy(&attr); -if (rc < 0) { -error_exit(rc, __func__); -} if (init < 0) { error_exit(EINVAL, __func__); } sem->count = init; -sem->initialized = true; } void qemu_sem_destroy(QemuSemaphore *sem) { -int rc; - -assert(sem->initialized); -sem->initialized = false; -rc = pthread_cond_destroy(&sem->cond); -if (rc < 0) { -error_exit(rc, __func__); -} -rc = pthread_mutex_destroy(&sem->lock); -if (rc < 0) { -error_exit(rc, __func__); -} +qemu_cond_destroy(&sem->cond); +qemu_mutex_destroy(&sem->mutex); } void qemu_sem_post(QemuSemaphore *sem) { -int rc; - -assert(sem->initialized); -pthread_mutex_lock(&sem->lock); +qemu_mutex_lock(&sem->mutex); if (sem->count == UINT_MAX) { -rc = EINVAL; +error_exit(EINVAL, __func__); } else { sem->count++; -rc = pthread_cond_signal(&sem->cond); -} -pthread_mutex_unlock(&sem->lock); -if (rc != 0) { -error_exit(rc, __func__); +qemu_cond_signal(&sem->cond); } +qemu_mutex_unlock(&sem->mutex); } int qemu_sem_timedwait(QemuSemaphore *sem, int ms) { -int rc; +bool rc = true; struct timespec ts; -assert(sem->initialized); -rc = 0; compute_abs_deadline(&ts, ms); -pthread_mutex_lock(&sem->lock); +qemu_mutex_lock(&sem->mutex); while (sem->count == 0) { -rc = pthread_cond_timedwait(&sem->cond, &sem->lock, &ts); -if (rc == ETIMEDOUT) { +rc = qemu_cond_timedwait_ts(&sem->cond, &sem->mutex, &ts, +__FILE__, __LINE__); +if (!rc) { /* timeout */ break; } -if (rc != 0) { -error_exit(rc, __func__); -} } -if (rc != ETIMEDOUT) { +if (rc) { --sem->count; }
[PATCH resend 0/2] kvm/msi: do explicit commit when adding msi routes
From: Longpeng This patchset moves the call to kvm_irqchip_commit_routes() out of kvm_irqchip_add_msi_route(). An optimization of vfio migration [1] depends on this change. [1] https://lists.gnu.org/archive/html/qemu-devel/2021-11/msg00968.html Longpeng (Mike) (2): kvm-irqchip: introduce new API to support route change kvm/msi: do explicit commit when adding msi routes accel/kvm/kvm-all.c| 7 --- accel/stubs/kvm-stub.c | 2 +- hw/misc/ivshmem.c | 5 - hw/vfio/pci.c | 5 - hw/virtio/virtio-pci.c | 4 +++- include/sysemu/kvm.h | 23 +-- target/i386/kvm/kvm.c | 4 +++- 7 files changed, 40 insertions(+), 10 deletions(-) -- 2.23.0
[PATCH resend 1/2] kvm-irqchip: introduce new API to support route change
From: Longpeng Paolo suggested adding the new API to support route changes [1]. We should invoke kvm_irqchip_begin_route_changes() before changing the routes, increasing the KVMRouteChange.changes if the routes are changed, and commit the changes at last. [1] https://lists.gnu.org/archive/html/qemu-devel/2021-11/msg02898.html Signed-off-by: Longpeng --- include/sysemu/kvm.h | 19 +++ 1 file changed, 19 insertions(+) diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 6eb39a088b..6f8a43dea4 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -224,6 +224,11 @@ DECLARE_INSTANCE_CHECKER(KVMState, KVM_STATE, extern KVMState *kvm_state; typedef struct Notifier Notifier; +typedef struct KVMRouteChange { + KVMState *s; + int changes; +} KVMRouteChange; + /* external API */ bool kvm_has_free_slot(MachineState *ms); @@ -494,6 +499,20 @@ int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev); int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, PCIDevice *dev); void kvm_irqchip_commit_routes(KVMState *s); + +static inline KVMRouteChange kvm_irqchip_begin_route_changes(KVMState *s) +{ + return (KVMRouteChange) { .s = s, .changes = 0 }; +} + +static inline void kvm_irqchip_commit_route_changes(KVMRouteChange *c) +{ + if (c->changes) { + kvm_irqchip_commit_routes(c->s); + c->changes = 0; +} +} + void kvm_irqchip_release_virq(KVMState *s, int virq); int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter); -- 2.23.0
[PATCH resend 2/2] kvm/msi: do explicit commit when adding msi routes
From: Longpeng We invoke the kvm_irqchip_commit_routes() for each addition to MSI route table, which is not efficient if we are adding lots of routes in some cases. This patch lets callers invoke the kvm_irqchip_commit_routes(), so the callers can decide how to optimize. [1] https://lists.gnu.org/archive/html/qemu-devel/2021-11/msg00967.html Signed-off-by: Longpeng --- accel/kvm/kvm-all.c| 7 --- accel/stubs/kvm-stub.c | 2 +- hw/misc/ivshmem.c | 5 - hw/vfio/pci.c | 5 - hw/virtio/virtio-pci.c | 4 +++- include/sysemu/kvm.h | 4 ++-- target/i386/kvm/kvm.c | 4 +++- 7 files changed, 21 insertions(+), 10 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 0e66ebb497..27864dfaea 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -1961,10 +1961,11 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg) return kvm_set_irq(s, route->kroute.gsi, 1); } -int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev) +int kvm_irqchip_add_msi_route(KVMRouteChange *c, int vector, PCIDevice *dev) { struct kvm_irq_routing_entry kroute = {}; int virq; +KVMState *s = c->s; MSIMessage msg = {0, 0}; if (pci_available && dev) { @@ -2004,7 +2005,7 @@ int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev) kvm_add_routing_entry(s, &kroute); kvm_arch_add_msi_route_post(&kroute, vector, dev); -kvm_irqchip_commit_routes(s); +c->changes++; return virq; } @@ -2162,7 +2163,7 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg) abort(); } -int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev) +int kvm_irqchip_add_msi_route(KVMRouteChange *c, int vector, PCIDevice *dev) { return -ENOSYS; } diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c index 5319573e00..ae6e8e9aa7 100644 --- a/accel/stubs/kvm-stub.c +++ b/accel/stubs/kvm-stub.c @@ -81,7 +81,7 @@ int kvm_on_sigbus(int code, void *addr) } #ifndef CONFIG_USER_ONLY -int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev) +int 
kvm_irqchip_add_msi_route(KVMRouteChange *c, int vector, PCIDevice *dev) { return -ENOSYS; } diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 299837e5c1..2307f4a513 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -424,16 +424,19 @@ static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector, Error **errp) { PCIDevice *pdev = PCI_DEVICE(s); +KVMRouteChange c; int ret; IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector); assert(!s->msi_vectors[vector].pdev); -ret = kvm_irqchip_add_msi_route(kvm_state, vector, pdev); +c = kvm_irqchip_begin_route_changes(kvm_state); +ret = kvm_irqchip_add_msi_route(&c, vector, pdev); if (ret < 0) { error_setg(errp, "kvm_irqchip_add_msi_route failed"); return; } +kvm_irqchip_commit_route_changes(&c); s->msi_vectors[vector].virq = ret; s->msi_vectors[vector].pdev = pdev; diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 7b45353ce2..d07a4e99b1 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -412,6 +412,7 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, int vector_n, bool msix) { +KVMRouteChange c; int virq; if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi)) { @@ -422,11 +423,13 @@ static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, return; } -virq = kvm_irqchip_add_msi_route(kvm_state, vector_n, &vdev->pdev); +c = kvm_irqchip_begin_route_changes(kvm_state); +virq = kvm_irqchip_add_msi_route(&c, vector_n, &vdev->pdev); if (virq < 0) { event_notifier_cleanup(&vector->kvm_interrupt); return; } +kvm_irqchip_commit_route_changes(&c); if (kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt, NULL, virq) < 0) { diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 373e26d7c3..80d8c2ea11 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -758,10 +758,12 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy, int ret; if 
(irqfd->users == 0) { -ret = kvm_irqchip_add_msi_route(kvm_state, vector, &proxy->pci_dev); +KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state); +ret = kvm_irqchip_add_msi_route(&c, vector, &proxy->pci_dev); if (ret < 0) { return ret; } +kvm_irqchip_commit_route_changes(&c); irqfd->virq = ret; } irqfd->users++; diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 6f8a43dea4..097b3c79d5 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -486,7 +486,7 @@ void kvm_init_cpu
[PATCH v7 2/4] vdpa: add vdpa-dev support
From: Longpeng Add support for vdpa-dev, so that we can use the device directly: -M microvm -m 512m -smp 2 -kernel ... -initrd ... -device \ vhost-vdpa-device,vhostdev=/dev/vhost-vdpa-x Reviewed-by: Stefano Garzarella Acked-by: Jason Wang Signed-off-by: Longpeng --- hw/virtio/Kconfig| 5 + hw/virtio/meson.build| 1 + hw/virtio/vdpa-dev.c | 377 +++ include/hw/virtio/vdpa-dev.h | 43 4 files changed, 426 insertions(+) create mode 100644 hw/virtio/vdpa-dev.c create mode 100644 include/hw/virtio/vdpa-dev.h diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig index cbfd8c7173..89e9e426d8 100644 --- a/hw/virtio/Kconfig +++ b/hw/virtio/Kconfig @@ -85,3 +85,8 @@ config VHOST_USER_GPIO bool default y depends on VIRTIO && VHOST_USER + +config VHOST_VDPA_DEV +bool +default y +depends on VIRTIO && VHOST_VDPA && LINUX diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index dfed1e7af5..54d6d29af7 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -31,6 +31,7 @@ virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) virtio_ss.add(when: 'CONFIG_VHOST_USER_GPIO', if_true: files('vhost-user-gpio.c')) virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_GPIO'], if_true: files('vhost-user-gpio-pci.c')) +virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c')) virtio_pci_ss = ss.source_set() virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c')) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c new file mode 100644 index 00..1840f0e450 --- /dev/null +++ b/hw/virtio/vdpa-dev.c @@ -0,0 +1,377 @@ +/* + * Vhost Vdpa Device + * + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All Rights Reserved. 
+ * + * Authors: + * Longpeng + * + * Largely based on the "vhost-user-blk-pci.c" and "vhost-user-blk.c" + * implemented by: + * Changpeng Liu + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + */ +#include "qemu/osdep.h" +#include +#include +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/cutils.h" +#include "hw/qdev-core.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/virtio-access.h" +#include "hw/virtio/vdpa-dev.h" +#include "sysemu/sysemu.h" +#include "sysemu/runstate.h" + +static void +vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ +/* Nothing to do */ +} + +static uint32_t +vhost_vdpa_device_get_u32(int fd, unsigned long int cmd, Error **errp) +{ +uint32_t val = (uint32_t)-1; + +if (ioctl(fd, cmd, &val) < 0) { +error_setg(errp, "vhost-vdpa-device: cmd 0x%lx failed: %s", + cmd, strerror(errno)); +} + +return val; +} + +static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) +{ +VirtIODevice *vdev = VIRTIO_DEVICE(dev); +VhostVdpaDevice *v = VHOST_VDPA_DEVICE(vdev); +uint16_t max_queue_size; +struct vhost_virtqueue *vqs; +int i, ret; + +if (!v->vhostdev) { +error_setg(errp, "vhost-vdpa-device: vhostdev are missing"); +return; +} + +v->vhostfd = qemu_open(v->vhostdev, O_RDWR, errp); +if (*errp) { +return; +} +v->vdpa.device_fd = v->vhostfd; + +v->vdev_id = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_DEVICE_ID, errp); +if (*errp) { +goto out; +} + +max_queue_size = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_VRING_NUM, errp); +if (*errp) { +goto out; +} + +if (v->queue_size > max_queue_size) { +error_setg(errp, "vhost-vdpa-device: invalid queue_size: %u (max:%u)", + v->queue_size, max_queue_size); +goto out; +} else if (!v->queue_size) { 
+v->queue_size = max_queue_size; +} + +v->num_queues = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_VQS_COUNT, errp); +if (*errp) { +goto out; +} + +if (!v->num_queues || v->num_queues > VIRTIO_QUEUE_MAX) { +error_setg(errp, "invalid number of virtqueues: %u (max:%u)", + v->num_queues, VIRTIO_QUEUE_MAX); +goto out; +} + +v->dev.nvqs = v->num_queues; +vqs = g_new0(struct vhost_virtqueue, v->dev.nvqs); +v->dev.vqs = vqs; +v->dev.vq_index = 0; +v->dev.vq_index_end = v->dev.nvqs; +v->dev.backend_features = 0; +v->started = false; + +ret = vhost_dev_init(&v->dev, &v->vdpa, VHOST_BACKEND_TYPE_VDPA, 0, NULL); +if (ret < 0) { +error_s
[PATCH v7 1/4] virtio: get class_id and pci device id by the virtio id
From: Longpeng Add helpers to get the "Transitional PCI Device ID" and "class_id" of the device specified by the "Virtio Device ID". These helpers will be used to build the generic vDPA device later. Acked-by: Jason Wang Signed-off-by: Longpeng --- hw/virtio/virtio-pci.c | 88 ++ include/hw/virtio/virtio-pci.h | 5 ++ 2 files changed, 93 insertions(+) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 34db51e241..3469b88d43 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -19,6 +19,7 @@ #include "exec/memop.h" #include "standard-headers/linux/virtio_pci.h" +#include "standard-headers/linux/virtio_ids.h" #include "hw/boards.h" #include "hw/virtio/virtio.h" #include "migration/qemu-file-types.h" @@ -213,6 +214,90 @@ static int virtio_pci_load_queue(DeviceState *d, int n, QEMUFile *f) return 0; } +typedef struct VirtIOPCIIDInfo { +/* virtio id */ +uint16_t vdev_id; +/* pci device id for the transitional device */ +uint16_t trans_devid; +uint16_t class_id; +} VirtIOPCIIDInfo; + +static const VirtIOPCIIDInfo virtio_pci_id_info[] = { +{ +.vdev_id = VIRTIO_ID_CRYPTO, +.class_id = PCI_CLASS_OTHERS, +}, { +.vdev_id = VIRTIO_ID_FS, +.class_id = PCI_CLASS_STORAGE_OTHER, +}, { +.vdev_id = VIRTIO_ID_NET, +.trans_devid = PCI_DEVICE_ID_VIRTIO_NET, +.class_id = PCI_CLASS_NETWORK_ETHERNET, +}, { +.vdev_id = VIRTIO_ID_BLOCK, +.trans_devid = PCI_DEVICE_ID_VIRTIO_BLOCK, +.class_id = PCI_CLASS_STORAGE_SCSI, +}, { +.vdev_id = VIRTIO_ID_CONSOLE, +.trans_devid = PCI_DEVICE_ID_VIRTIO_CONSOLE, +.class_id = PCI_CLASS_COMMUNICATION_OTHER, +}, { +.vdev_id = VIRTIO_ID_SCSI, +.trans_devid = PCI_DEVICE_ID_VIRTIO_SCSI, +.class_id = PCI_CLASS_STORAGE_SCSI +}, { +.vdev_id = VIRTIO_ID_9P, +.trans_devid = PCI_DEVICE_ID_VIRTIO_9P, +.class_id = PCI_BASE_CLASS_NETWORK, +}, { +.vdev_id = VIRTIO_ID_BALLOON, +.trans_devid = PCI_DEVICE_ID_VIRTIO_BALLOON, +.class_id = PCI_CLASS_OTHERS, +}, { +.vdev_id = VIRTIO_ID_RNG, +.trans_devid = PCI_DEVICE_ID_VIRTIO_RNG, +.class_id = 
PCI_CLASS_OTHERS, +}, +}; + +static const VirtIOPCIIDInfo *virtio_pci_get_id_info(uint16_t vdev_id) +{ +const VirtIOPCIIDInfo *info = NULL; +int i; + +for (i = 0; i < ARRAY_SIZE(virtio_pci_id_info); i++) { +if (virtio_pci_id_info[i].vdev_id == vdev_id) { +info = &virtio_pci_id_info[i]; +break; +} +} + +if (!info) { +/* The device id is invalid or not added to the id_info yet. */ +error_report("Invalid virtio device(id %u)", vdev_id); +abort(); +} + +return info; +} + +/* + * Get the Transitional Device ID for the specific device, return + * zero if the device is non-transitional. + */ +uint16_t virtio_pci_get_trans_devid(uint16_t device_id) +{ +return virtio_pci_get_id_info(device_id)->trans_devid; +} + +/* + * Get the Class ID for the specific device. + */ +uint16_t virtio_pci_get_class_id(uint16_t device_id) +{ +return virtio_pci_get_id_info(device_id)->class_id; +} + static bool virtio_pci_ioeventfd_enabled(DeviceState *d) { VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); @@ -1683,6 +1768,9 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) * is set to PCI_SUBVENDOR_ID_REDHAT_QUMRANET by default. 
*/ pci_set_word(config + PCI_SUBSYSTEM_ID, virtio_bus_get_vdev_id(bus)); +if (proxy->trans_devid) { +pci_config_set_device_id(config, proxy->trans_devid); +} } else { /* pure virtio-1.0 */ pci_set_word(config + PCI_VENDOR_ID, diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h index 2446dcd9ae..d95b1a13a5 100644 --- a/include/hw/virtio/virtio-pci.h +++ b/include/hw/virtio/virtio-pci.h @@ -146,6 +146,8 @@ struct VirtIOPCIProxy { bool disable_modern; bool ignore_backend_features; OnOffAuto disable_legacy; +/* Transitional device id */ +uint16_t trans_devid; uint32_t class_code; uint32_t nvectors; uint32_t dfselect; @@ -179,6 +181,9 @@ static inline void virtio_pci_disable_modern(VirtIOPCIProxy *proxy) proxy->disable_modern = true; } +uint16_t virtio_pci_get_trans_devid(uint16_t device_id); +uint16_t virtio_pci_get_class_id(uint16_t device_id); + /* * virtio-input-pci: This extends VirtioPCIProxy. */ -- 2.23.0
[PATCH v7 0/4] add generic vDPA device support
From: Longpeng Hi guys, With the generic vDPA device, QEMU won't need to touch the device types any more, much like vfio. We can use the generic vDPA device as follows: -device vhost-vdpa-device-pci,vhostdev=/dev/vhost-vdpa-X Or -M microvm -m 512m -smp 2 -kernel ... -initrd ... -device \ vhost-vdpa-device,vhostdev=/dev/vhost-vdpa-x Changes v6 -> v7: (v6: https://mail.gnu.org/archive/html/qemu-devel/2022-05/msg02821.html) - rebase. [Jason] - add documentation. [Stefan] Changes v5 -> v6: Patch 2: - Turn to the original approach in the RFC to initialize the virtio_pci_id_info array. [Michael] https://lore.kernel.org/all/20220105005900.860-2-longpe...@huawei.com/ Patch 3: - Fix logical error of exception handler around the post_init. [Stefano] - Fix some coding style warnings. [Stefano] Patch 4: - Fix some coding style warnings. [Stefano] Changes v4 -> v5: Patch 3: - remove vhostfd [Jason] - support virtio-mmio [Jason] Changes v3 -> v4: v3: https://www.mail-archive.com/qemu-devel@nongnu.org/msg877015.html - reorganize the series [Stefano] - fix some typos [Stefano] - fix logical error in vhost_vdpa_device_realize [Stefano] Changes v2 -> v3: Patch 4 & 5: - only call vdpa ioctls in vdpa-dev.c [Stefano, Longpeng] - s/VQS_NUM/VQS_COUNT [Stefano] - check both vdpa_dev_fd and vdpa_dev [Stefano] Patch 6: - move all steps into vhost_vdpa_device_unrealize. 
[Stefano] Changes RFC -> v2: Patch 1: - rename 'pdev_id' to 'trans_devid' [Michael] - only use transitional device id for the devices listed in the spec [Michael] - use macros to make the id_info table clearer [Longpeng] - add some modern devices in the id_info table [Longpeng] Patch 2: - remove the GET_VECTORS_NUM command [Jason] Patch 4: - expose vdpa_dev_fd as a QOM property [Stefan] - introduce vhost_vdpa_device_get_u32 as a common function to make the code clearer [Stefan] - fix the misleading description of 'dc->desc' [Stefano] Patch 5: - check returned number of virtqueues [Stefan] Patch 6: - init s->num_queues [Stefano] - free s->dev.vqs [Stefano] Longpeng (Mike) (4): virtio: get class_id and pci device id by the virtio id vdpa: add vdpa-dev support vdpa: add vdpa-dev-pci support docs: Add generic vhost-vdpa device documentation docs/system/devices/vhost-vdpa-device.rst | 43 +++ hw/virtio/Kconfig | 5 + hw/virtio/meson.build | 2 + hw/virtio/vdpa-dev-pci.c | 102 ++ hw/virtio/vdpa-dev.c | 377 ++ hw/virtio/virtio-pci.c| 88 + include/hw/virtio/vdpa-dev.h | 43 +++ include/hw/virtio/virtio-pci.h| 5 + 8 files changed, 665 insertions(+) create mode 100644 docs/system/devices/vhost-vdpa-device.rst create mode 100644 hw/virtio/vdpa-dev-pci.c create mode 100644 hw/virtio/vdpa-dev.c create mode 100644 include/hw/virtio/vdpa-dev.h -- 2.23.0
[PATCH v7 3/4] vdpa: add vdpa-dev-pci support
From: Longpeng Supports vdpa-dev-pci, we can use the device as follow: -device vhost-vdpa-device-pci,vhostdev=/dev/vhost-vdpa-X Reviewed-by: Stefano Garzarella Acked-by: Jason Wang Signed-off-by: Longpeng --- hw/virtio/meson.build| 1 + hw/virtio/vdpa-dev-pci.c | 102 +++ hw/virtio/vdpa-dev.c | 2 +- 3 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 hw/virtio/vdpa-dev-pci.c diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 54d6d29af7..559b80cb28 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -57,6 +57,7 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_SERIAL', if_true: files('virtio-serial-pc virtio_pci_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem-pci.c')) +virtio_pci_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev-pci.c')) virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) diff --git a/hw/virtio/vdpa-dev-pci.c b/hw/virtio/vdpa-dev-pci.c new file mode 100644 index 00..5446e6b393 --- /dev/null +++ b/hw/virtio/vdpa-dev-pci.c @@ -0,0 +1,102 @@ +/* + * Vhost Vdpa Device PCI Bindings + * + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All Rights Reserved. + * + * Authors: + * Longpeng + * + * Largely based on the "vhost-user-blk-pci.c" and "vhost-user-blk.c" + * implemented by: + * Changpeng Liu + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. 
+ */ +#include "qemu/osdep.h" +#include +#include +#include "hw/virtio/virtio.h" +#include "hw/virtio/vdpa-dev.h" +#include "hw/pci/pci.h" +#include "hw/qdev-properties.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/module.h" +#include "hw/virtio/virtio-pci.h" +#include "qom/object.h" + + +typedef struct VhostVdpaDevicePCI VhostVdpaDevicePCI; + +#define TYPE_VHOST_VDPA_DEVICE_PCI "vhost-vdpa-device-pci-base" +DECLARE_INSTANCE_CHECKER(VhostVdpaDevicePCI, VHOST_VDPA_DEVICE_PCI, + TYPE_VHOST_VDPA_DEVICE_PCI) + +struct VhostVdpaDevicePCI { +VirtIOPCIProxy parent_obj; +VhostVdpaDevice vdev; +}; + +static void vhost_vdpa_device_pci_instance_init(Object *obj) +{ +VhostVdpaDevicePCI *dev = VHOST_VDPA_DEVICE_PCI(obj); + +virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), +TYPE_VHOST_VDPA_DEVICE); +object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev), + "bootindex"); +} + +static Property vhost_vdpa_device_pci_properties[] = { +DEFINE_PROP_END_OF_LIST(), +}; + +static int vhost_vdpa_device_pci_post_init(VhostVdpaDevice *v, Error **errp) +{ +VhostVdpaDevicePCI *dev = container_of(v, VhostVdpaDevicePCI, vdev); +VirtIOPCIProxy *vpci_dev = &dev->parent_obj; + +vpci_dev->class_code = virtio_pci_get_class_id(v->vdev_id); +vpci_dev->trans_devid = virtio_pci_get_trans_devid(v->vdev_id); +/* one for config vector */ +vpci_dev->nvectors = v->num_queues + 1; + +return 0; +} + +static void +vhost_vdpa_device_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ +VhostVdpaDevicePCI *dev = VHOST_VDPA_DEVICE_PCI(vpci_dev); + +dev->vdev.post_init = vhost_vdpa_device_pci_post_init; +qdev_realize(DEVICE(&dev->vdev), BUS(&vpci_dev->bus), errp); +} + +static void vhost_vdpa_device_pci_class_init(ObjectClass *klass, void *data) +{ +DeviceClass *dc = DEVICE_CLASS(klass); +VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + +set_bit(DEVICE_CATEGORY_MISC, dc->categories); +device_class_set_props(dc, vhost_vdpa_device_pci_properties); +k->realize = 
vhost_vdpa_device_pci_realize; +} + +static const VirtioPCIDeviceTypeInfo vhost_vdpa_device_pci_info = { +.base_name = TYPE_VHOST_VDPA_DEVICE_PCI, +.generic_name= "vhost-vdpa-device-pci", +.transitional_name = "vhost-vdpa-device-pci-transitional", +.non_transitional_name = "vhost-vdpa-device-pci-non-transitional", +.instance_size = sizeof(VhostVdpaDevicePCI), +.instance_init = vhost_vdpa_device_pci_instance_init, +.class_init = vhost_vdpa_device_pci_class_init, +}; + +static void vhost_vdpa_device_pci_register(void) +{ +virtio_pci_types_register(&vhost_vdpa_device_pci_info); +} + +type_init(vhost_vdpa_device_pci_register); diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index 1840f0e450..62d83d3423 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -138,7 +138,7 @@ static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) goto free_config; } -virtio_init(vdev, "vhost-vdpa", v->vdev_id, v->config_size); +virtio_init(vdev, v->vdev_id, v->config_size); v->virtqs = g_new0(VirtQue
[PATCH v7 4/4] docs: Add generic vhost-vdpa device documentation
From: Longpeng Signed-off-by: Longpeng --- docs/system/devices/vhost-vdpa-device.rst | 43 +++ 1 file changed, 43 insertions(+) create mode 100644 docs/system/devices/vhost-vdpa-device.rst diff --git a/docs/system/devices/vhost-vdpa-device.rst b/docs/system/devices/vhost-vdpa-device.rst new file mode 100644 index 00..50173299e0 --- /dev/null +++ b/docs/system/devices/vhost-vdpa-device.rst @@ -0,0 +1,43 @@ + += +generic vhost-vdpa device += + +This document explains the usage of the generic vhost vdpa device. + +Description +--- + +vDPA (virtio data path acceleration) device is a device that uses a datapath +which complies with the virtio specifications with a vendor specific control +path. + +QEMU provides two types of vhost-vdpa devices to enable the vDPA device, one +is type sensitive which means QEMU needs to know the actual device type +(e.g. net, blk, scsi) and the other is called "generic vdpa device" which is +type insensitive (like vfio-pci). + +Examples + + +Prepare the vhost-vdpa backends first: + +:: + host# ls -l /dev/vhost-vdpa-* + crw--- 1 root root 236, 0 Nov 2 00:49 /dev/vhost-vdpa-0 + +Start QEMU with virtio-mmio bus: + +:: + host# qemu-system \ + -M microvm -m 512 -smp 2 -kernel ... -initrd ... \ + -device vhost-vdpa-device,vhostdev=/dev/vhost-vdpa-0 \ + ... + +Start QEMU with virtio-pci bus: + +:: + host# qemu-system \ + -M pc -m 512 -smp 2\ + -device vhost-vdpa-device-pci,vhostdev=/dev/vhost-vdpa-0 \ + ... -- 2.23.0
[PATCH v7 resend 0/4] add generic vDPA device support
From: Longpeng Hi guys, With the generic vDPA device, QEMU won't need to touch the device types any more, much like vfio. We can use the generic vDPA device as follows: -device vhost-vdpa-device-pci,vhostdev=/dev/vhost-vdpa-X Or -M microvm -m 512m -smp 2 -kernel ... -initrd ... -device \ vhost-vdpa-device,vhostdev=/dev/vhost-vdpa-x Changes v6 -> v7: (v6: https://mail.gnu.org/archive/html/qemu-devel/2022-05/msg02821.html) - rebase. [Jason] - add documentation. [Stefan] Changes v5 -> v6: Patch 2: - Turn to the original approach in the RFC to initialize the virtio_pci_id_info array. [Michael] https://lore.kernel.org/all/20220105005900.860-2-longpe...@huawei.com/ Patch 3: - Fix logical error of exception handler around the post_init. [Stefano] - Fix some coding style warnings. [Stefano] Patch 4: - Fix some coding style warnings. [Stefano] Changes v4 -> v5: Patch 3: - remove vhostfd [Jason] - support virtio-mmio [Jason] Changes v3 -> v4: v3: https://www.mail-archive.com/qemu-devel@nongnu.org/msg877015.html - reorganize the series [Stefano] - fix some typos [Stefano] - fix logical error in vhost_vdpa_device_realize [Stefano] Changes v2 -> v3: Patch 4 & 5: - only call vdpa ioctls in vdpa-dev.c [Stefano, Longpeng] - s/VQS_NUM/VQS_COUNT [Stefano] - check both vdpa_dev_fd and vdpa_dev [Stefano] Patch 6: - move all steps into vhost_vdpa_device_unrealize. 
[Stefano] Changes RFC -> v2: Patch 1: - rename 'pdev_id' to 'trans_devid' [Michael] - only use transitional device id for the devices listed in the spec [Michael] - use macros to make the id_info table clearer [Longpeng] - add some modern devices in the id_info table [Longpeng] Patch 2: - remove the GET_VECTORS_NUM command [Jason] Patch 4: - expose vdpa_dev_fd as a QOM property [Stefan] - introduce vhost_vdpa_device_get_u32 as a common function to make the code clearer [Stefan] - fix the misleading description of 'dc->desc' [Stefano] Patch 5: - check returned number of virtqueues [Stefan] Patch 6: - init s->num_queues [Stefano] - free s->dev.vqs [Stefano] Longpeng (Mike) (4): virtio: get class_id and pci device id by the virtio id vdpa: add vdpa-dev support vdpa: add vdpa-dev-pci support docs: Add generic vhost-vdpa device documentation docs/system/devices/vhost-vdpa-device.rst | 43 +++ hw/virtio/Kconfig | 5 + hw/virtio/meson.build | 2 + hw/virtio/vdpa-dev-pci.c | 102 ++ hw/virtio/vdpa-dev.c | 377 ++ hw/virtio/virtio-pci.c| 88 + include/hw/virtio/vdpa-dev.h | 43 +++ include/hw/virtio/virtio-pci.h| 5 + 8 files changed, 665 insertions(+) create mode 100644 docs/system/devices/vhost-vdpa-device.rst create mode 100644 hw/virtio/vdpa-dev-pci.c create mode 100644 hw/virtio/vdpa-dev.c create mode 100644 include/hw/virtio/vdpa-dev.h -- 2.23.0
[PATCH v7 resend 1/4] virtio: get class_id and pci device id by the virtio id
From: Longpeng Add helpers to get the "Transitional PCI Device ID" and "class_id" of the device specified by the "Virtio Device ID". These helpers will be used to build the generic vDPA device later. Acked-by: Jason Wang Signed-off-by: Longpeng --- hw/virtio/virtio-pci.c | 88 ++ include/hw/virtio/virtio-pci.h | 5 ++ 2 files changed, 93 insertions(+) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 34db51e241..3469b88d43 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -19,6 +19,7 @@ #include "exec/memop.h" #include "standard-headers/linux/virtio_pci.h" +#include "standard-headers/linux/virtio_ids.h" #include "hw/boards.h" #include "hw/virtio/virtio.h" #include "migration/qemu-file-types.h" @@ -213,6 +214,90 @@ static int virtio_pci_load_queue(DeviceState *d, int n, QEMUFile *f) return 0; } +typedef struct VirtIOPCIIDInfo { +/* virtio id */ +uint16_t vdev_id; +/* pci device id for the transitional device */ +uint16_t trans_devid; +uint16_t class_id; +} VirtIOPCIIDInfo; + +static const VirtIOPCIIDInfo virtio_pci_id_info[] = { +{ +.vdev_id = VIRTIO_ID_CRYPTO, +.class_id = PCI_CLASS_OTHERS, +}, { +.vdev_id = VIRTIO_ID_FS, +.class_id = PCI_CLASS_STORAGE_OTHER, +}, { +.vdev_id = VIRTIO_ID_NET, +.trans_devid = PCI_DEVICE_ID_VIRTIO_NET, +.class_id = PCI_CLASS_NETWORK_ETHERNET, +}, { +.vdev_id = VIRTIO_ID_BLOCK, +.trans_devid = PCI_DEVICE_ID_VIRTIO_BLOCK, +.class_id = PCI_CLASS_STORAGE_SCSI, +}, { +.vdev_id = VIRTIO_ID_CONSOLE, +.trans_devid = PCI_DEVICE_ID_VIRTIO_CONSOLE, +.class_id = PCI_CLASS_COMMUNICATION_OTHER, +}, { +.vdev_id = VIRTIO_ID_SCSI, +.trans_devid = PCI_DEVICE_ID_VIRTIO_SCSI, +.class_id = PCI_CLASS_STORAGE_SCSI +}, { +.vdev_id = VIRTIO_ID_9P, +.trans_devid = PCI_DEVICE_ID_VIRTIO_9P, +.class_id = PCI_BASE_CLASS_NETWORK, +}, { +.vdev_id = VIRTIO_ID_BALLOON, +.trans_devid = PCI_DEVICE_ID_VIRTIO_BALLOON, +.class_id = PCI_CLASS_OTHERS, +}, { +.vdev_id = VIRTIO_ID_RNG, +.trans_devid = PCI_DEVICE_ID_VIRTIO_RNG, +.class_id = 
PCI_CLASS_OTHERS, +}, +}; + +static const VirtIOPCIIDInfo *virtio_pci_get_id_info(uint16_t vdev_id) +{ +const VirtIOPCIIDInfo *info = NULL; +int i; + +for (i = 0; i < ARRAY_SIZE(virtio_pci_id_info); i++) { +if (virtio_pci_id_info[i].vdev_id == vdev_id) { +info = &virtio_pci_id_info[i]; +break; +} +} + +if (!info) { +/* The device id is invalid or not added to the id_info yet. */ +error_report("Invalid virtio device(id %u)", vdev_id); +abort(); +} + +return info; +} + +/* + * Get the Transitional Device ID for the specific device, return + * zero if the device is non-transitional. + */ +uint16_t virtio_pci_get_trans_devid(uint16_t device_id) +{ +return virtio_pci_get_id_info(device_id)->trans_devid; +} + +/* + * Get the Class ID for the specific device. + */ +uint16_t virtio_pci_get_class_id(uint16_t device_id) +{ +return virtio_pci_get_id_info(device_id)->class_id; +} + static bool virtio_pci_ioeventfd_enabled(DeviceState *d) { VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); @@ -1683,6 +1768,9 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) * is set to PCI_SUBVENDOR_ID_REDHAT_QUMRANET by default. 
*/ pci_set_word(config + PCI_SUBSYSTEM_ID, virtio_bus_get_vdev_id(bus)); +if (proxy->trans_devid) { +pci_config_set_device_id(config, proxy->trans_devid); +} } else { /* pure virtio-1.0 */ pci_set_word(config + PCI_VENDOR_ID, diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h index 2446dcd9ae..d95b1a13a5 100644 --- a/include/hw/virtio/virtio-pci.h +++ b/include/hw/virtio/virtio-pci.h @@ -146,6 +146,8 @@ struct VirtIOPCIProxy { bool disable_modern; bool ignore_backend_features; OnOffAuto disable_legacy; +/* Transitional device id */ +uint16_t trans_devid; uint32_t class_code; uint32_t nvectors; uint32_t dfselect; @@ -179,6 +181,9 @@ static inline void virtio_pci_disable_modern(VirtIOPCIProxy *proxy) proxy->disable_modern = true; } +uint16_t virtio_pci_get_trans_devid(uint16_t device_id); +uint16_t virtio_pci_get_class_id(uint16_t device_id); + /* * virtio-input-pci: This extends VirtioPCIProxy. */ -- 2.23.0
[PATCH v7 resend 2/4] vdpa: add vdpa-dev support
From: Longpeng Support vdpa-dev, so that we can use the device directly: -M microvm -m 512m -smp 2 -kernel ... -initrd ... -device \ vhost-vdpa-device,vhostdev=/dev/vhost-vdpa-x Reviewed-by: Stefano Garzarella Acked-by: Jason Wang Signed-off-by: Longpeng --- hw/virtio/Kconfig| 5 + hw/virtio/meson.build| 1 + hw/virtio/vdpa-dev.c | 377 +++ include/hw/virtio/vdpa-dev.h | 43 4 files changed, 426 insertions(+) create mode 100644 hw/virtio/vdpa-dev.c create mode 100644 include/hw/virtio/vdpa-dev.h diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig index cbfd8c7173..89e9e426d8 100644 --- a/hw/virtio/Kconfig +++ b/hw/virtio/Kconfig @@ -85,3 +85,8 @@ config VHOST_USER_GPIO bool default y depends on VIRTIO && VHOST_USER + +config VHOST_VDPA_DEV +bool +default y +depends on VIRTIO && VHOST_VDPA && LINUX diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index dfed1e7af5..54d6d29af7 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -31,6 +31,7 @@ virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) virtio_ss.add(when: 'CONFIG_VHOST_USER_GPIO', if_true: files('vhost-user-gpio.c')) virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_GPIO'], if_true: files('vhost-user-gpio-pci.c')) +virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c')) virtio_pci_ss = ss.source_set() virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c')) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c new file mode 100644 index 00..62d83d3423 --- /dev/null +++ b/hw/virtio/vdpa-dev.c @@ -0,0 +1,377 @@ +/* + * Vhost Vdpa Device + * + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All Rights Reserved. 
+ * + * Authors: + * Longpeng + * + * Largely based on the "vhost-user-blk-pci.c" and "vhost-user-blk.c" + * implemented by: + * Changpeng Liu + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + */ +#include "qemu/osdep.h" +#include +#include +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/cutils.h" +#include "hw/qdev-core.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/virtio-access.h" +#include "hw/virtio/vdpa-dev.h" +#include "sysemu/sysemu.h" +#include "sysemu/runstate.h" + +static void +vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ +/* Nothing to do */ +} + +static uint32_t +vhost_vdpa_device_get_u32(int fd, unsigned long int cmd, Error **errp) +{ +uint32_t val = (uint32_t)-1; + +if (ioctl(fd, cmd, &val) < 0) { +error_setg(errp, "vhost-vdpa-device: cmd 0x%lx failed: %s", + cmd, strerror(errno)); +} + +return val; +} + +static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) +{ +VirtIODevice *vdev = VIRTIO_DEVICE(dev); +VhostVdpaDevice *v = VHOST_VDPA_DEVICE(vdev); +uint16_t max_queue_size; +struct vhost_virtqueue *vqs; +int i, ret; + +if (!v->vhostdev) { +error_setg(errp, "vhost-vdpa-device: vhostdev are missing"); +return; +} + +v->vhostfd = qemu_open(v->vhostdev, O_RDWR, errp); +if (*errp) { +return; +} +v->vdpa.device_fd = v->vhostfd; + +v->vdev_id = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_DEVICE_ID, errp); +if (*errp) { +goto out; +} + +max_queue_size = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_VRING_NUM, errp); +if (*errp) { +goto out; +} + +if (v->queue_size > max_queue_size) { +error_setg(errp, "vhost-vdpa-device: invalid queue_size: %u (max:%u)", + v->queue_size, max_queue_size); +goto out; +} else if (!v->queue_size) { 
+v->queue_size = max_queue_size; +} + +v->num_queues = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_VQS_COUNT, errp); +if (*errp) { +goto out; +} + +if (!v->num_queues || v->num_queues > VIRTIO_QUEUE_MAX) { +error_setg(errp, "invalid number of virtqueues: %u (max:%u)", + v->num_queues, VIRTIO_QUEUE_MAX); +goto out; +} + +v->dev.nvqs = v->num_queues; +vqs = g_new0(struct vhost_virtqueue, v->dev.nvqs); +v->dev.vqs = vqs; +v->dev.vq_index = 0; +v->dev.vq_index_end = v->dev.nvqs; +v->dev.backend_features = 0; +v->started = false; + +ret = vhost_dev_init(&v->dev, &v->vdpa, VHOST_BACKEND_TYPE_VDPA, 0, NULL); +if (ret < 0) { +error_s
[PATCH v7 resend 4/4] docs: Add generic vhost-vdpa device documentation
From: Longpeng Signed-off-by: Longpeng --- docs/system/devices/vhost-vdpa-device.rst | 43 +++ 1 file changed, 43 insertions(+) create mode 100644 docs/system/devices/vhost-vdpa-device.rst diff --git a/docs/system/devices/vhost-vdpa-device.rst b/docs/system/devices/vhost-vdpa-device.rst new file mode 100644 index 00..b758c4fce6 --- /dev/null +++ b/docs/system/devices/vhost-vdpa-device.rst @@ -0,0 +1,43 @@ + += +generic vhost-vdpa device += + +This document explains the usage of the generic vhost vdpa device. + +Description +--- + +vDPA (virtio data path acceleration) device is a device that uses a datapath +which complies with the virtio specifications with a vendor specific control +path. + +QEMU provides two types of vhost-vdpa devices to enable the vDPA device, one +is type sensitive which means QEMU needs to know the actual device type +(e.g. net, blk, scsi) and the other is called "generic vdpa device" which is +type insensitive (like vfio-pci). + +Examples + + +Prepare the vhost-vdpa backends first: + +:: + host# ls -l /dev/vhost-vdpa-* + crw--- 1 root root 236, 0 Nov 2 00:49 /dev/vhost-vdpa-0 + +Start QEMU with virtio-mmio bus: + +:: + host# qemu-system \ + -M microvm -m 512 -smp 2 -kernel ... -initrd ... \ + -device vhost-vdpa-device,vhostdev=/dev/vhost-vdpa-0 \ + ... + +Start QEMU with virtio-pci bus: + +:: + host# qemu-system \ + -M pc -m 512 -smp 2\ + -device vhost-vdpa-device-pci,vhostdev=/dev/vhost-vdpa-0 \ + ... -- 2.23.0
[PATCH v7 resend 3/4] vdpa: add vdpa-dev-pci support
From: Longpeng Supports vdpa-dev-pci, we can use the device as follow: -device vhost-vdpa-device-pci,vhostdev=/dev/vhost-vdpa-X Reviewed-by: Stefano Garzarella Acked-by: Jason Wang Signed-off-by: Longpeng --- hw/virtio/meson.build| 1 + hw/virtio/vdpa-dev-pci.c | 102 +++ 2 files changed, 103 insertions(+) create mode 100644 hw/virtio/vdpa-dev-pci.c diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 54d6d29af7..559b80cb28 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -57,6 +57,7 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_SERIAL', if_true: files('virtio-serial-pc virtio_pci_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem-pci.c')) +virtio_pci_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev-pci.c')) virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) diff --git a/hw/virtio/vdpa-dev-pci.c b/hw/virtio/vdpa-dev-pci.c new file mode 100644 index 00..5446e6b393 --- /dev/null +++ b/hw/virtio/vdpa-dev-pci.c @@ -0,0 +1,102 @@ +/* + * Vhost Vdpa Device PCI Bindings + * + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All Rights Reserved. + * + * Authors: + * Longpeng + * + * Largely based on the "vhost-user-blk-pci.c" and "vhost-user-blk.c" + * implemented by: + * Changpeng Liu + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. 
+ */ +#include "qemu/osdep.h" +#include +#include +#include "hw/virtio/virtio.h" +#include "hw/virtio/vdpa-dev.h" +#include "hw/pci/pci.h" +#include "hw/qdev-properties.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/module.h" +#include "hw/virtio/virtio-pci.h" +#include "qom/object.h" + + +typedef struct VhostVdpaDevicePCI VhostVdpaDevicePCI; + +#define TYPE_VHOST_VDPA_DEVICE_PCI "vhost-vdpa-device-pci-base" +DECLARE_INSTANCE_CHECKER(VhostVdpaDevicePCI, VHOST_VDPA_DEVICE_PCI, + TYPE_VHOST_VDPA_DEVICE_PCI) + +struct VhostVdpaDevicePCI { +VirtIOPCIProxy parent_obj; +VhostVdpaDevice vdev; +}; + +static void vhost_vdpa_device_pci_instance_init(Object *obj) +{ +VhostVdpaDevicePCI *dev = VHOST_VDPA_DEVICE_PCI(obj); + +virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), +TYPE_VHOST_VDPA_DEVICE); +object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev), + "bootindex"); +} + +static Property vhost_vdpa_device_pci_properties[] = { +DEFINE_PROP_END_OF_LIST(), +}; + +static int vhost_vdpa_device_pci_post_init(VhostVdpaDevice *v, Error **errp) +{ +VhostVdpaDevicePCI *dev = container_of(v, VhostVdpaDevicePCI, vdev); +VirtIOPCIProxy *vpci_dev = &dev->parent_obj; + +vpci_dev->class_code = virtio_pci_get_class_id(v->vdev_id); +vpci_dev->trans_devid = virtio_pci_get_trans_devid(v->vdev_id); +/* one for config vector */ +vpci_dev->nvectors = v->num_queues + 1; + +return 0; +} + +static void +vhost_vdpa_device_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ +VhostVdpaDevicePCI *dev = VHOST_VDPA_DEVICE_PCI(vpci_dev); + +dev->vdev.post_init = vhost_vdpa_device_pci_post_init; +qdev_realize(DEVICE(&dev->vdev), BUS(&vpci_dev->bus), errp); +} + +static void vhost_vdpa_device_pci_class_init(ObjectClass *klass, void *data) +{ +DeviceClass *dc = DEVICE_CLASS(klass); +VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + +set_bit(DEVICE_CATEGORY_MISC, dc->categories); +device_class_set_props(dc, vhost_vdpa_device_pci_properties); +k->realize = 
vhost_vdpa_device_pci_realize; +} + +static const VirtioPCIDeviceTypeInfo vhost_vdpa_device_pci_info = { +.base_name = TYPE_VHOST_VDPA_DEVICE_PCI, +.generic_name= "vhost-vdpa-device-pci", +.transitional_name = "vhost-vdpa-device-pci-transitional", +.non_transitional_name = "vhost-vdpa-device-pci-non-transitional", +.instance_size = sizeof(VhostVdpaDevicePCI), +.instance_init = vhost_vdpa_device_pci_instance_init, +.class_init = vhost_vdpa_device_pci_class_init, +}; + +static void vhost_vdpa_device_pci_register(void) +{ +virtio_pci_types_register(&vhost_vdpa_device_pci_info); +} + +type_init(vhost_vdpa_device_pci_register); -- 2.23.0
[PATCH v8 2/5] vdpa: add vdpa-dev support
From: Longpeng Support vdpa-dev, so that we can use the device directly: -M microvm -m 512m -smp 2 -kernel ... -initrd ... -device \ vhost-vdpa-device,vhostdev=/dev/vhost-vdpa-x Reviewed-by: Stefano Garzarella Acked-by: Jason Wang Signed-off-by: Longpeng --- hw/virtio/Kconfig| 5 + hw/virtio/meson.build| 1 + hw/virtio/vdpa-dev.c | 376 +++ include/hw/virtio/vdpa-dev.h | 43 4 files changed, 425 insertions(+) create mode 100644 hw/virtio/vdpa-dev.c create mode 100644 include/hw/virtio/vdpa-dev.h diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig index cbfd8c7173..89e9e426d8 100644 --- a/hw/virtio/Kconfig +++ b/hw/virtio/Kconfig @@ -85,3 +85,8 @@ config VHOST_USER_GPIO bool default y depends on VIRTIO && VHOST_USER + +config VHOST_VDPA_DEV +bool +default y +depends on VIRTIO && VHOST_VDPA && LINUX diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index dfed1e7af5..54d6d29af7 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -31,6 +31,7 @@ virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) virtio_ss.add(when: 'CONFIG_VHOST_USER_GPIO', if_true: files('vhost-user-gpio.c')) virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_GPIO'], if_true: files('vhost-user-gpio-pci.c')) +virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c')) virtio_pci_ss = ss.source_set() virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c')) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c new file mode 100644 index 00..2885d06cbe --- /dev/null +++ b/hw/virtio/vdpa-dev.c @@ -0,0 +1,376 @@ +/* + * Vhost Vdpa Device + * + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All Rights Reserved. 
+ * + * Authors: + * Longpeng + * + * Largely based on the "vhost-user-blk-pci.c" and "vhost-user-blk.c" + * implemented by: + * Changpeng Liu + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + */ +#include "qemu/osdep.h" +#include +#include +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/cutils.h" +#include "hw/qdev-core.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/virtio-access.h" +#include "hw/virtio/vdpa-dev.h" +#include "sysemu/sysemu.h" +#include "sysemu/runstate.h" + +static void +vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ +/* Nothing to do */ +} + +static uint32_t +vhost_vdpa_device_get_u32(int fd, unsigned long int cmd, Error **errp) +{ +uint32_t val = (uint32_t)-1; + +if (ioctl(fd, cmd, &val) < 0) { +error_setg(errp, "vhost-vdpa-device: cmd 0x%lx failed: %s", + cmd, strerror(errno)); +} + +return val; +} + +static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) +{ +VirtIODevice *vdev = VIRTIO_DEVICE(dev); +VhostVdpaDevice *v = VHOST_VDPA_DEVICE(vdev); +uint16_t max_queue_size; +struct vhost_virtqueue *vqs; +int i, ret; + +if (!v->vhostdev) { +error_setg(errp, "vhost-vdpa-device: vhostdev are missing"); +return; +} + +v->vhostfd = qemu_open(v->vhostdev, O_RDWR, errp); +if (*errp) { +return; +} +v->vdpa.device_fd = v->vhostfd; + +v->vdev_id = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_DEVICE_ID, errp); +if (*errp) { +goto out; +} + +max_queue_size = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_VRING_NUM, errp); +if (*errp) { +goto out; +} + +if (v->queue_size > max_queue_size) { +error_setg(errp, "vhost-vdpa-device: invalid queue_size: %u (max:%u)", + v->queue_size, max_queue_size); +goto out; +} else if (!v->queue_size) { 
+v->queue_size = max_queue_size; +} + +v->num_queues = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_VQS_COUNT, errp); +if (*errp) { +goto out; +} + +if (!v->num_queues || v->num_queues > VIRTIO_QUEUE_MAX) { +error_setg(errp, "invalid number of virtqueues: %u (max:%u)", + v->num_queues, VIRTIO_QUEUE_MAX); +goto out; +} + +v->dev.nvqs = v->num_queues; +vqs = g_new0(struct vhost_virtqueue, v->dev.nvqs); +v->dev.vqs = vqs; +v->dev.vq_index = 0; +v->dev.vq_index_end = v->dev.nvqs; +v->dev.backend_features = 0; +v->started = false; + +ret = vhost_dev_init(&v->dev, &v->vdpa, VHOST_BACKEND_TYPE_VDPA, 0, NULL); +if (ret < 0) { +error_s
[PATCH v8 0/5] add generic vDPA device support
From: Longpeng Hi guys, With the generic vDPA device, QEMU won't need to touch the device types any more, much like vfio. We can use the generic vDPA device as follows: -device vhost-vdpa-device-pci,vhostdev=/dev/vhost-vdpa-X Or -M microvm -m 512m -smp 2 -kernel ... -initrd ... -device \ vhost-vdpa-device,vhostdev=/dev/vhost-vdpa-x Changes v7 -> v8: - add migration blocker. [Michael] Changes v6 -> v7: (v6: https://mail.gnu.org/archive/html/qemu-devel/2022-05/msg02821.html) - rebase. [Jason] - add documentation. [Stefan] Changes v5 -> v6: Patch 2: - Turn to the original approach in the RFC to initialize the virtio_pci_id_info array. [Michael] https://lore.kernel.org/all/20220105005900.860-2-longpe...@huawei.com/ Patch 3: - Fix logical error of exception handler around the post_init. [Stefano] - Fix some coding style warnings. [Stefano] Patch 4: - Fix some coding style warnings. [Stefano] Changes v4 -> v5: Patch 3: - remove vhostfd [Jason] - support virtio-mmio [Jason] Changes v3 -> v4: v3: https://www.mail-archive.com/qemu-devel@nongnu.org/msg877015.html - reorganize the series [Stefano] - fix some typos [Stefano] - fix logical error in vhost_vdpa_device_realize [Stefano] Changes v2 -> v3: Patch 4 & 5: - only call vdpa ioctls in vdpa-dev.c [Stefano, Longpeng] - s/VQS_NUM/VQS_COUNT [Stefano] - check both vdpa_dev_fd and vdpa_dev [Stefano] Patch 6: - move all steps into vhost_vdpa_device_unrealize. 
[Stefano] Changes RFC -> v2: Patch 1: - rename 'pdev_id' to 'trans_devid' [Michael] - only use transitional device id for the devices listed in the spec [Michael] - use macros to make the id_info table clearer [Longpeng] - add some modern devices in the id_info table [Longpeng] Patch 2: - remove the GET_VECTORS_NUM command [Jason] Patch 4: - expose vdpa_dev_fd as a QOM property [Stefan] - introduce vhost_vdpa_device_get_u32 as a common function to make the code clearer [Stefan] - fix the misleading description of 'dc->desc' [Stefano] Patch 5: - check returned number of virtqueues [Stefan] Patch 6: - init s->num_queues [Stefano] - free s->dev.vqs [Stefano] Longpeng (Mike) (5): virtio: get class_id and pci device id by the virtio id vdpa: add vdpa-dev support vdpa: add vdpa-dev-pci support vdpa-dev: mark the device as unmigratable docs: Add generic vhost-vdpa device documentation docs/system/devices/vhost-vdpa-device.rst | 43 +++ hw/virtio/Kconfig | 5 + hw/virtio/meson.build | 2 + hw/virtio/vdpa-dev-pci.c | 102 ++ hw/virtio/vdpa-dev.c | 377 ++ hw/virtio/virtio-pci.c| 88 + include/hw/virtio/vdpa-dev.h | 43 +++ include/hw/virtio/virtio-pci.h| 5 + 8 files changed, 665 insertions(+) create mode 100644 docs/system/devices/vhost-vdpa-device.rst create mode 100644 hw/virtio/vdpa-dev-pci.c create mode 100644 hw/virtio/vdpa-dev.c create mode 100644 include/hw/virtio/vdpa-dev.h -- 2.23.0
[PATCH v8 3/5] vdpa: add vdpa-dev-pci support
From: Longpeng Supports vdpa-dev-pci, we can use the device as follows: -device vhost-vdpa-device-pci,vhostdev=/dev/vhost-vdpa-X Reviewed-by: Stefano Garzarella Acked-by: Jason Wang Signed-off-by: Longpeng --- hw/virtio/meson.build| 1 + hw/virtio/vdpa-dev-pci.c | 102 +++ 2 files changed, 103 insertions(+) create mode 100644 hw/virtio/vdpa-dev-pci.c diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 54d6d29af7..559b80cb28 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -57,6 +57,7 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_SERIAL', if_true: files('virtio-serial-pc virtio_pci_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem-pci.c')) +virtio_pci_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev-pci.c')) virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) diff --git a/hw/virtio/vdpa-dev-pci.c b/hw/virtio/vdpa-dev-pci.c new file mode 100644 index 00..5446e6b393 --- /dev/null +++ b/hw/virtio/vdpa-dev-pci.c @@ -0,0 +1,102 @@ +/* + * Vhost Vdpa Device PCI Bindings + * + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All Rights Reserved. + * + * Authors: + * Longpeng + * + * Largely based on the "vhost-user-blk-pci.c" and "vhost-user-blk.c" + * implemented by: + * Changpeng Liu + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. 
+ */ +#include "qemu/osdep.h" +#include +#include +#include "hw/virtio/virtio.h" +#include "hw/virtio/vdpa-dev.h" +#include "hw/pci/pci.h" +#include "hw/qdev-properties.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/module.h" +#include "hw/virtio/virtio-pci.h" +#include "qom/object.h" + + +typedef struct VhostVdpaDevicePCI VhostVdpaDevicePCI; + +#define TYPE_VHOST_VDPA_DEVICE_PCI "vhost-vdpa-device-pci-base" +DECLARE_INSTANCE_CHECKER(VhostVdpaDevicePCI, VHOST_VDPA_DEVICE_PCI, + TYPE_VHOST_VDPA_DEVICE_PCI) + +struct VhostVdpaDevicePCI { +VirtIOPCIProxy parent_obj; +VhostVdpaDevice vdev; +}; + +static void vhost_vdpa_device_pci_instance_init(Object *obj) +{ +VhostVdpaDevicePCI *dev = VHOST_VDPA_DEVICE_PCI(obj); + +virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), +TYPE_VHOST_VDPA_DEVICE); +object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev), + "bootindex"); +} + +static Property vhost_vdpa_device_pci_properties[] = { +DEFINE_PROP_END_OF_LIST(), +}; + +static int vhost_vdpa_device_pci_post_init(VhostVdpaDevice *v, Error **errp) +{ +VhostVdpaDevicePCI *dev = container_of(v, VhostVdpaDevicePCI, vdev); +VirtIOPCIProxy *vpci_dev = &dev->parent_obj; + +vpci_dev->class_code = virtio_pci_get_class_id(v->vdev_id); +vpci_dev->trans_devid = virtio_pci_get_trans_devid(v->vdev_id); +/* one for config vector */ +vpci_dev->nvectors = v->num_queues + 1; + +return 0; +} + +static void +vhost_vdpa_device_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ +VhostVdpaDevicePCI *dev = VHOST_VDPA_DEVICE_PCI(vpci_dev); + +dev->vdev.post_init = vhost_vdpa_device_pci_post_init; +qdev_realize(DEVICE(&dev->vdev), BUS(&vpci_dev->bus), errp); +} + +static void vhost_vdpa_device_pci_class_init(ObjectClass *klass, void *data) +{ +DeviceClass *dc = DEVICE_CLASS(klass); +VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + +set_bit(DEVICE_CATEGORY_MISC, dc->categories); +device_class_set_props(dc, vhost_vdpa_device_pci_properties); +k->realize = 
vhost_vdpa_device_pci_realize; +} + +static const VirtioPCIDeviceTypeInfo vhost_vdpa_device_pci_info = { +.base_name = TYPE_VHOST_VDPA_DEVICE_PCI, +.generic_name= "vhost-vdpa-device-pci", +.transitional_name = "vhost-vdpa-device-pci-transitional", +.non_transitional_name = "vhost-vdpa-device-pci-non-transitional", +.instance_size = sizeof(VhostVdpaDevicePCI), +.instance_init = vhost_vdpa_device_pci_instance_init, +.class_init = vhost_vdpa_device_pci_class_init, +}; + +static void vhost_vdpa_device_pci_register(void) +{ +virtio_pci_types_register(&vhost_vdpa_device_pci_info); +} + +type_init(vhost_vdpa_device_pci_register); -- 2.23.0
[PATCH v8 5/5] docs: Add generic vhost-vdpa device documentation
From: Longpeng Signed-off-by: Longpeng --- docs/system/devices/vhost-vdpa-device.rst | 43 +++ 1 file changed, 43 insertions(+) create mode 100644 docs/system/devices/vhost-vdpa-device.rst diff --git a/docs/system/devices/vhost-vdpa-device.rst b/docs/system/devices/vhost-vdpa-device.rst new file mode 100644 index 00..b758c4fce6 --- /dev/null +++ b/docs/system/devices/vhost-vdpa-device.rst @@ -0,0 +1,43 @@ + += +generic vhost-vdpa device += + +This document explains the usage of the generic vhost vdpa device. + +Description +--- + +vDPA(virtio data path acceleration) device is a device that uses a datapath +which complies with the virtio specifications with vendor specific control +path. + +QEMU provides two types of vhost-vdpa devices to enable the vDPA device, one +is type sensitive which means QEMU needs to know the actual device type +(e.g. net, blk, scsi) and another is called "generic vdpa device" which is +type insensitive (like vfio-pci). + +Examples + + +Prepare the vhost-vdpa backends first: + +:: + host# ls -l /dev/vhost-vdpa-* + crw--- 1 root root 236, 0 Nov 2 00:49 /dev/vhost-vdpa-0 + +Start QEMU with virtio-mmio bus: + +:: + host# qemu-system \ + -M microvm -m 512 -smp 2 -kernel ... -initrd ... \ + -device vhost-vdpa-device,vhostdev=/dev/vhost-vdpa-0 \ + ... + +Start QEMU with virtio-pci bus: + +:: + host# qemu-system \ + -M pc -m 512 -smp 2\ + -device vhost-vdpa-device-pci,vhostdev=/dev/vhost-vdpa-0 \ + ... -- 2.23.0
[PATCH v8 4/5] vdpa-dev: mark the device as unmigratable
From: Longpeng The generic vDPA device doesn't support migration currently, so mark it as unmigratable temporarily. Signed-off-by: Longpeng --- hw/virtio/vdpa-dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index 2885d06cbe..62d83d3423 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -327,6 +327,7 @@ static Property vhost_vdpa_device_properties[] = { static const VMStateDescription vmstate_vhost_vdpa_device = { .name = "vhost-vdpa-device", +.unmigratable = 1, .minimum_version_id = 1, .version_id = 1, .fields = (VMStateField[]) { -- 2.23.0
[PATCH v8 1/5] virtio: get class_id and pci device id by the virtio id
From: Longpeng Add helpers to get the "Transitional PCI Device ID" and "class_id" of the device specified by the "Virtio Device ID". These helpers will be used to build the generic vDPA device later. Acked-by: Jason Wang Signed-off-by: Longpeng --- hw/virtio/virtio-pci.c | 88 ++ include/hw/virtio/virtio-pci.h | 5 ++ 2 files changed, 93 insertions(+) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 34db51e241..3469b88d43 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -19,6 +19,7 @@ #include "exec/memop.h" #include "standard-headers/linux/virtio_pci.h" +#include "standard-headers/linux/virtio_ids.h" #include "hw/boards.h" #include "hw/virtio/virtio.h" #include "migration/qemu-file-types.h" @@ -213,6 +214,90 @@ static int virtio_pci_load_queue(DeviceState *d, int n, QEMUFile *f) return 0; } +typedef struct VirtIOPCIIDInfo { +/* virtio id */ +uint16_t vdev_id; +/* pci device id for the transitional device */ +uint16_t trans_devid; +uint16_t class_id; +} VirtIOPCIIDInfo; + +static const VirtIOPCIIDInfo virtio_pci_id_info[] = { +{ +.vdev_id = VIRTIO_ID_CRYPTO, +.class_id = PCI_CLASS_OTHERS, +}, { +.vdev_id = VIRTIO_ID_FS, +.class_id = PCI_CLASS_STORAGE_OTHER, +}, { +.vdev_id = VIRTIO_ID_NET, +.trans_devid = PCI_DEVICE_ID_VIRTIO_NET, +.class_id = PCI_CLASS_NETWORK_ETHERNET, +}, { +.vdev_id = VIRTIO_ID_BLOCK, +.trans_devid = PCI_DEVICE_ID_VIRTIO_BLOCK, +.class_id = PCI_CLASS_STORAGE_SCSI, +}, { +.vdev_id = VIRTIO_ID_CONSOLE, +.trans_devid = PCI_DEVICE_ID_VIRTIO_CONSOLE, +.class_id = PCI_CLASS_COMMUNICATION_OTHER, +}, { +.vdev_id = VIRTIO_ID_SCSI, +.trans_devid = PCI_DEVICE_ID_VIRTIO_SCSI, +.class_id = PCI_CLASS_STORAGE_SCSI +}, { +.vdev_id = VIRTIO_ID_9P, +.trans_devid = PCI_DEVICE_ID_VIRTIO_9P, +.class_id = PCI_BASE_CLASS_NETWORK, +}, { +.vdev_id = VIRTIO_ID_BALLOON, +.trans_devid = PCI_DEVICE_ID_VIRTIO_BALLOON, +.class_id = PCI_CLASS_OTHERS, +}, { +.vdev_id = VIRTIO_ID_RNG, +.trans_devid = PCI_DEVICE_ID_VIRTIO_RNG, +.class_id = 
PCI_CLASS_OTHERS, +}, +}; + +static const VirtIOPCIIDInfo *virtio_pci_get_id_info(uint16_t vdev_id) +{ +const VirtIOPCIIDInfo *info = NULL; +int i; + +for (i = 0; i < ARRAY_SIZE(virtio_pci_id_info); i++) { +if (virtio_pci_id_info[i].vdev_id == vdev_id) { +info = &virtio_pci_id_info[i]; +break; +} +} + +if (!info) { +/* The device id is invalid or not added to the id_info yet. */ +error_report("Invalid virtio device(id %u)", vdev_id); +abort(); +} + +return info; +} + +/* + * Get the Transitional Device ID for the specific device, return + * zero if the device is non-transitional. + */ +uint16_t virtio_pci_get_trans_devid(uint16_t device_id) +{ +return virtio_pci_get_id_info(device_id)->trans_devid; +} + +/* + * Get the Class ID for the specific device. + */ +uint16_t virtio_pci_get_class_id(uint16_t device_id) +{ +return virtio_pci_get_id_info(device_id)->class_id; +} + static bool virtio_pci_ioeventfd_enabled(DeviceState *d) { VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); @@ -1683,6 +1768,9 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) * is set to PCI_SUBVENDOR_ID_REDHAT_QUMRANET by default. 
*/ pci_set_word(config + PCI_SUBSYSTEM_ID, virtio_bus_get_vdev_id(bus)); +if (proxy->trans_devid) { +pci_config_set_device_id(config, proxy->trans_devid); +} } else { /* pure virtio-1.0 */ pci_set_word(config + PCI_VENDOR_ID, diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h index 2446dcd9ae..d95b1a13a5 100644 --- a/include/hw/virtio/virtio-pci.h +++ b/include/hw/virtio/virtio-pci.h @@ -146,6 +146,8 @@ struct VirtIOPCIProxy { bool disable_modern; bool ignore_backend_features; OnOffAuto disable_legacy; +/* Transitional device id */ +uint16_t trans_devid; uint32_t class_code; uint32_t nvectors; uint32_t dfselect; @@ -179,6 +181,9 @@ static inline void virtio_pci_disable_modern(VirtIOPCIProxy *proxy) proxy->disable_modern = true; } +uint16_t virtio_pci_get_trans_devid(uint16_t device_id); +uint16_t virtio_pci_get_class_id(uint16_t device_id); + /* * virtio-input-pci: This extends VirtioPCIProxy. */ -- 2.23.0
[PATCH v9 5/5] docs: Add generic vhost-vdpa device documentation
From: Longpeng Signed-off-by: Longpeng --- .../devices/vhost-vdpa-generic-device.rst | 46 +++ 1 file changed, 46 insertions(+) create mode 100644 docs/system/devices/vhost-vdpa-generic-device.rst diff --git a/docs/system/devices/vhost-vdpa-generic-device.rst b/docs/system/devices/vhost-vdpa-generic-device.rst new file mode 100644 index 00..d6db9af755 --- /dev/null +++ b/docs/system/devices/vhost-vdpa-generic-device.rst @@ -0,0 +1,46 @@ + += +vhost-vDPA generic device += + +This document explains the usage of the vhost-vDPA generic device. + +Description +--- + +vDPA(virtio data path acceleration) device is a device that uses a datapath +which complies with the virtio specifications with vendor specific control +path. + +QEMU provides two types of vhost-vDPA devices to enable the vDPA device, one +is type sensitive which means QEMU needs to know the actual device type +(e.g. net, blk, scsi) and another is called "vhost-vDPA generic device" which +is type insensitive. + +The vhost-vDPA generic device builds on the vhost-vdpa subsystem and virtio +subsystem. It is quite small, but it can support any type of virtio device. + +Examples + + +Prepare the vhost-vDPA backends first: + +:: + host# ls -l /dev/vhost-vdpa-* + crw--- 1 root root 236, 0 Nov 2 00:49 /dev/vhost-vdpa-0 + +Start QEMU with virtio-mmio bus: + +:: + host# qemu-system \ + -M microvm -m 512 -smp 2 -kernel ... -initrd ... \ + -device vhost-vdpa-device,vhostdev=/dev/vhost-vdpa-0 \ + ... + +Start QEMU with virtio-pci bus: + +:: + host# qemu-system \ + -M pc -m 512 -smp 2\ + -device vhost-vdpa-device-pci,vhostdev=/dev/vhost-vdpa-0 \ + ... -- 2.23.0
[PATCH v9 2/5] vdpa: add vdpa-dev support
From: Longpeng Supports vdpa-dev, we can use the device directly: -M microvm -m 512m -smp 2 -kernel ... -initrd ... -device \ vhost-vdpa-device,vhostdev=/dev/vhost-vdpa-x Reviewed-by: Stefano Garzarella Acked-by: Jason Wang Signed-off-by: Longpeng --- hw/virtio/Kconfig| 5 + hw/virtio/meson.build| 1 + hw/virtio/vdpa-dev.c | 376 +++ include/hw/virtio/vdpa-dev.h | 43 4 files changed, 425 insertions(+) create mode 100644 hw/virtio/vdpa-dev.c create mode 100644 include/hw/virtio/vdpa-dev.h diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig index cbfd8c7173..89e9e426d8 100644 --- a/hw/virtio/Kconfig +++ b/hw/virtio/Kconfig @@ -85,3 +85,8 @@ config VHOST_USER_GPIO bool default y depends on VIRTIO && VHOST_USER + +config VHOST_VDPA_DEV +bool +default y +depends on VIRTIO && VHOST_VDPA && LINUX diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index dfed1e7af5..54d6d29af7 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -31,6 +31,7 @@ virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) virtio_ss.add(when: 'CONFIG_VHOST_USER_GPIO', if_true: files('vhost-user-gpio.c')) virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_GPIO'], if_true: files('vhost-user-gpio-pci.c')) +virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c')) virtio_pci_ss = ss.source_set() virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c')) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c new file mode 100644 index 00..2885d06cbe --- /dev/null +++ b/hw/virtio/vdpa-dev.c @@ -0,0 +1,376 @@ +/* + * Vhost Vdpa Device + * + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All Rights Reserved. 
+ * + * Authors: + * Longpeng + * + * Largely based on the "vhost-user-blk-pci.c" and "vhost-user-blk.c" + * implemented by: + * Changpeng Liu + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + */ +#include "qemu/osdep.h" +#include +#include +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/cutils.h" +#include "hw/qdev-core.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/virtio-access.h" +#include "hw/virtio/vdpa-dev.h" +#include "sysemu/sysemu.h" +#include "sysemu/runstate.h" + +static void +vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ +/* Nothing to do */ +} + +static uint32_t +vhost_vdpa_device_get_u32(int fd, unsigned long int cmd, Error **errp) +{ +uint32_t val = (uint32_t)-1; + +if (ioctl(fd, cmd, &val) < 0) { +error_setg(errp, "vhost-vdpa-device: cmd 0x%lx failed: %s", + cmd, strerror(errno)); +} + +return val; +} + +static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) +{ +VirtIODevice *vdev = VIRTIO_DEVICE(dev); +VhostVdpaDevice *v = VHOST_VDPA_DEVICE(vdev); +uint16_t max_queue_size; +struct vhost_virtqueue *vqs; +int i, ret; + +if (!v->vhostdev) { +error_setg(errp, "vhost-vdpa-device: vhostdev are missing"); +return; +} + +v->vhostfd = qemu_open(v->vhostdev, O_RDWR, errp); +if (*errp) { +return; +} +v->vdpa.device_fd = v->vhostfd; + +v->vdev_id = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_DEVICE_ID, errp); +if (*errp) { +goto out; +} + +max_queue_size = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_VRING_NUM, errp); +if (*errp) { +goto out; +} + +if (v->queue_size > max_queue_size) { +error_setg(errp, "vhost-vdpa-device: invalid queue_size: %u (max:%u)", + v->queue_size, max_queue_size); +goto out; +} else if (!v->queue_size) { 
+v->queue_size = max_queue_size; +} + +v->num_queues = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_VQS_COUNT, errp); +if (*errp) { +goto out; +} + +if (!v->num_queues || v->num_queues > VIRTIO_QUEUE_MAX) { +error_setg(errp, "invalid number of virtqueues: %u (max:%u)", + v->num_queues, VIRTIO_QUEUE_MAX); +goto out; +} + +v->dev.nvqs = v->num_queues; +vqs = g_new0(struct vhost_virtqueue, v->dev.nvqs); +v->dev.vqs = vqs; +v->dev.vq_index = 0; +v->dev.vq_index_end = v->dev.nvqs; +v->dev.backend_features = 0; +v->started = false; + +ret = vhost_dev_init(&v->dev, &v->vdpa, VHOST_BACKEND_TYPE_VDPA, 0, NULL); +if (ret < 0) { +error_s
[PATCH v9 3/5] vdpa: add vdpa-dev-pci support
From: Longpeng Supports vdpa-dev-pci, we can use the device as follows: -device vhost-vdpa-device-pci,vhostdev=/dev/vhost-vdpa-X Reviewed-by: Stefano Garzarella Acked-by: Jason Wang Signed-off-by: Longpeng --- hw/virtio/meson.build| 1 + hw/virtio/vdpa-dev-pci.c | 102 +++ 2 files changed, 103 insertions(+) create mode 100644 hw/virtio/vdpa-dev-pci.c diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 54d6d29af7..559b80cb28 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -57,6 +57,7 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_SERIAL', if_true: files('virtio-serial-pc virtio_pci_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem-pci.c')) +virtio_pci_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev-pci.c')) virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) diff --git a/hw/virtio/vdpa-dev-pci.c b/hw/virtio/vdpa-dev-pci.c new file mode 100644 index 00..5446e6b393 --- /dev/null +++ b/hw/virtio/vdpa-dev-pci.c @@ -0,0 +1,102 @@ +/* + * Vhost Vdpa Device PCI Bindings + * + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All Rights Reserved. + * + * Authors: + * Longpeng + * + * Largely based on the "vhost-user-blk-pci.c" and "vhost-user-blk.c" + * implemented by: + * Changpeng Liu + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. 
+ */ +#include "qemu/osdep.h" +#include +#include +#include "hw/virtio/virtio.h" +#include "hw/virtio/vdpa-dev.h" +#include "hw/pci/pci.h" +#include "hw/qdev-properties.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/module.h" +#include "hw/virtio/virtio-pci.h" +#include "qom/object.h" + + +typedef struct VhostVdpaDevicePCI VhostVdpaDevicePCI; + +#define TYPE_VHOST_VDPA_DEVICE_PCI "vhost-vdpa-device-pci-base" +DECLARE_INSTANCE_CHECKER(VhostVdpaDevicePCI, VHOST_VDPA_DEVICE_PCI, + TYPE_VHOST_VDPA_DEVICE_PCI) + +struct VhostVdpaDevicePCI { +VirtIOPCIProxy parent_obj; +VhostVdpaDevice vdev; +}; + +static void vhost_vdpa_device_pci_instance_init(Object *obj) +{ +VhostVdpaDevicePCI *dev = VHOST_VDPA_DEVICE_PCI(obj); + +virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), +TYPE_VHOST_VDPA_DEVICE); +object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev), + "bootindex"); +} + +static Property vhost_vdpa_device_pci_properties[] = { +DEFINE_PROP_END_OF_LIST(), +}; + +static int vhost_vdpa_device_pci_post_init(VhostVdpaDevice *v, Error **errp) +{ +VhostVdpaDevicePCI *dev = container_of(v, VhostVdpaDevicePCI, vdev); +VirtIOPCIProxy *vpci_dev = &dev->parent_obj; + +vpci_dev->class_code = virtio_pci_get_class_id(v->vdev_id); +vpci_dev->trans_devid = virtio_pci_get_trans_devid(v->vdev_id); +/* one for config vector */ +vpci_dev->nvectors = v->num_queues + 1; + +return 0; +} + +static void +vhost_vdpa_device_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ +VhostVdpaDevicePCI *dev = VHOST_VDPA_DEVICE_PCI(vpci_dev); + +dev->vdev.post_init = vhost_vdpa_device_pci_post_init; +qdev_realize(DEVICE(&dev->vdev), BUS(&vpci_dev->bus), errp); +} + +static void vhost_vdpa_device_pci_class_init(ObjectClass *klass, void *data) +{ +DeviceClass *dc = DEVICE_CLASS(klass); +VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + +set_bit(DEVICE_CATEGORY_MISC, dc->categories); +device_class_set_props(dc, vhost_vdpa_device_pci_properties); +k->realize = 
vhost_vdpa_device_pci_realize; +} + +static const VirtioPCIDeviceTypeInfo vhost_vdpa_device_pci_info = { +.base_name = TYPE_VHOST_VDPA_DEVICE_PCI, +.generic_name= "vhost-vdpa-device-pci", +.transitional_name = "vhost-vdpa-device-pci-transitional", +.non_transitional_name = "vhost-vdpa-device-pci-non-transitional", +.instance_size = sizeof(VhostVdpaDevicePCI), +.instance_init = vhost_vdpa_device_pci_instance_init, +.class_init = vhost_vdpa_device_pci_class_init, +}; + +static void vhost_vdpa_device_pci_register(void) +{ +virtio_pci_types_register(&vhost_vdpa_device_pci_info); +} + +type_init(vhost_vdpa_device_pci_register); -- 2.23.0
[PATCH v9 1/5] virtio: get class_id and pci device id by the virtio id
From: Longpeng Add helpers to get the "Transitional PCI Device ID" and "class_id" of the device specified by the "Virtio Device ID". These helpers will be used to build the generic vDPA device later. Acked-by: Jason Wang Signed-off-by: Longpeng --- hw/virtio/virtio-pci.c | 88 ++ include/hw/virtio/virtio-pci.h | 5 ++ 2 files changed, 93 insertions(+) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index a1c9dfa7bb..a602f670ca 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -19,6 +19,7 @@ #include "exec/memop.h" #include "standard-headers/linux/virtio_pci.h" +#include "standard-headers/linux/virtio_ids.h" #include "hw/boards.h" #include "hw/virtio/virtio.h" #include "migration/qemu-file-types.h" @@ -224,6 +225,90 @@ static int virtio_pci_load_queue(DeviceState *d, int n, QEMUFile *f) return 0; } +typedef struct VirtIOPCIIDInfo { +/* virtio id */ +uint16_t vdev_id; +/* pci device id for the transitional device */ +uint16_t trans_devid; +uint16_t class_id; +} VirtIOPCIIDInfo; + +static const VirtIOPCIIDInfo virtio_pci_id_info[] = { +{ +.vdev_id = VIRTIO_ID_CRYPTO, +.class_id = PCI_CLASS_OTHERS, +}, { +.vdev_id = VIRTIO_ID_FS, +.class_id = PCI_CLASS_STORAGE_OTHER, +}, { +.vdev_id = VIRTIO_ID_NET, +.trans_devid = PCI_DEVICE_ID_VIRTIO_NET, +.class_id = PCI_CLASS_NETWORK_ETHERNET, +}, { +.vdev_id = VIRTIO_ID_BLOCK, +.trans_devid = PCI_DEVICE_ID_VIRTIO_BLOCK, +.class_id = PCI_CLASS_STORAGE_SCSI, +}, { +.vdev_id = VIRTIO_ID_CONSOLE, +.trans_devid = PCI_DEVICE_ID_VIRTIO_CONSOLE, +.class_id = PCI_CLASS_COMMUNICATION_OTHER, +}, { +.vdev_id = VIRTIO_ID_SCSI, +.trans_devid = PCI_DEVICE_ID_VIRTIO_SCSI, +.class_id = PCI_CLASS_STORAGE_SCSI +}, { +.vdev_id = VIRTIO_ID_9P, +.trans_devid = PCI_DEVICE_ID_VIRTIO_9P, +.class_id = PCI_BASE_CLASS_NETWORK, +}, { +.vdev_id = VIRTIO_ID_BALLOON, +.trans_devid = PCI_DEVICE_ID_VIRTIO_BALLOON, +.class_id = PCI_CLASS_OTHERS, +}, { +.vdev_id = VIRTIO_ID_RNG, +.trans_devid = PCI_DEVICE_ID_VIRTIO_RNG, +.class_id = 
PCI_CLASS_OTHERS, +}, +}; + +static const VirtIOPCIIDInfo *virtio_pci_get_id_info(uint16_t vdev_id) +{ +const VirtIOPCIIDInfo *info = NULL; +int i; + +for (i = 0; i < ARRAY_SIZE(virtio_pci_id_info); i++) { +if (virtio_pci_id_info[i].vdev_id == vdev_id) { +info = &virtio_pci_id_info[i]; +break; +} +} + +if (!info) { +/* The device id is invalid or not added to the id_info yet. */ +error_report("Invalid virtio device(id %u)", vdev_id); +abort(); +} + +return info; +} + +/* + * Get the Transitional Device ID for the specific device, return + * zero if the device is non-transitional. + */ +uint16_t virtio_pci_get_trans_devid(uint16_t device_id) +{ +return virtio_pci_get_id_info(device_id)->trans_devid; +} + +/* + * Get the Class ID for the specific device. + */ +uint16_t virtio_pci_get_class_id(uint16_t device_id) +{ +return virtio_pci_get_id_info(device_id)->class_id; +} + static bool virtio_pci_ioeventfd_enabled(DeviceState *d) { VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); @@ -1729,6 +1814,9 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) * is set to PCI_SUBVENDOR_ID_REDHAT_QUMRANET by default. 
*/ pci_set_word(config + PCI_SUBSYSTEM_ID, virtio_bus_get_vdev_id(bus)); +if (proxy->trans_devid) { +pci_config_set_device_id(config, proxy->trans_devid); +} } else { /* pure virtio-1.0 */ pci_set_word(config + PCI_VENDOR_ID, diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h index 938799e8f6..24fba1604b 100644 --- a/include/hw/virtio/virtio-pci.h +++ b/include/hw/virtio/virtio-pci.h @@ -151,6 +151,8 @@ struct VirtIOPCIProxy { bool disable_modern; bool ignore_backend_features; OnOffAuto disable_legacy; +/* Transitional device id */ +uint16_t trans_devid; uint32_t class_code; uint32_t nvectors; uint32_t dfselect; @@ -184,6 +186,9 @@ static inline void virtio_pci_disable_modern(VirtIOPCIProxy *proxy) proxy->disable_modern = true; } +uint16_t virtio_pci_get_trans_devid(uint16_t device_id); +uint16_t virtio_pci_get_class_id(uint16_t device_id); + /* * virtio-input-pci: This extends VirtioPCIProxy. */ -- 2.23.0
[PATCH v9 0/5] add generic vDPA device support
From: Longpeng Hi guys, With the generic vDPA device, QEMU won't need to touch the device types any more, much like vfio. We can use the generic vDPA device as follows: -device vhost-vdpa-device-pci,vhostdev=/dev/vhost-vdpa-X Or -M microvm -m 512m -smp 2 -kernel ... -initrd ... -device \ vhost-vdpa-device,vhostdev=/dev/vhost-vdpa-x Changes v8 -> v9: - rename vhost-vdpa-device.rst to vhost-vdpa-generic-device.rst [Jason, Stefano] - emphasize the vhost-vDPA generic device in doc [Jason] Changes v7 -> v8: - add migration blocker. [Michael] Changes v6 -> v7: (v6: https://mail.gnu.org/archive/html/qemu-devel/2022-05/msg02821.html) - rebase. [Jason] - add documentation. [Stefan] Changes v5 -> v6: Patch 2: - Turn to the original approach in the RFC to initialize the virtio_pci_id_info array. [Michael] https://lore.kernel.org/all/20220105005900.860-2-longpe...@huawei.com/ Patch 3: - Fix logical error of exception handler around the post_init. [Stefano] - Fix some coding style warnings. [Stefano] Patch 4: - Fix some coding style warnings. [Stefano] Changes v4 -> v5: Patch 3: - remove vhostfd [Jason] - support virtio-mmio [Jason] Changes v3 -> v4: v3: https://www.mail-archive.com/qemu-devel@nongnu.org/msg877015.html - reorganize the series [Stefano] - fix some typos [Stefano] - fix logical error in vhost_vdpa_device_realize [Stefano] Changes v2 -> v3: Patch 4 & 5: - only call vdpa ioctls in vdpa-dev.c [Stefano, Longpeng] - s/VQS_NUM/VQS_COUNT [Stefano] - check both vdpa_dev_fd and vdpa_dev [Stefano] Patch 6: - move all steps into vhost_vdpa_device_unrealize. 
[Stefano] Changes RFC -> v2 Patch 1: - rename 'pdev_id' to 'trans_devid' [Michael] - only use transitional device id for the devices listed in the spec [Michael] - use macros to make the id_info table clearer [Longpeng] - add some modern devices in the id_info table [Longpeng] Patch 2: - remove the GET_VECTORS_NUM command [Jason] Patch 4: - expose vdpa_dev_fd as a QOM property [Stefan] - introduce vhost_vdpa_device_get_u32 as a common function to make the code clearer [Stefan] - fix the misleading description of 'dc->desc' [Stefano] Patch 5: - check returned number of virtqueues [Stefan] Patch 6: - init s->num_queues [Stefano] - free s->dev.vqs [Stefano] Longpeng (Mike) (5): virtio: get class_id and pci device id by the virtio id vdpa: add vdpa-dev support vdpa: add vdpa-dev-pci support vdpa-dev: mark the device as unmigratable docs: Add generic vhost-vdpa device documentation .../devices/vhost-vdpa-generic-device.rst | 46 +++ hw/virtio/Kconfig | 5 + hw/virtio/meson.build | 2 + hw/virtio/vdpa-dev-pci.c | 102 + hw/virtio/vdpa-dev.c | 377 ++ hw/virtio/virtio-pci.c| 88 include/hw/virtio/vdpa-dev.h | 43 ++ include/hw/virtio/virtio-pci.h| 5 + 8 files changed, 668 insertions(+) create mode 100644 docs/system/devices/vhost-vdpa-generic-device.rst create mode 100644 hw/virtio/vdpa-dev-pci.c create mode 100644 hw/virtio/vdpa-dev.c create mode 100644 include/hw/virtio/vdpa-dev.h -- 2.23.0
[PATCH v9 4/5] vdpa-dev: mark the device as unmigratable
From: Longpeng The generic vDPA device doesn't support migration currently, so mark it as unmigratable temporarily. Reviewed-by: Stefano Garzarella Acked-by: Jason Wang Signed-off-by: Longpeng --- hw/virtio/vdpa-dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index 2885d06cbe..62d83d3423 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -327,6 +327,7 @@ static Property vhost_vdpa_device_properties[] = { static const VMStateDescription vmstate_vhost_vdpa_device = { .name = "vhost-vdpa-device", +.unmigratable = 1, .minimum_version_id = 1, .version_id = 1, .fields = (VMStateField[]) { -- 2.23.0
[PATCH v1 1/3] virtio-pci: submit msi route changes in batch
From: Longpeng kvm_irqchip_commit_routes() is a time-intensive operation: on each invocation it needs to scan and update all irqfds that are already assigned, so more vectors mean more time is needed to process them. For virtio-pci, we can just submit once when enabling the vectors of a virtio-pci device. This can reduce the downtime when migrating a VM with vhost-vdpa devices. Signed-off-by: Longpeng --- hw/virtio/virtio-pci.c | 24 +--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 247325c193..22e76e3902 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -49,6 +49,19 @@ * configuration space */ #define VIRTIO_PCI_CONFIG_SIZE(dev) VIRTIO_PCI_CONFIG_OFF(msix_enabled(dev)) +/* Protected by the BQL */ +static KVMRouteChange virtio_pci_route_change; + +static inline void virtio_pci_begin_route_changes(void) +{ +virtio_pci_route_change = kvm_irqchip_begin_route_changes(kvm_state); +} + +static inline void virtio_pci_commit_route_changes(void) +{ +kvm_irqchip_commit_route_changes(&virtio_pci_route_change); +} + static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, VirtIOPCIProxy *dev); static void virtio_pci_reset(DeviceState *qdev); @@ -790,12 +803,11 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy, int ret; if (irqfd->users == 0) { -KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state); -ret = kvm_irqchip_add_msi_route(&c, vector, &proxy->pci_dev); +ret = kvm_irqchip_add_msi_route(&virtio_pci_route_change, vector, +&proxy->pci_dev); if (ret < 0) { return ret; } -kvm_irqchip_commit_route_changes(&c); irqfd->virq = ret; } irqfd->users++; @@ -903,12 +915,18 @@ static int kvm_virtio_pci_vector_vq_use(VirtIOPCIProxy *proxy, int nvqs) int ret = 0; VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); +virtio_pci_begin_route_changes(); + for (queue_no = 0; queue_no < nvqs; queue_no++) { if (!virtio_queue_get_num(vdev, queue_no)) { 
+virtio_pci_commit_route_changes(); return -1; } ret = kvm_virtio_pci_vector_use_one(proxy, queue_no); } + +virtio_pci_commit_route_changes(); + return ret; } -- 2.23.0
[PATCH v1 0/3] virtio-pci: optimize set_guest_notifier
From: Longpeng This patchset optimizes the time-consuming operations in virtio_pci_set_guest_notifier. Especially for vhost-vdpa migration, the time spent in set_guest_notifier can be reduced by 87% in some cases. Longpeng (Mike) (3): virtio-pci: submit msi route changes in batch kvm-irqchip: use KVMRouteChange API to update msi route virtio-pci: defer to commit kvm irq routing when enable msi/msix accel/kvm/kvm-all.c| 10 +-- accel/stubs/kvm-stub.c | 2 +- hw/intc/ioapic.c | 5 +- hw/misc/ivshmem.c | 6 +- hw/vfio/pci.c | 5 +- hw/virtio/virtio-pci.c | 140 - include/hw/virtio/virtio.h | 1 + include/sysemu/kvm.h | 2 +- target/i386/kvm/kvm.c | 6 +- 9 files changed, 145 insertions(+), 32 deletions(-) -- 2.23.0
[PATCH v1 2/3] kvm-irqchip: use KVMRouteChange API to update msi route
From: Longpeng The KVMRouteChange API was added by commit 9568690868e ("kvm-irqchip: introduce new API to support route change"). We can also apply it to kvm_irqchip_update_msi_route(). There are no functional changes, and we can optimize the virtio-pci core based on this change in the next patch. Signed-off-by: Longpeng --- accel/kvm/kvm-all.c| 10 ++ accel/stubs/kvm-stub.c | 2 +- hw/intc/ioapic.c | 5 +++-- hw/misc/ivshmem.c | 6 -- hw/vfio/pci.c | 5 +++-- hw/virtio/virtio-pci.c | 7 +-- include/sysemu/kvm.h | 2 +- target/i386/kvm/kvm.c | 6 -- 8 files changed, 27 insertions(+), 16 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 9b26582655..1ed0dc4c9d 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -1820,10 +1820,11 @@ static void kvm_add_routing_entry(KVMState *s, set_gsi(s, entry->gsi); } -static int kvm_update_routing_entry(KVMState *s, +static int kvm_update_routing_entry(KVMRouteChange *c, struct kvm_irq_routing_entry *new_entry) { struct kvm_irq_routing_entry *entry; +KVMState *s = c->s; int n; for (n = 0; n < s->irq_routes->nr; n++) { @@ -1837,6 +1838,7 @@ static int kvm_update_routing_entry(KVMState *s, } *entry = *new_entry; +c->changes++; return 0; } @@ -2046,7 +2048,7 @@ int kvm_irqchip_add_msi_route(KVMRouteChange *c, int vector, PCIDevice *dev) return virq; } -int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, +int kvm_irqchip_update_msi_route(KVMRouteChange *c, int virq, MSIMessage msg, PCIDevice *dev) { struct kvm_irq_routing_entry kroute = {}; @@ -2075,7 +2077,7 @@ int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, trace_kvm_irqchip_update_msi_route(virq); -return kvm_update_routing_entry(s, &kroute); +return kvm_update_routing_entry(c, &kroute); } static int kvm_irqchip_assign_irqfd(KVMState *s, EventNotifier *event, @@ -2221,7 +2223,7 @@ static int kvm_irqchip_assign_irqfd(KVMState *s, EventNotifier *event, abort(); } -int kvm_irqchip_update_msi_route(KVMState *s, int 
virq, MSIMessage msg) +int kvm_irqchip_update_msi_route(KVMRouteChange *c, int virq, MSIMessage msg) { return -ENOSYS; } diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c index 5d2dd8f351..5bcf98b9ab 100644 --- a/accel/stubs/kvm-stub.c +++ b/accel/stubs/kvm-stub.c @@ -69,7 +69,7 @@ void kvm_irqchip_release_virq(KVMState *s, int virq) { } -int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, +int kvm_irqchip_update_msi_route(KVMRouteChange *c, int virq, MSIMessage msg, PCIDevice *dev) { return -ENOSYS; diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c index 264262959d..07b9cf7705 100644 --- a/hw/intc/ioapic.c +++ b/hw/intc/ioapic.c @@ -195,6 +195,7 @@ static void ioapic_update_kvm_routes(IOAPICCommonState *s) int i; if (kvm_irqchip_is_split()) { +KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state); for (i = 0; i < IOAPIC_NUM_PINS; i++) { MSIMessage msg; struct ioapic_entry_info info; @@ -202,10 +203,10 @@ static void ioapic_update_kvm_routes(IOAPICCommonState *s) if (!info.masked) { msg.address = info.addr; msg.data = info.data; -kvm_irqchip_update_msi_route(kvm_state, i, msg, NULL); +kvm_irqchip_update_msi_route(&c, i, msg, NULL); } } -kvm_irqchip_commit_routes(kvm_state); +kvm_irqchip_commit_route_changes(&c); } #endif } diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index d66d912172..0e9427be42 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -278,6 +278,7 @@ static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector, IVShmemState *s = IVSHMEM_COMMON(dev); EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; MSIVector *v = &s->msi_vectors[vector]; +KVMRouteChange c; int ret; IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector); @@ -287,11 +288,12 @@ static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector, } assert(!v->unmasked); -ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev); +c = kvm_irqchip_begin_route_changes(kvm_state); +ret = kvm_irqchip_update_msi_route(&c, v->virq, 
msg, dev); if (ret < 0) { return ret; } -kvm_irqchip_commit_routes(kvm_state); +kvm_irqchip_commit_route_changes(&c); ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq); if (ret < 0) { diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 939dcc3d4a..fb69cc9965 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -460,8 +460,9 @@ static void vfio_remove_kvm_msi_virq(VFIOMSIVector *vector) static void vfio_
[PATCH v1 3/3] virtio-pci: defer to commit kvm irq routing when enable msi/msix
From: Longpeng All unmasked vectors will be set up in msix_set_vector_notifiers(), which is a time-consuming operation because each vector needs to be submitted to KVM once. It's even worse if the VM has several devices and each device has dozens of vectors. We can defer and commit the vectors in batch, just like commit dc580d51f7 ("vfio: defer to commit kvm irq routing when enable msi/msix"). This can reduce the time spent in virtio_pci_set_guest_notifiers() by 80%. Signed-off-by: Longpeng --- hw/virtio/virtio-pci.c | 113 - include/hw/virtio/virtio.h | 1 + 2 files changed, 99 insertions(+), 15 deletions(-) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 5fd02b7cb8..13f9c31009 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -51,15 +51,22 @@ /* Protected by the BQL */ static KVMRouteChange virtio_pci_route_change; +static unsigned virtio_pci_route_change_depth; static inline void virtio_pci_begin_route_changes(void) { -virtio_pci_route_change = kvm_irqchip_begin_route_changes(kvm_state); +if (!virtio_pci_route_change_depth) { +virtio_pci_route_change = kvm_irqchip_begin_route_changes(kvm_state); +} +virtio_pci_route_change_depth++; } static inline void virtio_pci_commit_route_changes(void) { -kvm_irqchip_commit_route_changes(&virtio_pci_route_change); +virtio_pci_route_change_depth--; +if (!virtio_pci_route_change_depth) { +kvm_irqchip_commit_route_changes(&virtio_pci_route_change); +} } static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, @@ -976,6 +983,88 @@ static void kvm_virtio_pci_vector_config_release(VirtIOPCIProxy *proxy) kvm_virtio_pci_vector_release_one(proxy, VIRTIO_CONFIG_IRQ_IDX); } +static int virtio_pci_vector_do_unmask(VirtIOPCIProxy *proxy, + unsigned int queue_no, + unsigned int vector, + EventNotifier *n) +{ +VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); +VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); +int ret = 0; + +/* + * If guest supports masking, irqfd is already 
setup, unmask it. + * Otherwise, set it up now. + */ +if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { +k->guest_notifier_mask(vdev, queue_no, false); +/* Test after unmasking to avoid losing events. */ +if (k->guest_notifier_pending && +k->guest_notifier_pending(vdev, queue_no)) { +event_notifier_set(n); +} +} else { +ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); +} + +return ret; +} + +static void virtio_pci_prepare_kvm_msi_virq_batch(VirtIOPCIProxy *proxy) +{ +VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + +assert(!vdev->defer_kvm_irq_routing); +vdev->defer_kvm_irq_routing = true; +virtio_pci_begin_route_changes(); +} + +static void virtio_pci_commit_kvm_msi_virq_batch(VirtIOPCIProxy *proxy) +{ +VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); +PCIDevice *dev = &proxy->pci_dev; +VirtQueue *vq; +EventNotifier *n; +int vector, index; +int ret; + +assert(vdev->defer_kvm_irq_routing); +virtio_pci_commit_route_changes(); +vdev->defer_kvm_irq_routing = false; + +if (!msix_enabled(dev)) { +return; +} + +/* Unmask all unmasked vectors */ +for (vector = 0; vector < dev->msix_entries_nr; vector++) { +if (msix_is_masked(dev, vector)) { +continue; +} + +vq = virtio_vector_first_queue(vdev, vector); +while (vq) { +index = virtio_get_queue_index(vq); +if (!virtio_queue_get_num(vdev, index)) { +break; +} +if (index < proxy->nvqs_with_notifiers) { +n = virtio_queue_get_guest_notifier(vq); +ret = virtio_pci_vector_do_unmask(proxy, index, vector, n); +assert(ret >= 0); +} +vq = virtio_vector_next_queue(vq); +} + +if (vector == vdev->config_vector) { +n = virtio_config_get_guest_notifier(vdev); +ret = virtio_pci_vector_do_unmask(proxy, VIRTIO_CONFIG_IRQ_IDX, + vector, n); +assert(ret >= 0); +} +} +} + static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy, unsigned int queue_no, unsigned int vector, @@ -983,7 +1072,6 @@ static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy, EventNotifier *n) { VirtIODevice *vdev = 
virtio_bus_get_device(&proxy->bus); -VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); VirtIOIRQFD *irqfd; int ret = 0; @@ -1002,19 +1090,10 @@ static int vir