On Wed, May 11, 2022 at 2:32 PM Stefano Garzarella <sgarz...@redhat.com> wrote: > > On Wed, May 11, 2022 at 10:56:02AM +0800, Jason Wang wrote: > >On Tue, May 10, 2022 at 8:59 PM Longpeng(Mike) <longpe...@huawei.com> wrote: > >> > >> From: Longpeng <longpe...@huawei.com> > >> > >> Supports vdpa-dev. > >> > >> Signed-off-by: Longpeng <longpe...@huawei.com> > >> --- > >> hw/virtio/Kconfig | 5 + > >> hw/virtio/meson.build | 1 + > >> hw/virtio/vdpa-dev.c | 385 +++++++++++++++++++++++++++++++++++ > >> include/hw/virtio/vdpa-dev.h | 43 ++++ > >> 4 files changed, 434 insertions(+) > >> create mode 100644 hw/virtio/vdpa-dev.c > >> create mode 100644 include/hw/virtio/vdpa-dev.h > >> > >> diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig > >> index c144d42f9b..2723283382 100644 > >> --- a/hw/virtio/Kconfig > >> +++ b/hw/virtio/Kconfig > >> @@ -68,3 +68,8 @@ config VHOST_USER_RNG > >> bool > >> default y > >> depends on VIRTIO && VHOST_USER > >> + > >> +config VHOST_VDPA_DEV > >> + bool > >> + default y if VIRTIO_PCI > > > >Do we have the plan to add VIRTIO_MMIO support? > > > >> + depends on VIRTIO && VHOST_VDPA && LINUX > >> diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build > >> index 67dc77e00f..8f6f86db71 100644 > >> --- a/hw/virtio/meson.build > >> +++ b/hw/virtio/meson.build > >> @@ -29,6 +29,7 @@ virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: > >> files('vhost-user-i2c.c')) > >> virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_I2C'], > >> if_true: files('vhost-user-i2c-pci.c')) > >> virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: > >> files('vhost-user-rng.c')) > >> virtio_ss.add(when: ['CONFIG_VHOST_USER_RNG', 'CONFIG_VIRTIO_PCI'], > >> if_true: files('vhost-user-rng-pci.c')) > >> +virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c')) > >> > >> virtio_pci_ss = ss.source_set() > >> virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: > >> files('vhost-vsock-pci.c')) > >> diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c > >> new file mode 100644 > >> index 0000000000..543b5b4b81 > >> --- /dev/null > >> +++ b/hw/virtio/vdpa-dev.c > >> @@ -0,0 +1,385 @@ > >> +/* > >> + * Vhost Vdpa Device > >> + * > >> + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All Rights Reserved. > >> + * > >> + * Authors: > >> + * Longpeng <longpe...@huawei.com> > >> + * > >> + * Largely based on the "vhost-user-blk-pci.c" and "vhost-user-blk.c" > >> implemented by: > >> + * Changpeng Liu <changpeng....@intel.com> > >> + * > >> + * This work is licensed under the terms of the GNU LGPL, version 2 or > >> later. > >> + * See the COPYING.LIB file in the top-level directory. > >> + */ > >> +#include "qemu/osdep.h" > >> +#include <sys/ioctl.h> > >> +#include <linux/vhost.h> > >> +#include "qapi/error.h" > >> +#include "qemu/error-report.h" > >> +#include "qemu/cutils.h" > >> +#include "hw/qdev-core.h" > >> +#include "hw/qdev-properties.h" > >> +#include "hw/qdev-properties-system.h" > >> +#include "hw/virtio/vhost.h" > >> +#include "hw/virtio/virtio.h" > >> +#include "hw/virtio/virtio-bus.h" > >> +#include "hw/virtio/virtio-access.h" > >> +#include "hw/virtio/vdpa-dev.h" > >> +#include "sysemu/sysemu.h" > >> +#include "sysemu/runstate.h" > >> + > >> +static void > >> +vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) > >> +{ > >> + /* Nothing to do */ > >> +} > >> + > >> +static uint32_t > >> +vhost_vdpa_device_get_u32(int fd, unsigned long int cmd, Error **errp) > >> +{ > >> + uint32_t val = (uint32_t)-1; > >> + > >> + if (ioctl(fd, cmd, &val) < 0) { > >> + error_setg(errp, "vhost-vdpa-device: cmd 0x%lx failed: %s", > >> + cmd, strerror(errno)); > >> + } > >> + > >> + return val; > >> +} > >> + > >> +static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) > >> +{ > >> + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > >> + VhostVdpaDevice *v = VHOST_VDPA_DEVICE(vdev); > >> + uint16_t max_queue_size; > >> + struct vhost_virtqueue *vqs; > >> + int i, ret; > >> + > >> + if (!v->vhostdev && v->vhostfd == -1) { > >> + error_setg(errp, "both vhostdev and vhostfd are missing"); > >> + return; > >> + } > >> + > >> + if (v->vhostdev && v->vhostfd != -1) { > >> + error_setg(errp, "both vhostdev and vhostfd are set"); > >> + return; > >> + } > >> + > >> + if (v->vhostfd == -1) { > >> + v->vhostfd = qemu_open(v->vhostdev, O_RDWR, errp); > >> + if (*errp) { > > > >Is this better to set error messages for all the possible failures > >during realization? > > > >> + return; > >> + } > >> + } > >> + v->vdpa.device_fd = v->vhostfd; > >> + > >> + v->vdev_id = vhost_vdpa_device_get_u32(v->vhostfd, > >> + VHOST_VDPA_GET_DEVICE_ID, > >> errp); > >> + if (*errp) { > >> + goto out; > >> + } > >> + > >> + max_queue_size = vhost_vdpa_device_get_u32(v->vhostfd, > >> + VHOST_VDPA_GET_VRING_NUM, > >> errp); > >> + if (*errp) { > >> + goto out; > >> + } > >> + > >> + if (v->queue_size > max_queue_size) { > >> + error_setg(errp, "vhost-vdpa-device: invalid queue_size: %u > >> (max:%u)", > >> + v->queue_size, max_queue_size); > >> + goto out; > >> + } else if (!v->queue_size) { > >> + v->queue_size = max_queue_size; > >> + } > >> + > >> + v->num_queues = vhost_vdpa_device_get_u32(v->vhostfd, > >> + VHOST_VDPA_GET_VQS_COUNT, > >> errp); > >> + if (*errp) { > >> + goto out; > >> + } > >> + > >> + if (!v->num_queues || v->num_queues > VIRTIO_QUEUE_MAX) { > >> + error_setg(errp, "invalid number of virtqueues: %u (max:%u)", > >> + v->num_queues, VIRTIO_QUEUE_MAX); > >> + goto out; > >> + } > >> + > >> + v->dev.nvqs = v->num_queues; > >> + vqs = g_new0(struct vhost_virtqueue, v->dev.nvqs); > >> + v->dev.vqs = vqs; > >> + v->dev.vq_index = 0; > >> + v->dev.vq_index_end = v->dev.nvqs; > >> + v->dev.backend_features = 0; > >> + v->started = false; > >> + > >> + ret = vhost_dev_init(&v->dev, &v->vdpa, VHOST_BACKEND_TYPE_VDPA, 0, > >> NULL); > >> + if (ret < 0) { > >> + error_setg(errp, "vhost-vdpa-device: vhost initialization failed: > >> %s", > >> + strerror(-ret)); > >> + goto free_vqs; > >> + } > >> + > >> + v->config_size = vhost_vdpa_device_get_u32(v->vhostfd, > >> + > >> VHOST_VDPA_GET_CONFIG_SIZE, errp); > >> + if (*errp) { > >> + goto vhost_cleanup; > >> + } > >> + > >> + if (v->post_init && v->post_init(v, errp) < 0) { > >> + goto free_virtio; > >> + } > >> + > >> + v->config = g_malloc0(v->config_size); > >> + > >> + ret = vhost_dev_get_config(&v->dev, v->config, v->config_size, NULL); > >> + if (ret < 0) { > >> + error_setg(errp, "vhost-vdpa-device: get config failed"); > >> + goto free_config; > >> + } > >> + > >> + virtio_init(vdev, "vhost-vdpa", v->vdev_id, v->config_size); > >> + > >> + v->virtqs = g_new0(VirtQueue *, v->dev.nvqs); > >> + for (i = 0; i < v->dev.nvqs; i++) { > >> + v->virtqs[i] = virtio_add_queue(vdev, v->queue_size, > >> + > >> vhost_vdpa_device_dummy_handle_output); > >> + } > >> + > >> + return; > >> + > >> +free_virtio: > >> + for (i = 0; i < v->num_queues; i++) { > >> + virtio_delete_queue(v->virtqs[i]); > >> + } > >> + g_free(v->virtqs); > >> + virtio_cleanup(vdev); > >> +free_config: > >> + g_free(v->config); > >> +vhost_cleanup: > >> + vhost_dev_cleanup(&v->dev); > >> +free_vqs: > >> + g_free(vqs); > >> +out: > >> + qemu_close(v->vhostfd); > >> + v->vhostfd = -1; > >> +} > >> + > >> +static void vhost_vdpa_device_unrealize(DeviceState *dev) > >> +{ > >> + VirtIODevice *vdev = VIRTIO_DEVICE(dev); > >> + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); > >> + int i; > >> + > >> + virtio_set_status(vdev, 0); > >> + > >> + for (i = 0; i < s->num_queues; i++) { > >> + virtio_delete_queue(s->virtqs[i]); > >> + } > >> + g_free(s->virtqs); > >> + virtio_cleanup(vdev); > >> + > >> + g_free(s->config); > >> + g_free(s->dev.vqs); > >> + vhost_dev_cleanup(&s->dev); > >> + qemu_close(s->vhostfd); > >> + s->vhostfd = -1; > >> +} > >> + > >> +static void > >> +vhost_vdpa_device_get_config(VirtIODevice *vdev, uint8_t *config) > >> +{ > >> + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); > >> + > >> + memcpy(config, s->config, s->config_size); > >> +} > >> + > >> +static void > >> +vhost_vdpa_device_set_config(VirtIODevice *vdev, const uint8_t *config) > >> +{ > >> + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); > >> + int ret; > >> + > >> + ret = vhost_dev_set_config(&s->dev, s->config, 0, s->config_size, > >> + VHOST_SET_CONFIG_TYPE_MASTER); > >> + if (ret) { > >> + error_report("set device config space failed"); > >> + return; > >> + } > >> +} > >> + > >> +static uint64_t vhost_vdpa_device_get_features(VirtIODevice *vdev, > >> + uint64_t features, > >> + Error **errp) > >> +{ > >> + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); > >> + uint64_t backend_features = s->dev.features; > >> + > >> + if (!virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM)) { > >> + virtio_clear_feature(&backend_features, VIRTIO_F_IOMMU_PLATFORM); > >> + } > >> + > >> + return backend_features; > >> +} > >> + > >> +static int vhost_vdpa_device_start(VirtIODevice *vdev, Error **errp) > >> +{ > >> + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); > >> + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); > >> + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); > >> + int i, ret; > >> + > >> + if (!k->set_guest_notifiers) { > >> + error_setg(errp, "binding does not support guest notifiers"); > >> + return -ENOSYS; > >> + } > >> + > >> + ret = vhost_dev_enable_notifiers(&s->dev, vdev); > >> + if (ret < 0) { > >> + error_setg_errno(errp, -ret, "Error enabling host notifiers"); > >> + return ret; > >> + } > >> + > >> + ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true); > >> + if (ret < 0) { > >> + error_setg_errno(errp, -ret, "Error binding guest notifier"); > >> + goto err_host_notifiers; > >> + } > >> + > >> + s->dev.acked_features = vdev->guest_features; > >> + > >> + ret = vhost_dev_start(&s->dev, vdev); > >> + if (ret < 0) { > >> + error_setg_errno(errp, -ret, "Error starting vhost"); > >> + goto err_guest_notifiers; > >> + } > >> + s->started = true; > >> + > >> + /* > >> + * guest_notifier_mask/pending not used yet, so just unmask > >> + * everything here. virtio-pci will do the right thing by > >> + * enabling/disabling irqfd. > >> + */ > >> + for (i = 0; i < s->dev.nvqs; i++) { > >> + vhost_virtqueue_mask(&s->dev, vdev, i, false); > >> + } > >> + > >> + return ret; > >> + > >> +err_guest_notifiers: > >> + k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); > >> +err_host_notifiers: > >> + vhost_dev_disable_notifiers(&s->dev, vdev); > >> + return ret; > >> +} > >> + > >> +static void vhost_vdpa_device_stop(VirtIODevice *vdev) > >> +{ > >> + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); > >> + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); > >> + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); > >> + int ret; > >> + > >> + if (!s->started) { > >> + return; > >> + } > >> + s->started = false; > >> + > >> + if (!k->set_guest_notifiers) { > >> + return; > >> + } > >> + > >> + vhost_dev_stop(&s->dev, vdev); > >> + > >> + ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); > >> + if (ret < 0) { > >> + error_report("vhost guest notifier cleanup failed: %d", ret); > >> + return; > >> + } > >> + > >> + vhost_dev_disable_notifiers(&s->dev, vdev); > >> +} > >> + > >> +static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t > >> status) > >> +{ > >> + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); > >> + bool should_start = virtio_device_started(vdev, status); > >> + Error *local_err = NULL; > >> + int ret; > >> + > >> + if (!vdev->vm_running) { > >> + should_start = false; > >> + } > >> + > >> + if (s->started == should_start) { > >> + return; > >> + } > >> + > >> + if (should_start) { > >> + ret = vhost_vdpa_device_start(vdev, &local_err); > >> + if (ret < 0) { > >> + error_reportf_err(local_err, "vhost-vdpa-device: start > >> failed: "); > >> + } > >> + } else { > >> + vhost_vdpa_device_stop(vdev); > >> + } > >> +} > >> + > >> +static Property vhost_vdpa_device_properties[] = { > >> + DEFINE_PROP_STRING("vhostdev", VhostVdpaDevice, vhostdev), > >> + DEFINE_PROP_INT32("vhostfd", VhostVdpaDevice, vhostfd, -1), > > > >This is probably not needed since we can "abuse" /dev/fd/X for > >vhostdev. > > IIRC for other vhost devices (e.g. vhost-vsock) the management layer > (e.g. libvirt) opens the device (because I think QEMU has less > permissions) and then passes the fd to QEMU, could this be useful for > this scenario as well?
Yes, it's the way current vhost-vDPA did (the fd is passing via SCM_RIGHTS). Thanks > > Thanks, > Stefano >