On Wed, May 11, 2022 at 10:56:02AM +0800, Jason Wang wrote:
>On Tue, May 10, 2022 at 8:59 PM Longpeng(Mike) <longpe...@huawei.com> wrote:
>>
>> From: Longpeng <longpe...@huawei.com>
>>
>> Supports vdpa-dev.
>>
>> Signed-off-by: Longpeng <longpe...@huawei.com>
>> ---
>> hw/virtio/Kconfig | 5 +
>> hw/virtio/meson.build | 1 +
>> hw/virtio/vdpa-dev.c | 385 +++++++++++++++++++++++++++++++++++
>> include/hw/virtio/vdpa-dev.h | 43 ++++
>> 4 files changed, 434 insertions(+)
>> create mode 100644 hw/virtio/vdpa-dev.c
>> create mode 100644 include/hw/virtio/vdpa-dev.h
>>
>> diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig
>> index c144d42f9b..2723283382 100644
>> --- a/hw/virtio/Kconfig
>> +++ b/hw/virtio/Kconfig
>> @@ -68,3 +68,8 @@ config VHOST_USER_RNG
>> bool
>> default y
>> depends on VIRTIO && VHOST_USER
>> +
>> +config VHOST_VDPA_DEV
>> + bool
>> + default y if VIRTIO_PCI
>
>Do we have the plan to add VIRTIO_MMIO support?
>
>> + depends on VIRTIO && VHOST_VDPA && LINUX
>> diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
>> index 67dc77e00f..8f6f86db71 100644
>> --- a/hw/virtio/meson.build
>> +++ b/hw/virtio/meson.build
>> @@ -29,6 +29,7 @@ virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c'))
>> virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_I2C'], if_true: files('vhost-user-i2c-pci.c'))
>> virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c'))
>> virtio_ss.add(when: ['CONFIG_VHOST_USER_RNG', 'CONFIG_VIRTIO_PCI'], if_true: files('vhost-user-rng-pci.c'))
>> +virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c'))
>>
>> virtio_pci_ss = ss.source_set()
>> virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c'))
>> diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c
>> new file mode 100644
>> index 0000000000..543b5b4b81
>> --- /dev/null
>> +++ b/hw/virtio/vdpa-dev.c
>> @@ -0,0 +1,385 @@
>> +/*
>> + * Vhost Vdpa Device
>> + *
>> + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All Rights Reserved.
>> + *
>> + * Authors:
>> + * Longpeng <longpe...@huawei.com>
>> + *
>> + * Largely based on the "vhost-user-blk-pci.c" and "vhost-user-blk.c" implemented by:
>> + * Changpeng Liu <changpeng....@intel.com>
>> + *
>> + * This work is licensed under the terms of the GNU LGPL, version 2 or later.
>> + * See the COPYING.LIB file in the top-level directory.
>> + */
>> +#include "qemu/osdep.h"
>> +#include <sys/ioctl.h>
>> +#include <linux/vhost.h>
>> +#include "qapi/error.h"
>> +#include "qemu/error-report.h"
>> +#include "qemu/cutils.h"
>> +#include "hw/qdev-core.h"
>> +#include "hw/qdev-properties.h"
>> +#include "hw/qdev-properties-system.h"
>> +#include "hw/virtio/vhost.h"
>> +#include "hw/virtio/virtio.h"
>> +#include "hw/virtio/virtio-bus.h"
>> +#include "hw/virtio/virtio-access.h"
>> +#include "hw/virtio/vdpa-dev.h"
>> +#include "sysemu/sysemu.h"
>> +#include "sysemu/runstate.h"
>> +
>> +static void
>> +vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq)
>> +{
>> + /* Nothing to do */
>> +}
>> +
>> +static uint32_t
>> +vhost_vdpa_device_get_u32(int fd, unsigned long int cmd, Error **errp)
>> +{
>> + uint32_t val = (uint32_t)-1;
>> +
>> + if (ioctl(fd, cmd, &val) < 0) {
>> + error_setg(errp, "vhost-vdpa-device: cmd 0x%lx failed: %s",
>> + cmd, strerror(errno));
>> + }
>> +
>> + return val;
>> +}
>> +
>> +static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp)
>> +{
>> + VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>> + VhostVdpaDevice *v = VHOST_VDPA_DEVICE(vdev);
>> + uint16_t max_queue_size;
>> + struct vhost_virtqueue *vqs;
>> + int i, ret;
>> +
>> + if (!v->vhostdev && v->vhostfd == -1) {
>> + error_setg(errp, "both vhostdev and vhostfd are missing");
>> + return;
>> + }
>> +
>> + if (v->vhostdev && v->vhostfd != -1) {
>> + error_setg(errp, "both vhostdev and vhostfd are set");
>> + return;
>> + }
>> +
>> + if (v->vhostfd == -1) {
>> + v->vhostfd = qemu_open(v->vhostdev, O_RDWR, errp);
>> + if (*errp) {
>
>Would it be better to set error messages for all the possible
>failures during realization?
>
>> + return;
>> + }
>> + }
>> + v->vdpa.device_fd = v->vhostfd;
>> +
>> + v->vdev_id = vhost_vdpa_device_get_u32(v->vhostfd,
>> + VHOST_VDPA_GET_DEVICE_ID, errp);
>> + if (*errp) {
>> + goto out;
>> + }
>> +
>> + max_queue_size = vhost_vdpa_device_get_u32(v->vhostfd,
>> + VHOST_VDPA_GET_VRING_NUM, errp);
>> + if (*errp) {
>> + goto out;
>> + }
>> +
>> + if (v->queue_size > max_queue_size) {
>> + error_setg(errp, "vhost-vdpa-device: invalid queue_size: %u
(max:%u)",
>> + v->queue_size, max_queue_size);
>> + goto out;
>> + } else if (!v->queue_size) {
>> + v->queue_size = max_queue_size;
>> + }
>> +
>> + v->num_queues = vhost_vdpa_device_get_u32(v->vhostfd,
>> + VHOST_VDPA_GET_VQS_COUNT, errp);
>> + if (*errp) {
>> + goto out;
>> + }
>> +
>> + if (!v->num_queues || v->num_queues > VIRTIO_QUEUE_MAX) {
>> + error_setg(errp, "invalid number of virtqueues: %u (max:%u)",
>> + v->num_queues, VIRTIO_QUEUE_MAX);
>> + goto out;
>> + }
>> +
>> + v->dev.nvqs = v->num_queues;
>> + vqs = g_new0(struct vhost_virtqueue, v->dev.nvqs);
>> + v->dev.vqs = vqs;
>> + v->dev.vq_index = 0;
>> + v->dev.vq_index_end = v->dev.nvqs;
>> + v->dev.backend_features = 0;
>> + v->started = false;
>> +
>> + ret = vhost_dev_init(&v->dev, &v->vdpa, VHOST_BACKEND_TYPE_VDPA, 0, NULL);
>> + if (ret < 0) {
>> + error_setg(errp, "vhost-vdpa-device: vhost initialization failed:
%s",
>> + strerror(-ret));
>> + goto free_vqs;
>> + }
>> +
>> + v->config_size = vhost_vdpa_device_get_u32(v->vhostfd,
>> + VHOST_VDPA_GET_CONFIG_SIZE, errp);
>> + if (*errp) {
>> + goto vhost_cleanup;
>> + }
>> +
>> + if (v->post_init && v->post_init(v, errp) < 0) {
>> + goto free_virtio;
>> + }
>> +
>> + v->config = g_malloc0(v->config_size);
>> +
>> + ret = vhost_dev_get_config(&v->dev, v->config, v->config_size, NULL);
>> + if (ret < 0) {
>> + error_setg(errp, "vhost-vdpa-device: get config failed");
>> + goto free_config;
>> + }
>> +
>> + virtio_init(vdev, "vhost-vdpa", v->vdev_id, v->config_size);
>> +
>> + v->virtqs = g_new0(VirtQueue *, v->dev.nvqs);
>> + for (i = 0; i < v->dev.nvqs; i++) {
>> + v->virtqs[i] = virtio_add_queue(vdev, v->queue_size,
>> + vhost_vdpa_device_dummy_handle_output);
>> + }
>> +
>> + return;
>> +
>> +free_virtio:
>> + for (i = 0; i < v->num_queues; i++) {
>> + virtio_delete_queue(v->virtqs[i]);
>> + }
>> + g_free(v->virtqs);
>> + virtio_cleanup(vdev);
>> +free_config:
>> + g_free(v->config);
>> +vhost_cleanup:
>> + vhost_dev_cleanup(&v->dev);
>> +free_vqs:
>> + g_free(vqs);
>> +out:
>> + qemu_close(v->vhostfd);
>> + v->vhostfd = -1;
>> +}
>> +
>> +static void vhost_vdpa_device_unrealize(DeviceState *dev)
>> +{
>> + VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>> + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev);
>> + int i;
>> +
>> + virtio_set_status(vdev, 0);
>> +
>> + for (i = 0; i < s->num_queues; i++) {
>> + virtio_delete_queue(s->virtqs[i]);
>> + }
>> + g_free(s->virtqs);
>> + virtio_cleanup(vdev);
>> +
>> + g_free(s->config);
>> + g_free(s->dev.vqs);
>> + vhost_dev_cleanup(&s->dev);
>> + qemu_close(s->vhostfd);
>> + s->vhostfd = -1;
>> +}
>> +
>> +static void
>> +vhost_vdpa_device_get_config(VirtIODevice *vdev, uint8_t *config)
>> +{
>> + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev);
>> +
>> + memcpy(config, s->config, s->config_size);
>> +}
>> +
>> +static void
>> +vhost_vdpa_device_set_config(VirtIODevice *vdev, const uint8_t *config)
>> +{
>> + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev);
>> + int ret;
>> +
>> + ret = vhost_dev_set_config(&s->dev, s->config, 0, s->config_size,
>> + VHOST_SET_CONFIG_TYPE_MASTER);
>> + if (ret) {
>> + error_report("set device config space failed");
>> + return;
>> + }
>> +}
>> +
>> +static uint64_t vhost_vdpa_device_get_features(VirtIODevice *vdev,
>> + uint64_t features,
>> + Error **errp)
>> +{
>> + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev);
>> + uint64_t backend_features = s->dev.features;
>> +
>> + if (!virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM)) {
>> + virtio_clear_feature(&backend_features, VIRTIO_F_IOMMU_PLATFORM);
>> + }
>> +
>> + return backend_features;
>> +}
>> +
>> +static int vhost_vdpa_device_start(VirtIODevice *vdev, Error **errp)
>> +{
>> + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev);
>> + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
>> + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
>> + int i, ret;
>> +
>> + if (!k->set_guest_notifiers) {
>> + error_setg(errp, "binding does not support guest notifiers");
>> + return -ENOSYS;
>> + }
>> +
>> + ret = vhost_dev_enable_notifiers(&s->dev, vdev);
>> + if (ret < 0) {
>> + error_setg_errno(errp, -ret, "Error enabling host notifiers");
>> + return ret;
>> + }
>> +
>> + ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true);
>> + if (ret < 0) {
>> + error_setg_errno(errp, -ret, "Error binding guest notifier");
>> + goto err_host_notifiers;
>> + }
>> +
>> + s->dev.acked_features = vdev->guest_features;
>> +
>> + ret = vhost_dev_start(&s->dev, vdev);
>> + if (ret < 0) {
>> + error_setg_errno(errp, -ret, "Error starting vhost");
>> + goto err_guest_notifiers;
>> + }
>> + s->started = true;
>> +
>> + /*
>> + * guest_notifier_mask/pending not used yet, so just unmask
>> + * everything here. virtio-pci will do the right thing by
>> + * enabling/disabling irqfd.
>> + */
>> + for (i = 0; i < s->dev.nvqs; i++) {
>> + vhost_virtqueue_mask(&s->dev, vdev, i, false);
>> + }
>> +
>> + return ret;
>> +
>> +err_guest_notifiers:
>> + k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
>> +err_host_notifiers:
>> + vhost_dev_disable_notifiers(&s->dev, vdev);
>> + return ret;
>> +}
>> +
>> +static void vhost_vdpa_device_stop(VirtIODevice *vdev)
>> +{
>> + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev);
>> + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
>> + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
>> + int ret;
>> +
>> + if (!s->started) {
>> + return;
>> + }
>> + s->started = false;
>> +
>> + if (!k->set_guest_notifiers) {
>> + return;
>> + }
>> +
>> + vhost_dev_stop(&s->dev, vdev);
>> +
>> + ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
>> + if (ret < 0) {
>> + error_report("vhost guest notifier cleanup failed: %d", ret);
>> + return;
>> + }
>> +
>> + vhost_dev_disable_notifiers(&s->dev, vdev);
>> +}
>> +
>> +static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status)
>> +{
>> + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev);
>> + bool should_start = virtio_device_started(vdev, status);
>> + Error *local_err = NULL;
>> + int ret;
>> +
>> + if (!vdev->vm_running) {
>> + should_start = false;
>> + }
>> +
>> + if (s->started == should_start) {
>> + return;
>> + }
>> +
>> + if (should_start) {
>> + ret = vhost_vdpa_device_start(vdev, &local_err);
>> + if (ret < 0) {
>> + error_reportf_err(local_err, "vhost-vdpa-device: start failed: ");
>> + }
>> + } else {
>> + vhost_vdpa_device_stop(vdev);
>> + }
>> +}
>> +
>> +static Property vhost_vdpa_device_properties[] = {
>> + DEFINE_PROP_STRING("vhostdev", VhostVdpaDevice, vhostdev),
>> + DEFINE_PROP_INT32("vhostfd", VhostVdpaDevice, vhostfd, -1),
>
>This is probably not needed since we can "abuse" /dev/fd/X for
>vhostdev.
IIRC, for other vhost devices (e.g. vhost-vsock) the management layer
(e.g. libvirt) opens the device, because I think QEMU runs with fewer
permissions, and then passes the fd to QEMU. Could this be useful for
this scenario as well?
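
For example, a rough sketch of both options with the properties in this
patch ("vhost-vdpa-device-pci" is an illustrative name for a PCI
transport wrapper, fd 10 and the chardev path are arbitrary):

  # The management layer opens the vDPA chardev with its own
  # privileges and leaves the fd open for QEMU to inherit.
  exec 10<>/dev/vhost-vdpa-0
  qemu-system-x86_64 ... -device vhost-vdpa-device-pci,vhostfd=10

  # Or, reusing the vhostdev property via /dev/fd/N as Jason suggests:
  qemu-system-x86_64 ... -device vhost-vdpa-device-pci,vhostdev=/dev/fd/10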