> On Sep 20, 2024, at 23:09, Maxime Coquelin <maxime.coque...@redhat.com> wrote: > > External email: Use caution opening links or attachments > > > This patch enables VDUSE reconnection support making use of > the newly introduced reconnection mechanism in Vhost > library. > > At DPDK VDUSE device creation time, there are two > possibilities: > 1. The Kernel VDUSE device does not exist: > a. A reconnection file named after the VUDSE device name > is created in VDUSE tmpfs. > b. The file is truncated to 'struct vhost_reconnect_data' > size, and mmapped. > c. Negotiated features, Virtio status... are saved for > sanity checks at reconnect time. > 2. The Kernel VDUSE device already exists: > a. Exit with failure if no reconnect file exists for > this device. > b. Open and mmap the reconnect file. > c. Perform sanity check to ensure features are compatible. > d. Restore virtqueues' available indexes at startup time. > > Then at runtime, the virtqueues' available index are logged by > the Vhost reconnection mechanism. > > At DPDK VDUSE device destruction time, there are two > possibilities: > 1. The Kernel VDUSE device destruction succeed, which > means it is no more attached to the vDPA bus. The > reconnection file is unmapped and then removed. > 2. The Kernel VDUSE device destruction failed, meaning it > is no more attached to the vDPA bus. The reconnection > file is unmapped but not removed to make possible later > reconnection. > > Signed-off-by: Maxime Coquelin <maxime.coque...@redhat.com> > --- > lib/vhost/vduse.c | 308 ++++++++++++++++++++++++++++++++++++++++------ > 1 file changed, 268 insertions(+), 40 deletions(-) > > diff --git a/lib/vhost/vduse.c b/lib/vhost/vduse.c > index c66602905c..f9ac317438 100644 > --- a/lib/vhost/vduse.c > +++ b/lib/vhost/vduse.c > @@ -136,7 +136,7 @@ vduse_control_queue_event(int fd, void *arg, int *remove > __rte_unused) > } > > static void > -vduse_vring_setup(struct virtio_net *dev, unsigned int index) > +vduse_vring_setup(struct virtio_net *dev, unsigned int index, bool reconnect) > { > struct vhost_virtqueue *vq = dev->virtqueue[index]; > struct vhost_vring_addr *ra = &vq->ring_addrs; > @@ -152,6 +152,19 @@ vduse_vring_setup(struct virtio_net *dev, unsigned int > index) > return; > } > > + if (reconnect) { > + vq->last_avail_idx = vq->reconnect_log->last_avail_idx; > + vq->last_used_idx = vq->reconnect_log->last_avail_idx; > + } else { > + vq->last_avail_idx = vq_info.split.avail_index; > + vq->last_used_idx = vq_info.split.avail_index; > + } > + vq->size = vq_info.num; > + vq->ready = true; > + vq->enabled = vq_info.ready; > + ra->desc_user_addr = vq_info.desc_addr; > + ra->avail_user_addr = vq_info.driver_addr; > + ra->used_user_addr = vq_info.device_addr; > VHOST_CONFIG_LOG(dev->ifname, INFO, "VQ %u info:", index); > VHOST_CONFIG_LOG(dev->ifname, INFO, "\tnum: %u", vq_info.num); > VHOST_CONFIG_LOG(dev->ifname, INFO, "\tdesc_addr: %llx", > @@ -160,17 +173,9 @@ vduse_vring_setup(struct virtio_net *dev, unsigned int > index) > (unsigned long long)vq_info.driver_addr); > VHOST_CONFIG_LOG(dev->ifname, INFO, "\tdevice_addr: %llx", > (unsigned long long)vq_info.device_addr); > - VHOST_CONFIG_LOG(dev->ifname, INFO, "\tavail_idx: %u", > vq_info.split.avail_index); > + VHOST_CONFIG_LOG(dev->ifname, INFO, "\tavail_idx: %u", > vq->last_avail_idx); > + VHOST_CONFIG_LOG(dev->ifname, INFO, "\tused_idx: %u", > vq->last_used_idx); > VHOST_CONFIG_LOG(dev->ifname, INFO, "\tready: %u", vq_info.ready); > - > - vq->last_avail_idx = vq_info.split.avail_index; > - vq->size = vq_info.num; > - vq->ready = true; > - vq->enabled = vq_info.ready; > - ra->desc_user_addr = vq_info.desc_addr; > - ra->avail_user_addr = vq_info.driver_addr; > - ra->used_user_addr = vq_info.device_addr; > - > vq->kickfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); > if (vq->kickfd < 0) { > VHOST_CONFIG_LOG(dev->ifname, ERR, "Failed to init kickfd for > VQ %u: %s", > @@ -267,7 +272,7 @@ vduse_vring_cleanup(struct virtio_net *dev, unsigned int > index) > } > > static void > -vduse_device_start(struct virtio_net *dev) > +vduse_device_start(struct virtio_net *dev, bool reconnect) > { > unsigned int i, ret; > > @@ -287,6 +292,15 @@ vduse_device_start(struct virtio_net *dev) > return; > } > > + if (reconnect && dev->features != dev->reconnect_log->features) { > + VHOST_CONFIG_LOG(dev->ifname, ERR, > + "Mismatch between reconnect file features > 0x%" PRIx64 " & device features 0x%" PRIx64, > + dev->reconnect_log->features, dev->features); > + return; > + } > + > + dev->reconnect_log->features = dev->features; > + > VHOST_CONFIG_LOG(dev->ifname, INFO, "Negotiated Virtio features: 0x%" > PRIx64, > dev->features); > > @@ -300,7 +314,7 @@ vduse_device_start(struct virtio_net *dev) > } > > for (i = 0; i < dev->nr_vring; i++) > - vduse_vring_setup(dev, i); > + vduse_vring_setup(dev, i, reconnect); > > dev->flags |= VIRTIO_DEV_READY; > > @@ -373,6 +387,7 @@ vduse_events_handler(int fd, void *arg, int *remove > __rte_unused) > req.s.status); > old_status = dev->status; > dev->status = req.s.status; > + dev->reconnect_log->status = dev->status; > resp.result = VDUSE_REQ_RESULT_OK; > break; > case VDUSE_UPDATE_IOTLB: > @@ -398,7 +413,7 @@ vduse_events_handler(int fd, void *arg, int *remove > __rte_unused) > > if ((old_status ^ dev->status) & VIRTIO_DEVICE_STATUS_DRIVER_OK) { > if (dev->status & VIRTIO_DEVICE_STATUS_DRIVER_OK) > - vduse_device_start(dev); > + vduse_device_start(dev, false); > else > vduse_device_stop(dev); > } > @@ -407,10 +422,64 @@ vduse_events_handler(int fd, void *arg, int *remove > __rte_unused) > vduse_req_id_to_str(req.type), req.type); > } > > +static char vduse_reconnect_dir[PATH_MAX]; > +static bool vduse_reconnect_path_set; > + > +static int > +vduse_reconnect_path_init(void) > +{ > + const char *directory; > + int ret; > + > + /* from RuntimeDirectory= see systemd.exec */ > + directory = getenv("RUNTIME_DIRECTORY"); > + if (directory == NULL) { > + /* > + * Used standard convention defined in > + * XDG Base Directory Specification and > + * Filesystem Hierarchy Standard. > + */ > + if (getuid() == 0) > + directory = "/var/run"; > + else > + directory = getenv("XDG_RUNTIME_DIR") ? : "/tmp"; > + } > + > + ret = snprintf(vduse_reconnect_dir, sizeof(vduse_reconnect_dir), > "%s/vduse", > + directory); > + if (ret < 0 || ret == sizeof(vduse_reconnect_dir)) { > + VHOST_CONFIG_LOG("vduse", ERR, "Error creating VDUSE > reconnect path name"); > + return -1; > + } > + > + ret = mkdir(vduse_reconnect_dir, 0700); > + if (ret < 0 && errno != EEXIST) { > + VHOST_CONFIG_LOG("vduse", ERR, "Error creating '%s': %s", > + vduse_reconnect_dir, strerror(errno)); > + return -1; > + } > + > + VHOST_CONFIG_LOG("vduse", INFO, "Created VDUSE reconnect directory in > %s", > + vduse_reconnect_dir); > + > + return 0; > +} > + > +static void > +vduse_reconnect_handler(int fd, void *arg, int *remove) > +{ > + struct virtio_net *dev = arg; > + > + vduse_device_start(dev, true); > + > + close(fd); > + *remove = 1; > +} > + > int > vduse_device_create(const char *path, bool compliant_ol_flags) > { > - int control_fd, dev_fd, vid, ret; > + int control_fd, dev_fd, vid, ret, reco_fd; > uint32_t i, max_queue_pairs, total_queues; > struct virtio_net *dev; > struct virtio_net_config vnet_config = {{ 0 }}; > @@ -418,6 +487,9 @@ vduse_device_create(const char *path, bool > compliant_ol_flags) > uint64_t features; > struct vduse_dev_config *dev_config = NULL; > const char *name = path + strlen("/dev/vduse/"); > + char reconnect_file[PATH_MAX]; > + struct vhost_reconnect_data *reconnect_log = NULL; > + bool reconnect = false; > > if (vduse.fdset == NULL) { > vduse.fdset = fdset_init("vduse-evt"); > @@ -427,6 +499,20 @@ vduse_device_create(const char *path, bool > compliant_ol_flags) > } > } > > + if (vduse_reconnect_path_set == false) { > + if (vduse_reconnect_path_init() < 0) { > + VHOST_CONFIG_LOG(path, ERR, "failed to initialize > reconnect path"); > + return -1; > + } > + vduse_reconnect_path_set = true; > + } > + > + ret = snprintf(reconnect_file, sizeof(reconnect_file), "%s/%s", > vduse_reconnect_dir, name); > + if (ret < 0 || ret == sizeof(reconnect_file)) { > + VHOST_CONFIG_LOG(name, ERR, "Failed to create vduse reconnect > path name"); > + return -1; > + } > + > control_fd = open(VDUSE_CTRL_PATH, O_RDWR); > if (control_fd < 0) { > VHOST_CONFIG_LOG(name, ERR, "Failed to open %s: %s", > @@ -441,14 +527,6 @@ vduse_device_create(const char *path, bool > compliant_ol_flags) > goto out_ctrl_close; > } > > - dev_config = malloc(offsetof(struct vduse_dev_config, config) + > - sizeof(vnet_config)); > - if (!dev_config) { > - VHOST_CONFIG_LOG(name, ERR, "Failed to allocate VDUSE > config"); > - ret = -1; > - goto out_ctrl_close; > - } > - > ret = rte_vhost_driver_get_features(path, &features); > if (ret < 0) { > VHOST_CONFIG_LOG(name, ERR, "Failed to get backend features"); > @@ -469,23 +547,118 @@ vduse_device_create(const char *path, bool > compliant_ol_flags) > else > total_queues += 1; /* Includes ctrl queue */ > > - vnet_config.max_virtqueue_pairs = max_queue_pairs; > - memset(dev_config, 0, sizeof(struct vduse_dev_config)); > + if (access(path, F_OK) == 0) { > + VHOST_CONFIG_LOG(name, INFO, "Device already exists, > reconnecting..."); > + reconnect = true; > + > + reco_fd = open(reconnect_file, O_RDWR, 0600); > + if (reco_fd < 0) { > + if (errno == ENOENT) > + VHOST_CONFIG_LOG(name, ERR, "Missing > reconnect file (%s)", > + reconnect_file); > + else > + VHOST_CONFIG_LOG(name, ERR, "Failed to open > reconnect file %s (%s)", > + reconnect_file, > strerror(errno)); > + ret = -1; > + goto out_ctrl_close; > + } > > - strncpy(dev_config->name, name, VDUSE_NAME_MAX - 1); > - dev_config->device_id = VIRTIO_ID_NET; > - dev_config->vendor_id = 0; > - dev_config->features = features; > - dev_config->vq_num = total_queues; > - dev_config->vq_align = sysconf(_SC_PAGE_SIZE); > - dev_config->config_size = sizeof(struct virtio_net_config); > - memcpy(dev_config->config, &vnet_config, sizeof(vnet_config)); > + reconnect_log = mmap(NULL, sizeof(*reconnect_log), PROT_READ > | PROT_WRITE, > + MAP_SHARED, reco_fd, 0); > + close(reco_fd); > + if (reconnect_log == MAP_FAILED) { > + VHOST_CONFIG_LOG(name, ERR, "Failed to mmap reconnect > file %s (%s)", > + reconnect_file, strerror(errno)); > + ret = -1; > + goto out_ctrl_close; > + } > > - ret = ioctl(control_fd, VDUSE_CREATE_DEV, dev_config); > - if (ret < 0) { > - VHOST_CONFIG_LOG(name, ERR, "Failed to create VDUSE device: > %s", > - strerror(errno)); > - goto out_free; > + if (reconnect_log->version != VHOST_RECONNECT_VERSION) { > + VHOST_CONFIG_LOG(name, ERR, > + "Version mismatch between backend > (0x%x) & reconnection file (0x%x)", > + VHOST_RECONNECT_VERSION, > reconnect_log->version); > + } > + > + if ((reconnect_log->features & features) != > reconnect_log->features) { > + VHOST_CONFIG_LOG(name, ERR, > + "Features mismatch between backend > (0x%" PRIx64 ") & reconnection file (0x%" PRIx64 ")", > + features, reconnect_log->features); > + ret = -1; > + goto out_ctrl_close; > + } > + > + if (reconnect_log->nr_vrings != total_queues) { > + VHOST_CONFIG_LOG(name, ERR, > + "Queues number mismatch between > backend (%u) and reconnection file (%u)", > + total_queues, > reconnect_log->nr_vrings); > + ret = -1; > + goto out_ctrl_close; > + } > + } else { > + reco_fd = open(reconnect_file, O_CREAT | O_EXCL | O_RDWR, > 0600); > + if (reco_fd < 0) { > + if (errno == EEXIST) { > + VHOST_CONFIG_LOG(name, ERR, "Reconnect file > %s exists but not the device", > + reconnect_file); > + } else { > + VHOST_CONFIG_LOG(name, ERR, "Failed to open > reconnect file %s (%s)", > + reconnect_file, > strerror(errno)); > + } > + ret = -1; > + goto out_ctrl_close; > + } > + > + ret = ftruncate(reco_fd, sizeof(*reconnect_log)); > + if (ret < 0) { > + VHOST_CONFIG_LOG(name, ERR, "Failed to truncate > reconnect file %s (%s)", > + reconnect_file, strerror(errno)); > + close(reco_fd); > + goto out_ctrl_close; > + } > + > + reconnect_log = mmap(NULL, sizeof(*reconnect_log), PROT_READ > | PROT_WRITE, > + MAP_SHARED, reco_fd, 0); > + close(reco_fd); > + if (reconnect_log == MAP_FAILED) { > + VHOST_CONFIG_LOG(name, ERR, "Failed to mmap reconnect > file %s (%s)", > + reconnect_file, strerror(errno)); > + ret = -1; > + goto out_ctrl_close; > + } > + > + reconnect_log->version = VHOST_RECONNECT_VERSION; > + > + dev_config = malloc(offsetof(struct vduse_dev_config, config) > + > + sizeof(vnet_config)); > + if (!dev_config) { > + VHOST_CONFIG_LOG(name, ERR, "Failed to allocate VDUSE > config"); > + ret = -1; > + goto out_ctrl_close; > + } > + > + vnet_config.max_virtqueue_pairs = max_queue_pairs; > + memset(dev_config, 0, sizeof(struct vduse_dev_config)); > + > + rte_strscpy(dev_config->name, name, VDUSE_NAME_MAX - 1); > + dev_config->device_id = VIRTIO_ID_NET; > + dev_config->vendor_id = 0; > + dev_config->features = features; > + dev_config->vq_num = total_queues; > + dev_config->vq_align = sysconf(_SC_PAGE_SIZE); > + dev_config->config_size = sizeof(struct virtio_net_config); > + memcpy(dev_config->config, &vnet_config, sizeof(vnet_config)); > + > + ret = ioctl(control_fd, VDUSE_CREATE_DEV, dev_config); > + if (ret < 0) { > + VHOST_CONFIG_LOG(name, ERR, "Failed to create VDUSE > device: %s", > + strerror(errno)); > + goto out_free; > + } > + > + memcpy(&reconnect_log->config, &vnet_config, > sizeof(vnet_config)); > + reconnect_log->nr_vrings = total_queues; > + free(dev_config); > + dev_config = NULL; > } > > dev_fd = open(path, O_RDWR); > @@ -519,10 +692,15 @@ vduse_device_create(const char *path, bool > compliant_ol_flags) > strncpy(dev->ifname, path, IF_NAME_SZ - 1); > dev->vduse_ctrl_fd = control_fd; > dev->vduse_dev_fd = dev_fd; > + dev->reconnect_log = reconnect_log; > + if (reconnect) > + dev->status = dev->reconnect_log->status; > + > vhost_setup_virtio_net(dev->vid, true, compliant_ol_flags, true, true); > > for (i = 0; i < total_queues; i++) { > struct vduse_vq_config vq_cfg = { 0 }; > + struct vhost_virtqueue *vq; > > ret = alloc_vring_queue(dev, i); > if (ret) { > @@ -530,6 +708,12 @@ vduse_device_create(const char *path, bool > compliant_ol_flags) > goto out_dev_destroy; > } > > + vq = dev->virtqueue[i]; > + vq->reconnect_log = &reconnect_log->vring[i]; > + > + if (reconnect) > + continue; > + > vq_cfg.index = i; > vq_cfg.max_size = 1024; > > @@ -549,7 +733,32 @@ vduse_device_create(const char *path, bool > compliant_ol_flags) > goto out_dev_destroy; > } > > - free(dev_config); > + if (reconnect && dev->status & VIRTIO_DEVICE_STATUS_DRIVER_OK) { > + /* > + * Make vduse_device_start() being executed in the same > + * context for both reconnection and fresh startup. > + */ > + reco_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); > + if (reco_fd < 0) { > + VHOST_CONFIG_LOG(name, ERR, "Failed to create > reco_fd: %s", > + strerror(errno)); > + ret = -1; > + goto out_dev_destroy; > + } > + > + ret = fdset_add(vduse.fdset, reco_fd, > vduse_reconnect_handler, NULL, dev); > + if (ret) { > + VHOST_CONFIG_LOG(name, ERR, "Failed to add reconnect > fd %d to vduse fdset", > + reco_fd); > + goto out_dev_destroy; > + } > + > + ret = eventfd_write(reco_fd, (eventfd_t)1); > + if (ret < 0) { > + VHOST_CONFIG_LOG(name, ERR, "Failed to write to > reconnect eventfd"); > + goto out_dev_destroy; > + } > + } > > return 0; > > @@ -587,6 +796,9 @@ vduse_device_destroy(const char *path) > if (vid == RTE_MAX_VHOST_DEVICE) > return -1; > > + if (dev->reconnect_log) > + munmap(dev->reconnect_log, sizeof(*dev->reconnect_log)); > + > vduse_device_stop(dev); > > fdset_del(vduse.fdset, dev->vduse_dev_fd); > @@ -597,10 +809,26 @@ vduse_device_destroy(const char *path) > } > > if (dev->vduse_ctrl_fd >= 0) { > + char reconnect_file[PATH_MAX]; > + > ret = ioctl(dev->vduse_ctrl_fd, VDUSE_DESTROY_DEV, name); > - if (ret) > + if (ret) { > VHOST_CONFIG_LOG(name, ERR, "Failed to destroy VDUSE > device: %s", > strerror(errno)); > + } else { > + /* > + * VDUSE device was no more attached to the vDPA bus, > + * so we can remove the reconnect file. > + */ > + ret = snprintf(reconnect_file, > sizeof(reconnect_file), "%s/%s", > + vduse_reconnect_dir, name); > + if (ret < 0 || ret == sizeof(reconnect_file)) > + VHOST_CONFIG_LOG(name, ERR, > + "Failed to create vduse > reconnect path name"); > + else > + unlink(reconnect_file); > + } > + > close(dev->vduse_ctrl_fd); > dev->vduse_ctrl_fd = -1; > } > -- > 2.46.0 >
Reviewed-by: Chenbo Xia <chen...@nvidia.com>