Thanks for your reply. > 2023年7月28日 上午5:21,Raphael Norwitz <raphael.norw...@nutanix.com> 写道: > > > >> On Jul 25, 2023, at 6:19 AM, Li Feng <fen...@smartx.com> wrote: >> >> Thanks for your comments. >> >>> 2023年7月25日 上午1:21,Raphael Norwitz <raphael.norw...@nutanix.com> 写道: >>> >>> Very excited to see this. High level looks good modulo a few small things. >>> >>> My major concern is around existing vhost-user-scsi backends which don’t >>> support VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD. IMO we should hide the >>> reconnect behavior behind a VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD check. We >>> may want to do the same for vhost-user-blk. >>> >>> The question is then what happens if the check is false. IIUC without an >>> inflight FD, if a device processes requests out of order, it’s not safe to >>> continue execution on reconnect, as there’s no way for the backend to know >>> how to replay IO. Should we permanently wedge the device or have QEMU fail >>> out? May be nice to have a toggle for this. >> >> Based on what MST said, is there anything else I need to do? > > I don’t think so. > >>> >>>> On Jul 21, 2023, at 6:51 AM, Li Feng <fen...@smartx.com> wrote: >>>> >>>> If the backend crashes and restarts, the device is broken. >>>> This patch adds reconnect for vhost-user-scsi. >>>> >>>> Tested with spdk backend. >>>> >>>> Signed-off-by: Li Feng <fen...@smartx.com> >>>> --- >>>> hw/block/vhost-user-blk.c | 2 - >>>> hw/scsi/vhost-scsi-common.c | 27 ++--- >>>> hw/scsi/vhost-user-scsi.c | 163 +++++++++++++++++++++++++--- >>>> include/hw/virtio/vhost-user-scsi.h | 3 + >>>> include/hw/virtio/vhost.h | 2 + >>>> 5 files changed, 165 insertions(+), 32 deletions(-) >>>> >>>> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c >>>> index eecf3f7a81..f250c740b5 100644 >>>> --- a/hw/block/vhost-user-blk.c >>>> +++ b/hw/block/vhost-user-blk.c >>>> @@ -32,8 +32,6 @@ >>>> #include "sysemu/sysemu.h" >>>> #include "sysemu/runstate.h" >>>> >>>> -#define REALIZE_CONNECTION_RETRIES 3 >>>> - >>>> static const int user_feature_bits[] = { >>>> VIRTIO_BLK_F_SIZE_MAX, >>>> VIRTIO_BLK_F_SEG_MAX, >>>> diff --git a/hw/scsi/vhost-scsi-common.c b/hw/scsi/vhost-scsi-common.c >>> >>> Why can’t all the vhost-scsi-common stuff be moved to a separate change? >> >> I will move this code to separate patch. >>> >>> Especially the stuff introduced for vhost-user-blk in >>> 1b0063b3048af65dfaae6422a572c87db8575a92 should be moved out. >> OK. >> >>> >>>> index a06f01af26..08801886b8 100644 >>>> --- a/hw/scsi/vhost-scsi-common.c >>>> +++ b/hw/scsi/vhost-scsi-common.c >>>> @@ -52,16 +52,22 @@ int vhost_scsi_common_start(VHostSCSICommon *vsc) >>>> >>>> vsc->dev.acked_features = vdev->guest_features; >>>> >>>> - assert(vsc->inflight == NULL); >>>> - vsc->inflight = g_new0(struct vhost_inflight, 1); >>>> - ret = vhost_dev_get_inflight(&vsc->dev, >>>> - vs->conf.virtqueue_size, >>>> - vsc->inflight); >>>> + ret = vhost_dev_prepare_inflight(&vsc->dev, vdev); >>>> if (ret < 0) { >>>> - error_report("Error get inflight: %d", -ret); >>>> + error_report("Error setting inflight format: %d", -ret); >>>> goto err_guest_notifiers; >>>> } >>>> >>>> + if (!vsc->inflight->addr) { >>>> + ret = vhost_dev_get_inflight(&vsc->dev, >>>> + vs->conf.virtqueue_size, >>>> + vsc->inflight); >>>> + if (ret < 0) { >>>> + error_report("Error get inflight: %d", -ret); >>>> + goto err_guest_notifiers; >>>> + } >>>> + } >>>> + >>>> ret = vhost_dev_set_inflight(&vsc->dev, vsc->inflight); >>>> if (ret < 0) { >>>> error_report("Error set inflight: %d", -ret); >>>> @@ -85,9 +91,6 @@ int vhost_scsi_common_start(VHostSCSICommon *vsc) >>>> return ret; >>>> >>>> err_guest_notifiers: >>>> - g_free(vsc->inflight); >>>> - vsc->inflight = NULL; >>>> - >>>> k->set_guest_notifiers(qbus->parent, vsc->dev.nvqs, false); >>>> err_host_notifiers: >>>> vhost_dev_disable_notifiers(&vsc->dev, vdev); >>>> @@ -111,12 +114,6 @@ void vhost_scsi_common_stop(VHostSCSICommon *vsc) >>>> } >>>> assert(ret >= 0); >>>> >>> >>> In the vhost-scsi (kernel backend) path, what will cleanup vsc->inflight >>> now? >> OK, we should check the vsc->inflight if it is null, the vhost-scsi doesn’t >> allocate the >> inflight object memory. > > Are you saying vhost-scsi never allocates inflight so we don’t need to check > for it? We have checked the vsc->inflight, and only if allocated, we send the get/set_inflight_fd. This works with vhost-user-scsi/vhost-scsi both. > >> >>> >>>> - if (vsc->inflight) { >>>> - vhost_dev_free_inflight(vsc->inflight); >>>> - g_free(vsc->inflight); >>>> - vsc->inflight = NULL; >>>> - } >>>> - >>>> vhost_dev_disable_notifiers(&vsc->dev, vdev); >>>> } >>>> >>>> diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c >>>> index ee99b19e7a..e0e88b0c42 100644 >>>> --- a/hw/scsi/vhost-user-scsi.c >>>> +++ b/hw/scsi/vhost-user-scsi.c >>>> @@ -89,14 +89,126 @@ static void vhost_dummy_handle_output(VirtIODevice >>>> *vdev, VirtQueue *vq) >>>> { >>>> } >>>> >>>> +static int vhost_user_scsi_connect(DeviceState *dev, Error **errp) >>>> +{ >>>> + VirtIODevice *vdev = VIRTIO_DEVICE(dev); >>>> + VHostUserSCSI *s = VHOST_USER_SCSI(vdev); >>>> + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); >>>> + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); >>>> + int ret = 0; >>>> + >>>> + if (s->connected) { >>>> + return 0; >>>> + } >>>> + s->connected = true; >>>> + >>>> + vsc->dev.num_queues = vs->conf.num_queues; >>>> + vsc->dev.nvqs = VIRTIO_SCSI_VQ_NUM_FIXED + vs->conf.num_queues; >>>> + vsc->dev.vqs = s->vhost_vqs; >>>> + vsc->dev.vq_index = 0; >>>> + vsc->dev.backend_features = 0; >>>> + >>>> + ret = vhost_dev_init(&vsc->dev, &s->vhost_user, >>>> VHOST_BACKEND_TYPE_USER, 0, >>>> + errp); >>>> + if (ret < 0) { >>>> + return ret; >>>> + } >>>> + >>>> + /* restore vhost state */ >>> >>> Should this use virtio_device_should_start like vhost_user_blk? >> I will change this. >>> >>>> + if (virtio_device_started(vdev, vdev->status)) { >>>> + ret = vhost_scsi_common_start(vsc); >>>> + if (ret < 0) { >>>> + return ret; >>>> + } >>>> + } >>>> + >>>> + return 0; >>>> +} >>>> + >>>> +static void vhost_user_scsi_event(void *opaque, QEMUChrEvent event); >>>> + >>>> +static void vhost_user_scsi_disconnect(DeviceState *dev) >>>> +{ >>>> + VirtIODevice *vdev = VIRTIO_DEVICE(dev); >>>> + VHostUserSCSI *s = VHOST_USER_SCSI(vdev); >>>> + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); >>>> + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); >>>> + >>> >>> I don’t think we want to execute vhost_scsi_common_stop() if the device >>> hasn’t been started. I remember that caused a number of races with the >>> vhost_user_blk connecting/disconnecting on startup. >>> >>> Let’s add a similar started_vu check? >> I will add it. >>> >>>> + if (!s->connected) { >>>> + return; >>>> + } >>>> + s->connected = false; >>>> + >>>> + vhost_scsi_common_stop(vsc); >>>> + >>>> + vhost_dev_cleanup(&vsc->dev); >>>> + >>>> + /* Re-instate the event handler for new connections */ >>>> + qemu_chr_fe_set_handlers(&vs->conf.chardev, NULL, NULL, >>>> + vhost_user_scsi_event, NULL, dev, NULL, >>>> true); >>>> +} >>>> + >>>> +static void vhost_user_scsi_event(void *opaque, QEMUChrEvent event) >>>> +{ >>>> + DeviceState *dev = opaque; >>>> + VirtIODevice *vdev = VIRTIO_DEVICE(dev); >>>> + VHostUserSCSI *s = VHOST_USER_SCSI(vdev); >>>> + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); >>>> + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); >>>> + Error *local_err = NULL; >>>> + >>>> + switch (event) { >>>> + case CHR_EVENT_OPENED: >>>> + if (vhost_user_scsi_connect(dev, &local_err) < 0) { >>>> + error_report_err(local_err); >>>> + qemu_chr_fe_disconnect(&vs->conf.chardev); >>>> + return; >>>> + } >>>> + break; >>>> + case CHR_EVENT_CLOSED: >>>> + /* defer close until later to avoid circular close */ >>>> + vhost_user_async_close(dev, &vs->conf.chardev, &vsc->dev, >>>> + vhost_user_scsi_disconnect); >>>> + break; >>>> + case CHR_EVENT_BREAK: >>>> + case CHR_EVENT_MUX_IN: >>>> + case CHR_EVENT_MUX_OUT: >>>> + /* Ignore */ >>>> + break; >>>> + } >>>> +} >>>> + >>>> +static int vhost_user_scsi_realize_connect(VHostUserSCSI *s, Error **errp) >>>> +{ >>>> + DeviceState *dev = &s->parent_obj.parent_obj.parent_obj.parent_obj; >>>> + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); >>>> + int ret; >>>> + >>>> + s->connected = false; >>>> + >>>> + ret = qemu_chr_fe_wait_connected(&vs->conf.chardev, errp); >>>> + if (ret < 0) { >>>> + return ret; >>>> + } >>>> + >>>> + ret = vhost_user_scsi_connect(dev, errp); >>>> + if (ret < 0) { >>>> + qemu_chr_fe_disconnect(&vs->conf.chardev); >>>> + return ret; >>>> + } >>>> + assert(s->connected); >>>> + >>>> + return 0; >>>> +} >>>> + >>>> static void vhost_user_scsi_realize(DeviceState *dev, Error **errp) >>>> { >>>> VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); >>>> VHostUserSCSI *s = VHOST_USER_SCSI(dev); >>>> VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); >>>> - struct vhost_virtqueue *vqs = NULL; >>>> Error *err = NULL; >>>> int ret; >>>> + int retries = REALIZE_CONNECTION_RETRIES; >>>> >>>> if (!vs->conf.chardev.chr) { >>>> error_setg(errp, "vhost-user-scsi: missing chardev"); >>>> @@ -112,21 +224,31 @@ static void vhost_user_scsi_realize(DeviceState >>>> *dev, Error **errp) >>>> } >>>> >>>> if (!vhost_user_init(&s->vhost_user, &vs->conf.chardev, errp)) { >>> >>> Why execute vhost_user_cleanup() if vhost_user_init() fails? >> OK, move this line up in v2. >> >>> >>>> - goto free_virtio; >>>> + goto free_vhost; >>>> } >>>> >>>> - vsc->dev.nvqs = VIRTIO_SCSI_VQ_NUM_FIXED + vs->conf.num_queues; >>>> - vsc->dev.vqs = g_new0(struct vhost_virtqueue, vsc->dev.nvqs); >>>> - vsc->dev.vq_index = 0; >>>> - vsc->dev.backend_features = 0; >>>> - vqs = vsc->dev.vqs; >>>> + vsc->inflight = g_new0(struct vhost_inflight, 1); >>>> + s->vhost_vqs = g_new0(struct vhost_virtqueue, >>>> + VIRTIO_SCSI_VQ_NUM_FIXED + vs->conf.num_queues); >>>> + >>>> + assert(!*errp); >>>> + do { >>>> + if (*errp) { >>>> + error_prepend(errp, "Reconnecting after error: "); >>>> + error_report_err(*errp); >>>> + *errp = NULL; >>>> + } >>>> + ret = vhost_user_scsi_realize_connect(s, errp); >>>> + } while (ret < 0 && retries--); >>>> >>>> - ret = vhost_dev_init(&vsc->dev, &s->vhost_user, >>>> - VHOST_BACKEND_TYPE_USER, 0, errp); >>>> if (ret < 0) { >>>> - goto free_vhost; >>>> + goto free_vqs; >>>> } >>>> >>>> + /* we're fully initialized, now we can operate, so add the handler */ >>>> + qemu_chr_fe_set_handlers(&vs->conf.chardev, NULL, NULL, >>>> + vhost_user_scsi_event, NULL, (void *)dev, >>>> + NULL, true); >>>> /* Channel and lun both are 0 for bootable vhost-user-scsi disk */ >>>> vsc->channel = 0; >>>> vsc->lun = 0; >>>> @@ -134,10 +256,15 @@ static void vhost_user_scsi_realize(DeviceState >>>> *dev, Error **errp) >>>> >>>> return; >>>> >>>> +free_vqs: >>>> + g_free(s->vhost_vqs); >>>> + s->vhost_vqs = NULL; >>>> + g_free(vsc->inflight); >>>> + vsc->inflight = NULL; >>>> + >>>> free_vhost: >>>> vhost_user_cleanup(&s->vhost_user); >>>> - g_free(vqs); >>>> -free_virtio: >>>> + >>>> virtio_scsi_common_unrealize(dev); >>>> } >>>> >>>> @@ -146,16 +273,22 @@ static void vhost_user_scsi_unrealize(DeviceState >>>> *dev) >>>> VirtIODevice *vdev = VIRTIO_DEVICE(dev); >>>> VHostUserSCSI *s = VHOST_USER_SCSI(dev); >>>> VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); >>>> - struct vhost_virtqueue *vqs = vsc->dev.vqs; >>>> + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); >>>> >>>> /* This will stop the vhost backend. */ >>>> vhost_user_scsi_set_status(vdev, 0); >>>> + qemu_chr_fe_set_handlers(&vs->conf.chardev, NULL, NULL, NULL, NULL, >>>> NULL, >>>> + NULL, false); >>>> >>>> vhost_dev_cleanup(&vsc->dev); >>>> - g_free(vqs); >>> >>> Nit: Why not put vhost_dev_free_inflight next to the remaining inflight >>> cleanup? >> OK. >>> >>>> + vhost_dev_free_inflight(vsc->inflight); >>>> + g_free(s->vhost_vqs); >>>> + s->vhost_vqs = NULL; >>>> + g_free(vsc->inflight); >>>> + vsc->inflight = NULL; >>>> >>> >>> Curiosity - why reorder here? Is something in vhost_user_cleanup() >>> dependent on state freed in virtio_scsi_common_unrealize()? >>> >>> If so, should that go as a standalone fix? >> >> Because in vhost_user_scsi_realize, we initialize in order: >> virtio_scsi_common_realize >> vhost_user_init >> >> And in the error handler of vhost_user_scsi_realize, the uninitialize in >> order: >> vhost_user_cleanup >> virtio_scsi_common_unrealize >> >> I think in vhost_user_scsi_unrealize we should keep it the same order, right? > > I’m not saying it’s wrong. If there’s no dependency (i.e. this is not fixing > a bug, just a stylistic improvement) it can stay in the same change. OK. > >> >>> >>>> - virtio_scsi_common_unrealize(dev); >>>> vhost_user_cleanup(&s->vhost_user); >>>> + virtio_scsi_common_unrealize(dev); >>>> } >>>> >>>> static Property vhost_user_scsi_properties[] = { >>>> diff --git a/include/hw/virtio/vhost-user-scsi.h >>>> b/include/hw/virtio/vhost-user-scsi.h >>>> index 521b08e559..c66acc68b7 100644 >>>> --- a/include/hw/virtio/vhost-user-scsi.h >>>> +++ b/include/hw/virtio/vhost-user-scsi.h >>>> @@ -29,6 +29,9 @@ OBJECT_DECLARE_SIMPLE_TYPE(VHostUserSCSI, >>>> VHOST_USER_SCSI) >>>> struct VHostUserSCSI { >>>> VHostSCSICommon parent_obj; >>>> VhostUserState vhost_user; >>> >>> See above - we should probably have started_vu here/ >> I will add it. >>> >>> Maybe we should have some shared struct with vhost_user_blk for >>> connectivity params? >> >> In the future vhost-user-blk/scsi can be refactored to share the same code. > > Sure - this can be done at some point in the future. > >>> >>>> + bool connected; >>>> + >>>> + struct vhost_virtqueue *vhost_vqs; >>>> }; >>>> >>>> #endif /* VHOST_USER_SCSI_H */ >>>> diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h >>>> index 6a173cb9fa..b904346fe1 100644 >>>> --- a/include/hw/virtio/vhost.h >>>> +++ b/include/hw/virtio/vhost.h >>>> @@ -8,6 +8,8 @@ >>>> #define VHOST_F_DEVICE_IOTLB 63 >>>> #define VHOST_USER_F_PROTOCOL_FEATURES 30 >>>> >>> >>> Should the macro name indicate that this is for vhost-user? >>> >>> VU_REALIZE_CONN_RETRIES? >> I will rename it in v2. >> >>> >>>> +#define REALIZE_CONNECTION_RETRIES 3 >>>> + >>>> /* Generic structures common for any vhost based device. */ >>>> >>>> struct vhost_inflight { >>>> -- >>>> 2.41.0
Any comments about other patches?