During live migration, when stopping vhost-user device, 'vhost_dev_stop' will be called, 'vhost_dev_stop' will call a batch of 'vhost_user_read' and 'vhost_user_write'. If a previous 'vhost_user_read' or 'vhost_user_write' failed because the vhost user backend failed, the 'CHR_EVENT_CLOSED' event will be triggerd, followed by the call chain chr_closed_bh()->vhost_user_stop()-> vhost_net_cleanup()->vhost_dev_cleanup()
vhost_dev_cleanup will clear vhost_dev struct, so the later 'vhost_user_read' or 'vhost_user_read' will reference null pointer and cause qemu crash Signed-off-by: Liang Li <liliang...@didiglobal.com> --- hw/net/vhost_net.c | 6 ++++++ hw/virtio/vhost-user.c | 15 +++++++++++++-- include/hw/virtio/vhost.h | 1 + include/net/vhost_net.h | 1 + net/vhost-user.c | 3 +++ 5 files changed, 24 insertions(+), 2 deletions(-) diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index e037db6..77994e9 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -113,6 +113,11 @@ uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features) features); } +void vhost_net_mark_break_down(struct vhost_net *net) +{ + net->dev.break_down = true; +} + void vhost_net_ack_features(struct vhost_net *net, uint64_t features) { net->dev.acked_features = net->dev.backend_features; @@ -156,6 +161,7 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) net->dev.max_queues = 1; net->dev.nvqs = 2; net->dev.vqs = net->vqs; + net->dev.break_down = false; if (backend_kernel) { r = vhost_net_get_fd(options->net_backend); diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index b041343..1394719 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -213,14 +213,20 @@ static bool ioeventfd_enabled(void) static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) { struct vhost_user *u = dev->opaque; - CharBackend *chr = u->user->chr; + CharBackend *chr; uint8_t *p = (uint8_t *) msg; int r, size = VHOST_USER_HDR_SIZE; + if (dev->break_down) { + goto fail; + } + + chr = u->user->chr; r = qemu_chr_fe_read_all(chr, p, size); if (r != size) { error_report("Failed to read msg header. Read %d instead of %d." " Original request %d.", r, size, msg->hdr.request); + dev->break_down = true; goto fail; } @@ -299,9 +305,12 @@ static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, int *fds, int fd_num) { struct vhost_user *u = dev->opaque; - CharBackend *chr = u->user->chr; + CharBackend *chr; int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size; + if (dev->break_down) { + return -1; + } /* * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, * we just need send it once in the first time. For later such @@ -312,6 +321,7 @@ static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, return 0; } + chr = u->user->chr; if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) { error_report("Failed to set msg fds."); return -1; @@ -319,6 +329,7 @@ static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size); if (ret != size) { + dev->break_down = true; error_report("Failed to write msg." " Wrote %d instead of %d.", ret, size); return -1; diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index a7f449f..86d0dc5 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -74,6 +74,7 @@ struct vhost_dev { bool started; bool log_enabled; uint64_t log_size; + bool break_down; Error *migration_blocker; const VhostOps *vhost_ops; void *opaque; diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h index 77e4739..06f2c08 100644 --- a/include/net/vhost_net.h +++ b/include/net/vhost_net.h @@ -27,6 +27,7 @@ void vhost_net_cleanup(VHostNetState *net); uint64_t vhost_net_get_features(VHostNetState *net, uint64_t features); void vhost_net_ack_features(VHostNetState *net, uint64_t features); +void vhost_net_mark_break_down(VHostNetState *net); bool vhost_net_virtqueue_pending(VHostNetState *net, int n); void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, diff --git a/net/vhost-user.c b/net/vhost-user.c index a39f9c9..b99e20b 100644 --- a/net/vhost-user.c +++ b/net/vhost-user.c @@ -270,6 +270,9 @@ static void net_vhost_user_event(void *opaque, int event) if (s->watch) { AioContext *ctx = qemu_get_current_aio_context(); + if (s->vhost_net) { + vhost_net_mark_break_down(s->vhost_net); + } g_source_remove(s->watch); s->watch = 0; qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, NULL, NULL, -- 1.8.3.1