Hi Marc-André,

[very old patch...]

On 27/2/17 11:49, Marc-André Lureau wrote:
Since commit b0a335e351103bf92f3f9d0bd5759311be8156ac, a socket write
may trigger a disconnect event, calling vhost_user_stop() and clearing
all the vhost_dev structures holding data that vhost.c functions expect
to remain valid. Delay the cleanup to keep the vhost_dev structures
valid while the vhost.c functions run.

Signed-off-by: Marc-André Lureau <marcandre.lur...@redhat.com>
---
v3:
  - use aio_bh_schedule_oneshot(), as suggested by Paolo
v2:
  - fix reconnect race

net/vhost-user.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++-------
  1 file changed, 46 insertions(+), 7 deletions(-)

diff --git a/net/vhost-user.c b/net/vhost-user.c
index 77b8110f8c..e7e63408a1 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -190,7 +190,35 @@ static gboolean net_vhost_user_watch(GIOChannel *chan, GIOCondition cond,
 
     qemu_chr_fe_disconnect(&s->chr);
 
-    return FALSE;
+    return TRUE;

Do you mind explaining this change again? It is not clear from
the commit description. We listen for G_IO_HUP, get a HUP, so
we disconnect the chardev but keep watching for future HUP events?
In which cases can that happen? How can we get a chardev connected
and initialized here without calling net_init_vhost_user() again?
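
For context, the behaviour that return value controls is the usual GLib
watch convention: a GIOFunc that returns FALSE has its source destroyed,
while returning TRUE keeps it installed. Below is a minimal standalone
sketch of just that convention (plain GLib, not QEMU code; file and
function names are illustrative):

/* build: gcc watch-demo.c $(pkg-config --cflags --libs glib-2.0) */
#include <glib.h>
#include <unistd.h>

static gboolean my_watch(GIOChannel *chan, GIOCondition cond, gpointer data)
{
    g_print("watch fired, condition 0x%x\n", (unsigned)cond);
    /* TRUE keeps the GSource installed so later HUPs fire again;
     * FALSE would destroy the source as soon as this call returns. */
    return TRUE;
}

int main(void)
{
    int fds[2];
    GIOChannel *chan;
    guint id;

    if (pipe(fds) != 0) {
        return 1;
    }

    chan = g_io_channel_unix_new(fds[0]);
    id = g_io_add_watch(chan, G_IO_HUP, my_watch, NULL);

    close(fds[1]);                     /* hang up the write end -> G_IO_HUP */
    g_main_context_iteration(NULL, TRUE);

    /* Because the callback returned TRUE the watch still exists and must be
     * removed by hand, much like the g_source_remove(s->watch) in the
     * CHR_EVENT_CLOSED hunk further down. */
    g_source_remove(id);
    g_io_channel_unref(chan);
    close(fds[0]);
    return 0;
}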

+}
+
+static void net_vhost_user_event(void *opaque, int event);
+
+static void chr_closed_bh(void *opaque)
+{
+    const char *name = opaque;
+    NetClientState *ncs[MAX_QUEUE_NUM];
+    VhostUserState *s;
+    Error *err = NULL;
+    int queues;
+
+    queues = qemu_find_net_clients_except(name, ncs,
+                                          NET_CLIENT_DRIVER_NIC,
+                                          MAX_QUEUE_NUM);
+    assert(queues < MAX_QUEUE_NUM);
+
+    s = DO_UPCAST(VhostUserState, nc, ncs[0]);
+
+    qmp_set_link(name, false, &err);
+    vhost_user_stop(queues, ncs);
+
+    qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, net_vhost_user_event,
+                             opaque, NULL, true);
+
+    if (err) {
+        error_report_err(err);
+    }
  }
static void net_vhost_user_event(void *opaque, int event)
@@ -212,20 +240,31 @@ static void net_vhost_user_event(void *opaque, int event)
      trace_vhost_user_event(chr->label, event);
      switch (event) {
      case CHR_EVENT_OPENED:
-        s->watch = qemu_chr_fe_add_watch(&s->chr, G_IO_HUP,
-                                         net_vhost_user_watch, s);
          if (vhost_user_start(queues, ncs, &s->chr) < 0) {
              qemu_chr_fe_disconnect(&s->chr);
              return;
          }
+        s->watch = qemu_chr_fe_add_watch(&s->chr, G_IO_HUP,
+                                         net_vhost_user_watch, s);
          qmp_set_link(name, true, &err);
          s->started = true;
          break;
      case CHR_EVENT_CLOSED:
-        qmp_set_link(name, false, &err);
-        vhost_user_stop(queues, ncs);
-        g_source_remove(s->watch);
-        s->watch = 0;
+        /* a close event may happen during a read/write, but vhost
+         * code assumes the vhost_dev remains setup, so delay the
+         * stop & clear to idle.
+         * FIXME: better handle failure in vhost code, remove bh
+         */
+        if (s->watch) {
+            AioContext *ctx = qemu_get_current_aio_context();
+
+            g_source_remove(s->watch);
+            s->watch = 0;
+            qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, NULL,
+                                     NULL, NULL, false);
+
+            aio_bh_schedule_oneshot(ctx, chr_closed_bh, opaque);
+        }
          break;
      }
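
For readers following along, the deferral in the CHR_EVENT_CLOSED hunk is
the classic "don't tear state down while a caller further up the stack is
still using it" pattern: the destructive part is pushed to a one-shot
callback that the main loop runs after the current event handler has
unwound. A minimal standalone sketch of the same idea, using g_idle_add()
in place of QEMU's aio_bh_schedule_oneshot() (plain GLib, not QEMU code;
names are illustrative):

/* build: gcc bh-demo.c $(pkg-config --cflags --libs glib-2.0) */
#include <glib.h>

typedef struct {
    gboolean connected;    /* stands in for the vhost_dev state kept alive */
} FakeState;

static gboolean deferred_teardown(gpointer opaque)
{
    FakeState *s = opaque;

    /* Runs from the main loop after the event handler has returned, so no
     * caller further up the stack still dereferences the state. */
    s->connected = FALSE;
    g_print("teardown done\n");
    return G_SOURCE_REMOVE;            /* one-shot, like a scheduled BH */
}

static void on_close_event(FakeState *s)
{
    /* Don't clear s here: whoever delivered the close event may still be
     * in the middle of a read/write on it.  Defer the destructive part. */
    g_idle_add(deferred_teardown, s);
}

int main(void)
{
    FakeState s = { .connected = TRUE };

    on_close_event(&s);                     /* state is still intact here */
    g_main_context_iteration(NULL, FALSE);  /* dispatches the idle source */

    g_print("connected=%d\n", s.connected);
    return 0;
}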
