On Wed, Aug 07, 2024 at 05:55:08PM GMT, yaozhenguo wrote:
When hotplug and hotunplug vhost-user-net device quickly.

I'd replace the . with ,

qemu will crash. BT is as below:

0  __pthread_kill_implementation () at /usr/lib64/libc.so.6
1  raise () at /usr/lib64/libc.so.6
2  abort () at /usr/lib64/libc.so.6
3  try_dequeue () at ../util/rcu.c:235
4  call_rcu_thread (opaque=opaque@entry=0x0) at ../util/rcu.c:288
5  qemu_thread_start (args=0x55b10d9ceaa0) at ../util/qemu-thread-posix.c:541
6  start_thread () at /usr/lib64/libc.so.6
7  clone3 () at /usr/lib64/libc.so.6

1. device_del qmp process

virtio_set_status
 vhost_dev_stop
   vhost_user_get_vring_base
     vhost_user_host_notifier_remove

vhost_user_slave_handle_vring_host_notifier may be called asynchronously after
 ^
This function is now called vhost_user_backend_handle_vring_host_notifier; I'd suggest using the new name.

vhost_user_host_notifier_remove. vhost_user_host_notifier_remove will not
all call_rcu because of notifier->addr is NULL at this time.

s/all/call ?


2. netdev_del qmp process

vhost_user_cleanup
      vhost_user_host_notifier_remove
      g_free_rcu

vhost_user_host_notifier_remove and g_free_rcu will sumbit same rcu_head

s/sumbit/submit

to the rcu node list. rcu_call_count is incremented twice but only one node is added.
The rcu thread will abort when calling try_dequeue while the node list is empty.

What's not clear to me is how 1 and 2 are related. Could you explain that?

Fix this by moving g_free(n) to vhost_user_host_notifier_free.

Fixes: 503e355465 ("virtio/vhost-user: dynamically assign VhostUserHostNotifiers")
Signed-off-by: yaozhenguo <yaozhen...@jd.com>
---
hw/virtio/vhost-user.c         | 23 +++++++++++------------
include/hw/virtio/vhost-user.h |  1 +
2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 00561daa06..7ab37c0da2 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -1188,6 +1188,12 @@ static void 
vhost_user_host_notifier_free(VhostUserHostNotifier *n)
    assert(n && n->unmap_addr);
    munmap(n->unmap_addr, qemu_real_host_page_size());
    n->unmap_addr = NULL;
+    if (n->need_free) {
+        memory_region_transaction_begin();
+        object_unparent(OBJECT(&n->mr));
+        memory_region_transaction_commit();
+        g_free(n);
+    }
}

/*
@@ -1195,7 +1201,7 @@ static void 
vhost_user_host_notifier_free(VhostUserHostNotifier *n)
 * under rcu.
 */
static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
-                                            VirtIODevice *vdev)
+                                            VirtIODevice *vdev, bool free)
{
    if (n->addr) {
        if (vdev) {
@@ -1204,6 +1210,7 @@ static void 
vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
        assert(!n->unmap_addr);
        n->unmap_addr = n->addr;
        n->addr = NULL;
+        n->need_free = free;
        call_rcu(n, vhost_user_host_notifier_free, rcu);
    }
}
@@ -1280,7 +1287,7 @@ static int vhost_user_get_vring_base(struct vhost_dev 
*dev,

    VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index);
    if (n) {
-        vhost_user_host_notifier_remove(n, dev->vdev);
+        vhost_user_host_notifier_remove(n, dev->vdev, false);
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
@@ -1562,7 +1569,7 @@ static int 
vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev,
     * new mapped address.
     */
    n = fetch_or_create_notifier(user, queue_idx);
-    vhost_user_host_notifier_remove(n, vdev);
+    vhost_user_host_notifier_remove(n, vdev, false);

    if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
        return 0;
@@ -2737,13 +2744,7 @@ static void vhost_user_state_destroy(gpointer data)
{
    VhostUserHostNotifier *n = (VhostUserHostNotifier *) data;
    if (n) {
-        vhost_user_host_notifier_remove(n, NULL);
-        object_unparent(OBJECT(&n->mr));
-        /*
-         * We can't free until vhost_user_host_notifier_remove has
-         * done it's thing so schedule the free with RCU.
-         */
-        g_free_rcu(n, rcu);
+        vhost_user_host_notifier_remove(n, NULL, true);

I'm not sure I fully understand the problem, but could it be that the crash no longer occurs and we now have a memory leak instead?

Here, for example, could `n->addr` be NULL, so that `vhost_user_host_notifier_free` with `n->need_free = true` is never submitted?

    }
}

@@ -2765,9 +2766,7 @@ void vhost_user_cleanup(VhostUserState *user)
    if (!user->chr) {
        return;
    }
-    memory_region_transaction_begin();
user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true);
-    memory_region_transaction_commit();

This is no longer necessary, because the `user->notifiers` free function no longer calls `object_unparent(OBJECT(&n->mr))`, right?

Maybe it's worth mentioning this in the commit description.

    user->chr = NULL;
}

diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h
index 324cd8663a..a171f29e0b 100644
--- a/include/hw/virtio/vhost-user.h
+++ b/include/hw/virtio/vhost-user.h
@@ -54,6 +54,7 @@ typedef struct VhostUserHostNotifier {
    void *addr;
    void *unmap_addr;
    int idx;
+    bool need_free;
} VhostUserHostNotifier;

/**
--
2.43.0



Reply via email to