On Tue, Oct 01, 2024 at 02:21:55PM +0200, Stefano Garzarella wrote:
> On Thu, Sep 19, 2024 at 03:29:44PM GMT, yaozhenguo wrote:
> > During hot-unplug of a vhost-user-net NIC, vhost_user_cleanup may add the
> > same RCU node to the RCU list twice. The call chain is as follows:
> > 
> > vhost_user_cleanup
> >    ->vhost_user_host_notifier_remove
> >        ->call_rcu(n, vhost_user_host_notifier_free, rcu);
> >    ->g_free_rcu(n, rcu);
> > 
> > When this happens, QEMU will abort in try_dequeue:
> > 
> > if (head == &dummy && qatomic_mb_read(&tail) == &dummy.next) {
> >    abort();
> > }
> > 
> > Backtrace is as follows:
> > 0  __pthread_kill_implementation () at /usr/lib64/libc.so.6
> > 1  raise () at /usr/lib64/libc.so.6
> > 2  abort () at /usr/lib64/libc.so.6
> > 3  try_dequeue () at ../util/rcu.c:235
> > 4  call_rcu_thread (0) at ../util/rcu.c:288
> > 5  qemu_thread_start (0) at ../util/qemu-thread-posix.c:541
> > 6  start_thread () at /usr/lib64/libc.so.6
> > 7  clone3 () at /usr/lib64/libc.so.6
> > 
> > The reason for the abort is that adding two identical nodes to the RCU list
> > turns it into a ring. When try_dequeue appends the dummy node to the tail of
> > the list, the ring is broken open again, but this leaves only one node on
> > the list while rcu_call_count has been incremented twice. The resulting
> > mismatch makes try_dequeue abort.
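> > 
> > To see how the ring forms, here is a minimal single-threaded sketch of the
> > lock-free enqueue used by util/rcu.c (an illustrative model only; the real
> > code uses qatomic_xchg() and memory barriers):
> > 
> > #include <assert.h>
> > #include <stddef.h>
> > 
> > struct rcu_head { struct rcu_head *next; };
> > 
> > static struct rcu_head dummy;
> > static struct rcu_head *head = &dummy;       /* consumer side */
> > static struct rcu_head **tail = &dummy.next; /* producer side */
> > 
> > static void enqueue(struct rcu_head *node)
> > {
> >     struct rcu_head **old_tail;
> > 
> >     node->next = NULL;
> >     old_tail = tail;        /* qatomic_xchg() in the real code */
> >     tail = &node->next;
> >     *old_tail = node;
> > }
> > 
> > int main(void)
> > {
> >     struct rcu_head n;
> > 
> >     enqueue(&n);                  /* first submission */
> >     assert(head == &dummy && dummy.next == &n);
> >     enqueue(&n);                  /* duplicate submission, as in the bug */
> >     assert(n.next == &n);         /* node now links to itself: a ring */
> >     return 0;
> > }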
> > 
> > This issue happens when n->addr != 0 in vhost_user_host_notifier_remove. It
> > can be triggered by a hotplug stress test with a 32-queue vhost-user-net
> > NIC: n->addr is set while handling VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG
> > during device hotplug, and it is cleared in vhost_dev_stop during device
> > hot-unplug. Since VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG is a message
> > sent asynchronously by DPDK to QEMU, there is no guaranteed order between
> > setting n->addr and clearing it. If the hotplug path sets n->addr after the
> > hot-unplug path has cleared it, the issue occurs. Therefore it is necessary
> > to merge g_free_rcu and vhost_user_host_notifier_free into a single RCU
> > callback on one node.
> > 
> > Fixes: 503e355465 ("virtio/vhost-user: dynamically assign VhostUserHostNotifiers")
> > 
> > Signed-off-by: yaozhenguo <yaozhen...@jd.com>
> > ---
> > 
> > V1->V2:
> >    add an n->addr check in vhost_user_get_vring_base and
> >    vhost_user_backend_handle_vring_host_notifier to prevent submitting
> >    the same node to the RCU list twice.
> > 
> > ---
> > hw/virtio/vhost-user.c         | 39 +++++++++++++++++++++------------------
> > include/hw/virtio/vhost-user.h |  1 +
> > 2 files changed, 22 insertions(+), 18 deletions(-)
> > 
> > diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> > index 00561da..ba4c09c 100644
> > --- a/hw/virtio/vhost-user.c
> > +++ b/hw/virtio/vhost-user.c
> > @@ -51,7 +51,6 @@
> > #else
> > #define VHOST_USER_MAX_RAM_SLOTS 512
> > #endif
> > -
> > /*
> >  * Maximum size of virtio device config space
> >  */
> > @@ -1185,9 +1184,16 @@ static int vhost_user_set_vring_num(struct vhost_dev *dev,
> > 
> > static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
> > {
> > -    assert(n && n->unmap_addr);
> > -    munmap(n->unmap_addr, qemu_real_host_page_size());
> > -    n->unmap_addr = NULL;
> > +    if (n->unmap_addr) {
> > +        munmap(n->unmap_addr, qemu_real_host_page_size());
> > +        n->unmap_addr = NULL;
> > +    }
> > +    if (n->need_free) {
> > +        memory_region_transaction_begin();
> > +        object_unparent(OBJECT(&n->mr));
> > +        memory_region_transaction_commit();
> > +        g_free(n);
> > +    }
> > }
> > 
> > /*
> > @@ -1195,17 +1201,20 @@ static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
> >  * under rcu.
> >  */
> > static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
> > -                                            VirtIODevice *vdev)
> > +                                            VirtIODevice *vdev, bool free)
> 
> What about `destroy` instead of `free`?
> 
> That way it is clearer that it should be true when called by
> `vhost_user_state_destroy()`.
> 
Yes, "destroy" is better.
> > {
> >     if (n->addr) {
> >         if (vdev) {
> > +            memory_region_transaction_begin();
> >             virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false);
> > +            memory_region_transaction_commit();
> >         }
> >         assert(!n->unmap_addr);
> >         n->unmap_addr = n->addr;
> >         n->addr = NULL;
> > -        call_rcu(n, vhost_user_host_notifier_free, rcu);
> >     }
> 
> Instead of checking n->addr in the caller, I suggest moving the check
> here:
> 
>       if (destroy || n->unmap_addr) {
>           n->destroy = destroy;
>           call_rcu(n, vhost_user_host_notifier_free, rcu);
>       }
> 
> Thanks,
> Stefano

I think there is another problem with this modification. The interval between
two submissions to the RCU list may be quite short, and during that window the
RCU thread may have been scheduled away, so the callback submitted first has
not yet run. It is then still possible to submit two identical RCU nodes to
the list.

Below is this situation:

1: destroy == false and n->addr != NULL

vhost_user_get_vring_base
  -> vhost_user_host_notifier_remove
        n->unmap_addr = n->addr;
        n->addr = NULL;
        -> call_rcu

2: destroy == false and n->addr == NULL, but n->unmap_addr != NULL
   (the RCU thread has been scheduled away, so n->unmap_addr has not
   been cleared yet)

vhost_user_backend_handle_vring_host_notifier
  -> vhost_user_host_notifier_remove
        -> call_rcu

So, I think the modification below may be better:

static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
                                            VirtIODevice *vdev, bool destroy)
{
+    if (!destroy && !n->addr) {
+        return;
+    }
     ...
}
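
With that guard, and with need_free renamed to destroy as suggested, the whole
function would look roughly like this (a sketch assembled from the V2 patch
above, untested):

static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
                                            VirtIODevice *vdev, bool destroy)
{
    /*
     * n->addr == NULL means either the notifier was never mapped or a
     * previous removal is still pending on the RCU list; in both cases
     * there is nothing left to do unless we are destroying the notifier.
     */
    if (!destroy && !n->addr) {
        return;
    }

    if (n->addr) {
        if (vdev) {
            memory_region_transaction_begin();
            virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false);
            memory_region_transaction_commit();
        }
        assert(!n->unmap_addr);
        n->unmap_addr = n->addr;
        n->addr = NULL;
    }

    n->destroy = destroy;
    call_rcu(n, vhost_user_host_notifier_free, rcu);
}

The n->addr checks added to the two callers in V2 would then no longer be
needed.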
> 
> > +    n->need_free = free;
> > +    call_rcu(n, vhost_user_host_notifier_free, rcu);
> > }
> > 
> > static int vhost_user_set_vring_base(struct vhost_dev *dev,
> > @@ -1279,8 +1288,8 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev,
> >     struct vhost_user *u = dev->opaque;
> > 
> >     VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index);
> > -    if (n) {
> > -        vhost_user_host_notifier_remove(n, dev->vdev);
> > +    if (n && n->addr) {
> > +        vhost_user_host_notifier_remove(n, dev->vdev, false);
> >     }
> > 
> >     ret = vhost_user_write(dev, &msg, NULL, 0);
> > @@ -1562,7 +1571,9 @@ static int vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev,
> >      * new mapped address.
> >      */
> >     n = fetch_or_create_notifier(user, queue_idx);
> > -    vhost_user_host_notifier_remove(n, vdev);
> > +    if (n && n->addr) {
> > +        vhost_user_host_notifier_remove(n, vdev, false);
> > +    }
> > 
> >     if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
> >         return 0;
> > @@ -2737,13 +2748,7 @@ static void vhost_user_state_destroy(gpointer data)
> > {
> >     VhostUserHostNotifier *n = (VhostUserHostNotifier *) data;
> >     if (n) {
> > -        vhost_user_host_notifier_remove(n, NULL);
> > -        object_unparent(OBJECT(&n->mr));
> > -        /*
> > -         * We can't free until vhost_user_host_notifier_remove has
> > -         * done it's thing so schedule the free with RCU.
> > -         */
> > -        g_free_rcu(n, rcu);
> > +        vhost_user_host_notifier_remove(n, NULL, true);
> >     }
> > }
> > 
> > @@ -2765,9 +2770,7 @@ void vhost_user_cleanup(VhostUserState *user)
> >     if (!user->chr) {
> >         return;
> >     }
> > -    memory_region_transaction_begin();
> >     user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true);
> > -    memory_region_transaction_commit();
> >     user->chr = NULL;
> > }
> > 
> > diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h
> > index 324cd86..a171f29 100644
> > --- a/include/hw/virtio/vhost-user.h
> > +++ b/include/hw/virtio/vhost-user.h
> > @@ -54,6 +54,7 @@ typedef struct VhostUserHostNotifier {
> >     void *addr;
> >     void *unmap_addr;
> >     int idx;
> > +    bool need_free;
> > } VhostUserHostNotifier;
> > 
> > /**
> > -- 
> > 1.8.3.1
> > 
> 
