vhost_user_read_cb() and rte_vhost_driver_unregister() can be called at the same time by two threads, which may lead to a deadlock. E.g. thread1 calls vhost_user_read_cb()->vhost_user_get_vring_base()->destroy_device(), then thread2 calls rte_vhost_driver_unregister() and keeps retrying fdset_try_del() in a loop.
Some applications implement destroy_device() as a blocking function, e.g. OVS calls ovsrcu_synchronize() inside destroy_device(). As a result, thread1 (e.g. vhost_events) blocks waiting for thread2 (e.g. ovs-vswitchd) to quiesce, while thread2 loops waiting for thread1 to give up the vhost fd, which leads to a deadlock. It is better to return -EAGAIN to the application, which can then decide how to handle it (e.g. OVS can call ovsrcu_quiesce() and then retry). Signed-off-by: Zhike Wang <wangzh...@jd.com> --- lib/librte_vhost/rte_vhost.h | 4 +++- lib/librte_vhost/socket.c | 8 ++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h index c7b619a..276db11 100644 --- a/lib/librte_vhost/rte_vhost.h +++ b/lib/librte_vhost/rte_vhost.h @@ -389,7 +389,9 @@ void rte_vhost_log_used_vring(int vid, uint16_t vring_idx, */ int rte_vhost_driver_register(const char *path, uint64_t flags); -/* Unregister vhost driver. This is only meaningful to vhost user. */ +/* Unregister vhost driver. This is only meaningful to vhost user. + * Return -EAGAIN if device is busy, and leave it to be handled by application. + */ int rte_vhost_driver_unregister(const char *path); /** diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c index 7c80121..a75a3f6 100644 --- a/lib/librte_vhost/socket.c +++ b/lib/librte_vhost/socket.c @@ -1027,7 +1027,8 @@ struct vhost_user_reconnect_list { } /** - * Unregister the specified vhost socket + * Unregister the specified vhost socket. + * Return -EAGAIN if device is busy, and leave it to be handled by application. 
*/ int rte_vhost_driver_unregister(const char *path) @@ -1039,7 +1040,6 @@ struct vhost_user_reconnect_list { if (path == NULL) return -1; -again: pthread_mutex_lock(&vhost_user.mutex); for (i = 0; i < vhost_user.vsocket_cnt; i++) { @@ -1063,7 +1063,7 @@ struct vhost_user_reconnect_list { pthread_mutex_unlock( &vsocket->conn_mutex); pthread_mutex_unlock(&vhost_user.mutex); - goto again; + return -EAGAIN; } VHOST_LOG_CONFIG(INFO, @@ -1085,7 +1085,7 @@ struct vhost_user_reconnect_list { if (fdset_try_del(&vhost_user.fdset, vsocket->socket_fd) == -1) { pthread_mutex_unlock(&vhost_user.mutex); - goto again; + return -EAGAIN; } close(vsocket->socket_fd); -- 1.8.3.1