Hi Gaoxiang, > -----Original Message----- > From: Gaoxiang Liu <gaoxiangl...@163.com> > Sent: Friday, August 27, 2021 10:19 PM > To: maxime.coque...@redhat.com; Xia, Chenbo <chenbo....@intel.com> > Cc: dev@dpdk.org; liugaoxi...@huawei.com; Gaoxiang Liu <gaoxiangl...@163.com> > Subject: [PATCH v6] vhost: fix crash on port deletion > > The rte_vhost_driver_unregister() and vhost_user_read_cb() > can be called at the same time by 2 threads. > when memory of vsocket is freed in rte_vhost_driver_unregister(), > the invalid memory of vsocket is accessed in vhost_user_read_cb(). > It's a bug of both mode for vhost as server or client. > > E.g.,vhostuser port is created as server.
Put a space after ',' > Thread1 calls rte_vhost_driver_unregister(). > Before the listen fd is deleted from poll waiting fds, > "vhost-events" thread then calls vhost_user_server_new_connection(), > then a new conn fd is added in fdset when trying to reconnect. > "vhost-events" thread then calls vhost_user_read_cb() and > accesses invalid memory of socket while thread1 frees the memory of > vsocket. > > E.g.,vhostuser port is created as client. Same here. > Thread1 calls rte_vhost_driver_unregister(). > Before vsocket of reconn is deleted from reconn list, > "vhost_reconn" thread then calls vhost_user_add_connection() > then a new conn fd is added in fdset when trying to reconnect. > "vhost-events" thread then calls vhost_user_read_cb() and > accesses invalid memory of socket while thread1 frees the memory of > vsocket. > > The fix is to move the "fdset_try_del" in front of free memory of conn, > then avoid the race condition. > > The core trace is: > Program terminated with signal 11, Segmentation fault. > > Fixes: 52d874dc6705 ("vhost: fix crash on closing in client mode") > > Signed-off-by: Gaoxiang Liu <liugaoxi...@huawei.com> > --- > > v2: > * Fix coding style issues. > > v3: > * Add detailed log. > > v4: > * Add the reason, when vhostuser port is created as server. > > v5: > * Add detailed log when vhostuser port is created as client > > v6: > * Add 'path' check before deleting listen fd > * Fix spelling issues > --- > lib/vhost/socket.c | 108 ++++++++++++++++++++++----------------------- > 1 file changed, 54 insertions(+), 54 deletions(-) > > diff --git a/lib/vhost/socket.c b/lib/vhost/socket.c > index 5d0d728d5..27d5e8695 100644 > --- a/lib/vhost/socket.c > +++ b/lib/vhost/socket.c > @@ -1023,66 +1023,66 @@ rte_vhost_driver_unregister(const char *path) > > for (i = 0; i < vhost_user.vsocket_cnt; i++) { > struct vhost_user_socket *vsocket = vhost_user.vsockets[i]; > + if (strcmp(vsocket->path, path)) { > + continue; > + } braces {} are not necessary for single statement blocks > > - if (!strcmp(vsocket->path, path)) { > - pthread_mutex_lock(&vsocket->conn_mutex); > - for (conn = TAILQ_FIRST(&vsocket->conn_list); > - conn != NULL; > - conn = next) { > - next = TAILQ_NEXT(conn, next); > - > - /* > - * If r/wcb is executing, release vsocket's > - * conn_mutex and vhost_user's mutex locks, and > - * try again since the r/wcb may use the > - * conn_mutex and mutex locks. > - */ > - if (fdset_try_del(&vhost_user.fdset, > - conn->connfd) == -1) { > - pthread_mutex_unlock( > - &vsocket->conn_mutex); > - pthread_mutex_unlock(&vhost_user.mutex); > - goto again; > - } > - > - VHOST_LOG_CONFIG(INFO, > - "free connfd = %d for device '%s'\n", > - conn->connfd, path); > - close(conn->connfd); > - vhost_destroy_device(conn->vid); > - TAILQ_REMOVE(&vsocket->conn_list, conn, next); > - free(conn); > - } > - pthread_mutex_unlock(&vsocket->conn_mutex); > - > - if (vsocket->is_server) { > - /* > - * If r/wcb is executing, release vhost_user's > - * mutex lock, and try again since the r/wcb > - * may use the mutex lock. > - */ > - if (fdset_try_del(&vhost_user.fdset, > - vsocket->socket_fd) == -1) { > - pthread_mutex_unlock(&vhost_user.mutex); > - goto again; > - } > - > - close(vsocket->socket_fd); > - unlink(path); > - } else if (vsocket->reconnect) { > - vhost_user_remove_reconnect(vsocket); > + if (vsocket->is_server) { > + /* > + * If r/wcb is executing, release vhost_user's > + * mutex lock, and try again since the r/wcb > + * may use the mutex lock. > + */ > + if (fdset_try_del(&vhost_user.fdset, > vsocket->socket_fd) == > -1) { > + pthread_mutex_unlock(&vhost_user.mutex); > + goto again; > } > + } else if (vsocket->reconnect) { > + vhost_user_remove_reconnect(vsocket); > + } > > - pthread_mutex_destroy(&vsocket->conn_mutex); > - vhost_user_socket_mem_free(vsocket); > + pthread_mutex_lock(&vsocket->conn_mutex); > + for (conn = TAILQ_FIRST(&vsocket->conn_list); > + conn != NULL; > + conn = next) { > + next = TAILQ_NEXT(conn, next); > > - count = --vhost_user.vsocket_cnt; > - vhost_user.vsockets[i] = vhost_user.vsockets[count]; > - vhost_user.vsockets[count] = NULL; > - pthread_mutex_unlock(&vhost_user.mutex); > + /* > + * If r/wcb is executing, release vsocket's > + * conn_mutex and vhost_user's mutex locks, and > + * try again since the r/wcb may use the > + * conn_mutex and mutex locks. > + */ > + if (fdset_try_del(&vhost_user.fdset, > + conn->connfd) == -1) { > + pthread_mutex_unlock(&vsocket->conn_mutex); > + pthread_mutex_unlock(&vhost_user.mutex); > + goto again; > + } > + > + VHOST_LOG_CONFIG(INFO, > + "free connfd = %d for device '%s'\n", > + conn->connfd, path); > + close(conn->connfd); > + vhost_destroy_device(conn->vid); > + TAILQ_REMOVE(&vsocket->conn_list, conn, next); > + free(conn); > + } > + pthread_mutex_unlock(&vsocket->conn_mutex); > > - return 0; > + if (vsocket->is_server) { > + close(vsocket->socket_fd); > + unlink(path); > } I think you miss my comment in V5 of asking why this is not moved up after fdset_try_del server socket fd. Thanks, Chenbo > + > + pthread_mutex_destroy(&vsocket->conn_mutex); > + vhost_user_socket_mem_free(vsocket); > + > + count = --vhost_user.vsocket_cnt; > + vhost_user.vsockets[i] = vhost_user.vsockets[count]; > + vhost_user.vsockets[count] = NULL; > + pthread_mutex_unlock(&vhost_user.mutex); > + return 0; > } > pthread_mutex_unlock(&vhost_user.mutex); > > -- > 2.32.0 >