When a secondary process quits, its mp_socket* file still exists, which causes rte_mp_request_sync to fail when it tries to send a message on that floating (stale) socket.
This patch fixes the issue by introducing a function, rte_mp_channel_fini. It is called by rte_eal_cleanup, and it closes the mp socket and deletes the mp_socket* file. Fixes: bacaa2754017 ("eal: add channel for multi-process communication") Cc: sta...@dpdk.org Signed-off-by: Qi Zhang <qi.z.zh...@intel.com> --- lib/librte_eal/common/eal_common_proc.c | 22 +++++++++++++++++++++- lib/librte_eal/common/eal_private.h | 6 ++++++ lib/librte_eal/linuxapp/eal/eal.c | 1 + 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c index 6b876590a..ff19bea15 100644 --- a/lib/librte_eal/common/eal_common_proc.c +++ b/lib/librte_eal/common/eal_common_proc.c @@ -37,6 +37,7 @@ static int mp_fd = -1; static char mp_filter[PATH_MAX]; /* Filter for secondary process sockets */ static char mp_dir_path[PATH_MAX]; /* The directory path for all mp sockets */ static pthread_mutex_t mp_mutex_action = PTHREAD_MUTEX_INITIALIZER; +static char peer_name[PATH_MAX]; struct action_entry { TAILQ_ENTRY(action_entry) next; @@ -511,9 +512,9 @@ async_reply_handle(void *arg) static int open_socket_fd(void) { - char peer_name[PATH_MAX] = {0}; struct sockaddr_un un; + peer_name[0] = '\0'; if (rte_eal_process_type() == RTE_PROC_SECONDARY) snprintf(peer_name, sizeof(peer_name), "%d_%"PRIx64, getpid(), rte_rdtsc()); @@ -542,6 +543,19 @@ open_socket_fd(void) return mp_fd; } +static void +close_socket_fd(void) +{ + char path[PATH_MAX]; + + if (mp_fd < 0) + return; + + close(mp_fd); + create_socket_path(peer_name, path, sizeof(path)); + unlink(path); +} + int rte_mp_channel_init(void) { @@ -602,6 +616,12 @@ rte_mp_channel_init(void) return 0; } +void +rte_mp_channel_fini(void) +{ + close_socket_fd(); +} + /** * Return -1, as fail to send message and it's caused by the local side. * Return 0, as fail to send message and it's caused by the remote side. 
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h index 442c6dc48..10b6857f6 100644 --- a/lib/librte_eal/common/eal_private.h +++ b/lib/librte_eal/common/eal_private.h @@ -259,6 +259,12 @@ struct rte_bus *rte_bus_find_by_device_name(const char *str); int rte_mp_channel_init(void); /** + * Primary/secondary communication cleanup. + */ + +void rte_mp_channel_fini(void); + +/** * @internal * Parse a device string and store its information in an * rte_devargs structure. diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index d252c8591..52df05e8d 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -1229,6 +1229,7 @@ rte_eal_cleanup(void) if (rte_eal_process_type() == RTE_PROC_PRIMARY) rte_memseg_walk(mark_freeable, NULL); rte_service_finalize(); + rte_mp_channel_fini(); return 0; } -- 2.13.6