Hi Luc, On Wed, Apr 07, 2021 at 08:53:23AM -0400, Luc Pelletier wrote: > The creation of control threads uses a pthread barrier for > synchronization. This patch fixes a race condition where the pthread > barrier could get destroyed while one of the threads has not yet > returned from the pthread_barrier_wait function, which could result in > undefined behaviour. > > Fixes: 3a0d465d4c53 ("eal: fix use-after-free on control thread creation") > Cc: jianfeng....@intel.com > Cc: sta...@dpdk.org > > Signed-off-by: Luc Pelletier <lucp.at.w...@gmail.com> > --- > > Same as v4 except that I fixed 2 minor style issues flagged by patchwork. > > lib/librte_eal/common/eal_common_thread.c | 52 +++++++++++------------ > 1 file changed, 25 insertions(+), 27 deletions(-) > > diff --git a/lib/librte_eal/common/eal_common_thread.c > b/lib/librte_eal/common/eal_common_thread.c > index 73a055902..c1044e795 100644 > --- a/lib/librte_eal/common/eal_common_thread.c > +++ b/lib/librte_eal/common/eal_common_thread.c > @@ -170,11 +170,19 @@ struct rte_thread_ctrl_params { > void *(*start_routine)(void *); > void *arg; > pthread_barrier_t configured; > + unsigned int refcnt; > }; > > +static void ctrl_params_free(struct rte_thread_ctrl_params *params) > +{ > + if (__atomic_sub_fetch(&params->refcnt, 1, __ATOMIC_ACQ_REL) == 0) { > + pthread_barrier_destroy(&params->configured); > + free(params); > + } > +} > + > static void *ctrl_thread_init(void *arg) > { > - int ret; > struct internal_config *internal_conf = > eal_get_internal_configuration(); > rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset; > @@ -184,11 +192,8 @@ static void *ctrl_thread_init(void *arg) > > __rte_thread_init(rte_lcore_id(), cpuset); > > - ret = pthread_barrier_wait(&params->configured); > - if (ret == PTHREAD_BARRIER_SERIAL_THREAD) { > - pthread_barrier_destroy(&params->configured); > - free(params); > - } > + pthread_barrier_wait(&params->configured); > + ctrl_params_free(params); > > return start_routine(routine_arg); > } > @@ -210,14 
+215,15 @@ rte_ctrl_thread_create(pthread_t *thread, const char > *name, > > params->start_routine = start_routine; > params->arg = arg; > + params->refcnt = 2; > > - pthread_barrier_init(&params->configured, NULL, 2); > + ret = pthread_barrier_init(&params->configured, NULL, 2); > + if (ret != 0) > + goto fail_no_barrier; > > ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params); > - if (ret != 0) { > - free(params); > - return -ret; > - } > + if (ret != 0) > + goto fail_with_barrier; > > if (name != NULL) { > ret = rte_thread_setname(*thread, name); > @@ -227,25 +233,17 @@ rte_ctrl_thread_create(pthread_t *thread, const char > *name, > } > > ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset); > - if (ret) > - goto fail; > + pthread_barrier_wait(&params->configured); > + ctrl_params_free(params); > > - ret = pthread_barrier_wait(&params->configured); > - if (ret == PTHREAD_BARRIER_SERIAL_THREAD) { > - pthread_barrier_destroy(&params->configured); > - free(params); > - } > + return -ret;
I think that leaving the thread running when pthread_setaffinity_np() returns an error is hard to understand from the API user's point of view. What about doing this on top of your patch? The idea is to set start_routine to NULL before the barrier if pthread_setaffinity_np() failed. That way there is no need to cancel the thread; it will exit by itself. @@ -187,14 +187,18 @@ static void *ctrl_thread_init(void *arg) eal_get_internal_configuration(); rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset; struct rte_thread_ctrl_params *params = arg; - void *(*start_routine)(void *) = params->start_routine; + void *(*start_routine)(void *); void *routine_arg = params->arg; __rte_thread_init(rte_lcore_id(), cpuset); pthread_barrier_wait(&params->configured); + start_routine = params->start_routine; ctrl_params_free(params); + if (start_routine == NULL) + return NULL; + return start_routine(routine_arg); } @@ -233,10 +237,18 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name, } ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset); + if (ret != 0) + params->start_routine = NULL; + pthread_barrier_wait(&params->configured); ctrl_params_free(params); - return -ret; + if (ret != 0) { + pthread_join(*thread, NULL); + return -ret; + } + + return 0; fail_with_barrier: pthread_barrier_destroy(&params->configured); Regards, Olivier