<snip> > > Hi Luc, > > On Wed, Apr 07, 2021 at 08:53:23AM -0400, Luc Pelletier wrote: > > The creation of control threads uses a pthread barrier for > > synchronization. This patch fixes a race condition where the pthread > > barrier could get destroyed while one of the threads has not yet > > returned from the pthread_barrier_wait function, which could result in > > undefined behaviour. > > > > Fixes: 3a0d465d4c53 ("eal: fix use-after-free on control thread > > creation") > > Cc: jianfeng....@intel.com > > Cc: sta...@dpdk.org > > > > Signed-off-by: Luc Pelletier <lucp.at.w...@gmail.com> > > --- > > > > Same as v4 except that I fixed 2 minor style issues flagged by patchwork. > > > > lib/librte_eal/common/eal_common_thread.c | 52 > > +++++++++++------------ > > 1 file changed, 25 insertions(+), 27 deletions(-) > > > > diff --git a/lib/librte_eal/common/eal_common_thread.c > > b/lib/librte_eal/common/eal_common_thread.c > > index 73a055902..c1044e795 100644 > > --- a/lib/librte_eal/common/eal_common_thread.c > > +++ b/lib/librte_eal/common/eal_common_thread.c > > @@ -170,11 +170,19 @@ struct rte_thread_ctrl_params { > > void *(*start_routine)(void *); > > void *arg; > > pthread_barrier_t configured; > > + unsigned int refcnt; > > }; > > > > +static void ctrl_params_free(struct rte_thread_ctrl_params *params) { > > + if (__atomic_sub_fetch(&params->refcnt, 1, __ATOMIC_ACQ_REL) == > 0) { > > + pthread_barrier_destroy(&params->configured); > > + free(params); > > + } > > +} > > + > > static void *ctrl_thread_init(void *arg) { > > - int ret; > > struct internal_config *internal_conf = > > eal_get_internal_configuration(); > > rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset; @@ -184,11 > > +192,8 @@ static void *ctrl_thread_init(void *arg) > > > > __rte_thread_init(rte_lcore_id(), cpuset); > > > > - ret = pthread_barrier_wait(&params->configured); > > - if (ret == PTHREAD_BARRIER_SERIAL_THREAD) { > > - pthread_barrier_destroy(&params->configured); > > - free(params); > > - } > > + 
pthread_barrier_wait(&params->configured); > > + ctrl_params_free(params); > > > > return start_routine(routine_arg); > > } > > @@ -210,14 +215,15 @@ rte_ctrl_thread_create(pthread_t *thread, const > > char *name, > > > > params->start_routine = start_routine; > > params->arg = arg; > > + params->refcnt = 2; > > > > - pthread_barrier_init(&params->configured, NULL, 2); > > + ret = pthread_barrier_init(&params->configured, NULL, 2); > > + if (ret != 0) > > + goto fail_no_barrier; > > > > ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params); > > - if (ret != 0) { > > - free(params); > > - return -ret; > > - } > > + if (ret != 0) > > + goto fail_with_barrier; > > > > if (name != NULL) { > > ret = rte_thread_setname(*thread, name); @@ -227,25 > +233,17 @@ > > rte_ctrl_thread_create(pthread_t *thread, const char *name, > > } > > > > ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset); > > - if (ret) > > - goto fail; > > + pthread_barrier_wait(&params->configured); > > + ctrl_params_free(params); > > > > - ret = pthread_barrier_wait(&params->configured); > > - if (ret == PTHREAD_BARRIER_SERIAL_THREAD) { > > - pthread_barrier_destroy(&params->configured); > > - free(params); > > - } > > + return -ret; > > I think not killing the thread when pthread_setaffinity_np() returns an error > is > not very understandable from the API user point of view. Agree.
> > What about doing this on top of your patch? The idea is to set start_routine > to NULL before the barrier if pthread_setaffinity_np() failed. So there is no > need to cancel the thread, it will exit by itself. How about using the pthread_attr_setaffinity_np API? It is deviating from the documentation of the 'rte_ctrl_thread_create'. But, from the user perspective, the behavior should not change. This way we do not have to handle the error after the thread is launched. > > @@ -187,14 +187,18 @@ static void *ctrl_thread_init(void *arg) > eal_get_internal_configuration(); > rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset; > struct rte_thread_ctrl_params *params = arg; > - void *(*start_routine)(void *) = params->start_routine; > + void *(*start_routine)(void *); > void *routine_arg = params->arg; > > __rte_thread_init(rte_lcore_id(), cpuset); > > pthread_barrier_wait(&params->configured); > + start_routine = params->start_routine; > ctrl_params_free(params); > > + if (start_routine == NULL) > + return NULL; > + > return start_routine(routine_arg); > } > > @@ -233,10 +237,18 @@ rte_ctrl_thread_create(pthread_t *thread, const > char *name, > } > > ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset); > + if (ret != 0) > + params->start_routine = NULL; > + > pthread_barrier_wait(&params->configured); > ctrl_params_free(params); > > - return -ret; > + if (ret != 0) { > + pthread_join(*thread, NULL); > + return -ret; > + } > + > + return 0; > > fail_with_barrier: > pthread_barrier_destroy(&params->configured); > > > Regards, > Olivier