Hi Luc,

On Wed, Apr 07, 2021 at 08:53:23AM -0400, Luc Pelletier wrote:
> The creation of control threads uses a pthread barrier for
> synchronization. This patch fixes a race condition where the pthread
> barrier could get destroyed while one of the threads has not yet
> returned from the pthread_barrier_wait function, which could result in
> undefined behaviour.
> 
> Fixes: 3a0d465d4c53 ("eal: fix use-after-free on control thread creation")
> Cc: jianfeng....@intel.com
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Luc Pelletier <lucp.at.w...@gmail.com>
> ---
> 
> Same as v4 except that I fixed 2 minor style issues flagged by patchwork.
> 
>  lib/librte_eal/common/eal_common_thread.c | 52 +++++++++++------------
>  1 file changed, 25 insertions(+), 27 deletions(-)
> 
> diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
> index 73a055902..c1044e795 100644
> --- a/lib/librte_eal/common/eal_common_thread.c
> +++ b/lib/librte_eal/common/eal_common_thread.c
> @@ -170,11 +170,19 @@ struct rte_thread_ctrl_params {
>       void *(*start_routine)(void *);
>       void *arg;
>       pthread_barrier_t configured;
> +     unsigned int refcnt;
>  };
>  
> +static void ctrl_params_free(struct rte_thread_ctrl_params *params)
> +{
> +     if (__atomic_sub_fetch(&params->refcnt, 1, __ATOMIC_ACQ_REL) == 0) {
> +             pthread_barrier_destroy(&params->configured);
> +             free(params);
> +     }
> +}
> +
>  static void *ctrl_thread_init(void *arg)
>  {
> -     int ret;
>       struct internal_config *internal_conf =
>               eal_get_internal_configuration();
>       rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset;
> @@ -184,11 +192,8 @@ static void *ctrl_thread_init(void *arg)
>  
>       __rte_thread_init(rte_lcore_id(), cpuset);
>  
> -     ret = pthread_barrier_wait(&params->configured);
> -     if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
> -             pthread_barrier_destroy(&params->configured);
> -             free(params);
> -     }
> +     pthread_barrier_wait(&params->configured);
> +     ctrl_params_free(params);
>  
>       return start_routine(routine_arg);
>  }
> @@ -210,14 +215,15 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
>  
>       params->start_routine = start_routine;
>       params->arg = arg;
> +     params->refcnt = 2;
>  
> -     pthread_barrier_init(&params->configured, NULL, 2);
> +     ret = pthread_barrier_init(&params->configured, NULL, 2);
> +     if (ret != 0)
> +             goto fail_no_barrier;
>  
>       ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params);
> -     if (ret != 0) {
> -             free(params);
> -             return -ret;
> -     }
> +     if (ret != 0)
> +             goto fail_with_barrier;
>  
>       if (name != NULL) {
>               ret = rte_thread_setname(*thread, name);
> @@ -227,25 +233,17 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
>       }
>  
>       ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset);
> -     if (ret)
> -             goto fail;
> +     pthread_barrier_wait(&params->configured);
> +     ctrl_params_free(params);
>  
> -     ret = pthread_barrier_wait(&params->configured);
> -     if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
> -             pthread_barrier_destroy(&params->configured);
> -             free(params);
> -     }
> +     return -ret;
I think that leaving the thread running when pthread_setaffinity_np()
returns an error is confusing from the API user's point of view: the
function reports a failure, yet the new thread still goes on to run
start_routine.

What about doing this on top of your patch? The idea is to set
start_routine to NULL before the barrier if pthread_setaffinity_np()
fails, so there is no need to cancel the thread: it will exit by
itself.

  @@ -187,14 +187,18 @@ static void *ctrl_thread_init(void *arg)
                  eal_get_internal_configuration();
          rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset;
          struct rte_thread_ctrl_params *params = arg;
  -       void *(*start_routine)(void *) = params->start_routine;
  +       void *(*start_routine)(void *);
          void *routine_arg = params->arg;
   
          __rte_thread_init(rte_lcore_id(), cpuset);
   
          pthread_barrier_wait(&params->configured);
  +       start_routine = params->start_routine;
          ctrl_params_free(params);
   
  +       if (start_routine == NULL)
  +               return NULL;
  +
          return start_routine(routine_arg);
   }
   
  @@ -233,10 +237,18 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
          }
   
          ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset);
  +       if (ret != 0)
  +               params->start_routine = NULL;
  +
          pthread_barrier_wait(&params->configured);
          ctrl_params_free(params);
   
  -       return -ret;
  +       if (ret != 0) {
  +               pthread_join(*thread, NULL);
  +               return -ret;
  +       }
  +
  +       return 0;
   
   fail_with_barrier:
          pthread_barrier_destroy(&params->configured);


Regards,
Olivier

Reply via email to