Who could review this fix please?
29/11/2018 19:53, Erik Gabriel Carrillo:
> rte_timer_manage() adds expired timers to a "run list", and walks the
> list, transitioning each timer from the PENDING to the RUNNING state.
> If another lcore resets or stops the timer at precisely this
> moment, the timer state would instead be set to CONFIG by that other
> lcore, which would cause timer_manage() to skip over it. This is
> expected behavior.
>
> However, if a timer expires quickly enough, there exists the
> following race condition that causes the timer_manage() routine to
> misinterpret a timer in CONFIG state, resulting in lost timers:
>
> - Thread A:
> - starts a timer with rte_timer_reset()
> - the timer is moved to CONFIG state
> - the spinlock associated with the appropriate skiplist is acquired
> - timer is inserted into the skiplist
> - the spinlock is released
> - Thread B:
> - executes rte_timer_manage()
> - find above timer as expired, add it to run list
> - walk run list, see above timer still in CONFIG state, unlink it from
> run list and continue on
> - Thread A:
> - move timer to PENDING state
> - return from rte_timer_reset()
> - timer is now in PENDING state, but not actually linked into skiplist
> and will never get processed further by rte_timer_manage()
>
> This commit fixes this race condition by only releasing the spinlock
> after the timer state has been transitioned from CONFIG to PENDING,
> which prevents rte_timer_manage() from seeing an incorrect state.
>
> Fixes: 9b15ba895b9f ("timer: use a skip list")
> Signed-off-by: Erik Gabriel Carrillo <erik.g.carri...@intel.com>
> ---
> lib/librte_timer/rte_timer.c | 28 ++++++++++++++--------------
> 1 file changed, 14 insertions(+), 14 deletions(-)
>
> diff --git a/lib/librte_timer/rte_timer.c b/lib/librte_timer/rte_timer.c
> index 590488c..30c7b0a 100644
> --- a/lib/librte_timer/rte_timer.c
> +++ b/lib/librte_timer/rte_timer.c
> @@ -241,24 +241,17 @@ timer_get_prev_entries_for_node(struct rte_timer *tim,
> unsigned tim_lcore,
> }
> }
>
> -/*
> - * add in list, lock if needed
> +/* call with lock held as necessary
> + * add in list
> * timer must be in config state
> * timer must not be in a list
> */
> static void
> -timer_add(struct rte_timer *tim, unsigned tim_lcore, int local_is_locked)
> +timer_add(struct rte_timer *tim, unsigned int tim_lcore)
> {
> - unsigned lcore_id = rte_lcore_id();
> unsigned lvl;
> struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];
>
> - /* if timer needs to be scheduled on another core, we need to
> - * lock the list; if it is on local core, we need to lock if
> - * we are not called from rte_timer_manage() */
> - if (tim_lcore != lcore_id || !local_is_locked)
> - rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);
> -
> /* find where exactly this element goes in the list of elements
> * for each depth. */
> timer_get_prev_entries(tim->expire, tim_lcore, prev);
> @@ -282,9 +275,6 @@ timer_add(struct rte_timer *tim, unsigned tim_lcore, int
> local_is_locked)
> * NOTE: this is not atomic on 32-bit*/
> priv_timer[tim_lcore].pending_head.expire = priv_timer[tim_lcore].\
> pending_head.sl_next[0]->expire;
> -
> - if (tim_lcore != lcore_id || !local_is_locked)
> - rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);
> }
>
> /*
> @@ -379,8 +369,15 @@ __rte_timer_reset(struct rte_timer *tim, uint64_t expire,
> tim->f = fct;
> tim->arg = arg;
>
> + /* if timer needs to be scheduled on another core, we need to
> + * lock the destination list; if it is on local core, we need to lock if
> + * we are not called from rte_timer_manage()
> + */
> + if (tim_lcore != lcore_id || !local_is_locked)
> + rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);
> +
> __TIMER_STAT_ADD(pending, 1);
> - timer_add(tim, tim_lcore, local_is_locked);
> + timer_add(tim, tim_lcore);
>
> /* update state: as we are in CONFIG state, only us can modify
> * the state so we don't need to use cmpset() here */
> @@ -389,6 +386,9 @@ __rte_timer_reset(struct rte_timer *tim, uint64_t expire,
> status.owner = (int16_t)tim_lcore;
> tim->status.u32 = status.u32;
>
> + if (tim_lcore != lcore_id || !local_is_locked)
> + rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);
> +
> return 0;
> }
>
>