Hi Erik, > Subject: [PATCH v2] lib/timer: relax barrier for status update > > Volatile has no ordering semantics. The rte_timer structure defines timer > status as a volatile variable and uses the rte_r/wmb barrier to guarantee > inter-thread visibility. > > This patch optimized the volatile operation with c11 atomic operations and > one-way barrier to save the performance penalty. According to the > timer_perf_autotest benchmarking results, this patch can uplift 10%~16% > timer appending performance, 3%~20% timer resetting performance and 45% > timer callbacks scheduling performance on aarch64 and no loss in > performance for x86. > > Suggested-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com> > Signed-off-by: Phil Yang <phil.y...@arm.com> > Reviewed-by: Gavin Hu <gavin...@arm.com> > > --- > This patch depends on patch: > http://patchwork.dpdk.org/patch/65997/ > > v2: > 1. Changed the memory ordering comment in timer_set_config_state. > 2. It is still using built-ins as the wrapper functions for C11 built-ins are > not > defined yet. It is too late to get the wrapper functions done for 20.05. It was decided in yesterday's tech board meeting to go ahead with C11 atomic built-ins (since there is a lot of code in DPDK that uses C11 built-ins). If there are no further comments, can you please provide your ack?
> > lib/librte_timer/rte_timer.c | 85 ++++++++++++++++++++++++++++++----------- > --- > lib/librte_timer/rte_timer.h | 2 +- > 2 files changed, 60 insertions(+), 27 deletions(-) > > diff --git a/lib/librte_timer/rte_timer.c b/lib/librte_timer/rte_timer.c index > 269e921..ba17216 100644 > --- a/lib/librte_timer/rte_timer.c > +++ b/lib/librte_timer/rte_timer.c > @@ -10,7 +10,6 @@ > #include <assert.h> > #include <sys/queue.h> > > -#include <rte_atomic.h> > #include <rte_common.h> > #include <rte_cycles.h> > #include <rte_eal_memconfig.h> > @@ -218,7 +217,7 @@ rte_timer_init(struct rte_timer *tim) > > status.state = RTE_TIMER_STOP; > status.owner = RTE_TIMER_NO_OWNER; > - tim->status.u32 = status.u32; > + __atomic_store_n(&tim->status.u32, status.u32, > __ATOMIC_RELAXED); > } > > /* > @@ -239,9 +238,9 @@ timer_set_config_state(struct rte_timer *tim, > > /* wait that the timer is in correct status before update, > * and mark it as being configured */ > - while (success == 0) { > - prev_status.u32 = tim->status.u32; > + prev_status.u32 = __atomic_load_n(&tim->status.u32, > __ATOMIC_RELAXED); > > + while (success == 0) { > /* timer is running on another core > * or ready to run on local core, exit > */ > @@ -258,9 +257,15 @@ timer_set_config_state(struct rte_timer *tim, > * mark it atomically as being configured */ > status.state = RTE_TIMER_CONFIG; > status.owner = (int16_t)lcore_id; > - success = rte_atomic32_cmpset(&tim->status.u32, > - prev_status.u32, > - status.u32); > + /* CONFIG states are acting as locked states. If the > + * timer is in CONFIG state, the state cannot be changed > + * by other threads. So, we should use ACQUIRE here. 
> + */ > + success = __atomic_compare_exchange_n(&tim->status.u32, > + &prev_status.u32, > + status.u32, 0, > + __ATOMIC_ACQUIRE, > + __ATOMIC_RELAXED); > } > > ret_prev_status->u32 = prev_status.u32; @@ -279,20 +284,27 @@ > timer_set_running_state(struct rte_timer *tim) > > /* wait that the timer is in correct status before update, > * and mark it as running */ > - while (success == 0) { > - prev_status.u32 = tim->status.u32; > + prev_status.u32 = __atomic_load_n(&tim->status.u32, > __ATOMIC_RELAXED); > > + while (success == 0) { > /* timer is not pending anymore */ > if (prev_status.state != RTE_TIMER_PENDING) > return -1; > > /* here, we know that timer is stopped or pending, > - * mark it atomically as being configured */ > + * mark it atomically as being running > + */ > status.state = RTE_TIMER_RUNNING; > status.owner = (int16_t)lcore_id; > - success = rte_atomic32_cmpset(&tim->status.u32, > - prev_status.u32, > - status.u32); > + /* RUNNING states are acting as locked states. If the > + * timer is in RUNNING state, the state cannot be changed > + * by other threads. So, we should use ACQUIRE here. 
> + */ > + success = __atomic_compare_exchange_n(&tim->status.u32, > + &prev_status.u32, > + status.u32, 0, > + __ATOMIC_ACQUIRE, > + __ATOMIC_RELAXED); > } > > return 0; > @@ -520,10 +532,12 @@ __rte_timer_reset(struct rte_timer *tim, uint64_t > expire, > > /* update state: as we are in CONFIG state, only us can modify > * the state so we don't need to use cmpset() here */ > - rte_wmb(); > status.state = RTE_TIMER_PENDING; > status.owner = (int16_t)tim_lcore; > - tim->status.u32 = status.u32; > + /* The "RELEASE" ordering guarantees the memory operations above > + * the status update are observed before the update by all threads > + */ > + __atomic_store_n(&tim->status.u32, status.u32, __ATOMIC_RELEASE); > > if (tim_lcore != lcore_id || !local_is_locked) > rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock); > @@ -600,10 +614,12 @@ __rte_timer_stop(struct rte_timer *tim, int > local_is_locked, > } > > /* mark timer as stopped */ > - rte_wmb(); > status.state = RTE_TIMER_STOP; > status.owner = RTE_TIMER_NO_OWNER; > - tim->status.u32 = status.u32; > + /* The "RELEASE" ordering guarantees the memory operations above > + * the status update are observed before the update by all threads > + */ > + __atomic_store_n(&tim->status.u32, status.u32, __ATOMIC_RELEASE); > > return 0; > } > @@ -637,7 +653,8 @@ rte_timer_stop_sync(struct rte_timer *tim) int > rte_timer_pending(struct rte_timer *tim) { > - return tim->status.state == RTE_TIMER_PENDING; > + return __atomic_load_n(&tim->status.state, > + __ATOMIC_RELAXED) == > RTE_TIMER_PENDING; > } > > /* must be called periodically, run all timer that expired */ @@ -739,8 > +756,12 @@ __rte_timer_manage(struct rte_timer_data *timer_data) > /* remove from done list and mark timer as stopped > */ > status.state = RTE_TIMER_STOP; > status.owner = RTE_TIMER_NO_OWNER; > - rte_wmb(); > - tim->status.u32 = status.u32; > + /* The "RELEASE" ordering guarantees the memory > + * operations above the status update are observed > + * before 
the update by all threads > + */ > + __atomic_store_n(&tim->status.u32, status.u32, > + __ATOMIC_RELEASE); > } > else { > /* keep it in list and mark timer as pending */ @@ - > 748,8 +769,12 @@ __rte_timer_manage(struct rte_timer_data *timer_data) > status.state = RTE_TIMER_PENDING; > __TIMER_STAT_ADD(priv_timer, pending, 1); > status.owner = (int16_t)lcore_id; > - rte_wmb(); > - tim->status.u32 = status.u32; > + /* The "RELEASE" ordering guarantees the memory > + * operations above the status update are observed > + * before the update by all threads > + */ > + __atomic_store_n(&tim->status.u32, status.u32, > + __ATOMIC_RELEASE); > __rte_timer_reset(tim, tim->expire + tim->period, > tim->period, lcore_id, tim->f, tim->arg, 1, > timer_data); > @@ -919,8 +944,12 @@ rte_timer_alt_manage(uint32_t timer_data_id, > /* remove from done list and mark timer as stopped > */ > status.state = RTE_TIMER_STOP; > status.owner = RTE_TIMER_NO_OWNER; > - rte_wmb(); > - tim->status.u32 = status.u32; > + /* The "RELEASE" ordering guarantees the memory > + * operations above the status update are observed > + * before the update by all threads > + */ > + __atomic_store_n(&tim->status.u32, status.u32, > + __ATOMIC_RELEASE); > } else { > /* keep it in list and mark timer as pending */ > rte_spinlock_lock( > @@ -928,8 +957,12 @@ rte_timer_alt_manage(uint32_t timer_data_id, > status.state = RTE_TIMER_PENDING; > __TIMER_STAT_ADD(data->priv_timer, pending, 1); > status.owner = (int16_t)this_lcore; > - rte_wmb(); > - tim->status.u32 = status.u32; > + /* The "RELEASE" ordering guarantees the memory > + * operations above the status update are observed > + * before the update by all threads > + */ > + __atomic_store_n(&tim->status.u32, status.u32, > + __ATOMIC_RELEASE); > __rte_timer_reset(tim, tim->expire + tim->period, > tim->period, this_lcore, tim->f, tim->arg, 1, > data); > diff --git a/lib/librte_timer/rte_timer.h b/lib/librte_timer/rte_timer.h index > c6b3d45..df533fa 100644 > --- 
a/lib/librte_timer/rte_timer.h > +++ b/lib/librte_timer/rte_timer.h > @@ -101,7 +101,7 @@ struct rte_timer > { > uint64_t expire; /**< Time when timer expire. */ > struct rte_timer *sl_next[MAX_SKIPLIST_DEPTH]; > - volatile union rte_timer_status status; /**< Status of timer. */ > + union rte_timer_status status; /**< Status of timer. */ > uint64_t period; /**< Period of timer (0 if not periodic). */ > rte_timer_cb_t f; /**< Callback function. */ > void *arg; /**< Argument to callback function. */ > -- > 2.7.4