Hi Erik,

> Subject: [PATCH v2] lib/timer: relax barrier for status update
> 
> Volatile has no ordering semantics. The rte_timer structure defines timer
> status as a volatile variable and uses the rte_r/wmb barrier to guarantee
> inter-thread visibility.
> 
> This patch optimized the volatile operation with c11 atomic operations and
> one-way barrier to save the performance penalty. According to the
> timer_perf_autotest benchmarking results, this patch can uplift 10%~16%
> timer appending performance, 3%~20% timer resetting performance and 45%
> timer callbacks scheduling performance on aarch64 and no loss in
> performance for x86.
> 
> Suggested-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>
> Signed-off-by: Phil Yang <phil.y...@arm.com>
> Reviewed-by: Gavin Hu <gavin...@arm.com>
> 
> ---
> This patch depends on patch:
> http://patchwork.dpdk.org/patch/65997/
> 
> v2:
> 1. Changed the memory ordering comment in timer_set_config_state.
> 2. It is still using built-ins as the wrapper functions for C11 built-ins are 
> not
> defined yet.
It is too late to get the wrapper functions done for 20.05. It was decided in 
yesterday's tech board meeting to go ahead with C11 atomic built-ins (since 
there is lot of code in DPDK that uses C11 built-ins). If there are no further 
comments, can you please provide your ack?

> 
>  lib/librte_timer/rte_timer.c | 85 ++++++++++++++++++++++++++++++-----------
> ---
>  lib/librte_timer/rte_timer.h |  2 +-
>  2 files changed, 60 insertions(+), 27 deletions(-)
> 
> diff --git a/lib/librte_timer/rte_timer.c b/lib/librte_timer/rte_timer.c index
> 269e921..ba17216 100644
> --- a/lib/librte_timer/rte_timer.c
> +++ b/lib/librte_timer/rte_timer.c
> @@ -10,7 +10,6 @@
>  #include <assert.h>
>  #include <sys/queue.h>
> 
> -#include <rte_atomic.h>
>  #include <rte_common.h>
>  #include <rte_cycles.h>
>  #include <rte_eal_memconfig.h>
> @@ -218,7 +217,7 @@ rte_timer_init(struct rte_timer *tim)
> 
>       status.state = RTE_TIMER_STOP;
>       status.owner = RTE_TIMER_NO_OWNER;
> -     tim->status.u32 = status.u32;
> +     __atomic_store_n(&tim->status.u32, status.u32,
> __ATOMIC_RELAXED);
>  }
> 
>  /*
> @@ -239,9 +238,9 @@ timer_set_config_state(struct rte_timer *tim,
> 
>       /* wait that the timer is in correct status before update,
>        * and mark it as being configured */
> -     while (success == 0) {
> -             prev_status.u32 = tim->status.u32;
> +     prev_status.u32 = __atomic_load_n(&tim->status.u32,
> __ATOMIC_RELAXED);
> 
> +     while (success == 0) {
>               /* timer is running on another core
>                * or ready to run on local core, exit
>                */
> @@ -258,9 +257,15 @@ timer_set_config_state(struct rte_timer *tim,
>                * mark it atomically as being configured */
>               status.state = RTE_TIMER_CONFIG;
>               status.owner = (int16_t)lcore_id;
> -             success = rte_atomic32_cmpset(&tim->status.u32,
> -                                           prev_status.u32,
> -                                           status.u32);
> +             /* CONFIG states are acting as locked states. If the
> +              * timer is in CONFIG state, the state cannot be changed
> +              * by other threads. So, we should use ACQUIRE here.
> +              */
> +             success = __atomic_compare_exchange_n(&tim->status.u32,
> +                                           &prev_status.u32,
> +                                           status.u32, 0,
> +                                           __ATOMIC_ACQUIRE,
> +                                           __ATOMIC_RELAXED);
>       }
> 
>       ret_prev_status->u32 = prev_status.u32; @@ -279,20 +284,27 @@
> timer_set_running_state(struct rte_timer *tim)
> 
>       /* wait that the timer is in correct status before update,
>        * and mark it as running */
> -     while (success == 0) {
> -             prev_status.u32 = tim->status.u32;
> +     prev_status.u32 = __atomic_load_n(&tim->status.u32,
> __ATOMIC_RELAXED);
> 
> +     while (success == 0) {
>               /* timer is not pending anymore */
>               if (prev_status.state != RTE_TIMER_PENDING)
>                       return -1;
> 
>               /* here, we know that timer is stopped or pending,
> -              * mark it atomically as being configured */
> +              * mark it atomically as being running
> +              */
>               status.state = RTE_TIMER_RUNNING;
>               status.owner = (int16_t)lcore_id;
> -             success = rte_atomic32_cmpset(&tim->status.u32,
> -                                           prev_status.u32,
> -                                           status.u32);
> +             /* RUNNING states are acting as locked states. If the
> +              * timer is in RUNNING state, the state cannot be changed
> +              * by other threads. So, we should use ACQUIRE here.
> +              */
> +             success = __atomic_compare_exchange_n(&tim->status.u32,
> +                                           &prev_status.u32,
> +                                           status.u32, 0,
> +                                           __ATOMIC_ACQUIRE,
> +                                           __ATOMIC_RELAXED);
>       }
> 
>       return 0;
> @@ -520,10 +532,12 @@ __rte_timer_reset(struct rte_timer *tim, uint64_t
> expire,
> 
>       /* update state: as we are in CONFIG state, only us can modify
>        * the state so we don't need to use cmpset() here */
> -     rte_wmb();
>       status.state = RTE_TIMER_PENDING;
>       status.owner = (int16_t)tim_lcore;
> -     tim->status.u32 = status.u32;
> +     /* The "RELEASE" ordering guarantees the memory operations above
> +      * the status update are observed before the update by all threads
> +      */
> +     __atomic_store_n(&tim->status.u32, status.u32, __ATOMIC_RELEASE);
> 
>       if (tim_lcore != lcore_id || !local_is_locked)
>               rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);
> @@ -600,10 +614,12 @@ __rte_timer_stop(struct rte_timer *tim, int
> local_is_locked,
>       }
> 
>       /* mark timer as stopped */
> -     rte_wmb();
>       status.state = RTE_TIMER_STOP;
>       status.owner = RTE_TIMER_NO_OWNER;
> -     tim->status.u32 = status.u32;
> +     /* The "RELEASE" ordering guarantees the memory operations above
> +      * the status update are observed before the update by all threads
> +      */
> +     __atomic_store_n(&tim->status.u32, status.u32, __ATOMIC_RELEASE);
> 
>       return 0;
>  }
> @@ -637,7 +653,8 @@ rte_timer_stop_sync(struct rte_timer *tim)  int
> rte_timer_pending(struct rte_timer *tim)  {
> -     return tim->status.state == RTE_TIMER_PENDING;
> +     return __atomic_load_n(&tim->status.state,
> +                             __ATOMIC_RELAXED) ==
> RTE_TIMER_PENDING;
>  }
> 
>  /* must be called periodically, run all timer that expired */ @@ -739,8
> +756,12 @@ __rte_timer_manage(struct rte_timer_data *timer_data)
>                       /* remove from done list and mark timer as stopped
> */
>                       status.state = RTE_TIMER_STOP;
>                       status.owner = RTE_TIMER_NO_OWNER;
> -                     rte_wmb();
> -                     tim->status.u32 = status.u32;
> +                     /* The "RELEASE" ordering guarantees the memory
> +                      * operations above the status update are observed
> +                      * before the update by all threads
> +                      */
> +                     __atomic_store_n(&tim->status.u32, status.u32,
> +                             __ATOMIC_RELEASE);
>               }
>               else {
>                       /* keep it in list and mark timer as pending */ @@ -
> 748,8 +769,12 @@ __rte_timer_manage(struct rte_timer_data *timer_data)
>                       status.state = RTE_TIMER_PENDING;
>                       __TIMER_STAT_ADD(priv_timer, pending, 1);
>                       status.owner = (int16_t)lcore_id;
> -                     rte_wmb();
> -                     tim->status.u32 = status.u32;
> +                     /* The "RELEASE" ordering guarantees the memory
> +                      * operations above the status update are observed
> +                      * before the update by all threads
> +                      */
> +                     __atomic_store_n(&tim->status.u32, status.u32,
> +                             __ATOMIC_RELEASE);
>                       __rte_timer_reset(tim, tim->expire + tim->period,
>                               tim->period, lcore_id, tim->f, tim->arg, 1,
>                               timer_data);
> @@ -919,8 +944,12 @@ rte_timer_alt_manage(uint32_t timer_data_id,
>                       /* remove from done list and mark timer as stopped
> */
>                       status.state = RTE_TIMER_STOP;
>                       status.owner = RTE_TIMER_NO_OWNER;
> -                     rte_wmb();
> -                     tim->status.u32 = status.u32;
> +                     /* The "RELEASE" ordering guarantees the memory
> +                      * operations above the status update are observed
> +                      * before the update by all threads
> +                      */
> +                     __atomic_store_n(&tim->status.u32, status.u32,
> +                             __ATOMIC_RELEASE);
>               } else {
>                       /* keep it in list and mark timer as pending */
>                       rte_spinlock_lock(
> @@ -928,8 +957,12 @@ rte_timer_alt_manage(uint32_t timer_data_id,
>                       status.state = RTE_TIMER_PENDING;
>                       __TIMER_STAT_ADD(data->priv_timer, pending, 1);
>                       status.owner = (int16_t)this_lcore;
> -                     rte_wmb();
> -                     tim->status.u32 = status.u32;
> +                     /* The "RELEASE" ordering guarantees the memory
> +                      * operations above the status update are observed
> +                      * before the update by all threads
> +                      */
> +                     __atomic_store_n(&tim->status.u32, status.u32,
> +                             __ATOMIC_RELEASE);
>                       __rte_timer_reset(tim, tim->expire + tim->period,
>                               tim->period, this_lcore, tim->f, tim->arg, 1,
>                               data);
> diff --git a/lib/librte_timer/rte_timer.h b/lib/librte_timer/rte_timer.h index
> c6b3d45..df533fa 100644
> --- a/lib/librte_timer/rte_timer.h
> +++ b/lib/librte_timer/rte_timer.h
> @@ -101,7 +101,7 @@ struct rte_timer
>  {
>       uint64_t expire;       /**< Time when timer expire. */
>       struct rte_timer *sl_next[MAX_SKIPLIST_DEPTH];
> -     volatile union rte_timer_status status; /**< Status of timer. */
> +     union rte_timer_status status; /**< Status of timer. */
>       uint64_t period;       /**< Period of timer (0 if not periodic). */
>       rte_timer_cb_t f;      /**< Callback function. */
>       void *arg;             /**< Argument to callback function. */
> --
> 2.7.4

Reply via email to