<snip> > Subject: [PATCH v1 03/12] test/timer: use compiler atomic builtins for sync > > Convert rte_atomic usages to compiler atomic built-ins for lcore_state and > collisions sync. > > Signed-off-by: Joyce Kong <joyce.k...@arm.com> > Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com> > --- > app/test/test_timer.c | 28 ++++++++++++---------------- > app/test/test_timer_secondary.c | 1 - > 2 files changed, 12 insertions(+), 17 deletions(-) > > diff --git a/app/test/test_timer.c b/app/test/test_timer.c index > a10b2fe9da..9a3e1b53e4 100644 > --- a/app/test/test_timer.c > +++ b/app/test/test_timer.c > @@ -102,7 +102,6 @@ > #include <rte_eal.h> > #include <rte_per_lcore.h> > #include <rte_lcore.h> > -#include <rte_atomic.h> > #include <rte_timer.h> > #include <rte_random.h> > #include <rte_malloc.h> > @@ -203,7 +202,7 @@ timer_stress_main_loop(__rte_unused void *arg) > > /* Need to synchronize worker lcores through multiple steps. */ enum { > WORKER_WAITING = 1, WORKER_RUN_SIGNAL, WORKER_RUNNING, > WORKER_FINISHED }; -static rte_atomic16_t lcore_state[RTE_MAX_LCORE]; > +static uint16_t lcore_state[RTE_MAX_LCORE]; > > static void > main_init_workers(void) > @@ -211,7 +210,7 @@ main_init_workers(void) > unsigned i; > > RTE_LCORE_FOREACH_WORKER(i) { > - rte_atomic16_set(&lcore_state[i], WORKER_WAITING); > + __atomic_store_n(&lcore_state[i], WORKER_WAITING, > __ATOMIC_RELAXED); > } > } > > @@ -221,11 +220,10 @@ main_start_workers(void) > unsigned i; > > RTE_LCORE_FOREACH_WORKER(i) { > - rte_atomic16_set(&lcore_state[i], WORKER_RUN_SIGNAL); > + __atomic_store_n(&lcore_state[i], WORKER_RUN_SIGNAL, > +__ATOMIC_RELAXED); We need to use RELEASE order here. When this function is called in timer_stress2_main_loop, the main_lcore is releasing memory operations to worker cores.
> } > RTE_LCORE_FOREACH_WORKER(i) { > - while (rte_atomic16_read(&lcore_state[i]) != > WORKER_RUNNING) > - rte_pause(); > + rte_wait_until_equal_16(&lcore_state[i], WORKER_RUNNING, > +__ATOMIC_RELAXED); This should be ACQUIRE order. > } > } > > @@ -235,8 +233,7 @@ main_wait_for_workers(void) > unsigned i; > > RTE_LCORE_FOREACH_WORKER(i) { > - while (rte_atomic16_read(&lcore_state[i]) != > WORKER_FINISHED) > - rte_pause(); > + rte_wait_until_equal_16(&lcore_state[i], WORKER_FINISHED, > +__ATOMIC_RELAXED); We should use ACQUIRE here as the workers are releasing memory updates to the main lcore. > } > } > > @@ -245,9 +242,8 @@ worker_wait_to_start(void) { > unsigned lcore_id = rte_lcore_id(); > > - while (rte_atomic16_read(&lcore_state[lcore_id]) != > WORKER_RUN_SIGNAL) > - rte_pause(); > - rte_atomic16_set(&lcore_state[lcore_id], WORKER_RUNNING); > + rte_wait_until_equal_16(&lcore_state[lcore_id], > WORKER_RUN_SIGNAL, __ATOMIC_RELAXED); Since the worker threads will use the memory operations that happened in main_lcore, we need ACQUIRE memory ordering here. > + __atomic_store_n(&lcore_state[lcore_id], WORKER_RUNNING, > +__ATOMIC_RELAXED); This should be RELEASE memory order. > } > > static void > @@ -255,7 +251,7 @@ worker_finish(void) > { > unsigned lcore_id = rte_lcore_id(); > > - rte_atomic16_set(&lcore_state[lcore_id], WORKER_FINISHED); > + __atomic_store_n(&lcore_state[lcore_id], WORKER_FINISHED, > +__ATOMIC_RELAXED); Should be RELEASE as workers are releasing updates to the main lcore. 
> } > > > @@ -281,12 +277,12 @@ timer_stress2_main_loop(__rte_unused void *arg) > unsigned int lcore_id = rte_lcore_id(); > unsigned int main_lcore = rte_get_main_lcore(); > int32_t my_collisions = 0; > - static rte_atomic32_t collisions; > + static uint32_t collisions; > > if (lcore_id == main_lcore) { > cb_count = 0; > test_failed = 0; > - rte_atomic32_set(&collisions, 0); > + __atomic_store_n(&collisions, 0, __ATOMIC_RELAXED); > main_init_workers(); I think the existing code is probably incorrect in calling 'main_init_workers' in this function. This should be called outside of 'timer_stress2_main_loop' so that lcore_state is guaranteed to be initialized correctly before the threads are launched. Please call 'main_init_workers()' in the 'test_timer' function. > timers = rte_malloc(NULL, sizeof(*timers) * > NB_STRESS2_TIMERS, 0); > if (timers == NULL) { > @@ -315,7 +311,7 @@ timer_stress2_main_loop(__rte_unused void *arg) > my_collisions++; > } > if (my_collisions != 0) > - rte_atomic32_add(&collisions, my_collisions); > + __atomic_fetch_add(&collisions, my_collisions, > __ATOMIC_RELAXED); > > /* wait long enough for timers to expire */ > rte_delay_ms(100); > @@ -329,7 +325,7 @@ timer_stress2_main_loop(__rte_unused void *arg) > > /* now check that we get the right number of callbacks */ > if (lcore_id == main_lcore) { > - my_collisions = rte_atomic32_read(&collisions); > + my_collisions = __atomic_load_n(&collisions, > __ATOMIC_RELAXED); > if (my_collisions != 0) > printf("- %d timer reset collisions (OK)\n", > my_collisions); > rte_timer_manage(); > diff --git a/app/test/test_timer_secondary.c > b/app/test/test_timer_secondary.c index 16a9f1878b..5795c97f07 100644 > --- a/app/test/test_timer_secondary.c > +++ b/app/test/test_timer_secondary.c > @@ -9,7 +9,6 @@ > #include <rte_lcore.h> > #include <rte_debug.h> > #include <rte_memzone.h> > -#include <rte_atomic.h> > #include <rte_timer.h> > #include <rte_cycles.h> > #include <rte_mempool.h> > -- > 2.17.1