From: Joyce Kong <joyce.k...@arm.com> Run ring perf test on all available cores to really verify MPMC operations. The old way of running on a pair of cores is not enough for MPMC rings.
Suggested-by: gavin hu <gavin...@arm.com> Signed-off-by: joyce kong <joyce.k...@arm.com> Reviewed-by: ruifeng wang <ruifeng.w...@arm.com> Reviewed-by: honnappa nagarahalli <honnappa.nagaraha...@arm.com> Reviewed-by: dharmik thakkar <dharmik.thak...@arm.com> Reviewed-by: ola liljedahl <ola.liljed...@arm.com> Reviewed-by: gavin hu <gavin...@arm.com> --- test/test/test_ring_perf.c | 82 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 79 insertions(+), 3 deletions(-) diff --git a/test/test/test_ring_perf.c b/test/test/test_ring_perf.c index ebb3939..01c6937 100644 --- a/test/test/test_ring_perf.c +++ b/test/test/test_ring_perf.c @@ -9,7 +9,7 @@ #include <rte_cycles.h> #include <rte_launch.h> #include <rte_pause.h> - +#include <string.h> #include "test.h" /* @@ -20,6 +20,7 @@ * * Empty ring dequeue * * Enqueue/dequeue of bursts in 1 threads * * Enqueue/dequeue of bursts in 2 threads + * * Enqueue/dequeue of bursts in all available threads */ #define RING_NAME "RING_PERF" @@ -248,9 +249,80 @@ run_on_core_pair(struct lcore_pair *cores, struct rte_ring *r, } } +static rte_atomic32_t synchro; +static uint64_t queue_count[RTE_MAX_LCORE]; + +#define TIME_MS 100 + +static int +load_loop_fn(void *p) +{ + uint64_t time_diff = 0; + uint64_t begin = 0; + uint64_t hz = rte_get_timer_hz(); + uint64_t lcount = 0; + const unsigned int lcore = rte_lcore_id(); + struct thread_params *params = p; + void *burst[MAX_BURST] = {0}; + + /* wait synchro for slaves */ + if (lcore != rte_get_master_lcore()) + while (rte_atomic32_read(&synchro) == 0) + rte_pause(); + + begin = rte_get_timer_cycles(); + while (time_diff < hz * TIME_MS / 1000) { + rte_ring_mp_enqueue_bulk(params->r, burst, params->size, NULL); + rte_ring_mc_dequeue_bulk(params->r, burst, params->size, NULL); + lcount++; + time_diff = rte_get_timer_cycles() - begin; + } + queue_count[lcore] = lcount; + return 0; +} + +static int +run_on_all_cores(struct rte_ring *r) +{ + uint64_t total = 0; + unsigned int i, c; + 
struct thread_params param; + + memset(&param, 0, sizeof(struct thread_params)); + for (i = 0; i < RTE_DIM(bulk_sizes); i++) { + printf("\nBulk enq/dequeue count on size %u\n", bulk_sizes[i]); + param.size = bulk_sizes[i]; + param.r = r; + + /* clear synchro and start slaves */ + rte_atomic32_set(&synchro, 0); + if (rte_eal_mp_remote_launch(load_loop_fn, + &param, SKIP_MASTER) < 0) + return -1; + + /* start synchro and launch test on master */ + rte_atomic32_set(&synchro, 1); + load_loop_fn(&param); + + rte_eal_mp_wait_lcore(); + + RTE_LCORE_FOREACH(c) { + printf("Core [%u] count = %"PRIu64"\n", + c, queue_count[c]); + total += queue_count[c]; + } + + printf("Total count (size: %u): %"PRIu64"\n", bulk_sizes[i], + total); + } + + return 0; +} + /* - * Test function that determines how long an enqueue + dequeue of a single item - * takes on a single lcore. Result is for comparison with the bulk enq+deq. + * Test function that determines how long an enqueue + dequeue of a single + * item takes on a single lcore. Result is for comparison with the bulk + * enq+deq. */ static void test_single_enqueue_dequeue(struct rte_ring *r) @@ -394,6 +466,10 @@ test_ring_perf(void) printf("\n### Testing using two NUMA nodes ###\n"); run_on_core_pair(&cores, r, enqueue_bulk, dequeue_bulk); } + + printf("\n### Testing using all slave nodes ###\n"); + run_on_all_cores(r); + rte_ring_free(r); return 0; } -- 2.7.4