+Cc Olivier, maintainer of the ring library.

20/12/2018 12:33, Gavin Hu:
> From: Joyce Kong <joyce.k...@arm.com>
> 
> Run ring perf test on all available cores to really verify MPMC operations.
> The old way of running on a pair of cores is not enough for MPMC rings. We
> used this test case for ring optimization and it was really helpful for
> measuring the ring performance in multi-core environment.
> 
> Suggested-by: Gavin Hu <gavin...@arm.com>
> Signed-off-by: Joyce Kong <joyce.k...@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>
> Reviewed-by: Dharmik Thakkar <dharmik.thak...@arm.com>
> Reviewed-by: Ola Liljedahl <ola.liljed...@arm.com>
> Reviewed-by: Gavin Hu <gavin...@arm.com>
> ---
>  test/test/test_ring_perf.c | 82 ++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 80 insertions(+), 2 deletions(-)
> 
> diff --git a/test/test/test_ring_perf.c b/test/test/test_ring_perf.c
> index ebb3939..819d119 100644
> --- a/test/test/test_ring_perf.c
> +++ b/test/test/test_ring_perf.c
> @@ -20,12 +20,17 @@
>   *  * Empty ring dequeue
>   *  * Enqueue/dequeue of bursts in 1 threads
>   *  * Enqueue/dequeue of bursts in 2 threads
> + *  * Enqueue/dequeue of bursts in all available threads
>   */
>  
>  #define RING_NAME "RING_PERF"
>  #define RING_SIZE 4096
>  #define MAX_BURST 32
>  
> +#ifndef ARRAY_SIZE
> +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
> +#endif
> +
>  /*
>   * the sizes to enqueue and dequeue in testing
>   * (marked volatile so they won't be seen as compile-time constants)
> @@ -248,9 +253,78 @@ run_on_core_pair(struct lcore_pair *cores, struct rte_ring *r,
>       }
>  }
>  
> +static rte_atomic32_t synchro;
> +static uint64_t queue_count[RTE_MAX_LCORE] = {0};
> +
> +#define TIME_MS 100
> +
> +static int
> +load_loop_fn(void *p)
> +{
> +     uint64_t time_diff = 0;
> +     uint64_t begin = 0;
> +     uint64_t hz = rte_get_timer_hz();
> +     uint64_t lcount = 0;
> +     const unsigned int lcore = rte_lcore_id();
> +     struct thread_params *params = p;
> +     void *burst[MAX_BURST] = {0};
> +
> +     /* wait synchro for slaves */
> +     if (lcore != rte_get_master_lcore())
> +             while (rte_atomic32_read(&synchro) == 0)
> +                     rte_pause();
> +
> +     begin = rte_get_timer_cycles();
> +     while (time_diff < hz * TIME_MS / 1000) {
> +             rte_ring_mp_enqueue_bulk(params->r, burst, params->size, NULL);
> +             rte_ring_mc_dequeue_bulk(params->r, burst, params->size, NULL);
> +             lcount++;
> +             time_diff = rte_get_timer_cycles() - begin;
> +     }
> +     queue_count[lcore] = lcount;
> +     return 0;
> +}
> +
> +static int
> +run_on_all_cores(struct rte_ring *r)
> +{
> +     uint64_t total = 0;
> +     struct thread_params param = {0};
> +     unsigned int i, c;
> +     for (i = 0; i < ARRAY_SIZE(bulk_sizes); i++) {
> +             printf("\nBulk enq/dequeue count on size %u\n", bulk_sizes[i]);
> +             param.size = bulk_sizes[i];
> +             param.r = r;
> +
> +             /* clear synchro and start slaves */
> +             rte_atomic32_set(&synchro, 0);
> +             if (rte_eal_mp_remote_launch(load_loop_fn,
> +                             &param, SKIP_MASTER) < 0)
> +                     return -1;
> +
> +             /* start synchro and launch test on master */
> +             rte_atomic32_set(&synchro, 1);
> +             load_loop_fn(&param);
> +
> +             rte_eal_mp_wait_lcore();
> +
> +             RTE_LCORE_FOREACH(c) {
> +                     printf("Core [%u] count = %"PRIu64"\n",
> +                                     c, queue_count[c]);
> +                     total += queue_count[c];
> +             }
> +
> +             printf("Total count (size: %u): %"PRIu64"\n", bulk_sizes[i],
> +                                             total);
> +     }
> +
> +     return 0;
> +}
> +
>  /*
> - * Test function that determines how long an enqueue + dequeue of a single item
> - * takes on a single lcore. Result is for comparison with the bulk enq+deq.
> + * Test function that determines how long an enqueue + dequeue of a single
> + * item takes on a single lcore. Result is for comparison with the bulk
> + * enq+deq.
>   */
>  static void
>  test_single_enqueue_dequeue(struct rte_ring *r)
> @@ -394,6 +468,10 @@ test_ring_perf(void)
>               printf("\n### Testing using two NUMA nodes ###\n");
>               run_on_core_pair(&cores, r, enqueue_bulk, dequeue_bulk);
>       }
> +
> +     printf("\n### Testing using all slave nodes ###\n");
> +     run_on_all_cores(r);
> +
>       rte_ring_free(r);
>       return 0;
>  }
> 





Reply via email to