Second PING for review.

Med venlig hilsen / Kind regards,
-Morten Brørup
> From: Morten Brørup [mailto:m...@smartsharesystems.com]
> Sent: Thursday, 13 March 2025 09.23
>
> PING for review.
>
> This could still make it into 25.03-rc3 (deadline: 14 March 2025).
>
> Med venlig hilsen / Kind regards,
> -Morten Brørup
>
> > From: Morten Brørup [mailto:m...@smartsharesystems.com]
> > Sent: Friday, 28 February 2025 17.49
> >
> > Bulk requests to get or put objects in a mempool often vary in size.
> > A series of tests with pseudorandom request sizes was added, to
> > mitigate the benefits of the CPU's dynamic branch predictor.
> >
> > Also, various other minor changes:
> > - Improved the output formatting for readability.
> > - Added a test for the "default" mempool with cache.
> > - Skip the tests for the "default" mempool, if it happens to use the same
> >   driver (i.e. operations) as already tested.
> > - Replaced bare use of "unsigned" with "unsigned int",
> >   to make checkpatches happy.
> >
> > Signed-off-by: Morten Brørup <m...@smartsharesystems.com>
> > ---
> >  app/test/test_mempool_perf.c | 219 +++++++++++++++++++++++++++--------
> >  1 file changed, 172 insertions(+), 47 deletions(-)
> >
> > diff --git a/app/test/test_mempool_perf.c b/app/test/test_mempool_perf.c
> > index 4dd74ef75a..5e29797f02 100644
> > --- a/app/test/test_mempool_perf.c
> > +++ b/app/test/test_mempool_perf.c
> > @@ -33,6 +33,13 @@
> >   * Mempool performance
> >   * =======
> >   *
> > + * Each core gets *n_keep* objects per bulk of a pseudorandom number
> > + * between 1 and *n_max_bulk*.
> > + * Objects are put back in the pool per bulk of a similar pseudorandom number.
> > + * Note: The very low entropy of the randomization algorithm is harmless, because
> > + *       the sole purpose of randomization is to prevent the CPU's dynamic branch
> > + *       predictor from enhancing the test results.
> > + *
> >   * Each core get *n_keep* objects per bulk of *n_get_bulk*. Then,
> >   * objects are put back in the pool per bulk of *n_put_bulk*.
> >   *
> > @@ -52,7 +59,12 @@
> >   * - Two cores with user-owned cache
> >   * - Max. cores with user-owned cache
> >   *
> > - * - Bulk size (*n_get_bulk*, *n_put_bulk*)
> > + * - Pseudorandom max bulk size (*n_max_bulk*)
> > + *
> > + *   - Max bulk from CACHE_LINE_BURST to 256, and RTE_MEMPOOL_CACHE_MAX_SIZE,
> > + *     where CACHE_LINE_BURST is the number of pointers fitting into one CPU cache line.
> > + *
> > + * - Fixed bulk size (*n_get_bulk*, *n_put_bulk*)
> >   *
> >   *   - Bulk get from 1 to 256, and RTE_MEMPOOL_CACHE_MAX_SIZE
> >   *   - Bulk put from 1 to 256, and RTE_MEMPOOL_CACHE_MAX_SIZE
> > @@ -89,16 +101,19 @@
> >  	} while (0)
> >
> >  static int use_external_cache;
> > -static unsigned external_cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;
> > +static unsigned int external_cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;
> >
> >  static RTE_ATOMIC(uint32_t) synchro;
> >
> > +/* max random number of objects in one bulk operation (get and put) */
> > +static unsigned int n_max_bulk;
> > +
> >  /* number of objects in one bulk operation (get or put) */
> > -static unsigned n_get_bulk;
> > -static unsigned n_put_bulk;
> > +static unsigned int n_get_bulk;
> > +static unsigned int n_put_bulk;
> >
> >  /* number of objects retrieved from mempool before putting them back */
> > -static unsigned n_keep;
> > +static unsigned int n_keep;
> >
> >  /* true if we want to test with constant n_get_bulk and n_put_bulk */
> >  static int use_constant_values;
> > @@ -118,7 +133,7 @@ static struct mempool_test_stats stats[RTE_MAX_LCORE];
> >   */
> >  static void
> >  my_obj_init(struct rte_mempool *mp, __rte_unused void *arg,
> > -	    void *obj, unsigned i)
> > +	    void *obj, unsigned int i)
> >  {
> >  	uint32_t *objnum = obj;
> >  	memset(obj, 0, mp->elt_size);
> > @@ -159,11 +174,55 @@ test_loop(struct rte_mempool *mp, struct rte_mempool_cache *cache,
> >  	return 0;
> >  }
> >
> > +static __rte_always_inline int
> > +test_loop_random(struct rte_mempool *mp, struct rte_mempool_cache *cache,
> > +		unsigned int x_keep, unsigned int x_max_bulk)
> > +{
> > +	alignas(RTE_CACHE_LINE_SIZE) void *obj_table[MAX_KEEP];
> > +	unsigned int idx;
> > +	unsigned int i;
> > +	unsigned int r = 0;
> > +	unsigned int x_bulk;
> > +	int ret;
> > +
> > +	for (i = 0; likely(i < (N / x_keep)); i++) {
> > +		/* get x_keep objects by bulk of random [1 .. x_max_bulk] */
> > +		for (idx = 0; idx < x_keep; idx += x_bulk, r++) {
> > +			/* Generate a pseudorandom number [1 .. x_max_bulk]. */
> > +			x_bulk = ((r ^ (r >> 2) ^ (r << 3)) & (x_max_bulk - 1)) + 1;
> > +			if (unlikely(idx + x_bulk > x_keep))
> > +				x_bulk = x_keep - idx;
> > +			ret = rte_mempool_generic_get(mp,
> > +						      &obj_table[idx],
> > +						      x_bulk,
> > +						      cache);
> > +			if (unlikely(ret < 0)) {
> > +				rte_mempool_dump(stdout, mp);
> > +				return ret;
> > +			}
> > +		}
> > +
> > +		/* put the objects back by bulk of random [1 .. x_max_bulk] */
> > +		for (idx = 0; idx < x_keep; idx += x_bulk, r++) {
> > +			/* Generate a pseudorandom number [1 .. x_max_bulk]. */
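A note on the generator used above: the expression relies on x_max_bulk
being a power of two, so that the mask (x_max_bulk - 1) covers the full
range [1 .. x_max_bulk]. The sequence it produces can be inspected outside
DPDK with a small standalone program; a minimal sketch (the value 32 below
is just a hypothetical n_max_bulk, not taken from the patch):

	#include <stdio.h>

	int main(void)
	{
		const unsigned int max = 32;	/* hypothetical n_max_bulk */
		unsigned int r;

		/* Same expression as in test_loop_random(). */
		for (r = 0; r < 16; r++)
			printf("%u ", ((r ^ (r >> 2) ^ (r << 3)) & (max - 1)) + 1);
		printf("\n");
		return 0;
	}

The sequence is deterministic and has low entropy, which, as the comment in
the patch notes, is sufficient for defeating the branch predictor.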
> > +			x_bulk = ((r ^ (r >> 2) ^ (r << 3)) & (x_max_bulk - 1)) + 1;
> > +			if (unlikely(idx + x_bulk > x_keep))
> > +				x_bulk = x_keep - idx;
> > +			rte_mempool_generic_put(mp,
> > +						&obj_table[idx],
> > +						x_bulk,
> > +						cache);
> > +		}
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> >  static int
> >  per_lcore_mempool_test(void *arg)
> >  {
> >  	struct rte_mempool *mp = arg;
> > -	unsigned lcore_id = rte_lcore_id();
> > +	unsigned int lcore_id = rte_lcore_id();
> >  	int ret = 0;
> >  	uint64_t start_cycles, end_cycles;
> >  	uint64_t time_diff = 0, hz = rte_get_timer_hz();
> > @@ -181,9 +240,9 @@ per_lcore_mempool_test(void *arg)
> >  	}
> >
> >  	/* n_get_bulk and n_put_bulk must be divisors of n_keep */
> > -	if (((n_keep / n_get_bulk) * n_get_bulk) != n_keep)
> > +	if (!n_max_bulk && (((n_keep / n_get_bulk) * n_get_bulk) != n_keep))
> >  		GOTO_ERR(ret, out);
> > -	if (((n_keep / n_put_bulk) * n_put_bulk) != n_keep)
> > +	if (!n_max_bulk && (((n_keep / n_put_bulk) * n_put_bulk) != n_keep))
> >  		GOTO_ERR(ret, out);
> >  	/* for constant n, n_get_bulk and n_put_bulk must be the same */
> >  	if (use_constant_values && n_put_bulk != n_get_bulk)
> > @@ -200,7 +259,9 @@ per_lcore_mempool_test(void *arg)
> >  	start_cycles = rte_get_timer_cycles();
> >
> >  	while (time_diff/hz < TIME_S) {
> > -		if (!use_constant_values)
> > +		if (n_max_bulk)
> > +			ret = test_loop_random(mp, cache, n_keep, n_max_bulk);
> > +		else if (!use_constant_values)
> >  			ret = test_loop(mp, cache, n_keep, n_get_bulk, n_put_bulk);
> >  		else if (n_get_bulk == 1)
> >  			ret = test_loop(mp, cache, n_keep, 1, 1);
> > @@ -246,10 +307,10 @@ per_lcore_mempool_test(void *arg)
> >  static int
> >  launch_cores(struct rte_mempool *mp, unsigned int cores)
> >  {
> > -	unsigned lcore_id;
> > +	unsigned int lcore_id;
> >  	uint64_t rate;
> >  	int ret;
> > -	unsigned cores_save = cores;
> > +	unsigned int cores_save = cores;
> >  	double hz = rte_get_timer_hz();
> >
> >  	rte_atomic_store_explicit(&synchro, 0, rte_memory_order_relaxed);
> > @@ -257,11 +318,18 @@ launch_cores(struct rte_mempool *mp, unsigned int cores)
> >  	/* reset stats */
> >  	memset(stats, 0, sizeof(stats));
> >
> > -	printf("mempool_autotest cache=%u cores=%u n_get_bulk=%u "
> > -	       "n_put_bulk=%u n_keep=%u constant_n=%u ",
> > +	printf("mempool_autotest cache=%u cores=%u n_keep=%5u ",
> >  	       use_external_cache ?
> >  	       external_cache_size : (unsigned) mp->cache_size,
> > -	       cores, n_get_bulk, n_put_bulk, n_keep, use_constant_values);
> > +	       cores,
> > +	       n_keep);
> > +	if (n_max_bulk)
> > +		printf("n_max_bulk=%3u ",
> > +		       n_max_bulk);
> > +	else
> > +		printf("n_get_bulk=%3u n_put_bulk=%3u constant_n=%u ",
> > +		       n_get_bulk, n_put_bulk,
> > +		       use_constant_values);
> >
> >  	if (rte_mempool_avail_count(mp) != MEMPOOL_SIZE) {
> >  		printf("mempool is not full\n");
> > @@ -301,7 +369,7 @@ launch_cores(struct rte_mempool *mp, unsigned int cores)
> >  		rate += (double)stats[lcore_id].enq_count * hz /
> >  			(double)stats[lcore_id].duration_cycles;
> >
> > -	printf("rate_persec=%" PRIu64 "\n", rate);
> > +	printf("rate_persec=%10" PRIu64 "\n", rate);
> >
> >  	return 0;
> >  }
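With the new width specifiers (n_keep=%5u, the bulk sizes as %3u, and a
10-wide rate_persec), the per-test output lines become column-aligned. For
the randomized case, a line would look along these lines (all numbers here
are hypothetical, shown only to illustrate the alignment):

	mempool_autotest cache=512 cores=2 n_keep= 8192 n_max_bulk= 32 rate_persec= 123456789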
> > @@ -310,25 +378,47 @@ launch_cores(struct rte_mempool *mp, unsigned int cores)
> >  static int
> >  do_one_mempool_test(struct rte_mempool *mp, unsigned int cores, int external_cache)
> >  {
> > +	unsigned int bulk_tab_max[] = { CACHE_LINE_BURST, 32, 64, 128, 256,
> > +					RTE_MEMPOOL_CACHE_MAX_SIZE, 0 };
> >  	unsigned int bulk_tab_get[] = { 1, 4, CACHE_LINE_BURST, 32, 64, 128, 256,
> >  					RTE_MEMPOOL_CACHE_MAX_SIZE, 0 };
> >  	unsigned int bulk_tab_put[] = { 1, 4, CACHE_LINE_BURST, 32, 64, 128, 256,
> >  					RTE_MEMPOOL_CACHE_MAX_SIZE, 0 };
> >  	unsigned int keep_tab[] = { 32, 128, 512, 2048, 8192, 32768, 0 };
> > -	unsigned *get_bulk_ptr;
> > -	unsigned *put_bulk_ptr;
> > -	unsigned *keep_ptr;
> > +	unsigned int *max_bulk_ptr;
> > +	unsigned int *get_bulk_ptr;
> > +	unsigned int *put_bulk_ptr;
> > +	unsigned int *keep_ptr;
> >  	int ret;
> >
> > -	for (get_bulk_ptr = bulk_tab_get; *get_bulk_ptr; get_bulk_ptr++) {
> > -		for (put_bulk_ptr = bulk_tab_put; *put_bulk_ptr; put_bulk_ptr++) {
> > -			for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) {
> > +	for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) {
> > +		for (max_bulk_ptr = bulk_tab_max; *max_bulk_ptr; max_bulk_ptr++) {
> > +
> > +			if (*keep_ptr < *max_bulk_ptr)
> > +				continue;
> > +
> > +			use_external_cache = external_cache;
> > +			use_constant_values = 0;
> > +			n_max_bulk = *max_bulk_ptr;
> > +			n_get_bulk = 0;
> > +			n_put_bulk = 0;
> > +			n_keep = *keep_ptr;
> > +			ret = launch_cores(mp, cores);
> > +			if (ret < 0)
> > +				return -1;
> > +		}
> > +	}
> > +
> > +	for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) {
> > +		for (get_bulk_ptr = bulk_tab_get; *get_bulk_ptr; get_bulk_ptr++) {
> > +			for (put_bulk_ptr = bulk_tab_put; *put_bulk_ptr; put_bulk_ptr++) {
> >
> >  				if (*keep_ptr < *get_bulk_ptr || *keep_ptr < *put_bulk_ptr)
> >  					continue;
> >
> >  				use_external_cache = external_cache;
> >  				use_constant_values = 0;
> > +				n_max_bulk = 0;
> >  				n_get_bulk = *get_bulk_ptr;
> >  				n_put_bulk = *put_bulk_ptr;
> >  				n_keep = *keep_ptr;
> > @@ -346,6 +436,7 @@ do_one_mempool_test(struct rte_mempool *mp, unsigned int cores, int external_cac
> >  			}
> >  		}
> >  	}
> > +
> >  	return 0;
> >  }
> >
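One observation on bulk_tab_max[]: every entry (CACHE_LINE_BURST, 32, 64,
128, 256, RTE_MEMPOOL_CACHE_MAX_SIZE) is a power of two, which the
mask-based generator in test_loop_random() depends on. If the table is ever
extended, a guard in per_lcore_mempool_test() could catch violations early;
a sketch (assuming rte_is_power_of_2() from rte_common.h; not part of the
patch):

	/* The mask (n_max_bulk - 1) only yields [1 .. n_max_bulk]
	 * when n_max_bulk is a power of two.
	 */
	if (n_max_bulk != 0 && !rte_is_power_of_2(n_max_bulk))
		GOTO_ERR(ret, out);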
> > @@ -354,7 +445,10 @@ do_all_mempool_perf_tests(unsigned int cores)
> >  {
> >  	struct rte_mempool *mp_cache = NULL;
> >  	struct rte_mempool *mp_nocache = NULL;
> > -	struct rte_mempool *default_pool = NULL;
> > +	struct rte_mempool *default_pool_cache = NULL;
> > +	struct rte_mempool *default_pool_nocache = NULL;
> > +	const char *mp_cache_ops;
> > +	const char *mp_nocache_ops;
> >  	const char *default_pool_ops;
> >  	int ret = -1;
> >
> > @@ -368,6 +462,7 @@ do_all_mempool_perf_tests(unsigned int cores)
> >  		printf("cannot allocate mempool (without cache)\n");
> >  		goto err;
> >  	}
> > +	mp_nocache_ops = rte_mempool_get_ops(mp_nocache->ops_index)->name;
> >
> >  	/* create a mempool (with cache) */
> >  	mp_cache = rte_mempool_create("perf_test_cache", MEMPOOL_SIZE,
> > @@ -380,47 +475,76 @@ do_all_mempool_perf_tests(unsigned int cores)
> >  		printf("cannot allocate mempool (with cache)\n");
> >  		goto err;
> >  	}
> > +	mp_cache_ops = rte_mempool_get_ops(mp_cache->ops_index)->name;
> >
> >  	default_pool_ops = rte_mbuf_best_mempool_ops();
> > -	/* Create a mempool based on Default handler */
> > -	default_pool = rte_mempool_create_empty("default_pool",
> > -						MEMPOOL_SIZE,
> > -						MEMPOOL_ELT_SIZE,
> > -						0, 0,
> > -						SOCKET_ID_ANY, 0);
> > -
> > -	if (default_pool == NULL) {
> > -		printf("cannot allocate %s mempool\n", default_pool_ops);
> > +
> > +	/* Create a mempool (without cache) based on Default handler */
> > +	default_pool_nocache = rte_mempool_create_empty("default_pool_nocache",
> > +							MEMPOOL_SIZE,
> > +							MEMPOOL_ELT_SIZE,
> > +							0, 0,
> > +							SOCKET_ID_ANY, 0);
> > +	if (default_pool_nocache == NULL) {
> > +		printf("cannot allocate %s mempool (without cache)\n", default_pool_ops);
> >  		goto err;
> >  	}
> > -
> > -	if (rte_mempool_set_ops_byname(default_pool, default_pool_ops, NULL)
> > -			< 0) {
> > +	if (rte_mempool_set_ops_byname(default_pool_nocache, default_pool_ops, NULL) < 0) {
> >  		printf("cannot set %s handler\n", default_pool_ops);
> >  		goto err;
> >  	}
> > -
> > -	if (rte_mempool_populate_default(default_pool) < 0) {
> > +	if (rte_mempool_populate_default(default_pool_nocache) < 0) {
> >  		printf("cannot populate %s mempool\n", default_pool_ops);
> >  		goto err;
> >  	}
> > +	rte_mempool_obj_iter(default_pool_nocache, my_obj_init, NULL);
> > +
> > +	/* Create a mempool (with cache) based on Default handler */
> > +	default_pool_cache = rte_mempool_create_empty("default_pool_cache",
> > +						      MEMPOOL_SIZE,
> > +						      MEMPOOL_ELT_SIZE,
> > +						      RTE_MEMPOOL_CACHE_MAX_SIZE, 0,
> > +						      SOCKET_ID_ANY, 0);
> > +	if (default_pool_cache == NULL) {
> > +		printf("cannot allocate %s mempool (with cache)\n", default_pool_ops);
> > +		goto err;
> > +	}
> > +	if (rte_mempool_set_ops_byname(default_pool_cache, default_pool_ops, NULL) < 0) {
> > +		printf("cannot set %s handler\n", default_pool_ops);
> > +		goto err;
> > +	}
> > +	if (rte_mempool_populate_default(default_pool_cache) < 0) {
> > +		printf("cannot populate %s mempool\n", default_pool_ops);
> > +		goto err;
> > +	}
> > +	rte_mempool_obj_iter(default_pool_cache, my_obj_init, NULL);
> >
> > -	rte_mempool_obj_iter(default_pool, my_obj_init, NULL);
> > -
> > -	printf("start performance test (without cache)\n");
> > +	printf("start performance test (using %s, without cache)\n",
> > +	       mp_nocache_ops);
> >  	if (do_one_mempool_test(mp_nocache, cores, 0) < 0)
> >  		goto err;
> >
> > -	printf("start performance test for %s (without cache)\n",
> > -	       default_pool_ops);
> > -	if (do_one_mempool_test(default_pool, cores, 0) < 0)
> > -		goto err;
> > +	if (strcmp(default_pool_ops, mp_nocache_ops) != 0) {
> > +		printf("start performance test for %s (without cache)\n",
> > +		       default_pool_ops);
> > +		if (do_one_mempool_test(default_pool_nocache, cores, 0) < 0)
> > +			goto err;
> > +	}
> >
> > -	printf("start performance test (with cache)\n");
> > +	printf("start performance test (using %s, with cache)\n",
> > +	       mp_cache_ops);
> >  	if (do_one_mempool_test(mp_cache, cores, 0) < 0)
> >  		goto err;
> >
> > -	printf("start performance test (with user-owned cache)\n");
> > +	if (strcmp(default_pool_ops, mp_cache_ops) != 0) {
> > +		printf("start performance test for %s (with cache)\n",
> > +		       default_pool_ops);
> > +		if (do_one_mempool_test(default_pool_cache, cores, 0) < 0)
> > +			goto err;
> > +	}
> > +
> > +	printf("start performance test (using %s, with user-owned cache)\n",
> > +	       mp_nocache_ops);
> >  	if (do_one_mempool_test(mp_nocache, cores, 1) < 0)
> >  		goto err;
> >
> > @@ -431,7 +555,8 @@ do_all_mempool_perf_tests(unsigned int cores)
> >  err:
> >  	rte_mempool_free(mp_cache);
> >  	rte_mempool_free(mp_nocache);
> > -	rte_mempool_free(default_pool);
> > +	rte_mempool_free(default_pool_cache);
> > +	rte_mempool_free(default_pool_nocache);
> >  	return ret;
> >  }
> >
> > --
> > 2.43.0