<snip> > Subject: RE: [PATCH v7 03/17] test/ring: add functional tests for > rte_ring_xxx_elem APIs > > > > > Add basic infrastructure to test rte_ring_xxx_elem APIs. Add test > > > > cases for testing burst and bulk tests. > > > > > > > > Signed-off-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com> > > > > Reviewed-by: Gavin Hu <gavin...@arm.com> > > > > --- <snip>
> > > > diff --git a/app/test/test_ring.h b/app/test/test_ring.h new file > > > > mode > > > > 100644 index 000000000..19ef1b399 > > > > --- /dev/null > > > > +++ b/app/test/test_ring.h > > > > @@ -0,0 +1,203 @@ > > > > +/* SPDX-License-Identifier: BSD-3-Clause > > > > + * Copyright(c) 2019 Arm Limited > > > > + */ > > > > + > > > > +#include <rte_malloc.h> > > > > +#include <rte_ring.h> > > > > +#include <rte_ring_elem.h> > > > > + > > > > +/* API type to call > > > > + * N - Calls default APIs > > > > + * S - Calls SP or SC API > > > > + * M - Calls MP or MC API > > > > + */ > > > > +#define TEST_RING_N 1 > > > > +#define TEST_RING_S 2 > > > > +#define TEST_RING_M 4 > > > > + > > > > +/* API type to call > > > > + * SL - Calls single element APIs > > > > + * BL - Calls bulk APIs > > > > + * BR - Calls burst APIs > > > > + */ > > > > +#define TEST_RING_SL 8 > > > > +#define TEST_RING_BL 16 > > > > +#define TEST_RING_BR 32 > > > > + > > > > +#define TEST_RING_IGNORE_API_TYPE ~0U > > > > + > > > > +#define TEST_RING_INCP(obj, esize, n) do { \ > > > > + /* Legacy queue APIs? */ \ > > > > + if ((esize) == -1) \ > > > > + obj = ((void **)obj) + n; \ > > > > + else \ > > > > + obj = (void **)(((uint32_t *)obj) + \ > > > > + (n * esize / > > > > sizeof(uint32_t))); \ } > > > while (0) > > > > + > > > > +#define TEST_RING_CREATE(name, esize, count, socket_id, flags, r) do > { \ > > > > + /* Legacy queue APIs? */ \ > > > > + if ((esize) == -1) \ > > > > + r = rte_ring_create((name), (count), (socket_id), > > > > (flags)); \ > > > > + else \ > > > > + r = rte_ring_create_elem((name), (esize), (count), \ > > > > + (socket_id), (flags)); \ > > > > +} while (0) > > > > + > > > > +#define TEST_RING_ENQUEUE(r, obj, esize, n, ret, api_type) do { \ > > > > + /* Legacy queue APIs? 
*/ \ > > > > + if ((esize) == -1) \ > > > > + switch (api_type) { \ > > > > + case (TEST_RING_N | TEST_RING_SL): \ > > > > + ret = rte_ring_enqueue(r, obj); \ > > > > + break; \ > > > > + case (TEST_RING_S | TEST_RING_SL): \ > > > > + ret = rte_ring_sp_enqueue(r, obj); \ > > > > + break; \ > > > > + case (TEST_RING_M | TEST_RING_SL): \ > > > > + ret = rte_ring_mp_enqueue(r, obj); \ > > > > + break; \ > > > > + case (TEST_RING_N | TEST_RING_BL): \ > > > > + ret = rte_ring_enqueue_bulk(r, obj, n, NULL); \ > > > > + break; \ > > > > + case (TEST_RING_S | TEST_RING_BL): \ > > > > + ret = rte_ring_sp_enqueue_bulk(r, obj, n, > > > > NULL); \ > > > > + break; \ > > > > + case (TEST_RING_M | TEST_RING_BL): \ > > > > + ret = rte_ring_mp_enqueue_bulk(r, obj, n, > > > > NULL); \ > > > > + break; \ > > > > + case (TEST_RING_N | TEST_RING_BR): \ > > > > + ret = rte_ring_enqueue_burst(r, obj, n, NULL); \ > > > > + break; \ > > > > + case (TEST_RING_S | TEST_RING_BR): \ > > > > + ret = rte_ring_sp_enqueue_burst(r, obj, n, > > > > NULL); \ > > > > + break; \ > > > > + case (TEST_RING_M | TEST_RING_BR): \ > > > > + ret = rte_ring_mp_enqueue_burst(r, obj, n, > > > > NULL); \ > > > > + } \ > > > > + else \ > > > > + switch (api_type) { \ > > > > + case (TEST_RING_N | TEST_RING_SL): \ > > > > + ret = rte_ring_enqueue_elem(r, obj, esize); \ > > > > + break; \ > > > > + case (TEST_RING_S | TEST_RING_SL): \ > > > > + ret = rte_ring_sp_enqueue_elem(r, obj, esize); \ > > > > + break; \ > > > > + case (TEST_RING_M | TEST_RING_SL): \ > > > > + ret = rte_ring_mp_enqueue_elem(r, obj, esize); \ > > > > + break; \ > > > > + case (TEST_RING_N | TEST_RING_BL): \ > > > > + ret = rte_ring_enqueue_bulk_elem(r, obj, esize, > > > > n, \ > > > > + NULL); \ > > > > + break; \ > > > > + case (TEST_RING_S | TEST_RING_BL): \ > > > > + ret = rte_ring_sp_enqueue_bulk_elem(r, obj, > > > > esize, n, > > > \ > > > > + NULL); \ > > > > + break; \ > > > > + case (TEST_RING_M | TEST_RING_BL): \ > > > > + ret = 
rte_ring_mp_enqueue_bulk_elem(r, obj, > > > > esize, n, > > > \ > > > > + NULL); \ > > > > + break; \ > > > > + case (TEST_RING_N | TEST_RING_BR): \ > > > > + ret = rte_ring_enqueue_burst_elem(r, obj, > > > > esize, n, \ > > > > + NULL); \ > > > > + break; \ > > > > + case (TEST_RING_S | TEST_RING_BR): \ > > > > + ret = rte_ring_sp_enqueue_burst_elem(r, obj, > > > > esize, n, > > > \ > > > > + NULL); \ > > > > + break; \ > > > > + case (TEST_RING_M | TEST_RING_BR): \ > > > > + ret = rte_ring_mp_enqueue_burst_elem(r, obj, > > > > esize, > > > n, \ > > > > + NULL); \ > > > > + } \ > > > > +} while (0) > > > > + > > > > +#define TEST_RING_DEQUEUE(r, obj, esize, n, ret, api_type) do { \ > > > > + /* Legacy queue APIs? */ \ > > > > + if ((esize) == -1) \ > > > > + switch (api_type) { \ > > > > + case (TEST_RING_N | TEST_RING_SL): \ > > > > + ret = rte_ring_dequeue(r, obj); \ > > > > + break; \ > > > > + case (TEST_RING_S | TEST_RING_SL): \ > > > > + ret = rte_ring_sc_dequeue(r, obj); \ > > > > + break; \ > > > > + case (TEST_RING_M | TEST_RING_SL): \ > > > > + ret = rte_ring_mc_dequeue(r, obj); \ > > > > + break; \ > > > > + case (TEST_RING_N | TEST_RING_BL): \ > > > > + ret = rte_ring_dequeue_bulk(r, obj, n, NULL); \ > > > > + break; \ > > > > + case (TEST_RING_S | TEST_RING_BL): \ > > > > + ret = rte_ring_sc_dequeue_bulk(r, obj, n, > > > > NULL); \ > > > > + break; \ > > > > + case (TEST_RING_M | TEST_RING_BL): \ > > > > + ret = rte_ring_mc_dequeue_bulk(r, obj, n, > > > > NULL); \ > > > > + break; \ > > > > + case (TEST_RING_N | TEST_RING_BR): \ > > > > + ret = rte_ring_dequeue_burst(r, obj, n, NULL); \ > > > > + break; \ > > > > + case (TEST_RING_S | TEST_RING_BR): \ > > > > + ret = rte_ring_sc_dequeue_burst(r, obj, n, > > > > NULL); \ > > > > + break; \ > > > > + case (TEST_RING_M | TEST_RING_BR): \ > > > > + ret = rte_ring_mc_dequeue_burst(r, obj, n, > > > > NULL); \ > > > > + } \ > > > > + else \ > > > > + switch (api_type) { \ > > > > + case (TEST_RING_N | 
TEST_RING_SL): \ > > > > + ret = rte_ring_dequeue_elem(r, obj, esize); \ > > > > + break; \ > > > > + case (TEST_RING_S | TEST_RING_SL): \ > > > > + ret = rte_ring_sc_dequeue_elem(r, obj, esize); \ > > > > + break; \ > > > > + case (TEST_RING_M | TEST_RING_SL): \ > > > > + ret = rte_ring_mc_dequeue_elem(r, obj, esize); \ > > > > + break; \ > > > > + case (TEST_RING_N | TEST_RING_BL): \ > > > > + ret = rte_ring_dequeue_bulk_elem(r, obj, esize, > > > > n, \ > > > > + NULL); \ > > > > + break; \ > > > > + case (TEST_RING_S | TEST_RING_BL): \ > > > > + ret = rte_ring_sc_dequeue_bulk_elem(r, obj, > > > > esize, n, > > > \ > > > > + NULL); \ > > > > + break; \ > > > > + case (TEST_RING_M | TEST_RING_BL): \ > > > > + ret = rte_ring_mc_dequeue_bulk_elem(r, obj, > > > > esize, n, > > > \ > > > > + NULL); \ > > > > + break; \ > > > > + case (TEST_RING_N | TEST_RING_BR): \ > > > > + ret = rte_ring_dequeue_burst_elem(r, obj, > > > > esize, n, \ > > > > + NULL); \ > > > > + break; \ > > > > + case (TEST_RING_S | TEST_RING_BR): \ > > > > + ret = rte_ring_sc_dequeue_burst_elem(r, obj, > > > > esize, n, > > > \ > > > > + NULL); \ > > > > + break; \ > > > > + case (TEST_RING_M | TEST_RING_BR): \ > > > > + ret = rte_ring_mc_dequeue_burst_elem(r, obj, > > > > esize, > > > n, \ > > > > + NULL); \ > > > > + } \ > > > > +} while (0) > > > > > > > > > My thought to avoid test-code duplication was a bit different. > > Yes, this can be done multiple ways. My implementation is not complicated > either. > > > > > Instead of adding extra enums/parameters and then do switch on them, > > > my > > The switch statement should be removed by the compiler for the > performance tests. > > I am sure the compiler will do its job properly. > My concern is that with all these extra flags, it is really hard to read and > understand what exactly function we are calling and what we are trying to > test. 
There are just 2 flags - 1) representing single/bulk/burst 2) representing default/single/multiple threads. This is the way the rte_ring APIs are also organized (rte_ring_<sp/mp or sc/mc>_enqueue_<bulk/burst>). If we want to keep the code flexible, we have to keep these 2 flags that can be varied. Your proposal considers only the element size as a variable. It does not consider the above-mentioned variables. This results in code duplication. This is visible in patch 10/17. > Might be just me, but let's say in original version for enqueue_bulk() we have: > > const uint64_t sp_start = rte_rdtsc(); > for (i = 0; i < iterations; i++) > while (rte_ring_sp_enqueue_bulk(r, burst, size, NULL) == 0) > rte_pause(); > const uint64_t sp_end = rte_rdtsc(); > > const uint64_t mp_start = rte_rdtsc(); > for (i = 0; i < iterations; i++) > while (rte_ring_mp_enqueue_bulk(r, burst, size, NULL) == 0) > rte_pause(); > const uint64_t mp_end = rte_rdtsc(); > > Simple and easy to understand. > Same code after the patch isn't that straightforward anymore: > > const uint64_t sp_start = rte_rdtsc(); > for (i = 0; i < iterations; i++) > do { > if (flag == 0) > TEST_RING_ENQUEUE(r, burst, esize, bsize, ret, > TEST_RING_S | TEST_RING_BL); > else if (flag == 1) > TEST_RING_DEQUEUE(r, burst, esize, bsize, ret, > TEST_RING_S | TEST_RING_BL); Would it help if the #define names were better? Maybe convert TEST_RING_SL to TEST_ELEM_SINGLE TEST_RING_BL to TEST_ELEM_BULK TEST_RING_BR to TEST_ELEM_BURST and TEST_RING_N to TEST_THREAD_DEFAULT TEST_RING_S to TEST_THREAD_SPSC TEST_RING_M to TEST_THREAD_MPMC > if (ret == 0) > rte_pause(); > } while (!ret); > const uint64_t sp_end = rte_rdtsc(); > > Another thing - if tomorrow we'll want to add perf tests for elem_size==4/8, > etc. - we'll need to do copy/paste for all test-case invocations, as you did > for > 16B (or some code reorg). This is a mistake on my side. Looking at the code, 'test_ring_perf' can be simplified to avoid the copy/paste. 
'test_ring_perf' can be changed to call another function (that contains the test cases) with different element sizes. I will make this change. The only issue would be the wrappers 'dequeue_bulk', 'dequeue_bulk_16B' etc. However, the wrappers are simple enough to maintain. > > > > > > intention was something like that: > > > > > > 1. mv test_ring_perf.c test_ring_perf.h 2. Inside test_ring_perf.h > > > change rte_ring_ create/enqueue/dequeue function > > > calls to some not-defined function/macros invocations. > > > With similar name, same number of parameters, and same semantics. > > > Also change 'void *burst[..]' to 'RING_ELEM[...]'; 3. For each > > > test configuration we want to have (default, 4B, 8B, 16B) > > > create a new .c file where we: > > > - define used in test_ring_perf.h macros(/function) > > > - include test_ring_perf.h > > > - REGISTER_TEST_COMMAND(<test_name>, test_ring_perf); > > > > > > As an example: > > > test_ring_perf.h: > > > ... > > > static int > > > enqueue_bulk(void *p) > > > { > > > ... > > > RING_ELEM burst[MAX_BURST]; > > > > > > memset(burst, 0, sizeof(burst)); > > > .... > > > const uint64_t sp_start = rte_rdtsc(); > > > for (i = 0; i < iterations; i++) > > > while (RING_SP_ENQUEUE_BULK(r, burst, size, NULL) == 0) > > > rte_pause(); > > > const uint64_t sp_end = rte_rdtsc(); > > > > > > const uint64_t mp_start = rte_rdtsc(); > > > for (i = 0; i < iterations; i++) > > > while (RING_MP_ENQUEUE_BULK(r, burst, size, NULL) == 0) > > > rte_pause(); > > > const uint64_t mp_end = rte_rdtsc(); > > > .... > > > > > > Then in test_ring_perf.c: > > > > > > .... > > > #define RING_ELEM void * > > > ... > > > #define RING_SP_ENQUEUE_BULK(ring, buf, size, spc) \ > > > rte_ring_sp_enqueue_bulk(ring, buf, size, spc) .... > > > > > > #include "test_ring_perf.h" > > > REGISTER_TEST_COMMAND(ring_perf_autotest, test_ring_perf); > > > > > > > > > In test_ring_elem16B_perf.c: > > > .... 
> > > #define RING_ELEM __uint128_t > > > #define RING_SP_ENQUEUE_BULK(ring, buf, size, spc) \ > > > rte_ring_sp_enqueue_bulk_elem(ring, buf, sizeof(RING_ELEM), size, > > > spc) .... > > > #include "test_ring_perf.h" > > > REGISTER_TEST_COMMAND(ring_perf_elem16B_autotest, test_ring_perf); > > > > > > In test_ring_elem4B_perf.c: > > > > > > .... > > > #define RING_ELEM uint32_t > > > #define RING_SP_ENQUEUE_BULK(ring, buf, size, spc) \ > > > rte_ring_sp_enqueue_bulk_elem(ring, buf, sizeof(RING_ELEM), size, > > > spc) .... > > > #include "test_ring_perf.h" > > > REGISTER_TEST_COMMAND(ring_perf_elem4B_autotest, test_ring_perf); > > > > > > And so on. This will result in additional test files. > > > > > > > + > > > > +/* This function is placed here as it is required for both > > > > + * performance and functional tests. > > > > + */ > > > > +static __rte_always_inline void * test_ring_calloc(unsigned int > > > > +rsize, int esize) { > > > > + unsigned int sz; > > > > + void *p; > > > > + > > > > + /* Legacy queue APIs? */ > > > > + if (esize == -1) > > > > + sz = sizeof(void *); > > > > + else > > > > + sz = esize; > > > > + > > > > + p = rte_zmalloc(NULL, rsize * sz, RTE_CACHE_LINE_SIZE); > > > > + if (p == NULL) > > > > + printf("Failed to allocate memory\n"); > > > > + > > > > + return p; > > > > +} > > > > -- > > > > 2.17.1