<snip>

> Subject: RE: [PATCH v7 03/17] test/ring: add functional tests for
> rte_ring_xxx_elem APIs
> 
> > > > Add basic infrastructure to test rte_ring_xxx_elem APIs. Add test
> > > > cases for testing burst and bulk tests.
> > > >
> > > > Signed-off-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>
> > > > Reviewed-by: Gavin Hu <gavin...@arm.com>
> > > > ---
<snip>

> > > > diff --git a/app/test/test_ring.h b/app/test/test_ring.h new file
> > > > mode
> > > > 100644 index 000000000..19ef1b399
> > > > --- /dev/null
> > > > +++ b/app/test/test_ring.h
> > > > @@ -0,0 +1,203 @@
> > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > + * Copyright(c) 2019 Arm Limited
> > > > + */
> > > > +
> > > > +#include <rte_malloc.h>
> > > > +#include <rte_ring.h>
> > > > +#include <rte_ring_elem.h>
> > > > +
> > > > +/* API type to call
> > > > + * N - Calls default APIs
> > > > + * S - Calls SP or SC API
> > > > + * M - Calls MP or MC API
> > > > + */
> > > > +#define TEST_RING_N 1
> > > > +#define TEST_RING_S 2
> > > > +#define TEST_RING_M 4
> > > > +
> > > > +/* API type to call
> > > > + * SL - Calls single element APIs
> > > > + * BL - Calls bulk APIs
> > > > + * BR - Calls burst APIs
> > > > + */
> > > > +#define TEST_RING_SL 8
> > > > +#define TEST_RING_BL 16
> > > > +#define TEST_RING_BR 32
> > > > +
> > > > +#define TEST_RING_IGNORE_API_TYPE ~0U
> > > > +
> > > > +#define TEST_RING_INCP(obj, esize, n) do { \
> > > > +       /* Legacy queue APIs? */ \
> > > > +       if ((esize) == -1) \
> > > > +               obj = ((void **)obj) + n; \
> > > > +       else \
> > > > +               obj = (void **)(((uint32_t *)obj) + \
> > > > +                                       (n * esize / 
> > > > sizeof(uint32_t))); \ }
> > > while (0)
> > > > +
> > > > +#define TEST_RING_CREATE(name, esize, count, socket_id, flags, r) do
> { \
> > > > +       /* Legacy queue APIs? */ \
> > > > +       if ((esize) == -1) \
> > > > +               r = rte_ring_create((name), (count), (socket_id), 
> > > > (flags)); \
> > > > +       else \
> > > > +               r = rte_ring_create_elem((name), (esize), (count), \
> > > > +                                               (socket_id), (flags)); \
> > > > +} while (0)
> > > > +
> > > > +#define TEST_RING_ENQUEUE(r, obj, esize, n, ret, api_type) do { \
> > > > +       /* Legacy queue APIs? */ \
> > > > +       if ((esize) == -1) \
> > > > +               switch (api_type) { \
> > > > +               case (TEST_RING_N | TEST_RING_SL): \
> > > > +                       ret = rte_ring_enqueue(r, obj); \
> > > > +                       break; \
> > > > +               case (TEST_RING_S | TEST_RING_SL): \
> > > > +                       ret = rte_ring_sp_enqueue(r, obj); \
> > > > +                       break; \
> > > > +               case (TEST_RING_M | TEST_RING_SL): \
> > > > +                       ret = rte_ring_mp_enqueue(r, obj); \
> > > > +                       break; \
> > > > +               case (TEST_RING_N | TEST_RING_BL): \
> > > > +                       ret = rte_ring_enqueue_bulk(r, obj, n, NULL); \
> > > > +                       break; \
> > > > +               case (TEST_RING_S | TEST_RING_BL): \
> > > > +                       ret = rte_ring_sp_enqueue_bulk(r, obj, n, 
> > > > NULL); \
> > > > +                       break; \
> > > > +               case (TEST_RING_M | TEST_RING_BL): \
> > > > +                       ret = rte_ring_mp_enqueue_bulk(r, obj, n, 
> > > > NULL); \
> > > > +                       break; \
> > > > +               case (TEST_RING_N | TEST_RING_BR): \
> > > > +                       ret = rte_ring_enqueue_burst(r, obj, n, NULL); \
> > > > +                       break; \
> > > > +               case (TEST_RING_S | TEST_RING_BR): \
> > > > +                       ret = rte_ring_sp_enqueue_burst(r, obj, n, 
> > > > NULL); \
> > > > +                       break; \
> > > > +               case (TEST_RING_M | TEST_RING_BR): \
> > > > +                       ret = rte_ring_mp_enqueue_burst(r, obj, n, 
> > > > NULL); \
> > > > +               } \
> > > > +       else \
> > > > +               switch (api_type) { \
> > > > +               case (TEST_RING_N | TEST_RING_SL): \
> > > > +                       ret = rte_ring_enqueue_elem(r, obj, esize); \
> > > > +                       break; \
> > > > +               case (TEST_RING_S | TEST_RING_SL): \
> > > > +                       ret = rte_ring_sp_enqueue_elem(r, obj, esize); \
> > > > +                       break; \
> > > > +               case (TEST_RING_M | TEST_RING_SL): \
> > > > +                       ret = rte_ring_mp_enqueue_elem(r, obj, esize); \
> > > > +                       break; \
> > > > +               case (TEST_RING_N | TEST_RING_BL): \
> > > > +                       ret = rte_ring_enqueue_bulk_elem(r, obj, esize, 
> > > > n, \
> > > > +                                                               NULL); \
> > > > +                       break; \
> > > > +               case (TEST_RING_S | TEST_RING_BL): \
> > > > +                       ret = rte_ring_sp_enqueue_bulk_elem(r, obj, 
> > > > esize, n,
> > > \
> > > > +                                                               NULL); \
> > > > +                       break; \
> > > > +               case (TEST_RING_M | TEST_RING_BL): \
> > > > +                       ret = rte_ring_mp_enqueue_bulk_elem(r, obj, 
> > > > esize, n,
> > > \
> > > > +                                                               NULL); \
> > > > +                       break; \
> > > > +               case (TEST_RING_N | TEST_RING_BR): \
> > > > +                       ret = rte_ring_enqueue_burst_elem(r, obj, 
> > > > esize, n, \
> > > > +                                                               NULL); \
> > > > +                       break; \
> > > > +               case (TEST_RING_S | TEST_RING_BR): \
> > > > +                       ret = rte_ring_sp_enqueue_burst_elem(r, obj, 
> > > > esize, n,
> > > \
> > > > +                                                               NULL); \
> > > > +                       break; \
> > > > +               case (TEST_RING_M | TEST_RING_BR): \
> > > > +                       ret = rte_ring_mp_enqueue_burst_elem(r, obj, 
> > > > esize,
> > > n, \
> > > > +                                                               NULL); \
> > > > +               } \
> > > > +} while (0)
> > > > +
> > > > +#define TEST_RING_DEQUEUE(r, obj, esize, n, ret, api_type) do { \
> > > > +       /* Legacy queue APIs? */ \
> > > > +       if ((esize) == -1) \
> > > > +               switch (api_type) { \
> > > > +               case (TEST_RING_N | TEST_RING_SL): \
> > > > +                       ret = rte_ring_dequeue(r, obj); \
> > > > +                       break; \
> > > > +               case (TEST_RING_S | TEST_RING_SL): \
> > > > +                       ret = rte_ring_sc_dequeue(r, obj); \
> > > > +                       break; \
> > > > +               case (TEST_RING_M | TEST_RING_SL): \
> > > > +                       ret = rte_ring_mc_dequeue(r, obj); \
> > > > +                       break; \
> > > > +               case (TEST_RING_N | TEST_RING_BL): \
> > > > +                       ret = rte_ring_dequeue_bulk(r, obj, n, NULL); \
> > > > +                       break; \
> > > > +               case (TEST_RING_S | TEST_RING_BL): \
> > > > +                       ret = rte_ring_sc_dequeue_bulk(r, obj, n, 
> > > > NULL); \
> > > > +                       break; \
> > > > +               case (TEST_RING_M | TEST_RING_BL): \
> > > > +                       ret = rte_ring_mc_dequeue_bulk(r, obj, n, 
> > > > NULL); \
> > > > +                       break; \
> > > > +               case (TEST_RING_N | TEST_RING_BR): \
> > > > +                       ret = rte_ring_dequeue_burst(r, obj, n, NULL); \
> > > > +                       break; \
> > > > +               case (TEST_RING_S | TEST_RING_BR): \
> > > > +                       ret = rte_ring_sc_dequeue_burst(r, obj, n, 
> > > > NULL); \
> > > > +                       break; \
> > > > +               case (TEST_RING_M | TEST_RING_BR): \
> > > > +                       ret = rte_ring_mc_dequeue_burst(r, obj, n, 
> > > > NULL); \
> > > > +               } \
> > > > +       else \
> > > > +               switch (api_type) { \
> > > > +               case (TEST_RING_N | TEST_RING_SL): \
> > > > +                       ret = rte_ring_dequeue_elem(r, obj, esize); \
> > > > +                       break; \
> > > > +               case (TEST_RING_S | TEST_RING_SL): \
> > > > +                       ret = rte_ring_sc_dequeue_elem(r, obj, esize); \
> > > > +                       break; \
> > > > +               case (TEST_RING_M | TEST_RING_SL): \
> > > > +                       ret = rte_ring_mc_dequeue_elem(r, obj, esize); \
> > > > +                       break; \
> > > > +               case (TEST_RING_N | TEST_RING_BL): \
> > > > +                       ret = rte_ring_dequeue_bulk_elem(r, obj, esize, 
> > > > n, \
> > > > +                                                               NULL); \
> > > > +                       break; \
> > > > +               case (TEST_RING_S | TEST_RING_BL): \
> > > > +                       ret = rte_ring_sc_dequeue_bulk_elem(r, obj, 
> > > > esize, n,
> > > \
> > > > +                                                               NULL); \
> > > > +                       break; \
> > > > +               case (TEST_RING_M | TEST_RING_BL): \
> > > > +                       ret = rte_ring_mc_dequeue_bulk_elem(r, obj, 
> > > > esize, n,
> > > \
> > > > +                                                               NULL); \
> > > > +                       break; \
> > > > +               case (TEST_RING_N | TEST_RING_BR): \
> > > > +                       ret = rte_ring_dequeue_burst_elem(r, obj, 
> > > > esize, n, \
> > > > +                                                               NULL); \
> > > > +                       break; \
> > > > +               case (TEST_RING_S | TEST_RING_BR): \
> > > > +                       ret = rte_ring_sc_dequeue_burst_elem(r, obj, 
> > > > esize, n,
> > > \
> > > > +                                                               NULL); \
> > > > +                       break; \
> > > > +               case (TEST_RING_M | TEST_RING_BR): \
> > > > +                       ret = rte_ring_mc_dequeue_burst_elem(r, obj, 
> > > > esize,
> > > n, \
> > > > +                                                               NULL); \
> > > > +               } \
> > > > +} while (0)
> > >
> > >
> > > My thought to avoid test-code duplication was a bit different.
> > Yes, this can be done multiple ways. My implementation is not complicated
> either.
> >
> > > Instead of adding extra enums/parameters and then do switch on them,
> > > my
> > The switch statement should be removed by the compiler for the
> performance tests.
> 
> I am sure the compiler will do its job properly.
> My concern is that with all these extra flags, it is really hard to read and
> understand exactly which function we are calling and what we are trying to
> test.
There are just 2 flags - 1) representing single/bulk/burst 2) representing 
default/single/multiple threads. This is the way the rte_ring APIs are also 
organized (rte_ring_<sp/mp or sc/mc>_enqueue_<bulk/burst>).
If we want to keep the code flexible, we have to keep these 2 flags that can be 
varied.
Your proposal considers only the element size as a variable. It does not 
consider the above mentioned variables. This results in code duplication. This 
is visible in patch 10/17.

> Might be just me, but let's say in the original version for enqueue_bulk() we have:
> 
>         const uint64_t sp_start = rte_rdtsc();
>         for (i = 0; i < iterations; i++)
>                 while (rte_ring_sp_enqueue_bulk(r, burst, size, NULL) == 0)
>                         rte_pause();
>         const uint64_t sp_end = rte_rdtsc();
> 
>         const uint64_t mp_start = rte_rdtsc();
>         for (i = 0; i < iterations; i++)
>                 while (rte_ring_mp_enqueue_bulk(r, burst, size, NULL) == 0)
>                         rte_pause();
>         const uint64_t mp_end = rte_rdtsc();
> 
> Simple and easy to understand.
> The same code after the patch isn't that straightforward anymore:
> 
>  const uint64_t sp_start = rte_rdtsc();
>         for (i = 0; i < iterations; i++)
>                 do {
>                         if (flag == 0)
>                                 TEST_RING_ENQUEUE(r, burst, esize, bsize, ret,
>                                                 TEST_RING_S | TEST_RING_BL);
>                         else if (flag == 1)
>                                 TEST_RING_DEQUEUE(r, burst, esize, bsize, ret,
>                                                 TEST_RING_S | TEST_RING_BL);
Would it help if the #define names are better?
Maybe convert

TEST_RING_SL to TEST_ELEM_SINGLE
TEST_RING_BL to TEST_ELEM_BULK
TEST_RING_BR to TEST_ELEM_BURST

and

TEST_RING_N to TEST_THREAD_DEFAULT
TEST_RING_S to TEST_THREAD_SPSC
TEST_RING_M to TEST_THREAD_MPMC

>                         if (ret == 0)
>                                 rte_pause();
>                 } while (!ret);
>  const uint64_t sp_end = rte_rdtsc();
> 
> Another thing - if tomorrow we'll want to add perf tests for elem_size==4/8,
> etc. - we'll need to do copy/paste for all test-case invocations, as you did
> for 16B (or some code reorg).
This is a mistake on my side. Looking at the code, 'test_ring_perf' can be 
simplified to avoid the copy/paste. 'test_ring_perf' can be changed to call 
another function (that contains the test cases) with different element sizes. I 
will make this change.
The only issue would be the wrappers 'dequeue_bulk', 'dequeue_bulk_16B' etc. 
However, the wrappers are simple enough to maintain.

> 
> >
> > > intention was something like that:
> > >
> > > 1. mv test_ring_perf.c test_ring_perf.h
> > > 2. Inside test_ring_perf.h change rte_ring_ create/enqueue/dequeue function
> > >    calls to some not-defined function/macros invocations.
> > >    With similar name, same number of parameters, and same semantics.
> > >    Also change 'void *burst[..]' to 'RING_ELEM[...]';
> > > 3. For each test configuration we want to have (default, 4B, 8B, 16B)
> > >    create a new .c file where we:
> > >     - define used in test_ring_perf.h macros(/function)
> > >    - include test_ring_perf.h
> > >    -  REGISTER_TEST_COMMAND(<test_name>, test_ring_perf);
> > >
> > > As an example:
> > > test_ring_perf.h:
> > > ...
> > > static int
> > > enqueue_bulk(void *p)
> > > {
> > >         ...
> > >         RING_ELEM burst[MAX_BURST];
> > >
> > >         memset(burst, 0, sizeof(burst));
> > >         ....
> > >         const uint64_t sp_start = rte_rdtsc();
> > >         for (i = 0; i < iterations; i++)
> > >                 while (RING_SP_ENQUEUE_BULK(r, burst, size, NULL) == 0)
> > >                         rte_pause();
> > >         const uint64_t sp_end = rte_rdtsc();
> > >
> > >         const uint64_t mp_start = rte_rdtsc();
> > >         for (i = 0; i < iterations; i++)
> > >                 while (RING_MP_ENQUEUE_BULK(r, burst, size, NULL) == 0)
> > >                         rte_pause();
> > >         const uint64_t mp_end = rte_rdtsc();
> > >         ....
> > >
> > > Then in test_ring_perf.c:
> > >
> > > ....
> > > #define RING_ELEM void *
> > > ...
> > > #define RING_SP_ENQUEUE_BULK(ring, buf, size, spc)  \
> > >        rte_ring_sp_enqueue_bulk(ring, buf, size, spc) ....
> > >
> > > #include "test_ring_perf.h"
> > > REGISTER_TEST_COMMAND(ring_perf_autotest, test_ring_perf);
> > >
> > >
> > > In test_ring_elem16B_perf.c:
> > > ....
> > > #define RING_ELEM __uint128_t
> > > #define RING_SP_ENQUEUE_BULK(ring, buf, size, spc)  \
> > >   rte_ring_sp_enqueue_bulk_elem(ring, buf, sizeof(RING_ELEM), size,
> > > spc) ....
> > > #include "test_ring_perf.h"
> > > REGISTER_TEST_COMMAND(ring_perf_elem16B_autotest, test_ring_perf);
> > >
> > > In test_ring_elem4B_perf.c:
> > >
> > > ....
> > > #define RING_ELEM uint32_t
> > > #define RING_SP_ENQUEUE_BULK(ring, buf, size, spc)  \
> > >   rte_ring_sp_enqueue_bulk_elem(ring, buf, sizeof(RING_ELEM), size,
> > > spc) ....
> > > #include "test_ring_perf.h"
> > > REGISTER_TEST_COMMAND(ring_perf_elem4B_autotest, test_ring_perf);
> > >
> > > And so on.
This will result in additional test files.

> > >
> > > > +
> > > > +/* This function is placed here as it is required for both
> > > > + * performance and functional tests.
> > > > + */
> > > > +static __rte_always_inline void * test_ring_calloc(unsigned int
> > > > +rsize, int esize) {
> > > > +       unsigned int sz;
> > > > +       void *p;
> > > > +
> > > > +       /* Legacy queue APIs? */
> > > > +       if (esize == -1)
> > > > +               sz = sizeof(void *);
> > > > +       else
> > > > +               sz = esize;
> > > > +
> > > > +       p = rte_zmalloc(NULL, rsize * sz, RTE_CACHE_LINE_SIZE);
> > > > +       if (p == NULL)
> > > > +               printf("Failed to allocate memory\n");
> > > > +
> > > > +       return p;
> > > > +}
> > > > --
> > > > 2.17.1

Reply via email to