Reimplement the legacy rte_ring APIs as thin wrappers around the rte_ring_xxx_elem_xxx APIs, passing sizeof(void *) as the element size. This reduces code duplication and makes the code easier to maintain.
Tests done on Arm, x86 [1] and PPC [2] do not indicate performance degradation. [1] https://mails.dpdk.org/archives/dev/2020-July/173780.html [2] https://mails.dpdk.org/archives/dev/2020-July/173863.html Signed-off-by: Feifei Wang <feifei.wa...@arm.com> Reviewed-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com> Tested-by: Konstantin Ananyev <konstantin.anan...@intel.com> Tested-by: David Christensen <d...@linux.vnet.ibm.com> Acked-by: Konstantin Ananyev <konstantin.anan...@intel.com> --- v2: 1. add performance tests in other platforms. (Konstantin, David) 2. removing the perf data from the commit message. (Honnappa) 3. fix the error of the pointer (*obj) passing. (ci failed) lib/librte_ring/rte_ring.h | 284 ++++--------------------------------- 1 file changed, 30 insertions(+), 254 deletions(-) diff --git a/lib/librte_ring/rte_ring.h b/lib/librte_ring/rte_ring.h index 35f3f8c42..da17ed6d7 100644 --- a/lib/librte_ring/rte_ring.h +++ b/lib/librte_ring/rte_ring.h @@ -191,168 +191,6 @@ void rte_ring_free(struct rte_ring *r); */ void rte_ring_dump(FILE *f, const struct rte_ring *r); -/* the actual enqueue of pointers on the ring. 
- * Placed here since identical code needed in both - * single and multi producer enqueue functions */ -#define ENQUEUE_PTRS(r, ring_start, prod_head, obj_table, n, obj_type) do { \ - unsigned int i; \ - const uint32_t size = (r)->size; \ - uint32_t idx = prod_head & (r)->mask; \ - obj_type *ring = (obj_type *)ring_start; \ - if (likely(idx + n < size)) { \ - for (i = 0; i < (n & ~0x3); i += 4, idx += 4) { \ - ring[idx] = obj_table[i]; \ - ring[idx + 1] = obj_table[i + 1]; \ - ring[idx + 2] = obj_table[i + 2]; \ - ring[idx + 3] = obj_table[i + 3]; \ - } \ - switch (n & 0x3) { \ - case 3: \ - ring[idx++] = obj_table[i++]; /* fallthrough */ \ - case 2: \ - ring[idx++] = obj_table[i++]; /* fallthrough */ \ - case 1: \ - ring[idx++] = obj_table[i++]; \ - } \ - } else { \ - for (i = 0; idx < size; i++, idx++)\ - ring[idx] = obj_table[i]; \ - for (idx = 0; i < n; i++, idx++) \ - ring[idx] = obj_table[i]; \ - } \ -} while (0) - -/* the actual copy of pointers on the ring to obj_table. - * Placed here since identical code needed in both - * single and multi consumer dequeue functions */ -#define DEQUEUE_PTRS(r, ring_start, cons_head, obj_table, n, obj_type) do { \ - unsigned int i; \ - uint32_t idx = cons_head & (r)->mask; \ - const uint32_t size = (r)->size; \ - obj_type *ring = (obj_type *)ring_start; \ - if (likely(idx + n < size)) { \ - for (i = 0; i < (n & ~0x3); i += 4, idx += 4) {\ - obj_table[i] = ring[idx]; \ - obj_table[i + 1] = ring[idx + 1]; \ - obj_table[i + 2] = ring[idx + 2]; \ - obj_table[i + 3] = ring[idx + 3]; \ - } \ - switch (n & 0x3) { \ - case 3: \ - obj_table[i++] = ring[idx++]; /* fallthrough */ \ - case 2: \ - obj_table[i++] = ring[idx++]; /* fallthrough */ \ - case 1: \ - obj_table[i++] = ring[idx++]; \ - } \ - } else { \ - for (i = 0; idx < size; i++, idx++) \ - obj_table[i] = ring[idx]; \ - for (idx = 0; i < n; i++, idx++) \ - obj_table[i] = ring[idx]; \ - } \ -} while (0) - -/* Between load and load. 
there might be cpu reorder in weak model - * (powerpc/arm). - * There are 2 choices for the users - * 1.use rmb() memory barrier - * 2.use one-direction load_acquire/store_release barrier,defined by - * CONFIG_RTE_USE_C11_MEM_MODEL=y - * It depends on performance test results. - * By default, move common functions to rte_ring_generic.h - */ -#ifdef RTE_USE_C11_MEM_MODEL -#include "rte_ring_c11_mem.h" -#else -#include "rte_ring_generic.h" -#endif - -/** - * @internal Enqueue several objects on the ring - * - * @param r - * A pointer to the ring structure. - * @param obj_table - * A pointer to a table of void * pointers (objects). - * @param n - * The number of objects to add in the ring from the obj_table. - * @param behavior - * RTE_RING_QUEUE_FIXED: Enqueue a fixed number of items from a ring - * RTE_RING_QUEUE_VARIABLE: Enqueue as many items as possible from ring - * @param is_sp - * Indicates whether to use single producer or multi-producer head update - * @param free_space - * returns the amount of space after the enqueue operation has finished - * @return - * Actual number of objects enqueued. - * If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only. - */ -static __rte_always_inline unsigned int -__rte_ring_do_enqueue(struct rte_ring *r, void * const *obj_table, - unsigned int n, enum rte_ring_queue_behavior behavior, - unsigned int is_sp, unsigned int *free_space) -{ - uint32_t prod_head, prod_next; - uint32_t free_entries; - - n = __rte_ring_move_prod_head(r, is_sp, n, behavior, - &prod_head, &prod_next, &free_entries); - if (n == 0) - goto end; - - ENQUEUE_PTRS(r, &r[1], prod_head, obj_table, n, void *); - - update_tail(&r->prod, prod_head, prod_next, is_sp, 1); -end: - if (free_space != NULL) - *free_space = free_entries - n; - return n; -} - -/** - * @internal Dequeue several objects from the ring - * - * @param r - * A pointer to the ring structure. - * @param obj_table - * A pointer to a table of void * pointers (objects). 
- * @param n - * The number of objects to pull from the ring. - * @param behavior - * RTE_RING_QUEUE_FIXED: Dequeue a fixed number of items from a ring - * RTE_RING_QUEUE_VARIABLE: Dequeue as many items as possible from ring - * @param is_sc - * Indicates whether to use single consumer or multi-consumer head update - * @param available - * returns the number of remaining ring entries after the dequeue has finished - * @return - * - Actual number of objects dequeued. - * If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only. - */ -static __rte_always_inline unsigned int -__rte_ring_do_dequeue(struct rte_ring *r, void **obj_table, - unsigned int n, enum rte_ring_queue_behavior behavior, - unsigned int is_sc, unsigned int *available) -{ - uint32_t cons_head, cons_next; - uint32_t entries; - - n = __rte_ring_move_cons_head(r, (int)is_sc, n, behavior, - &cons_head, &cons_next, &entries); - if (n == 0) - goto end; - - DEQUEUE_PTRS(r, &r[1], cons_head, obj_table, n, void *); - - update_tail(&r->cons, cons_head, cons_next, is_sc, 0); - -end: - if (available != NULL) - *available = entries - n; - return n; -} - /** * Enqueue several objects on the ring (multi-producers safe). 
* @@ -375,8 +213,8 @@ static __rte_always_inline unsigned int rte_ring_mp_enqueue_bulk(struct rte_ring *r, void * const *obj_table, unsigned int n, unsigned int *free_space) { - return __rte_ring_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_FIXED, - RTE_RING_SYNC_MT, free_space); + return rte_ring_mp_enqueue_bulk_elem(r, obj_table, sizeof(void *), + n, free_space); } /** @@ -398,8 +236,8 @@ static __rte_always_inline unsigned int rte_ring_sp_enqueue_bulk(struct rte_ring *r, void * const *obj_table, unsigned int n, unsigned int *free_space) { - return __rte_ring_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_FIXED, - RTE_RING_SYNC_ST, free_space); + return rte_ring_sp_enqueue_bulk_elem(r, obj_table, sizeof(void *), + n, free_space); } /** @@ -425,24 +263,8 @@ static __rte_always_inline unsigned int rte_ring_enqueue_bulk(struct rte_ring *r, void * const *obj_table, unsigned int n, unsigned int *free_space) { - switch (r->prod.sync_type) { - case RTE_RING_SYNC_MT: - return rte_ring_mp_enqueue_bulk(r, obj_table, n, free_space); - case RTE_RING_SYNC_ST: - return rte_ring_sp_enqueue_bulk(r, obj_table, n, free_space); -#ifdef ALLOW_EXPERIMENTAL_API - case RTE_RING_SYNC_MT_RTS: - return rte_ring_mp_rts_enqueue_bulk(r, obj_table, n, - free_space); - case RTE_RING_SYNC_MT_HTS: - return rte_ring_mp_hts_enqueue_bulk(r, obj_table, n, - free_space); -#endif - } - - /* valid ring should never reach this point */ - RTE_ASSERT(0); - return 0; + return rte_ring_enqueue_bulk_elem(r, obj_table, sizeof(void *), + n, free_space); } /** @@ -462,7 +284,7 @@ rte_ring_enqueue_bulk(struct rte_ring *r, void * const *obj_table, static __rte_always_inline int rte_ring_mp_enqueue(struct rte_ring *r, void *obj) { - return rte_ring_mp_enqueue_bulk(r, &obj, 1, NULL) ? 
0 : -ENOBUFS; + return rte_ring_mp_enqueue_elem(r, &obj, sizeof(void *)); } /** @@ -479,7 +301,7 @@ rte_ring_mp_enqueue(struct rte_ring *r, void *obj) static __rte_always_inline int rte_ring_sp_enqueue(struct rte_ring *r, void *obj) { - return rte_ring_sp_enqueue_bulk(r, &obj, 1, NULL) ? 0 : -ENOBUFS; + return rte_ring_sp_enqueue_elem(r, &obj, sizeof(void *)); } /** @@ -500,7 +322,7 @@ rte_ring_sp_enqueue(struct rte_ring *r, void *obj) static __rte_always_inline int rte_ring_enqueue(struct rte_ring *r, void *obj) { - return rte_ring_enqueue_bulk(r, &obj, 1, NULL) ? 0 : -ENOBUFS; + return rte_ring_enqueue_elem(r, &obj, sizeof(void *)); } /** @@ -525,8 +347,8 @@ static __rte_always_inline unsigned int rte_ring_mc_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned int n, unsigned int *available) { - return __rte_ring_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_FIXED, - RTE_RING_SYNC_MT, available); + return rte_ring_mc_dequeue_bulk_elem(r, obj_table, sizeof(void *), + n, available); } /** @@ -549,8 +371,8 @@ static __rte_always_inline unsigned int rte_ring_sc_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned int n, unsigned int *available) { - return __rte_ring_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_FIXED, - RTE_RING_SYNC_ST, available); + return rte_ring_sc_dequeue_bulk_elem(r, obj_table, sizeof(void *), + n, available); } /** @@ -576,22 +398,8 @@ static __rte_always_inline unsigned int rte_ring_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned int n, unsigned int *available) { - switch (r->cons.sync_type) { - case RTE_RING_SYNC_MT: - return rte_ring_mc_dequeue_bulk(r, obj_table, n, available); - case RTE_RING_SYNC_ST: - return rte_ring_sc_dequeue_bulk(r, obj_table, n, available); -#ifdef ALLOW_EXPERIMENTAL_API - case RTE_RING_SYNC_MT_RTS: - return rte_ring_mc_rts_dequeue_bulk(r, obj_table, n, available); - case RTE_RING_SYNC_MT_HTS: - return rte_ring_mc_hts_dequeue_bulk(r, obj_table, n, available); -#endif - } - - /* valid ring should 
never reach this point */ - RTE_ASSERT(0); - return 0; + return rte_ring_dequeue_bulk_elem(r, obj_table, sizeof(void *), + n, available); } /** @@ -612,7 +420,7 @@ rte_ring_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned int n, static __rte_always_inline int rte_ring_mc_dequeue(struct rte_ring *r, void **obj_p) { - return rte_ring_mc_dequeue_bulk(r, obj_p, 1, NULL) ? 0 : -ENOENT; + return rte_ring_mc_dequeue_elem(r, obj_p, sizeof(void *)); } /** @@ -630,7 +438,7 @@ rte_ring_mc_dequeue(struct rte_ring *r, void **obj_p) static __rte_always_inline int rte_ring_sc_dequeue(struct rte_ring *r, void **obj_p) { - return rte_ring_sc_dequeue_bulk(r, obj_p, 1, NULL) ? 0 : -ENOENT; + return rte_ring_sc_dequeue_elem(r, obj_p, sizeof(void *)); } /** @@ -652,7 +460,7 @@ rte_ring_sc_dequeue(struct rte_ring *r, void **obj_p) static __rte_always_inline int rte_ring_dequeue(struct rte_ring *r, void **obj_p) { - return rte_ring_dequeue_bulk(r, obj_p, 1, NULL) ? 0 : -ENOENT; + return rte_ring_dequeue_elem(r, obj_p, sizeof(void *)); } /** @@ -860,8 +668,8 @@ static __rte_always_inline unsigned int rte_ring_mp_enqueue_burst(struct rte_ring *r, void * const *obj_table, unsigned int n, unsigned int *free_space) { - return __rte_ring_do_enqueue(r, obj_table, n, - RTE_RING_QUEUE_VARIABLE, RTE_RING_SYNC_MT, free_space); + return rte_ring_mp_enqueue_burst_elem(r, obj_table, sizeof(void *), + n, free_space); } /** @@ -883,8 +691,8 @@ static __rte_always_inline unsigned int rte_ring_sp_enqueue_burst(struct rte_ring *r, void * const *obj_table, unsigned int n, unsigned int *free_space) { - return __rte_ring_do_enqueue(r, obj_table, n, - RTE_RING_QUEUE_VARIABLE, RTE_RING_SYNC_ST, free_space); + return rte_ring_sp_enqueue_burst_elem(r, obj_table, sizeof(void *), + n, free_space); } /** @@ -910,24 +718,8 @@ static __rte_always_inline unsigned int rte_ring_enqueue_burst(struct rte_ring *r, void * const *obj_table, unsigned int n, unsigned int *free_space) { - switch (r->prod.sync_type) { 
- case RTE_RING_SYNC_MT: - return rte_ring_mp_enqueue_burst(r, obj_table, n, free_space); - case RTE_RING_SYNC_ST: - return rte_ring_sp_enqueue_burst(r, obj_table, n, free_space); -#ifdef ALLOW_EXPERIMENTAL_API - case RTE_RING_SYNC_MT_RTS: - return rte_ring_mp_rts_enqueue_burst(r, obj_table, n, - free_space); - case RTE_RING_SYNC_MT_HTS: - return rte_ring_mp_hts_enqueue_burst(r, obj_table, n, - free_space); -#endif - } - - /* valid ring should never reach this point */ - RTE_ASSERT(0); - return 0; + return rte_ring_enqueue_burst_elem(r, obj_table, sizeof(void *), + n, free_space); } /** @@ -954,8 +746,8 @@ static __rte_always_inline unsigned int rte_ring_mc_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned int n, unsigned int *available) { - return __rte_ring_do_dequeue(r, obj_table, n, - RTE_RING_QUEUE_VARIABLE, RTE_RING_SYNC_MT, available); + return rte_ring_mc_dequeue_burst_elem(r, obj_table, sizeof(void *), + n, available); } /** @@ -979,8 +771,8 @@ static __rte_always_inline unsigned int rte_ring_sc_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned int n, unsigned int *available) { - return __rte_ring_do_dequeue(r, obj_table, n, - RTE_RING_QUEUE_VARIABLE, RTE_RING_SYNC_ST, available); + return rte_ring_sc_dequeue_burst_elem(r, obj_table, sizeof(void *), + n, available); } /** @@ -1006,24 +798,8 @@ static __rte_always_inline unsigned int rte_ring_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned int n, unsigned int *available) { - switch (r->cons.sync_type) { - case RTE_RING_SYNC_MT: - return rte_ring_mc_dequeue_burst(r, obj_table, n, available); - case RTE_RING_SYNC_ST: - return rte_ring_sc_dequeue_burst(r, obj_table, n, available); -#ifdef ALLOW_EXPERIMENTAL_API - case RTE_RING_SYNC_MT_RTS: - return rte_ring_mc_rts_dequeue_burst(r, obj_table, n, - available); - case RTE_RING_SYNC_MT_HTS: - return rte_ring_mc_hts_dequeue_burst(r, obj_table, n, - available); -#endif - } - - /* valid ring should never reach this point */ - 
RTE_ASSERT(0); - return 0; + return rte_ring_dequeue_burst_elem(r, obj_table, sizeof(void *), + n, available); } #ifdef __cplusplus -- 2.17.1