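The GCC builtin atomic special case is not needed for updating the tail; performance is the same with the C11 memory model. Remove the duplicate copies of __rte_ring_update_tail() from rte_ring_c11_pvt.h and rte_ring_gcc_pvt.h and keep a single C11-based implementation in rte_ring_elem_pvt.h.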
Signed-off-by: Stephen Hemminger <[email protected]> --- lib/ring/rte_ring_c11_pvt.h | 24 ------------------------ lib/ring/rte_ring_elem_pvt.h | 22 ++++++++++++++++++++++ lib/ring/rte_ring_gcc_pvt.h | 25 ------------------------- 3 files changed, 22 insertions(+), 49 deletions(-) diff --git a/lib/ring/rte_ring_c11_pvt.h b/lib/ring/rte_ring_c11_pvt.h index 8358b0f21f..3258829696 100644 --- a/lib/ring/rte_ring_c11_pvt.h +++ b/lib/ring/rte_ring_c11_pvt.h @@ -19,30 +19,6 @@ * For more information please refer to <rte_ring.h>. */ -/** - * @internal This function updates tail values. - */ -static __rte_always_inline void -__rte_ring_update_tail(struct rte_ring_headtail *ht, uint32_t old_val, - uint32_t new_val, uint32_t single, uint32_t enqueue) -{ - RTE_SET_USED(enqueue); - - /* - * If there are other enqueues/dequeues in progress that preceded us, - * we need to wait for them to complete - */ - if (!single) - rte_wait_until_equal_32((uint32_t *)(uintptr_t)&ht->tail, old_val, - rte_memory_order_relaxed); - - /* - * R0: Establishes a synchronizing edge with load-acquire of tail at A1. - * Ensures that memory effects by this thread on ring elements array - * is observed by a different thread of the other type. 
- */ - rte_atomic_store_explicit(&ht->tail, new_val, rte_memory_order_release); -} /** * @internal This is a helper function that moves the producer/consumer head * optimized for single threaded case diff --git a/lib/ring/rte_ring_elem_pvt.h b/lib/ring/rte_ring_elem_pvt.h index 9a0170c4f0..a7ff76931b 100644 --- a/lib/ring/rte_ring_elem_pvt.h +++ b/lib/ring/rte_ring_elem_pvt.h @@ -299,6 +299,28 @@ __rte_ring_dequeue_elems(struct rte_ring *r, uint32_t cons_head, cons_head & r->mask, esize, num); } +static __rte_always_inline void +__rte_ring_update_tail(struct rte_ring_headtail *ht, uint32_t old_val, + uint32_t new_val, uint32_t single, uint32_t enqueue) +{ + RTE_SET_USED(enqueue); + + /* + * If there are other enqueues/dequeues in progress that preceded us, + * we need to wait for them to complete + */ + if (!single) + rte_wait_until_equal_32((uint32_t *)(uintptr_t)&ht->tail, old_val, + rte_memory_order_relaxed); + + /* + * R0: Establishes a synchronizing edge with load-acquire of tail at A1. + * Ensures that memory effects by this thread on ring elements array + * is observed by a different thread of the other type. + */ + rte_atomic_store_explicit(&ht->tail, new_val, rte_memory_order_release); +} + /* Between load and load. there might be cpu reorder in weak model * (powerpc/arm). * There are 2 choices for the users diff --git a/lib/ring/rte_ring_gcc_pvt.h b/lib/ring/rte_ring_gcc_pvt.h index 9033a15647..6b14c1c822 100644 --- a/lib/ring/rte_ring_gcc_pvt.h +++ b/lib/ring/rte_ring_gcc_pvt.h @@ -18,31 +18,6 @@ * For more information please refer to <rte_ring.h>. */ -/** - * @internal This function updates tail values. 
- */ -static __rte_always_inline void -__rte_ring_update_tail(struct rte_ring_headtail *ht, uint32_t old_val, - uint32_t new_val, uint32_t single, uint32_t enqueue) -{ - RTE_SET_USED(enqueue); - - /* - * If there are other enqueues/dequeues in progress that preceded us, - * we need to wait for them to complete - */ - if (!single) - rte_wait_until_equal_32((volatile uint32_t *)(uintptr_t)&ht->tail, old_val, - rte_memory_order_relaxed); - - /* - * R0: Establishes a synchronizing edge with load-acquire of tail at A1. - * Ensures that memory effects by this thread on ring elements array - * is observed by a different thread of the other type. - */ - __atomic_store_n(&ht->tail, new_val, __ATOMIC_RELEASE); -} - /** * @internal This is a helper function that moves the producer/consumer head * for use in multi-thread safe path -- 2.53.0

