To: @Mattias, @Ferruh, @Bruce, participants in a related discussion

> From: Stephen Hemminger [mailto:step...@networkplumber.org]
> Sent: Tuesday, 14 May 2024 17.35
>
> This header implements 64 bit counters that are NOT atomic
> but are safe against load/store splits on 32 bit platforms.
>
> Signed-off-by: Stephen Hemminger <step...@networkplumber.org>
> Acked-by: Morten Brørup <m...@smartsharesystems.com>
> ---
With a long term perspective, I consider this patch very useful.
Its 32 bit implementation can be optimized for various architectures/compilers later.

In addition, it would be "nice to have" if reset() and fetch() could be called from a thread other than the thread adding to the counter.

As previously discussed [1], I think this can be done without significantly affecting fast path add() performance, by using an "offset" with Release-Consume ordering:

[1]: https://inbox.dpdk.org/dev/98cbd80474fa8b44bf855df32c47dc35e9f...@smartserver.smartshare.dk/

rte_counter64_add(rte_counter64_t *counter, uint32_t val)
{
	// Write "counter" with memory_order_relaxed, so
	// it eventually becomes visible in other threads.
	rte_counter64_t ctr = *counter + val;
	rte_atomic_store_explicit(counter, ctr, rte_memory_order_relaxed);
}

rte_counter64_get(rte_counter64_t *counter, rte_counter64_t *offset)
{
	// Read "offset" with memory_order_consume, so:
	// - no reads or writes in the current thread dependent on "offset"
	//   can be reordered before this load, and
	// - writes to "counter" (a data-dependent variable)
	//   in other threads that release "offset" are visible in the current thread.
	rte_counter64_t off = rte_atomic_load_explicit(offset, rte_memory_order_consume);
	rte_counter64_t ctr = rte_atomic_load_explicit(counter, rte_memory_order_relaxed);
	return ctr - off;
}

rte_counter64_reset(rte_counter64_t *counter, rte_counter64_t *offset)
{
	// Write "offset" with memory_order_release, so
	// "counter" cannot be visible after it.
	rte_counter64_t ctr = rte_atomic_load_explicit(counter, rte_memory_order_relaxed);
	rte_atomic_store_explicit(offset, ctr, rte_memory_order_release);
}

Support for counters shared by multiple threads, e.g. rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed, should be provided too:

rte_counter64_mt_add(rte_counter64_t *counter, uint32_t val)
{
	rte_atomic_fetch_add_explicit(counter, val, rte_memory_order_relaxed);
}
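To make the intended interaction concrete, here is a minimal usage sketch of the offset scheme above. It assumes the rte_counter64_add/get/reset variants proposed above; the struct and function names (rxq_stats, rxq_count_burst, etc.) are made up for illustration. The forwarding thread only ever calls add(), while a control thread may call get() and reset() concurrently:

struct rxq_stats {
	rte_counter64_t packets;        // written only by the forwarding thread
	rte_counter64_t packets_offset; // written only by the control thread (reset)
};

// Fast path, forwarding thread: single writer, relaxed store only.
static void
rxq_count_burst(struct rxq_stats *st, uint32_t nb_pkts)
{
	rte_counter64_add(&st->packets, nb_pkts);
}

// Control thread: value accumulated since the last reset.
static uint64_t
rxq_stats_get(struct rxq_stats *st)
{
	return rte_counter64_get(&st->packets, &st->packets_offset);
}

// Control thread: "reset" by snapshotting the current value into the offset;
// the forwarding thread never reads or writes the offset.
static void
rxq_stats_reset(struct rxq_stats *st)
{
	rte_counter64_reset(&st->packets, &st->packets_offset);
}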
>  lib/eal/include/meson.build   |  1 +
>  lib/eal/include/rte_counter.h | 91 +++++++++++++++++++++++++++++++++++
>  2 files changed, 92 insertions(+)
>  create mode 100644 lib/eal/include/rte_counter.h
>
> diff --git a/lib/eal/include/meson.build b/lib/eal/include/meson.build
> index e94b056d46..c070dd0079 100644
> --- a/lib/eal/include/meson.build
> +++ b/lib/eal/include/meson.build
> @@ -12,6 +12,7 @@ headers += files(
>          'rte_class.h',
>          'rte_common.h',
>          'rte_compat.h',
> +        'rte_counter.h',
>          'rte_debug.h',
>          'rte_dev.h',
>          'rte_devargs.h',
> diff --git a/lib/eal/include/rte_counter.h b/lib/eal/include/rte_counter.h
> new file mode 100644
> index 0000000000..8068d6d26e
> --- /dev/null
> +++ b/lib/eal/include/rte_counter.h
> @@ -0,0 +1,91 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright (c) Stephen Hemminger <step...@networkplumber.org>
> + */
> +
> +#ifndef _RTE_COUNTER_H_
> +#define _RTE_COUNTER_H_
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +/**
> + * @file
> + * RTE Counter
> + *
> + * A counter is 64 bit value that is safe from split read/write
> + * on 32 bit platforms. It assumes that only one cpu at a time
> + * will update the counter, and another CPU may want to read it.
> + *
> + * This is a much weaker guarantee than full atomic variables
> + * but is faster since no locked operations are required for update.
> + */
> +
> +#include <stdatomic.h>
> +
> +#ifdef RTE_ARCH_64
> +/*
> + * On a platform that can support native 64 bit type, no special handling.
> + * These are just wrapper around 64 bit value.
> + */
> +typedef uint64_t rte_counter64_t;
> +
> +/**
> + * Add value to counter.
> + */
> +__rte_experimental
> +static inline void
> +rte_counter64_add(rte_counter64_t *counter, uint32_t val)
> +{
> +	*counter += val;
> +}
> +
> +__rte_experimental
> +static inline uint64_t
> +rte_counter64_fetch(const rte_counter64_t *counter)
> +{
> +	return *counter;
> +}
> +
> +__rte_experimental
> +static inline void
> +rte_counter64_reset(rte_counter64_t *counter)
> +{
> +	*counter = 0;
> +}
> +
> +#else
> +/*
> + * On a 32 bit platform need to use atomic to force the compiler to not
> + * split 64 bit read/write.
> + */
> +typedef RTE_ATOMIC(uint64_t) rte_counter64_t;
> +
> +__rte_experimental
> +static inline void
> +rte_counter64_add(rte_counter64_t *counter, uint32_t val)
> +{
> +	rte_atomic_fetch_add_explicit(counter, val, rte_memory_order_relaxed);
> +}
> +
> +__rte_experimental
> +static inline uint64_t
> +rte_counter64_fetch(const rte_counter64_t *counter)
> +{
> +	return rte_atomic_load_explicit(counter, rte_memory_order_relaxed);
> +}
> +
> +__rte_experimental
> +static inline void
> +rte_counter64_reset(rte_counter64_t *counter)
> +{
> +	rte_atomic_store_explicit(counter, 0, rte_memory_order_relaxed);
> +}
> +#endif
> +
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _RTE_COUNTER_H_ */
> --
> 2.43.0
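For readers following the thread, a minimal usage sketch of the API as posted (not part of the patch; the struct and function names such as txq_counters and txq_count are made up): each counter has exactly one updating lcore, and any other thread may fetch it without risking a split read. Resetting from another thread is exactly the case the offset proposal above is meant to cover.

#include <stdint.h>
#include <rte_counter.h>	/* header added by this patch */

struct txq_counters {
	rte_counter64_t tx_packets;	/* updated by one polling lcore only */
	rte_counter64_t tx_errors;
};

/* Polling lcore: the single writer of these counters. */
static inline void
txq_count(struct txq_counters *c, uint16_t nb_sent, uint16_t nb_err)
{
	rte_counter64_add(&c->tx_packets, nb_sent);
	rte_counter64_add(&c->tx_errors, nb_err);
}

/* Any other thread: reading is safe against load/store splits. */
static inline uint64_t
txq_read_packets(const struct txq_counters *c)
{
	return rte_counter64_fetch(&c->tx_packets);
}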