<snip>
> Subject: [PATCH v5 4/4] eal/atomic: add wrapper for c11 atomic thread fence
>
> Provide a wrapper for __atomic_thread_fence built-in to support optimized
> code for __ATOMIC_SEQ_CST memory order for x86 platforms.
>
> Suggested-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>
> Signed-off-by: Phil Yang <phil.y...@arm.com>
> Reviewed-by: Ola Liljedahl <ola.liljed...@arm.com>
> ---
>  lib/librte_eal/arm/include/rte_atomic_32.h  |  6 ++++++
>  lib/librte_eal/arm/include/rte_atomic_64.h  |  6 ++++++
>  lib/librte_eal/include/generic/rte_atomic.h |  6 ++++++
>  lib/librte_eal/ppc/include/rte_atomic.h     |  6 ++++++
>  lib/librte_eal/x86/include/rte_atomic.h     | 17 +++++++++++++++++
>  5 files changed, 41 insertions(+)
>
> diff --git a/lib/librte_eal/arm/include/rte_atomic_32.h b/lib/librte_eal/arm/include/rte_atomic_32.h
> index 7dc0d06..dbe7cc6 100644
> --- a/lib/librte_eal/arm/include/rte_atomic_32.h
> +++ b/lib/librte_eal/arm/include/rte_atomic_32.h
> @@ -37,6 +37,12 @@ extern "C" {
>
>  #define rte_cio_rmb() rte_rmb()
>
> +static __rte_always_inline void
> +rte_atomic_thread_fence(int mo)
> +{
> +	__atomic_thread_fence(mo);
> +}
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/lib/librte_eal/arm/include/rte_atomic_64.h b/lib/librte_eal/arm/include/rte_atomic_64.h
> index 7b7099c..22ff8ec 100644
> --- a/lib/librte_eal/arm/include/rte_atomic_64.h
> +++ b/lib/librte_eal/arm/include/rte_atomic_64.h
> @@ -41,6 +41,12 @@ extern "C" {
>
>  #define rte_cio_rmb() asm volatile("dmb oshld" : : : "memory")
>
> +static __rte_always_inline void
> +rte_atomic_thread_fence(int mo)
> +{
> +	__atomic_thread_fence(mo);
> +}
> +
>  /*------------------------ 128 bit atomic operations -------------------------*/
>
>  #if defined(__ARM_FEATURE_ATOMICS) || defined(RTE_ARM_FEATURE_ATOMICS)
> diff --git a/lib/librte_eal/include/generic/rte_atomic.h b/lib/librte_eal/include/generic/rte_atomic.h
> index e6ab15a..5b941db 100644
> --- a/lib/librte_eal/include/generic/rte_atomic.h
> +++ b/lib/librte_eal/include/generic/rte_atomic.h
> @@ -158,6 +158,12 @@ static inline void rte_cio_rmb(void);
>  	asm volatile ("" : : : "memory");	\
>  } while(0)
>
> +/**
> + * Synchronization fence between threads based on the specified
> + * memory order.
> + */
> +static inline void rte_atomic_thread_fence(int mo);
> +
>  /*------------------------- 16 bit atomic operations -------------------------*/
>
>  /**
> diff --git a/lib/librte_eal/ppc/include/rte_atomic.h b/lib/librte_eal/ppc/include/rte_atomic.h
> index 7e3e131..91c5f30 100644
> --- a/lib/librte_eal/ppc/include/rte_atomic.h
> +++ b/lib/librte_eal/ppc/include/rte_atomic.h
> @@ -40,6 +40,12 @@ extern "C" {
>
>  #define rte_cio_rmb() rte_rmb()
>
> +static __rte_always_inline void
> +rte_atomic_thread_fence(int mo)
> +{
> +	__atomic_thread_fence(mo);
> +}
> +
>  /*------------------------- 16 bit atomic operations -------------------------*/
>  /* To be compatible with Power7, use GCC built-in functions for 16 bit
>   * operations */
> diff --git a/lib/librte_eal/x86/include/rte_atomic.h b/lib/librte_eal/x86/include/rte_atomic.h
> index b9dcd30..bd256e7 100644
> --- a/lib/librte_eal/x86/include/rte_atomic.h
> +++ b/lib/librte_eal/x86/include/rte_atomic.h
> @@ -83,6 +83,23 @@ rte_smp_mb(void)
>
>  #define rte_cio_rmb() rte_compiler_barrier()
>
> +/**
> + * Synchronization fence between threads based on the specified
> + * memory order.
> + *
> + * On x86 the __atomic_thread_fence(__ATOMIC_SEQ_CST) generates
> + * full 'mfence' which is quite expensive. The optimized
> + * implementation of rte_smp_mb is used instead.
> + */
> +static __rte_always_inline void
> +rte_atomic_thread_fence(int mo)
> +{
> +	if (mo == __ATOMIC_SEQ_CST)
> +		rte_smp_mb();
> +	else
> +		__atomic_thread_fence(mo);
> +}

I think __ATOMIC_SEQ_CST should be needed only rarely. IMO, rte_atomic_thread_fence should be called only for the __ATOMIC_SEQ_CST memory order; for all other orders, __atomic_thread_fence can be used directly. This will help us stick to the atomic built-ins in most cases.
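For example, call sites under that convention would look roughly like the sketch below (the helper names are invented for illustration; only the two fence calls are real):

#include <rte_atomic.h>

/* Weaker orders: call the GCC built-in directly. */
static inline void
publish_flag(volatile int *flag)
{
	__atomic_thread_fence(__ATOMIC_RELEASE);
	__atomic_store_n(flag, 1, __ATOMIC_RELAXED);
}

/* Full barrier: go through the wrapper, which this patch maps to
 * rte_smp_mb() on x86 instead of the more expensive mfence. */
static inline void
full_barrier(void)
{
	rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
}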
Konstantin, is this ok for you?

> +
>  /*------------------------- 16 bit atomic operations -------------------------*/
>
>  #ifndef RTE_FORCE_INTRINSICS
> --
> 2.7.4