> > <snip> > > > Subject: [PATCH v5 4/4] eal/atomic: add wrapper for c11 atomic thread fence > > > > Provide a wrapper for __atomic_thread_fence built-in to support optimized > > code for __ATOMIC_SEQ_CST memory order for x86 platforms. > > > > Suggested-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com> > > Signed-off-by: Phil Yang <phil.y...@arm.com> > > Reviewed-by: Ola Liljedahl <ola.liljed...@arm.com> > > --- > > lib/librte_eal/arm/include/rte_atomic_32.h | 6 ++++++ > > lib/librte_eal/arm/include/rte_atomic_64.h | 6 ++++++ > > lib/librte_eal/include/generic/rte_atomic.h | 6 ++++++ > > lib/librte_eal/ppc/include/rte_atomic.h | 6 ++++++ > > lib/librte_eal/x86/include/rte_atomic.h | 17 +++++++++++++++++ > > 5 files changed, 41 insertions(+) > > > > diff --git a/lib/librte_eal/arm/include/rte_atomic_32.h > > b/lib/librte_eal/arm/include/rte_atomic_32.h > > index 7dc0d06..dbe7cc6 100644 > > --- a/lib/librte_eal/arm/include/rte_atomic_32.h > > +++ b/lib/librte_eal/arm/include/rte_atomic_32.h > > @@ -37,6 +37,12 @@ extern "C" { > > > > #define rte_cio_rmb() rte_rmb() > > > > +static __rte_always_inline void > > +rte_atomic_thread_fence(int mo) > > +{ > > + __atomic_thread_fence(mo); > > +} > > + > > #ifdef __cplusplus > > } > > #endif > > diff --git a/lib/librte_eal/arm/include/rte_atomic_64.h > > b/lib/librte_eal/arm/include/rte_atomic_64.h > > index 7b7099c..22ff8ec 100644 > > --- a/lib/librte_eal/arm/include/rte_atomic_64.h > > +++ b/lib/librte_eal/arm/include/rte_atomic_64.h > > @@ -41,6 +41,12 @@ extern "C" { > > > > #define rte_cio_rmb() asm volatile("dmb oshld" : : : "memory") > > > > +static __rte_always_inline void > > +rte_atomic_thread_fence(int mo) > > +{ > > + __atomic_thread_fence(mo); > > +} > > + > > /*------------------------ 128 bit atomic operations > > -------------------------*/ > > > > #if defined(__ARM_FEATURE_ATOMICS) || > > defined(RTE_ARM_FEATURE_ATOMICS) diff --git > > a/lib/librte_eal/include/generic/rte_atomic.h > > 
b/lib/librte_eal/include/generic/rte_atomic.h > > index e6ab15a..5b941db 100644 > > --- a/lib/librte_eal/include/generic/rte_atomic.h > > +++ b/lib/librte_eal/include/generic/rte_atomic.h > > @@ -158,6 +158,12 @@ static inline void rte_cio_rmb(void); > > asm volatile ("" : : : "memory"); \ > > } while(0) > > > > +/** > > + * Synchronization fence between threads based on the specified > > + * memory order. > > + */ > > +static inline void rte_atomic_thread_fence(int mo); > > + > > /*------------------------- 16 bit atomic operations > > -------------------------*/ > > > > /** > > diff --git a/lib/librte_eal/ppc/include/rte_atomic.h > > b/lib/librte_eal/ppc/include/rte_atomic.h > > index 7e3e131..91c5f30 100644 > > --- a/lib/librte_eal/ppc/include/rte_atomic.h > > +++ b/lib/librte_eal/ppc/include/rte_atomic.h > > @@ -40,6 +40,12 @@ extern "C" { > > > > #define rte_cio_rmb() rte_rmb() > > > > +static __rte_always_inline void > > +rte_atomic_thread_fence(int mo) > > +{ > > + __atomic_thread_fence(mo); > > +} > > + > > /*------------------------- 16 bit atomic operations > > -------------------------*/ > > /* To be compatible with Power7, use GCC built-in functions for 16 bit > > * operations */ > > diff --git a/lib/librte_eal/x86/include/rte_atomic.h > > b/lib/librte_eal/x86/include/rte_atomic.h > > index b9dcd30..bd256e7 100644 > > --- a/lib/librte_eal/x86/include/rte_atomic.h > > +++ b/lib/librte_eal/x86/include/rte_atomic.h > > @@ -83,6 +83,23 @@ rte_smp_mb(void) > > > > #define rte_cio_rmb() rte_compiler_barrier() > > > > +/** > > + * Synchronization fence between threads based on the specified > > + * memory order. > > + * > > + * On x86 the __atomic_thread_fence(__ATOMIC_SEQ_CST) generates > > + * full 'mfence' which is quite expensive. The optimized > > + * implementation of rte_smp_mb is used instead. 
> > + */ > > +static __rte_always_inline void > > +rte_atomic_thread_fence(int mo) > > +{ > > + if (mo == __ATOMIC_SEQ_CST) > > + rte_smp_mb(); > > + else > > + __atomic_thread_fence(mo); > > +} > I think __ATOMIC_SEQ_CST needs to be used rarely. IMO, > rte_atomic_thread_fence should be called only for __ATOMIC_SEQ_CST memory > order. For all others the __atomic_thread_fence can be used directly. This > will help us to stick to using the atomic built-ins in most of the > cases. > > Konstantin, is this ok for you?
My preference is to have one generic rte_atomic_thread_fence() for all cases (i.e., the current implementation from Phil looks good to me). I think it is a more consistent approach and would help avoid confusion. Konstantin