<snip>

> Subject: [PATCH v5 4/4] eal/atomic: add wrapper for c11 atomic thread fence
> 
> Provide a wrapper for __atomic_thread_fence built-in to support optimized
> code for __ATOMIC_SEQ_CST memory order for x86 platforms.
> 
> Suggested-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>
> Signed-off-by: Phil Yang <phil.y...@arm.com>
> Reviewed-by: Ola Liljedahl <ola.liljed...@arm.com>
> ---
>  lib/librte_eal/arm/include/rte_atomic_32.h  |  6 ++++++
> lib/librte_eal/arm/include/rte_atomic_64.h  |  6 ++++++
> lib/librte_eal/include/generic/rte_atomic.h |  6 ++++++
>  lib/librte_eal/ppc/include/rte_atomic.h     |  6 ++++++
>  lib/librte_eal/x86/include/rte_atomic.h     | 17 +++++++++++++++++
>  5 files changed, 41 insertions(+)
> 
> diff --git a/lib/librte_eal/arm/include/rte_atomic_32.h
> b/lib/librte_eal/arm/include/rte_atomic_32.h
> index 7dc0d06..dbe7cc6 100644
> --- a/lib/librte_eal/arm/include/rte_atomic_32.h
> +++ b/lib/librte_eal/arm/include/rte_atomic_32.h
> @@ -37,6 +37,12 @@ extern "C" {
> 
>  #define rte_cio_rmb() rte_rmb()
> 
> +static __rte_always_inline void
> +rte_atomic_thread_fence(int mo)
> +{
> +     __atomic_thread_fence(mo);
> +}
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/lib/librte_eal/arm/include/rte_atomic_64.h
> b/lib/librte_eal/arm/include/rte_atomic_64.h
> index 7b7099c..22ff8ec 100644
> --- a/lib/librte_eal/arm/include/rte_atomic_64.h
> +++ b/lib/librte_eal/arm/include/rte_atomic_64.h
> @@ -41,6 +41,12 @@ extern "C" {
> 
>  #define rte_cio_rmb() asm volatile("dmb oshld" : : : "memory")
> 
> +static __rte_always_inline void
> +rte_atomic_thread_fence(int mo)
> +{
> +     __atomic_thread_fence(mo);
> +}
> +
>  /*------------------------ 128 bit atomic operations 
> -------------------------*/
> 
>  #if defined(__ARM_FEATURE_ATOMICS) ||
> defined(RTE_ARM_FEATURE_ATOMICS) diff --git
> a/lib/librte_eal/include/generic/rte_atomic.h
> b/lib/librte_eal/include/generic/rte_atomic.h
> index e6ab15a..5b941db 100644
> --- a/lib/librte_eal/include/generic/rte_atomic.h
> +++ b/lib/librte_eal/include/generic/rte_atomic.h
> @@ -158,6 +158,12 @@ static inline void rte_cio_rmb(void);
>       asm volatile ("" : : : "memory");       \
>  } while(0)
> 
> +/**
> + * Synchronization fence between threads based on the specified
> + * memory order.
> + */
> +static inline void rte_atomic_thread_fence(int mo);
> +
>  /*------------------------- 16 bit atomic operations 
> -------------------------*/
> 
>  /**
> diff --git a/lib/librte_eal/ppc/include/rte_atomic.h
> b/lib/librte_eal/ppc/include/rte_atomic.h
> index 7e3e131..91c5f30 100644
> --- a/lib/librte_eal/ppc/include/rte_atomic.h
> +++ b/lib/librte_eal/ppc/include/rte_atomic.h
> @@ -40,6 +40,12 @@ extern "C" {
> 
>  #define rte_cio_rmb() rte_rmb()
> 
> +static __rte_always_inline void
> +rte_atomic_thread_fence(int mo)
> +{
> +     __atomic_thread_fence(mo);
> +}
> +
>  /*------------------------- 16 bit atomic operations 
> -------------------------*/
>  /* To be compatible with Power7, use GCC built-in functions for 16 bit
>   * operations */
> diff --git a/lib/librte_eal/x86/include/rte_atomic.h
> b/lib/librte_eal/x86/include/rte_atomic.h
> index b9dcd30..bd256e7 100644
> --- a/lib/librte_eal/x86/include/rte_atomic.h
> +++ b/lib/librte_eal/x86/include/rte_atomic.h
> @@ -83,6 +83,23 @@ rte_smp_mb(void)
> 
>  #define rte_cio_rmb() rte_compiler_barrier()
> 
> +/**
> + * Synchronization fence between threads based on the specified
> + * memory order.
> + *
> + * On x86 the __atomic_thread_fence(__ATOMIC_SEQ_CST) generates
> + * full 'mfence' which is quite expensive. The optimized
> + * implementation of rte_smp_mb is used instead.
> + */
> +static __rte_always_inline void
> +rte_atomic_thread_fence(int mo)
> +{
> +     if (mo == __ATOMIC_SEQ_CST)
> +             rte_smp_mb();
> +     else
> +             __atomic_thread_fence(mo);
> +}
I think __ATOMIC_SEQ_CST should be needed only rarely. IMO, rte_atomic_thread_fence 
should be called only for the __ATOMIC_SEQ_CST memory order; for all other orders, 
__atomic_thread_fence can be used directly. This will help us stick to using the 
atomic built-ins in most cases.

Konstantin, is this ok for you?

> +
>  /*------------------------- 16 bit atomic operations 
> -------------------------*/
> 
>  #ifndef RTE_FORCE_INTRINSICS
> --
> 2.7.4

Reply via email to