When acquiring a spinlock, cores repeatedly poll the lock variable. This polling is replaced by the rte_wait_until_equal API.
5~10% performance gain was measured by running spinlock_autotest on 14 isolated cores of ThunderX2. Signed-off-by: Gavin Hu <gavin...@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com> Reviewed-by: Phil Yang <phil.y...@arm.com> Reviewed-by: Steve Capper <steve.cap...@arm.com> Reviewed-by: Ola Liljedahl <ola.liljed...@arm.com> Reviewed-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com> Tested-by: Pavan Nikhilesh <pbhagavat...@marvell.com> --- .../common/include/arch/arm/rte_spinlock.h | 25 ++++++++++++++++++++++ .../common/include/generic/rte_spinlock.h | 2 +- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/lib/librte_eal/common/include/arch/arm/rte_spinlock.h b/lib/librte_eal/common/include/arch/arm/rte_spinlock.h index 1a6916b..f25d17f 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_spinlock.h +++ b/lib/librte_eal/common/include/arch/arm/rte_spinlock.h @@ -16,6 +16,31 @@ extern "C" { #include <rte_common.h> #include "generic/rte_spinlock.h" +/* armv7a does support WFE, but an explicit wake-up signal using SEV is + * required (must be preceded by DSB to drain the store buffer) and + * this is less performant, so keep armv7a implementation unchanged. + */ +#if defined(RTE_USE_WFE) && defined(RTE_ARCH_ARM64) +static inline void +rte_spinlock_lock(rte_spinlock_t *sl) +{ + unsigned int tmp; + /* http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc. 
+ * faqs/ka16809.html + */ + asm volatile( + "sevl\n" + "1: wfe\n" + "2: ldaxr %w[tmp], %w[locked]\n" + "cbnz %w[tmp], 1b\n" + "stxr %w[tmp], %w[one], %w[locked]\n" + "cbnz %w[tmp], 2b\n" + : [tmp] "=&r" (tmp), [locked] "+Q"(sl->locked) + : [one] "r" (1) + : "cc", "memory"); +} +#endif + static inline int rte_tm_supported(void) { return 0; diff --git a/lib/librte_eal/common/include/generic/rte_spinlock.h b/lib/librte_eal/common/include/generic/rte_spinlock.h index 87ae7a4..cf4f15b 100644 --- a/lib/librte_eal/common/include/generic/rte_spinlock.h +++ b/lib/librte_eal/common/include/generic/rte_spinlock.h @@ -57,7 +57,7 @@ rte_spinlock_init(rte_spinlock_t *sl) static inline void rte_spinlock_lock(rte_spinlock_t *sl); -#ifdef RTE_FORCE_INTRINSICS +#if defined(RTE_FORCE_INTRINSICS) && !defined(RTE_USE_WFE) static inline void rte_spinlock_lock(rte_spinlock_t *sl) { -- 2.7.4