Wait for event with timeout (WFET) puts the CPU in a low-power mode, where it stays until an event is signalled (SEV), an exclusive monitor is lost, or the timeout expires. WFET is enabled selectively by checking for FEAT_WFxT in the Linux auxiliary vector. If FEAT_WFxT is not available, power management will fall back to WFE. The RTE_ARM_USE_WFE macro is no longer required to enable the WFE feature for PMD power monitoring.
Signed-off-by: Wathsala Vithanage <wathsala.vithan...@arm.com> Reviewed-by: Dhruv Tripathi <dhruv.tripa...@arm.com> Reviewed-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com> Reviewed-by: Jack Bond-Preston <jack.bond-pres...@foss.arm.com> Reviewed-by: Nick Connolly <nick.conno...@arm.com> Reviewed-by: Vinod Krishna <vinod.kris...@arm.com> --- .mailmap | 2 ++ app/test/test_cpuflags.c | 3 ++ lib/eal/arm/include/rte_cpuflags_64.h | 1 + lib/eal/arm/include/rte_pause_64.h | 21 ++++++++++--- lib/eal/arm/rte_cpuflags.c | 3 +- lib/eal/arm/rte_power_intrinsics.c | 45 +++++++++++++++++---------- 6 files changed, 52 insertions(+), 23 deletions(-) diff --git a/.mailmap b/.mailmap index 3843868716..31995d492d 100644 --- a/.mailmap +++ b/.mailmap @@ -332,6 +332,7 @@ Dexia Li <dexia...@jaguarmicro.com> Dexuan Cui <de...@microsoft.com> Dharmik Thakkar <dharmikjayesh.thak...@arm.com> <dharmik.thak...@arm.com> Dheemanth Mallikarjun <dheeman...@vmware.com> +Dhruv Tripathi <dhruv.tripa...@arm.com> Diana Wang <na.w...@corigine.com> Didier Pallard <didier.pall...@6wind.com> Dilshod Urazov <dilshod.ura...@oktetlabs.ru> @@ -1516,6 +1517,7 @@ Vincent Jardin <vincent.jar...@6wind.com> Vincent Li <vincent.mc...@gmail.com> Vincent S. 
Cojot <vco...@redhat.com> Vinh Tran <vinh.t.tra...@gmail.com> +Vinod Krishna <vinod.kris...@arm.com> Vipin Varghese <vipin.vargh...@amd.com> <vipin.vargh...@intel.com> Vipul Ashri <vipul.as...@oracle.com> Visa Hankala <v...@hankala.org> diff --git a/app/test/test_cpuflags.c b/app/test/test_cpuflags.c index a0ff74720c..22ab4dff0a 100644 --- a/app/test/test_cpuflags.c +++ b/app/test/test_cpuflags.c @@ -156,6 +156,9 @@ test_cpuflags(void) printf("Check for SVEBF16:\t"); CHECK_FOR_FLAG(RTE_CPUFLAG_SVEBF16); + + printf("Check for WFXT:\t"); + CHECK_FOR_FLAG(RTE_CPUFLAG_WFXT); #endif #if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) diff --git a/lib/eal/arm/include/rte_cpuflags_64.h b/lib/eal/arm/include/rte_cpuflags_64.h index afe70209c3..1945a97ca1 100644 --- a/lib/eal/arm/include/rte_cpuflags_64.h +++ b/lib/eal/arm/include/rte_cpuflags_64.h @@ -36,6 +36,7 @@ enum rte_cpu_flag_t { RTE_CPUFLAG_SVEF64MM, RTE_CPUFLAG_SVEBF16, RTE_CPUFLAG_AARCH64, + RTE_CPUFLAG_WFXT, }; #include "generic/rte_cpuflags.h" diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h index 5cb8b59056..f732407425 100644 --- a/lib/eal/arm/include/rte_pause_64.h +++ b/lib/eal/arm/include/rte_pause_64.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: BSD-3-Clause * Copyright(c) 2017 Cavium, Inc - * Copyright(c) 2019 Arm Limited + * Copyright(c) 2024 Arm Limited */ #ifndef _RTE_PAUSE_ARM64_H_ @@ -23,17 +23,28 @@ static inline void rte_pause(void) asm volatile("yield" ::: "memory"); } -#ifdef RTE_WAIT_UNTIL_EQUAL_ARCH_DEFINED -/* Send a local event to quit WFE. */ +/* Send a local event to quit WFE/WFxT. */ #define __RTE_ARM_SEVL() { asm volatile("sevl" : : : "memory"); } -/* Send a global event to quit WFE for all cores. */ +/* Send a global event to quit WFE/WFxT for all cores. */ #define __RTE_ARM_SEV() { asm volatile("sev" : : : "memory"); } /* Put processor into low power WFE(Wait For Event) state. 
*/ #define __RTE_ARM_WFE() { asm volatile("wfe" : : : "memory"); } +/* Put processor into low power WFET (WFE with Timeout) state. */ +#ifdef RTE_ARM_FEATURE_WFXT +#define __RTE_ARM_WFET(t) { \ + asm volatile("wfet %x[to]" \ + : \ + : [to] "r" (t) \ + : "memory"); \ + } +#else +#define __RTE_ARM_WFET(t) { RTE_SET_USED(t); } +#endif + /* * Atomic exclusive load from addr, it returns the 8-bit content of * *addr while making it 'monitored', when it is written by someone @@ -147,6 +158,8 @@ static inline void rte_pause(void) __RTE_ARM_LOAD_EXC_128(src, dst, memorder) \ } +#ifdef RTE_WAIT_UNTIL_EQUAL_ARCH_DEFINED + static __rte_always_inline void rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected, int memorder) diff --git a/lib/eal/arm/rte_cpuflags.c b/lib/eal/arm/rte_cpuflags.c index 7ba4f8ba97..ad76f2448b 100644 --- a/lib/eal/arm/rte_cpuflags.c +++ b/lib/eal/arm/rte_cpuflags.c @@ -115,6 +115,7 @@ const struct feature_entry rte_cpu_feature_table[] = { FEAT_DEF(SVEF32MM, REG_HWCAP2, 10) FEAT_DEF(SVEF64MM, REG_HWCAP2, 11) FEAT_DEF(SVEBF16, REG_HWCAP2, 12) + FEAT_DEF(WFXT, REG_HWCAP2, 31) FEAT_DEF(AARCH64, REG_PLATFORM, 0) }; #endif /* RTE_ARCH */ @@ -163,7 +164,5 @@ void rte_cpu_get_intrinsics_support(struct rte_cpu_intrinsics *intrinsics) { memset(intrinsics, 0, sizeof(*intrinsics)); -#ifdef RTE_ARM_USE_WFE intrinsics->power_monitor = 1; -#endif } diff --git a/lib/eal/arm/rte_power_intrinsics.c b/lib/eal/arm/rte_power_intrinsics.c index f54cf59e80..fc7a0c61f0 100644 --- a/lib/eal/arm/rte_power_intrinsics.c +++ b/lib/eal/arm/rte_power_intrinsics.c @@ -4,20 +4,31 @@ #include <errno.h> +#include "rte_cpuflags.h" #include "rte_power_intrinsics.h" /** - * This function uses WFE instruction to make lcore suspend + * Set wfet_en if WFET is supported + */ +uint8_t wfet_en; + +RTE_INIT(rte_power_intrinsics_init) +{ +#ifdef RTE_ARCH_64 + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_WFXT)) + wfet_en = 1; +#endif +} + +/** + * This function uses WFE/WFET instruction to 
make lcore suspend * execution on ARM. - * Note that timestamp based timeout is not supported yet. */ int rte_power_monitor(const struct rte_power_monitor_cond *pmc, const uint64_t tsc_timestamp) { - RTE_SET_USED(tsc_timestamp); - -#ifdef RTE_ARM_USE_WFE +#ifdef RTE_ARCH_64 const unsigned int lcore_id = rte_lcore_id(); uint64_t cur_value; @@ -33,28 +44,30 @@ rte_power_monitor(const struct rte_power_monitor_cond *pmc, switch (pmc->size) { case sizeof(uint8_t): - __RTE_ARM_LOAD_EXC_8(pmc->addr, cur_value, rte_memory_order_relaxed) - __RTE_ARM_WFE() + __RTE_ARM_LOAD_EXC_8(pmc->addr, cur_value, rte_memory_order_relaxed); break; case sizeof(uint16_t): - __RTE_ARM_LOAD_EXC_16(pmc->addr, cur_value, rte_memory_order_relaxed) - __RTE_ARM_WFE() + __RTE_ARM_LOAD_EXC_16(pmc->addr, cur_value, rte_memory_order_relaxed); break; case sizeof(uint32_t): - __RTE_ARM_LOAD_EXC_32(pmc->addr, cur_value, rte_memory_order_relaxed) - __RTE_ARM_WFE() + __RTE_ARM_LOAD_EXC_32(pmc->addr, cur_value, rte_memory_order_relaxed); break; case sizeof(uint64_t): - __RTE_ARM_LOAD_EXC_64(pmc->addr, cur_value, rte_memory_order_relaxed) - __RTE_ARM_WFE() + __RTE_ARM_LOAD_EXC_64(pmc->addr, cur_value, rte_memory_order_relaxed); break; default: return -EINVAL; /* unexpected size */ } + if (wfet_en) + __RTE_ARM_WFET(tsc_timestamp) + else + __RTE_ARM_WFE() + return 0; #else RTE_SET_USED(pmc); + RTE_SET_USED(tsc_timestamp); return -ENOTSUP; #endif @@ -80,10 +93,8 @@ int rte_power_monitor_wakeup(const unsigned int lcore_id) { RTE_SET_USED(lcore_id); - -#ifdef RTE_ARM_USE_WFE - __RTE_ARM_SEV() - +#ifdef RTE_ARCH_64 + __RTE_ARM_SEV(); return 0; #else return -ENOTSUP; -- 2.34.1