Keep per-lcore power intrinsics state in an lcore variable to reduce the cache working set size and to avoid false sharing caused by CPU next-line prefetching.
Signed-off-by: Mattias Rönnblom <mattias.ronnb...@ericsson.com> Acked-by: Morten Brørup <m...@smartsharesystems.com> Acked-by: Konstantin Ananyev <konstantin.anan...@huawei.com> Acked-by: Chengwen Feng <fengcheng...@huawei.com> Acked-by: Stephen Hemminger <step...@networkplumber.org> --- lib/eal/x86/rte_power_intrinsics.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/lib/eal/x86/rte_power_intrinsics.c b/lib/eal/x86/rte_power_intrinsics.c index 6d9b64240c..98a2cbc611 100644 --- a/lib/eal/x86/rte_power_intrinsics.c +++ b/lib/eal/x86/rte_power_intrinsics.c @@ -6,6 +6,7 @@ #include <rte_common.h> #include <rte_lcore.h> +#include <rte_lcore_var.h> #include <rte_rtm.h> #include <rte_spinlock.h> @@ -14,10 +15,14 @@ /* * Per-lcore structure holding current status of C0.2 sleeps. */ -static alignas(RTE_CACHE_LINE_SIZE) struct power_wait_status { +struct power_wait_status { rte_spinlock_t lock; volatile void *monitor_addr; /**< NULL if not currently sleeping */ -} wait_status[RTE_MAX_LCORE]; +}; + +RTE_LCORE_VAR_HANDLE(struct power_wait_status, wait_status); + +RTE_LCORE_VAR_INIT(wait_status); /* * This function uses UMONITOR/UMWAIT instructions and will enter C0.2 state. 
@@ -172,7 +177,7 @@ rte_power_monitor(const struct rte_power_monitor_cond *pmc, if (pmc->fn == NULL) return -EINVAL; - s = &wait_status[lcore_id]; + s = RTE_LCORE_VAR_LCORE(lcore_id, wait_status); /* update sleep address */ rte_spinlock_lock(&s->lock); @@ -264,7 +269,7 @@ rte_power_monitor_wakeup(const unsigned int lcore_id) if (lcore_id >= RTE_MAX_LCORE) return -EINVAL; - s = &wait_status[lcore_id]; + s = RTE_LCORE_VAR_LCORE(lcore_id, wait_status); /* * There is a race condition between sleep, wakeup and locking, but we @@ -303,8 +308,8 @@ int rte_power_monitor_multi(const struct rte_power_monitor_cond pmc[], const uint32_t num, const uint64_t tsc_timestamp) { - const unsigned int lcore_id = rte_lcore_id(); - struct power_wait_status *s = &wait_status[lcore_id]; + struct power_wait_status *s = RTE_LCORE_VAR(wait_status); + uint32_t i, rc; /* check if supported */ -- 2.43.0