> Use RTM and WAITPKG instructions to perform a wait-for-writes similar to > what UMWAIT does, but without the limitation of having to listen for > just one event. This works because the optimized power state used by the > TPAUSE instruction will cause a wake up on RTM transaction abort, so if > we add the addresses we're interested in to the read-set, any write to > those addresses will wake us up. > > Signed-off-by: Konstantin Ananyev <konstantin.anan...@intel.com> > Signed-off-by: Anatoly Burakov <anatoly.bura...@intel.com> > --- > > Notes: > v2: > - Adapt to callback mechanism > > doc/guides/rel_notes/release_21_08.rst | 2 + > lib/eal/arm/rte_power_intrinsics.c | 11 +++ > lib/eal/include/generic/rte_cpuflags.h | 2 + > .../include/generic/rte_power_intrinsics.h | 35 ++++++++++ > lib/eal/ppc/rte_power_intrinsics.c | 11 +++ > lib/eal/version.map | 3 + > lib/eal/x86/rte_cpuflags.c | 2 + > lib/eal/x86/rte_power_intrinsics.c | 69 +++++++++++++++++++ > 8 files changed, 135 insertions(+) > ...
> diff --git a/lib/eal/x86/rte_power_intrinsics.c > b/lib/eal/x86/rte_power_intrinsics.c > index 3c5c9ce7ad..3fc6f62ef5 100644 > --- a/lib/eal/x86/rte_power_intrinsics.c > +++ b/lib/eal/x86/rte_power_intrinsics.c > @@ -4,6 +4,7 @@ > > #include <rte_common.h> > #include <rte_lcore.h> > +#include <rte_rtm.h> > #include <rte_spinlock.h> > > #include "rte_power_intrinsics.h" > @@ -28,6 +29,7 @@ __umwait_wakeup(volatile void *addr) > } > > static bool wait_supported; > +static bool wait_multi_supported; > > static inline uint64_t > __get_umwait_val(const volatile void *p, const uint8_t sz) > @@ -164,6 +166,8 @@ RTE_INIT(rte_power_intrinsics_init) { > > if (i.power_monitor && i.power_pause) > wait_supported = 1; > + if (i.power_monitor_multi) > + wait_multi_supported = 1; > } > > int > @@ -202,6 +206,9 @@ rte_power_monitor_wakeup(const unsigned int lcore_id) > * In this case, since we've already woken up, the "wakeup" was > * unneeded, and since T1 is still waiting on T2 releasing the lock, the > * wakeup address is still valid so it's perfectly safe to write it. > + * > + * For multi-monitor case, the act of locking will in itself trigger the > + * wakeup, so no additional writes necessary. > */ > rte_spinlock_lock(&s->lock); > if (s->monitor_addr != NULL) > @@ -210,3 +217,65 @@ rte_power_monitor_wakeup(const unsigned int lcore_id) > > return 0; > } > + > +int > +rte_power_monitor_multi(const struct rte_power_monitor_cond pmc[], > + const uint32_t num, const uint64_t tsc_timestamp) > +{ > + const unsigned int lcore_id = rte_lcore_id(); > + struct power_wait_status *s = &wait_status[lcore_id]; > + uint32_t i, rc; > + > + /* check if supported */ > + if (!wait_multi_supported) > + return -ENOTSUP; > + > + if (pmc == NULL || num == 0) > + return -EINVAL; > + > + /* we are already inside transaction region, return */ > + if (rte_xtest() != 0) > + return 0; > + > + /* start new transaction region */ > + rc = rte_xbegin(); > + > + /* transaction abort, possible write to one of wait addresses */ > + if (rc != RTE_XBEGIN_STARTED) > + return 0; > + > + /* > + * the mere act of reading the lock status here adds the lock to > + * the read set. This means that when we trigger a wakeup from another > + * thread, even if we don't have a defined wakeup address and thus don't > + * actually cause any writes, the act of locking our lock will itself > + * trigger the wakeup and abort the transaction. > + */ > + rte_spinlock_is_locked(&s->lock); > + > + /* > + * add all addresses to wait on into transaction read-set and check if > + * any of wakeup conditions are already met. > + */ > + for (i = 0; i < num; i++) { > + const struct rte_power_monitor_cond *c = &pmc[i]; > + > + if (pmc->fn == NULL) Should be c->fn, I believe. > + continue; Actually that way, if c->fn == NULL, we'll never add our c->addr to monitored addresses. Is that what we really want? My thought was, that if callback is not set, we'll just go to power-save state without extra checking, no? Something like that: const struct rte_power_monitor_cond *c = &pmc[i]; const uint64_t val = __get_umwait_val(c->addr, c->size); if (c->fn && c->fn(val, c->opaque) != 0) break; Same thought for rte_power_monitor(). > + const uint64_t val = __get_umwait_val(pmc->addr, pmc->size); Same thing: s/pmc->/c->/ > + > + /* abort if callback indicates that we need to stop */ > + if (c->fn(val, c->opaque) != 0) > + break; > + } > + > + /* none of the conditions were met, sleep until timeout */ > + if (i == num) > + rte_power_pause(tsc_timestamp); > + > + /* end transaction region */ > + rte_xend(); > + > + return 0; > +} > -- > 2.25.1