On Thu, Oct 1, 2020 at 6:00 AM Omkar Maslekar <omkar.masle...@intel.com> wrote: > > rte_cldemote is similar to a prefetch hint - in reverse. cldemote(addr) > enables software to hint to hardware that line is likely to be shared. > Useful in core-to-core communications where cache-line is likely to be > shared. ARM and PPC implementation is provided with NOP and can be added > if any equivalent instructions could be used for implementation on those > architectures. > > Signed-off-by: Omkar Maslekar <omkar.masle...@intel.com> > Acked-by: Bruce Richardson <bruce.richard...@intel.com> > > --- > v5: documentation updated > fixed formatting issue in release notes > added Acked-by: Bruce Richardson <bruce.richard...@intel.com> > * > v4: updated bold text for title and fixed margin in release notes > * > v3: fixed warning regarding whitespace > * > v2: documentation updated > --- > --- > doc/guides/rel_notes/release_20_11.rst | 7 +++++++ > lib/librte_eal/arm/include/rte_prefetch_32.h | 5 +++++ > lib/librte_eal/arm/include/rte_prefetch_64.h | 5 +++++ > lib/librte_eal/include/generic/rte_prefetch.h | 14 ++++++++++++++ > lib/librte_eal/ppc/include/rte_prefetch.h | 5 +++++ > lib/librte_eal/x86/include/rte_prefetch.h | 9 +++++++++ > 6 files changed, 45 insertions(+) > > diff --git a/doc/guides/rel_notes/release_20_11.rst > b/doc/guides/rel_notes/release_20_11.rst > index df227a1..dc402ab 100644 > --- a/doc/guides/rel_notes/release_20_11.rst > +++ b/doc/guides/rel_notes/release_20_11.rst > @@ -55,6 +55,13 @@ New Features > Also, make sure to start the actual text at the margin. > ======================================================= > > +* **Added new function rte_cldemote in rte_prefetch.h.** > + > + Added a hardware hint CLDEMOTE, which is similar to prefetch in reverse. > + CLDEMOTE moves the cache line to the more remote cache, where it expects > + sharing to be efficient. 
Moving the cache line to a level more distant from > + the processor helps to accelerate core-to-core communication. > + > > Removed Items > ------------- > diff --git a/lib/librte_eal/arm/include/rte_prefetch_32.h > b/lib/librte_eal/arm/include/rte_prefetch_32.h > index e53420a..ad91edd 100644 > --- a/lib/librte_eal/arm/include/rte_prefetch_32.h > +++ b/lib/librte_eal/arm/include/rte_prefetch_32.h > @@ -33,6 +33,11 @@ static inline void rte_prefetch_non_temporal(const > volatile void *p) > rte_prefetch0(p); > } > > +static inline void rte_cldemote(const volatile void *p) > +{ > + RTE_SET_USED(p); > +} > + > #ifdef __cplusplus > } > #endif > diff --git a/lib/librte_eal/arm/include/rte_prefetch_64.h > b/lib/librte_eal/arm/include/rte_prefetch_64.h > index fc2b391..35d278a 100644 > --- a/lib/librte_eal/arm/include/rte_prefetch_64.h > +++ b/lib/librte_eal/arm/include/rte_prefetch_64.h > @@ -32,6 +32,11 @@ static inline void rte_prefetch_non_temporal(const > volatile void *p) > asm volatile ("PRFM PLDL1STRM, [%0]" : : "r" (p)); > } > > +static inline void rte_cldemote(const volatile void *p) > +{ > + RTE_SET_USED(p); > +}
ARM64 does not have an equivalent instruction, so a NOP implementation is fine here. Acked-by: Jerin Jacob <jer...@marvell.com> > + > #ifdef __cplusplus > } > #endif > diff --git a/lib/librte_eal/include/generic/rte_prefetch.h > b/lib/librte_eal/include/generic/rte_prefetch.h > index 6e47bdf..5500cd5 100644 > --- a/lib/librte_eal/include/generic/rte_prefetch.h > +++ b/lib/librte_eal/include/generic/rte_prefetch.h > @@ -51,4 +51,18 @@ > */ > static inline void rte_prefetch_non_temporal(const volatile void *p); > > +/** > + * Demote a cache line to a more distant level of cache from the processor. > + * > + * CLDEMOTE hints to hardware to move (demote) a cache line from the closest > to > + * the processor to a level more distant from the processor. It is a hint and > + * not guarantee. rte_cldemote is intended to move the cache line to the more > + * remote cache, where it expects sharing to be efficient and to indicate > that a > + * line may be accessed by a different core in the future. > + * > + * @param p > + * Address to demote > + */ > +static inline void rte_cldemote(const volatile void *p); > + > #endif /* _RTE_PREFETCH_H_ */ > diff --git a/lib/librte_eal/ppc/include/rte_prefetch.h > b/lib/librte_eal/ppc/include/rte_prefetch.h > index 9ba07c8..3fe9655 100644 > --- a/lib/librte_eal/ppc/include/rte_prefetch.h > +++ b/lib/librte_eal/ppc/include/rte_prefetch.h > @@ -34,6 +34,11 @@ static inline void rte_prefetch_non_temporal(const > volatile void *p) > rte_prefetch0(p); > } > > +static inline void rte_cldemote(const volatile void *p) > +{ > + RTE_SET_USED(p); > +} > + > #ifdef __cplusplus > } > #endif > diff --git a/lib/librte_eal/x86/include/rte_prefetch.h > b/lib/librte_eal/x86/include/rte_prefetch.h > index 384c6b3..029d06e 100644 > --- a/lib/librte_eal/x86/include/rte_prefetch.h > +++ b/lib/librte_eal/x86/include/rte_prefetch.h > @@ -32,6 +32,15 @@ static inline void rte_prefetch_non_temporal(const > volatile void *p) > asm volatile ("prefetchnta %[p]" : : [p] "m" (*(const volatile 
char > *)p)); > } > > +/* > + * we're using raw byte codes for now as only the newest compiler > + * versions support this instruction natively. > + */ > +static inline void rte_cldemote(const volatile void *p) > +{ > + asm volatile(".byte 0x0f, 0x1c, 0x06" :: "S" (p)); > +} > + > #ifdef __cplusplus > } > #endif > -- > 1.8.3.1 >