> -----Original Message----- > From: Omkar Maslekar <omkar.masle...@intel.com> > Sent: Tuesday, October 13, 2020 5:43 PM > To: dev@dpdk.org > Cc: bruce.richard...@intel.com; ciara.lof...@intel.com; > omkar.masle...@intel.com; d...@linux.vnet.ibm.com; jer...@marvell.com; > Ruifeng Wang <ruifeng.w...@arm.com>; Honnappa Nagarahalli > <honnappa.nagaraha...@arm.com> > Subject: [PATCH v7] eal: add cache-line demote support > > rte_cldemote is similar to a prefetch hint - in reverse. cldemote(addr) > enables software to hint to hardware that line is likely to be shared. > Useful in core-to-core communications where cache-line is likely to be > shared. ARM and PPC implementation is provided with NOP and can be > added if any equivalent instructions could be used for implementation on > those architectures. > > Signed-off-by: Omkar Maslekar <omkar.masle...@intel.com> > Acked-by: Bruce Richardson <bruce.richard...@intel.com> > Acked-by: David Christensen <d...@linux.vnet.ibm.com> > Acked-by: Jerin Jacob <jer...@marvell.com> > > --- > v7: fixed experimental tag > > v6: marked rte_cldemote as experimental > added rte_cldemote call in existing app/test_prefetch.c > > v5: documentation updated > fixed formatting issue in release notes > added Acked-by: Bruce Richardson <bruce.richard...@intel.com> > * > v4: updated bold text for title and fixed margin in release notes > * > v3: fixed warning regarding whitespace > * > v2: documentation updated > --- > --- > app/test/test_prefetch.c | 4 ++++ > doc/guides/rel_notes/release_20_11.rst | 7 +++++++ > lib/librte_eal/arm/include/rte_prefetch_32.h | 7 +++++++ > lib/librte_eal/arm/include/rte_prefetch_64.h | 7 +++++++ > lib/librte_eal/include/generic/rte_prefetch.h | 15 +++++++++++++++ > lib/librte_eal/ppc/include/rte_prefetch.h | 7 +++++++ > lib/librte_eal/x86/include/rte_prefetch.h | 11 +++++++++++ > 7 files changed, 58 insertions(+) > > diff --git a/app/test/test_prefetch.c b/app/test/test_prefetch.c index > 41f219a..5c58d0c 100644 > 
--- a/app/test/test_prefetch.c > +++ b/app/test/test_prefetch.c > @@ -26,7 +26,11 @@ > rte_prefetch1(&a); > rte_prefetch2(&a); > > +/* test for marking a line as shared to test cldemote functionality */ > + rte_cldemote(&a); > + > return 0; > } > > + > REGISTER_TEST_COMMAND(prefetch_autotest, test_prefetch); diff --git > a/doc/guides/rel_notes/release_20_11.rst > b/doc/guides/rel_notes/release_20_11.rst > index b7881f2..8a1ed01 100644 > --- a/doc/guides/rel_notes/release_20_11.rst > +++ b/doc/guides/rel_notes/release_20_11.rst > @@ -171,6 +171,13 @@ New Features > * Extern objects and functions can be plugged into the pipeline. > * Transaction-oriented table updates. > > +* **Added new function rte_cldemote in rte_prefetch.h.** > + > + Added a hardware hint CLDEMOTE, which is similar to prefetch in reverse. > + CLDEMOTE moves the cache line to the more remote cache, where it > + expects sharing to be efficient. Moving the cache line to a level > + more distant from the processor helps to accelerate core-to-core > communication. 
> + > > Removed Items > ------------- > diff --git a/lib/librte_eal/arm/include/rte_prefetch_32.h > b/lib/librte_eal/arm/include/rte_prefetch_32.h > index e53420a..28b3d48 100644 > --- a/lib/librte_eal/arm/include/rte_prefetch_32.h > +++ b/lib/librte_eal/arm/include/rte_prefetch_32.h > @@ -10,6 +10,7 @@ > #endif > > #include <rte_common.h> > +#include <rte_compat.h> > #include "generic/rte_prefetch.h" > > static inline void rte_prefetch0(const volatile void *p) @@ -33,6 +34,12 @@ > static inline void rte_prefetch_non_temporal(const volatile void *p) > rte_prefetch0(p); > } > > +__rte_experimental > +static inline void rte_cldemote(const volatile void *p) { > + RTE_SET_USED(p); > +} > + > #ifdef __cplusplus > } > #endif > diff --git a/lib/librte_eal/arm/include/rte_prefetch_64.h > b/lib/librte_eal/arm/include/rte_prefetch_64.h > index fc2b391..1c722eb 100644 > --- a/lib/librte_eal/arm/include/rte_prefetch_64.h > +++ b/lib/librte_eal/arm/include/rte_prefetch_64.h > @@ -10,6 +10,7 @@ > #endif > > #include <rte_common.h> > +#include <rte_compat.h> > #include "generic/rte_prefetch.h" > > static inline void rte_prefetch0(const volatile void *p) @@ -32,6 +33,12 @@ > static inline void rte_prefetch_non_temporal(const volatile void *p) > asm volatile ("PRFM PLDL1STRM, [%0]" : : "r" (p)); } > > +__rte_experimental > +static inline void rte_cldemote(const volatile void *p) { > + RTE_SET_USED(p); > +} > + > #ifdef __cplusplus > } > #endif > diff --git a/lib/librte_eal/include/generic/rte_prefetch.h > b/lib/librte_eal/include/generic/rte_prefetch.h > index 6e47bdf..ad9844c 100644 > --- a/lib/librte_eal/include/generic/rte_prefetch.h > +++ b/lib/librte_eal/include/generic/rte_prefetch.h > @@ -51,4 +51,19 @@ > */ > static inline void rte_prefetch_non_temporal(const volatile void *p); > > +/** > + * Demote a cache line to a more distant level of cache from the processor. 
> + * > + * CLDEMOTE hints to hardware to move (demote) a cache line from the > +closest to > + * the processor to a level more distant from the processor. It is a > +hint and > + * not a guarantee. rte_cldemote is intended to move the cache line to > +the more > + * remote cache, where it expects sharing to be efficient and to > +indicate that a > + * line may be accessed by a different core in the future. > + * > + * @param p > + * Address to demote > + */ > +__rte_experimental > +static inline void rte_cldemote(const volatile void *p); > + > #endif /* _RTE_PREFETCH_H_ */ > diff --git a/lib/librte_eal/ppc/include/rte_prefetch.h > b/lib/librte_eal/ppc/include/rte_prefetch.h > index 9ba07c8..b55cac4 100644 > --- a/lib/librte_eal/ppc/include/rte_prefetch.h > +++ b/lib/librte_eal/ppc/include/rte_prefetch.h > @@ -11,6 +11,7 @@ > #endif > > #include <rte_common.h> > +#include <rte_compat.h> > #include "generic/rte_prefetch.h" > > static inline void rte_prefetch0(const volatile void *p) @@ -34,6 +35,12 @@ > static inline void rte_prefetch_non_temporal(const volatile void *p) > rte_prefetch0(p); > } > > +__rte_experimental > +static inline void rte_cldemote(const volatile void *p) { > + RTE_SET_USED(p); > +} > + > #ifdef __cplusplus > } > #endif > diff --git a/lib/librte_eal/x86/include/rte_prefetch.h > b/lib/librte_eal/x86/include/rte_prefetch.h > index 384c6b3..92ba05a 100644 > --- a/lib/librte_eal/x86/include/rte_prefetch.h > +++ b/lib/librte_eal/x86/include/rte_prefetch.h > @@ -10,6 +10,7 @@ > #endif > > #include <rte_common.h> > +#include <rte_compat.h> > #include "generic/rte_prefetch.h" > > static inline void rte_prefetch0(const volatile void *p) @@ -32,6 +33,16 @@ > static inline void rte_prefetch_non_temporal(const volatile void *p) > asm volatile ("prefetchnta %[p]" : : [p] "m" (*(const volatile char > *)p)); } > > +/* > + * we're using raw byte codes for now as only the newest compiler > + * versions support this instruction natively. 
> + */ > +__rte_experimental > +static inline void rte_cldemote(const volatile void *p) { > + asm volatile(".byte 0x0f, 0x1c, 0x06" :: "S" (p)); } > + > #ifdef __cplusplus > } > #endif > -- > 1.8.3.1
Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com>