On Fri, Jun 19, 2020 at 01:06:19PM +0100, Radu Nicolau wrote: > Add rte_write32_wc and rte_write32_wc_relaxed functions > that implement 32bit stores using write combining memory protocol. > Provided generic stubs and x86 implementation. > > Signed-off-by: Radu Nicolau <radu.nico...@intel.com> > --- > v2 rework new eal io functions >
I think you need to add into the cover letter, as suggest on V1 by Konstantin, a bit about using WC memory for better performance. > lib/librte_eal/include/generic/rte_io.h | 47 ++++++++++++++++++++++++++ > lib/librte_eal/x86/include/rte_io.h | 59 > +++++++++++++++++++++++++++++++++ > 2 files changed, 106 insertions(+) > > diff --git a/lib/librte_eal/include/generic/rte_io.h > b/lib/librte_eal/include/generic/rte_io.h > index da457f7..7391782 100644 > --- a/lib/librte_eal/include/generic/rte_io.h > +++ b/lib/librte_eal/include/generic/rte_io.h > @@ -229,6 +229,39 @@ rte_write32(uint32_t value, volatile void *addr); > static inline void > rte_write64(uint64_t value, volatile void *addr); > > +/** > + * Write a 32-bit value to I/O device memory address addr using write > + * combining memory write protocol. Depending on the platform write combining > + * may not be available and/or may be treated as a hint and the behavior may > + * fallback to a regular store. > + * > + * @param value > + * Value to write > + * @param addr > + * I/O memory address to write the value to > + */ > +static inline void > +rte_write32_wc(uint32_t value, volatile void *addr); > + > +/** > + * Write a 32-bit value to I/O device memory address addr using write > + * combining memory write protocol. Depending on the platform write combining > + * may not be available and/or may be treated as a hint and the behavior may > + * fallback to a regular store. > + * > + * The relaxed version does not have additional I/O memory barrier, useful in > + * accessing the device registers of integrated controllers which implicitly > + * strongly ordered with respect to memory access. > + * > + * @param value > + * Value to write > + * @param addr > + * I/O memory address to write the value to > + */ > +static inline void > +rte_write32_wc_relaxed(uint32_t value, volatile void *addr); > + > + > #endif /* __DOXYGEN__ */ > > #ifndef RTE_OVERRIDE_IO_H > @@ -345,6 +378,20 @@ rte_write64(uint64_t value, volatile void *addr) > rte_write64_relaxed(value, addr); > } > > +#ifndef RTE_NATIVE_WRITE32_WC > +rte_write32_wc(uint32_t value, volatile void *addr) > +{ > + rte_write32(value, addr); > +} > + > +static __rte_always_inline void > +rte_write32_wc_relaxed(uint32_t value, volatile void *addr) > +{ > + rte_write32_relaxed(value, addr); > +} > +#endif /* RTE_NATIVE_WRITE32_WC */ > + > + > #endif /* RTE_OVERRIDE_IO_H */ > I like this approach, since it saves duplicating the non-overridden functions. Nice! > #endif /* _RTE_IO_H_ */ > diff --git a/lib/librte_eal/x86/include/rte_io.h > b/lib/librte_eal/x86/include/rte_io.h > index 2db71b1..5efbf0d 100644 > --- a/lib/librte_eal/x86/include/rte_io.h > +++ b/lib/librte_eal/x86/include/rte_io.h > @@ -9,8 +9,67 @@ > extern "C" { > #endif > > +#include "rte_cpuflags.h" > + > +#define RTE_NATIVE_WRITE32_WC > #include "generic/rte_io.h" > > +/** > + * @internal > + * MOVDIRI wrapper. > + */ > +static __rte_always_inline void > +_rte_x86_movdiri(uint32_t value, volatile void *addr) > +{ > + asm volatile( > + /* MOVDIRI */ > + ".byte 0x40, 0x0f, 0x38, 0xf9, 0x02" > + : > + : "a" (value), "d" (addr)); > +} > + > +static __rte_always_inline void > +rte_write32_wc(uint32_t value, volatile void *addr) > +{ > + static int _x86_movdiri_flag = -1; > + if (_x86_movdiri_flag == 1) { > + rte_wmb(); > + _rte_x86_movdiri(value, addr); > + } else if (_x86_movdiri_flag == 0) { > + rte_write32(value, addr); > + } else { > + _x86_movdiri_flag = > + (rte_cpu_get_flag_enabled(RTE_CPUFLAG_MOVDIRI) > 0); > + if (_x86_movdiri_flag == 1) { > + rte_wmb(); > + _rte_x86_movdiri(value, addr); > + } else { > + rte_write32(value, addr); > + } > + } > +} > + > +static __rte_always_inline void > +rte_write32_wc_relaxed(uint32_t value, volatile void *addr) > +{ > + static int _x86_movdiri_flag = -1; > + if (_x86_movdiri_flag == 1) { > + _rte_x86_movdiri(value, addr); > + } else if (_x86_movdiri_flag == 0) { > + rte_write32_relaxed(value, addr); > + } else { > + _x86_movdiri_flag = > + (rte_cpu_get_flag_enabled(RTE_CPUFLAG_MOVDIRI) > 0); > + if (_x86_movdiri_flag == 1) > + _rte_x86_movdiri(value, addr); > + else > + rte_write32_relaxed(value, addr); > + } > +} > + > + > + > + Rather a lot of whitespace here. > #ifdef __cplusplus > } > #endif > -- > 2.7.4 With the nits called out above fixed: Acked-by: Bruce Richardson <bruce.richar...@intel.com>