On Thu, Jul 2, 2020 at 11:24 AM Radu Nicolau <radu.nico...@intel.com> wrote: > > Add rte_write32_wc and rte_write32_wc_relaxed functions > that implement 32bit stores using write combining memory protocol. > Provided generic stubs and x86 implementation. > > Signed-off-by: Radu Nicolau <radu.nico...@intel.com> > Acked-by: Bruce Richardson <bruce.richard...@intel.com> > --- > v4: address feedback and include ack > > lib/librte_eal/include/generic/rte_io.h | 47 +++++++++++++++++++++++++++ > lib/librte_eal/x86/include/rte_io.h | 56 > +++++++++++++++++++++++++++++++++ > 2 files changed, 103 insertions(+) > > diff --git a/lib/librte_eal/include/generic/rte_io.h > b/lib/librte_eal/include/generic/rte_io.h > index da457f7..7391782 100644 > --- a/lib/librte_eal/include/generic/rte_io.h > +++ b/lib/librte_eal/include/generic/rte_io.h > @@ -229,6 +229,39 @@ rte_write32(uint32_t value, volatile void *addr); > static inline void > rte_write64(uint64_t value, volatile void *addr); > > +/** > + * Write a 32-bit value to I/O device memory address addr using write > + * combining memory write protocol. Depending on the platform write combining > + * may not be available and/or may be treated as a hint and the behavior may > + * fallback to a regular store. > + * > + * @param value > + * Value to write > + * @param addr > + * I/O memory address to write the value to > + */ > +static inline void > +rte_write32_wc(uint32_t value, volatile void *addr);
This is a new API, and even if inlined, it should be marked experimental. Is volatile necessary? > + > +/** > + * Write a 32-bit value to I/O device memory address addr using write > + * combining memory write protocol. Depending on the platform write combining > + * may not be available and/or may be treated as a hint and the behavior may > + * fallback to a regular store. > + * > + * The relaxed version does not have additional I/O memory barrier, useful in > + * accessing the device registers of integrated controllers which implicitly > + * strongly ordered with respect to memory access. > + * > + * @param value > + * Value to write > + * @param addr > + * I/O memory address to write the value to > + */ > +static inline void > +rte_write32_wc_relaxed(uint32_t value, volatile void *addr); > + > + Double empty line (there are some other in this patch that I won't flag again). > #endif /* __DOXYGEN__ */ > > #ifndef RTE_OVERRIDE_IO_H > @@ -345,6 +378,20 @@ rte_write64(uint64_t value, volatile void *addr) > rte_write64_relaxed(value, addr); > } > > +#ifndef RTE_NATIVE_WRITE32_WC Missing return type, this causes build failure on anything but x86. > +rte_write32_wc(uint32_t value, volatile void *addr) > +{ > + rte_write32(value, addr); > +} > + > +static __rte_always_inline void > +rte_write32_wc_relaxed(uint32_t value, volatile void *addr) > +{ > + rte_write32_relaxed(value, addr); > +} > +#endif /* RTE_NATIVE_WRITE32_WC */ > + > + > #endif /* RTE_OVERRIDE_IO_H */ > > #endif /* _RTE_IO_H_ */ > diff --git a/lib/librte_eal/x86/include/rte_io.h > b/lib/librte_eal/x86/include/rte_io.h > index 2db71b1..c95ed67 100644 > --- a/lib/librte_eal/x86/include/rte_io.h > +++ b/lib/librte_eal/x86/include/rte_io.h > @@ -9,8 +9,64 @@ > extern "C" { > #endif > > +#include "rte_cpuflags.h" Inclusion of this header should be out of the extern "C" block. > + > +#define RTE_NATIVE_WRITE32_WC > #include "generic/rte_io.h" > > +/** > + * @internal > + * MOVDIRI wrapper. > + */ > +static __rte_always_inline void > +_rte_x86_movdiri(uint32_t value, volatile void *addr) > +{ > + asm volatile( > + /* MOVDIRI */ > + ".byte 0x40, 0x0f, 0x38, 0xf9, 0x02" > + : > + : "a" (value), "d" (addr)); > +} > + > +static __rte_always_inline void > +rte_write32_wc(uint32_t value, volatile void *addr) > +{ > + static int _x86_movdiri_flag = -1; > + if (_x86_movdiri_flag == 1) { > + rte_wmb(); > + _rte_x86_movdiri(value, addr); > + } else if (_x86_movdiri_flag == 0) { > + rte_write32(value, addr); > + } else { > + _x86_movdiri_flag = > + (rte_cpu_get_flag_enabled(RTE_CPUFLAG_MOVDIRI) > 0); Can't this cpu flag check be moved in a constructor? This would avoid this copy/paste. > + if (_x86_movdiri_flag == 1) { > + rte_wmb(); > + _rte_x86_movdiri(value, addr); > + } else { > + rte_write32(value, addr); > + } > + } > +} > + > +static __rte_always_inline void > +rte_write32_wc_relaxed(uint32_t value, volatile void *addr) > +{ > + static int _x86_movdiri_flag = -1; Same check with a static variable with the same name. I wonder if wrapping all of this in a single function would be more elegant. Then rte_write32_wc(|_relaxed) would call it with a flag. > + if (_x86_movdiri_flag == 1) { > + _rte_x86_movdiri(value, addr); > + } else if (_x86_movdiri_flag == 0) { > + rte_write32_relaxed(value, addr); > + } else { > + _x86_movdiri_flag = > + (rte_cpu_get_flag_enabled(RTE_CPUFLAG_MOVDIRI) > 0); > + if (_x86_movdiri_flag == 1) > + _rte_x86_movdiri(value, addr); > + else > + rte_write32_relaxed(value, addr); > + } > +} > + > #ifdef __cplusplus > } > #endif > -- > 2.7.4 > -- David Marchand