On 7/20/2020 7:42 AM, Ruifeng Wang wrote:
-----Original Message-----
From: Radu Nicolau <radu.nico...@intel.com>
Sent: Friday, July 17, 2020 6:50 PM
To: dev@dpdk.org
Cc: beilei.x...@intel.com; jia....@intel.com; bruce.richard...@intel.com;
konstantin.anan...@intel.com; jerinjac...@gmail.com;
david.march...@redhat.com; fiona.tr...@intel.com; wei.zh...@intel.com;
Ruifeng Wang <ruifeng.w...@arm.com>; Radu Nicolau
<radu.nico...@intel.com>
Subject: [PATCH v8 1/4] eal: add WC store functions

Add rte_write32_wc and rte_write32_wc_relaxed functions that implement
32bit stores using write combining memory protocol.
Provided generic stubs and x86 implementation.

Signed-off-by: Radu Nicolau <radu.nico...@intel.com>
Acked-by: Bruce Richardson <bruce.richard...@intel.com>
---
  lib/librte_eal/arm/include/rte_io_64.h  | 12 +++++++
lib/librte_eal/include/generic/rte_io.h | 48
++++++++++++++++++++++++++++
  lib/librte_eal/x86/include/rte_io.h     | 56
+++++++++++++++++++++++++++++++++
  3 files changed, 116 insertions(+)

diff --git a/lib/librte_eal/arm/include/rte_io_64.h
b/lib/librte_eal/arm/include/rte_io_64.h
index e534624..d07d9cb 100644
--- a/lib/librte_eal/arm/include/rte_io_64.h
+++ b/lib/librte_eal/arm/include/rte_io_64.h
@@ -164,6 +164,18 @@ rte_write64(uint64_t value, volatile void *addr)
        rte_write64_relaxed(value, addr);
  }

+static __rte_always_inline void
+rte_write32_wc(uint32_t value, volatile void *addr) {
+       rte_write32(value, addr);
+}
+
+static __rte_always_inline void
+rte_write32_wc_relaxed(uint32_t value, volatile void *addr) {
+       rte_write32_relaxed(value, addr);
+}
+
  #ifdef __cplusplus
  }
  #endif
diff --git a/lib/librte_eal/include/generic/rte_io.h
b/lib/librte_eal/include/generic/rte_io.h
index da457f7..0669baa 100644
--- a/lib/librte_eal/include/generic/rte_io.h
+++ b/lib/librte_eal/include/generic/rte_io.h
@@ -229,6 +229,40 @@ rte_write32(uint32_t value, volatile void *addr);
static inline void  rte_write64(uint64_t value, volatile void *addr);

+/**
+ * Write a 32-bit value to I/O device memory address addr using write
+ * combining memory write protocol. Depending on the platform write
+combining
+ * may not be available and/or may be treated as a hint and the
+behavior may
+ * fallback to a regular store.
I'm trying to understand write combining use cases here.
Is it applicable for all MMIO writes?

It's dependent on the architecture and the specific use case, but generally updating the tail registers is a good use case. It has some particularities that prevent it from being a replacement for MMIO writes: it is weakly ordered and it bypasses the cache hierarchy.

How to identify where to use rte_write32_wc(_relaxed)?
The relaxed version can be used in sections of the code that already have the proper fencing, so as to avoid a redundant memory fence, or when there is no need for a memory fence at all.

Thanks.
/Ruifeng
+ *
+ * @param value
+ *  Value to write
+ * @param addr
+ *  I/O memory address to write the value to  */ __rte_experimental
+static inline void rte_write32_wc(uint32_t value, volatile void *addr);
+
+/**
+ * Write a 32-bit value to I/O device memory address addr using write
+ * combining memory write protocol. Depending on the platform write
+combining
+ * may not be available and/or may be treated as a hint and the
+behavior may
+ * fallback to a regular store.
+ *
+ * The relaxed version does not have additional I/O memory barrier,
+useful in
+ * accessing the device registers of integrated controllers which
+implicitly
+ * strongly ordered with respect to memory access.
+ *
+ * @param value
+ *  Value to write
+ * @param addr
+ *  I/O memory address to write the value to  */ __rte_experimental
+static inline void rte_write32_wc_relaxed(uint32_t value, volatile void
+*addr);
+
  #endif /* __DOXYGEN__ */

  #ifndef RTE_OVERRIDE_IO_H
@@ -345,6 +379,20 @@ rte_write64(uint64_t value, volatile void *addr)
        rte_write64_relaxed(value, addr);
  }

+#ifndef RTE_NATIVE_WRITE32_WC
+static __rte_always_inline void
+rte_write32_wc(uint32_t value, volatile void *addr) {
+       rte_write32(value, addr);
+}
+
+static __rte_always_inline void
+rte_write32_wc_relaxed(uint32_t value, volatile void *addr) {
+       rte_write32_relaxed(value, addr);
+}
+#endif /* RTE_NATIVE_WRITE32_WC */
+
  #endif /* RTE_OVERRIDE_IO_H */

  #endif /* _RTE_IO_H_ */
diff --git a/lib/librte_eal/x86/include/rte_io.h
b/lib/librte_eal/x86/include/rte_io.h
index 2db71b1..c95ed67 100644
--- a/lib/librte_eal/x86/include/rte_io.h
+++ b/lib/librte_eal/x86/include/rte_io.h
@@ -9,8 +9,64 @@
  extern "C" {
  #endif

+#include "rte_cpuflags.h"
+
+#define RTE_NATIVE_WRITE32_WC
  #include "generic/rte_io.h"

+/**
+ * @internal
+ * MOVDIRI wrapper.
+ */
+static __rte_always_inline void
+_rte_x86_movdiri(uint32_t value, volatile void *addr) {
+       asm volatile(
+               /* MOVDIRI */
+               ".byte 0x40, 0x0f, 0x38, 0xf9, 0x02"
+               :
+               : "a" (value), "d" (addr));
+}
+
+static __rte_always_inline void
+rte_write32_wc(uint32_t value, volatile void *addr) {
+       static int _x86_movdiri_flag = -1;
+       if (_x86_movdiri_flag == 1) {
+               rte_wmb();
+               _rte_x86_movdiri(value, addr);
+       } else if (_x86_movdiri_flag == 0) {
+               rte_write32(value, addr);
+       } else {
+               _x86_movdiri_flag =
+
        (rte_cpu_get_flag_enabled(RTE_CPUFLAG_MOVDIRI) > 0);
+               if (_x86_movdiri_flag == 1) {
+                       rte_wmb();
+                       _rte_x86_movdiri(value, addr);
+               } else {
+                       rte_write32(value, addr);
+               }
+       }
+}
+
+static __rte_always_inline void
+rte_write32_wc_relaxed(uint32_t value, volatile void *addr) {
+       static int _x86_movdiri_flag = -1;
+       if (_x86_movdiri_flag == 1) {
+               _rte_x86_movdiri(value, addr);
+       } else if (_x86_movdiri_flag == 0) {
+               rte_write32_relaxed(value, addr);
+       } else {
+               _x86_movdiri_flag =
+
        (rte_cpu_get_flag_enabled(RTE_CPUFLAG_MOVDIRI) > 0);
+               if (_x86_movdiri_flag == 1)
+                       _rte_x86_movdiri(value, addr);
+               else
+                       rte_write32_relaxed(value, addr);
+       }
+}
+
  #ifdef __cplusplus
  }
  #endif
--
2.7.4

Reply via email to