Another option would be to just do what PPC already does.
The ENA part is needed because that driver has some garbage that
redefines memcpy to always use rte_memcpy (which is one of those bad ideas).

From 74e7ab929e61e0481f6e0214d4d06a716b2f7d79 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <step...@networkplumber.org>
Date: Sun, 3 Mar 2024 08:02:07 -0800
Subject: [PATCH] rte_memcpy: use builtin memcpy for fixed sizes

This makes the x86 arch do the same thing as PPC, and also allows
code checkers to see memcpy issues.  It now exposes a pre-existing
bug in the ipsec test.

Signed-off-by: Stephen Hemminger <step...@networkplumber.org>
---
 drivers/net/ena/base/ena_plat_dpdk.h |  9 +-----
 lib/eal/x86/include/rte_memcpy.h     | 45 +++++++++++++++-------------
 2 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ena/base/ena_plat_dpdk.h b/drivers/net/ena/base/ena_plat_dpdk.h
index 14bf582a451f..997e6aa3dfbd 100644
--- a/drivers/net/ena/base/ena_plat_dpdk.h
+++ b/drivers/net/ena/base/ena_plat_dpdk.h
@@ -70,14 +70,7 @@ typedef uint64_t dma_addr_t;
 #define ENA_UDELAY(x) rte_delay_us_block(x)
 
 #define ENA_TOUCH(x) ((void)(x))
-/* Redefine memcpy with caution: rte_memcpy can be simply aliased to memcpy, so
- * make the redefinition only if it's safe (and beneficial) to do so.
- */
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64_MEMCPY) || \
-       defined(RTE_ARCH_ARM_NEON_MEMCPY)
-#undef memcpy
-#define memcpy rte_memcpy
-#endif
+
 #define wmb rte_wmb
 #define rmb rte_rmb
 #define mb rte_mb
diff --git a/lib/eal/x86/include/rte_memcpy.h b/lib/eal/x86/include/rte_memcpy.h
index 72a92290e05d..aab30be0eeb9 100644
--- a/lib/eal/x86/include/rte_memcpy.h
+++ b/lib/eal/x86/include/rte_memcpy.h
@@ -27,24 +27,6 @@ extern "C" {
 #pragma GCC diagnostic ignored "-Wstringop-overflow"
 #endif
 
-/**
- * Copy bytes from one location to another. The locations must not overlap.
- *
- * @note This is implemented as a macro, so it's address should not be taken
- * and care is needed as parameter expressions may be evaluated multiple times.
- *
- * @param dst
- *   Pointer to the destination of the data.
- * @param src
- *   Pointer to the source data.
- * @param n
- *   Number of bytes to copy.
- * @return
- *   Pointer to the destination data.
- */
-static __rte_always_inline void *
-rte_memcpy(void *dst, const void *src, size_t n);
-
 /**
  * Copy bytes from one location to another,
  * locations should not overlap.
@@ -859,8 +841,8 @@ rte_memcpy_aligned(void *dst, const void *src, size_t n)
        return ret;
 }
 
-static __rte_always_inline void *
-rte_memcpy(void *dst, const void *src, size_t n)
+static inline void *
+rte_memcpy_func(void *dst, const void *src, size_t n)
 {
        if (!(((uintptr_t)dst | (uintptr_t)src) & ALIGNMENT_MASK))
                return rte_memcpy_aligned(dst, src, n);
@@ -868,6 +850,29 @@ rte_memcpy(void *dst, const void *src, size_t n)
                return rte_memcpy_generic(dst, src, n);
 }
 
+
+/**
+ * Copy bytes from one location to another. The locations must not overlap.
+ *
+ * @note This is implemented as a macro, so its address should not be taken
+ * and care is needed as parameter expressions may be evaluated multiple times.
+ *
+ * @param dst
+ *   Pointer to the destination of the data.
+ * @param src
+ *   Pointer to the source data.
+ * @param n
+ *   Number of bytes to copy.
+ * @return
+ *   Pointer to the destination data.
+ */
+#define rte_memcpy(dst, src, n)              \
+       __extension__ ({                     \
+       (__builtin_constant_p(n)) ?          \
+       memcpy((dst), (src), (n)) :          \
+       rte_memcpy_func((dst), (src), (n)); })
+
+
 #undef ALIGNMENT_MASK
 
 #if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION >= 100000)
-- 
2.43.0



Reply via email to