Provide a build option to have the functions in <rte_memcpy.h> delegate to the standard compiler/libc memcpy(), instead of using the various custom, handcrafted, per-architecture DPDK rte_memcpy() implementations.
A new meson build option 'use_cc_memcpy' is added. By default, the traditional, custom DPDK rte_memcpy() implementation is used. The performance benefits of the custom DPDK rte_memcpy() implementations have been diminishing with every compiler release, and with current toolchains the use of a custom memcpy() implementation may even be a liability. An additional benefit of using the compiler memcpy() is that compilers and static analysis tools have an easier time detecting incorrect usage of rte_memcpy() (e.g., buffer overruns, or overlapping source and destination buffers). Signed-off-by: Mattias Rönnblom <mattias.ronnb...@ericsson.com> Acked-by: Morten Brørup <m...@smartsharesystems.com> --- PATCH v5: o Take a more cautious approach, leaving use_cc_memcpy disabled by default. o Fix ARM build issue in case RTE_ARCH_ARM64_MEMCPY was set. o Use separate macros to indicate that rte_memcpy() is implemented by the compiler, and that use_cc_memcpy is set, to avoid accidental <rte_build_config.h> #undefs. o Remove redundant rte_config.h includes. PATCH: o Add entry in release notes. o Update meson help text. RFC v3: o Fix missing #endif on loongarch. o PPC and RISCV now implemented, meaning all architectures are supported. o Unnecessary <rte_vect.h> include is removed from <rte_memcpy.h>. RFC v2: * Fix bug where rte_memcpy.h was not installed on x86. * Made an attempt to make Loongarch compile. 
--- config/meson.build | 1 + doc/guides/rel_notes/release_24_07.rst | 21 +++++++++ lib/eal/arm/include/rte_memcpy.h | 9 ++++ lib/eal/include/generic/rte_memcpy.h | 61 +++++++++++++++++++++++--- lib/eal/loongarch/include/rte_memcpy.h | 54 +---------------------- lib/eal/ppc/include/rte_memcpy.h | 9 ++++ lib/eal/riscv/include/rte_memcpy.h | 54 +---------------------- lib/eal/x86/include/meson.build | 1 + lib/eal/x86/include/rte_memcpy.h | 9 ++++ meson_options.txt | 2 + 10 files changed, 110 insertions(+), 111 deletions(-) diff --git a/config/meson.build b/config/meson.build index 8c8b019c25..456056628e 100644 --- a/config/meson.build +++ b/config/meson.build @@ -353,6 +353,7 @@ endforeach # set other values pulled from the build options dpdk_conf.set('RTE_MAX_ETHPORTS', get_option('max_ethports')) dpdk_conf.set('RTE_LIBEAL_USE_HPET', get_option('use_hpet')) +dpdk_conf.set('RTE_USE_CC_MEMCPY', get_option('use_cc_memcpy')) dpdk_conf.set('RTE_ENABLE_STDATOMIC', get_option('enable_stdatomic')) dpdk_conf.set('RTE_ENABLE_TRACE_FP', get_option('enable_trace_fp')) dpdk_conf.set('RTE_PKTMBUF_HEADROOM', get_option('pkt_mbuf_headroom')) diff --git a/doc/guides/rel_notes/release_24_07.rst b/doc/guides/rel_notes/release_24_07.rst index eb2ed1a55f..31af6303b3 100644 --- a/doc/guides/rel_notes/release_24_07.rst +++ b/doc/guides/rel_notes/release_24_07.rst @@ -24,6 +24,27 @@ DPDK Release 24.07 New Features ------------ +* **Compiler memcpy replaces custom DPDK implementation.** + + The memory copy functions of ``<rte_memcpy.h>`` now optionally + delegates to the standard memcpy() function, implemented by the + compiler and the C runtime (e.g., libc). + + In this release of DPDK, the handcrafted, per-architecture memory + copy implementations are still the default. Compiler memcpy is + enabled by setting the new ``use_cc_memcpy`` build option to true. 
+ + The performance benefits of the custom DPDK rte_memcpy() + implementations have been diminishing with every new compiler + release, and with current toolchains the use of a custom memcpy() + implementation may even result in worse performance than the + standard memcpy(). + + An additional benefit of using compiler memcpy is that compilers and + static analysis tools have an easier time detecting incorrect usage + of rte_memcpy() (e.g., buffer overruns, or overlapping source and + destination buffers). + .. This section should contain new features added in this release. Sample format: diff --git a/lib/eal/arm/include/rte_memcpy.h b/lib/eal/arm/include/rte_memcpy.h index 47dea9a8cc..5d2ea7dbfa 100644 --- a/lib/eal/arm/include/rte_memcpy.h +++ b/lib/eal/arm/include/rte_memcpy.h @@ -5,10 +5,19 @@ #ifndef _RTE_MEMCPY_ARM_H_ #define _RTE_MEMCPY_ARM_H_ +#if defined(RTE_USE_CC_MEMCPY) || !defined(RTE_ARCH_ARM64_MEMCPY) + +#define RTE_CC_MEMCPY +#include <generic/rte_memcpy.h> + +#else + #ifdef RTE_ARCH_64 #include <rte_memcpy_64.h> #else #include <rte_memcpy_32.h> #endif +#endif /* RTE_USE_CC_MEMCPY */ + #endif /* _RTE_MEMCPY_ARM_H_ */ diff --git a/lib/eal/include/generic/rte_memcpy.h b/lib/eal/include/generic/rte_memcpy.h index e7f0f8eaa9..cfb0175bd2 100644 --- a/lib/eal/include/generic/rte_memcpy.h +++ b/lib/eal/include/generic/rte_memcpy.h @@ -5,12 +5,19 @@ #ifndef _RTE_MEMCPY_H_ #define _RTE_MEMCPY_H_ +#ifdef __cplusplus +extern "C" { +#endif + /** * @file * * Functions for vectorised implementation of memcpy(). */ +#include <stdint.h> +#include <string.h> + /** * Copy 16 bytes from one location to another using optimised * instructions. The locations should not overlap. @@ -35,8 +42,6 @@ rte_mov16(uint8_t *dst, const uint8_t *src); static inline void rte_mov32(uint8_t *dst, const uint8_t *src); -#ifdef __DOXYGEN__ - /** * Copy 48 bytes from one location to another using optimised * instructions. The locations should not overlap. 
@@ -49,8 +54,6 @@ rte_mov32(uint8_t *dst, const uint8_t *src); static inline void rte_mov48(uint8_t *dst, const uint8_t *src); -#endif /* __DOXYGEN__ */ - /** * Copy 64 bytes from one location to another using optimised * instructions. The locations should not overlap. @@ -87,8 +90,6 @@ rte_mov128(uint8_t *dst, const uint8_t *src); static inline void rte_mov256(uint8_t *dst, const uint8_t *src); -#ifdef __DOXYGEN__ - /** * Copy bytes from one location to another. The locations must not overlap. * @@ -111,6 +112,52 @@ rte_mov256(uint8_t *dst, const uint8_t *src); static void * rte_memcpy(void *dst, const void *src, size_t n); -#endif /* __DOXYGEN__ */ +#ifdef RTE_CC_MEMCPY +static inline void +rte_mov16(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 16); +} + +static inline void +rte_mov32(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 32); +} + +static inline void +rte_mov48(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 48); +} + +static inline void +rte_mov64(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 64); +} + +static inline void +rte_mov128(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 128); +} + +static inline void +rte_mov256(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 256); +} + +static inline void * +rte_memcpy(void *dst, const void *src, size_t n) +{ + return memcpy(dst, src, n); +} +#endif /* RTE_CC_MEMCPY */ + +#ifdef __cplusplus +} +#endif #endif /* _RTE_MEMCPY_H_ */ diff --git a/lib/eal/loongarch/include/rte_memcpy.h b/lib/eal/loongarch/include/rte_memcpy.h index 22578d40f4..4e6027caee 100644 --- a/lib/eal/loongarch/include/rte_memcpy.h +++ b/lib/eal/loongarch/include/rte_memcpy.h @@ -5,57 +5,7 @@ #ifndef RTE_MEMCPY_LOONGARCH_H #define RTE_MEMCPY_LOONGARCH_H -#include <stdint.h> -#include <string.h> - -#include "rte_common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#include "generic/rte_memcpy.h" - -static inline void -rte_mov16(uint8_t *dst, const uint8_t *src) -{ - memcpy(dst, src, 
16); -} - -static inline void -rte_mov32(uint8_t *dst, const uint8_t *src) -{ - memcpy(dst, src, 32); -} - -static inline void -rte_mov48(uint8_t *dst, const uint8_t *src) -{ - memcpy(dst, src, 48); -} - -static inline void -rte_mov64(uint8_t *dst, const uint8_t *src) -{ - memcpy(dst, src, 64); -} - -static inline void -rte_mov128(uint8_t *dst, const uint8_t *src) -{ - memcpy(dst, src, 128); -} - -static inline void -rte_mov256(uint8_t *dst, const uint8_t *src) -{ - memcpy(dst, src, 256); -} - -#define rte_memcpy(d, s, n) memcpy((d), (s), (n)) - -#ifdef __cplusplus -} -#endif +#define RTE_CC_MEMCPY +#include <generic/rte_memcpy.h> #endif /* RTE_MEMCPY_LOONGARCH_H */ diff --git a/lib/eal/ppc/include/rte_memcpy.h b/lib/eal/ppc/include/rte_memcpy.h index 6f388c0234..162c1483f5 100644 --- a/lib/eal/ppc/include/rte_memcpy.h +++ b/lib/eal/ppc/include/rte_memcpy.h @@ -6,6 +6,13 @@ #ifndef _RTE_MEMCPY_PPC_64_H_ #define _RTE_MEMCPY_PPC_64_H_ +#ifdef RTE_USE_CC_MEMCPY + +#define RTE_CC_MEMCPY +#include <generic/rte_memcpy.h> + +#else + #include <stdint.h> #include <string.h> @@ -215,4 +222,6 @@ rte_memcpy_func(void *dst, const void *src, size_t n) } #endif +#endif /* RTE_USE_CC_MEMCPY */ + #endif /* _RTE_MEMCPY_PPC_64_H_ */ diff --git a/lib/eal/riscv/include/rte_memcpy.h b/lib/eal/riscv/include/rte_memcpy.h index e34f19396e..7f6c07d090 100644 --- a/lib/eal/riscv/include/rte_memcpy.h +++ b/lib/eal/riscv/include/rte_memcpy.h @@ -7,57 +7,7 @@ #ifndef RTE_MEMCPY_RISCV_H #define RTE_MEMCPY_RISCV_H -#include <stdint.h> -#include <string.h> - -#include "rte_common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#include "generic/rte_memcpy.h" - -static inline void -rte_mov16(uint8_t *dst, const uint8_t *src) -{ - memcpy(dst, src, 16); -} - -static inline void -rte_mov32(uint8_t *dst, const uint8_t *src) -{ - memcpy(dst, src, 32); -} - -static inline void -rte_mov48(uint8_t *dst, const uint8_t *src) -{ - memcpy(dst, src, 48); -} - -static inline void -rte_mov64(uint8_t *dst, const 
uint8_t *src) -{ - memcpy(dst, src, 64); -} - -static inline void -rte_mov128(uint8_t *dst, const uint8_t *src) -{ - memcpy(dst, src, 128); -} - -static inline void -rte_mov256(uint8_t *dst, const uint8_t *src) -{ - memcpy(dst, src, 256); -} - -#define rte_memcpy(d, s, n) memcpy((d), (s), (n)) - -#ifdef __cplusplus -} -#endif +#define RTE_CC_MEMCPY +#include <generic/rte_memcpy.h> #endif /* RTE_MEMCPY_RISCV_H */ diff --git a/lib/eal/x86/include/meson.build b/lib/eal/x86/include/meson.build index 52d2f8e969..09c2fe2485 100644 --- a/lib/eal/x86/include/meson.build +++ b/lib/eal/x86/include/meson.build @@ -16,6 +16,7 @@ arch_headers = files( 'rte_spinlock.h', 'rte_vect.h', ) + arch_indirect_headers = files( 'rte_atomic_32.h', 'rte_atomic_64.h', diff --git a/lib/eal/x86/include/rte_memcpy.h b/lib/eal/x86/include/rte_memcpy.h index 42058e4a3f..2d9f5954f1 100644 --- a/lib/eal/x86/include/rte_memcpy.h +++ b/lib/eal/x86/include/rte_memcpy.h @@ -11,6 +11,13 @@ * Functions for SSE/AVX/AVX2/AVX512 implementation of memcpy(). */ +#ifdef RTE_USE_CC_MEMCPY + +#define RTE_CC_MEMCPY +#include <generic/rte_memcpy.h> + +#else + #include <stdio.h> #include <stdint.h> #include <string.h> @@ -767,4 +774,6 @@ rte_memcpy(void *dst, const void *src, size_t n) } #endif +#endif /* RTE_USE_CC_MEMCPY */ + #endif /* _RTE_MEMCPY_X86_64_H_ */ diff --git a/meson_options.txt b/meson_options.txt index e49b2fc089..69a01f6578 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -60,3 +60,5 @@ option('tests', type: 'boolean', value: true, description: 'build unit tests') option('use_hpet', type: 'boolean', value: false, description: 'use HPET timer in EAL') +option('use_cc_memcpy', type: 'boolean', value: false, description: + 'Have the functions of <rte_memcpy.h> delegate to compiler/libc memcpy() instead of using custom implementation.') -- 2.34.1