> -----Original Message----- > From: dev <dev-boun...@dpdk.org> On Behalf Of Mairtin o Loingsigh > Sent: Friday, October 9, 2020 9:51 PM > To: jasvinder.si...@intel.com; bruce.richard...@intel.com; > pablo.de.lara.gua...@intel.com; konstantin.anan...@intel.com > Cc: dev@dpdk.org; brendan.r...@intel.com; mairtin.oloings...@intel.com; > david.co...@intel.com > Subject: [dpdk-dev] [PATCH v5 1/2] net: add run-time architecture specific > CRC selection > > This patch adds support for run-time selection of the optimal architecture- > specific CRC path, based on the supported instruction set(s) of the CPU. > > The compiler option checks have been moved from the C files to the meson > script. The rte_cpu_get_flag_enabled function is called automatically by the > library at process initialization time to determine which instructions the CPU > supports, with the most optimal supported CRC path ultimately selected. > > Signed-off-by: Mairtin o Loingsigh <mairtin.oloings...@intel.com> > Signed-off-by: David Coyle <david.co...@intel.com> > Acked-by: Konstantin Ananyev <konstantin.anan...@intel.com> > --- > doc/guides/rel_notes/release_20_11.rst | 4 + > lib/librte_net/meson.build | 34 ++++++- > lib/librte_net/net_crc.h | 34 +++++++ > lib/librte_net/{net_crc_neon.h => net_crc_neon.c} | 26 ++--- > lib/librte_net/{net_crc_sse.h => net_crc_sse.c} | 34 ++----- > lib/librte_net/rte_net_crc.c | 116 > +++++++++++++++------- > 6 files changed, 168 insertions(+), 80 deletions(-) create mode 100644 > lib/librte_net/net_crc.h rename lib/librte_net/{net_crc_neon.h => > net_crc_neon.c} (95%) rename lib/librte_net/{net_crc_sse.h => > net_crc_sse.c} (94%) > > diff --git a/doc/guides/rel_notes/release_20_11.rst > b/doc/guides/rel_notes/release_20_11.rst > index 808bdc4e5..b77297f7e 100644 > --- a/doc/guides/rel_notes/release_20_11.rst > +++ b/doc/guides/rel_notes/release_20_11.rst > @@ -55,6 +55,10 @@ New Features > Also, make sure to start the actual text at the margin. > ======================================================= > > +* **Updated CRC modules of rte_net library.** > + > + * Added run-time selection of the optimal architecture-specific CRC path. > + > * **Updated Broadcom bnxt driver.** > > Updated the Broadcom bnxt driver with new features and improvements, > including: > diff --git a/lib/librte_net/meson.build b/lib/librte_net/meson.build index > 24ed8253b..fa439b9e5 100644 > --- a/lib/librte_net/meson.build > +++ b/lib/librte_net/meson.build > @@ -1,5 +1,5 @@ > # SPDX-License-Identifier: BSD-3-Clause -# Copyright(c) 2017 Intel > Corporation > +# Copyright(c) 2017-2020 Intel Corporation > > headers = files('rte_ip.h', > 'rte_tcp.h', > @@ -20,3 +20,35 @@ headers = files('rte_ip.h', > > sources = files('rte_arp.c', 'rte_ether.c', 'rte_net.c', 'rte_net_crc.c') > deps += > ['mbuf'] > + > +if dpdk_conf.has('RTE_ARCH_X86_64') > + net_crc_sse42_cpu_support = ( > + cc.get_define('__PCLMUL__', args: machine_args) != '') > + net_crc_sse42_cc_support = ( > + cc.has_argument('-mpclmul') and cc.has_argument('-maes')) > + > + build_static_net_crc_sse42_lib = 0 > + > + if net_crc_sse42_cpu_support == true > + sources += files('net_crc_sse.c') > + cflags += ['-DCC_X86_64_SSE42_PCLMULQDQ_SUPPORT'] > + elif net_crc_sse42_cc_support == true > + build_static_net_crc_sse42_lib = 1 > + net_crc_sse42_lib_cflags = ['-mpclmul', '-maes'] > + cflags += ['-DCC_X86_64_SSE42_PCLMULQDQ_SUPPORT'] > + endif > + > + if build_static_net_crc_sse42_lib == 1 > + net_crc_sse42_lib = static_library( > + 'net_crc_sse42_lib', > + 'net_crc_sse.c', > + dependencies: static_rte_eal, > + c_args: [cflags, > + net_crc_sse42_lib_cflags]) > + objs += net_crc_sse42_lib.extract_objects('net_crc_sse.c') > + endif > +elif (dpdk_conf.has('RTE_ARCH_ARM64') and > + cc.get_define('__ARM_FEATURE_CRYPTO', args: > machine_args) != '') > + sources += files('net_crc_neon.c') > + cflags += ['-DCC_ARM64_NEON_PMULL_SUPPORT'] endif > diff --git a/lib/librte_net/net_crc.h b/lib/librte_net/net_crc.h new file mode > 100644 index 000000000..a1578a56c > --- /dev/null > +++ b/lib/librte_net/net_crc.h > @@ -0,0 +1,34 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2020 Intel Corporation > + */ > + > +#ifndef _NET_CRC_H_ > +#define _NET_CRC_H_ > + > +/* > + * Different implementations of CRC > + */ > + > +/* SSE4.2 */ > + > +void > +rte_net_crc_sse42_init(void); > + > +uint32_t > +rte_crc16_ccitt_sse42_handler(const uint8_t *data, uint32_t data_len); > + > +uint32_t > +rte_crc32_eth_sse42_handler(const uint8_t *data, uint32_t data_len); > + > +/* NEON */ > + > +void > +rte_net_crc_neon_init(void); > + > +uint32_t > +rte_crc16_ccitt_neon_handler(const uint8_t *data, uint32_t data_len); > + > +uint32_t > +rte_crc32_eth_neon_handler(const uint8_t *data, uint32_t data_len); > + > +#endif /* _NET_CRC_H_ */ > diff --git a/lib/librte_net/net_crc_neon.h b/lib/librte_net/net_crc_neon.c > similarity index 95% rename from lib/librte_net/net_crc_neon.h rename to > lib/librte_net/net_crc_neon.c index 63fa1d4a1..f61d75a8c 100644 > --- a/lib/librte_net/net_crc_neon.h > +++ b/lib/librte_net/net_crc_neon.c > @@ -2,17 +2,15 @@ > * Copyright(c) 2017 Cavium, Inc > */ > > -#ifndef _NET_CRC_NEON_H_ > -#define _NET_CRC_NEON_H_ > +#include <string.h> > > +#include <rte_common.h> > #include <rte_branch_prediction.h> > #include <rte_net_crc.h> > #include <rte_vect.h> > #include <rte_cpuflags.h> > > -#ifdef __cplusplus > -extern "C" { > -#endif > +#include "net_crc.h" > > /** PMULL CRC computation context structure */ struct crc_pmull_ctx > { @@ -218,7 +216,7 @@ crc32_eth_calc_pmull( > return n; > } > > -static inline void > +void > rte_net_crc_neon_init(void) > { > /* Initialize CRC16 data */ > @@ -242,9 +240,8 @@ rte_net_crc_neon_init(void) > crc32_eth_pmull.rk7_rk8 = vld1q_u64(eth_k7_k8); } > > -static inline uint32_t > -rte_crc16_ccitt_neon_handler(const uint8_t *data, > - uint32_t data_len) > +uint32_t > +rte_crc16_ccitt_neon_handler(const uint8_t *data, uint32_t data_len) > { > return (uint16_t)~crc32_eth_calc_pmull(data, > data_len, > @@ -252,18 +249,11 @@ rte_crc16_ccitt_neon_handler(const uint8_t *data, > &crc16_ccitt_pmull); > } > > -static inline uint32_t > -rte_crc32_eth_neon_handler(const uint8_t *data, > - uint32_t data_len) > +uint32_t > +rte_crc32_eth_neon_handler(const uint8_t *data, uint32_t data_len) > { > return ~crc32_eth_calc_pmull(data, > data_len, > 0xffffffffUL, > &crc32_eth_pmull); > } > - > -#ifdef __cplusplus > -} > -#endif > - > -#endif /* _NET_CRC_NEON_H_ */ > diff --git a/lib/librte_net/net_crc_sse.h b/lib/librte_net/net_crc_sse.c > similarity index 94% rename from lib/librte_net/net_crc_sse.h rename to > lib/librte_net/net_crc_sse.c index 1c7b7a548..053b54b39 100644 > --- a/lib/librte_net/net_crc_sse.h > +++ b/lib/librte_net/net_crc_sse.c > @@ -1,18 +1,16 @@ > /* SPDX-License-Identifier: BSD-3-Clause > - * Copyright(c) 2017 Intel Corporation > + * Copyright(c) 2017-2020 Intel Corporation > */ > > -#ifndef _RTE_NET_CRC_SSE_H_ > -#define _RTE_NET_CRC_SSE_H_ > +#include <string.h> > > +#include <rte_common.h> > #include <rte_branch_prediction.h> > +#include <rte_cpuflags.h> > > -#include <x86intrin.h> > -#include <cpuid.h> > +#include "net_crc.h" > > -#ifdef __cplusplus > -extern "C" { > -#endif > +#include <x86intrin.h> > > /** PCLMULQDQ CRC computation context structure */ struct > crc_pclmulqdq_ctx { @@ -259,8 +257,7 @@ crc32_eth_calc_pclmulqdq( > return n; > } > > - > -static inline void > +void > rte_net_crc_sse42_init(void) > { > uint64_t k1, k2, k5, k6; > @@ -303,12 +300,10 @@ rte_net_crc_sse42_init(void) > * use other data types such as float, double, etc. > */ > _mm_empty(); > - > } > > -static inline uint32_t > -rte_crc16_ccitt_sse42_handler(const uint8_t *data, > - uint32_t data_len) > +uint32_t > +rte_crc16_ccitt_sse42_handler(const uint8_t *data, uint32_t data_len) > { > /** return 16-bit CRC value */ > return (uint16_t)~crc32_eth_calc_pclmulqdq(data, > @@ -317,18 +312,11 @@ rte_crc16_ccitt_sse42_handler(const uint8_t *data, > &crc16_ccitt_pclmulqdq); > } > > -static inline uint32_t > -rte_crc32_eth_sse42_handler(const uint8_t *data, > - uint32_t data_len) > +uint32_t > +rte_crc32_eth_sse42_handler(const uint8_t *data, uint32_t data_len) > { > return ~crc32_eth_calc_pclmulqdq(data, > data_len, > 0xffffffffUL, > &crc32_eth_pclmulqdq); > } > - > -#ifdef __cplusplus > -} > -#endif > - > -#endif /* _RTE_NET_CRC_SSE_H_ */ > diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c index > 4f5b9e828..d271d5205 100644 > --- a/lib/librte_net/rte_net_crc.c > +++ b/lib/librte_net/rte_net_crc.c > @@ -1,5 +1,5 @@ > /* SPDX-License-Identifier: BSD-3-Clause > - * Copyright(c) 2017 Intel Corporation > + * Copyright(c) 2017-2020 Intel Corporation > */ > > #include <stddef.h> > @@ -10,17 +10,7 @@ > #include <rte_common.h> > #include <rte_net_crc.h> > > -#if defined(RTE_ARCH_X86_64) && defined(__PCLMUL__) > -#define X86_64_SSE42_PCLMULQDQ 1 > -#elif defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_CRYPTO) > -#define ARM64_NEON_PMULL 1 > -#endif > - > -#ifdef X86_64_SSE42_PCLMULQDQ > -#include <net_crc_sse.h> > -#elif defined ARM64_NEON_PMULL > -#include <net_crc_neon.h> > -#endif > +#include "net_crc.h" > > /** CRC polynomials */ > #define CRC32_ETH_POLYNOMIAL 0x04c11db7UL @@ -41,25 +31,27 @@ > rte_crc32_eth_handler(const uint8_t *data, uint32_t data_len); typedef > uint32_t (*rte_net_crc_handler)(const uint8_t *data, uint32_t data_len); > > -static rte_net_crc_handler *handlers; > +static const rte_net_crc_handler *handlers; > > -static rte_net_crc_handler handlers_scalar[] = { > +static const rte_net_crc_handler handlers_scalar[] = { > [RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_handler, > [RTE_NET_CRC32_ETH] = rte_crc32_eth_handler, }; > - > -#ifdef X86_64_SSE42_PCLMULQDQ > -static rte_net_crc_handler handlers_sse42[] = { > +#ifdef CC_X86_64_SSE42_PCLMULQDQ_SUPPORT static const > +rte_net_crc_handler handlers_sse42[] = { > [RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_sse42_handler, > [RTE_NET_CRC32_ETH] = rte_crc32_eth_sse42_handler, }; -#elif > defined ARM64_NEON_PMULL -static rte_net_crc_handler handlers_neon[] > = { > +#endif > +#ifdef CC_ARM64_NEON_PMULL_SUPPORT > +static const rte_net_crc_handler handlers_neon[] = { > [RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_neon_handler, > [RTE_NET_CRC32_ETH] = rte_crc32_eth_neon_handler, }; #endif > > +/* Scalar handling */ > + > /** > * Reflect the bits about the middle > * > @@ -142,29 +134,82 @@ rte_crc32_eth_handler(const uint8_t *data, > uint32_t data_len) > crc32_eth_lut); > } > > +/* SSE4.2/PCLMULQDQ handling */ > + > +#define SSE42_PCLMULQDQ_CPU_SUPPORTED \ > + rte_cpu_get_flag_enabled(RTE_CPUFLAG_PCLMULQDQ) > + > +static const rte_net_crc_handler * > +sse42_pclmulqdq_get_handlers(void) > +{ > +#ifdef CC_X86_64_SSE42_PCLMULQDQ_SUPPORT > + if (SSE42_PCLMULQDQ_CPU_SUPPORTED) > + return handlers_sse42; > +#endif > + return NULL; > +} > + > +static uint8_t > +sse42_pclmulqdq_init(void) > +{ > +#ifdef CC_X86_64_SSE42_PCLMULQDQ_SUPPORT > + if (SSE42_PCLMULQDQ_CPU_SUPPORTED) { > + rte_net_crc_sse42_init(); > + return 1; > + } > +#endif > + return 0; > +} > + > +/* NEON/PMULL handling */ > + > +#define NEON_PMULL_CPU_SUPPORTED \ > + rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL) > + > +static const rte_net_crc_handler * > +neon_pmull_get_handlers(void) > +{ > +#ifdef CC_ARM64_NEON_PMULL_SUPPORT > + if (NEON_PMULL_CPU_SUPPORTED) > + return handlers_neon; > +#endif > + return NULL; > +} > + > +static uint8_t > +neon_pmull_init(void) > +{ > +#ifdef CC_ARM64_NEON_PMULL_SUPPORT > + if (NEON_PMULL_CPU_SUPPORTED) { > + rte_net_crc_neon_init(); > + return 1; > + } > +#endif > + return 0; > +} > + > +/* Public API */ > + > void > rte_net_crc_set_alg(enum rte_net_crc_alg alg) { > + handlers = NULL; > + > switch (alg) { > -#ifdef X86_64_SSE42_PCLMULQDQ > case RTE_NET_CRC_SSE42: > - handlers = handlers_sse42; > - break; > -#elif defined ARM64_NEON_PMULL > - /* fall-through */ > + handlers = sse42_pclmulqdq_get_handlers(); > + break; /* for x86, always break here */ > case RTE_NET_CRC_NEON: > - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL)) { > - handlers = handlers_neon; > - break; > - } > -#endif > + handlers = neon_pmull_get_handlers(); > /* fall-through */ > case RTE_NET_CRC_SCALAR: > /* fall-through */ > default: > - handlers = handlers_scalar; > break; > } > + > + if (handlers == NULL) > + handlers = handlers_scalar; > } > > uint32_t > @@ -188,15 +233,10 @@ RTE_INIT(rte_net_crc_init) > > rte_net_crc_scalar_init(); > > -#ifdef X86_64_SSE42_PCLMULQDQ > - alg = RTE_NET_CRC_SSE42; > - rte_net_crc_sse42_init(); > -#elif defined ARM64_NEON_PMULL > - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL)) { > + if (sse42_pclmulqdq_init()) > + alg = RTE_NET_CRC_SSE42; > + if (neon_pmull_init()) > alg = RTE_NET_CRC_NEON; > - rte_net_crc_neon_init(); > - } > -#endif > > rte_net_crc_set_alg(alg); > } > -- > 2.12.3 The change looks good to me.
Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com>