> > > > > When choosing a vector path to take, an extra condition must be > > > satisfied to ensure the max SIMD bitwidth allows for the CPU enabled > > > path. > > > > > > The vector path was initially chosen in RTE_INIT, however this is no > > > longer suitable as we cannot check the max SIMD bitwidth at that time. > > > Default handlers are now chosen in RTE_INIT, these default handlers > > > are used the first time the crc calc is called, and they set the suitable > > > handlers to be used going forward. > > > > > > Suggested-by: Jasvinder Singh <jasvinder.si...@intel.com> > > > Suggested-by: Olivier Matz <olivier.m...@6wind.com> > > > > > > Signed-off-by: Ciara Power <ciara.po...@intel.com> > > > > > > --- > > > v4: > > > - Added default handlers to be set at RTE_INIT time, rather than > > > choosing scalar handlers. > > > - Modified logging. > > > - Updated enum name. > > > v3: > > > - Moved choosing vector paths out of RTE_INIT. > > > - Moved checking max_simd_bitwidth into the set_alg function. > > > --- > > > lib/librte_net/rte_net_crc.c | 75 ++++++++++++++++++++++++++++++------ > > > lib/librte_net/rte_net_crc.h | 8 ++++ > > > 2 files changed, 72 insertions(+), 11 deletions(-) > > > > > > diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c > > > index 4f5b9e8286..11d0161a32 100644 > > > --- a/lib/librte_net/rte_net_crc.c > > > +++ b/lib/librte_net/rte_net_crc.c > > > @@ -9,6 +9,7 @@ > > > #include <rte_cpuflags.h> > > > #include <rte_common.h> > > > #include <rte_net_crc.h> > > > +#include <rte_eal.h> > > > > > > #if defined(RTE_ARCH_X86_64) && defined(__PCLMUL__) > > > #define X86_64_SSE42_PCLMULQDQ 1 > > > @@ -32,6 +33,12 @@ > > > static uint32_t crc32_eth_lut[CRC_LUT_SIZE]; > > > static uint32_t crc16_ccitt_lut[CRC_LUT_SIZE]; > > > > > > +static uint32_t > > > +rte_crc16_ccitt_default_handler(const uint8_t *data, uint32_t data_len); > > > + > > > +static uint32_t > > > +rte_crc32_eth_default_handler(const uint8_t *data, uint32_t data_len); > > > + > > > static uint32_t > > > rte_crc16_ccitt_handler(const uint8_t *data, uint32_t data_len); > > > > > > @@ -41,7 +48,12 @@ rte_crc32_eth_handler(const uint8_t *data, uint32_t > > > data_len); > > > typedef uint32_t > > > (*rte_net_crc_handler)(const uint8_t *data, uint32_t data_len); > > > > > > -static rte_net_crc_handler *handlers; > > > +static rte_net_crc_handler handlers_default[] = { > > > + [RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_default_handler, > > > + [RTE_NET_CRC32_ETH] = rte_crc32_eth_default_handler, > > > +}; > > > + > > > +static rte_net_crc_handler *handlers = handlers_default; > > > > > > static rte_net_crc_handler handlers_scalar[] = { > > > [RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_handler, > > > @@ -60,6 +72,9 @@ static rte_net_crc_handler handlers_neon[] = { > > > }; > > > #endif > > > > > > +static uint16_t max_simd_bitwidth; > > > +RTE_LOG_REGISTER(libnet_logtype, lib.net, INFO); > > > + > > > /** > > > * Reflect the bits about the middle > > > * > > > @@ -112,6 +127,42 @@ crc32_eth_calc_lut(const uint8_t *data, > > > return crc; > > > } > > > > > > +static uint32_t > > > +rte_crc16_ccitt_default_handler(const uint8_t *data, uint32_t data_len) > > > +{ > > > + if (max_simd_bitwidth == 0) > > > + max_simd_bitwidth = rte_get_max_simd_bitwidth(); > > > + handlers = handlers_scalar; > > > +#ifdef X86_64_SSE42_PCLMULQDQ > > > + if (max_simd_bitwidth >= RTE_SIMD_128) > > > + handlers = handlers_sse42; > > > +#endif > > > +#ifdef ARM64_NEON_PMULL > > > + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL) && > > > + max_simd_bitwidth >= RTE_SIMD_128) { > > > + handlers = handlers_neon; > > > +#endif > > > > You probably don't want to make all these checks for *every* invocation > > of that function. I think it would be better: > > if (ma_simd_bitwidth == 0) {....} > > return handlers[..](...); > > As another thougth - it is probably a bit safer to update max_simd_bitwidht > after handler value update. > > handler = ...; rte_smp_wmb(); max_simd_width = ...; > > > > > BTW, while it allows us to use best possible handler, > > such approach means extra indirect call(/jump) anyway.
Ah sorry, that would only happen once, please ignore that one. > > Hard to say off-hand would it affect performance, > > and if yes how significantly. > > Couldn't find any perf tests in our UT for it... > > > > > + return handlers[RTE_NET_CRC16_CCITT](data, data_len); > > > +} > > > + > > > +static uint32_t > > > +rte_crc32_eth_default_handler(const uint8_t *data, uint32_t data_len) > > > +{ > > > + if (max_simd_bitwidth == 0) > > > + max_simd_bitwidth = rte_get_max_simd_bitwidth(); > > > + handlers = handlers_scalar; > > > +#ifdef X86_64_SSE42_PCLMULQDQ > > > + if (max_simd_bitwidth >= RTE_SIMD_128) > > > + handlers = handlers_sse42; > > > +#endif > > > +#ifdef ARM64_NEON_PMULL > > > + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL) && > > > + max_simd_bitwidth >= RTE_SIMD_128) { > > > + handlers = handlers_neon; > > > +#endif > > > + return handlers[RTE_NET_CRC32_ETH](data, data_len); > > > +} > > > + > > > static void > > > rte_net_crc_scalar_init(void) > > > { > > > @@ -145,18 +196,26 @@ rte_crc32_eth_handler(const uint8_t *data, uint32_t > > > data_len) > > > void > > > rte_net_crc_set_alg(enum rte_net_crc_alg alg) > > > { > > > + if (max_simd_bitwidth == 0) > > > + max_simd_bitwidth = rte_get_max_simd_bitwidth(); > > > + > > > switch (alg) { > > > #ifdef X86_64_SSE42_PCLMULQDQ > > > case RTE_NET_CRC_SSE42: > > > - handlers = handlers_sse42; > > > - break; > > > + if (max_simd_bitwidth >= RTE_SIMD_128) { > > > + handlers = handlers_sse42; > > > + return; > > > + } > > > + NET_LOG(INFO, "Max SIMD Bitwidth too low, can't use SSE\n"); > > > #elif defined ARM64_NEON_PMULL > > > /* fall-through */ > > > case RTE_NET_CRC_NEON: > > > - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL)) { > > > + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL) && > > > + max_simd_bitwidth >= RTE_SIMD_128) { > > > handlers = handlers_neon; > > > - break; > > > + return; > > > } > > > + NET_LOG(INFO, "Max SIMD Bitwidth too low or CPU flag not > > > enabled, can't use NEON\n"); > > > #endif > > > /* fall-through */ > > > case RTE_NET_CRC_SCALAR: > > > @@ -184,19 +243,13 @@ rte_net_crc_calc(const void *data, > > > /* Select highest available crc algorithm as default one */ > > > RTE_INIT(rte_net_crc_init) > > > { > > > - enum rte_net_crc_alg alg = RTE_NET_CRC_SCALAR; > > > - > > > rte_net_crc_scalar_init(); > > > > > > #ifdef X86_64_SSE42_PCLMULQDQ > > > - alg = RTE_NET_CRC_SSE42; > > > rte_net_crc_sse42_init(); > > > #elif defined ARM64_NEON_PMULL > > > if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL)) { > > > - alg = RTE_NET_CRC_NEON; > > > rte_net_crc_neon_init(); > > > } > > > #endif > > > - > > > - rte_net_crc_set_alg(alg); > > > } > > > diff --git a/lib/librte_net/rte_net_crc.h b/lib/librte_net/rte_net_crc.h > > > index 16e85ca970..c942865ecf 100644 > > > --- a/lib/librte_net/rte_net_crc.h > > > +++ b/lib/librte_net/rte_net_crc.h > > > @@ -7,6 +7,8 @@ > > > > > > #include <stdint.h> > > > > > > +#include <rte_log.h> > > > + > > > #ifdef __cplusplus > > > extern "C" { > > > #endif > > > @@ -25,6 +27,12 @@ enum rte_net_crc_alg { > > > RTE_NET_CRC_NEON, > > > }; > > > > > > +extern int libnet_logtype; > > > + > > > +#define NET_LOG(level, fmt, args...) > > > \ > > > + rte_log(RTE_LOG_ ## level, libnet_logtype, "%s(): " fmt "\n", \ > > > + __func__, ## args) > > > + > > > /** > > > * This API set the CRC computation algorithm (i.e. scalar version, > > > * x86 64-bit sse4.2 intrinsic version, etc.) and internal data > > > -- > > > 2.22.0