When choosing a vector path to take, an extra condition must be satisfied: the max SIMD bitwidth must allow the CPU-enabled path.
The vector path was initially chosen in RTE_INIT, however this is no longer suitable as we cannot check the max SIMD bitwidth at that time. The default chosen in RTE_INIT is now scalar. For best performance and to use vector paths, apps must explicitly call the set algorithm function before using other functions from this library, as this is where vector handlers are now chosen. Suggested-by: Jasvinder Singh <jasvinder.si...@intel.com> Signed-off-by: Ciara Power <ciara.po...@intel.com> --- v3: - Moved choosing vector paths out of RTE_INIT. - Moved checking max_simd_bitwidth into the set_alg function. --- lib/librte_net/rte_net_crc.c | 26 +++++++++++++++++--------- lib/librte_net/rte_net_crc.h | 3 ++- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c index 9fd4794a9d..241eb16399 100644 --- a/lib/librte_net/rte_net_crc.c +++ b/lib/librte_net/rte_net_crc.c @@ -9,6 +9,7 @@ #include <rte_cpuflags.h> #include <rte_common.h> #include <rte_net_crc.h> +#include <rte_eal.h> #if defined(RTE_ARCH_X86_64) && defined(RTE_MACHINE_CPUFLAG_PCLMULQDQ) #define X86_64_SSE42_PCLMULQDQ 1 @@ -60,6 +61,9 @@ static rte_net_crc_handler handlers_neon[] = { }; #endif +static uint16_t max_simd_bitwidth; +#define RTE_LOGTYPE_NET RTE_LOGTYPE_USER1 + /** * Reflect the bits about the middle * @@ -145,18 +149,26 @@ rte_crc32_eth_handler(const uint8_t *data, uint32_t data_len) void rte_net_crc_set_alg(enum rte_net_crc_alg alg) { + if (max_simd_bitwidth == 0) + max_simd_bitwidth = rte_get_max_simd_bitwidth(); + switch (alg) { #ifdef X86_64_SSE42_PCLMULQDQ case RTE_NET_CRC_SSE42: - handlers = handlers_sse42; - break; + if (max_simd_bitwidth >= RTE_MAX_128_SIMD) { + handlers = handlers_sse42; + return; + } + RTE_LOG(INFO, NET, "Max SIMD Bitwidth too low, using scalar\n"); #elif defined ARM64_NEON_PMULL /* fall-through */ case RTE_NET_CRC_NEON: - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL)) { + if 
(rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL) && + max_simd_bitwidth >= RTE_MAX_128_SIMD) { handlers = handlers_neon; - break; + return; } + RTE_LOG(INFO, NET, "Max SIMD Bitwidth too low or CPU flag not enabled, using scalar\n"); #endif /* fall-through */ case RTE_NET_CRC_SCALAR: @@ -184,19 +196,15 @@ rte_net_crc_calc(const void *data, /* Select highest available crc algorithm as default one */ RTE_INIT(rte_net_crc_init) { - enum rte_net_crc_alg alg = RTE_NET_CRC_SCALAR; - rte_net_crc_scalar_init(); #ifdef X86_64_SSE42_PCLMULQDQ - alg = RTE_NET_CRC_SSE42; rte_net_crc_sse42_init(); #elif defined ARM64_NEON_PMULL if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL)) { - alg = RTE_NET_CRC_NEON; rte_net_crc_neon_init(); } #endif - rte_net_crc_set_alg(alg); + rte_net_crc_set_alg(RTE_NET_CRC_SCALAR); } diff --git a/lib/librte_net/rte_net_crc.h b/lib/librte_net/rte_net_crc.h index 16e85ca970..7a45ebe193 100644 --- a/lib/librte_net/rte_net_crc.h +++ b/lib/librte_net/rte_net_crc.h @@ -28,7 +28,8 @@ enum rte_net_crc_alg { /** * This API set the CRC computation algorithm (i.e. scalar version, * x86 64-bit sse4.2 intrinsic version, etc.) and internal data - * structure. + * structure. This should be called before any other functions, to + * choose the algorithm for best performance. * * @param alg * This parameter is used to select the CRC implementation version. -- 2.17.1