On 10/1/2024 7:11 PM, Arkadiusz Kusztal wrote: > The current net CRC API is not thread-safe, this patch > solves this by adding another, thread-safe API functions. > This API is also safe to use across multiple processes, > yet with limitations on max-simd-bitwidth, which will be checked only by > the process that created the CRC context; all other processes will use > the same CRC function when used with the same CRC context. > It is an undefined behavior when process binaries are compiled > with different SIMD capabilities when the same CRC context is used. > > Signed-off-by: Arkadiusz Kusztal <arkadiuszx.kusz...@intel.com>
<...> > +static struct > +{ > + uint32_t (*f[RTE_NET_CRC_REQS]) > + (const uint8_t *data, uint32_t data_len); > It increases readability to typedef function pointers. > +} handlers[RTE_NET_CRC_AVX512 + 1]; > > -/** > - * Reflect the bits about the middle > - * > - * @param val > - * value to be reflected > - * > - * @return > - * reflected value > - */ > -static uint32_t > +static inline uint32_t > Does changing to 'inline' required, as function is static compiler can do the same. > reflect_32bits(uint32_t val) > { > uint32_t i, res = 0; > @@ -99,26 +43,7 @@ reflect_32bits(uint32_t val) > return res; > } > > -static void > -crc32_eth_init_lut(uint32_t poly, > - uint32_t *lut) > -{ > - uint32_t i, j; > - > - for (i = 0; i < CRC_LUT_SIZE; i++) { > - uint32_t crc = reflect_32bits(i); > - > - for (j = 0; j < 8; j++) { > - if (crc & 0x80000000L) > - crc = (crc << 1) ^ poly; > - else > - crc <<= 1; > - } > - lut[i] = reflect_32bits(crc); > - } > -} > - > -static __rte_always_inline uint32_t > +static inline uint32_t > Why not forcing inline anymore? Are these inline changes related to the thread-safety? > crc32_eth_calc_lut(const uint8_t *data, > uint32_t data_len, > uint32_t crc, > @@ -130,20 +55,9 @@ crc32_eth_calc_lut(const uint8_t *data, > return crc; > } > > -static void > -rte_net_crc_scalar_init(void) > -{ > - /* 32-bit crc init */ > - crc32_eth_init_lut(CRC32_ETH_POLYNOMIAL, crc32_eth_lut); > - > - /* 16-bit CRC init */ > - crc32_eth_init_lut(CRC16_CCITT_POLYNOMIAL << 16, crc16_ccitt_lut); > -} > - > static inline uint32_t > -rte_crc16_ccitt_handler(const uint8_t *data, uint32_t data_len) > +crc16_ccitt(const uint8_t *data, uint32_t data_len) > { > - /* return 16-bit CRC value */ > Why not keep comments? Are they wrong? <...> > +static void > +crc_scalar_init(void) > +{ > + crc32_eth_init_lut(CRC32_ETH_POLYNOMIAL, crc32_eth_lut); > + crc32_eth_init_lut(CRC16_CCITT_POLYNOMIAL << 16, crc16_ccitt_lut); > + > + handlers[RTE_NET_CRC_SCALAR].f[RTE_NET_CRC16_CCITT] = crc16_ccitt; > + handlers[RTE_NET_CRC_SCALAR].f[RTE_NET_CRC32_ETH] = crc32_eth; > +1 to remove global handlers pointer and add context, But current handlers array content is static, it can be set when defined, instead of functions. <...> > -static uint32_t > -rte_crc32_eth_default_handler(const uint8_t *data, uint32_t data_len) > +struct rte_net_crc rte_net_crc_set(enum rte_net_crc_alg alg, > + enum rte_net_crc_type type) > { > - handlers = NULL; > - if (max_simd_bitwidth == 0) > - max_simd_bitwidth = rte_vect_get_max_simd_bitwidth(); > - > - handlers = avx512_vpclmulqdq_get_handlers(); > - if (handlers != NULL) > - return handlers[RTE_NET_CRC32_ETH](data, data_len); > - handlers = sse42_pclmulqdq_get_handlers(); > - if (handlers != NULL) > - return handlers[RTE_NET_CRC32_ETH](data, data_len); > - handlers = neon_pmull_get_handlers(); > - if (handlers != NULL) > - return handlers[RTE_NET_CRC32_ETH](data, data_len); > - handlers = handlers_scalar; > - return handlers[RTE_NET_CRC32_ETH](data, data_len); > -} > + uint16_t max_simd_bitwidth; > > -/* Public API */ > - > -void > -rte_net_crc_set_alg(enum rte_net_crc_alg alg) > -{ > - handlers = NULL; > - if (max_simd_bitwidth == 0) > - max_simd_bitwidth = rte_vect_get_max_simd_bitwidth(); > + max_simd_bitwidth = rte_vect_get_max_simd_bitwidth(); > > switch (alg) { > case RTE_NET_CRC_AVX512: > - handlers = avx512_vpclmulqdq_get_handlers(); > - if (handlers != NULL) > - break; > +#ifdef CC_X86_64_AVX512_VPCLMULQDQ_SUPPORT > + if (AVX512_VPCLMULQDQ_CPU_SUPPORTED && > + max_simd_bitwidth >= RTE_VECT_SIMD_512) { > + return (struct rte_net_crc){ RTE_NET_CRC_AVX512, type }; > + } > +#endif > /* fall-through */ > case RTE_NET_CRC_SSE42: > - handlers = sse42_pclmulqdq_get_handlers(); > - break; /* for x86, always break here */ > +#ifdef CC_X86_64_SSE42_PCLMULQDQ_SUPPORT > + if (SSE42_PCLMULQDQ_CPU_SUPPORTED && > + max_simd_bitwidth >= RTE_VECT_SIMD_128) { > + return (struct rte_net_crc){ RTE_NET_CRC_SSE42, type }; > + } > +#endif > + break; > case RTE_NET_CRC_NEON: > - handlers = neon_pmull_get_handlers(); > - /* fall-through */ > - case RTE_NET_CRC_SCALAR: > - /* fall-through */ > +#ifdef CC_ARM64_NEON_PMULL_SUPPORT > + if (NEON_PMULL_CPU_SUPPORTED && > + max_simd_bitwidth >= RTE_VECT_SIMD_128) { > + return (struct rte_net_crc){ RTE_NET_CRC_NEON, type }; > + } > +#endif > Is it more readable as following, up to you: ``` rte_net_crc_set(alg, type) { enum rte_net_crc_alg new_alg = RTE_NET_CRC_SCALAR; switch (alg) { case AVX512: new_alg = .. case NEON: new_alg = .. } return struct rte_net_crc){ new_alg, type }; ``` > + break; > default: > break; > } > - > - if (handlers == NULL) > - handlers = handlers_scalar; > + return (struct rte_net_crc){ RTE_NET_CRC_SCALAR, type }; > } > > -uint32_t > -rte_net_crc_calc(const void *data, > - uint32_t data_len, > - enum rte_net_crc_type type) > +uint32_t rte_net_crc(const struct rte_net_crc *ctx, > + const void *data, const uint32_t data_len) > { > - uint32_t ret; > - rte_net_crc_handler f_handle; > - > - f_handle = handlers[type]; > - ret = f_handle(data, data_len); > - > - return ret; > + return handlers[ctx->alg].f[ctx->type](data, data_len); > 'rte_net_crc()' gets input from user and "struct rte_net_crc" is not opaque, so user can provide invalid input, ctx->alg & ctx->type. To protect against it input values should be checked before using. Or I think user not need to know the details of the "struct rte_net_crc", so it can be an opaque variable for user. <...> > -/** > - * CRC compute API > - * > - * @param data > - * Pointer to the packet data for CRC computation > - * @param data_len > - * Data length for CRC computation > - * @param type > - * CRC type (enum rte_net_crc_type) > - * > - * @return > - * CRC value > - */ > -uint32_t > -rte_net_crc_calc(const void *data, > - uint32_t data_len, > +struct rte_net_crc rte_net_crc_set(enum rte_net_crc_alg alg, > enum rte_net_crc_type type); > > +uint32_t rte_net_crc(const struct rte_net_crc *ctx, > + const void *data, const uint32_t data_len); > + > As these are APIs, can you please add doxygen comments to them? > #ifdef __cplusplus > } > #endif > diff --git a/lib/net/version.map b/lib/net/version.map > index bec4ce23ea..47daf1464a 100644 > --- a/lib/net/version.map > +++ b/lib/net/version.map > @@ -4,11 +4,25 @@ DPDK_25 { > rte_eth_random_addr; > rte_ether_format_addr; > rte_ether_unformat_addr; > - rte_net_crc_calc; > - rte_net_crc_set_alg; > rte_net_get_ptype; > rte_net_make_rarp_packet; > rte_net_skip_ip6_ext; > + rte_net_crc; > + rte_net_crc_set; > > local: *; > }; > + > +INTERNAL { > + global: > + > + rte_net_crc_sse42_init; > + rte_crc16_ccitt_sse42_handler; > + rte_crc32_eth_sse42_handler; > + rte_net_crc_avx512_init; > + rte_crc16_ccitt_avx512_handler; > + rte_crc32_eth_avx512_handler; > + rte_net_crc_neon_init; > + rte_crc16_ccitt_neon_handler; > + rte_crc32_eth_neon_handler; > +}; > +1 to David's comment, these are used only within component, no need to export.