Hi Pablo,
> -----Original Message----- > From: De Lara Guarch, Pablo > Sent: Tuesday, March 28, 2017 7:04 PM > To: Singh, Jasvinder <jasvinder.si...@intel.com>; dev@dpdk.org > Cc: olivier.m...@6wind.com; Doherty, Declan <declan.dohe...@intel.com> > Subject: RE: [PATCH v5 1/2] librte_net: add crc compute APIs > > Hi Jasvinder, > > > -----Original Message----- > > From: Singh, Jasvinder > > Sent: Tuesday, March 21, 2017 2:46 PM > > To: dev@dpdk.org > > Cc: olivier.m...@6wind.com; Doherty, Declan; De Lara Guarch, Pablo > > Subject: [PATCH v5 1/2] librte_net: add crc compute APIs > > > > APIs for selecting the architecure specific implementation and > > computing the crc (16-bit and 32-bit CRCs) are added. For CRCs > > calculation, scalar as well as x86 intrinsic(sse4.2) versions are > > implemented. > > > > The scalar version is based on generic Look-Up Table(LUT) algorithm, > > while x86 intrinsic version uses carry-less multiplication for fast > > CRC computation. > > > > Signed-off-by: Jasvinder Singh <jasvinder.si...@intel.com> > > --- > > > diff --git a/lib/librte_net/rte_net_crc.c > > b/lib/librte_net/rte_net_crc.c new file mode 100644 index > > 0000000..89edd80 > > --- /dev/null > > +++ b/lib/librte_net/rte_net_crc.c > > ... > > > + > > +#include <rte_net_crc.h> > > +#include <stddef.h> > > + > > +/** crc tables */ > > +static uint32_t crc32_eth_lut[256]; > > +static uint32_t crc16_ccitt_lut[256]; > > Use a macro for 256, that you can use in crc32_eth_init_lut. > > + > > +static uint32_t rte_crc16_ccitt_handler(const uint8_t *data, > > + uint32_t data_len); > > Separate "static uint32_t" in another line. > > > > +/** > > + * Reflect the bits about the middle > > + * > > + * @param x value to be reflected > > Should be "val". > > > + * > > + * @return reflected value > > + */ > > +static uint32_t > > +reflect_32bits(const uint32_t val) > > No need for "const" here, as it is not a pointer. > > > +{ > > + uint32_t i, res = 0; > > + > > + for (i = 0; i < 32; i++) > > + if ((val & (1 << i)) != 0) > > + res |= (uint32_t)(1 << (31 - i)); > > + > > + return res; > > +} > > + > > +static void > > +crc32_eth_init_lut(const uint32_t poly, > > No need for "const" here. > > > + uint32_t *lut) > > +{ > > + uint_fast32_t i, j; > > + > > + for (i = 0; i < 256; i++) { > > + uint_fast32_t crc = reflect_32bits(i); > > + > > + for (j = 0; j < 8; j++) { > > + if (crc & 0x80000000L) > > + crc = (crc << 1) ^ poly; > > + else > > + crc <<= 1; > > + } > > + lut[i] = reflect_32bits(crc); > > Wrong indentation. > > > + } > > +} > > + > > +static inline __attribute__((always_inline)) uint32_t > > +crc32_eth_calc_lut(const uint8_t *data, > > + uint32_t data_len, > > + uint32_t crc, > > + const uint32_t *lut) > > +{ > > + while (data_len--) > > + crc = lut[(crc ^ *data++) & 0xffL] ^ (crc >> 8); > > + > > + return crc; > > +} > > + > > +static void > > +rte_net_crc_scalar_init(void) > > +{ > > + /** 32-bit crc init */ > > + crc32_eth_init_lut(CRC32_ETH_POLYNOMIAL, crc32_eth_lut); > > + > > + /** 16-bit CRC init */ > > + crc32_eth_init_lut(CRC16_CCITT_POLYNOMIAL << 16, > > crc16_ccitt_lut); > > + > > Remove this blank line. > > > +} > > + > > +static inline uint32_t > > +rte_crc16_ccitt_handler(const uint8_t *data, uint32_t data_len) { > > + return (uint16_t)~crc32_eth_calc_lut(data, > > + data_len, > > + 0xffff, > > + crc16_ccitt_lut); > > Since you are casting to uint16_t, when you are supposed to cast to uint32_t > (given the return type), I would add a comment explaining why. > > > +} > > + > > > > diff --git a/lib/librte_net/rte_net_crc.h > > b/lib/librte_net/rte_net_crc.h new file mode 100644 index > > 0000000..f8c9075 > > --- /dev/null > > +++ b/lib/librte_net/rte_net_crc.h > > @@ -0,0 +1,101 @@ > > ... > > > + > > +/** > > + * This API set the crc computation algorithm (i.e. scalar version, > > + * x86 64-bit sse4.2 intrinsic version, etc.) and internal data > > + * structure. > > + * > > + * @param alg > > Add extra information (CRC algorithm?). > > > + * - RTE_NET_CRC_SCALAR > > + * - RTE_NET_CRC_SSE42 (Use 64-bit SSE4.2 intrinsic) > > + */ > > +void > > +rte_net_crc_set_alg(enum rte_net_crc_alg alg); > > + > > +/** > > + * CRC compute API > > + * > > + * @param data > > + * Pointer to the packet data for crc computation > > + * @param data_len > > + * Data length for crc computation > > + * @param type > > + * Crc type (enum rte_net_crc_type) > > CRC > > > + * > > + * @return > > + * crc value > > Add two spaces after "@param" and "@return". > > > + */ > > +uint32_t > > +rte_net_crc_calc(const void *data, > > + uint32_t data_len, > > + enum rte_net_crc_type type); > > + > > +#if defined(RTE_ARCH_X86_64) || defined(RTE_CPU_FALGS_SSE_4_2) > > Typo in RTE_CPU_FALGS_SSE_4_2 (I missed the same one in rte_net_crc.c ). > Also, should it be "&&"? > > > > +#include <rte_net_crc_sse.h> > > +#endif > > + > > +#ifdef __cplusplus > > +} > > +#endif > > + > > + > > +#endif /* _RTE_NET_CRC_H_ */ > > diff --git a/lib/librte_net/rte_net_crc_sse.h > > b/lib/librte_net/rte_net_crc_sse.h > > new file mode 100644 > > index 0000000..e9af22d > > --- /dev/null > > +++ b/lib/librte_net/rte_net_crc_sse.h > > @@ -0,0 +1,345 @@ > > ... > > > + * @brief Performs one folding round > > + * > > + * Logically function operates as follows: > > + * DATA = READ_NEXT_16BYTES(); > > + * F1 = LSB8(FOLD) > > + * F2 = MSB8(FOLD) > > + * T1 = CLMUL(F1, RK1) > > + * T2 = CLMUL(F2, RK2) > > + * FOLD = XOR(T1, T2, DATA) > > + * > > + * @param data_block 16 byte data block > > + * @param precomp precomputed rk1 constanst > > + * @param fold running 16 byte folded data > > + * > > + * @return New 16 byte folded data > > Move parameter/rturn description in a separate line (same for other > functions). > > > + */ > > +static inline __attribute__((always_inline)) __m128i > > +crcr32_folding_round(const __m128i data_block, > > + const __m128i precomp, > > + const __m128i fold) > > No need to use "const" here. > > > +{ > > + __m128i tmp0 = _mm_clmulepi64_si128(fold, precomp, 0x01); > > + __m128i tmp1 = _mm_clmulepi64_si128(fold, precomp, 0x10); > > + > > + return _mm_xor_si128(tmp1, _mm_xor_si128(data_block, tmp0)); } > > + > > +/** > > + * Performs reduction from 128 bits to 64 bits > > + * > > + * @param data128 128 bits data to be reduced > > + * @param precomp rk5 and rk6 precomputed constants > > + * > > + * @return data reduced to 64 bits > > + */ > > + > > +static inline __attribute__((always_inline)) __m128i > > +crcr32_reduce_128_to_64(__m128i data128, > > + const __m128i precomp) > > No need to use "const" here. > > ... > > > + > > + > > +static inline void > > +rte_net_crc_sse42_init(void) > > +{ > > + uint64_t k1, k2, k5, k6; > > + uint64_t p = 0, q = 0; > > + > > + /** Initialize CRC16 data */ > > + k1 = 0x189aeLLU; > > + k2 = 0x8e10LLU; > > + k5 = 0x189aeLLU; > > + k6 = 0x114aaLLU; > > + q = 0x11c581910LLU; > > + p = 0x10811LLU; > > + > > + /** Save the params in context structure */ > > + crc16_ccitt_pclmulqdq.rk1_rk2 = > > + _mm_setr_epi64(_mm_cvtsi64_m64(k1), > > _mm_cvtsi64_m64(k2)); > > + crc16_ccitt_pclmulqdq.rk5_rk6 = > > + _mm_setr_epi64(_mm_cvtsi64_m64(k5), > > _mm_cvtsi64_m64(k6)); > > + crc16_ccitt_pclmulqdq.rk7_rk8 = > > + _mm_setr_epi64(_mm_cvtsi64_m64(q), > > _mm_cvtsi64_m64(p)); > > + > > + /** Initialize CRC32 data */ > > + k1 = 0xccaa009eLLU; > > + k2 = 0x1751997d0LLU; > > + k5 = 0xccaa009eLLU; > > + k6 = 0x163cd6124LLU; > > + q = 0x1f7011640LLU; > > + p = 0x1db710641LLU; > > + > > + /** Save the params in context structure */ > > + crc32_eth_pclmulqdq.rk1_rk2 = > > + _mm_setr_epi64(_mm_cvtsi64_m64(k1), > > _mm_cvtsi64_m64(k2)); > > Add extra tab for better readability. > > > + crc32_eth_pclmulqdq.rk5_rk6 = > > + _mm_setr_epi64(_mm_cvtsi64_m64(k5), > > _mm_cvtsi64_m64(k6)); > > + crc32_eth_pclmulqdq.rk7_rk8 = > > + _mm_setr_epi64(_mm_cvtsi64_m64(q), > > _mm_cvtsi64_m64(p)); > > + > > + _mm_empty(); > > Maybe we need a comment here. > > > + > > +} > > + > > +static inline uint32_t > > +rte_crc16_ccitt_sse42_handler(const uint8_t *data, > > + uint32_t data_len) > > +{ > > + return (uint16_t)~crc32_eth_calc_pclmulqdq(data, > > + data_len, > > + 0xffff, > > + &crc16_ccitt_pclmulqdq); > > Same comment about the casting here. > > > +} > > + > > +static inline uint32_t > > +rte_crc32_eth_sse42_handler(const uint8_t *data, > > + uint32_t data_len) > > +{ > > + return ~crc32_eth_calc_pclmulqdq(data, > > + data_len, > > + 0xffffffffUL, > > + &crc32_eth_pclmulqdq); > > +} > > + > > +#ifdef __cplusplus > > +} > > +#endif > > + > > +#endif /* _RTE_NET_CRC_SSE_H_ */ > > diff --git a/lib/librte_net/rte_net_version.map > > b/lib/librte_net/rte_net_version.map > > index 3b15e65..c6716ec 100644 > > --- a/lib/librte_net/rte_net_version.map > > +++ b/lib/librte_net/rte_net_version.map > > @@ -4,3 +4,11 @@ DPDK_16.11 { > > > > local: *; > > }; > > + > > +DPDK_17.05 { > > + global: > > + > > + rte_net_crc_set_alg; > > + rte_net_crc_calc; > > This has to be alphabetically sorted. Thank you for detailed review. I will revise the patch following your comments and will send v6. Jasvinder