Re: [dpdk-dev] [PATCH v5 1/2] librte_net: add crc compute APIs

Singh, Jasvinder Tue, 28 Mar 2017 12:21:40 -0700

Hi Pablo,


> -----Original Message-----
> From: De Lara Guarch, Pablo
> Sent: Tuesday, March 28, 2017 7:04 PM
> To: Singh, Jasvinder <[email protected]>; [email protected]
> Cc: [email protected]; Doherty, Declan <[email protected]>
> Subject: RE: [PATCH v5 1/2] librte_net: add crc compute APIs
> 
> Hi Jasvinder,
> 
> > -----Original Message-----
> > From: Singh, Jasvinder
> > Sent: Tuesday, March 21, 2017 2:46 PM
> > To: [email protected]
> > Cc: [email protected]; Doherty, Declan; De Lara Guarch, Pablo
> > Subject: [PATCH v5 1/2] librte_net: add crc compute APIs
> >
> > APIs for selecting the architecure specific implementation and
> > computing the crc (16-bit and 32-bit CRCs) are added. For CRCs
> > calculation, scalar as well as x86 intrinsic(sse4.2) versions are 
> > implemented.
> >
> > The scalar version is based on generic Look-Up Table(LUT) algorithm,
> > while x86 intrinsic version uses carry-less multiplication for fast
> > CRC computation.
> >
> > Signed-off-by: Jasvinder Singh <[email protected]>
> > ---
> 
> > diff --git a/lib/librte_net/rte_net_crc.c
> > b/lib/librte_net/rte_net_crc.c new file mode 100644 index
> > 0000000..89edd80
> > --- /dev/null
> > +++ b/lib/librte_net/rte_net_crc.c
> 
> ...
> 
> > +
> > +#include <rte_net_crc.h>
> > +#include <stddef.h>
> > +
> > +/** crc tables */
> > +static uint32_t crc32_eth_lut[256];
> > +static uint32_t crc16_ccitt_lut[256];
> 
> Use a macro for 256, that you can use in crc32_eth_init_lut.
> > +
> > +static uint32_t rte_crc16_ccitt_handler(const uint8_t *data,
> > +   uint32_t data_len);
> 
> Separate "static uint32_t" in another line.
> 
> 
> > +/**
> > + * Reflect the bits about the middle
> > + *
> > + * @param x value to be reflected
> 
> Should be "val".
> 
> > + *
> > + * @return reflected value
> > + */
> > +static uint32_t
> > +reflect_32bits(const uint32_t val)
> 
> No need for "const" here, as it is not a pointer.
> 
> > +{
> > +   uint32_t i, res = 0;
> > +
> > +   for (i = 0; i < 32; i++)
> > +           if ((val & (1 << i)) != 0)
> > +                   res |= (uint32_t)(1 << (31 - i));
> > +
> > +   return res;
> > +}
> > +
> > +static void
> > +crc32_eth_init_lut(const uint32_t poly,
> 
> No need for "const" here.
> 
> > +   uint32_t *lut)
> > +{
> > +   uint_fast32_t i, j;
> > +
> > +   for (i = 0; i < 256; i++) {
> > +           uint_fast32_t crc = reflect_32bits(i);
> > +
> > +           for (j = 0; j < 8; j++) {
> > +                   if (crc & 0x80000000L)
> > +                           crc = (crc << 1) ^ poly;
> > +                   else
> > +                           crc <<= 1;
> > +           }
> > +   lut[i] = reflect_32bits(crc);
> 
> Wrong indentation.
> 
> > +   }
> > +}
> > +
> > +static inline __attribute__((always_inline)) uint32_t
> > +crc32_eth_calc_lut(const uint8_t *data,
> > +   uint32_t data_len,
> > +   uint32_t crc,
> > +   const uint32_t *lut)
> > +{
> > +   while (data_len--)
> > +           crc = lut[(crc ^ *data++) & 0xffL] ^ (crc >> 8);
> > +
> > +   return crc;
> > +}
> > +
> > +static void
> > +rte_net_crc_scalar_init(void)
> > +{
> > +   /** 32-bit crc init */
> > +   crc32_eth_init_lut(CRC32_ETH_POLYNOMIAL, crc32_eth_lut);
> > +
> > +   /** 16-bit CRC init */
> > +   crc32_eth_init_lut(CRC16_CCITT_POLYNOMIAL << 16,
> > crc16_ccitt_lut);
> > +
> 
> Remove this blank line.
> 
> > +}
> > +
> > +static inline uint32_t
> > +rte_crc16_ccitt_handler(const uint8_t *data, uint32_t data_len) {
> > +   return (uint16_t)~crc32_eth_calc_lut(data,
> > +           data_len,
> > +           0xffff,
> > +           crc16_ccitt_lut);
> 
> Since you are casting to uint16_t, when you are supposed to cast to uint32_t
> (given the return type), I would add a comment explaining why.
> 
> > +}
> > +
> 
> 
> > diff --git a/lib/librte_net/rte_net_crc.h
> > b/lib/librte_net/rte_net_crc.h new file mode 100644 index
> > 0000000..f8c9075
> > --- /dev/null
> > +++ b/lib/librte_net/rte_net_crc.h
> > @@ -0,0 +1,101 @@
> 
> ...
> 
> > +
> > +/**
> > + *  This API set the crc computation algorithm (i.e. scalar version,
> > + *  x86 64-bit sse4.2 intrinsic version, etc.) and internal data
> > + *  structure.
> > + *
> > + * @param alg
> 
> Add extra information (CRC algorithm?).
> 
> > + *   - RTE_NET_CRC_SCALAR
> > + *   - RTE_NET_CRC_SSE42 (Use 64-bit SSE4.2 intrinsic)
> > + */
> > +void
> > +rte_net_crc_set_alg(enum rte_net_crc_alg alg);
> > +
> > +/**
> > + * CRC compute API
> > + *
> > + * @param data
> > + *  Pointer to the packet data for crc computation
> > + * @param data_len
> > + *  Data length for crc computation
> > + * @param type
> > + *  Crc type (enum rte_net_crc_type)
> 
> CRC
> 
> > + *
> > + * @return
> > + *  crc value
> 
> Add two spaces after "@param" and "@return".
> 
> > + */
> > +uint32_t
> > +rte_net_crc_calc(const void *data,
> > +   uint32_t data_len,
> > +   enum rte_net_crc_type type);
> > +
> > +#if defined(RTE_ARCH_X86_64) || defined(RTE_CPU_FALGS_SSE_4_2)
> 
> Typo in RTE_CPU_FALGS_SSE_4_2 (I missed the same one in rte_net_crc.c ).
> Also, should it be "&&"?
> 
> 
> > +#include <rte_net_crc_sse.h>
> > +#endif
> > +
> > +#ifdef __cplusplus
> > +}
> > +#endif
> > +
> > +
> > +#endif /* _RTE_NET_CRC_H_ */
> > diff --git a/lib/librte_net/rte_net_crc_sse.h
> > b/lib/librte_net/rte_net_crc_sse.h
> > new file mode 100644
> > index 0000000..e9af22d
> > --- /dev/null
> > +++ b/lib/librte_net/rte_net_crc_sse.h
> > @@ -0,0 +1,345 @@
> 
> ...
> 
> > + * @brief Performs one folding round
> > + *
> > + * Logically function operates as follows:
> > + *     DATA = READ_NEXT_16BYTES();
> > + *     F1 = LSB8(FOLD)
> > + *     F2 = MSB8(FOLD)
> > + *     T1 = CLMUL(F1, RK1)
> > + *     T2 = CLMUL(F2, RK2)
> > + *     FOLD = XOR(T1, T2, DATA)
> > + *
> > + * @param data_block 16 byte data block
> > + * @param precomp precomputed rk1 constanst
> > + * @param fold running 16 byte folded data
> > + *
> > + * @return New 16 byte folded data
> 
> Move parameter/rturn description in a separate line (same for other
> functions).
> 
> > + */
> > +static inline __attribute__((always_inline)) __m128i
> > +crcr32_folding_round(const __m128i data_block,
> > +           const __m128i precomp,
> > +           const __m128i fold)
> 
> No need to use "const" here.
> 
> > +{
> > +   __m128i tmp0 = _mm_clmulepi64_si128(fold, precomp, 0x01);
> > +   __m128i tmp1 = _mm_clmulepi64_si128(fold, precomp, 0x10);
> > +
> > +   return _mm_xor_si128(tmp1, _mm_xor_si128(data_block, tmp0)); }
> > +
> > +/**
> > + * Performs reduction from 128 bits to 64 bits
> > + *
> > + * @param data128 128 bits data to be reduced
> > + * @param precomp rk5 and rk6 precomputed constants
> > + *
> > + * @return data reduced to 64 bits
> > + */
> > +
> > +static inline __attribute__((always_inline)) __m128i
> > +crcr32_reduce_128_to_64(__m128i data128,
> > +   const __m128i precomp)
> 
> No need to use "const" here.
> 
> ...
> 
> > +
> > +
> > +static inline void
> > +rte_net_crc_sse42_init(void)
> > +{
> > +   uint64_t k1, k2, k5, k6;
> > +   uint64_t p = 0, q = 0;
> > +
> > +   /** Initialize CRC16 data */
> > +   k1 = 0x189aeLLU;
> > +   k2 = 0x8e10LLU;
> > +   k5 = 0x189aeLLU;
> > +   k6 = 0x114aaLLU;
> > +   q =  0x11c581910LLU;
> > +   p =  0x10811LLU;
> > +
> > +   /** Save the params in context structure */
> > +   crc16_ccitt_pclmulqdq.rk1_rk2 =
> > +           _mm_setr_epi64(_mm_cvtsi64_m64(k1),
> >     _mm_cvtsi64_m64(k2));
> > +   crc16_ccitt_pclmulqdq.rk5_rk6 =
> > +           _mm_setr_epi64(_mm_cvtsi64_m64(k5),
> >     _mm_cvtsi64_m64(k6));
> > +   crc16_ccitt_pclmulqdq.rk7_rk8 =
> > +           _mm_setr_epi64(_mm_cvtsi64_m64(q),
> > _mm_cvtsi64_m64(p));
> > +
> > +   /** Initialize CRC32 data */
> > +   k1 = 0xccaa009eLLU;
> > +   k2 = 0x1751997d0LLU;
> > +   k5 = 0xccaa009eLLU;
> > +   k6 = 0x163cd6124LLU;
> > +   q =  0x1f7011640LLU;
> > +   p =  0x1db710641LLU;
> > +
> > +   /** Save the params in context structure */
> > +   crc32_eth_pclmulqdq.rk1_rk2 =
> > +           _mm_setr_epi64(_mm_cvtsi64_m64(k1),
> >     _mm_cvtsi64_m64(k2));
> 
> Add extra tab for better readability.
> 
> > +   crc32_eth_pclmulqdq.rk5_rk6 =
> > +           _mm_setr_epi64(_mm_cvtsi64_m64(k5),
> > _mm_cvtsi64_m64(k6));
> > +   crc32_eth_pclmulqdq.rk7_rk8 =
> > +           _mm_setr_epi64(_mm_cvtsi64_m64(q),
> > _mm_cvtsi64_m64(p));
> > +
> > +   _mm_empty();
> 
> Maybe we need a comment here.
> 
> > +
> > +}
> > +
> > +static inline uint32_t
> > +rte_crc16_ccitt_sse42_handler(const uint8_t *data,
> > +   uint32_t data_len)
> > +{
> > +   return (uint16_t)~crc32_eth_calc_pclmulqdq(data,
> > +           data_len,
> > +           0xffff,
> > +           &crc16_ccitt_pclmulqdq);
> 
> Same comment about the casting here.
> 
> > +}
> > +
> > +static inline uint32_t
> > +rte_crc32_eth_sse42_handler(const uint8_t *data,
> > +   uint32_t data_len)
> > +{
> > +   return ~crc32_eth_calc_pclmulqdq(data,
> > +           data_len,
> > +           0xffffffffUL,
> > +           &crc32_eth_pclmulqdq);
> > +}
> > +
> > +#ifdef __cplusplus
> > +}
> > +#endif
> > +
> > +#endif /* _RTE_NET_CRC_SSE_H_ */
> > diff --git a/lib/librte_net/rte_net_version.map
> > b/lib/librte_net/rte_net_version.map
> > index 3b15e65..c6716ec 100644
> > --- a/lib/librte_net/rte_net_version.map
> > +++ b/lib/librte_net/rte_net_version.map
> > @@ -4,3 +4,11 @@ DPDK_16.11 {
> >
> >     local: *;
> >  };
> > +
> > +DPDK_17.05 {
> > +   global:
> > +
> > +   rte_net_crc_set_alg;
> > +   rte_net_crc_calc;
> 
> This has to be alphabetically sorted.

Thank you for detailed review. I will revise the patch following your comments 
and will send v6.

Jasvinder

Re: [dpdk-dev] [PATCH v5 1/2] librte_net: add crc compute APIs

Reply via email to