[PATCH] net/gve: fix RSS hash endianness in DQO format
The hash field in struct gve_rx_compl_desc_dqo defined in gve_desc_dqo.h has the type __le32. Therefore the hash must be read in little endian rather than big endian. Bugzilla ID: 1441 Signed-off-by: Shreesh Adiga <16567adigashre...@gmail.com> --- drivers/net/gve/gve_rx_dqo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/gve/gve_rx_dqo.c b/drivers/net/gve/gve_rx_dqo.c index f08b58c78d..5efcce3312 100644 --- a/drivers/net/gve/gve_rx_dqo.c +++ b/drivers/net/gve/gve_rx_dqo.c @@ -161,7 +161,7 @@ gve_rx_burst_dqo(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) rxm->ol_flags = 0; rxm->ol_flags |= RTE_MBUF_F_RX_RSS_HASH | gve_parse_csum_ol_flags(rx_desc, rxq->hw); - rxm->hash.rss = rte_be_to_cpu_32(rx_desc->hash); + rxm->hash.rss = rte_le_to_cpu_32(rx_desc->hash); rx_pkts[nb_rx++] = rxm; bytes += pkt_len; -- 2.44.2
Re: [PATCH] net/gve: fix RSS hash endianness in DQO format
Thank you, Joshua, for your review and suggestions. I appreciate the help with the DPDK process. I've sent a new patch with the "Fixes" tag to sta...@dpdk.org. I'm not sure whether the same patch needs to be sent here to dev@dpdk.org as well. Please let me know if I've missed anything. Thanks, Shreesh
[PATCH] net/mana: support rdma-core via pkg-config in meson
Currently, building with a custom rdma-core installed in /opt/rdma-core after setting PKG_CONFIG_PATH=/opt/rdma-core/lib64/pkgconfig/ results in the meson logs below: Run-time dependency libmana found: YES 1.0.54.0 Header "infiniband/manadv.h" has symbol "manadv_set_context_attr" : NO To fix this, each dependency that is found is collected in a libs list in meson.build, and that list is passed to the cc.has_header_symbol call via its dependencies argument. After this change, the libmana header files are found and net/mana is successfully enabled. Fixes: 517ed6e2d590 ("net/mana: add basic driver with build environment") Cc: lon...@microsoft.com Cc: sta...@dpdk.org Signed-off-by: Shreesh Adiga <16567adigashre...@gmail.com> --- drivers/net/mana/meson.build | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/mana/meson.build b/drivers/net/mana/meson.build index 2d72eca5a8..3ddc230ab4 100644 --- a/drivers/net/mana/meson.build +++ b/drivers/net/mana/meson.build @@ -19,12 +19,14 @@ sources += files( ) libnames = ['ibverbs', 'mana'] +libs = [] foreach libname:libnames lib = dependency('lib' + libname, required:false) if not lib.found() lib = cc.find_library(libname, required:false) endif if lib.found() +libs += lib ext_deps += lib else build = false @@ -43,7 +45,7 @@ required_symbols = [ ] foreach arg:required_symbols -if not cc.has_header_symbol(arg[0], arg[1]) +if not cc.has_header_symbol(arg[0], arg[1], dependencies: libs, args: cflags) build = false reason = 'missing symbol "' + arg[1] + '" in "' + arg[0] + '"' subdir_done() -- 2.44.2
[PATCH] net/crc: reduce usage of static arrays in net_crc_sse.c
Replace the clearing of lower 32 bits of XMM register with blend of zero register. Replace the clearing of upper 64 bits of XMM register with _mm_move_epi64. Clang is able to optimize away the AND + memory operand with the above sequence; however, GCC still emits the AND with memory operands, which is explicitly eliminated here. Additionally replace the 48 byte crc_xmm_shift_tab with the contents of shf_table which is 32 bytes, achieving the same functionality. Signed-off-by: Shreesh Adiga <16567adigashre...@gmail.com> --- lib/net/net_crc_sse.c | 30 +++--- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/lib/net/net_crc_sse.c b/lib/net/net_crc_sse.c index 112dc94ac1..eec854e587 100644 --- a/lib/net/net_crc_sse.c +++ b/lib/net/net_crc_sse.c @@ -96,20 +96,13 @@ crcr32_reduce_128_to_64(__m128i data128, __m128i precomp) static __rte_always_inline uint32_t crcr32_reduce_64_to_32(__m128i data64, __m128i precomp) { - static const alignas(16) uint32_t mask1[4] = { - 0xffffffff, 0xffffffff, 0x00000000, 0x00000000 - }; - - static const alignas(16) uint32_t mask2[4] = { - 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff - }; __m128i tmp0, tmp1, tmp2; - tmp0 = _mm_and_si128(data64, _mm_load_si128((const __m128i *)mask2)); + tmp0 = _mm_blend_epi16(_mm_setzero_si128(), data64, 252); tmp1 = _mm_clmulepi64_si128(tmp0, precomp, 0x00); tmp1 = _mm_xor_si128(tmp1, tmp0); - tmp1 = _mm_and_si128(tmp1, _mm_load_si128((const __m128i *)mask1)); + tmp1 = _mm_move_epi64(tmp1); tmp2 = _mm_clmulepi64_si128(tmp1, precomp, 0x10); tmp2 = _mm_xor_si128(tmp2, tmp1); @@ -118,13 +111,11 @@ crcr32_reduce_64_to_32(__m128i data64, __m128i precomp) return _mm_extract_epi32(tmp2, 2); } -static const alignas(16) uint8_t crc_xmm_shift_tab[48] = { - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, +static const alignas(16) uint8_t crc_xmm_shift_tab[32] = { + 0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 
0x06, 0x07, - 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }; /** @@ -216,19 +207,12 @@ crc32_eth_calc_pclmulqdq( 0x80808080, 0x80808080, 0x80808080, 0x80808080 }; - const alignas(16) uint8_t shf_table[32] = { - 0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, - 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f - }; - __m128i last16, a, b; last16 = _mm_loadu_si128((const __m128i *)&data[data_len - 16]); temp = _mm_loadu_si128((const __m128i *) - &shf_table[data_len & 15]); + &crc_xmm_shift_tab[data_len & 15]); a = _mm_shuffle_epi8(fold, temp); temp = _mm_xor_si128(temp, -- 2.49.1