[PATCH] net/gve: fix RSS hash endianness in DQO format

2024-07-03 Thread Shreesh Adiga
The hash field in struct gve_rx_compl_desc_dqo, defined in
gve_desc_dqo.h, has the type __le32. Therefore the hash must be
converted from little endian, rather than big endian, to CPU byte order.

Bugzilla ID: 1441

Signed-off-by: Shreesh Adiga <16567adigashre...@gmail.com>
---
 drivers/net/gve/gve_rx_dqo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/gve/gve_rx_dqo.c b/drivers/net/gve/gve_rx_dqo.c
index f08b58c78d..5efcce3312 100644
--- a/drivers/net/gve/gve_rx_dqo.c
+++ b/drivers/net/gve/gve_rx_dqo.c
@@ -161,7 +161,7 @@ gve_rx_burst_dqo(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
rxm->ol_flags = 0;
rxm->ol_flags |= RTE_MBUF_F_RX_RSS_HASH |
gve_parse_csum_ol_flags(rx_desc, rxq->hw);
-   rxm->hash.rss = rte_be_to_cpu_32(rx_desc->hash);
+   rxm->hash.rss = rte_le_to_cpu_32(rx_desc->hash);
 
rx_pkts[nb_rx++] = rxm;
bytes += pkt_len;
-- 
2.44.2



Re: [PATCH] net/gve: fix RSS hash endianness in DQO format

2024-07-06 Thread Shreesh Adiga
Thank you, Joshua, for your review and suggestions.
I appreciate the help with the DPDK process.
I've sent a new patch with a "Fixes" tag to sta...@dpdk.org.
I'm not sure whether the same patch needs to be sent here to dev@dpdk.org as well.
Please let me know if I've missed anything.

Thanks,
Shreesh


[PATCH] net/mana: support rdma-core via pkg-config in meson

2024-09-20 Thread Shreesh Adiga
Currently, building with a custom rdma-core installed in /opt/rdma-core
after setting PKG_CONFIG_PATH=/opt/rdma-core/lib64/pkgconfig/ results
in the following meson log output:
Run-time dependency libmana found: YES 1.0.54.0
Header "infiniband/manadv.h" has symbol "manadv_set_context_attr" : NO

To fix this, the libraries that are found are collected in a new libs
list in meson.build, which is passed to the cc.has_header_symbol call
through its dependencies argument. After this change, the libmana header
files are included and net/mana is successfully enabled.

Fixes: 517ed6e2d590 ("net/mana: add basic driver with build environment")
Cc: lon...@microsoft.com
Cc: sta...@dpdk.org
Signed-off-by: Shreesh Adiga <16567adigashre...@gmail.com>
---
 drivers/net/mana/meson.build | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mana/meson.build b/drivers/net/mana/meson.build
index 2d72eca5a8..3ddc230ab4 100644
--- a/drivers/net/mana/meson.build
+++ b/drivers/net/mana/meson.build
@@ -19,12 +19,14 @@ sources += files(
 )
 
 libnames = ['ibverbs', 'mana']
+libs = []
 foreach libname:libnames
 lib = dependency('lib' + libname, required:false)
 if not lib.found()
 lib = cc.find_library(libname, required:false)
 endif
 if lib.found()
+libs += lib
 ext_deps += lib
 else
 build = false
@@ -43,7 +45,7 @@ required_symbols = [
 ]
 
 foreach arg:required_symbols
-if not cc.has_header_symbol(arg[0], arg[1])
+if not cc.has_header_symbol(arg[0], arg[1], dependencies: libs, args: cflags)
 build = false
 reason = 'missing symbol "' + arg[1] + '" in "' + arg[0] + '"'
 subdir_done()
-- 
2.44.2



[PATCH] net/crc: reduce usage of static arrays in net_crc_sse.c

2025-07-16 Thread Shreesh Adiga
Replace the clearing of the lower 32 bits of an XMM register with a
blend against a zero register.
Replace the clearing of the upper 64 bits of an XMM register with
_mm_move_epi64.
Clang is able to optimize away the AND with a memory operand using the
above sequence; however, GCC still emits the AND with memory operands,
which is explicitly eliminated here.

Additionally, replace the 48-byte crc_xmm_shift_tab with the contents of
the 32-byte shf_table, which achieves the same functionality.

Signed-off-by: Shreesh Adiga <16567adigashre...@gmail.com>
---
 lib/net/net_crc_sse.c | 30 +++---
 1 file changed, 7 insertions(+), 23 deletions(-)

diff --git a/lib/net/net_crc_sse.c b/lib/net/net_crc_sse.c
index 112dc94ac1..eec854e587 100644
--- a/lib/net/net_crc_sse.c
+++ b/lib/net/net_crc_sse.c
@@ -96,20 +96,13 @@ crcr32_reduce_128_to_64(__m128i data128, __m128i precomp)
 static __rte_always_inline uint32_t
 crcr32_reduce_64_to_32(__m128i data64, __m128i precomp)
 {
-   static const alignas(16) uint32_t mask1[4] = {
-   0xffffffff, 0xffffffff, 0x00000000, 0x00000000
-   };
-
-   static const alignas(16) uint32_t mask2[4] = {
-   0x00000000, 0xffffffff, 0xffffffff, 0xffffffff
-   };
__m128i tmp0, tmp1, tmp2;
 
-   tmp0 = _mm_and_si128(data64, _mm_load_si128((const __m128i *)mask2));
+   tmp0 = _mm_blend_epi16(_mm_setzero_si128(), data64, 252);
 
tmp1 = _mm_clmulepi64_si128(tmp0, precomp, 0x00);
tmp1 = _mm_xor_si128(tmp1, tmp0);
-   tmp1 = _mm_and_si128(tmp1, _mm_load_si128((const __m128i *)mask1));
+   tmp1 = _mm_move_epi64(tmp1);
 
tmp2 = _mm_clmulepi64_si128(tmp1, precomp, 0x10);
tmp2 = _mm_xor_si128(tmp2, tmp1);
@@ -118,13 +111,11 @@ crcr32_reduce_64_to_32(__m128i data64, __m128i precomp)
return _mm_extract_epi32(tmp2, 2);
 }
 
-static const alignas(16) uint8_t crc_xmm_shift_tab[48] = {
-   0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-   0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+static const alignas(16) uint8_t crc_xmm_shift_tab[32] = {
+   0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+   0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
-   0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
-   0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-   0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+   0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
 };
 
 /**
@@ -216,19 +207,12 @@ crc32_eth_calc_pclmulqdq(
0x80808080, 0x80808080, 0x80808080, 0x80808080
};
 
-   const alignas(16) uint8_t shf_table[32] = {
-   0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
-   0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
-   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
-   0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
-   };
-
__m128i last16, a, b;
 
last16 = _mm_loadu_si128((const __m128i *)&data[data_len - 16]);
 
temp = _mm_loadu_si128((const __m128i *)
-   &shf_table[data_len & 15]);
+   &crc_xmm_shift_tab[data_len & 15]);
a = _mm_shuffle_epi8(fold, temp);
 
temp = _mm_xor_si128(temp,
-- 
2.49.1