[PATCH v2 01/10] common/cnxk: sync VF root weight with kernel

2024-05-28 Thread Nithin Dabilpuram
From: Satha Rao 

By default, set the VF root scheduling weight to the same value as other
kernel-configured VFs. Also fix a compilation issue when cflags has
-Werror=shadow=compatible-local.

Signed-off-by: Satha Rao 
---

v2:
- Added fixes tag to 9/10 and documentation to 10/10, 6/10
  patches

 drivers/common/cnxk/roc_nix.h   | 1 +
 drivers/common/cnxk/roc_nix_queue.c | 3 ++-
 drivers/common/cnxk/roc_nix_tm.c| 6 +-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h
index bd3e540f45..63bcd5b25e 100644
--- a/drivers/common/cnxk/roc_nix.h
+++ b/drivers/common/cnxk/roc_nix.h
@@ -473,6 +473,7 @@ struct roc_nix {
bool force_rx_aura_bp;
bool custom_meta_aura_ena;
bool rx_inj_ena;
+   uint32_t root_sched_weight;
/* End of input parameters */
/* LMT line base for "Per Core Tx LMT line" mode*/
uintptr_t lmt_base;
diff --git a/drivers/common/cnxk/roc_nix_queue.c 
b/drivers/common/cnxk/roc_nix_queue.c
index ae4e0ea40c..f5441e0e6b 100644
--- a/drivers/common/cnxk/roc_nix_queue.c
+++ b/drivers/common/cnxk/roc_nix_queue.c
@@ -1030,7 +1030,8 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct 
roc_nix_sq *sq)
thr = PLT_DIV_CEIL((nb_sqb_bufs * ROC_NIX_SQB_THRESH), 100);
nb_sqb_bufs += NIX_SQB_PREFETCH;
/* Clamp up the SQB count */
-   nb_sqb_bufs = PLT_MIN(roc_nix->max_sqb_count, 
(uint16_t)PLT_MAX(NIX_DEF_SQB, nb_sqb_bufs));
+   nb_sqb_bufs = PLT_MAX(NIX_DEF_SQB, nb_sqb_bufs);
+   nb_sqb_bufs = PLT_MIN(roc_nix->max_sqb_count, (uint16_t)nb_sqb_bufs);
 
sq->nb_sqb_bufs = nb_sqb_bufs;
sq->sqes_per_sqb_log2 = (uint16_t)plt_log2_u32(sqes_per_sqb);
diff --git a/drivers/common/cnxk/roc_nix_tm.c b/drivers/common/cnxk/roc_nix_tm.c
index 4e6a28f827..ac522f8235 100644
--- a/drivers/common/cnxk/roc_nix_tm.c
+++ b/drivers/common/cnxk/roc_nix_tm.c
@@ -1589,7 +1589,11 @@ nix_tm_prepare_default_tree(struct roc_nix *roc_nix)
node->id = nonleaf_id;
node->parent_id = parent;
node->priority = 0;
-   node->weight = NIX_TM_DFLT_RR_WT;
+   /* Default VF root RR_QUANTUM is in sync with kernel */
+   if (lvl == ROC_TM_LVL_ROOT && !nix_tm_have_tl1_access(nix))
+   node->weight = roc_nix->root_sched_weight;
+   else
+   node->weight = NIX_TM_DFLT_RR_WT;
node->shaper_profile_id = ROC_NIX_TM_SHAPER_PROFILE_NONE;
node->lvl = lvl;
node->tree = ROC_NIX_TM_DEFAULT;
-- 
2.25.1



[PATCH v2 02/10] net/cnxk: set VF default root schedule weight

2024-05-28 Thread Nithin Dabilpuram
From: Satha Rao 

To get better performance on LBK or VF interfaces, set the default
root schedule weight to known tested value.

Signed-off-by: Satha Rao 
---
 drivers/net/cnxk/cnxk_ethdev.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/cnxk/cnxk_ethdev.c b/drivers/net/cnxk/cnxk_ethdev.c
index 95a3d8aaf9..1bccebad9f 100644
--- a/drivers/net/cnxk/cnxk_ethdev.c
+++ b/drivers/net/cnxk/cnxk_ethdev.c
@@ -7,6 +7,8 @@
 
 #define CNXK_NIX_CQ_INL_CLAMP_MAX (64UL * 1024UL)
 
+#define NIX_TM_DFLT_RR_WT 71
+
 static inline uint64_t
 nix_get_rx_offload_capa(struct cnxk_eth_dev *dev)
 {
@@ -1884,6 +1886,8 @@ cnxk_eth_dev_init(struct rte_eth_dev *eth_dev)
nix->pci_dev = pci_dev;
nix->hw_vlan_ins = true;
nix->port_id = eth_dev->data->port_id;
+   /* For better performance set default VF root schedule weight */
+   nix->root_sched_weight = NIX_TM_DFLT_RR_WT;
if (roc_feature_nix_has_own_meta_aura())
nix->local_meta_aura_ena = true;
rc = roc_nix_dev_init(nix);
-- 
2.25.1



[PATCH v2 03/10] net/cnxk: fix extbuf handling for multisegment packet

2024-05-28 Thread Nithin Dabilpuram
From: Rakesh Kudurumalla 

Avoid double free of extbuf in the TX path when a
multi-segment packet has an extbuf as one of its segments.

Fixes: dd9446991212 ("net/cnxk: add transmit completion handler")
Cc: sta...@dpdk.org

Signed-off-by: Rakesh Kudurumalla 
---
 drivers/net/cnxk/cn9k_tx.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h
index 6fc9e4d758..b56881c561 100644
--- a/drivers/net/cnxk/cn9k_tx.h
+++ b/drivers/net/cnxk/cn9k_tx.h
@@ -117,6 +117,7 @@ cn9k_nix_prefree_seg(struct rte_mbuf *m, struct rte_mbuf 
**extm, struct cn9k_eth
send_hdr->w1.sqe_id = sqe_id &
txq->tx_compl.nb_desc_mask;
txq->tx_compl.ptr[send_hdr->w1.sqe_id] = m;
+   m->next = NULL;
}
return 1;
} else {
-- 
2.25.1



[PATCH v2 04/10] common/cnxk: override UDP encap ports with session data

2024-05-28 Thread Nithin Dabilpuram
Override UDP encap ports with session info when non-zero on cn10k.
This makes the UDP encap ports configurable by user as needed.
Default UDP source and destination ports will still be 4500.

Signed-off-by: Nithin Dabilpuram 
---
 drivers/common/cnxk/cnxk_security.c | 22 ++
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/common/cnxk/cnxk_security.c 
b/drivers/common/cnxk/cnxk_security.c
index bab015e3b3..15b0bedf43 100644
--- a/drivers/common/cnxk/cnxk_security.c
+++ b/drivers/common/cnxk/cnxk_security.c
@@ -308,6 +308,7 @@ cnxk_ot_ipsec_inb_sa_fill(struct roc_ot_ipsec_inb_sa *sa,
  struct rte_crypto_sym_xform *crypto_xfrm,
  bool is_inline)
 {
+   uint16_t sport = 4500, dport = 4500;
union roc_ot_ipsec_sa_word2 w2;
uint32_t replay_win_sz;
size_t offset;
@@ -353,8 +354,14 @@ cnxk_ot_ipsec_inb_sa_fill(struct roc_ot_ipsec_inb_sa *sa,
/* ESN */
sa->w2.s.esn_en = !!ipsec_xfrm->options.esn;
if (ipsec_xfrm->options.udp_encap) {
-   sa->w10.s.udp_src_port = 4500;
-   sa->w10.s.udp_dst_port = 4500;
+   if (ipsec_xfrm->udp.sport)
+   sport = ipsec_xfrm->udp.sport;
+
+   if (ipsec_xfrm->udp.dport)
+   dport = ipsec_xfrm->udp.dport;
+
+   sa->w10.s.udp_src_port = sport;
+   sa->w10.s.udp_dst_port = dport;
}
 
if (ipsec_xfrm->options.udp_ports_verify)
@@ -411,6 +418,7 @@ cnxk_ot_ipsec_outb_sa_fill(struct roc_ot_ipsec_outb_sa *sa,
   struct rte_crypto_sym_xform *crypto_xfrm)
 {
struct rte_security_ipsec_tunnel_param *tunnel = &ipsec_xfrm->tunnel;
+   uint16_t sport = 4500, dport = 4500;
union roc_ot_ipsec_sa_word2 w2;
size_t offset;
int rc;
@@ -506,8 +514,14 @@ cnxk_ot_ipsec_outb_sa_fill(struct roc_ot_ipsec_outb_sa *sa,
sa->ctx.esn_val = ipsec_xfrm->esn.value - 1;
 
if (ipsec_xfrm->options.udp_encap) {
-   sa->w10.s.udp_src_port = 4500;
-   sa->w10.s.udp_dst_port = 4500;
+   if (ipsec_xfrm->udp.sport)
+   sport = ipsec_xfrm->udp.sport;
+
+   if (ipsec_xfrm->udp.dport)
+   dport = ipsec_xfrm->udp.dport;
+
+   sa->w10.s.udp_src_port = sport;
+   sa->w10.s.udp_dst_port = dport;
}
 
offset = offsetof(struct roc_ot_ipsec_outb_sa, ctx);
-- 
2.25.1



[PATCH v2 05/10] net/cnxk: update SA userdata and keep original cookie

2024-05-28 Thread Nithin Dabilpuram
Update SA userdata as part of session_update() and
keep the original cookie that is used to identify
SA.

Fixes: 8efa348e8160 ("net/cnxk: support custom SA index")
Cc: sta...@dpdk.org

Signed-off-by: Nithin Dabilpuram 
---
 drivers/net/cnxk/cn10k_ethdev_sec.c | 57 -
 1 file changed, 55 insertions(+), 2 deletions(-)

diff --git a/drivers/net/cnxk/cn10k_ethdev_sec.c 
b/drivers/net/cnxk/cn10k_ethdev_sec.c
index af27d3bbc1..eed4c29218 100644
--- a/drivers/net/cnxk/cn10k_ethdev_sec.c
+++ b/drivers/net/cnxk/cn10k_ethdev_sec.c
@@ -1101,8 +1101,8 @@ cn10k_eth_sec_session_update(void *device, struct 
rte_security_session *sess,
 {
struct rte_eth_dev *eth_dev = (struct rte_eth_dev *)device;
struct cnxk_eth_dev *dev = cnxk_eth_pmd_priv(eth_dev);
-   struct roc_ot_ipsec_inb_sa *inb_sa_dptr;
struct rte_security_ipsec_xform *ipsec;
+   struct cn10k_sec_sess_priv sess_priv;
struct rte_crypto_sym_xform *crypto;
struct cnxk_eth_sec_sess *eth_sec;
bool inbound;
@@ -1123,6 +1123,11 @@ cn10k_eth_sec_session_update(void *device, struct 
rte_security_session *sess,
eth_sec->spi = conf->ipsec.spi;
 
if (inbound) {
+   struct roc_ot_ipsec_inb_sa *inb_sa_dptr, *inb_sa;
+   struct cn10k_inb_priv_data *inb_priv;
+
+   inb_sa = eth_sec->sa;
+   inb_priv = roc_nix_inl_ot_ipsec_inb_sa_sw_rsvd(inb_sa);
inb_sa_dptr = (struct roc_ot_ipsec_inb_sa *)dev->inb.sa_dptr;
memset(inb_sa_dptr, 0, sizeof(struct roc_ot_ipsec_inb_sa));
 
@@ -1130,26 +1135,74 @@ cn10k_eth_sec_session_update(void *device, struct 
rte_security_session *sess,
   true);
if (rc)
return -EINVAL;
+   /* Use cookie for original data */
+   inb_sa_dptr->w1.s.cookie = inb_sa->w1.s.cookie;
+
+   if (ipsec->options.stats == 1) {
+   /* Enable mib counters */
+   inb_sa_dptr->w0.s.count_mib_bytes = 1;
+   inb_sa_dptr->w0.s.count_mib_pkts = 1;
+   }
+
+   /* Enable out-of-place processing */
+   if (ipsec->options.ingress_oop)
+   inb_sa_dptr->w0.s.pkt_format = 
ROC_IE_OT_SA_PKT_FMT_FULL;
 
rc = roc_nix_inl_ctx_write(&dev->nix, inb_sa_dptr, eth_sec->sa,
   eth_sec->inb,
   sizeof(struct roc_ot_ipsec_inb_sa));
if (rc)
return -EINVAL;
+
+   /* Save userdata in inb private area */
+   inb_priv->userdata = conf->userdata;
} else {
-   struct roc_ot_ipsec_outb_sa *outb_sa_dptr;
+   struct roc_ot_ipsec_outb_sa *outb_sa_dptr, *outb_sa;
+   struct cn10k_outb_priv_data *outb_priv;
+   struct cnxk_ipsec_outb_rlens *rlens;
 
+   outb_sa = eth_sec->sa;
+   outb_priv = roc_nix_inl_ot_ipsec_outb_sa_sw_rsvd(outb_sa);
+   rlens = &outb_priv->rlens;
outb_sa_dptr = (struct roc_ot_ipsec_outb_sa *)dev->outb.sa_dptr;
memset(outb_sa_dptr, 0, sizeof(struct roc_ot_ipsec_outb_sa));
 
rc = cnxk_ot_ipsec_outb_sa_fill(outb_sa_dptr, ipsec, crypto);
if (rc)
return -EINVAL;
+
+   /* Save rlen info */
+   cnxk_ipsec_outb_rlens_get(rlens, ipsec, crypto);
+
+   if (ipsec->options.stats == 1) {
+   /* Enable mib counters */
+   outb_sa_dptr->w0.s.count_mib_bytes = 1;
+   outb_sa_dptr->w0.s.count_mib_pkts = 1;
+   }
+
+   sess_priv.u64 = 0;
+   sess_priv.sa_idx = outb_priv->sa_idx;
+   sess_priv.roundup_byte = rlens->roundup_byte;
+   sess_priv.roundup_len = rlens->roundup_len;
+   sess_priv.partial_len = rlens->partial_len;
+   sess_priv.mode = outb_sa_dptr->w2.s.ipsec_mode;
+   sess_priv.outer_ip_ver = outb_sa_dptr->w2.s.outer_ip_ver;
+   /* Propagate inner checksum enable from SA to fast path */
+   sess_priv.chksum =
+   (!ipsec->options.ip_csum_enable << 1 | 
!ipsec->options.l4_csum_enable);
+   sess_priv.dec_ttl = ipsec->options.dec_ttl;
+   if (roc_feature_nix_has_inl_ipsec_mseg() && 
dev->outb.cpt_eng_caps & BIT_ULL(35))
+   sess_priv.nixtx_off = 1;
+
rc = roc_nix_inl_ctx_write(&dev->nix, outb_sa_dptr, eth_sec->sa,
   eth_sec->inb,
   sizeof(struct roc_ot_ipsec_outb_sa));
if (rc)
return -EINVAL;
+
+   /* Save userdata */
+   outb_priv->

[PATCH v2 06/10] net/cnxk: add option to disable custom meta aura

2024-05-28 Thread Nithin Dabilpuram
Add option to explicitly disable custom meta aura. Currently
custom meta aura is enabled automatically when inl_cpt_channel
is set i.e inline dev is masking CHAN field in IPsec rules.

Also decouple the custom meta aura feature from custom sa action
so that the custom sa action can independently be used.

Signed-off-by: Nithin Dabilpuram 
---
 doc/guides/nics/cnxk.rst   | 13 +
 doc/guides/rel_notes/release_24_07.rst |  4 
 drivers/common/cnxk/roc_nix_inl.c  | 19 +--
 drivers/common/cnxk/roc_nix_inl.h  |  1 +
 drivers/common/cnxk/version.map|  1 +
 drivers/net/cnxk/cnxk_ethdev.c |  5 +
 drivers/net/cnxk/cnxk_ethdev.h |  3 +++
 drivers/net/cnxk/cnxk_ethdev_devargs.c |  8 +++-
 8 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/doc/guides/nics/cnxk.rst b/doc/guides/nics/cnxk.rst
index f5f296ee36..99ad224efd 100644
--- a/doc/guides/nics/cnxk.rst
+++ b/doc/guides/nics/cnxk.rst
@@ -444,6 +444,19 @@ Runtime Config Options
With the above configuration, driver would enable packet inject from ARM 
cores
to crypto to process and send back in Rx path.
 
+- ``Disable custom meta aura feature`` (default ``0``)
+
+   Custom meta aura i.e 1:N meta aura is enabled for second pass traffic by 
default when
+   ``inl_cpt_channel`` devarg is provided. Provide an option to disable the 
custom
+   meta aura feature by setting devarg ``custom_meta_aura_dis`` to ``1``.
+
+   For example::
+
+ -a 0002:02:00.0,custom_meta_aura_dis=1
+
+   With the above configuration, driver would disable custom meta aura feature 
for
+   ``0002:02:00.0`` ethdev.
+
 .. note::
 
Above devarg parameters are configurable per device, user needs to pass the
diff --git a/doc/guides/rel_notes/release_24_07.rst 
b/doc/guides/rel_notes/release_24_07.rst
index a69f24cf99..37eadb24d6 100644
--- a/doc/guides/rel_notes/release_24_07.rst
+++ b/doc/guides/rel_notes/release_24_07.rst
@@ -55,6 +55,10 @@ New Features
  Also, make sure to start the actual text at the margin.
  ===
 
+* **Updated Marvell cnxk net driver.**
+
+  * Added support for disabling custom meta aura and for using custom
+SA action independently.
 
 Removed Items
 -
diff --git a/drivers/common/cnxk/roc_nix_inl.c 
b/drivers/common/cnxk/roc_nix_inl.c
index 7dbeae5017..74a688abbd 100644
--- a/drivers/common/cnxk/roc_nix_inl.c
+++ b/drivers/common/cnxk/roc_nix_inl.c
@@ -872,7 +872,6 @@ roc_nix_inl_inb_init(struct roc_nix *roc_nix)
struct nix *nix = roc_nix_to_nix_priv(roc_nix);
struct roc_cpt_inline_ipsec_inb_cfg cfg;
struct idev_cfg *idev = idev_get_cfg();
-   struct nix_inl_dev *inl_dev;
uint16_t bpids[ROC_NIX_MAX_BPID_CNT];
struct roc_cpt *roc_cpt;
int rc;
@@ -929,11 +928,6 @@ roc_nix_inl_inb_init(struct roc_nix *roc_nix)
if (rc)
return rc;
 
-   inl_dev = idev->nix_inl_dev;
-
-   roc_nix->custom_meta_aura_ena = (roc_nix->local_meta_aura_ena &&
-((inl_dev && 
inl_dev->is_multi_channel) ||
- roc_nix->custom_sa_action));
if (!roc_model_is_cn9k() && !roc_errata_nix_no_meta_aura()) {
nix->need_meta_aura = true;
if (!roc_nix->local_meta_aura_ena || 
roc_nix->custom_meta_aura_ena)
@@ -1245,6 +1239,19 @@ roc_nix_inl_dev_is_probed(void)
return !!idev->nix_inl_dev;
 }
 
+bool
+roc_nix_inl_dev_is_multi_channel(void)
+{
+   struct idev_cfg *idev = idev_get_cfg();
+   struct nix_inl_dev *inl_dev;
+
+   if (idev == NULL || !idev->nix_inl_dev)
+   return false;
+
+   inl_dev = idev->nix_inl_dev;
+   return inl_dev->is_multi_channel;
+}
+
 bool
 roc_nix_inl_inb_is_enabled(struct roc_nix *roc_nix)
 {
diff --git a/drivers/common/cnxk/roc_nix_inl.h 
b/drivers/common/cnxk/roc_nix_inl.h
index 8acd7e0545..ab0965e512 100644
--- a/drivers/common/cnxk/roc_nix_inl.h
+++ b/drivers/common/cnxk/roc_nix_inl.h
@@ -115,6 +115,7 @@ int __roc_api roc_nix_inl_dev_stats_get(struct 
roc_nix_stats *stats);
 uint16_t __roc_api roc_nix_inl_dev_pffunc_get(void);
 int __roc_api roc_nix_inl_dev_cpt_setup(bool use_inl_dev_sso);
 int __roc_api roc_nix_inl_dev_cpt_release(void);
+bool __roc_api roc_nix_inl_dev_is_multi_channel(void);
 
 /* NIX Inline Inbound API */
 int __roc_api roc_nix_inl_inb_init(struct roc_nix *roc_nix);
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index 424ad7f484..e8d32b331e 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -238,6 +238,7 @@ INTERNAL {
roc_nix_inl_dev_dump;
roc_nix_inl_dev_fini;
roc_nix_inl_dev_init;
+   roc_nix_inl_dev_is_multi_channel;
roc_nix_inl_dev_is_probed;
roc_nix_inl_dev_stats_get;
roc_nix_inl_dev_lock;
diff --git a/drivers/net/cnxk/cnxk_ethdev.c b/driv

[PATCH v2 07/10] net/cnxk: fix issue with outbound security higher pkt burst

2024-05-28 Thread Nithin Dabilpuram
Fix an issue in the outbound security path when handling mixed traffic,
i.e. both plain and inline outbound pkts being present as part of a burst
and the burst size is > 32. The loop needs to be broken when
we don't have space for 4 pkts in LMT lines for CPT considering
both the full lmt lines and partial lmt lines used.

Fixes: 55bfac717c72 ("net/cnxk: support Tx security offload on cn10k")
Cc: sta...@dpdk.org

Signed-off-by: Nithin Dabilpuram 
---
 drivers/net/cnxk/cn10k_tx.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index 3818b0445a..84b08403c0 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -2272,7 +2272,8 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
}
 
for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
-   if (flags & NIX_TX_OFFLOAD_SECURITY_F && c_lnum + 2 > 16) {
+   if (flags & NIX_TX_OFFLOAD_SECURITY_F &&
+   (((int)((16 - c_lnum) << 1) - c_loff) < 4)) {
burst = i;
break;
}
-- 
2.25.1



[PATCH v7] eal/x86: improve rte_memcpy const size 16 performance

2024-05-28 Thread Morten Brørup
When the rte_memcpy() size is 16, the same 16 bytes are copied twice.
In the case where the size is known to be 16 at build time, omit the
duplicate copy.

Reduced the amount of effectively copy-pasted code by using #ifdef
inside functions instead of outside functions.

Suggested-by: Stephen Hemminger 
Signed-off-by: Morten Brørup 
Acked-by: Bruce Richardson 
---
Depends-on: patch-138647 ("eal: provide macro for GCC builtin constant 
intrinsic")

v7:
* Keep trying to fix that CI does not understand the dependency...
  Depend on patch instead of series.
  Move dependency out of the patch description itself, and down to the
  version log.
v6:
* Trying to fix CI not understanding dependency...
  Don't wrap dependency line.
v5:
* Fix for building with MSVC:
  Use __rte_constant() instead of __builtin_constant_p().
  Add dependency on patch providing __rte_constant().
v4:
* There are no problems compiling AVX2, only AVX. (Bruce Richardson)
v3:
* AVX2 is a superset of AVX;
  for a block of AVX code, testing for AVX suffices. (Bruce Richardson)
* Define RTE_MEMCPY_AVX if AVX is available, to avoid copy-pasting the
  check for older GCC version. (Bruce Richardson)
v2:
* For GCC, version 11 is required for proper AVX handling;
  if older GCC version, treat AVX as SSE.
  Clang does not have this issue.
  Note: Original code always treated AVX as SSE, regardless of compiler.
* Do not add copyright. (Stephen Hemminger)
---
 lib/eal/x86/include/rte_memcpy.h | 239 +--
 1 file changed, 64 insertions(+), 175 deletions(-)

diff --git a/lib/eal/x86/include/rte_memcpy.h b/lib/eal/x86/include/rte_memcpy.h
index 72a92290e0..1619a8f296 100644
--- a/lib/eal/x86/include/rte_memcpy.h
+++ b/lib/eal/x86/include/rte_memcpy.h
@@ -27,6 +27,16 @@ extern "C" {
 #pragma GCC diagnostic ignored "-Wstringop-overflow"
 #endif
 
+/*
+ * GCC older than version 11 doesn't compile AVX properly, so use SSE instead.
+ * There are no problems with AVX2.
+ */
+#if defined __AVX2__
+#define RTE_MEMCPY_AVX
+#elif defined __AVX__ && !(defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 
11))
+#define RTE_MEMCPY_AVX
+#endif
+
 /**
  * Copy bytes from one location to another. The locations must not overlap.
  *
@@ -91,14 +101,6 @@ rte_mov15_or_less(void *dst, const void *src, size_t n)
return ret;
 }
 
-#if defined __AVX512F__ && defined RTE_MEMCPY_AVX512
-
-#define ALIGNMENT_MASK 0x3F
-
-/**
- * AVX512 implementation below
- */
-
 /**
  * Copy 16 bytes from one location to another,
  * locations should not overlap.
@@ -119,10 +121,15 @@ rte_mov16(uint8_t *dst, const uint8_t *src)
 static __rte_always_inline void
 rte_mov32(uint8_t *dst, const uint8_t *src)
 {
+#if defined RTE_MEMCPY_AVX
__m256i ymm0;
 
ymm0 = _mm256_loadu_si256((const __m256i *)src);
_mm256_storeu_si256((__m256i *)dst, ymm0);
+#else /* SSE implementation */
+   rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16);
+   rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16);
+#endif
 }
 
 /**
@@ -132,10 +139,15 @@ rte_mov32(uint8_t *dst, const uint8_t *src)
 static __rte_always_inline void
 rte_mov64(uint8_t *dst, const uint8_t *src)
 {
+#if defined __AVX512F__ && defined RTE_MEMCPY_AVX512
__m512i zmm0;
 
zmm0 = _mm512_loadu_si512((const void *)src);
_mm512_storeu_si512((void *)dst, zmm0);
+#else /* AVX2, AVX & SSE implementation */
+   rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32);
+   rte_mov32((uint8_t *)dst + 1 * 32, (const uint8_t *)src + 1 * 32);
+#endif
 }
 
 /**
@@ -156,12 +168,18 @@ rte_mov128(uint8_t *dst, const uint8_t *src)
 static __rte_always_inline void
 rte_mov256(uint8_t *dst, const uint8_t *src)
 {
-   rte_mov64(dst + 0 * 64, src + 0 * 64);
-   rte_mov64(dst + 1 * 64, src + 1 * 64);
-   rte_mov64(dst + 2 * 64, src + 2 * 64);
-   rte_mov64(dst + 3 * 64, src + 3 * 64);
+   rte_mov128(dst + 0 * 128, src + 0 * 128);
+   rte_mov128(dst + 1 * 128, src + 1 * 128);
 }
 
+#if defined __AVX512F__ && defined RTE_MEMCPY_AVX512
+
+/**
+ * AVX512 implementation below
+ */
+
+#define ALIGNMENT_MASK 0x3F
+
 /**
  * Copy 128-byte blocks from one location to another,
  * locations should not overlap.
@@ -231,12 +249,22 @@ rte_memcpy_generic(void *dst, const void *src, size_t n)
/**
 * Fast way when copy size doesn't exceed 512 bytes
 */
+   if (__rte_constant(n) && n == 32) {
+   rte_mov32((uint8_t *)dst, (const uint8_t *)src);
+   return ret;
+   }
if (n <= 32) {
rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+   if (__rte_constant(n) && n == 16)
+   return ret; /* avoid (harmless) duplicate copy */
rte_mov16((uint8_t *)dst - 16 + n,
  (const uint8_t *)src - 16 + n);
return ret;
}
+   if (__rte_constant(n) && n == 64) {
+   rte_

[PATCH v2 08/10] common/cnxk: add API to reset CGX stats

2024-05-28 Thread Nithin Dabilpuram
From: Sunil Kumar Kori 

Similar to NIX stats reset API, adding API to reset
CGX stats.

When the user requests a stats reset, the stats are cleared if
nix_lf is a PF; otherwise the request is silently discarded.

Signed-off-by: Sunil Kumar Kori 
---
 drivers/common/cnxk/roc_nix.h |  1 +
 drivers/common/cnxk/roc_nix_mac.c | 29 +
 drivers/common/cnxk/version.map   |  1 +
 3 files changed, 31 insertions(+)

diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h
index 63bcd5b25e..25cf261348 100644
--- a/drivers/common/cnxk/roc_nix.h
+++ b/drivers/common/cnxk/roc_nix.h
@@ -849,6 +849,7 @@ int __roc_api roc_nix_mac_link_info_get_cb_register(
 void __roc_api roc_nix_mac_link_info_get_cb_unregister(struct roc_nix 
*roc_nix);
 int __roc_api roc_nix_q_err_cb_register(struct roc_nix *roc_nix, q_err_get_t 
sq_err_handle);
 void __roc_api roc_nix_q_err_cb_unregister(struct roc_nix *roc_nix);
+int __roc_api roc_nix_mac_stats_reset(struct roc_nix *roc_nix);
 
 /* Ops */
 int __roc_api roc_nix_switch_hdr_set(struct roc_nix *roc_nix,
diff --git a/drivers/common/cnxk/roc_nix_mac.c 
b/drivers/common/cnxk/roc_nix_mac.c
index 2d1c29dd66..f79aaec4a5 100644
--- a/drivers/common/cnxk/roc_nix_mac.c
+++ b/drivers/common/cnxk/roc_nix_mac.c
@@ -354,6 +354,35 @@ roc_nix_mac_max_rx_len_set(struct roc_nix *roc_nix, 
uint16_t maxlen)
return rc;
 }
 
+int
+roc_nix_mac_stats_reset(struct roc_nix *roc_nix)
+{
+   struct nix *nix = roc_nix_to_nix_priv(roc_nix);
+   struct dev *dev = &nix->dev;
+   struct mbox *mbox = mbox_get(dev->mbox);
+   struct msg_req *req;
+   int rc = -ENOSPC;
+
+   if (roc_model_is_cn10k()) {
+   rc = 0;
+   goto exit;
+   }
+
+   if (roc_nix_is_vf_or_sdp(roc_nix)) {
+   rc = 0;
+   goto exit;
+   }
+
+   req = mbox_alloc_msg_cgx_stats_rst(mbox);
+   if (req == NULL)
+   goto exit;
+
+   rc = mbox_process(mbox);
+exit:
+   mbox_put(mbox);
+   return rc;
+}
+
 int
 roc_nix_mac_link_cb_register(struct roc_nix *roc_nix, link_status_t 
link_update)
 {
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index e8d32b331e..e37c1c7b7d 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -308,6 +308,7 @@ INTERNAL {
roc_nix_mac_mtu_set;
roc_nix_mac_promisc_mode_enable;
roc_nix_mac_rxtx_start_stop;
+   roc_nix_mac_stats_reset;
roc_nix_max_pkt_len;
roc_nix_mcast_list_free;
roc_nix_mcast_list_setup;
-- 
2.25.1



[PATCH v2 09/10] net/cnxk: fix xstats reset

2024-05-28 Thread Nithin Dabilpuram
From: Sunil Kumar Kori 

Currently only NIX stats are cleared during xstats
reset and CGX stats are left as they are.

Clear CGX stats too during xstats reset.

Fixes: 8075b057b620 ("net/cnxk: support extended statistics")

Signed-off-by: Sunil Kumar Kori 
---
 drivers/net/cnxk/cnxk_stats.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/cnxk/cnxk_stats.c b/drivers/net/cnxk/cnxk_stats.c
index f2fc89..469faff405 100644
--- a/drivers/net/cnxk/cnxk_stats.c
+++ b/drivers/net/cnxk/cnxk_stats.c
@@ -316,6 +316,8 @@ cnxk_nix_xstats_reset(struct rte_eth_dev *eth_dev)
goto exit;
}
 
+   /* Reset MAC stats */
+   rc = roc_nix_mac_stats_reset(nix);
 exit:
return rc;
 }
-- 
2.25.1



[PATCH v2 10/10] net/cnxk: define CPT HW result format for PMD API

2024-05-28 Thread Nithin Dabilpuram
From: Srujana Challa 

Defines CPT HW result format for PMD API,
rte_pmd_cnxk_inl_ipsec_res().

Signed-off-by: Srujana Challa 
---
 drivers/net/cnxk/cn10k_ethdev_sec.c |  4 +--
 drivers/net/cnxk/rte_pmd_cnxk.h | 40 +++--
 2 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/drivers/net/cnxk/cn10k_ethdev_sec.c 
b/drivers/net/cnxk/cn10k_ethdev_sec.c
index eed4c29218..b8b0da5ea9 100644
--- a/drivers/net/cnxk/cn10k_ethdev_sec.c
+++ b/drivers/net/cnxk/cn10k_ethdev_sec.c
@@ -1251,7 +1251,7 @@ rte_pmd_cnxk_hw_sa_write(void *device, struct 
rte_security_session *sess,
return 0;
 }
 
-void *
+union rte_pmd_cnxk_cpt_res_s *
 rte_pmd_cnxk_inl_ipsec_res(struct rte_mbuf *mbuf)
 {
const union nix_rx_parse_u *rx;
@@ -1265,7 +1265,7 @@ rte_pmd_cnxk_inl_ipsec_res(struct rte_mbuf *mbuf)
rx = (const union nix_rx_parse_u *)(wqe + 8);
desc_size = (rx->desc_sizem1 + 1) * 16;
 
-   /* cpt_res_s sits after SG list at 16B aligned address */
+   /* rte_pmd_cnxk_cpt_res_s sits after SG list at 16B aligned address */
return (void *)(wqe + 64 + desc_size);
 }
 
diff --git a/drivers/net/cnxk/rte_pmd_cnxk.h b/drivers/net/cnxk/rte_pmd_cnxk.h
index 43f2a7ed9b..88030046db 100644
--- a/drivers/net/cnxk/rte_pmd_cnxk.h
+++ b/drivers/net/cnxk/rte_pmd_cnxk.h
@@ -453,6 +453,42 @@ union rte_pmd_cnxk_ipsec_hw_sa {
struct rte_pmd_cnxk_ipsec_outb_sa outb;
 };
 
+/** CPT HW result format */
+union rte_pmd_cnxk_cpt_res_s {
+   /** CN10K CPT result */
+   struct rte_pmd_cpt_cn10k_res_s {
+   /** Completion code */
+   uint64_t compcode : 7;
+   /** Done interrupt */
+   uint64_t doneint : 1;
+   /** Microcode completion code */
+   uint64_t uc_compcode : 8;
+   /** Result length */
+   uint64_t rlen : 16;
+   /** SPI */
+   uint64_t spi : 32;
+
+   /** Extended sequence number */
+   uint64_t esn;
+   } cn10k;
+
+   /** CN9K CPT result */
+   struct rte_pmd_cpt_cn9k_res_s {
+   /** Completion code */
+   uint64_t compcode : 8;
+   /** Microcode completion code */
+   uint64_t uc_compcode : 8;
+   /** Done interrupt */
+   uint64_t doneint : 1;
+   uint64_t reserved_17_63 : 47;
+
+   uint64_t reserved_64_127;
+   } cn9k;
+
+   /** CPT RES */
+   uint64_t u64[2];
+};
+
 /**
  * Read HW SA context from session.
  *
@@ -501,9 +537,9 @@ int rte_pmd_cnxk_hw_sa_write(void *device, struct 
rte_security_session *sess,
  *   Pointer to packet that was just received and was processed with Inline 
IPsec.
  *
  * @return
- *   - Pointer to mbuf location where CPT result info is stored on success.
+ *   - Pointer to mbuf location where `union rte_pmd_cnxk_cpt_res_s` is stored 
on success.
  *   - NULL on failure.
  */
 __rte_experimental
-void *rte_pmd_cnxk_inl_ipsec_res(struct rte_mbuf *mbuf);
+union rte_pmd_cnxk_cpt_res_s *rte_pmd_cnxk_inl_ipsec_res(struct rte_mbuf 
*mbuf);
 #endif /* _PMD_CNXK_H_ */
-- 
2.25.1



[RFC v2] eal: provide option to use compiler memcpy instead of RTE

2024-05-28 Thread Mattias Rönnblom
Provide build option to have functions in <rte_memcpy.h> delegate to
the standard compiler/libc memcpy(), instead of using the various
traditional, handcrafted, per-architecture rte_memcpy()
implementations.

A new meson build option 'use_cc_memcpy' is added. The default is
true. It's not obvious what should be the default, but compiler
memcpy() is enabled by default in this RFC so any tests run with this
patch use the new approach.

One purpose of this RFC is to make it easy to evaluate the costs and
benefits of a switch.

Only Loongarch, ARM and x86 are implemented. Only x86 is tested.

RFC v2:
 * Fix bug where rte_memcpy.h was not installed on x86.
 * Made attempt to make Loongarch compile.

Signed-off-by: Mattias Rönnblom 
---
 config/meson.build |  1 +
 lib/eal/arm/include/rte_memcpy.h   | 10 +
 lib/eal/include/generic/rte_memcpy.h   | 62 +++---
 lib/eal/loongarch/include/rte_memcpy.h | 52 ++---
 lib/eal/x86/include/meson.build|  1 +
 lib/eal/x86/include/rte_memcpy.h   | 11 -
 meson_options.txt  |  2 +
 7 files changed, 82 insertions(+), 57 deletions(-)

diff --git a/config/meson.build b/config/meson.build
index 8c8b019c25..456056628e 100644
--- a/config/meson.build
+++ b/config/meson.build
@@ -353,6 +353,7 @@ endforeach
 # set other values pulled from the build options
 dpdk_conf.set('RTE_MAX_ETHPORTS', get_option('max_ethports'))
 dpdk_conf.set('RTE_LIBEAL_USE_HPET', get_option('use_hpet'))
+dpdk_conf.set('RTE_USE_CC_MEMCPY', get_option('use_cc_memcpy'))
 dpdk_conf.set('RTE_ENABLE_STDATOMIC', get_option('enable_stdatomic'))
 dpdk_conf.set('RTE_ENABLE_TRACE_FP', get_option('enable_trace_fp'))
 dpdk_conf.set('RTE_PKTMBUF_HEADROOM', get_option('pkt_mbuf_headroom'))
diff --git a/lib/eal/arm/include/rte_memcpy.h b/lib/eal/arm/include/rte_memcpy.h
index 47dea9a8cc..e8aff722df 100644
--- a/lib/eal/arm/include/rte_memcpy.h
+++ b/lib/eal/arm/include/rte_memcpy.h
@@ -5,10 +5,20 @@
 #ifndef _RTE_MEMCPY_ARM_H_
 #define _RTE_MEMCPY_ARM_H_
 
+#include 
+
+#ifdef RTE_USE_CC_MEMCPY
+
+#include 
+
+#else
+
 #ifdef RTE_ARCH_64
 #include 
 #else
 #include 
 #endif
 
+#endif /* RTE_USE_CC_MEMCPY */
+
 #endif /* _RTE_MEMCPY_ARM_H_ */
diff --git a/lib/eal/include/generic/rte_memcpy.h 
b/lib/eal/include/generic/rte_memcpy.h
index e7f0f8eaa9..f2f66f372d 100644
--- a/lib/eal/include/generic/rte_memcpy.h
+++ b/lib/eal/include/generic/rte_memcpy.h
@@ -5,12 +5,20 @@
 #ifndef _RTE_MEMCPY_H_
 #define _RTE_MEMCPY_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @file
  *
  * Functions for vectorised implementation of memcpy().
  */
 
+#include 
+#include 
+#include 
+
 /**
  * Copy 16 bytes from one location to another using optimised
  * instructions. The locations should not overlap.
@@ -35,8 +43,6 @@ rte_mov16(uint8_t *dst, const uint8_t *src);
 static inline void
 rte_mov32(uint8_t *dst, const uint8_t *src);
 
-#ifdef __DOXYGEN__
-
 /**
  * Copy 48 bytes from one location to another using optimised
  * instructions. The locations should not overlap.
@@ -49,8 +55,6 @@ rte_mov32(uint8_t *dst, const uint8_t *src);
 static inline void
 rte_mov48(uint8_t *dst, const uint8_t *src);
 
-#endif /* __DOXYGEN__ */
-
 /**
  * Copy 64 bytes from one location to another using optimised
  * instructions. The locations should not overlap.
@@ -87,8 +91,6 @@ rte_mov128(uint8_t *dst, const uint8_t *src);
 static inline void
 rte_mov256(uint8_t *dst, const uint8_t *src);
 
-#ifdef __DOXYGEN__
-
 /**
  * Copy bytes from one location to another. The locations must not overlap.
  *
@@ -111,6 +113,52 @@ rte_mov256(uint8_t *dst, const uint8_t *src);
 static void *
 rte_memcpy(void *dst, const void *src, size_t n);
 
-#endif /* __DOXYGEN__ */
+#ifdef RTE_USE_CC_MEMCPY
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+   memcpy(dst, src, 16);
+}
+
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+   memcpy(dst, src, 32);
+}
+
+static inline void
+rte_mov48(uint8_t *dst, const uint8_t *src)
+{
+   memcpy(dst, src, 48);
+}
+
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+   memcpy(dst, src, 64);
+}
+
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+   memcpy(dst, src, 128);
+}
+
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+   memcpy(dst, src, 256);
+}
+
+static inline void *
+rte_memcpy(void *dst, const void *src, size_t n)
+{
+   return memcpy(dst, src, n);
+}
+#endif /* RTE_USE_CC_MEMCPY */
+
+#ifdef __cplusplus
+}
+#endif
 
 #endif /* _RTE_MEMCPY_H_ */
diff --git a/lib/eal/loongarch/include/rte_memcpy.h 
b/lib/eal/loongarch/include/rte_memcpy.h
index 22578d40f4..159420d3b7 100644
--- a/lib/eal/loongarch/include/rte_memcpy.h
+++ b/lib/eal/loongarch/include/rte_memcpy.h
@@ -5,57 +5,11 @@
 #ifndef RTE_MEMCPY_LOONGARCH_H
 #define RTE_MEMCPY_LOONGARCH_H
 
-#include 
-#include 
+#include "rte_config.h"
 
-#include "rte_common.h"
-
-#

[PATCH 00/25] Update IDPF Base Driver

2024-05-28 Thread Soumyadeep Hore
This patchset updates IDPF base driver to latest shared code snapshot.

Soumyadeep Hore (25):
  common/idpf: added NVME CPF specific code with defines
  common/idpf: updated IDPF VF device ID
  common/idpf: update ADD QUEUE GROUPS offset
  common/idpf: update in PTP message validation
  common/idpf: added FLOW STEER capability and a vport flag
  common/idpf: moved the IDPF HW into API header file
  common/idpf: avoid defensive programming
  common/idpf: move related defines into enums
  common/idpf: add flex array support to virtchnl2 structures
  common/idpf: avoid variable 0-init
  common/idpf: support added for xn transactions
  common/idpf: rename of VIRTCHNL2 CAP INLINE FLOW STEER
  common/idpf: update compiler padding
  common/idpf: avoid compiler padding
  common/idpf: add wmb before tail
  common/idpf: add a new Tx context descriptor structure
  common/idpf: removing redundant implementation
  common/idpf: removing redundant functionality of virtchnl2
  common/idpf: updating common code of latest base driver
  net/cpfl: updating cpfl based on latest base driver
  common/idpf: defining ethernet address length macro
  common/idpf: increasing size of xn index
  common/idpf: redefining idpf vc queue switch
  net/idpf: updating idpf vc queue switch in idpf
  net/cpfl: updating idpf vc queue switch in cpfl

 drivers/common/idpf/base/idpf_common.c|  382 ---
 drivers/common/idpf/base/idpf_controlq.c  |   94 +-
 drivers/common/idpf/base/idpf_controlq.h  |  110 +-
 drivers/common/idpf/base/idpf_controlq_api.h  |   41 +-
 .../common/idpf/base/idpf_controlq_setup.c|   16 +-
 drivers/common/idpf/base/idpf_devids.h|   12 +-
 drivers/common/idpf/base/idpf_lan_txrx.h  |   20 +-
 drivers/common/idpf/base/idpf_osdep.c |   71 +
 drivers/common/idpf/base/idpf_osdep.h |   80 +-
 drivers/common/idpf/base/idpf_prototype.h |   23 -
 drivers/common/idpf/base/idpf_type.h  |   10 +-
 drivers/common/idpf/base/idpf_xn.c|  439 +++
 drivers/common/idpf/base/idpf_xn.h|   90 +
 drivers/common/idpf/base/meson.build  |3 +-
 drivers/common/idpf/base/virtchnl2.h  | 2496 +
 drivers/common/idpf/base/virtchnl2_lan_desc.h |  859 --
 drivers/common/idpf/idpf_common_device.h  |2 +
 drivers/common/idpf/idpf_common_virtchnl.c|   10 +-
 drivers/common/idpf/idpf_common_virtchnl.h|2 +-
 drivers/net/cpfl/cpfl_ethdev.c|   36 +-
 drivers/net/cpfl/cpfl_rxtx.c  |8 +-
 drivers/net/idpf/idpf_rxtx.c  |8 +-
 22 files changed, 2746 insertions(+), 2066 deletions(-)
 delete mode 100644 drivers/common/idpf/base/idpf_common.c
 create mode 100644 drivers/common/idpf/base/idpf_osdep.c
 create mode 100644 drivers/common/idpf/base/idpf_xn.c
 create mode 100644 drivers/common/idpf/base/idpf_xn.h

-- 
2.43.0



[PATCH 01/25] common/idpf: added NVME CPF specific code with defines

2024-05-28 Thread Soumyadeep Hore
The aim of the changes is to remove NVME dependency on
memory allocations, and to use a prepared buffer instead.

The changes do not affect other components.

Signed-off-by: Soumyadeep Hore 
---
 drivers/common/idpf/base/idpf_controlq.c | 27 +---
 drivers/common/idpf/base/idpf_controlq_api.h |  9 +--
 2 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/drivers/common/idpf/base/idpf_controlq.c 
b/drivers/common/idpf/base/idpf_controlq.c
index a82ca628de..0ba7281a45 100644
--- a/drivers/common/idpf/base/idpf_controlq.c
+++ b/drivers/common/idpf/base/idpf_controlq.c
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2001-2023 Intel Corporation
+ * Copyright(c) 2001-2024 Intel Corporation
  */
 
 #include "idpf_controlq.h"
@@ -145,8 +145,12 @@ int idpf_ctlq_add(struct idpf_hw *hw,
qinfo->buf_size > IDPF_CTLQ_MAX_BUF_LEN)
return -EINVAL;
 
+#ifndef NVME_CPF
cq = (struct idpf_ctlq_info *)
 idpf_calloc(hw, 1, sizeof(struct idpf_ctlq_info));
+#else
+   cq = *cq_out;
+#endif
if (!cq)
return -ENOMEM;
 
@@ -172,10 +176,15 @@ int idpf_ctlq_add(struct idpf_hw *hw,
}
 
if (status)
+#ifdef NVME_CPF
+   return status;
+#else
goto init_free_q;
+#endif
 
if (is_rxq) {
idpf_ctlq_init_rxq_bufs(cq);
+#ifndef NVME_CPF
} else {
/* Allocate the array of msg pointers for TX queues */
cq->bi.tx_msg = (struct idpf_ctlq_msg **)
@@ -185,6 +194,7 @@ int idpf_ctlq_add(struct idpf_hw *hw,
status = -ENOMEM;
goto init_dealloc_q_mem;
}
+#endif
}
 
idpf_ctlq_setup_regs(cq, qinfo);
@@ -195,6 +205,7 @@ int idpf_ctlq_add(struct idpf_hw *hw,
 
LIST_INSERT_HEAD(&hw->cq_list_head, cq, cq_list);
 
+#ifndef NVME_CPF
*cq_out = cq;
return status;
 
@@ -205,6 +216,7 @@ int idpf_ctlq_add(struct idpf_hw *hw,
idpf_free(hw, cq);
cq = NULL;
 
+#endif
return status;
 }
 
@@ -232,8 +244,13 @@ void idpf_ctlq_remove(struct idpf_hw *hw,
  * destroyed. This must be called prior to using the individual add/remove
  * APIs.
  */
+#ifdef NVME_CPF
+int idpf_ctlq_init(struct idpf_hw *hw, u8 num_q,
+   struct idpf_ctlq_create_info *q_info, struct idpf_ctlq_info 
**ctlq)
+#else
 int idpf_ctlq_init(struct idpf_hw *hw, u8 num_q,
   struct idpf_ctlq_create_info *q_info)
+#endif
 {
struct idpf_ctlq_info *cq = NULL, *tmp = NULL;
int ret_code = 0;
@@ -244,6 +261,10 @@ int idpf_ctlq_init(struct idpf_hw *hw, u8 num_q,
for (i = 0; i < num_q; i++) {
struct idpf_ctlq_create_info *qinfo = q_info + i;
 
+#ifdef NVME_CPF
+   cq = *(ctlq + i);
+
+#endif 
ret_code = idpf_ctlq_add(hw, qinfo, &cq);
if (ret_code)
goto init_destroy_qs;
@@ -398,7 +419,7 @@ int idpf_ctlq_send(struct idpf_hw *hw, struct 
idpf_ctlq_info *cq,
  * ctlq_msgs and free or reuse the DMA buffers.
  */
 static int __idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, u16 *clean_count,
-   struct idpf_ctlq_msg *msg_status[], bool force)
+   struct idpf_ctlq_msg *msg_status[], bool force)
 {
struct idpf_ctlq_desc *desc;
u16 i = 0, num_to_clean;
@@ -469,7 +490,7 @@ static int __idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, 
u16 *clean_count,
  * ctlq_msgs and free or reuse the DMA buffers.
  */
 int idpf_ctlq_clean_sq_force(struct idpf_ctlq_info *cq, u16 *clean_count,
-struct idpf_ctlq_msg *msg_status[])
+struct idpf_ctlq_msg *msg_status[])
 {
return __idpf_ctlq_clean_sq(cq, clean_count, msg_status, true);
 }
diff --git a/drivers/common/idpf/base/idpf_controlq_api.h 
b/drivers/common/idpf/base/idpf_controlq_api.h
index 38f5d2df3c..bce5187981 100644
--- a/drivers/common/idpf/base/idpf_controlq_api.h
+++ b/drivers/common/idpf/base/idpf_controlq_api.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2001-2023 Intel Corporation
+ * Copyright(c) 2001-2024 Intel Corporation
  */
 
 #ifndef _IDPF_CONTROLQ_API_H_
@@ -158,8 +158,13 @@ enum idpf_mbx_opc {
 /* Will init all required q including default mb.  "q_info" is an array of
  * create_info structs equal to the number of control queues to be created.
  */
+#ifdef NVME_CPF
+int idpf_ctlq_init(struct idpf_hw *hw, u8 num_q,
+   struct idpf_ctlq_create_info *q_info, struct idpf_ctlq_info 
**ctlq);
+#else
 int idpf_ctlq_init(struct idpf_hw *hw, u8 num_q,
   struct idpf_ctlq_create_info *q_info);
+#endif
 
 /* Allocate and initialize a single control queue, which will be added to the
  * control queue list; returns a handle to the created control queue
@@ -186,7 +191,7 @@ int idpf_ctlq_recv(stru

[PATCH 02/25] common/idpf: updated IDPF VF device ID

2024-05-28 Thread Soumyadeep Hore
Update the IDPF VF device ID to 0x145C, removing support for the legacy
AVF device ID 0x1889.

In accordance with DCR-3788, add a device ID for the S-IOV device.

Signed-off-by: Soumyadeep Hore 
---
 drivers/common/idpf/base/idpf_devids.h | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/common/idpf/base/idpf_devids.h 
b/drivers/common/idpf/base/idpf_devids.h
index c47762d5b7..acd235c540 100644
--- a/drivers/common/idpf/base/idpf_devids.h
+++ b/drivers/common/idpf/base/idpf_devids.h
@@ -1,18 +1,20 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2001-2023 Intel Corporation
+ * Copyright(c) 2001-2024 Intel Corporation
  */
 
 #ifndef _IDPF_DEVIDS_H_
 #define _IDPF_DEVIDS_H_
 
+#ifndef LINUX_SUPPORT
 /* Vendor ID */
 #define IDPF_INTEL_VENDOR_ID   0x8086
+#endif /* LINUX_SUPPORT */
 
 /* Device IDs */
 #define IDPF_DEV_ID_PF 0x1452
-#define IDPF_DEV_ID_VF 0x1889
-
-
-
+#define IDPF_DEV_ID_VF 0x145C
+#ifdef SIOV_SUPPORT
+#define IDPF_DEV_ID_VF_SIOV0x0DD5
+#endif /* SIOV_SUPPORT */
 
 #endif /* _IDPF_DEVIDS_H_ */
-- 
2.43.0



[PATCH 03/25] common/idpf: update ADD QUEUE GROUPS offset

2024-05-28 Thread Soumyadeep Hore
Some compilers use 64-bit addressing and will warn about the resulting
possible loss of data:

virtchnl2.h(1890,40): warning C4244: '=': conversion from '__int64' to
'__le32', possible loss of data

on line 1890
offset = (u8 *)(&groups->groups[0]) - (u8 *)groups;

Removed unnecessary zero init

Signed-off-by: Soumyadeep Hore 
---
 drivers/common/idpf/base/virtchnl2.h | 21 +++--
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/drivers/common/idpf/base/virtchnl2.h 
b/drivers/common/idpf/base/virtchnl2.h
index 3900b784d0..f44c0965b4 100644
--- a/drivers/common/idpf/base/virtchnl2.h
+++ b/drivers/common/idpf/base/virtchnl2.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2001-2023 Intel Corporation
+ * Copyright(c) 2001-2024 Intel Corporation
  */
 
 #ifndef _VIRTCHNL2_H_
@@ -47,9 +47,9 @@
  * that is never used.
  */
 #define VIRTCHNL2_CHECK_STRUCT_LEN(n, X) enum virtchnl2_static_assert_enum_##X 
\
-   { virtchnl2_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) 
}
+{ virtchnl2_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 
0) }
 #define VIRTCHNL2_CHECK_UNION_LEN(n, X) enum virtchnl2_static_asset_enum_##X \
-   { virtchnl2_static_assert_##X = (n)/((sizeof(union X) == (n)) ? 1 : 0) }
+{ virtchnl2_static_assert_##X = (n)/((sizeof(union X) == (n)) ? 1 : 0) 
}
 
 /* New major set of opcodes introduced and so leaving room for
  * old misc opcodes to be added in future. Also these opcodes may only
@@ -471,8 +471,8 @@
  * error regardless of version mismatch.
  */
 struct virtchnl2_version_info {
-   u32 major;
-   u32 minor;
+u32 major;
+u32 minor;
 };
 
 VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_version_info);
@@ -1414,9 +1414,9 @@ VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_mac_addr_list);
  * and returns the status.
  */
 struct virtchnl2_promisc_info {
-   __le32 vport_id;
+__le32 vport_id;
/* see VIRTCHNL2_PROMISC_FLAGS definitions */
-   __le16 flags;
+__le16 flags;
u8 pad[2];
 };
 
@@ -1733,7 +1733,8 @@ virtchnl2_vc_validate_vf_msg(__rte_unused struct 
virtchnl2_version_info *ver, u3
case VIRTCHNL2_OP_ADD_QUEUE_GROUPS:
valid_len = sizeof(struct virtchnl2_add_queue_groups);
if (msglen != valid_len) {
-   __le32 i = 0, offset = 0;
+   __le64 offset;
+   __le32 i;
struct virtchnl2_add_queue_groups *add_queue_grp =
(struct virtchnl2_add_queue_groups *)msg;
struct virtchnl2_queue_groups *groups = 
&(add_queue_grp->qg_info);
@@ -1904,8 +1905,8 @@ virtchnl2_vc_validate_vf_msg(__rte_unused struct 
virtchnl2_version_info *ver, u3
/* These are always errors coming from the VF. */
case VIRTCHNL2_OP_EVENT:
case VIRTCHNL2_OP_UNKNOWN:
-   default:
-   return VIRTCHNL2_STATUS_ERR_ESRCH;
+default:
+return VIRTCHNL2_STATUS_ERR_ESRCH;
}
/* few more checks */
if (err_msg_format || valid_len != msglen)
-- 
2.43.0



[PATCH 04/25] common/idpf: update in PTP message validation

2024-05-28 Thread Soumyadeep Hore
When the message for getting timestamp latches is sent by the driver,
the number of latches is equal to 0. The current implementation of the
message validation function incorrectly reports the length of such a
message as invalid.

Signed-off-by: Soumyadeep Hore 
---
 drivers/common/idpf/base/virtchnl2.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/common/idpf/base/virtchnl2.h 
b/drivers/common/idpf/base/virtchnl2.h
index f44c0965b4..9a1310ca24 100644
--- a/drivers/common/idpf/base/virtchnl2.h
+++ b/drivers/common/idpf/base/virtchnl2.h
@@ -1873,7 +1873,7 @@ virtchnl2_vc_validate_vf_msg(__rte_unused struct 
virtchnl2_version_info *ver, u3
case VIRTCHNL2_OP_GET_PTP_CAPS:
valid_len = sizeof(struct virtchnl2_get_ptp_caps);
 
-   if (msglen >= valid_len) {
+   if (msglen > valid_len) {
struct virtchnl2_get_ptp_caps *ptp_caps =
(struct virtchnl2_get_ptp_caps *)msg;
 
@@ -1889,7 +1889,7 @@ virtchnl2_vc_validate_vf_msg(__rte_unused struct 
virtchnl2_version_info *ver, u3
case VIRTCHNL2_OP_GET_PTP_TX_TSTAMP_LATCHES:
valid_len = sizeof(struct virtchnl2_ptp_tx_tstamp_latches);
 
-   if (msglen >= valid_len) {
+   if (msglen > valid_len) {
struct virtchnl2_ptp_tx_tstamp_latches 
*tx_tstamp_latches =
(struct virtchnl2_ptp_tx_tstamp_latches *)msg;
 
-- 
2.43.0



[PATCH 05/25] common/idpf: added FLOW STEER capability and a vport flag

2024-05-28 Thread Soumyadeep Hore
Removed unused VIRTCHNL2_CAP_ADQ capability and use that bit for
VIRTCHNL2_CAP_INLINE_FLOW_STEER capability.

Added VIRTCHNL2_VPORT_INLINE_FLOW_STEER_ENA port flag to allow
enable/disable per vport.

Signed-off-by: Soumyadeep Hore 
---
 drivers/common/idpf/base/virtchnl2.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/common/idpf/base/virtchnl2.h 
b/drivers/common/idpf/base/virtchnl2.h
index 9a1310ca24..51d982b500 100644
--- a/drivers/common/idpf/base/virtchnl2.h
+++ b/drivers/common/idpf/base/virtchnl2.h
@@ -220,7 +220,7 @@
 #define VIRTCHNL2_CAP_FLOW_DIRECTORBIT(3)
 #define VIRTCHNL2_CAP_SPLITQ_QSCHEDBIT(4)
 #define VIRTCHNL2_CAP_CRC  BIT(5)
-#define VIRTCHNL2_CAP_ADQ  BIT(6)
+#define VIRTCHNL2_CAP_INLINE_FLOW_STEERBIT(6)
 #define VIRTCHNL2_CAP_WB_ON_ITRBIT(7)
 #define VIRTCHNL2_CAP_PROMISC  BIT(8)
 #define VIRTCHNL2_CAP_LINK_SPEED   BIT(9)
@@ -593,7 +593,8 @@ struct virtchnl2_queue_reg_chunks {
 VIRTCHNL2_CHECK_STRUCT_LEN(40, virtchnl2_queue_reg_chunks);
 
 /* VIRTCHNL2_VPORT_FLAGS */
-#define VIRTCHNL2_VPORT_UPLINK_PORTBIT(0)
+#define VIRTCHNL2_VPORT_UPLINK_PORTBIT(0)
+#define VIRTCHNL2_VPORT_INLINE_FLOW_STEER_ENA  BIT(1)
 
 #define VIRTCHNL2_ETH_LENGTH_OF_ADDRESS  6
 
-- 
2.43.0



[PATCH 06/25] common/idpf: moved the IDPF HW into API header file

2024-05-28 Thread Soumyadeep Hore
There is an issue of recursive header file includes in accessing the
idpf_hw structure. The controlq.h has the structure definition and the osdep
header file needs it. The problem is that controlq.h also needs
the osdep header file contents, i.e. the two headers depend on each other.

Today it was resolved in CP by bringing their own idpf_hw definition but
that's not the case for other components which wanted to use the idpf_hw
directly from the shared code.

Signed-off-by: Soumyadeep Hore 
---
 drivers/common/idpf/base/idpf_controlq.h | 110 +--
 drivers/common/idpf/base/idpf_controlq_api.h |  34 +-
 drivers/common/idpf/base/idpf_type.h |  10 +-
 3 files changed, 37 insertions(+), 117 deletions(-)

diff --git a/drivers/common/idpf/base/idpf_controlq.h 
b/drivers/common/idpf/base/idpf_controlq.h
index 80ca06e632..86ed3b7bcb 100644
--- a/drivers/common/idpf/base/idpf_controlq.h
+++ b/drivers/common/idpf/base/idpf_controlq.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2001-2023 Intel Corporation
+ * Copyright(c) 2001-2024 Intel Corporation
  */
 
 #ifndef _IDPF_CONTROLQ_H_
@@ -18,7 +18,7 @@
 
 #define IDPF_CTLQ_DESC_UNUSED(R)   \
((u16)R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->ring_size) + 
\
-  (R)->next_to_clean - (R)->next_to_use - 1))
+ (R)->next_to_clean - (R)->next_to_use - 1))
 
 /* Data type manipulation macros. */
 #define IDPF_HI_DWORD(x)   ((u32)x) >> 16) >> 16) & 0x))
@@ -96,111 +96,6 @@ struct idpf_mbxq_desc {
u32 pf_vf_id;   /* used by CP when sending to PF */
 };
 
-enum idpf_mac_type {
-   IDPF_MAC_UNKNOWN = 0,
-   IDPF_MAC_PF,
-   IDPF_MAC_VF,
-   IDPF_MAC_GENERIC
-};
-
-#define ETH_ALEN 6
-
-struct idpf_mac_info {
-   enum idpf_mac_type type;
-   u8 addr[ETH_ALEN];
-   u8 perm_addr[ETH_ALEN];
-};
-
-#define IDPF_AQ_LINK_UP 0x1
-
-/* PCI bus types */
-enum idpf_bus_type {
-   idpf_bus_type_unknown = 0,
-   idpf_bus_type_pci,
-   idpf_bus_type_pcix,
-   idpf_bus_type_pci_express,
-   idpf_bus_type_reserved
-};
-
-/* PCI bus speeds */
-enum idpf_bus_speed {
-   idpf_bus_speed_unknown  = 0,
-   idpf_bus_speed_33   = 33,
-   idpf_bus_speed_66   = 66,
-   idpf_bus_speed_100  = 100,
-   idpf_bus_speed_120  = 120,
-   idpf_bus_speed_133  = 133,
-   idpf_bus_speed_2500 = 2500,
-   idpf_bus_speed_5000 = 5000,
-   idpf_bus_speed_8000 = 8000,
-   idpf_bus_speed_reserved
-};
-
-/* PCI bus widths */
-enum idpf_bus_width {
-   idpf_bus_width_unknown  = 0,
-   idpf_bus_width_pcie_x1  = 1,
-   idpf_bus_width_pcie_x2  = 2,
-   idpf_bus_width_pcie_x4  = 4,
-   idpf_bus_width_pcie_x8  = 8,
-   idpf_bus_width_32   = 32,
-   idpf_bus_width_64   = 64,
-   idpf_bus_width_reserved
-};
-
-/* Bus parameters */
-struct idpf_bus_info {
-   enum idpf_bus_speed speed;
-   enum idpf_bus_width width;
-   enum idpf_bus_type type;
-
-   u16 func;
-   u16 device;
-   u16 lan_id;
-   u16 bus_id;
-};
-
-/* Function specific capabilities */
-struct idpf_hw_func_caps {
-   u32 num_alloc_vfs;
-   u32 vf_base_id;
-};
-
-/* Define the APF hardware struct to replace other control structs as needed
- * Align to ctlq_hw_info
- */
-struct idpf_hw {
-   /* Some part of BAR0 address space is not mapped by the LAN driver.
-* This results in 2 regions of BAR0 to be mapped by LAN driver which
-* will have its own base hardware address when mapped.
-*/
-   u8 *hw_addr;
-   u8 *hw_addr_region2;
-   u64 hw_addr_len;
-   u64 hw_addr_region2_len;
-
-   void *back;
-
-   /* control queue - send and receive */
-   struct idpf_ctlq_info *asq;
-   struct idpf_ctlq_info *arq;
-
-   /* subsystem structs */
-   struct idpf_mac_info mac;
-   struct idpf_bus_info bus;
-   struct idpf_hw_func_caps func_caps;
-
-   /* pci info */
-   u16 device_id;
-   u16 vendor_id;
-   u16 subsystem_device_id;
-   u16 subsystem_vendor_id;
-   u8 revision_id;
-   bool adapter_stopped;
-
-   LIST_HEAD_TYPE(list_head, idpf_ctlq_info) cq_list_head;
-};
-
 int idpf_ctlq_alloc_ring_res(struct idpf_hw *hw,
 struct idpf_ctlq_info *cq);
 
@@ -210,4 +105,5 @@ void idpf_ctlq_dealloc_ring_res(struct idpf_hw *hw, struct 
idpf_ctlq_info *cq);
 void *idpf_alloc_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem,
 u64 size);
 void idpf_free_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem);
+
 #endif /* _IDPF_CONTROLQ_H_ */
diff --git a/drivers/common/idpf/base/idpf_controlq_api.h 
b/drivers/common/idpf/base/idpf_controlq_api.h
index bce5187981..3ad2da5b2e 100644
--- a/drivers/common/idpf/base/idpf_controlq_api.h
+++ b/drivers/common/idpf/base/idpf_controlq_api.h
@@ -1

[PATCH 07/25] common/idpf: avoid defensive programming

2024-05-28 Thread Soumyadeep Hore
Based on the upstream feedback, driver should not use any
defensive programming strategy by checking for NULL pointers
and other conditional checks unnecessarily in the code flow
to fall back, instead fail and fix the bug in a proper way.

Some of the checks are identified and removed/wrapped
in this patch:
- As the control queue is freed and deleted from the list after the
idpf_ctlq_shutdown call, there is no need to have the ring_size
check in idpf_ctlq_shutdown.
- From the upstream perspective shared code is part of the Linux
driver and it doesn't make sense to add zero 'len' and 'buf_size'
check in idpf_ctlq_add as to start with, driver provides valid
sizes, if not it is a bug.
- Remove cq NULL and zero ring_size check wherever possible as
the IDPF driver code flow does not pass any NULL cq pointer to
the control queue callbacks. If it passes then it is a bug and
should be fixed rather than checking for NULL pointer and falling
back which is not the right way.

Note: Most of the checks are wrapped with __KERNEL__ flag and will
not have any impact on other shared code consumers other than the
IDPF Linux driver as I am not confident if the same reasoning works
for other components as well.

Signed-off-by: Soumyadeep Hore 
---
 drivers/common/idpf/base/idpf_controlq.c | 7 ---
 1 file changed, 7 deletions(-)

diff --git a/drivers/common/idpf/base/idpf_controlq.c 
b/drivers/common/idpf/base/idpf_controlq.c
index 0ba7281a45..4d31c6e6d8 100644
--- a/drivers/common/idpf/base/idpf_controlq.c
+++ b/drivers/common/idpf/base/idpf_controlq.c
@@ -98,9 +98,6 @@ static void idpf_ctlq_shutdown(struct idpf_hw *hw, struct 
idpf_ctlq_info *cq)
 {
idpf_acquire_lock(&cq->cq_lock);
 
-   if (!cq->ring_size)
-   goto shutdown_sq_out;
-
 #ifdef SIMICS_BUILD
wr32(hw, cq->reg.head, 0);
wr32(hw, cq->reg.tail, 0);
@@ -115,7 +112,6 @@ static void idpf_ctlq_shutdown(struct idpf_hw *hw, struct 
idpf_ctlq_info *cq)
/* Set ring_size to 0 to indicate uninitialized queue */
cq->ring_size = 0;
 
-shutdown_sq_out:
idpf_release_lock(&cq->cq_lock);
idpf_destroy_lock(&cq->cq_lock);
 }
@@ -661,9 +657,6 @@ int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 
*num_q_msg,
int ret_code = 0;
u16 i = 0;
 
-   if (!cq || !cq->ring_size)
-   return -ENOBUFS;
-
if (*num_q_msg == 0)
return 0;
else if (*num_q_msg > cq->ring_size)
-- 
2.43.0



[PATCH 08/25] common/idpf: move related defines into enums

2024-05-28 Thread Soumyadeep Hore
Kernel coding style prefers the use of enums, so we must change
all groups of related defines to enums. The names of the enums
are chosen to follow the common part of the naming pattern
as much as possible.

Replaced the common labels from the comments with the enum names.

While at it, modify header description based on upstream feedback.

Signed-off-by: Soumyadeep Hore 
---
 drivers/common/idpf/base/virtchnl2.h  | 2042 ++---
 drivers/common/idpf/base/virtchnl2_lan_desc.h |  859 ---
 2 files changed, 1783 insertions(+), 1118 deletions(-)

diff --git a/drivers/common/idpf/base/virtchnl2.h 
b/drivers/common/idpf/base/virtchnl2.h
index 51d982b500..45e77bbb94 100644
--- a/drivers/common/idpf/base/virtchnl2.h
+++ b/drivers/common/idpf/base/virtchnl2.h
@@ -8,320 +8,404 @@
 /* All opcodes associated with virtchnl 2 are prefixed with virtchnl2 or
  * VIRTCHNL2. Any future opcodes, offloads/capabilities, structures,
  * and defines must be prefixed with virtchnl2 or VIRTCHNL2 to avoid confusion.
+ *
+ * PF/VF uses the virtchnl interface defined in this header file to communicate
+ * with device Control Plane (CP). Driver and the CP may run on different
+ * platforms with different endianness. To avoid byte order discrepancies,
+ * all the structures in this header follow little-endian format.
+ *
+ * This is an interface definition file where existing enums and their values
+ * must remain unchanged over time, so we specify explicit values for all 
enums.
  */
 
 #include "virtchnl2_lan_desc.h"
 
-/* VIRTCHNL2_ERROR_CODES */
-/* success */
-#defineVIRTCHNL2_STATUS_SUCCESS0
-/* Operation not permitted, used in case of command not permitted for sender */
-#defineVIRTCHNL2_STATUS_ERR_EPERM  1
-/* Bad opcode - virtchnl interface problem */
-#defineVIRTCHNL2_STATUS_ERR_ESRCH  3
-/* I/O error - HW access error */
-#defineVIRTCHNL2_STATUS_ERR_EIO5
-/* No such resource - Referenced resource is not allacated */
-#defineVIRTCHNL2_STATUS_ERR_ENXIO  6
-/* Permission denied - Resource is not permitted to caller */
-#defineVIRTCHNL2_STATUS_ERR_EACCES 13
-/* Device or resource busy - In case shared resource is in use by others */
-#defineVIRTCHNL2_STATUS_ERR_EBUSY  16
-/* Object already exists and not free */
-#defineVIRTCHNL2_STATUS_ERR_EEXIST 17
-/* Invalid input argument in command */
-#defineVIRTCHNL2_STATUS_ERR_EINVAL 22
-/* No space left or allocation failure */
-#defineVIRTCHNL2_STATUS_ERR_ENOSPC 28
-/* Parameter out of range */
-#defineVIRTCHNL2_STATUS_ERR_ERANGE 34
-
-/* Op not allowed in current dev mode */
-#defineVIRTCHNL2_STATUS_ERR_EMODE  200
-/* State Machine error - Command sequence problem */
-#defineVIRTCHNL2_STATUS_ERR_ESM201
-
-/* These macros are used to generate compilation errors if a structure/union
- * is not exactly the correct length. It gives a divide by zero error if the
- * structure/union is not of the correct size, otherwise it creates an enum
- * that is never used.
- */
-#define VIRTCHNL2_CHECK_STRUCT_LEN(n, X) enum virtchnl2_static_assert_enum_##X 
\
-{ virtchnl2_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 
0) }
-#define VIRTCHNL2_CHECK_UNION_LEN(n, X) enum virtchnl2_static_asset_enum_##X \
-{ virtchnl2_static_assert_##X = (n)/((sizeof(union X) == (n)) ? 1 : 0) 
}
-
-/* New major set of opcodes introduced and so leaving room for
+/**
+ * enum virtchnl2_status - Error codes.
+ * @VIRTCHNL2_STATUS_SUCCESS: Success
+ * @VIRTCHNL2_STATUS_ERR_EPERM: Operation not permitted, used in case of 
command
+ * not permitted for sender
+ * @VIRTCHNL2_STATUS_ERR_ESRCH: Bad opcode - virtchnl interface problem
+ * @VIRTCHNL2_STATUS_ERR_EIO: I/O error - HW access error
+ * @VIRTCHNL2_STATUS_ERR_ENXIO: No such resource - Referenced resource is not
+ * allocated
+ * @VIRTCHNL2_STATUS_ERR_EACCES: Permission denied - Resource is not permitted
+ *  to caller
+ * @VIRTCHNL2_STATUS_ERR_EBUSY: Device or resource busy - In case shared
+ * resource is in use by others
+ * @VIRTCHNL2_STATUS_ERR_EEXIST: Object already exists and not free
+ * @VIRTCHNL2_STATUS_ERR_EINVAL: Invalid input argument in command
+ * @VIRTCHNL2_STATUS_ERR_ENOSPC: No space left or allocation failure
+ * @VIRTCHNL2_STATUS_ERR_ERANGE: Parameter out of range
+ * @VIRTCHNL2_STATUS_ERR_EMODE: Operation not allowed in current dev mode
+ * @VIRTCHNL2_STATUS_ERR_ESM: State Machine error - Command sequence problem
+#ifndef EXTERNAL_RELEASE
+ * @VIRTCHNL2_STATUS_ERR_OEM_1: OEM_1 error code
+#endif
+ */
+enum virtchnl2_status {
+   VIRTCHNL2_STATUS_SUCCESS= 0,
+   VIRTCHNL2_STATUS_ERR_EPERM  = 1,
+   VIRTCHNL2_STATUS_ERR_ESRCH  = 3,
+   VIRTCHNL2_STATUS_ERR_EIO= 5,
+   VIRTCHNL2_STATUS_ERR_E

[PATCH 09/25] common/idpf: add flex array support to virtchnl2 structures

2024-05-28 Thread Soumyadeep Hore
The current virtchnl header uses 1-sized array to address
the dynamic size of the virtchnl structure. For example in the
following structure, the size of the struct depends on the 'num_chunks'
and we use 'chunks[1]' to dereference each chunk information.

struct virtchnl2_queue_reg_chunks {
__le16 num_chunks;
u8 pad[6];
struct virtchnl2_queue_reg_chunk chunks[1];
};

With the internal Linux upstream feedback that is received on
IDPF driver and also some references available online, it
is discouraged to use 1-sized array fields in the structures,
especially in the new Linux drivers that are going to be
upstreamed. Instead, it is recommended to use flex array fields
for the dynamic sized structures.

The problem with this approach is that C++ doesn't have support
for flex array fields and it might be a problem for the Windows driver.

This patch introduces flex array support for the dynamic sized
structures wrapped with 'FLEX_ARRAY_SUPPORT' flag and should be
defined only if the flex array fields are supported.

Also, there is a special case in virtchnl2_get_ptype_info,
where the struct has nested flex arrays, which is not supported.
To support the flex arrays and not break the message format,
the top level flex array field is removed and the sender/receiver
is expected to parse the message accordingly.

The above reasoning applies for virtchnl2_add_queue_groups as well
but the struct is modified a bit by removing virtchnl2_queue_groups
structure to better support the flex array.

virtchnl2_vc_validate_vf_msg function is refactored to consider the
cases where CP/driver supports or doesn't support the flex array.

Signed-off-by: Soumyadeep Hore 
---
 drivers/common/idpf/base/virtchnl2.h | 600 ---
 1 file changed, 352 insertions(+), 248 deletions(-)

diff --git a/drivers/common/idpf/base/virtchnl2.h 
b/drivers/common/idpf/base/virtchnl2.h
index 45e77bbb94..355e2e3038 100644
--- a/drivers/common/idpf/base/virtchnl2.h
+++ b/drivers/common/idpf/base/virtchnl2.h
@@ -63,9 +63,19 @@ enum virtchnl2_status {
  * This macro is used to generate compilation errors if a structure
  * is not exactly the correct length.
  */
-#define VIRTCHNL2_CHECK_STRUCT_LEN(n, X)   \
-   static_assert((n) == sizeof(struct X),  \
+#define VIRTCHNL2_CHECK_STRUCT_LEN(n, X)   \
+   static_assert((n) == sizeof(struct X),  \
  "Structure length does not match with the expected value")
+#ifdef FLEX_ARRAY_SUPPORT
+#define VIRTCHNL2_CHECK_STRUCT_VAR_LEN(n, X, T)\
+   static_assert((n) == struct_size_t(struct X, T, 1),\
+ "Structure length with flex array does not match with the 
expected value")
+#else
+#define VIRTCHNL2_CHECK_STRUCT_VAR_LEN(n, X, T)\
+   VIRTCHNL2_CHECK_STRUCT_LEN(n, X)
+
+#define STRUCT_VAR_LEN 1
+#endif /* FLEX_ARRAY_SUPPORT */
 
 /**
  * New major set of opcodes introduced and so leaving room for
@@ -270,6 +280,43 @@ enum virtchnl2_cap_other {
VIRTCHNL2_CAP_OEM   = BIT_ULL(63),
 };
 
+/**
+ * enum virtchnl2_action_types - Available actions for sideband flow steering
+ * @VIRTCHNL2_ACTION_DROP: Drop the packet
+ * @VIRTCHNL2_ACTION_PASSTHRU: Forward the packet to the next classifier/stage
+ * @VIRTCHNL2_ACTION_QUEUE: Forward the packet to a receive queue
+ * @VIRTCHNL2_ACTION_Q_GROUP: Forward the packet to a receive queue group
+ * @VIRTCHNL2_ACTION_MARK: Mark the packet with specific marker value
+ * @VIRTCHNL2_ACTION_COUNT: Increment the corresponding counter
+ */
+
+enum virtchnl2_action_types {
+   VIRTCHNL2_ACTION_DROP   = BIT(0),
+   VIRTCHNL2_ACTION_PASSTHRU   = BIT(1),
+   VIRTCHNL2_ACTION_QUEUE  = BIT(2),
+   VIRTCHNL2_ACTION_Q_GROUP= BIT(3),
+   VIRTCHNL2_ACTION_MARK   = BIT(4),
+   VIRTCHNL2_ACTION_COUNT  = BIT(5),
+};
+
+/* Flow type capabilities for Flow Steering and Receive-Side Scaling */
+enum virtchnl2_flow_types {
+   VIRTCHNL2_FLOW_IPV4_TCP = BIT(0),
+   VIRTCHNL2_FLOW_IPV4_UDP = BIT(1),
+   VIRTCHNL2_FLOW_IPV4_SCTP= BIT(2),
+   VIRTCHNL2_FLOW_IPV4_OTHER   = BIT(3),
+   VIRTCHNL2_FLOW_IPV6_TCP = BIT(4),
+   VIRTCHNL2_FLOW_IPV6_UDP = BIT(5),
+   VIRTCHNL2_FLOW_IPV6_SCTP= BIT(6),
+   VIRTCHNL2_FLOW_IPV6_OTHER   = BIT(7),
+   VIRTCHNL2_FLOW_IPV4_AH  = BIT(8),
+   VIRTCHNL2_FLOW_IPV4_ESP = BIT(9),
+   VIRTCHNL2_FLOW_IPV4_AH_ESP  = BIT(10),
+   VIRTCHNL2_FLOW_IPV6_AH  = BIT(11),
+   VIRTCHNL2_FLOW_IPV6_ESP = BIT(12),
+   VIRTCHNL2_FLOW_IPV6_AH_ESP  = BIT(13),
+};
+
 /**
  * enum virtchnl2_txq_sched_mode - Transmit Queue Scheduling Modes
  * @VIRTCHNL2_TXQ_SCHED_MODE_QUEUE: Queue mode is the legacy mode i.e. inorder
@@ -711,21 +758,26 @@ VIRTCHNL2_CHECK_STRUCT_LEN(32, virtchnl2_queue_reg_chunk);
 struct virt

Re: [PATCH v4 0/7] Add ODM DMA device

2024-05-28 Thread Jerin Jacob
On Mon, May 27, 2024 at 8:47 PM Anoob Joseph  wrote:
>
> Add Odyssey ODM DMA device. This PMD abstracts ODM hardware unit on
> Odyssey SoC which can perform mem to mem copies.
>
> The hardware unit can support upto 32 queues (vchan) and 16 VFs. It
> supports 'fill' operation with specific values. It also supports
> SG mode of operation with upto 4 src pointers and 4 destination
> pointers.
>
> The PMD is tested with both unit tests and performance applications.
>
> Changes in v4
> - Added release notes
> - Addressed review comments from Jerin
>
> Changes in v3
> - Addressed build failure with stdatomic stage in CI
>
> Changes in v2
> - Addressed build failure in CI
> - Moved update to usertools as separate patch
>
> Anoob Joseph (2):
>   dma/odm: add framework for ODM DMA device
>   dma/odm: add hardware defines
>
> Gowrishankar Muthukrishnan (3):
>   dma/odm: add dev init and fini
>   dma/odm: add device ops
>   dma/odm: add stats
>
> Vidya Sagar Velumuri (2):
>   dma/odm: add copy and copy sg ops
>   dma/odm: add remaining ops


Series applied to dpdk-next-net-mrvl/for-main. Thanks


[PATCH 10/25] common/idpf: avoid variable 0-init

2024-05-28 Thread Soumyadeep Hore
Don't initialize the variables if not needed.

Also use 'err' instead of 'status', 'ret_code', 'ret' etc.
for consistency and change the return label 'sq_send_command_out'
to 'err_unlock'.

Signed-off-by: Soumyadeep Hore 
---
 drivers/common/idpf/base/idpf_controlq.c  | 60 +--
 .../common/idpf/base/idpf_controlq_setup.c| 16 ++---
 2 files changed, 37 insertions(+), 39 deletions(-)

diff --git a/drivers/common/idpf/base/idpf_controlq.c 
b/drivers/common/idpf/base/idpf_controlq.c
index 4d31c6e6d8..d2e9fdc06d 100644
--- a/drivers/common/idpf/base/idpf_controlq.c
+++ b/drivers/common/idpf/base/idpf_controlq.c
@@ -61,7 +61,7 @@ static void idpf_ctlq_init_regs(struct idpf_hw *hw, struct 
idpf_ctlq_info *cq,
  */
 static void idpf_ctlq_init_rxq_bufs(struct idpf_ctlq_info *cq)
 {
-   int i = 0;
+   int i;
 
for (i = 0; i < cq->ring_size; i++) {
struct idpf_ctlq_desc *desc = IDPF_CTLQ_DESC(cq, i);
@@ -134,7 +134,7 @@ int idpf_ctlq_add(struct idpf_hw *hw,
 {
struct idpf_ctlq_info *cq;
bool is_rxq = false;
-   int status = 0;
+   int err;
 
if (!qinfo->len || !qinfo->buf_size ||
qinfo->len > IDPF_CTLQ_MAX_RING_SIZE ||
@@ -164,16 +164,16 @@ int idpf_ctlq_add(struct idpf_hw *hw,
is_rxq = true;
/* fallthrough */
case IDPF_CTLQ_TYPE_MAILBOX_TX:
-   status = idpf_ctlq_alloc_ring_res(hw, cq);
+   err = idpf_ctlq_alloc_ring_res(hw, cq);
break;
default:
-   status = -EINVAL;
+   err = -EINVAL;
break;
}
 
-   if (status)
+   if (err)
 #ifdef NVME_CPF
-   return status;
+   return err;
 #else
goto init_free_q;
 #endif
@@ -187,7 +187,7 @@ int idpf_ctlq_add(struct idpf_hw *hw,
idpf_calloc(hw, qinfo->len,
sizeof(struct idpf_ctlq_msg *));
if (!cq->bi.tx_msg) {
-   status = -ENOMEM;
+   err = -ENOMEM;
goto init_dealloc_q_mem;
}
 #endif
@@ -203,17 +203,16 @@ int idpf_ctlq_add(struct idpf_hw *hw,
 
 #ifndef NVME_CPF
*cq_out = cq;
-   return status;
+   return err;
 
 init_dealloc_q_mem:
/* free ring buffers and the ring itself */
idpf_ctlq_dealloc_ring_res(hw, cq);
 init_free_q:
idpf_free(hw, cq);
-   cq = NULL;
 
 #endif
-   return status;
+   return err;
 }
 
 /**
@@ -249,8 +248,8 @@ int idpf_ctlq_init(struct idpf_hw *hw, u8 num_q,
 #endif
 {
struct idpf_ctlq_info *cq = NULL, *tmp = NULL;
-   int ret_code = 0;
-   int i = 0;
+   int err;
+   int i;
 
LIST_INIT(&hw->cq_list_head);
 
@@ -261,19 +260,19 @@ int idpf_ctlq_init(struct idpf_hw *hw, u8 num_q,
cq = *(ctlq + i);
 
 #endif 
-   ret_code = idpf_ctlq_add(hw, qinfo, &cq);
-   if (ret_code)
+   err = idpf_ctlq_add(hw, qinfo, &cq);
+   if (err)
goto init_destroy_qs;
}
 
-   return ret_code;
+   return err;
 
 init_destroy_qs:
LIST_FOR_EACH_ENTRY_SAFE(cq, tmp, &hw->cq_list_head,
 idpf_ctlq_info, cq_list)
idpf_ctlq_remove(hw, cq);
 
-   return ret_code;
+   return err;
 }
 
 /**
@@ -307,9 +306,9 @@ int idpf_ctlq_send(struct idpf_hw *hw, struct 
idpf_ctlq_info *cq,
   u16 num_q_msg, struct idpf_ctlq_msg q_msg[])
 {
struct idpf_ctlq_desc *desc;
-   int num_desc_avail = 0;
-   int status = 0;
-   int i = 0;
+   int num_desc_avail;
+   int err = 0;
+   int i;
 
if (!cq || !cq->ring_size)
return -ENOBUFS;
@@ -319,8 +318,8 @@ int idpf_ctlq_send(struct idpf_hw *hw, struct 
idpf_ctlq_info *cq,
/* Ensure there are enough descriptors to send all messages */
num_desc_avail = IDPF_CTLQ_DESC_UNUSED(cq);
if (num_desc_avail == 0 || num_desc_avail < num_q_msg) {
-   status = -ENOSPC;
-   goto sq_send_command_out;
+   err = -ENOSPC;
+   goto err_unlock;
}
 
for (i = 0; i < num_q_msg; i++) {
@@ -391,10 +390,10 @@ int idpf_ctlq_send(struct idpf_hw *hw, struct 
idpf_ctlq_info *cq,
 
wr32(hw, cq->reg.tail, cq->next_to_use);
 
-sq_send_command_out:
+err_unlock:
idpf_release_lock(&cq->cq_lock);
 
-   return status;
+   return err;
 }
 
 /**
@@ -418,7 +417,7 @@ static int __idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, 
u16 *clean_count,
struct idpf_ctlq_msg *msg_status[], bool force)
 {
struct idpf_ctlq_desc *desc;
-   u16 i = 0, num_to_clean;
+   u16 i, num_to_clean;
u16 ntc, desc_err;
int ret = 0;
 
@@ -534,7 +533,6 @@ int idpf_ctlq_post_rx_buffs(struct idpf_hw *hw, struct 
idpf_ctl

[PATCH 11/25] common/idpf: support added for xn transactions

2024-05-28 Thread Soumyadeep Hore
Add support for xn transaction APIs to send/receive control queue
messages.

Signed-off-by: Soumyadeep Hore 
---
 drivers/common/idpf/base/idpf_osdep.c |  71 +
 drivers/common/idpf/base/idpf_osdep.h |  80 -
 drivers/common/idpf/base/idpf_xn.c| 439 ++
 drivers/common/idpf/base/idpf_xn.h|  90 ++
 drivers/common/idpf/base/meson.build  |   2 +
 5 files changed, 681 insertions(+), 1 deletion(-)
 create mode 100644 drivers/common/idpf/base/idpf_osdep.c
 create mode 100644 drivers/common/idpf/base/idpf_xn.c
 create mode 100644 drivers/common/idpf/base/idpf_xn.h

diff --git a/drivers/common/idpf/base/idpf_osdep.c 
b/drivers/common/idpf/base/idpf_osdep.c
new file mode 100644
index 00..2faf5ef6a3
--- /dev/null
+++ b/drivers/common/idpf/base/idpf_osdep.c
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2024 Intel Corporation
+ */
+
+#include "idpf_osdep.h"
+
+int idpf_compl_event_init(struct completion *completion)
+{
+   int poll_fd;
+
+   poll_fd = epoll_create(1);
+   if (poll_fd < 0) {
+   perror("epoll create failed\n");
+   return EPERM;
+   }
+   completion->poll_fd = poll_fd;
+
+   return 0;
+}
+
+int idpf_compl_event_reinit(struct completion *completion)
+{
+   struct epoll_event event;
+   int fd, ret;
+
+   fd = eventfd(0,0);
+   if (fd < 0) {
+   perror("Eventfd open failed\n");
+   return EPERM;
+   }
+   completion->event_fd = fd;
+   event.events = EPOLLIN | EPOLLERR | EPOLLHUP;
+   event.data.fd = fd;
+   ret = epoll_ctl(completion->poll_fd, EPOLL_CTL_ADD, fd, &event);
+   if (ret < 0) {
+   perror("Eventfd open failed\n");
+   close(fd);
+   return EPERM;
+   }
+   return 0;
+}
+
+int idpf_compl_event_sig(struct completion *completion, uint64_t status)
+{
+   int ret;
+
+   ret = write(completion->event_fd, &status, sizeof(status));
+
+   return (ret > 0 ? 0 : 1);
+}
+
+int idpf_compl_event_wait(struct completion *completion, int timeout)
+{
+   struct epoll_event event = { 0 };
+   uint64_t status;
+   int ret;
+
+   ret = epoll_wait(completion->poll_fd, &event, 1, timeout);
+   if (ret > 0) {
+   printf("Command Completed successfully\n");
+   ret = read(completion->event_fd, &status, sizeof(status));
+   }
+   close(completion->event_fd);
+
+   return (ret > 0 ? 0 : 1);
+}
+
+void idpf_compl_event_deinit(struct completion *completion)
+{
+   close(completion->poll_fd);
+}
diff --git a/drivers/common/idpf/base/idpf_osdep.h 
b/drivers/common/idpf/base/idpf_osdep.h
index 74a376cb13..bd11eab351 100644
--- a/drivers/common/idpf/base/idpf_osdep.h
+++ b/drivers/common/idpf/base/idpf_osdep.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2001-2023 Intel Corporation
+ * Copyright(c) 2001-2024 Intel Corporation
  */
 
 #ifndef _IDPF_OSDEP_H_
@@ -12,6 +12,11 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
+#include 
+#include 
 
 #include 
 #include 
@@ -353,4 +358,77 @@ idpf_hweight32(u32 num)
 
 #endif
 
+#ifndef IDPF_DFLT_MBX_BUF_SIZE
+#define IDPF_DFLT_MBX_BUF_SIZE 4096
+#endif
+
+#ifndef __iovec_defined
+#define __iovec_defined 1
+
+#define __need_size_t
+
+/* Structure for scatter/gather I/O.  */
+struct iovec
+  {
+void *iov_base;/* Pointer to data.  */
+size_t iov_len;/* Length of data.  */
+  };
+
+#endif
+
+#define IDPF_IOVEC struct iovec
+
+#define IDPF_LIST_HEAD(name, type) SLIST_HEAD(name, type)
+#define IDPF_LIST_HEAD_INIT(head)  SLIST_INIT(head)
+#define IDPF_LIST_ENTRY(type)  SLIST_ENTRY(type)
+#define IDPF_LIST_ADD(head, node)  SLIST_INSERT_HEAD(head, 
node, entry)
+#define IDPF_LIST_DEL(head)
SLIST_REMOVE_HEAD(head, entry)
+#define IDPF_LIST_FOR_EACH(var, head)  SLIST_FOREACH(var, head, entry)
+#define IDPF_LIST_EMPTY(head)  SLIST_EMPTY(head)
+#define IDPF_LIST_FIRST(head)  SLIST_FIRST(head)
+
+/* OSdep changes */
+#define IDPF_LOCK pthread_mutex_t
+#define IDPF_LOCK_INIT(mutex) pthread_mutex_init(mutex, NULL)
+#define IDPF_LOCK_DESTROY(mutex) pthread_mutex_destroy(mutex)
+#define IDPF_LOCK_ACQUIRE(mutex) pthread_mutex_lock(mutex)
+#define IDPF_LOCK_RELEASE(mutex) pthread_mutex_unlock(mutex)
+
+#ifndef FIELD_PREP
+
+#define __bf_shf(x) (__builtin_ffsll(x) - 1)
+#define FIELD_PREP(_mask, _val)
\
+({ 
\
+((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask);  
\
+})
+
+#define FIELD_GET(_mask, _reg) 
\
+({

[PATCH 12/25] common/idpf: rename of VIRTCHNL2 CAP INLINE FLOW STEER

2024-05-28 Thread Soumyadeep Hore
This capability bit indicates both inline as well as side band flow
steering capability.

Signed-off-by: Soumyadeep Hore 
---
 drivers/common/idpf/base/virtchnl2.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/common/idpf/base/virtchnl2.h 
b/drivers/common/idpf/base/virtchnl2.h
index 355e2e3038..97e3454df9 100644
--- a/drivers/common/idpf/base/virtchnl2.h
+++ b/drivers/common/idpf/base/virtchnl2.h
@@ -258,7 +258,7 @@ enum virtchnl2_cap_other {
VIRTCHNL2_CAP_FLOW_DIRECTOR = BIT_ULL(3),
VIRTCHNL2_CAP_SPLITQ_QSCHED = BIT_ULL(4),
VIRTCHNL2_CAP_CRC   = BIT_ULL(5),
-   VIRTCHNL2_CAP_INLINE_FLOW_STEER = BIT_ULL(6),
+   VIRTCHNL2_CAP_FLOW_STEER= BIT_ULL(6),
VIRTCHNL2_CAP_WB_ON_ITR = BIT_ULL(7),
VIRTCHNL2_CAP_PROMISC   = BIT_ULL(8),
VIRTCHNL2_CAP_LINK_SPEED= BIT_ULL(9),
-- 
2.43.0



[PATCH 13/25] common/idpf: update compiler padding

2024-05-28 Thread Soumyadeep Hore
With the introduction of the flex array support, DECLARE_FLEX_ARRAY
macro was used in virtchnl2_rss_key struct with the wrong assumption
that it adds the required padding byte (8 byte structure alignment),
to avoid the compiler added padding. But the actual padding byte
was added by the compiler (found using pahole tool).

Everything worked with the current structure format because it
didn't change the virtchnl message format on the wire except for
the extra padding byte which was added at the end of the message.
With DPCP (doesn't yet support flex arrays) using the virtchnl message
size checks, it fails the SET RSS key message because the driver
(supports flex arrays) sends one byte more than the expected
size.

To fix this issue and also not break the backward compatibility,
use "packed" structure attribute which tells the compiler not
to introduce any padding. Also drop the DECLARE_FLEX_ARRAY
macro as it is not needed.

Signed-off-by: Soumyadeep Hore 
---
 drivers/common/idpf/base/virtchnl2.h | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/common/idpf/base/virtchnl2.h 
b/drivers/common/idpf/base/virtchnl2.h
index 97e3454df9..95fca647b1 100644
--- a/drivers/common/idpf/base/virtchnl2.h
+++ b/drivers/common/idpf/base/virtchnl2.h
@@ -1669,13 +1669,13 @@ struct virtchnl2_rss_key {
 
__le16 key_len;
u8 pad;
+   u8 key[STRUCT_VAR_LEN];
 #ifdef FLEX_ARRAY_SUPPORT
-   DECLARE_FLEX_ARRAY(u8, key);
+} __packed;
 #else
-   u8 key[1];
-#endif /* FLEX_ARRAY_SUPPORT */
 };
-VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_rss_key);
+#endif /* FLEX_ARRAY_SUPPORT */
+VIRTCHNL2_CHECK_STRUCT_VAR_LEN(8, virtchnl2_rss_key, key);
 
 /**
  * struct virtchnl2_queue_chunk - Chunk of contiguous queues
-- 
2.43.0



[PATCH 14/25] common/idpf: avoid compiler padding

2024-05-28 Thread Soumyadeep Hore
In the arm random config file, kconfig option 'CONFIG_AEABI' is
disabled which results in adding the compiler flag '-mabi=apcs-gnu'.
This causes the compiler to add padding in virtchnl2_ptype
structure to align it to 8 bytes, resulting in size check failure.

Avoid the compiler padding by using "__packed" structure
attribute for the virtchnl2_ptype struct. Also align the
structure by using "__aligned(2)" for better code optimization.

Signed-off-by: Soumyadeep Hore 
---
 drivers/common/idpf/base/virtchnl2.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/common/idpf/base/virtchnl2.h 
b/drivers/common/idpf/base/virtchnl2.h
index 95fca647b1..aadb2aafff 100644
--- a/drivers/common/idpf/base/virtchnl2.h
+++ b/drivers/common/idpf/base/virtchnl2.h
@@ -1454,7 +1454,11 @@ struct virtchnl2_ptype {
u8 proto_id_count;
__le16 pad;
__le16 proto_id[STRUCT_VAR_LEN];
+#ifdef FLEX_ARRAY_SUPPORT
+} __packed __aligned(2);
+#else
 };
+#endif /* FLEX_ARRAY_SUPPORT */
 VIRTCHNL2_CHECK_STRUCT_VAR_LEN(8, virtchnl2_ptype, proto_id);
 
 /**
-- 
2.43.0



[PATCH 15/25] common/idpf: add wmb before tail

2024-05-28 Thread Soumyadeep Hore
Based on customer feedback from their attempt to address some bugs,
introduce a memory barrier before posting the ctlq tail. This makes
sure memory writes have a chance to take place before HW starts
messing with the descriptors.

Signed-off-by: Soumyadeep Hore 
---
 drivers/common/idpf/base/idpf_controlq.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/common/idpf/base/idpf_controlq.c 
b/drivers/common/idpf/base/idpf_controlq.c
index d2e9fdc06d..6807e83f18 100644
--- a/drivers/common/idpf/base/idpf_controlq.c
+++ b/drivers/common/idpf/base/idpf_controlq.c
@@ -625,6 +625,8 @@ int idpf_ctlq_post_rx_buffs(struct idpf_hw *hw, struct 
idpf_ctlq_info *cq,
/* Wrap to end of end ring since current ntp is 0 */
cq->next_to_post = cq->ring_size - 1;
 
+   idpf_wmb();
+
wr32(hw, cq->reg.tail, cq->next_to_post);
}
 
-- 
2.43.0



[PATCH 16/25] common/idpf: add a new Tx context descriptor structure

2024-05-28 Thread Soumyadeep Hore
Adding a new structure for the context descriptor that contains
the support for timesync packets, where the index for timestamping is set.

Signed-off-by: Soumyadeep Hore 
---
 drivers/common/idpf/base/idpf_lan_txrx.h | 20 +++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/common/idpf/base/idpf_lan_txrx.h 
b/drivers/common/idpf/base/idpf_lan_txrx.h
index c9eaeb5d3f..8b14ee9bf3 100644
--- a/drivers/common/idpf/base/idpf_lan_txrx.h
+++ b/drivers/common/idpf/base/idpf_lan_txrx.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2001-2023 Intel Corporation
+ * Copyright(c) 2001-2024 Intel Corporation
  */
 
 #ifndef _IDPF_LAN_TXRX_H_
@@ -286,6 +286,24 @@ struct idpf_flex_tx_tso_ctx_qw {
 };
 
 union idpf_flex_tx_ctx_desc {
+   /* DTYPE = IDPF_TX_DESC_DTYPE_CTX (0x01) */
+   struct  {
+   struct {
+   u8 rsv[4];
+   __le16 l2tag2;
+   u8 rsv_2[2];
+   } qw0;
+   struct {
+   __le16 cmd_dtype;
+   __le16 tsyn_reg_l;
+#define IDPF_TX_DESC_CTX_TSYN_L_M  GENMASK(15, 14)
+   __le16 tsyn_reg_h;
+#define IDPF_TX_DESC_CTX_TSYN_H_M  GENMASK(15, 0)
+   __le16 mss;
+#define IDPF_TX_DESC_CTX_MSS_M GENMASK(14, 2)
+   } qw1;
+   } tsyn;
+   
/* DTYPE = IDPF_TX_DESC_DTYPE_FLEX_TSO_CTX (0x05) */
struct {
struct idpf_flex_tx_tso_ctx_qw qw0;
-- 
2.43.0



[PATCH 17/25] common/idpf: removing redundant implementation

2024-05-28 Thread Soumyadeep Hore
Removing idpf_common.c file and its definitions as it
is primarily used for WINDOWS and ESX driver support.

Signed-off-by: Soumyadeep Hore 
---
 drivers/common/idpf/base/idpf_common.c| 382 --
 drivers/common/idpf/base/idpf_prototype.h |  23 --
 drivers/common/idpf/base/meson.build  |   1 -
 3 files changed, 406 deletions(-)
 delete mode 100644 drivers/common/idpf/base/idpf_common.c

diff --git a/drivers/common/idpf/base/idpf_common.c 
b/drivers/common/idpf/base/idpf_common.c
deleted file mode 100644
index 7181a7f14c..00
--- a/drivers/common/idpf/base/idpf_common.c
+++ /dev/null
@@ -1,382 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2001-2023 Intel Corporation
- */
-
-#include "idpf_type.h"
-#include "idpf_prototype.h"
-#include 
-
-
-/**
- * idpf_set_mac_type - Sets MAC type
- * @hw: pointer to the HW structure
- *
- * This function sets the mac type of the adapter based on the
- * vendor ID and device ID stored in the hw structure.
- */
-int idpf_set_mac_type(struct idpf_hw *hw)
-{
-   int status = 0;
-
-   DEBUGFUNC("Set MAC type\n");
-
-   if (hw->vendor_id == IDPF_INTEL_VENDOR_ID) {
-   switch (hw->device_id) {
-   case IDPF_DEV_ID_PF:
-   hw->mac.type = IDPF_MAC_PF;
-   break;
-   case IDPF_DEV_ID_VF:
-   hw->mac.type = IDPF_MAC_VF;
-   break;
-   default:
-   hw->mac.type = IDPF_MAC_GENERIC;
-   break;
-   }
-   } else {
-   status = -ENODEV;
-   }
-
-   DEBUGOUT2("Setting MAC type found mac: %d, returns: %d\n",
- hw->mac.type, status);
-   return status;
-}
-
-/**
- *  idpf_init_hw - main initialization routine
- *  @hw: pointer to the hardware structure
- *  @ctlq_size: struct to pass ctlq size data
- */
-int idpf_init_hw(struct idpf_hw *hw, struct idpf_ctlq_size ctlq_size)
-{
-   struct idpf_ctlq_create_info *q_info;
-   int status = 0;
-   struct idpf_ctlq_info *cq = NULL;
-
-   /* Setup initial control queues */
-   q_info = (struct idpf_ctlq_create_info *)
-idpf_calloc(hw, 2, sizeof(struct idpf_ctlq_create_info));
-   if (!q_info)
-   return -ENOMEM;
-
-   q_info[0].type = IDPF_CTLQ_TYPE_MAILBOX_TX;
-   q_info[0].buf_size = ctlq_size.asq_buf_size;
-   q_info[0].len  = ctlq_size.asq_ring_size;
-   q_info[0].id   = -1; /* default queue */
-
-   if (hw->mac.type == IDPF_MAC_PF) {
-   q_info[0].reg.head = PF_FW_ATQH;
-   q_info[0].reg.tail = PF_FW_ATQT;
-   q_info[0].reg.len  = PF_FW_ATQLEN;
-   q_info[0].reg.bah  = PF_FW_ATQBAH;
-   q_info[0].reg.bal  = PF_FW_ATQBAL;
-   q_info[0].reg.len_mask = PF_FW_ATQLEN_ATQLEN_M;
-   q_info[0].reg.len_ena_mask = PF_FW_ATQLEN_ATQENABLE_M;
-   q_info[0].reg.head_mask= PF_FW_ATQH_ATQH_M;
-   } else {
-   q_info[0].reg.head = VF_ATQH;
-   q_info[0].reg.tail = VF_ATQT;
-   q_info[0].reg.len  = VF_ATQLEN;
-   q_info[0].reg.bah  = VF_ATQBAH;
-   q_info[0].reg.bal  = VF_ATQBAL;
-   q_info[0].reg.len_mask = VF_ATQLEN_ATQLEN_M;
-   q_info[0].reg.len_ena_mask = VF_ATQLEN_ATQENABLE_M;
-   q_info[0].reg.head_mask= VF_ATQH_ATQH_M;
-   }
-
-   q_info[1].type = IDPF_CTLQ_TYPE_MAILBOX_RX;
-   q_info[1].buf_size = ctlq_size.arq_buf_size;
-   q_info[1].len  = ctlq_size.arq_ring_size;
-   q_info[1].id   = -1; /* default queue */
-
-   if (hw->mac.type == IDPF_MAC_PF) {
-   q_info[1].reg.head = PF_FW_ARQH;
-   q_info[1].reg.tail = PF_FW_ARQT;
-   q_info[1].reg.len  = PF_FW_ARQLEN;
-   q_info[1].reg.bah  = PF_FW_ARQBAH;
-   q_info[1].reg.bal  = PF_FW_ARQBAL;
-   q_info[1].reg.len_mask = PF_FW_ARQLEN_ARQLEN_M;
-   q_info[1].reg.len_ena_mask = PF_FW_ARQLEN_ARQENABLE_M;
-   q_info[1].reg.head_mask= PF_FW_ARQH_ARQH_M;
-   } else {
-   q_info[1].reg.head = VF_ARQH;
-   q_info[1].reg.tail = VF_ARQT;
-   q_info[1].reg.len  = VF_ARQLEN;
-   q_info[1].reg.bah  = VF_ARQBAH;
-   q_info[1].reg.bal  = VF_ARQBAL;
-   q_info[1].reg.len_mask = VF_ARQLEN_ARQLEN_M;
-   q_info[1].reg.len_ena_mask = VF_ARQLEN_ARQENABLE_M;
-   q_info[1].reg.head_mask= VF_ARQH_ARQH_M;
-   }
-
-   status = idpf_ctlq_init(hw, 2, q_info);
-   if (status) {
-  

[PATCH 18/25] common/idpf: removing redundant functionality of virtchnl2

2024-05-28 Thread Soumyadeep Hore
The function virtchnl2_vc_validate_vf_msg() currently has
implementation based on Linux 6.5 kernel and is redundant
for dpdk.

If required in the future, a new implementation will be added.

Signed-off-by: Soumyadeep Hore 
---
 drivers/common/idpf/base/virtchnl2.h | 326 ---
 1 file changed, 326 deletions(-)

diff --git a/drivers/common/idpf/base/virtchnl2.h 
b/drivers/common/idpf/base/virtchnl2.h
index aadb2aafff..90232e82a8 100644
--- a/drivers/common/idpf/base/virtchnl2.h
+++ b/drivers/common/idpf/base/virtchnl2.h
@@ -2083,331 +2083,5 @@ static inline const char *virtchnl2_op_str(__le32 
v_opcode)
  *
  * Validate msg format against struct for each opcode.
  */
-static inline int
-virtchnl2_vc_validate_vf_msg(struct virtchnl2_version_info *ver, u32 v_opcode,
-u8 *msg, __le16 msglen)
-{
-   bool err_msg_format = false;
-#ifdef FLEX_ARRAY_SUPPORT
-   bool is_flex_array = true;
-#else
-   bool is_flex_array = false;
-#endif /* !FLEX_ARRAY_SUPPORT */
-   __le32 valid_len = 0;
-   __le32 num_chunks;
-   __le32 num_qgrps;
-
-   /* It is possible that the FLEX_ARRAY_SUPPORT flag is not defined
-* by all the users of virtchnl2 header file. Let's take an example
-* where the driver doesn't support flex array and CP does. In this
-* case, the size of the VIRTCHNL2_OP_CREATE_VPORT message sent from
-* the driver would be 192 bytes because of the 1-sized array in the
-* virtchnl2_create_vport structure whereas the message size expected
-* by the CP would be 160 bytes (as the driver doesn't send any chunk
-* information on create vport). This means, both 160 and 192 byte
-* message length are valid. The math for the message size check of the
-* opcodes consider the said scenarios for the flex array supported
-* structures.
-*/
-   /* Validate message length */
-   switch (v_opcode) {
-   case VIRTCHNL2_OP_VERSION:
-   valid_len = sizeof(struct virtchnl2_version_info);
-   break;
-   case VIRTCHNL2_OP_GET_CAPS:
-   valid_len = sizeof(struct virtchnl2_get_capabilities);
-   break;
-   case VIRTCHNL2_OP_CREATE_VPORT:
-   num_chunks = ((struct virtchnl2_create_vport 
*)msg)->chunks.num_chunks;
-   valid_len = struct_size_t(struct virtchnl2_create_vport,
- chunks.chunks, num_chunks);
-
-   if (!is_flex_array)
-   /* Remove the additional chunk included in the
-* struct_size_t calculation in case of no flex array
-* support, due to the 1-sized array.
-*/
-   valid_len -= sizeof(struct virtchnl2_queue_reg_chunk);
-
-   /* Zero chunks is allowed as input */
-   if (!num_chunks && msglen > valid_len)
-   valid_len += sizeof(struct virtchnl2_queue_reg_chunk);
-
-   break;
-   case VIRTCHNL2_OP_NON_FLEX_CREATE_ADI:
-   valid_len = sizeof(struct virtchnl2_non_flex_create_adi);
-   if (msglen >= valid_len) {
-   struct virtchnl2_non_flex_create_adi *cadi =
-   (struct virtchnl2_non_flex_create_adi *)msg;
-
-   if (cadi->chunks.num_chunks == 0) {
-   /* Zero chunks is allowed as input */
-   break;
-   }
-
-   if (cadi->vchunks.num_vchunks == 0) {
-   err_msg_format = true;
-   break;
-   }
-   valid_len += (cadi->chunks.num_chunks - 1) *
- sizeof(struct virtchnl2_queue_reg_chunk);
-   valid_len += (cadi->vchunks.num_vchunks - 1) *
- sizeof(struct virtchnl2_vector_chunk);
-   }
-   break;
-   case VIRTCHNL2_OP_NON_FLEX_DESTROY_ADI:
-   valid_len = sizeof(struct virtchnl2_non_flex_destroy_adi);
-   break;
-   case VIRTCHNL2_OP_DESTROY_VPORT:
-   case VIRTCHNL2_OP_ENABLE_VPORT:
-   case VIRTCHNL2_OP_DISABLE_VPORT:
-   valid_len = sizeof(struct virtchnl2_vport);
-   break;
-   case VIRTCHNL2_OP_CONFIG_TX_QUEUES:
-   num_chunks = ((struct virtchnl2_config_tx_queues 
*)msg)->num_qinfo;
-   if (!num_chunks) {
-   err_msg_format = true;
-   break;
-   }
-
-   valid_len = struct_size_t(struct virtchnl2_config_tx_queues,
- qinfo, num_chunks);
-   if (!is_flex_array)
-   valid_len -= sizeof(struct virtchnl2_txq_info);
-
-   break;
-   case V

[PATCH 19/25] common/idpf: updating common code of latest base driver

2024-05-28 Thread Soumyadeep Hore
Based on the latest implementation of struct VIRTCHNL_QUEUE_TYPE_RX
in virtchnl2.h, the qg_info field is removed and its members are
updated in the above-mentioned structure. Hence updating the same.

Signed-off-by: Soumyadeep Hore 
---
 drivers/common/idpf/idpf_common_virtchnl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/common/idpf/idpf_common_virtchnl.c 
b/drivers/common/idpf/idpf_common_virtchnl.c
index c46ed50eb5..f00202f43c 100644
--- a/drivers/common/idpf/idpf_common_virtchnl.c
+++ b/drivers/common/idpf/idpf_common_virtchnl.c
@@ -366,7 +366,7 @@ idpf_vc_queue_grps_add(struct idpf_vport *vport,
int err = -1;
 
size = sizeof(*p2p_queue_grps_info) +
-  (p2p_queue_grps_info->qg_info.num_queue_groups - 1) *
+  (p2p_queue_grps_info->num_queue_groups - 1) *
   sizeof(struct virtchnl2_queue_group_info);
 
memset(&args, 0, sizeof(args));
-- 
2.43.0



[PATCH 20/25] net/cpfl: updating cpfl based on latest base driver

2024-05-28 Thread Soumyadeep Hore
Based on the latest implementation of struct VIRTCHNL_QUEUE_TYPE_RX
in virtchnl2.h, the qg_info field is removed and its members are
added in the above-mentioned structure. Hence updating the same.

Signed-off-by: Soumyadeep Hore 
---
 drivers/net/cpfl/cpfl_ethdev.c | 28 ++--
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c
index 7e718e9e19..e707043bf7 100644
--- a/drivers/net/cpfl/cpfl_ethdev.c
+++ b/drivers/net/cpfl/cpfl_ethdev.c
@@ -2393,18 +2393,18 @@ cpfl_p2p_q_grps_add(struct idpf_vport *vport,
int ret;
 
p2p_queue_grps_info->vport_id = vport->vport_id;
-   p2p_queue_grps_info->qg_info.num_queue_groups = CPFL_P2P_NB_QUEUE_GRPS;
-   p2p_queue_grps_info->qg_info.groups[0].num_rx_q = 
CPFL_MAX_P2P_NB_QUEUES;
-   p2p_queue_grps_info->qg_info.groups[0].num_rx_bufq = 
CPFL_P2P_NB_RX_BUFQ;
-   p2p_queue_grps_info->qg_info.groups[0].num_tx_q = 
CPFL_MAX_P2P_NB_QUEUES;
-   p2p_queue_grps_info->qg_info.groups[0].num_tx_complq = 
CPFL_P2P_NB_TX_COMPLQ;
-   p2p_queue_grps_info->qg_info.groups[0].qg_id.queue_group_id = 
CPFL_P2P_QUEUE_GRP_ID;
-   p2p_queue_grps_info->qg_info.groups[0].qg_id.queue_group_type = 
VIRTCHNL2_QUEUE_GROUP_P2P;
-   p2p_queue_grps_info->qg_info.groups[0].rx_q_grp_info.rss_lut_size = 0;
-   p2p_queue_grps_info->qg_info.groups[0].tx_q_grp_info.tx_tc = 0;
-   p2p_queue_grps_info->qg_info.groups[0].tx_q_grp_info.priority = 0;
-   p2p_queue_grps_info->qg_info.groups[0].tx_q_grp_info.is_sp = 0;
-   p2p_queue_grps_info->qg_info.groups[0].tx_q_grp_info.pir_weight = 0;
+   p2p_queue_grps_info->num_queue_groups = CPFL_P2P_NB_QUEUE_GRPS;
+   p2p_queue_grps_info->groups[0].num_rx_q = CPFL_MAX_P2P_NB_QUEUES;
+   p2p_queue_grps_info->groups[0].num_rx_bufq = CPFL_P2P_NB_RX_BUFQ;
+   p2p_queue_grps_info->groups[0].num_tx_q = CPFL_MAX_P2P_NB_QUEUES;
+   p2p_queue_grps_info->groups[0].num_tx_complq = CPFL_P2P_NB_TX_COMPLQ;
+   p2p_queue_grps_info->groups[0].qg_id.queue_group_id = 
CPFL_P2P_QUEUE_GRP_ID;
+   p2p_queue_grps_info->groups[0].qg_id.queue_group_type = 
VIRTCHNL2_QUEUE_GROUP_P2P;
+   p2p_queue_grps_info->groups[0].rx_q_grp_info.rss_lut_size = 0;
+   p2p_queue_grps_info->groups[0].tx_q_grp_info.tx_tc = 0;
+   p2p_queue_grps_info->groups[0].tx_q_grp_info.priority = 0;
+   p2p_queue_grps_info->groups[0].tx_q_grp_info.is_sp = 0;
+   p2p_queue_grps_info->groups[0].tx_q_grp_info.pir_weight = 0;
 
ret = idpf_vc_queue_grps_add(vport, p2p_queue_grps_info, 
p2p_q_vc_out_info);
if (ret != 0) {
@@ -2423,13 +2423,13 @@ cpfl_p2p_queue_info_init(struct cpfl_vport *cpfl_vport,
struct virtchnl2_queue_reg_chunks *vc_chunks_out;
int i, type;
 
-   if (p2p_q_vc_out_info->qg_info.groups[0].qg_id.queue_group_type !=
+   if (p2p_q_vc_out_info->groups[0].qg_id.queue_group_type !=
VIRTCHNL2_QUEUE_GROUP_P2P) {
PMD_DRV_LOG(ERR, "Add queue group response mismatch.");
return -EINVAL;
}
 
-   vc_chunks_out = &p2p_q_vc_out_info->qg_info.groups[0].chunks;
+   vc_chunks_out = &p2p_q_vc_out_info->groups[0].chunks;
 
for (i = 0; i < vc_chunks_out->num_chunks; i++) {
type = vc_chunks_out->chunks[i].type;
-- 
2.43.0



[PATCH 21/25] common/idpf: defining ethernet address length macro

2024-05-28 Thread Soumyadeep Hore
Introducing the ETH_ALEN macro for the MAC address length.
This definition is used in idpf_xn.c and was missing
previously.

Signed-off-by: Soumyadeep Hore 
---
 drivers/common/idpf/idpf_common_device.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/common/idpf/idpf_common_device.h 
b/drivers/common/idpf/idpf_common_device.h
index bfa927a5ff..031e4bd09d 100644
--- a/drivers/common/idpf/idpf_common_device.h
+++ b/drivers/common/idpf/idpf_common_device.h
@@ -31,6 +31,8 @@
 
 #define IDPF_DFLT_INTERVAL 16
 
+#define ETH_ALEN 6
+
 #define IDPF_RX_MAX_PTYPE_PROTO_IDS32
 #define IDPF_RX_MAX_PTYPE_SZ   (sizeof(struct virtchnl2_ptype) +   \
 (sizeof(uint16_t) *\
-- 
2.43.0



[PATCH 22/25] common/idpf: increasing size of xn index

2024-05-28 Thread Soumyadeep Hore
Increasing size of xn_index in idpf_xn.c from u8 to u16
for fixing compilation warning.

Signed-off-by: Soumyadeep Hore 
---
 drivers/common/idpf/base/idpf_xn.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/common/idpf/base/idpf_xn.c 
b/drivers/common/idpf/base/idpf_xn.c
index 5492564903..07625db3ba 100644
--- a/drivers/common/idpf/base/idpf_xn.c
+++ b/drivers/common/idpf/base/idpf_xn.c
@@ -128,7 +128,7 @@ static int idpf_ctlq_xn_process_recv(struct 
idpf_ctlq_xn_recv_params *params, st
IDPF_IOVEC recv_buf;
u16 msg_cookie;
void *payload;
-   u8 xn_index;
+   u16 xn_index;
int status;
int ret;
 
-- 
2.43.0



[PATCH 23/25] common/idpf: redefining idpf vc queue switch

2024-05-28 Thread Soumyadeep Hore
A uint32_t type parameter has been introduced in the function
idpf_vc_queue_switch(). This helps in providing the right queue type
while calling the function.

Signed-off-by: Soumyadeep Hore 
---
 drivers/common/idpf/idpf_common_virtchnl.c | 8 ++--
 drivers/common/idpf/idpf_common_virtchnl.h | 2 +-
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/common/idpf/idpf_common_virtchnl.c 
b/drivers/common/idpf/idpf_common_virtchnl.c
index f00202f43c..de511da788 100644
--- a/drivers/common/idpf/idpf_common_virtchnl.c
+++ b/drivers/common/idpf/idpf_common_virtchnl.c
@@ -769,15 +769,11 @@ idpf_vc_ena_dis_one_queue(struct idpf_vport *vport, 
uint16_t qid,
 
 int
 idpf_vc_queue_switch(struct idpf_vport *vport, uint16_t qid,
-bool rx, bool on)
+bool rx, bool on, uint32_t type)
 {
-   uint32_t type;
int err, queue_id;
 
-   /* switch txq/rxq */
-   type = rx ? VIRTCHNL2_QUEUE_TYPE_RX : VIRTCHNL2_QUEUE_TYPE_TX;
-
-   if (type == VIRTCHNL2_QUEUE_TYPE_RX)
+   if (rx)
queue_id = vport->chunks_info.rx_start_qid + qid;
else
queue_id = vport->chunks_info.tx_start_qid + qid;
diff --git a/drivers/common/idpf/idpf_common_virtchnl.h 
b/drivers/common/idpf/idpf_common_virtchnl.h
index 73446ded86..d6555978d5 100644
--- a/drivers/common/idpf/idpf_common_virtchnl.h
+++ b/drivers/common/idpf/idpf_common_virtchnl.h
@@ -31,7 +31,7 @@ int idpf_vc_cmd_execute(struct idpf_adapter *adapter,
struct idpf_cmd_info *args);
 __rte_internal
 int idpf_vc_queue_switch(struct idpf_vport *vport, uint16_t qid,
-bool rx, bool on);
+bool rx, bool on, uint32_t type);
 __rte_internal
 int idpf_vc_queues_ena_dis(struct idpf_vport *vport, bool enable);
 __rte_internal
-- 
2.43.0



[PATCH 24/25] net/idpf: updating idpf vc queue switch in idpf

2024-05-28 Thread Soumyadeep Hore
A uint32_t type parameter has been introduced in the function
idpf_vc_queue_switch(). This helps in providing the right queue type
while calling the function.

Signed-off-by: Soumyadeep Hore 
---
 drivers/net/idpf/idpf_rxtx.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/idpf/idpf_rxtx.c b/drivers/net/idpf/idpf_rxtx.c
index 64f2235580..61d6a0b42c 100644
--- a/drivers/net/idpf/idpf_rxtx.c
+++ b/drivers/net/idpf/idpf_rxtx.c
@@ -595,7 +595,7 @@ idpf_rx_queue_start(struct rte_eth_dev *dev, uint16_t 
rx_queue_id)
}
 
/* Ready to switch the queue on */
-   err = idpf_vc_queue_switch(vport, rx_queue_id, true, true);
+   err = idpf_vc_queue_switch(vport, rx_queue_id, true, true, 
VIRTCHNL2_QUEUE_TYPE_RX);
if (err != 0) {
PMD_DRV_LOG(ERR, "Failed to switch RX queue %u on",
rx_queue_id);
@@ -646,7 +646,7 @@ idpf_tx_queue_start(struct rte_eth_dev *dev, uint16_t 
tx_queue_id)
}
 
/* Ready to switch the queue on */
-   err = idpf_vc_queue_switch(vport, tx_queue_id, false, true);
+   err = idpf_vc_queue_switch(vport, tx_queue_id, false, true, 
VIRTCHNL2_QUEUE_TYPE_TX);
if (err != 0) {
PMD_DRV_LOG(ERR, "Failed to switch TX queue %u on",
tx_queue_id);
@@ -669,7 +669,7 @@ idpf_rx_queue_stop(struct rte_eth_dev *dev, uint16_t 
rx_queue_id)
if (rx_queue_id >= dev->data->nb_rx_queues)
return -EINVAL;
 
-   err = idpf_vc_queue_switch(vport, rx_queue_id, true, false);
+   err = idpf_vc_queue_switch(vport, rx_queue_id, true, false, 
VIRTCHNL2_QUEUE_TYPE_RX);
if (err != 0) {
PMD_DRV_LOG(ERR, "Failed to switch RX queue %u off",
rx_queue_id);
@@ -701,7 +701,7 @@ idpf_tx_queue_stop(struct rte_eth_dev *dev, uint16_t 
tx_queue_id)
if (tx_queue_id >= dev->data->nb_tx_queues)
return -EINVAL;
 
-   err = idpf_vc_queue_switch(vport, tx_queue_id, false, false);
+   err = idpf_vc_queue_switch(vport, tx_queue_id, false, false, 
VIRTCHNL2_QUEUE_TYPE_TX);
if (err != 0) {
PMD_DRV_LOG(ERR, "Failed to switch TX queue %u off",
tx_queue_id);
-- 
2.43.0



[PATCH 25/25] net/cpfl: updating idpf vc queue switch in cpfl

2024-05-28 Thread Soumyadeep Hore
A uint32_t type parameter has been introduced in the function
idpf_vc_queue_switch(). This helps in providing the right queue type
while calling the function.

Signed-off-by: Soumyadeep Hore 
---
 drivers/net/cpfl/cpfl_ethdev.c | 8 
 drivers/net/cpfl/cpfl_rxtx.c   | 8 
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c
index e707043bf7..350a301ee9 100644
--- a/drivers/net/cpfl/cpfl_ethdev.c
+++ b/drivers/net/cpfl/cpfl_ethdev.c
@@ -1907,7 +1907,7 @@ cpfl_stop_cfgqs(struct cpfl_adapter_ext *adapter)
int i, ret;
 
for (i = 0; i < CPFL_TX_CFGQ_NUM; i++) {
-   ret = idpf_vc_queue_switch(&adapter->ctrl_vport.base, i, false, 
false);
+   ret = idpf_vc_queue_switch(&adapter->ctrl_vport.base, i, false, 
false, VIRTCHNL2_QUEUE_TYPE_CONFIG_TX);
if (ret) {
PMD_DRV_LOG(ERR, "Fail to disable Tx config queue.");
return ret;
@@ -1915,7 +1915,7 @@ cpfl_stop_cfgqs(struct cpfl_adapter_ext *adapter)
}
 
for (i = 0; i < CPFL_RX_CFGQ_NUM; i++) {
-   ret = idpf_vc_queue_switch(&adapter->ctrl_vport.base, i, true, 
false);
+   ret = idpf_vc_queue_switch(&adapter->ctrl_vport.base, i, true, 
false, VIRTCHNL2_QUEUE_TYPE_CONFIG_RX);
if (ret) {
PMD_DRV_LOG(ERR, "Fail to disable Rx config queue.");
return ret;
@@ -1943,7 +1943,7 @@ cpfl_start_cfgqs(struct cpfl_adapter_ext *adapter)
}
 
for (i = 0; i < CPFL_TX_CFGQ_NUM; i++) {
-   ret = idpf_vc_queue_switch(&adapter->ctrl_vport.base, i, false, 
true);
+   ret = idpf_vc_queue_switch(&adapter->ctrl_vport.base, i, false, 
true, VIRTCHNL2_QUEUE_TYPE_CONFIG_TX);
if (ret) {
PMD_DRV_LOG(ERR, "Fail to enable Tx config queue.");
return ret;
@@ -1951,7 +1951,7 @@ cpfl_start_cfgqs(struct cpfl_adapter_ext *adapter)
}
 
for (i = 0; i < CPFL_RX_CFGQ_NUM; i++) {
-   ret = idpf_vc_queue_switch(&adapter->ctrl_vport.base, i, true, 
true);
+   ret = idpf_vc_queue_switch(&adapter->ctrl_vport.base, i, true, 
true, VIRTCHNL2_QUEUE_TYPE_CONFIG_RX);
if (ret) {
PMD_DRV_LOG(ERR, "Fail to enable Rx config queue.");
return ret;
diff --git a/drivers/net/cpfl/cpfl_rxtx.c b/drivers/net/cpfl/cpfl_rxtx.c
index ab8bec4645..7b3349d745 100644
--- a/drivers/net/cpfl/cpfl_rxtx.c
+++ b/drivers/net/cpfl/cpfl_rxtx.c
@@ -1200,7 +1200,7 @@ cpfl_rx_queue_start(struct rte_eth_dev *dev, uint16_t 
rx_queue_id)
}
 
/* Ready to switch the queue on */
-   err = idpf_vc_queue_switch(vport, rx_queue_id, true, true);
+   err = idpf_vc_queue_switch(vport, rx_queue_id, true, true, 
VIRTCHNL2_QUEUE_TYPE_RX);
if (err != 0) {
PMD_DRV_LOG(ERR, "Failed to switch RX queue %u on",
rx_queue_id);
@@ -1252,7 +1252,7 @@ cpfl_tx_queue_start(struct rte_eth_dev *dev, uint16_t 
tx_queue_id)
}
 
/* Ready to switch the queue on */
-   err = idpf_vc_queue_switch(vport, tx_queue_id, false, true);
+   err = idpf_vc_queue_switch(vport, tx_queue_id, false, true, 
VIRTCHNL2_QUEUE_TYPE_TX);
if (err != 0) {
PMD_DRV_LOG(ERR, "Failed to switch TX queue %u on",
tx_queue_id);
@@ -1283,7 +1283,7 @@ cpfl_rx_queue_stop(struct rte_eth_dev *dev, uint16_t 
rx_queue_id)
 rx_queue_id - 
cpfl_vport->nb_data_txq,
 true, false);
else
-   err = idpf_vc_queue_switch(vport, rx_queue_id, true, false);
+   err = idpf_vc_queue_switch(vport, rx_queue_id, true, false, 
VIRTCHNL2_QUEUE_TYPE_RX);
if (err != 0) {
PMD_DRV_LOG(ERR, "Failed to switch RX queue %u off",
rx_queue_id);
@@ -1331,7 +1331,7 @@ cpfl_tx_queue_stop(struct rte_eth_dev *dev, uint16_t 
tx_queue_id)
 tx_queue_id - 
cpfl_vport->nb_data_txq,
 false, false);
else
-   err = idpf_vc_queue_switch(vport, tx_queue_id, false, false);
+   err = idpf_vc_queue_switch(vport, tx_queue_id, false, false, 
VIRTCHNL2_QUEUE_TYPE_TX);
if (err != 0) {
PMD_DRV_LOG(ERR, "Failed to switch TX queue %u off",
tx_queue_id);
-- 
2.43.0



Re: [RFC v2] eal: provide option to use compiler memcpy instead of RTE

2024-05-28 Thread Mattias Rönnblom

On 2024-05-28 09:43, Mattias Rönnblom wrote:

Provide build option to have functions in <rte_memcpy.h> delegate to
the standard compiler/libc memcpy(), instead of using the various
traditional, handcrafted, per-architecture rte_memcpy()
implementations.

A new meson build option 'use_cc_memcpy' is added. The default is
true. It's not obvious what should be the default, but compiler
memcpy() is enabled by default in this RFC so any tests run with this
patch use the new approach.

One purpose of this RFC is to make it easy to evaluate the costs and
benefits of a switch.



I've tested this patch some with DSW micro benchmarks, and the result is 
a 2.5% reduction of the DSW+testapp overhead with cc/libc memcpy. GCC 11.4.


We've also run a characteristic test suite of a large, real-world app. 
Here, we saw no effect. GCC 10.5.


x86_64 in both cases (Skylake and Raptor Lake).

Last time we did the same, there was a noticeable performance 
degradation in both the above cases.


This is not a lot of data points, but I think we should consider 
making the custom RTE memcpy() implementations optional in the next 
release, and if no-one complains, remove the implementations in the next 
release.


(Whether or not [or how long] to keep the wrapper API is another question.)




Re: [RFC v2] eal: provide option to use compiler memcpy instead of RTE

2024-05-28 Thread Bruce Richardson
On Tue, May 28, 2024 at 09:43:54AM +0200, Mattias Rönnblom wrote:
> Provide build option to have functions in <rte_memcpy.h> delegate to
> the standard compiler/libc memcpy(), instead of using the various
> traditional, handcrafted, per-architecture rte_memcpy()
> implementations.
> 
> A new meson build option 'use_cc_memcpy' is added. The default is
> true. It's not obvious what should be the default, but compiler
> memcpy() is enabled by default in this RFC so any tests run with this
> patch use the new approach.
> 
> One purpose of this RFC is to make it easy to evaluate the costs and
> benefits of a switch.
> 
> Only Loongarch, ARM and x86 is implemented. Only x86 is tested.
> 
> RFC v2:
>  * Fix bug where rte_memcpy.h was not installed on x86.
>  * Made attempt to make Loongarch compile.
> 
> Signed-off-by: Mattias Rönnblom 
> ---
>  config/meson.build |  1 +
>  lib/eal/arm/include/rte_memcpy.h   | 10 +
>  lib/eal/include/generic/rte_memcpy.h   | 62 +++---
>  lib/eal/loongarch/include/rte_memcpy.h | 52 ++---
>  lib/eal/x86/include/meson.build|  1 +
>  lib/eal/x86/include/rte_memcpy.h   | 11 -
>  meson_options.txt  |  2 +
>  7 files changed, 82 insertions(+), 57 deletions(-)
> 

I really support the long-term goal here of eliminating the need for us to
maintain our own memcpy. This looks a good idea to see how things perform.
If we do decide to take this patch, having the default be regular memcpy
should help with static analysis and other tooling, which would be aware of
memcpy but not rte_memcpy.

/Bruce


Re: [RFC v2] eal: provide option to use compiler memcpy instead of RTE

2024-05-28 Thread Bruce Richardson
On Tue, May 28, 2024 at 10:19:15AM +0200, Mattias Rönnblom wrote:
> On 2024-05-28 09:43, Mattias Rönnblom wrote:
> > Provide build option to have functions in <rte_memcpy.h> delegate to
> > the standard compiler/libc memcpy(), instead of using the various
> > traditional, handcrafted, per-architecture rte_memcpy()
> > implementations.
> > 
> > A new meson build option 'use_cc_memcpy' is added. The default is
> > true. It's not obvious what should be the default, but compiler
> > memcpy() is enabled by default in this RFC so any tests run with this
> > patch use the new approach.
> > 
> > One purpose of this RFC is to make it easy to evaluate the costs and
> > benefits of a switch.
> > 
> 
> I've tested this patch some with DSW micro benchmarks, and the result is a
> 2.5% reduction of the DSW+testapp overhead with cc/libc memcpy. GCC 11.4.
> 
> We've also run characteristic test suite of a large, real world app. Here,
> we saw no effect. GCC 10.5.
> 
> x86_64 in both cases (Skylake and Raptor Lake).
> 
> Last time we did the same, there were a noticeable performance degradation
> in both the above cases.
> 
> This is not a lot of data points, but I think it we should consider making
> the custom RTE memcpy() implementations optional in the next release, and if
> no-one complains, remove the implementations in the next release.
> 
> (Whether or not [or how long] to keep the wrapper API is another question.)
> 
> 

The other instance I've heard mention of in the past is virtio/vhost, which
used to have a speedup from the custom memcpy.

My own thinking on these cases, is that for targetted settings like these,
we should look to have local memcpy functions written - taking account of
the specifics of each usecase. For virtio/vhost for example, we can have
assumptions around host buffer alignment, and we also can be pretty
confident we are copying to another CPU. For DSW, or other eventdev cases,
we would only be looking at copies of multiples of 16, with guaranteed
8-byte alignment on both source and destination. Writing efficient copy fns
for specific scenarios can be faster and more effective than trying to
write a general, optimized in all cases, memcpy. It also discourages the
use of non-libc memcpy except where really necessary.

Naturally, if we find there are a lot of cases where use of libc memcpy
slows us down, we will want to keep a general rte_memcpy. However, I'd hope
the slowdown cases are very few.

/Bruce


[PATCH] net/cnxk: fix promiscuous state after MAC change

2024-05-28 Thread Rahul Bhansali
If promiscuous mode is enabled and default MAC address is set
again then promiscuous mode gets disabled in hardware.

This change will restore promiscuous behavior after configuring
default MAC address.

Fixes: 5fe86db2a0dd ("net/cnxk: support MAC address set")
Cc: sta...@dpdk.org

Signed-off-by: Rahul Bhansali 
---
 drivers/net/cnxk/cnxk_ethdev_ops.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/cnxk/cnxk_ethdev_ops.c 
b/drivers/net/cnxk/cnxk_ethdev_ops.c
index c8260fcb9c..b1093dd584 100644
--- a/drivers/net/cnxk/cnxk_ethdev_ops.c
+++ b/drivers/net/cnxk/cnxk_ethdev_ops.c
@@ -451,6 +451,13 @@ cnxk_nix_mac_addr_set(struct rte_eth_dev *eth_dev, struct 
rte_ether_addr *addr)
roc_nix_npc_mac_addr_set(nix, dev->mac_addr);
goto exit;
}
+
+   if (eth_dev->data->promiscuous) {
+   rc = roc_nix_mac_promisc_mode_enable(nix, true);
+   if (rc)
+   plt_err("Failed to setup promisc mode in mac, 
rc=%d(%s)", rc,
+   roc_error_msg_get(rc));
+   }
}
 
/* Update mac address to cnxk ethernet device */
-- 
2.25.1



Re: [PATCH v2] dma/cnxk: add higher chunk size support

2024-05-28 Thread Jerin Jacob
On Mon, May 27, 2024 at 6:13 PM  wrote:
>
> From: Pavan Nikhilesh 
>
> Add support to configure higher chunk size by using the new
> OPEN_V2 mailbox, this improves performance as the number of
> mempool allocs are reduced.
> Add timeout when polling for queue idle state.
>
> Signed-off-by: Pavan Nikhilesh 
> Signed-off-by: Amit Prakash Shukla 
> ---
> v2 Changes:
> - Update release notes.
> - Use timeout when polling for queue idle state.

Applied to dpdk-next-net-mrvl/for-main. Thanks


Re: [RFC v2] eal: provide option to use compiler memcpy instead of RTE

2024-05-28 Thread Mattias Rönnblom

On 2024-05-28 10:27, Bruce Richardson wrote:

On Tue, May 28, 2024 at 10:19:15AM +0200, Mattias Rönnblom wrote:

On 2024-05-28 09:43, Mattias Rönnblom wrote:

Provide build option to have functions in <rte_memcpy.h> delegate to
the standard compiler/libc memcpy(), instead of using the various
traditional, handcrafted, per-architecture rte_memcpy()
implementations.

A new meson build option 'use_cc_memcpy' is added. The default is
true. It's not obvious what should be the default, but compiler
memcpy() is enabled by default in this RFC so any tests run with this
patch use the new approach.

One purpose of this RFC is to make it easy to evaluate the costs and
benefits of a switch.



I've tested this patch some with DSW micro benchmarks, and the result is a
2.5% reduction of the DSW+testapp overhead with cc/libc memcpy. GCC 11.4.

We've also run characteristic test suite of a large, real world app. Here,
we saw no effect. GCC 10.5.

x86_64 in both cases (Skylake and Raptor Lake).

Last time we did the same, there were a noticeable performance degradation
in both the above cases.

This is not a lot of data points, but I think it we should consider making
the custom RTE memcpy() implementations optional in the next release, and if
no-one complains, remove the implementations in the next release.

(Whether or not [or how long] to keep the wrapper API is another question.)




The other instance I've heard mention of in the past is virtio/vhost, which
used to have a speedup from the custom memcpy.

My own thinking on these cases, is that for targetted settings like these,
we should look to have local memcpy functions written - taking account of
the specifics of each usecase. For virtio/vhost for example, we can have
assumptions around host buffer alignment, and we also can be pretty
confident we are copying to another CPU. For DSW, or other eventdev cases,
we would only be looking at copies of multiples of 16, with guaranteed
8-byte alignment on both source and destination. Writing efficient copy fns


In such cases, you should first try to tell the compiler that it's safe 
to assume that the pointers have a certain alignment.


/* Copy a fixed 256-byte block from src to dst with a plain memcpy();
 * no alignment information is given to the compiler.
 */
void copy256(void *dst, const void *src)
{
	const size_t nbytes = 256;

	memcpy(dst, src, nbytes);
}

/* Same fixed 256-byte copy, but both pointers are declared to the compiler
 * as 32-byte aligned via __builtin_assume_aligned(); callers must actually
 * pass pointers with that alignment.
 */
void copy256_a(void *dst, const void *src)
{
	memcpy(__builtin_assume_aligned(dst, 32),
	       __builtin_assume_aligned(src, 32), 256);
}

The first will generate loads/stores without alignment restrictions, 
while the latter will use things like vmovdqa or vmovaps.


(I doubt there's much of a performance difference though, if any at all.)


for specific scenarios can be faster and more effective than trying to
write a general, optimized in all cases, memcpy. It also discourages the
use of non-libc memcpy except where really necessary.

Naturally, if we find there are a lot of cases where use of libc memcpy
slows us down, we will want to keep a general rte_memcpy. However, I'd hope
the slowdown cases are very few.

/Bruce


RE: [RFC v2] eal: provide option to use compiler memcpy instead of RTE

2024-05-28 Thread Morten Brørup
> From: Mattias Rönnblom [mailto:hof...@lysator.liu.se]
> Sent: Tuesday, 28 May 2024 11.00
> 
> On 2024-05-28 10:27, Bruce Richardson wrote:
> > On Tue, May 28, 2024 at 10:19:15AM +0200, Mattias Rönnblom wrote:
> >> On 2024-05-28 09:43, Mattias Rönnblom wrote:
> >>> Provide build option to have functions in <rte_memcpy.h> delegate to
> >>> the standard compiler/libc memcpy(), instead of using the various
> >>> traditional, handcrafted, per-architecture rte_memcpy()
> >>> implementations.
> >>>
> >>> A new meson build option 'use_cc_memcpy' is added. The default is
> >>> true. It's not obvious what should be the default, but compiler
> >>> memcpy() is enabled by default in this RFC so any tests run with this
> >>> patch use the new approach.
> >>>
> >>> One purpose of this RFC is to make it easy to evaluate the costs and
> >>> benefits of a switch.
> >>>
> >>
> >> I've tested this patch some with DSW micro benchmarks, and the result is a
> >> 2.5% reduction of the DSW+testapp overhead with cc/libc memcpy. GCC 11.4.
> >>
> >> We've also run characteristic test suite of a large, real world app. Here,
> >> we saw no effect. GCC 10.5.
> >>
> >> x86_64 in both cases (Skylake and Raptor Lake).
> >>
> >> Last time we did the same, there were a noticeable performance degradation
> >> in both the above cases.

Mattias, which compiler was that?

As previously mentioned in another thread, I'm worried about memcpy performance 
with older compilers.
DPDK officially supports GCC 4.9 and clang 3.4 [1].
I don't think degrading performance when using supported compilers is 
considered acceptable.

Alternatively, we could change the DPDK compiler policy from "supported" to 
"works with (but might not perform optimally)".

[1]: 
https://doc.dpdk.org/guides-21.11/linux_gsg/sys_reqs.html#compilation-of-the-dpdk

> >>
> >> This is not a lot of data points, but I think it we should consider making
> >> the custom RTE memcpy() implementations optional in the next release, and
> if
> >> no-one complains, remove the implementations in the next release.
> >>
> >> (Whether or not [or how long] to keep the wrapper API is another question.)
> >>
> >> 
> >
> > The other instance I've heard mention of in the past is virtio/vhost, which
> > used to have a speedup from the custom memcpy.
> >
> > My own thinking on these cases, is that for targetted settings like these,
> > we should look to have local memcpy functions written - taking account of
> > the specifics of each usecase. For virtio/vhost for example, we can have
> > assumptions around host buffer alignment, and we also can be pretty
> > confident we are copying to another CPU. For DSW, or other eventdev cases,
> > we would only be looking at copies of multiples of 16, with guaranteed
> > 8-byte alignment on both source and destination. Writing efficient copy fns
> 
> In such cases, you should first try to tell the compiler that it's safe
> to assume that the pointers have a certain alignment.
> 
> void copy256(void *dst, const void *src)
> {
>  memcpy(dst, src, 256);
> }
> 
> void copy256_a(void *dst, const void *src)
> {
>  void *dst_a = __builtin_assume_aligned(dst, 32);
>  const void *src_a = __builtin_assume_aligned(src, 32);
>  memcpy(dst_a, src_a, 256);
> }
> 
> The first will generate loads/stores without alignment restrictions,
> while the latter will use things like vmovdqa or vmovaps.
> 
> (I doubt there's much of a performance difference though, if any at all.)

Interesting.

> 
> > for specific scenarios can be faster and more effective than trying to
> > write a general, optimized in all cases, memcpy. It also discourages the
> > use of non-libc memcpy except where really necessary.

Good idea, Bruce.
I have previously worked on an optimized memcpy, where information about 
alignment, multiples, non-temporal source/destination, etc. is passed as flags 
to the function [2]. But it turned into too much work, so I never finished it.

If we start with local memcpy functions optimized for each specific use case, 
we still have the option of consolidating them into a common rte_memcpy 
function later. It will also reveal which flags/features such a common function 
needs to support.

[2]: https://inbox.dpdk.org/dev/20221010064600.16495-1...@smartsharesystems.com/

> >
> > Naturally, if we find there are a lot of cases where use of libc memcpy
> > slows us down, we will want to keep a general rte_memcpy. However, I'd hope
> > the slowdown cases are very few.
> >
> > /Bruce


RE: [EXTERNAL] Re: [PATCH v5] cnxk: disable building template files

2024-05-28 Thread Pavan Nikhilesh Bhagavatula
> On Mon, May 27, 2024 at 09:04:29PM +0530, pbhagavat...@marvell.com
> wrote:
> > From: Pavan Nikhilesh 
> >
> > Disable building template files when CNXK_DIS_TMPLT_FUNC
> > is defined as a part of c_args.
> > This option can be used when reworking datapath or debugging
> > issues to reduce Rx/Tx path compilation time.
> >
> > Example command:
> > meson build -Dc_args='-DCNXK_DIS_TMPLT_FUNC'
> > -Dexamples=all  --cross-file config/arm/arm64_cn10k_linux_gcc
> >
> Should this option be set in CI by default, or in test-meson-builds by
> default? When do we need to avoid setting this flag, vs setting it?
> 

Yes, we can set this option when testing builds to speed things up.
We would want to avoid setting the flag when we are packaging or generating
release binaries as it will negatively affect fastpath performance.

I am not sure how to integrate to test-meson-builds as there is no target 
option 
for cn10k/cn9k.

> Thanks,
> /Bruce

Thanks,
Pavan.


Re: [EXTERNAL] Re: [PATCH v5] cnxk: disable building template files

2024-05-28 Thread Bruce Richardson
On Tue, May 28, 2024 at 09:23:12AM +, Pavan Nikhilesh Bhagavatula
wrote:
> > On Mon, May 27, 2024 at 09:04:29PM +0530, pbhagavat...@marvell.com
> > wrote:
> > > From: Pavan Nikhilesh 
> > >
> > > Disable building template files when CNXK_DIS_TMPLT_FUNC is defined
> > > as a part of c_args.  This option can be used when reworking datapath
> > > or debugging issues to reduce Rx/Tx path compilation time.
> > >
> > > Example command: meson build -Dc_args='-DCNXK_DIS_TMPLT_FUNC'
> > > -Dexamples=all  --cross-file config/arm/arm64_cn10k_linux_gcc
> > >
> > Should this option be set in CI by default, or in test-meson-builds by
> > default? When do we need to avoid setting this flag, vs setting it?
> > 
> 
> Yes, we can set this option when testing builds to speed things up.  We
> would want to avoid setting the flag when we are packaging or generating
> release binaries as it will negatively affect fastpath performance.
> 
> I am not sure how to integrate to test-meson-builds as there is no target
> option for cn10k/cn9k.
>
So, are these template files already not built when building for other
platforms other than cn10k/cn9k? 


[PATCH] net/mlx5: support HW flag action

2024-05-28 Thread Shun Hao
The HW flag action is supported by adding the tag to packet, with the
default tag value.

Signed-off-by: Shun Hao 
Acked-by: Suanming Mou 
---
 drivers/net/mlx5/mlx5_flow_hw.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c
index 68c5a36bbb..9274a18ae0 100644
--- a/drivers/net/mlx5/mlx5_flow_hw.c
+++ b/drivers/net/mlx5/mlx5_flow_hw.c
@@ -2292,6 +2292,15 @@ __flow_hw_actions_translate(struct rte_eth_dev *dev,
}
acts->rule_acts[dr_pos].action = priv->hw_def_miss;
break;
+   case RTE_FLOW_ACTION_TYPE_FLAG:
+   acts->mark = true;
+   acts->rule_acts[dr_pos].tag.value =
+   mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT);
+   acts->rule_acts[dr_pos].action =
+   priv->hw_tag[!!attr->group];
+   __atomic_add_fetch(&priv->hws_mark_refcnt, 1, 
__ATOMIC_RELAXED);
+   flow_hw_rxq_flag_set(dev, true);
+   break;
case RTE_FLOW_ACTION_TYPE_MARK:
acts->mark = true;
if (masks->conf &&
@@ -6415,6 +6424,10 @@ mlx5_flow_hw_actions_validate(struct rte_eth_dev *dev,
if (ret < 0)
return ret;
break;
+   case RTE_FLOW_ACTION_TYPE_FLAG:
+   /* TODO: Validation logic */
+   action_flags |= MLX5_FLOW_ACTION_FLAG;
+   break;
case RTE_FLOW_ACTION_TYPE_MARK:
/* TODO: Validation logic */
action_flags |= MLX5_FLOW_ACTION_MARK;
@@ -6606,6 +6619,7 @@ flow_hw_actions_validate(struct rte_eth_dev *dev,
 
 static enum mlx5dr_action_type mlx5_hw_dr_action_types[] = {
[RTE_FLOW_ACTION_TYPE_MARK] = MLX5DR_ACTION_TYP_TAG,
+   [RTE_FLOW_ACTION_TYPE_FLAG] = MLX5DR_ACTION_TYP_TAG,
[RTE_FLOW_ACTION_TYPE_DROP] = MLX5DR_ACTION_TYP_DROP,
[RTE_FLOW_ACTION_TYPE_JUMP] = MLX5DR_ACTION_TYP_TBL,
[RTE_FLOW_ACTION_TYPE_QUEUE] = MLX5DR_ACTION_TYP_TIR,
-- 
2.20.0



[PATCH] net/mlx5: support HW flag action

2024-05-28 Thread Shun Hao
The HW flag action is supported by adding the tag to packet, with the
default tag value.

Signed-off-by: Shun Hao 
Acked-by: Suanming Mou 
---
 drivers/net/mlx5/mlx5_flow_hw.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c
index 68c5a36bbb..fa10a591b3 100644
--- a/drivers/net/mlx5/mlx5_flow_hw.c
+++ b/drivers/net/mlx5/mlx5_flow_hw.c
@@ -2292,6 +2292,16 @@ __flow_hw_actions_translate(struct rte_eth_dev *dev,
}
acts->rule_acts[dr_pos].action = priv->hw_def_miss;
break;
+   case RTE_FLOW_ACTION_TYPE_FLAG:
+   acts->mark = true;
+   acts->rule_acts[dr_pos].tag.value =
+   mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT);
+   acts->rule_acts[dr_pos].action =
+   priv->hw_tag[!!attr->group];
+   rte_atomic_fetch_add_explicit(&priv->hws_mark_refcnt, 1,
+   rte_memory_order_relaxed);
+   flow_hw_rxq_flag_set(dev, true);
+   break;
case RTE_FLOW_ACTION_TYPE_MARK:
acts->mark = true;
if (masks->conf &&
@@ -6415,6 +6425,10 @@ mlx5_flow_hw_actions_validate(struct rte_eth_dev *dev,
if (ret < 0)
return ret;
break;
+   case RTE_FLOW_ACTION_TYPE_FLAG:
+   /* TODO: Validation logic */
+   action_flags |= MLX5_FLOW_ACTION_FLAG;
+   break;
case RTE_FLOW_ACTION_TYPE_MARK:
/* TODO: Validation logic */
action_flags |= MLX5_FLOW_ACTION_MARK;
@@ -6606,6 +6620,7 @@ flow_hw_actions_validate(struct rte_eth_dev *dev,
 
 static enum mlx5dr_action_type mlx5_hw_dr_action_types[] = {
[RTE_FLOW_ACTION_TYPE_MARK] = MLX5DR_ACTION_TYP_TAG,
+   [RTE_FLOW_ACTION_TYPE_FLAG] = MLX5DR_ACTION_TYP_TAG,
[RTE_FLOW_ACTION_TYPE_DROP] = MLX5DR_ACTION_TYP_DROP,
[RTE_FLOW_ACTION_TYPE_JUMP] = MLX5DR_ACTION_TYP_TBL,
[RTE_FLOW_ACTION_TYPE_QUEUE] = MLX5DR_ACTION_TYP_TIR,
-- 
2.20.0



RE: [EXTERNAL] Re: [PATCH v5] cnxk: disable building template files

2024-05-28 Thread Pavan Nikhilesh Bhagavatula



> -Original Message-
> From: Bruce Richardson 
> Sent: Tuesday, May 28, 2024 3:10 PM
> To: Pavan Nikhilesh Bhagavatula 
> Cc: Jerin Jacob ; Nithin Kumar Dabilpuram
> ; Kiran Kumar Kokkilagadda
> ; Sunil Kumar Kori ; Satha
> Koteswara Rao Kottidi ; Harman Kalra
> ; Shijith Thotton ; Anatoly
> Burakov ; dev@dpdk.org
> Subject: Re: [EXTERNAL] Re: [PATCH v5] cnxk: disable building template files
> 
> On Tue, May 28, 2024 at 09:23:12AM +, Pavan Nikhilesh Bhagavatula
> wrote:
> > > On Mon, May 27, 2024 at 09:04:29PM +0530,
> pbhagavat...@marvell.com
> > > wrote:
> > > > From: Pavan Nikhilesh 
> > > >
> > > > Disable building template files when CNXK_DIS_TMPLT_FUNC is defined
> > > > as a part of c_args.  This option can be used when reworking datapath
> > > > or debugging issues to reduce Rx/Tx path compilation time.
> > > >
> > > > Example command: meson build -Dc_args='-DCNXK_DIS_TMPLT_FUNC'
> > > > -Dexamples=all  --cross-file config/arm/arm64_cn10k_linux_gcc
> > > >
> > > Should this option be set in CI by default, or in test-meson-builds by
> > > default? When do we need to avoid setting this flag, vs setting it?
> > >
> >
> > Yes, we can set this option when testing builds to speed things up.  We
> > would want to avoid setting the flag when we are packaging or generating
> > release binaries as it will negatively affect fastpath performance.
> >
> > I am not sure how to integrate to test-meson-builds as there is no target
> > option for cn10k/cn9k.
> >
> So, are these template files already not built when building for other
> platforms other than cn10k/cn9k?

I rechecked they are only built when target is aarch64.


Depends-on

2024-05-28 Thread Morten Brørup
Aaron,

I have been trying to submit a patch with a dependency on another patch, but it 
keeps failing, and the error message "Apply patch set 140352 failed: 404 Not 
Found" isn't very helpful.

Could you please take a look at it and let me know what's going on:
https://patchwork.dpdk.org/project/dpdk/patch/20240528070546.92511-1...@smartsharesystems.com/


Med venlig hilsen / Kind regards,
-Morten Brørup




Re: [PATCH v2 1/2] dts: update mypy static checker

2024-05-28 Thread Luca Vizzarro

On 24/05/2024 15:51, Patrick Robb wrote:

I think this is fine. Another option would be to move sut_nodes and
tg_nodes up as DTSRunner attributes. I like preserving the type hint,
but it also might just be cluttering the code to do this... up to you.


I made the change because of a complaint coming from mypy...
I think it was complaining that the function is not being type checked?

The other solution was to add a mypy ignore attribute.


Re: [PATCH v2 0/2] dts: update mypy and clean up

2024-05-28 Thread Luca Vizzarro

On 24/05/2024 17:17, Patrick Robb wrote:

Luca, I assume what you had in mind was we will run
dts-check-format.sh, so including isort and pylama, as opposed to just
mypy? Thanks.

I originally thought of mypy only, Juraj suggested the dts-check-format.
I guess it is a good suggestion, and we should go with that.


Re: Including contigmem in core dumps

2024-05-28 Thread Lewis Donzis


- On May 28, 2024, at 1:55 AM, Dmitry Kozlyuk dmitry.kozl...@gmail.com 
wrote:

> Hi Lewis,
> 
> Memory reserved by eal_get_virtual_area() is not yet useful,
> but it is very large, so by excluding it from dumps,
> DPDK prevents dumps from including large zero-filled parts.
> 
> It also makes sense to call eal_mem_set_dump(..., false)
> from eal_memalloc.c:free_seg(), because of --huge-unlink=never:
> in this mode (Linux-only), freed segments are not cleared,
> so if they were included into dump, it would be a lot of garbage data.
> 
> Newly allocated hugepages are not included into dumps
> because this would make dumps very large by default.
> However, this could be an opt-in as a runtime option if need be.

Thanks for the clarification.  I agree that not including freed segments makes 
perfect sense.

But when debugging a core dump, it's sometimes really helpful to be able to see 
what's in the mbuf that was being processed at the time.  Perhaps it would be a 
useful option to be able to tell the allocator not to disable core dumps.

In the mean time, my experiments to get around this have not been fruitful.

I wondered if we could enable core dumps for mbufs by calling 
rte_mempool_mem_iter() on the pool returned by rte_pktmbuf_pool_create(), and 
have the callback function call madvise(memhdr->addr, memhdr->len, MADV_CORE).  
But that didn't help, or at least the size of the core file didn't increase.

I then tried disabling the call to madvise() in the DPDK source code, and that 
didn't make any difference either.

Note that this is on FreeBSD, so I wonder if there's some fundamental reason 
that the contigmem memory doesn't get included in a core dump?


RE: DPDK patch for Amston Lake SGMII <> GPY215

2024-05-28 Thread Amy . Shih
Hi Ferruh:

The Ethernet controller connected to the "GPY215 PHY" is the integrated Gigabit 
Ethernet (GbE) controller from the Intel Amston Lake CPU. 
The output of `lspci` is as follows:

00:1e.4 Ethernet controller [0200]: Intel Corporation Device [8086:54ac]

Best Regards,
Amy Shih
Advantech ICVG x86 Software
02-7732-3399 Ext. 1249

-Original Message-
From: Ferruh Yigit  
Sent: Monday, May 27, 2024 4:58 PM
To: Jack.Chen ; dev@dpdk.org
Cc: Amy.Shih ; bill.lu ; 
Jenny3.Lin ; Bruce Richardson 
; Mcnamara, John 
Subject: Re: DPDK patch for Amston Lake SGMII <> GPY215

On 5/24/2024 6:40 AM, Jack.Chen wrote:
> Dear DPDK Dev .
> 
> This is PM from Advantech ENPD. We are working on Intel Amston Lake 
> CPU’s  SGMII <> GPY215 PHY for DPDK test but fail.
> 
> We consulted with Intel support team and they suggested we should 
> consult DPDK community and it should have the patch or code change for 
> Amston Lake <> GPY215 available for DPDK.
> 
> Could you kindly suggest us the direction of it?  I also keep my 
> Engineering team in this mail loop for further discussion.
> 
>  
> 
> Thank you so much
> 
>  
> 
> The error message while we testing DPDK
> 
> SoC 2.5G LAN (BIOS  set to 1G) with dpdk 24.03.0. It can run testpmd
> test, and  error message as follows :
> 
> root@fwa-1214-efi:~/dpdk/dpdk-24.03/build/app# ./dpdk-testpmd -c 0xf -n
> 1 -a 00:1e.4 --socket-mem=2048,0 -- -i --mbcache=512 --numa
> --port-numa-config=0,0 --socket-num=0 --coremask=0x2 --nb-cores=1
> --rxq=1 --txq=1 --portmask=0x1 --rxd=2048 --rxfreet=64 --rxpt=64
> --rxht=8 --rxwt=0 --txd=2048 --txfreet=64 --txpt=64 --txht=0 --txwt=0
> --burst=64 --txrst=64 --rss-ip -a
> 
> EAL: Detected CPU lcores: 4
> 
> EAL: Detected NUMA nodes: 1
> 
> EAL: Detected static linkage of DPDK
> 
> EAL: Multi-process socket /var/run/dpdk/rte/mp_socket
> 
> EAL: Selected IOVA mode 'PA'
> 
> TELEMETRY: No legacy callbacks, legacy socket not created
> 
> testpmd: No probed ethernet devices
> 
> Interactive-mode selected
> 
> Fail: input rxq (1) can't be greater than max_rx_queues (0) of port 0
> 
> EAL: Error - exiting with code: 1
> 
>  Cause: rxq 1 invalid - must be >= 0 && <= 0
> 
>  


Hi Jack,

According above log device is not detected.
What is the Ethernet controller connected to the "GPY215 PHY" and do you
know if it has required driver in DPDK for it?
If device sits on PCIe bus, you can check it via `lspci`.



Segment Fault using dpdk-pktgen

2024-05-28 Thread florian.duesin...@web.de
Dear Developers of pktgen,
After the recent updates of pktgen I was finally able to compile the code but I 
get 2 critical errors when I try to use pktgen.

First:
I tested the newest pktgen code from git on a virtual machine using EVE-NG and 
a physical computer with an Intel i9 and two 1G NICs. On both I installed and 
compiled everything as stated in the documentation, but when using pktgen, I 
only get a Segment Fault. It doesn't matter if I try using a bare command like: 
sudo ./path/to/pktgen or if I use more arguments like: sudo ./path/to/pktgen 
-l 0-1 -- -P -m 1.0. I only get the following error message:

EAL: Detected CPU lcores: 6

EAL: Detected NUMA nodes: 1

EAL: Detected shared linkage of DPDK

EAL: Multi-process socket /var/run/dpdk/rte/mp_socket

EAL: Selected IOVA mode 'PA'

EAL: Probe PCI driver: net_virtio (1af4:1000) device: :00:03.0 (socket -1)

eth_virtio_pci_init(): Failed to init PCI device

EAL: Requested device :00:03.0 cannot be used

EAL: Probe PCI driver: net_virtio (1af4:1000) device: :00:04.0 (socket -1)

EAL: Probe PCI driver: net_virtio (1af4:1000) device: :00:05.0 (socket -1)

TELEMETRY: No legacy callbacks, legacy socket not created

  Total memory used =   
 0 KB


Port DevName  Index NUMA PCI Information   Src MAC   Promiscuous


== Pktgen got a Segment Fault


Obtained 7 stack frames.

./Pktgen-DPDK/builddir/app/pktgen(+0x25e83) [0x5857c4580e83]

/lib/x86_64-linux-gnu/libc.so.6(+0x42990) [0x746baa642990]

./Pktgen-DPDK/builddir/app/pktgen(+0x99a7) [0x5857c45649a7]

./Pktgen-DPDK/builddir/app/pktgen(+0xa793) [0x5857c4565793]

/lib/x86_64-linux-gnu/libc.so.6(+0x28150) [0x746baa628150]

/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0x89) [0x746baa628209]

./Pktgen-DPDK/builddir/app/pktgen(+0xb025) [0x5857c4566025]


The error occurs on both machines running ubuntu-23.10.
I have a test setup in EVE-NG with the exact same specs, but running 
ubuntu-22.04. With dpdk 22.11.5 and pktgen 22.04.1 I was able to run pktgen 
with the statements mentioned above. But whatever I do in the pktgen command 
line, I am not able to generate traffic.

My final goal is using pktgen to read pcap files and use them to generate 
traffic.
The machine with ubuntu-22.04 opens my imix.pcap file when using -s 
0:imix.pcap, but again I am not able to generate any traffic.
If I try to open the pcap file on one of the ubuntu-23.10 machines, I get 
the following error:

EAL: Detected CPU lcores: 6

EAL: Detected NUMA nodes: 1

EAL: Detected shared linkage of DPDK

EAL: Multi-process socket /var/run/dpdk/rte/mp_socket

EAL: Selected IOVA mode 'PA'

EAL: Probe PCI driver: net_virtio (1af4:1000) device: :00:03.0 (socket -1)

eth_virtio_pci_init(): Failed to init PCI device

EAL: Requested device :00:03.0 cannot be used

EAL: Probe PCI driver: net_virtio (1af4:1000) device: :00:04.0 (socket -1)

EAL: Probe PCI driver: net_virtio (1af4:1000) device: :00:05.0 (socket -1)

TELEMETRY: No legacy callbacks, legacy socket not created

EAL: Error - exiting with code: 1

  Cause: pktgen_pcap_open: rte_zmalloc_socket() failed for pcap_info_t structure

Can someone help me to figure out what's wrong? I am really starting to question 
myself. If you need more data, let me know.
Thank you in advance

Sincerely
Florian Düsing


Re: [Help] O-RAN Fronthaul CUS-U data structure implementation

2024-05-28 Thread Lincoln Lavoie
Hi Mattia,

Have you looked into the O-RAN OSC open fronthaul phy implementation?
https://docs.o-ran-sc.org/projects/o-ran-sc-o-du-phy/en/latest/Architecture-Overview_fh.html

Cheers,
Lincoln

On Tue, May 28, 2024 at 10:31 AM Mattia Milani 
wrote:

> Dear DPDK Dev community,
>
> I hope this is the correct mailing list for my questions, otherwise
> please excuse me and let me know where my questions should be posted.
>
> I was looking for a data structure capable of managing O-RAN Fronthaul
> CUS-U headers (attached a screenshot of the header structure from a
> packet analyzed with Wireshark)
> but I couldn't find one.
>
> I would like to be capable to identify the different port ids but also
> the number of PRBs in the section part.
>
> I wrote my own implementation for a simple use case (I don't take in
> consideration different versions and or data directions)
> but it's enough for me at the moment.
>
> What I wanted to ask is the following:
> - Does a data structure for this kind of header already exist?
> - If it doesn't exist, is it planned?
> - If it's not planned, could it be of some interest?
>
> If there is interest I would be happy to share what I developed up to
> now to receive comments and/or assistance on how to make it fully
> functioning.
>
> Best regards,
> Mattia
>


-- 
*Lincoln Lavoie*
Principal Engineer, Broadband Technologies
21 Madbury Rd., Ste. 100, Durham, NH 03824
lylav...@iol.unh.edu
https://www.iol.unh.edu
+1-603-674-2755 (m)



Re: Segment Fault using dpdk-pktgen

2024-05-28 Thread Stephen Hemminger
On Tue, 28 May 2024 07:54:22 +
"florian.duesin...@web.de"  wrote:

> Dear Developers of pktgen,
> After the recent updates of pktgen I was finally able to compile the code but 
> I get 2 critical errors when I try to use pktgen.

I don't think pktgen is maintained anymore.


Re: [RFC v2] eal: provide option to use compiler memcpy instead of RTE

2024-05-28 Thread Bruce Richardson
On Tue, May 28, 2024 at 07:59:36AM -0700, Stephen Hemminger wrote:
> On Tue, 28 May 2024 10:19:15 +0200
> Mattias Rönnblom  wrote:
> 
> > >   
> > 
> > I've tested this patch some with DSW micro benchmarks, and the result is 
> > a 2.5% reduction of the DSW+testapp overhead with cc/libc memcpy. GCC 11.4.
> > 
> > We've also run characteristic test suite of a large, real world app. 
> > Here, we saw no effect. GCC 10.5.
> > 
> > x86_64 in both cases (Skylake and Raptor Lake).
> > 
> > Last time we did the same, there was a noticeable performance 
> > degradation in both the above cases.
> > 
> > This is not a lot of data points, but I think we should consider 
> > making the custom RTE memcpy() implementations optional in the next 
> > release, and if no-one complains, remove the implementations in the next 
> > release.
> 
> Let's go further.
> 
> 1. Announce that rte_memcpy will be marked deprecated in 24.11 release
> 
> 2. In 24.11 do a global replace of rte_memcpy on the tree.
>And mark rte_memcpy as deprecated.
> 
> 3. In 25.11 it can go away.

While I'd like us to be able to do so, I believe that to be premature. We
need to see where/if there are regressions first, and see about fixing
them.

/Bruce


Re: [PATCH v2 1/8] dts: add params manipulation module

2024-05-28 Thread Nicholas Pratte
Tested-by: Nicholas Pratte 
Reviewed-by: Nicholas Pratte 

On Thu, May 9, 2024 at 7:21 AM Luca Vizzarro  wrote:
>
> This commit introduces a new "params" module, which adds a new way
> to manage command line parameters. The provided Params dataclass
> is able to read the fields of its child class and produce a string
> representation to supply to the command line. Any data structure
> that is intended to represent command line parameters can inherit it.
>
> The main purpose is to make it easier to represent data structures that
> map to parameters. Aiding quicker development, while minimising code
> bloat.
>
> Signed-off-by: Luca Vizzarro 
> Reviewed-by: Paul Szczepanek 
> ---
>  dts/framework/params/__init__.py | 274 +++
>  1 file changed, 274 insertions(+)
>  create mode 100644 dts/framework/params/__init__.py
>
> diff --git a/dts/framework/params/__init__.py 
> b/dts/framework/params/__init__.py
> new file mode 100644
> index 00..aa27e34357
> --- /dev/null
> +++ b/dts/framework/params/__init__.py
> @@ -0,0 +1,274 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright(c) 2024 Arm Limited
> +
> +"""Parameter manipulation module.
> +
> +This module provides :class:`Params` which can be used to model any data 
> structure
> +that is meant to represent any command parameters.
> +"""
> +
> +from dataclasses import dataclass, fields
> +from enum import Flag
> +from typing import Any, Callable, Iterable, Literal, Reversible, TypedDict, 
> cast
> +
> +from typing_extensions import Self
> +
> +#: Type for a function taking one argument.
> +FnPtr = Callable[[Any], Any]
> +#: Type for a switch parameter.
> +Switch = Literal[True, None]
> +#: Type for a yes/no switch parameter.
> +YesNoSwitch = Literal[True, False, None]
> +
> +
> +def _reduce_functions(funcs: Reversible[FnPtr]) -> FnPtr:
> +"""Reduces an iterable of :attr:`FnPtr` from end to start to a composite 
> function.
> +
> +If the iterable is empty, the created function just returns its fed 
> value back.
> +"""
> +
> +def composite_function(value: Any):
> +for fn in reversed(funcs):
> +value = fn(value)
> +return value
> +
> +return composite_function
> +
> +
> +def convert_str(*funcs: FnPtr):
> +"""Decorator that makes the ``__str__`` method a composite function 
> created from its arguments.
> +
> +The :attr:`FnPtr`s fed to the decorator are executed from right to left
> +in the arguments list order.
> +
> +Example:
> +.. code:: python
> +
> +@convert_str(hex_from_flag_value)
> +class BitMask(enum.Flag):
> +A = auto()
> +B = auto()
> +
> +will allow ``BitMask`` to render as a hexadecimal value.
> +"""
> +
> +def _class_decorator(original_class):
> +original_class.__str__ = _reduce_functions(funcs)
> +return original_class
> +
> +return _class_decorator
> +
> +
> +def comma_separated(values: Iterable[Any]) -> str:
> +"""Converts an iterable in a comma-separated string."""
> +return ",".join([str(value).strip() for value in values if value is not 
> None])
> +
> +
> +def bracketed(value: str) -> str:
> +"""Adds round brackets to the input."""
> +return f"({value})"
> +
> +
> +def str_from_flag_value(flag: Flag) -> str:
> +"""Returns the value from a :class:`enum.Flag` as a string."""
> +return str(flag.value)
> +
> +
> +def hex_from_flag_value(flag: Flag) -> str:
> +"""Returns the value from a :class:`enum.Flag` converted to 
> hexadecimal."""
> +return hex(flag.value)
> +
> +
> +class ParamsModifier(TypedDict, total=False):
> +"""Params modifiers dict compatible with the :func:`dataclasses.field` 
> metadata parameter."""
> +
> +#:
> +Params_value_only: bool
> +#:
> +Params_short: str
> +#:
> +Params_long: str
> +#:
> +Params_multiple: bool
> +#:
> +Params_convert_value: Reversible[FnPtr]
> +
> +
> +@dataclass
> +class Params:
> +"""Dataclass that renders its fields into command line arguments.
> +
> +The parameter name is taken from the field name by default. The 
> following:
> +
> +.. code:: python
> +
> +name: str | None = "value"
> +
> +is rendered as ``--name=value``.
> +Through :func:`dataclasses.field` the resulting parameter can be 
> manipulated by applying
> +this class' metadata modifier functions.
> +
> +To use fields as switches, set the value to ``True`` to render them. If 
> you
> +use a yes/no switch you can also set ``False`` which would render a 
> switch
> +prefixed with ``--no-``. Examples:
> +
> +.. code:: python
> +
> +interactive: Switch = True  # renders --interactive
> +numa: YesNoSwitch   = False # renders --no-numa
> +
> +Setting ``None`` will prevent it from being rendered. The 
> :attr:`~Switch` type alias is provided
> +for regular switches, whereas :attr:`~YesNoSwitch` is offered for yes/no 
> ones.

Re: [PATCH 2/6] dts: use Params for interactive shells

2024-05-28 Thread Nicholas Pratte
Tested-by: Nicholas Pratte 
Reviewed-by: Nicholas Pratte 

On Tue, Mar 26, 2024 at 3:04 PM Luca Vizzarro  wrote:
>
> Make it so that interactive shells accept an implementation of `Params`
> for app arguments. Convert EalParameters to use `Params` instead.
>
> String command line parameters can still be supplied by using the
> `StrParams` implementation.
>
> Signed-off-by: Luca Vizzarro 
> Reviewed-by: Jack Bond-Preston 
> Reviewed-by: Honnappa Nagarahalli 
> ---
>  .../remote_session/interactive_shell.py   |   8 +-
>  dts/framework/remote_session/testpmd_shell.py |  12 +-
>  dts/framework/testbed_model/__init__.py   |   2 +-
>  dts/framework/testbed_model/node.py   |   4 +-
>  dts/framework/testbed_model/os_session.py |   4 +-
>  dts/framework/testbed_model/sut_node.py   | 106 --
>  dts/tests/TestSuite_pmd_buffer_scatter.py |   3 +-
>  7 files changed, 73 insertions(+), 66 deletions(-)
>
> diff --git a/dts/framework/remote_session/interactive_shell.py 
> b/dts/framework/remote_session/interactive_shell.py
> index 5cfe202e15..a2c7b30d9f 100644
> --- a/dts/framework/remote_session/interactive_shell.py
> +++ b/dts/framework/remote_session/interactive_shell.py
> @@ -1,5 +1,6 @@
>  # SPDX-License-Identifier: BSD-3-Clause
>  # Copyright(c) 2023 University of New Hampshire
> +# Copyright(c) 2024 Arm Limited
>
>  """Common functionality for interactive shell handling.
>
> @@ -21,6 +22,7 @@
>  from paramiko import Channel, SSHClient, channel  # type: ignore[import]
>
>  from framework.logger import DTSLogger
> +from framework.params import Params
>  from framework.settings import SETTINGS
>
>
> @@ -40,7 +42,7 @@ class InteractiveShell(ABC):
>  _ssh_channel: Channel
>  _logger: DTSLogger
>  _timeout: float
> -_app_args: str
> +_app_args: Params | None
>
>  #: Prompt to expect at the end of output when sending a command.
>  #: This is often overridden by subclasses.
> @@ -63,7 +65,7 @@ def __init__(
>  interactive_session: SSHClient,
>  logger: DTSLogger,
>  get_privileged_command: Callable[[str], str] | None,
> -app_args: str = "",
> +app_args: Params | None = None,
>  timeout: float = SETTINGS.timeout,
>  ) -> None:
>  """Create an SSH channel during initialization.
> @@ -100,7 +102,7 @@ def _start_application(self, get_privileged_command: 
> Callable[[str], str] | None
>  get_privileged_command: A function (but could be any callable) 
> that produces
>  the version of the command with elevated privileges.
>  """
> -start_command = f"{self.path} {self._app_args}"
> +start_command = f"{self.path} {self._app_args or ''}"
>  if get_privileged_command is not None:
>  start_command = get_privileged_command(start_command)
>  self.send_command(start_command)
> diff --git a/dts/framework/remote_session/testpmd_shell.py 
> b/dts/framework/remote_session/testpmd_shell.py
> index cb2ab6bd00..db3abb7600 100644
> --- a/dts/framework/remote_session/testpmd_shell.py
> +++ b/dts/framework/remote_session/testpmd_shell.py
> @@ -21,6 +21,7 @@
>  from typing import Callable, ClassVar
>
>  from framework.exception import InteractiveCommandExecutionError
> +from framework.params import StrParams
>  from framework.settings import SETTINGS
>  from framework.utils import StrEnum
>
> @@ -118,8 +119,15 @@ def _start_application(self, get_privileged_command: 
> Callable[[str], str] | None
>  Also find the number of pci addresses which were allowed on the 
> command line when the app
>  was started.
>  """
> -self._app_args += " -i --mask-event intr_lsc"
> -self.number_of_ports = self._app_args.count("-a ")
> +from framework.testbed_model.sut_node import EalParameters
> +
> +assert isinstance(self._app_args, EalParameters)
> +
> +if isinstance(self._app_args.app_params, StrParams):
> +self._app_args.app_params.value += " -i --mask-event intr_lsc"
> +
> +self.number_of_ports = len(self._app_args.ports) if 
> self._app_args.ports is not None else 0
> +
>  super()._start_application(get_privileged_command)
>
>  def start(self, verify: bool = True) -> None:
> diff --git a/dts/framework/testbed_model/__init__.py 
> b/dts/framework/testbed_model/__init__.py
> index 6086512ca2..ef9520df4c 100644
> --- a/dts/framework/testbed_model/__init__.py
> +++ b/dts/framework/testbed_model/__init__.py
> @@ -23,6 +23,6 @@
>  from .cpu import LogicalCoreCount, LogicalCoreCountFilter, LogicalCoreList
>  from .node import Node
>  from .port import Port, PortLink
> -from .sut_node import SutNode
> +from .sut_node import SutNode, EalParameters
>  from .tg_node import TGNode
>  from .virtual_device import VirtualDevice
> diff --git a/dts/framework/testbed_model/node.py 
> b/dts/framework/testbed_model/node.py
> index 74061f6262..ec9512d618 100644
> --- a/dts/framewo

Re: [PATCH v2 3/8] dts: refactor EalParams

2024-05-28 Thread Nicholas Pratte
Tested-by: Nicholas Pratte 
Reviewed-by: Nicholas Pratte 

On Thu, May 9, 2024 at 7:21 AM Luca Vizzarro  wrote:
>
> Move EalParams to its own module to avoid circular dependencies.
>
> Signed-off-by: Luca Vizzarro 
> Reviewed-by: Paul Szczepanek 
> ---
>  dts/framework/params/eal.py   | 50 +++
>  dts/framework/remote_session/testpmd_shell.py |  2 +-
>  dts/framework/testbed_model/sut_node.py   | 42 +---
>  3 files changed, 53 insertions(+), 41 deletions(-)
>  create mode 100644 dts/framework/params/eal.py
>
> diff --git a/dts/framework/params/eal.py b/dts/framework/params/eal.py
> new file mode 100644
> index 00..bbdbc8f334
> --- /dev/null
> +++ b/dts/framework/params/eal.py
> @@ -0,0 +1,50 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright(c) 2024 Arm Limited
> +
> +"""Module representing the DPDK EAL-related parameters."""
> +
> +from dataclasses import dataclass, field
> +from typing import Literal
> +
> +from framework.params import Params, Switch
> +from framework.testbed_model.cpu import LogicalCoreList
> +from framework.testbed_model.port import Port
> +from framework.testbed_model.virtual_device import VirtualDevice
> +
> +
> +def _port_to_pci(port: Port) -> str:
> +return port.pci
> +
> +
> +@dataclass(kw_only=True)
> +class EalParams(Params):
> +"""The environment abstraction layer parameters.
> +
> +Attributes:
> +lcore_list: The list of logical cores to use.
> +memory_channels: The number of memory channels to use.
> +prefix: Set the file prefix string with which to start DPDK, e.g.: 
> ``prefix="vf"``.
> +no_pci: Switch to disable PCI bus, e.g.: ``no_pci=True``.
> +vdevs: Virtual devices, e.g.::
> +vdevs=[
> +VirtualDevice('net_ring0'),
> +VirtualDevice('net_ring1')
> +]
> +ports: The list of ports to allow.
> +other_eal_param: user defined DPDK EAL parameters, e.g.:
> +``other_eal_param='--single-file-segments'``
> +"""
> +
> +lcore_list: LogicalCoreList = field(metadata=Params.short("l"))
> +memory_channels: int = field(metadata=Params.short("n"))
> +prefix: str = field(metadata=Params.long("file-prefix"))
> +no_pci: Switch = None
> +vdevs: list[VirtualDevice] | None = field(
> +default=None, metadata=Params.multiple() | Params.long("vdev")
> +)
> +ports: list[Port] | None = field(
> +default=None,
> +metadata=Params.convert_value(_port_to_pci) | Params.multiple() | 
> Params.short("a"),
> +)
> +other_eal_param: Params | None = None
> +_separator: Literal[True] = field(default=True, init=False, 
> metadata=Params.short("-"))
> diff --git a/dts/framework/remote_session/testpmd_shell.py 
> b/dts/framework/remote_session/testpmd_shell.py
> index 7eced27096..841d456a2f 100644
> --- a/dts/framework/remote_session/testpmd_shell.py
> +++ b/dts/framework/remote_session/testpmd_shell.py
> @@ -21,8 +21,8 @@
>  from typing import Callable, ClassVar
>
>  from framework.exception import InteractiveCommandExecutionError
> +from framework.params.eal import EalParams
>  from framework.settings import SETTINGS
> -from framework.testbed_model.sut_node import EalParams
>  from framework.utils import StrEnum
>
>  from .interactive_shell import InteractiveShell
> diff --git a/dts/framework/testbed_model/sut_node.py 
> b/dts/framework/testbed_model/sut_node.py
> index c886590979..e1163106a3 100644
> --- a/dts/framework/testbed_model/sut_node.py
> +++ b/dts/framework/testbed_model/sut_node.py
> @@ -15,9 +15,8 @@
>  import os
>  import tarfile
>  import time
> -from dataclasses import dataclass, field
>  from pathlib import PurePath
> -from typing import Literal, Type
> +from typing import Type
>
>  from framework.config import (
>  BuildTargetConfiguration,
> @@ -26,6 +25,7 @@
>  SutNodeConfiguration,
>  )
>  from framework.params import Params, Switch
> +from framework.params.eal import EalParams
>  from framework.remote_session import CommandResult
>  from framework.settings import SETTINGS
>  from framework.utils import MesonArgs
> @@ -37,44 +37,6 @@
>  from .virtual_device import VirtualDevice
>
>
> -def _port_to_pci(port: Port) -> str:
> -return port.pci
> -
> -
> -@dataclass(kw_only=True)
> -class EalParams(Params):
> -"""The environment abstraction layer parameters.
> -
> -Attributes:
> -lcore_list: The list of logical cores to use.
> -memory_channels: The number of memory channels to use.
> -prefix: Set the file prefix string with which to start DPDK, e.g.: 
> ``prefix="vf"``.
> -no_pci: Switch to disable PCI bus, e.g.: ``no_pci=True``.
> -vdevs: Virtual devices, e.g.::
> -vdevs=[
> -VirtualDevice('net_ring0'),
> -VirtualDevice('net_ring1')
> -]
> -ports: The list of ports to allow.
> -other_eal_param: user 

Re: [PATCH v2 4/8] dts: remove module-wide imports

2024-05-28 Thread Nicholas Pratte
Tested-by: Nicholas Pratte 
Reviewed-by: Nicholas Pratte 

On Thu, May 9, 2024 at 7:21 AM Luca Vizzarro  wrote:
>
> Remove the imports in the testbed_model and remote_session modules init
> file, to avoid the initialisation of unneeded modules, thus removing or
> limiting the risk of circular dependencies.
>
> Signed-off-by: Luca Vizzarro 
> Reviewed-by: Paul Szczepanek 
> ---
>  dts/framework/remote_session/__init__.py   | 5 +
>  dts/framework/runner.py| 4 +++-
>  dts/framework/test_suite.py| 5 -
>  dts/framework/testbed_model/__init__.py| 7 ---
>  dts/framework/testbed_model/os_session.py  | 4 ++--
>  dts/framework/testbed_model/sut_node.py| 2 +-
>  dts/framework/testbed_model/traffic_generator/scapy.py | 2 +-
>  dts/tests/TestSuite_hello_world.py | 2 +-
>  dts/tests/TestSuite_smoke_tests.py | 2 +-
>  9 files changed, 14 insertions(+), 19 deletions(-)
>
> diff --git a/dts/framework/remote_session/__init__.py 
> b/dts/framework/remote_session/__init__.py
> index 1910c81c3c..29000a4642 100644
> --- a/dts/framework/remote_session/__init__.py
> +++ b/dts/framework/remote_session/__init__.py
> @@ -18,11 +18,8 @@
>  from framework.logger import DTSLogger
>
>  from .interactive_remote_session import InteractiveRemoteSession
> -from .interactive_shell import InteractiveShell
> -from .python_shell import PythonShell
> -from .remote_session import CommandResult, RemoteSession
> +from .remote_session import RemoteSession
>  from .ssh_session import SSHSession
> -from .testpmd_shell import TestPmdShell
>
>
>  def create_remote_session(
> diff --git a/dts/framework/runner.py b/dts/framework/runner.py
> index d74f1871db..e6c23af7c7 100644
> --- a/dts/framework/runner.py
> +++ b/dts/framework/runner.py
> @@ -26,6 +26,9 @@
>  from types import FunctionType
>  from typing import Iterable, Sequence
>
> +from framework.testbed_model.sut_node import SutNode
> +from framework.testbed_model.tg_node import TGNode
> +
>  from .config import (
>  BuildTargetConfiguration,
>  Configuration,
> @@ -51,7 +54,6 @@
>  TestSuiteWithCases,
>  )
>  from .test_suite import TestSuite
> -from .testbed_model import SutNode, TGNode
>
>
>  class DTSRunner:
> diff --git a/dts/framework/test_suite.py b/dts/framework/test_suite.py
> index 8768f756a6..9d3debb00f 100644
> --- a/dts/framework/test_suite.py
> +++ b/dts/framework/test_suite.py
> @@ -20,9 +20,12 @@
>  from scapy.layers.l2 import Ether  # type: ignore[import-untyped]
>  from scapy.packet import Packet, Padding  # type: ignore[import-untyped]
>
> +from framework.testbed_model.port import Port, PortLink
> +from framework.testbed_model.sut_node import SutNode
> +from framework.testbed_model.tg_node import TGNode
> +
>  from .exception import TestCaseVerifyError
>  from .logger import DTSLogger, get_dts_logger
> -from .testbed_model import Port, PortLink, SutNode, TGNode
>  from .testbed_model.traffic_generator import PacketFilteringConfig
>  from .utils import get_packet_summaries
>
> diff --git a/dts/framework/testbed_model/__init__.py 
> b/dts/framework/testbed_model/__init__.py
> index 6086512ca2..4f8a58c039 100644
> --- a/dts/framework/testbed_model/__init__.py
> +++ b/dts/framework/testbed_model/__init__.py
> @@ -19,10 +19,3 @@
>  """
>
>  # pylama:ignore=W0611
> -
> -from .cpu import LogicalCoreCount, LogicalCoreCountFilter, LogicalCoreList
> -from .node import Node
> -from .port import Port, PortLink
> -from .sut_node import SutNode
> -from .tg_node import TGNode
> -from .virtual_device import VirtualDevice
> diff --git a/dts/framework/testbed_model/os_session.py 
> b/dts/framework/testbed_model/os_session.py
> index 1a77aee532..e5f5fcbe0e 100644
> --- a/dts/framework/testbed_model/os_session.py
> +++ b/dts/framework/testbed_model/os_session.py
> @@ -32,13 +32,13 @@
>  from framework.logger import DTSLogger
>  from framework.params import Params
>  from framework.remote_session import (
> -CommandResult,
>  InteractiveRemoteSession,
> -InteractiveShell,
>  RemoteSession,
>  create_interactive_session,
>  create_remote_session,
>  )
> +from framework.remote_session.interactive_shell import InteractiveShell
> +from framework.remote_session.remote_session import CommandResult
>  from framework.settings import SETTINGS
>  from framework.utils import MesonArgs
>
> diff --git a/dts/framework/testbed_model/sut_node.py 
> b/dts/framework/testbed_model/sut_node.py
> index e1163106a3..83ad06ae2d 100644
> --- a/dts/framework/testbed_model/sut_node.py
> +++ b/dts/framework/testbed_model/sut_node.py
> @@ -26,7 +26,7 @@
>  )
>  from framework.params import Params, Switch
>  from framework.params.eal import EalParams
> -from framework.remote_session import CommandResult
> +from framework.remote_session.remote_session import CommandResult
>  from framework.settings import SETTINGS
>  from framework

Re: [PATCH v2 6/8] dts: use testpmd params for scatter test suite

2024-05-28 Thread Nicholas Pratte
Tested-by: Nicholas Pratte 
Reviewed-by: Nicholas Pratte 

On Thu, May 9, 2024 at 7:21 AM Luca Vizzarro  wrote:
>
> Update the buffer scatter test suite to use TestPmdParameters
> instead of the StrParams implementation.
>
> Signed-off-by: Luca Vizzarro 
> Reviewed-by: Paul Szczepanek 
> ---
>  dts/tests/TestSuite_pmd_buffer_scatter.py | 18 +-
>  1 file changed, 9 insertions(+), 9 deletions(-)
>
> diff --git a/dts/tests/TestSuite_pmd_buffer_scatter.py 
> b/dts/tests/TestSuite_pmd_buffer_scatter.py
> index 578b5a4318..6d206c1a40 100644
> --- a/dts/tests/TestSuite_pmd_buffer_scatter.py
> +++ b/dts/tests/TestSuite_pmd_buffer_scatter.py
> @@ -16,14 +16,14 @@
>  """
>
>  import struct
> +from dataclasses import asdict
>
>  from scapy.layers.inet import IP  # type: ignore[import-untyped]
>  from scapy.layers.l2 import Ether  # type: ignore[import-untyped]
>  from scapy.packet import Raw  # type: ignore[import-untyped]
>  from scapy.utils import hexstr  # type: ignore[import-untyped]
>
> -from framework.params import Params
> -from framework.params.testpmd import SimpleForwardingModes
> +from framework.params.testpmd import SimpleForwardingModes, TestPmdParams
>  from framework.remote_session.testpmd_shell import TestPmdShell
>  from framework.test_suite import TestSuite
>
> @@ -105,16 +105,16 @@ def pmd_scatter(self, mbsize: int) -> None:
>  """
>  testpmd = self.sut_node.create_interactive_shell(
>  TestPmdShell,
> -app_params=Params.from_str(
> -"--mbcache=200 "
> -f"--mbuf-size={mbsize} "
> -"--max-pkt-len=9000 "
> -"--port-topology=paired "
> -"--tx-offloads=0x8000"
> +app_params=TestPmdParams(
> +forward_mode=SimpleForwardingModes.mac,
> +mbcache=200,
> +mbuf_size=[mbsize],
> +max_pkt_len=9000,
> +tx_offloads=0x8000,
> +**asdict(self.sut_node.create_eal_parameters()),
>  ),
>  privileged=True,
>  )
> -testpmd.set_forward_mode(SimpleForwardingModes.mac)
>  testpmd.start()
>
>  for offset in [-1, 0, 1, 4, 5]:
> --
> 2.34.1
>


Re: [PATCH v2 7/8] dts: rework interactive shells

2024-05-28 Thread Nicholas Pratte
Tested-by: Nicholas Pratte 
Reviewed-by: Nicholas Pratte 

On Thu, May 9, 2024 at 7:21 AM Luca Vizzarro  wrote:
>
> The way nodes and interactive shells interact makes it difficult to
> develop for static type checking and hinting. The current system relies
> on a top-down approach, attempting to give a generic interface to the
> test developer, hiding the interaction of concrete shell classes as much
> as possible. When working with strong typing this approach is not ideal,
> as Python's implementation of generics is still rudimentary.
>
> This rework reverses the tests interaction to a bottom-up approach,
> allowing the test developer to call concrete shell classes directly,
> and let them ingest nodes independently. While also re-enforcing type
> checking and making the code easier to read.
>
> Signed-off-by: Luca Vizzarro 
> Reviewed-by: Paul Szczepanek 
> ---
>  dts/framework/params/eal.py   |   6 +-
>  dts/framework/remote_session/dpdk_shell.py| 104 
>  .../remote_session/interactive_shell.py   |  75 +++-
>  dts/framework/remote_session/python_shell.py  |   4 +-
>  dts/framework/remote_session/testpmd_shell.py |  64 +-
>  dts/framework/testbed_model/node.py   |  36 +-
>  dts/framework/testbed_model/os_session.py |  36 +-
>  dts/framework/testbed_model/sut_node.py   | 112 +-
>  .../testbed_model/traffic_generator/scapy.py  |   4 +-
>  dts/tests/TestSuite_hello_world.py|   7 +-
>  dts/tests/TestSuite_pmd_buffer_scatter.py |  21 ++--
>  dts/tests/TestSuite_smoke_tests.py|   2 +-
>  12 files changed, 201 insertions(+), 270 deletions(-)
>  create mode 100644 dts/framework/remote_session/dpdk_shell.py
>
> diff --git a/dts/framework/params/eal.py b/dts/framework/params/eal.py
> index bbdbc8f334..8d7766fefc 100644
> --- a/dts/framework/params/eal.py
> +++ b/dts/framework/params/eal.py
> @@ -35,9 +35,9 @@ class EalParams(Params):
>  ``other_eal_param='--single-file-segments'``
>  """
>
> -lcore_list: LogicalCoreList = field(metadata=Params.short("l"))
> -memory_channels: int = field(metadata=Params.short("n"))
> -prefix: str = field(metadata=Params.long("file-prefix"))
> +lcore_list: LogicalCoreList | None = field(default=None, 
> metadata=Params.short("l"))
> +memory_channels: int | None = field(default=None, 
> metadata=Params.short("n"))
> +prefix: str = field(default="dpdk", metadata=Params.long("file-prefix"))
>  no_pci: Switch = None
>  vdevs: list[VirtualDevice] | None = field(
>  default=None, metadata=Params.multiple() | Params.long("vdev")
> diff --git a/dts/framework/remote_session/dpdk_shell.py 
> b/dts/framework/remote_session/dpdk_shell.py
> new file mode 100644
> index 00..78caae36ea
> --- /dev/null
> +++ b/dts/framework/remote_session/dpdk_shell.py
> @@ -0,0 +1,104 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright(c) 2024 Arm Limited
> +
> +"""DPDK-based interactive shell.
> +
> +Provides a base class to create interactive shells based on DPDK.
> +"""
> +
> +
> +from abc import ABC
> +
> +from framework.params.eal import EalParams
> +from framework.remote_session.interactive_shell import InteractiveShell
> +from framework.settings import SETTINGS
> +from framework.testbed_model.cpu import LogicalCoreCount, LogicalCoreList
> +from framework.testbed_model.sut_node import SutNode
> +
> +
> +def compute_eal_params(
> +node: SutNode,
> +params: EalParams | None = None,
> +lcore_filter_specifier: LogicalCoreCount | LogicalCoreList = 
> LogicalCoreCount(),
> +ascending_cores: bool = True,
> +append_prefix_timestamp: bool = True,
> +) -> EalParams:
> +"""Compute EAL parameters based on the node's specifications.
> +
> +Args:
> +node: The SUT node to compute the values for.
> +params: The EalParams object to amend, if set to None a new object 
> is created and returned.
> +lcore_filter_specifier: A number of lcores/cores/sockets to use
> +or a list of lcore ids to use.
> +The default will select one lcore for each of two cores
> +on one socket, in ascending order of core ids.
> +ascending_cores: Sort cores in ascending order (lowest to highest 
> IDs).
> +If :data:`False`, sort in descending order.
> +append_prefix_timestamp: If :data:`True`, will append a timestamp to 
> DPDK file prefix.
> +"""
> +if params is None:
> +params = EalParams()
> +
> +if params.lcore_list is None:
> +params.lcore_list = LogicalCoreList(
> +node.filter_lcores(lcore_filter_specifier, ascending_cores)
> +)
> +
> +prefix = params.prefix
> +if append_prefix_timestamp:
> +prefix = f"{prefix}_{node._dpdk_timestamp}"
> +prefix = node.main_session.get_dpdk_file_prefix(prefix)
> +if prefix:
> +node._dpdk_prefix_list.append(prefix)
> +pa

Re: [PATCH v2 8/8] dts: use Unpack for type checking and hinting

2024-05-28 Thread Nicholas Pratte
Tested-by: Nicholas Pratte 
Reviewed-by: Nicholas Pratte 

On Thu, May 9, 2024 at 7:21 AM Luca Vizzarro  wrote:
>
> Interactive shells that inherit DPDKShell initialise their params
> classes from a kwargs dict. Therefore, static type checking is
> disabled. This change uses the functionality of Unpack added in
> PEP 692 to re-enable it. The disadvantage is that this functionality has
> been implemented only with TypedDict, forcing the creation of TypedDict
> mirrors of the Params classes.
>
> Signed-off-by: Luca Vizzarro 
> Reviewed-by: Paul Szczepanek 
> ---
>  dts/framework/params/types.py | 133 ++
>  dts/framework/remote_session/testpmd_shell.py |   5 +-
>  2 files changed, 137 insertions(+), 1 deletion(-)
>  create mode 100644 dts/framework/params/types.py
>
> diff --git a/dts/framework/params/types.py b/dts/framework/params/types.py
> new file mode 100644
> index 00..e668f658d8
> --- /dev/null
> +++ b/dts/framework/params/types.py
> @@ -0,0 +1,133 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright(c) 2024 Arm Limited
> +
> +"""Module containing TypeDict-equivalents of Params classes for static 
> typing and hinting.
> +
> +TypedDicts can be used in conjunction with Unpack and kwargs for type 
> hinting on function calls.
> +
> +Example:
> +..code:: python
> +def create_testpmd(**kwargs: Unpack[TestPmdParamsDict]):
> +params = TestPmdParams(**kwargs)
> +"""
> +
> +from pathlib import PurePath
> +from typing import TypedDict
> +
> +from framework.params import Switch, YesNoSwitch
> +from framework.params.testpmd import (
> +AnonMempoolAllocationMode,
> +EthPeer,
> +Event,
> +FlowGenForwardingMode,
> +HairpinMode,
> +NoisyForwardingMode,
> +Params,
> +PortNUMAConfig,
> +PortTopology,
> +RingNUMAConfig,
> +RSSSetting,
> +RXMultiQueueMode,
> +RXRingParams,
> +SimpleForwardingModes,
> +SimpleMempoolAllocationMode,
> +TxIPAddrPair,
> +TXOnlyForwardingMode,
> +TXRingParams,
> +TxUDPPortPair,
> +)
> +from framework.testbed_model.cpu import LogicalCoreList
> +from framework.testbed_model.port import Port
> +from framework.testbed_model.virtual_device import VirtualDevice
> +
> +
> +class EalParamsDict(TypedDict, total=False):
> +""":class:`TypedDict` equivalent of :class:`~.eal.EalParams`."""
> +
> +lcore_list: LogicalCoreList | None
> +memory_channels: int | None
> +prefix: str
> +no_pci: Switch
> +vdevs: list[VirtualDevice] | None
> +ports: list[Port] | None
> +other_eal_param: Params | None
> +
> +
> +class TestPmdParamsDict(EalParamsDict, total=False):
> +""":class:`TypedDict` equivalent of :class:`~.testpmd.TestPmdParams`."""
> +
> +interactive_mode: Switch
> +auto_start: Switch
> +tx_first: Switch
> +stats_period: int | None
> +display_xstats: list[str] | None
> +nb_cores: int | None
> +coremask: int | None
> +nb_ports: int | None
> +port_topology: PortTopology | None
> +portmask: int | None
> +portlist: str | None
> +numa: YesNoSwitch
> +socket_num: int | None
> +port_numa_config: list[PortNUMAConfig] | None
> +ring_numa_config: list[RingNUMAConfig] | None
> +total_num_mbufs: int | None
> +mbuf_size: list[int] | None
> +mbcache: int | None
> +max_pkt_len: int | None
> +eth_peers_configfile: PurePath | None
> +eth_peer: list[EthPeer] | None
> +tx_ip: TxIPAddrPair | None
> +tx_udp: TxUDPPortPair | None
> +enable_lro: Switch
> +max_lro_pkt_size: int | None
> +disable_crc_strip: Switch
> +enable_scatter: Switch
> +enable_hw_vlan: Switch
> +enable_hw_vlan_filter: Switch
> +enable_hw_vlan_strip: Switch
> +enable_hw_vlan_extend: Switch
> +enable_hw_qinq_strip: Switch
> +pkt_drop_enabled: Switch
> +rss: RSSSetting | None
> +forward_mode: (
> +SimpleForwardingModes
> +| FlowGenForwardingMode
> +| TXOnlyForwardingMode
> +| NoisyForwardingMode
> +| None
> +)
> +hairpin_mode: HairpinMode | None
> +hairpin_queues: int | None
> +burst: int | None
> +enable_rx_cksum: Switch
> +rx_queues: int | None
> +rx_ring: RXRingParams | None
> +no_flush_rx: Switch
> +rx_segments_offsets: list[int] | None
> +rx_segments_length: list[int] | None
> +multi_rx_mempool: Switch
> +rx_shared_queue: Switch | int
> +rx_offloads: int | None
> +rx_mq_mode: RXMultiQueueMode | None
> +tx_queues: int | None
> +tx_ring: TXRingParams | None
> +tx_offloads: int | None
> +eth_link_speed: int | None
> +disable_link_check: Switch
> +disable_device_start: Switch
> +no_lsc_interrupt: Switch
> +no_rmv_interrupt: Switch
> +bitrate_stats: int | None
> +latencystats: int | None
> +print_events: list[Event] | None
> +mask_events: list[Event] | None
> +flow_isolate_all: Switch
> +disable_f

Re: [PATCH v2 5/8] dts: add testpmd shell params

2024-05-28 Thread Nicholas Pratte
Tested-by: Nicholas Pratte 
Reviewed-by: Nicholas Pratte 


On Thu, May 9, 2024 at 7:21 AM Luca Vizzarro  wrote:
>
> Implement all the testpmd shell parameters into a data structure.
>
> Signed-off-by: Luca Vizzarro 
> Reviewed-by: Paul Szczepanek 
> ---
>  dts/framework/params/testpmd.py   | 608 ++
>  dts/framework/remote_session/testpmd_shell.py |  42 +-
>  dts/tests/TestSuite_pmd_buffer_scatter.py |   5 +-
>  3 files changed, 615 insertions(+), 40 deletions(-)
>  create mode 100644 dts/framework/params/testpmd.py
>
> diff --git a/dts/framework/params/testpmd.py b/dts/framework/params/testpmd.py
> new file mode 100644
> index 00..f8f70320cf
> --- /dev/null
> +++ b/dts/framework/params/testpmd.py
> @@ -0,0 +1,608 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright(c) 2024 Arm Limited
> +
> +"""Module containing all the TestPmd-related parameter classes."""
> +
> +from dataclasses import dataclass, field
> +from enum import EnumMeta, Flag, auto, unique
> +from pathlib import PurePath
> +from typing import Literal, NamedTuple
> +
> +from framework.params import (
> +Params,
> +Switch,
> +YesNoSwitch,
> +bracketed,
> +comma_separated,
> +convert_str,
> +hex_from_flag_value,
> +str_from_flag_value,
> +)
> +from framework.params.eal import EalParams
> +from framework.utils import StrEnum
> +
> +
> +class PortTopology(StrEnum):
> +"""Enum representing the port topology."""
> +
> +paired = auto()
> +"""In paired mode, the forwarding is between pairs of ports, e.g.: 
> (0,1), (2,3), (4,5)."""
> +chained = auto()
> +"""In chained mode, the forwarding is to the next available port in the 
> port mask, e.g.:
> +(0,1), (1,2), (2,0).
> +
> +The ordering of the ports can be changed using the portlist testpmd 
> runtime function.
> +"""
> +loop = auto()
> +"""In loop mode, ingress traffic is simply transmitted back on the same 
> interface."""
> +
> +
> +@convert_str(bracketed, comma_separated)
> +class PortNUMAConfig(NamedTuple):
> +"""DPDK port to NUMA socket association tuple."""
> +
> +#:
> +port: int
> +#:
> +socket: int
> +
> +
> +@convert_str(str_from_flag_value)
> +@unique
> +class FlowDirection(Flag):
> +"""Flag indicating the direction of the flow.
> +
> +A bi-directional flow can be specified with the pipe:
> +
> +>>> TestPmdFlowDirection.RX | TestPmdFlowDirection.TX
> +
> +"""
> +
> +#:
> +RX = 1 << 0
> +#:
> +TX = 1 << 1
> +
> +
> +@convert_str(bracketed, comma_separated)
> +class RingNUMAConfig(NamedTuple):
> +"""Tuple associating DPDK port, direction of the flow and NUMA socket."""
> +
> +#:
> +port: int
> +#:
> +direction: FlowDirection
> +#:
> +socket: int
> +
> +
> +@convert_str(comma_separated)
> +class EthPeer(NamedTuple):
> +"""Tuple associating a MAC address to the specified DPDK port."""
> +
> +#:
> +port_no: int
> +#:
> +mac_address: str
> +
> +
> +@convert_str(comma_separated)
> +class TxIPAddrPair(NamedTuple):
> +"""Tuple specifying the source and destination IPs for the packets."""
> +
> +#:
> +source_ip: str
> +#:
> +dest_ip: str
> +
> +
> +@convert_str(comma_separated)
> +class TxUDPPortPair(NamedTuple):
> +"""Tuple specifying the UDP source and destination ports for the packets.
> +
> +If leaving ``dest_port`` unspecified, ``source_port`` will be used for
> +the destination port as well.
> +"""
> +
> +#:
> +source_port: int
> +#:
> +dest_port: int | None = None
> +
> +
> +@dataclass
> +class DisableRSS(Params):
> +"""Disables RSS (Receive Side Scaling)."""
> +
> +_disable_rss: Literal[True] = field(
> +default=True, init=False, metadata=Params.long("disable-rss")
> +)
> +
> +
> +@dataclass
> +class SetRSSIPOnly(Params):
> +"""Sets RSS (Receive Side Scaling) functions for IPv4/IPv6 only."""
> +
> +_rss_ip: Literal[True] = field(default=True, init=False, 
> metadata=Params.long("rss-ip"))
> +
> +
> +@dataclass
> +class SetRSSUDP(Params):
> +"""Sets RSS (Receive Side Scaling) functions for IPv4/IPv6 and UDP."""
> +
> +_rss_udp: Literal[True] = field(default=True, init=False, 
> metadata=Params.long("rss-udp"))
> +
> +
> +class RSSSetting(EnumMeta):
> +"""Enum representing a RSS setting. Each property is a class that needs 
> to be initialised."""
> +
> +#:
> +Disabled = DisableRSS
> +#:
> +SetIPOnly = SetRSSIPOnly
> +#:
> +SetUDP = SetRSSUDP
> +
> +
> +class SimpleForwardingModes(StrEnum):
> +r"""The supported packet forwarding modes for 
> :class:`~TestPmdShell`\s."""
> +
> +#:
> +io = auto()
> +#:
> +mac = auto()
> +#:
> +macswap = auto()
> +#:
> +rxonly = auto()
> +#:
> +csum = auto()
> +#:
> +icmpecho = auto()
> +#:
> +ieee1588 = auto()
> +#:
> +fivetswap = "5tswap"
> +#:
> +share

Re: [RFC v2] eal: provide option to use compiler memcpy instead of RTE

2024-05-28 Thread Mattias Rönnblom

On 2024-05-28 16:59, Stephen Hemminger wrote:

On Tue, 28 May 2024 10:19:15 +0200
Mattias Rönnblom  wrote:

   


I've tested this patch some with DSW micro benchmarks, and the result is
a 2.5% reduction of the DSW+testapp overhead with cc/libc memcpy. GCC 11.4.

We've also run a characteristic test suite of a large, real-world app.
Here, we saw no effect. GCC 10.5.

x86_64 in both cases (Skylake and Raptor Lake).

Last time we did the same, there was a noticeable performance
degradation in both the above cases.

This is not a lot of data points, but I think we should consider
making the custom RTE memcpy() implementations optional in the next
release, and if no-one complains, remove the implementations in the next
release.


Let's go further.

1. Announce that rte_memcpy will be marked deprecated in 24.11 release

2. In 24.11 do a global replace of rte_memcpy on the tree.
And mark rte_memcpy as deprecated.

3. In 25.11 it can go away.


If/when rte_memcpy.h is just a tiny memcpy() wrapper, the maintenance 
burden is pretty much eliminated.


Keeping it around will allow older applications to compile against 
newer DPDK versions.


You can always discourage its use in the API documentation.

Also, hopefully, some day, we will have a non-temporal memcpy(), and 
those functions need a home.


Re: [PATCH v2 10/10] net/cnxk: define CPT HW result format for PMD API

2024-05-28 Thread Jerin Jacob
On Tue, May 28, 2024 at 12:43 PM Nithin Dabilpuram
 wrote:
>
> From: Srujana Challa 
>
> Defines CPT HW result format for PMD API,
> rte_pmd_cnxk_inl_ipsec_res().
>
> Signed-off-by: Srujana Challa 

Series applied to dpdk-next-net-mrvl/for-main. Thanks


Re: [RFC v2] eal: provide option to use compiler memcpy instead of RTE

2024-05-28 Thread Mattias Rönnblom

On 2024-05-28 11:07, Morten Brørup wrote:

From: Mattias Rönnblom [mailto:hof...@lysator.liu.se]
Sent: Tuesday, 28 May 2024 11.00

On 2024-05-28 10:27, Bruce Richardson wrote:

On Tue, May 28, 2024 at 10:19:15AM +0200, Mattias Rönnblom wrote:

On 2024-05-28 09:43, Mattias Rönnblom wrote:

Provide build option to have functions in <rte_memcpy.h> delegate to
the standard compiler/libc memcpy(), instead of using the various
traditional, handcrafted, per-architecture rte_memcpy()
implementations.

A new meson build option 'use_cc_memcpy' is added. The default is
true. It's not obvious what should be the default, but compiler
memcpy() is enabled by default in this RFC so any tests run with this
patch use the new approach.

One purpose of this RFC is to make it easy to evaluate the costs and
benefits of a switch.



I've tested this patch some with DSW micro benchmarks, and the result is a
2.5% reduction of the DSW+testapp overhead with cc/libc memcpy. GCC 11.4.

We've also run a characteristic test suite of a large, real-world app. Here,
we saw no effect. GCC 10.5.

x86_64 in both cases (Skylake and Raptor Lake).

Last time we did the same, there was a noticeable performance degradation
in both the above cases.


Mattias, which compiler was that?



GCC 9, I think.

Not only the compiler changed between those two test runs.

It would be interesting to have some ARM data points as well.


As previously mentioned in another thread, I'm worried about memcpy performance 
with older compilers.
DPDK officially supports GCC 4.9 and clang 3.4 [1].
I don't think degrading performance when using supported compilers is 
considered acceptable.

Alternatively, we could change the DPDK compiler policy from "supported" to "works 
with (but might not perform optimally)".



GCC 4.9 is ten years old.

If you are using an old compiler, odds are you don't really care too 
much about squeezing out max performance, considering how much better 
code generation is in newer compilers.


That said, we obviously don't want to cause large performance 
regressions for no good reason, even for old compilers.



[1]: 
https://doc.dpdk.org/guides-21.11/linux_gsg/sys_reqs.html#compilation-of-the-dpdk



This is not a lot of data points, but I think we should consider making
the custom RTE memcpy() implementations optional in the next release, and

if

no-one complains, remove the implementations in the next release.

(Whether or not [or how long] to keep the wrapper API is another question.)




The other instance I've heard mention of in the past is virtio/vhost, which
used to have a speedup from the custom memcpy.

My own thinking on these cases, is that for targeted settings like these,
we should look to have local memcpy functions written - taking account of
the specifics of each usecase. For virtio/vhost for example, we can have
assumptions around host buffer alignment, and we also can be pretty
confident we are copying to another CPU. For DSW, or other eventdev cases,
we would only be looking at copies of multiples of 16, with guaranteed
8-byte alignment on both source and destination. Writing efficient copy fns


In such cases, you should first try to tell the compiler that it's safe
to assume that the pointers have a certain alignment.

void copy256(void *dst, const void *src)
{
  memcpy(dst, src, 256);
}

void copy256_a(void *dst, const void *src)
{
  void *dst_a = __builtin_assume_aligned(dst, 32);
  const void *src_a = __builtin_assume_aligned(src, 32);
  memcpy(dst_a, src_a, 256);
}

The first will generate loads/stores without alignment restrictions,
while the latter will use things like vmovdqa or vmovaps.

(I doubt there's much of a performance difference though, if any at all.)


Interesting.




for specific scenarios can be faster and more effective than trying to
write a general, optimized in all cases, memcpy. It also discourages the
use of non-libc memcpy except where really necessary.


Good idea, Bruce.
I have previously worked on an optimized memcpy, where information about 
alignment, multiples, non-temporal source/destination, etc. is passed as flags 
to the function [2]. But it turned into too much work, so I never finished it.

If we start with local memcpy functions optimized for each specific use case, 
we still have the option of consolidating them into a common rte_memcpy 
function later. It will also reveal which flags/features such a common function 
needs to support.

[2]: https://inbox.dpdk.org/dev/20221010064600.16495-1...@smartsharesystems.com/



Naturally, if we find there are a lot of cases where use of libc memcpy
slows us down, we will want to keep a general rte_memcpy. However, I'd hope
the slowdown cases are very few.

/Bruce


Re: [PATCH v15 07/11] net/tap: use libbpf to load new BPF program

2024-05-28 Thread Cody Cheng
Hi, I've added the Ubuntu 24.04 environment to the Community Lab but
without werror enabled as the rte_pcapng.c warning would cause them to
fail. Once the rte_pcapng.c warning is fixed, I will re-enable werror. I
will send an email to the ci mailing list with the logs of the warning so
that people are aware of it.

Thanks,
Cody

On Tue, May 21, 2024 at 4:14 PM Stephen Hemminger <
step...@networkplumber.org> wrote:

> There were multiple issues in the RSS queue support in the TAP
> driver. This required extensive rework of the BPF support.
>
> Change the BPF loading to use bpftool to
> create a skeleton header file, and load with libbpf.
> The BPF is always compiled from source so less chance that
> source and instructions diverge. Also resolves issue where
> libbpf and source get out of sync. The program
> is only loaded once, so if multiple rules are created
> only one BPF program is loaded in kernel.
>
> The new BPF program only needs a single action.
> No need for action and re-classification step.
>
> It also fixes the missing bits from the original.
> - supports setting RSS key per flow
> - level of hash can be L3 or L3/L4.
>
> Bugzilla ID: 1329
>
> Signed-off-by: Stephen Hemminger 
> ---
>  doc/guides/rel_notes/release_24_07.rst |   3 +
>  drivers/net/tap/bpf/meson.build|  81 +++--
>  drivers/net/tap/meson.build|  39 ++-
>  drivers/net/tap/rte_eth_tap.c  |  14 +-
>  drivers/net/tap/rte_eth_tap.h  |   6 +-
>  drivers/net/tap/tap_flow.c | 416 ++---
>  drivers/net/tap/tap_flow.h |  17 +-
>  drivers/net/tap/tap_rss.h  |  10 +-
>  drivers/net/tap/tap_tcmsgs.h   |   4 +-
>  9 files changed, 186 insertions(+), 404 deletions(-)
>
> diff --git a/doc/guides/rel_notes/release_24_07.rst
> b/doc/guides/rel_notes/release_24_07.rst
> index a6295359b1..37a6e98637 100644
> --- a/doc/guides/rel_notes/release_24_07.rst
> +++ b/doc/guides/rel_notes/release_24_07.rst
> @@ -59,6 +59,9 @@ New Features
>
>* Updated to support up to 8 queues when used by secondary process.
>
> +  * Fixed support of RSS flow action to work with current Linux
> +kernels and BPF tooling. Will only be enabled if clang, libbpf 1.0
> +and bpftool are available.
>
>  Removed Items
>  -
> diff --git a/drivers/net/tap/bpf/meson.build
> b/drivers/net/tap/bpf/meson.build
> index f2c03a19fd..df497948e2 100644
> --- a/drivers/net/tap/bpf/meson.build
> +++ b/drivers/net/tap/bpf/meson.build
> @@ -1,17 +1,26 @@
>  # SPDX-License-Identifier: BSD-3-Clause
>  # Copyright 2024 Stephen Hemminger 
>
> -enable_tap_rss = false
> -
> -libbpf = dependency('libbpf', required: false, method: 'pkg-config')
> +# Loading BPF requires libbpf
> +# and the bpf_map__XXX API's were introduced in 0.8.0
> +libbpf = dependency('libbpf', version: '>= 1.0',
> +required: false, method: 'pkg-config')
>  if not libbpf.found()
>  message('net/tap: no RSS support missing libbpf')
>  subdir_done()
>  endif
>
> +# Making skeleton needs bpftool
>  # Debian install this in /usr/sbin which is not in $PATH
> -bpftool = find_program('bpftool', '/usr/sbin/bpftool', required: false,
> version: '>= 5.6.0')
> -if not bpftool.found()
> +bpftool_supports_skel = false
> +bpftool = find_program('bpftool', '/usr/sbin/bpftool', required: false)
> +if bpftool.found()
> +# Some Ubuntu versions have non-functional bpftool
> +bpftool_supports_skel = run_command(bpftool, 'gen', 'help',
> +check:false).returncode() == 0
> +endif
> +
> +if not bpftool_supports_skel
>  message('net/tap: no RSS support missing bpftool')
>  subdir_done()
>  endif
> @@ -39,43 +48,47 @@ machine_name = run_command('uname',
> '-m').stdout().strip()
>  march_include_dir = '/usr/include/' + machine_name + '-linux-gnu'
>
>  clang_flags = [
> -'-O2',
> -'-Wall',
> -'-Wextra',
> -'-target',
> -'bpf',
> -'-g',
> -'-c',
> +# these are flags used to build the BPF code
> +'-O2',
> +'-Wall',
> +'-Wextra',
> +max_queues,
> +'-target',
> +'bpf',
> +'-g',
> +'-c',
>  ]
>
> +# Command used to compile BPF pgrograme
>  bpf_o_cmd = [
> -clang,
> -clang_flags,
> -'-idirafter',
> -libbpf_include_dir,
> -'-idirafter',
> -march_include_dir,
> -'@INPUT@',
> -'-o',
> -'@OUTPUT@'
> +clang,
> +clang_flags,
> +'-idirafter',
> +libbpf_include_dir,
> +'-idirafter',
> +march_include_dir,
> +'@INPUT@',
> +'-o',
> +'@OUTPUT@',
>  ]
>
> +# Command used to generate header file from BPF object
>  skel_h_cmd = [
> -bpftool,
> -'gen',
> -'skeleton',
> -'@INPUT@'
> +bpftool,
> +'gen',
> +'skeleton',
> +'@INPUT@',
>  ]
>
>  tap_rss_o = custom_target(
> -'tap_rss.bpf.o',
> -input: 'tap_rss.c',
> -

Re: [PATCH v2 2/8] dts: use Params for interactive shells

2024-05-28 Thread Nicholas Pratte
Provided a review for the wrong version...

Tested-by: Nicholas Pratte 
Reviewed-by: Nicholas Pratte 

On Thu, May 9, 2024 at 7:21 AM Luca Vizzarro  wrote:
>
> Make it so that interactive shells accept an implementation of `Params`
> for app arguments. Convert EalParameters to use `Params` instead.
>
> String command line parameters can still be supplied by using the
> `Params.from_str()` method.
>
> Signed-off-by: Luca Vizzarro 
> Reviewed-by: Paul Szczepanek 
> ---
>  .../remote_session/interactive_shell.py   |  12 +-
>  dts/framework/remote_session/testpmd_shell.py |  11 +-
>  dts/framework/testbed_model/node.py   |   6 +-
>  dts/framework/testbed_model/os_session.py |   4 +-
>  dts/framework/testbed_model/sut_node.py   | 124 --
>  dts/tests/TestSuite_pmd_buffer_scatter.py |   3 +-
>  6 files changed, 77 insertions(+), 83 deletions(-)
>
> diff --git a/dts/framework/remote_session/interactive_shell.py 
> b/dts/framework/remote_session/interactive_shell.py
> index 074a541279..9da66d1c7e 100644
> --- a/dts/framework/remote_session/interactive_shell.py
> +++ b/dts/framework/remote_session/interactive_shell.py
> @@ -1,5 +1,6 @@
>  # SPDX-License-Identifier: BSD-3-Clause
>  # Copyright(c) 2023 University of New Hampshire
> +# Copyright(c) 2024 Arm Limited
>
>  """Common functionality for interactive shell handling.
>
> @@ -21,6 +22,7 @@
>  from paramiko import Channel, SSHClient, channel  # type: 
> ignore[import-untyped]
>
>  from framework.logger import DTSLogger
> +from framework.params import Params
>  from framework.settings import SETTINGS
>
>
> @@ -40,7 +42,7 @@ class InteractiveShell(ABC):
>  _ssh_channel: Channel
>  _logger: DTSLogger
>  _timeout: float
> -_app_args: str
> +_app_params: Params
>
>  #: Prompt to expect at the end of output when sending a command.
>  #: This is often overridden by subclasses.
> @@ -63,7 +65,7 @@ def __init__(
>  interactive_session: SSHClient,
>  logger: DTSLogger,
>  get_privileged_command: Callable[[str], str] | None,
> -app_args: str = "",
> +app_params: Params = Params(),
>  timeout: float = SETTINGS.timeout,
>  ) -> None:
>  """Create an SSH channel during initialization.
> @@ -74,7 +76,7 @@ def __init__(
>  get_privileged_command: A method for modifying a command to 
> allow it to use
>  elevated privileges. If :data:`None`, the application will 
> not be started
>  with elevated privileges.
> -app_args: The command line arguments to be passed to the 
> application on startup.
> +app_params: The command line parameters to be passed to the 
> application on startup.
>  timeout: The timeout used for the SSH channel that is dedicated 
> to this interactive
>  shell. This timeout is for collecting output, so if reading 
> from the buffer
>  and no output is gathered within the timeout, an exception 
> is thrown.
> @@ -87,7 +89,7 @@ def __init__(
>  self._ssh_channel.set_combine_stderr(True)  # combines stdout and 
> stderr streams
>  self._logger = logger
>  self._timeout = timeout
> -self._app_args = app_args
> +self._app_params = app_params
>  self._start_application(get_privileged_command)
>
>  def _start_application(self, get_privileged_command: Callable[[str], 
> str] | None) -> None:
> @@ -100,7 +102,7 @@ def _start_application(self, get_privileged_command: 
> Callable[[str], str] | None
>  get_privileged_command: A function (but could be any callable) 
> that produces
>  the version of the command with elevated privileges.
>  """
> -start_command = f"{self.path} {self._app_args}"
> +start_command = f"{self.path} {self._app_params}"
>  if get_privileged_command is not None:
>  start_command = get_privileged_command(start_command)
>  self.send_command(start_command)
> diff --git a/dts/framework/remote_session/testpmd_shell.py 
> b/dts/framework/remote_session/testpmd_shell.py
> index cb2ab6bd00..7eced27096 100644
> --- a/dts/framework/remote_session/testpmd_shell.py
> +++ b/dts/framework/remote_session/testpmd_shell.py
> @@ -22,6 +22,7 @@
>
>  from framework.exception import InteractiveCommandExecutionError
>  from framework.settings import SETTINGS
> +from framework.testbed_model.sut_node import EalParams
>  from framework.utils import StrEnum
>
>  from .interactive_shell import InteractiveShell
> @@ -118,8 +119,14 @@ def _start_application(self, get_privileged_command: 
> Callable[[str], str] | None
>  Also find the number of pci addresses which were allowed on the 
> command line when the app
>  was started.
>  """
> -self._app_args += " -i --mask-event intr_lsc"
> -self.number_of_ports = self._app_args.count("-a ")
> +self._app_params += " -i --m

Re: [PATCH] net/cnxk: fix promiscuous state after MAC change

2024-05-28 Thread Jerin Jacob
On Tue, May 28, 2024 at 2:18 PM Rahul Bhansali  wrote:
>
> If promiscuous mode is enabled and default MAC address is set
> again then promiscuous mode gets disabled in hardware.
>
> This change will restore promiscuous behavior after configuring
> default MAC address.
>
> Fixes: 5fe86db2a0dd ("net/cnxk: support MAC address set")
> Cc: sta...@dpdk.org
>
> Signed-off-by: Rahul Bhansali 

Applied to dpdk-next-net-mrvl/for-main. Thanks


> ---
>  drivers/net/cnxk/cnxk_ethdev_ops.c | 7 +++
>  1 file changed, 7 insertions(+)
>
> diff --git a/drivers/net/cnxk/cnxk_ethdev_ops.c 
> b/drivers/net/cnxk/cnxk_ethdev_ops.c
> index c8260fcb9c..b1093dd584 100644
> --- a/drivers/net/cnxk/cnxk_ethdev_ops.c
> +++ b/drivers/net/cnxk/cnxk_ethdev_ops.c
> @@ -451,6 +451,13 @@ cnxk_nix_mac_addr_set(struct rte_eth_dev *eth_dev, 
> struct rte_ether_addr *addr)
> roc_nix_npc_mac_addr_set(nix, dev->mac_addr);
> goto exit;
> }
> +
> +   if (eth_dev->data->promiscuous) {
> +   rc = roc_nix_mac_promisc_mode_enable(nix, true);
> +   if (rc)
> +   plt_err("Failed to setup promisc mode in mac, 
> rc=%d(%s)", rc,
> +   roc_error_msg_get(rc));
> +   }
> }
>
> /* Update mac address to cnxk ethernet device */
> --
> 2.25.1
>


Get involved

2024-05-28 Thread Mahmoud Maatouq
Hello,
First of all, sorry if this is not the right place for this subject.
I'm trying to get involved and contribute to DPDK. I went through the
contribution page in the docs, but I think it doesn't have enough info. I can
see some bugs reported on Bugzilla; my questions are:
How do I get a bug assigned to me?
Is there any label/list for "good first issue" kind of bugs?
Regards.


Re: [PATCH v5 0/4] add pointer compression API

2024-05-28 Thread Paul Szczepanek



On 24/05/2024 10:09, Konstantin Ananyev wrote:
> 
> 
>> I have added macros to help find the parameters and I have added mempool
>> functions that allow you to determine if you can use the mempool and
>> what params it needs. The new mempool functions are mentioned in the
>> docs for ptr compress.
>> Please take a look at v11.
> 
> Great, thanks.
> Will try to have a look in next few days. 
> With these functions in place, can we produce a unit-test that
> will use together these new mempool functions and compress API? 
> Something like: 
> - allocate mempool 
> - deduce base_pointer for it
> - main_loop_start:
> producer(s):  mempool_get(); ; 
> ring_enqueue();  
> consumer(s): ring_dequeue(); ; mempool_put();
> - main_loop_end
> - free mempool

The v11 already includes mempool base pointer and range calculation in
the mempool test and the functions are mentioned in the ptr compress lib
docs. The ptr compress test doesn't use a mempool to minimise dependencies.

I have a v12 pending (awaiting internal reviews) that addresses Morten's
comments (adds prefix, adds tests and doxygen for all the macros, uses
rte_bitops) and a fix for the guide which had the wrong letter case for
the MACRO.


Re: [PATCH v11 2/6] mempool: add functions to get extra mempool info

2024-05-28 Thread Paul Szczepanek


On 24/05/2024 13:20, Morten Brørup wrote:
>> From: Paul Szczepanek [mailto:paul.szczepa...@arm.com]
>> Sent: Friday, 24 May 2024 10.37
>>
>> +size_t rte_mempool_get_obj_alignment(struct rte_mempool *mp)
>> +{
>> +if (mp == NULL)
>> +return 0;
>> +
>> +if (mp->flags & RTE_MEMPOOL_F_NO_CACHE_ALIGN)
>> +return sizeof(uint64_t);
>> +else
>> +return RTE_MEMPOOL_ALIGN;
>> +}
> 
> The object alignment depends on the underlying mempool driver. You cannot 
> assume that it is either sizeof(uint64_t) or cache line aligned.
> 
> Refer to the calc_mem_size driver operation, which also provides object 
> alignment information:
> https://elixir.bootlin.com/dpdk/v24.03/source/lib/mempool/rte_mempool.h#L529
> 
> If you need this function, you need to add a new driver operation, and your 
> function above can be the default for this operation, like for the the 
> calc_mem_size driver operation:
> https://elixir.bootlin.com/dpdk/v24.03/source/lib/mempool/rte_mempool_ops.c#L120
> 

As discussed on Slack, the alignment you mention is the memzone alignment,
which is distinct from the object alignment that is enforced by the
mempool according to the RTE_MEMPOOL_F_NO_CACHE_ALIGN flag. Objects may
have higher alignment; the alignment returned by the new function is the
minimum guaranteed one.
I addressed your other comments in v12 (pending internal review).


Re: [PATCH v2 2/8] dts: use Params for interactive shells

2024-05-28 Thread Jeremy Spewock
Reviewed-by: Jeremy Spewock 


Re: [PATCH v2 3/8] dts: refactor EalParams

2024-05-28 Thread Jeremy Spewock
Reviewed-by: Jeremy Spewock 


Re: [PATCH v2 5/8] dts: add testpmd shell params

2024-05-28 Thread Jeremy Spewock
This looks good. The only comment I had was that in some classes the
docstrings didn't get updated to what was discussed previously in the
comments (making sure the comments are included in the class's
docstring). I tried to point out a few places where I noticed it.
Other than those comments however:

Reviewed-by: Jeremy Spewock 

On Thu, May 9, 2024 at 7:21 AM Luca Vizzarro  wrote:
>
> Implement all the testpmd shell parameters into a data structure.
>
> Signed-off-by: Luca Vizzarro 
> Reviewed-by: Paul Szczepanek 
> ---
>  dts/framework/params/testpmd.py   | 608 ++
>  dts/framework/remote_session/testpmd_shell.py |  42 +-
>  dts/tests/TestSuite_pmd_buffer_scatter.py |   5 +-
>  3 files changed, 615 insertions(+), 40 deletions(-)
>  create mode 100644 dts/framework/params/testpmd.py
>

> +
> +
> +class PortTopology(StrEnum):
> +"""Enum representing the port topology."""
> +
> +paired = auto()
> +"""In paired mode, the forwarding is between pairs of ports, e.g.: 
> (0,1), (2,3), (4,5)."""
> +chained = auto()
> +"""In chained mode, the forwarding is to the next available port in the 
> port mask, e.g.:
> +(0,1), (1,2), (2,0).
> +
> +The ordering of the ports can be changed using the portlist testpmd 
> runtime function.
> +"""
> +loop = auto()
> +"""In loop mode, ingress traffic is simply transmitted back on the same 
> interface."""
> +

This should likely be the comment style for class vars: `#:`

> +

> +
> +@convert_str(hex_from_flag_value)
> +@unique
> +class HairpinMode(Flag):
> +"""Flag representing the hairpin mode."""
> +
> +TWO_PORTS_LOOP = 1 << 0
> +"""Two hairpin ports loop."""
> +TWO_PORTS_PAIRED = 1 << 1
> +"""Two hairpin ports paired."""
> +EXPLICIT_TX_FLOW = 1 << 4
> +"""Explicit Tx flow rule."""
> +FORCE_RX_QUEUE_MEM_SETTINGS = 1 << 8
> +"""Force memory settings of hairpin RX queue."""
> +FORCE_TX_QUEUE_MEM_SETTINGS = 1 << 9
> +"""Force memory settings of hairpin TX queue."""
> +RX_QUEUE_USE_LOCKED_DEVICE_MEMORY = 1 << 12
> +"""Hairpin RX queues will use locked device memory."""
> +RX_QUEUE_USE_RTE_MEMORY = 1 << 13
> +"""Hairpin RX queues will use RTE memory."""
> +TX_QUEUE_USE_LOCKED_DEVICE_MEMORY = 1 << 16
> +"""Hairpin TX queues will use locked device memory."""
> +TX_QUEUE_USE_RTE_MEMORY = 1 << 18
> +"""Hairpin TX queues will use RTE memory."""
> +

Same thing in this class, these should likely be documented as class
vars with `#:`

> +

> +class SimpleMempoolAllocationMode(StrEnum):
> +"""Enum representing simple mempool allocation modes."""
> +
> +native = auto()
> +"""Create and populate mempool using native DPDK memory."""
> +xmem = auto()
> +"""Create and populate mempool using externally and anonymously 
> allocated area."""
> +xmemhuge = auto()
> +"""Create and populate mempool using externally and anonymously 
> allocated hugepage area."""
> +

Also here. Same as the previous, should likely be `#:`

> +
> +@dataclass(kw_only=True)

> 2.34.1
>


Re: [PATCH v2 6/8] dts: use testpmd params for scatter test suite

2024-05-28 Thread Jeremy Spewock
Reviewed-by: Jeremy Spewock 


Re: [PATCH v2 7/8] dts: rework interactive shells

2024-05-28 Thread Jeremy Spewock
On Thu, May 9, 2024 at 7:21 AM Luca Vizzarro  wrote:
>
> The way nodes and interactive shells interact makes it difficult to
> develop for static type checking and hinting. The current system relies
> on a top-down approach, attempting to give a generic interface to the
> test developer, hiding the interaction of concrete shell classes as much
> as possible. When working with strong typing this approach is not ideal,
> as Python's implementation of generics is still rudimentary.
>
> This rework reverses the tests interaction to a bottom-up approach,
> allowing the test developer to call concrete shell classes directly,
> and let them ingest nodes independently. While also re-enforcing type
> checking and making the code easier to read.
>
> Signed-off-by: Luca Vizzarro 
> Reviewed-by: Paul Szczepanek 
> ---
>  dts/framework/params/eal.py   |   6 +-
>  dts/framework/remote_session/dpdk_shell.py| 104 
>  .../remote_session/interactive_shell.py   |  75 +++-
>  dts/framework/remote_session/python_shell.py  |   4 +-
>  dts/framework/remote_session/testpmd_shell.py |  64 +-
>  dts/framework/testbed_model/node.py   |  36 +-
>  dts/framework/testbed_model/os_session.py |  36 +-
>  dts/framework/testbed_model/sut_node.py   | 112 +-
>  .../testbed_model/traffic_generator/scapy.py  |   4 +-
>  dts/tests/TestSuite_hello_world.py|   7 +-
>  dts/tests/TestSuite_pmd_buffer_scatter.py |  21 ++--
>  dts/tests/TestSuite_smoke_tests.py|   2 +-
>  12 files changed, 201 insertions(+), 270 deletions(-)
>  create mode 100644 dts/framework/remote_session/dpdk_shell.py
>

>  def __init__(
>  self,
> -interactive_session: SSHClient,
> -logger: DTSLogger,
> -get_privileged_command: Callable[[str], str] | None,
> +node: Node,
>  app_params: Params = Params(),
> +privileged: bool = False,
>  timeout: float = SETTINGS.timeout,
> +start_on_init: bool = True,
>  ) -> None:
>  """Create an SSH channel during initialization.
>
>  Args:
> -interactive_session: The SSH session dedicated to interactive 
> shells.
> -logger: The logger instance this session will use.
> -get_privileged_command: A method for modifying a command to 
> allow it to use
> -elevated privileges. If :data:`None`, the application will 
> not be started
> -with elevated privileges.
> +node: The node on which to run start the interactive shell.
>  app_params: The command line parameters to be passed to the 
> application on startup.
> +privileged: Enables the shell to run as superuser.
>  timeout: The timeout used for the SSH channel that is dedicated 
> to this interactive
>  shell. This timeout is for collecting output, so if reading 
> from the buffer
>  and no output is gathered within the timeout, an exception 
> is thrown.
> +start_on_init: Start interactive shell automatically after 
> object initialisation.
>  """
> -self._interactive_session = interactive_session
> -self._ssh_channel = self._interactive_session.invoke_shell()
> +self._node = node
> +self._logger = node._logger
> +self._app_params = app_params
> +self._privileged = privileged
> +self._timeout = timeout
> +# Ensure path is properly formatted for the host
> +
> self._update_path(self._node.main_session.join_remote_path(self.path))
> +
> +self.__post_init__()
> +
> +if start_on_init:
> +self.start_application()

What's the reason for including start_on_init? Is there a time when
someone would create an application but not want to start it when they
create it? It seems like it is always true currently and I'm not sure
we would want it to be delayed otherwise (except in cases like the
context manager patch where we want to enforce that it is only started
for specific periods of time).

> +
> +def __post_init__(self):

Is the name of this method meant to mimic that of the dataclasses? It
might also make sense to call it something like `_post_init()` as just
a regular private method, I'm not sure it matters either way.
Additionally, I think in other super classes which contained functions
that were optionally implemented by subclasses we omitted the `pass`
and just left the function stub empty other than the doc-string.
Either way this does the same thing, but it might be better to make
them consistent one way or the other.

> +"""Overridable. Method called after the object init and before 
> application start."""
> +pass
> +

>
> -def _start_application(self, get_privileged_command: Callable[[str], 
> str] | None) -> None:
> +def start_application(self) -> None:
>  """Starts a new interacti

Re: [PATCH v2 1/8] dts: add params manipulation module

2024-05-28 Thread Jeremy Spewock
I think there was just one typo, otherwise:

Reviewed-by: Jeremy Spewock 

On Thu, May 9, 2024 at 7:21 AM Luca Vizzarro  wrote:
>
> This commit introduces a new "params" module, which adds a new way
> to manage command line parameters. The provided Params dataclass
> is able to read the fields of its child class and produce a string
> representation to supply to the command line. Any data structure
> that is intended to represent command line parameters can inherit it.
>
> The main purpose is to make it easier to represent data structures that
> map to parameters. Aiding quicker development, while minimising code
> bloat.
>
> Signed-off-by: Luca Vizzarro 
> Reviewed-by: Paul Szczepanek 
> ---

> +def comma_separated(values: Iterable[Any]) -> str:
> +"""Converts an iterable in a comma-separated string."""

I think this was meant to be "...an iterable into a comma-separated..."

> +return ",".join([str(value).strip() for value in values if value is not 
> None])
> +
> +

> 2.34.1
>


Re: [PATCH v2 4/8] dts: remove module-wide imports

2024-05-28 Thread Jeremy Spewock
Reviewed-by: Jeremy Spewock 


Re: [PATCH v2 8/8] dts: use Unpack for type checking and hinting

2024-05-28 Thread Jeremy Spewock
Reviewed-by: Jeremy Spewock 


Re: [RFC 0/2] Add support for link speed lanes

2024-05-28 Thread Damodharam Ammepalli
 From: Ferruh Yigit 
Date: Wednesday, May 22, 2024 at 2:00 PM
To: Damodharam Ammepalli , Ajit Khaparde

Cc: dev@dpdk.org , Thomas Monjalon ,
huangdengdui , lihuisong (C) ,
step...@networkplumber.org ,
fengcheng...@huawei.com , haij...@huawei.com <
haij...@huawei.com>
Subject: Re: [RFC 0/2] Add support for link speed lanes

On 3/22/2024 10:25 PM, Damodharam Ammepalli wrote:
> BRCM576xx NIC modules support speeds with different lanes configuration.
> This is an alternate proposal to
>
https://patchwork.dpdk.org/project/dpdk/list/?series=31593
>
> Please provide your review. Broadcom driver patches will follow.
>
> Damodharam Ammepalli (2):
>   lib/ethdev: Add link_speed lanes support into rte lib
>   testpmd: Add speed lanes to testpmd config and show command
>

Hi Damodharam,

As discussed in the other thread, having dedicated APIs for setting lanes
makes the API clearer, so we can continue with this design.

This RFC proposes two APIs, but the agreement in the other thread was
around three new APIs.

Copy/pasting Thomas's suggestion
"
 There are 3 needs:
- set PHY lane config
- get PHY lane config
- get PHY lane capabilities
"

And "get PHY lane capabilities" should return capability per speed,
similar to FEC one.


Can you please update this RFC to have three APIs as mentioned above?

Thanks,
Ferruh

Noted Ferruh. Will update this RFC as commented.

-- 
This electronic communication and the information and any files transmitted 
with it, or attached to it, are confidential and are intended solely for 
the use of the individual or entity to whom it is addressed and may contain 
information that is confidential, legally privileged, protected by privacy 
laws, or otherwise restricted from disclosure to anyone else. If you are 
not the intended recipient or the person responsible for delivering the 
e-mail to the intended recipient, you are hereby notified that any use, 
copying, distributing, dissemination, forwarding, printing, or copying of 
this e-mail is strictly prohibited. If you received this e-mail in error, 
please return the e-mail to the sender, delete it from your computer, and 
destroy any printed copy of it.


Re: [PATCH 2/2] net/virtio: fix fortify memcpy warning

2024-05-28 Thread Thomas Monjalon
21/05/2024 03:01, Stephen Hemminger:
> If fortify is enabled, it will generate a warning if memcpy
> src is NULL even if size is zero. This happens if the MP message
> sync is called with no file descriptors.
> 
> Bugzilla ID 1446
> Fixes: 6a84c37e3975 ("net/virtio-user: add vhost-user adapter layer")

Bugzilla ID: 1446
Fixes: 6a84c37e3975 ("net/virtio-user: add vhost-user adapter layer")
Cc: sta...@dpdk.org

> 
> Signed-off-by: Stephen Hemminger 

Series applied quickly as it is blocking compilation on recent Ubuntu.




hash lookup in secondary process

2024-05-28 Thread Mohsen Meamarian
Hi all,

I have two DPDK apps, one primary and one secondary. I create a hash table
in the primary DPDK app like this:

static struct rte_hash_parameters ut_params = {
.name = "BufferTable2",
.entries = 1024*256,
.key_len = sizeof(uint64_t),
.hash_func = rte_jhash,
//.extra_flag=RTE_HASH_EXTRA_FLAGS_EXT_TABLE,
//.extra_flag=RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY,
};

buffer_table = rte_hash_create(&ut_params);
if (buffer_table == NULL) {
printf("UNABLE TO CREATE HASHTABLE\n");
rte_exit(EXIT_FAILURE, "UNABLE TO CREATE HASHTABLE\n");
}


and in secondary I use :

h=rte_hash_find_existing("BufferTable1");
if (!h) {
fprintf(stderr, "Failed to find existing hash table\n");
return -1;
}

int ret = rte_hash_lookup_data(buffer_table, &teid,
(void**)&packet_in_bucket);

I can find the table pointer, but I get a segmentation fault when I try
to look something up or add a key/value pair.
I cannot add key/value pairs in the primary app, so it has to be done in
the secondary app.

I also checked rte_hash_lookup_with_hash_data and rte_hash_add_key_with_hash.
These take a hash signature along with the key/value, but they also
segfault. With this approach, the hash signature has to be calculated manually.


I also saw this in the DPDK documentation:

The use of function pointers between multiple processes running based on
different compiled binaries is not supported, since the location of a given
function in one process may be different from its location in a second.
This prevents the librte_hash library from behaving properly as in a
multi-process instance since it uses a pointer to the hash function
internally.
https://doc.dpdk.org/guides/prog_guide/multi_proc_support.html

Can you explain what I should do?
DPDK version: 24.03



Best,
Mohsen


Re: [PATCH] r8125: add r8125 ethernet poll mode driver

2024-05-28 Thread Stephen Hemminger
On Mon, 27 May 2024 14:15:28 +0800
Howard Wang  wrote:

> r8125 is for Realtek 2.5 Gigabit Ethernet NICs.
> 
> Signed-off-by: Howard Wang 

Do the two drivers have any common code? Many drivers in DPDK use
drivers/common for cases where two drivers can reuse the same code.