Re: [PATCH v2] build: add missing arch define for Arm

2022-01-19 Thread Thomas Monjalon
17/01/2022 08:15, Ruifeng Wang:
> --- a/config/arm/meson.build
> +++ b/config/arm/meson.build
> @@ -49,6 +49,7 @@ implementer_generic = {
>  ['RTE_ARCH_ARM_NEON_MEMCPY', false],
>  ['RTE_ARCH_STRICT_ALIGN', true],
>  ['RTE_ARCH_ARMv8_AARCH32', true],
> +['RTE_ARCH', 'armv8_aarch32'],
>  ['RTE_CACHE_LINE_SIZE', 64]
>  ]
>  }
> @@ -432,11 +433,13 @@ if dpdk_conf.get('RTE_ARCH_32')
>  else
>  # armv7 build
>  dpdk_conf.set('RTE_ARCH_ARMv7', true)
> +dpdk_conf.set('RTE_ARCH', 'armv7')
>  # the minimum architecture supported, armv7-a, needs the following,
>  machine_args += '-mfpu=neon'
>  endif
>  else
>  # armv8 build
> +dpdk_conf.set('RTE_ARCH', 'arm64')

Juraj commented on v1 that it should be armv8.





Re: [PATCH 2/2] net/cnxk: ethdev Rx/Tx queue status callbacks

2022-01-19 Thread Jerin Jacob
On Fri, Dec 3, 2021 at 10:06 PM Rahul Bhansali  wrote:
>
> Provides ethdev callback support of rx_queue_count,
> rx_descriptor_status and tx_descriptor_status.
>
> Signed-off-by: Rahul Bhansali 

The update to doc/guides/nics/features/cnxk* for "Rx descriptor
status" and "Tx descriptor status" is missing.
Rest looks good to me. Please send the next version.


> ---
>  drivers/net/cnxk/cnxk_ethdev.c |  3 ++
>  drivers/net/cnxk/cnxk_ethdev.h |  5 +++
>  drivers/net/cnxk/cnxk_ethdev_ops.c | 60 ++
>  3 files changed, 68 insertions(+)
>
> diff --git a/drivers/net/cnxk/cnxk_ethdev.c b/drivers/net/cnxk/cnxk_ethdev.c
> index 74f625553d..183fd241d8 100644
> --- a/drivers/net/cnxk/cnxk_ethdev.c
> +++ b/drivers/net/cnxk/cnxk_ethdev.c
> @@ -1595,6 +1595,9 @@ cnxk_eth_dev_init(struct rte_eth_dev *eth_dev)
> int rc, max_entries;
>
> eth_dev->dev_ops = &cnxk_eth_dev_ops;
> +   eth_dev->rx_queue_count = cnxk_nix_rx_queue_count;
> +   eth_dev->rx_descriptor_status = cnxk_nix_rx_descriptor_status;
> +   eth_dev->tx_descriptor_status = cnxk_nix_tx_descriptor_status;
>
> /* Alloc security context */
> sec_ctx = plt_zmalloc(sizeof(struct rte_security_ctx), 0);
> diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
> index 5bfda3d815..43814a81fc 100644
> --- a/drivers/net/cnxk/cnxk_ethdev.h
> +++ b/drivers/net/cnxk/cnxk_ethdev.h
> @@ -559,6 +559,11 @@ void cnxk_nix_rxq_info_get(struct rte_eth_dev *eth_dev, 
> uint16_t qid,
>  void cnxk_nix_txq_info_get(struct rte_eth_dev *eth_dev, uint16_t qid,
>struct rte_eth_txq_info *qinfo);
>
> +/* Queue status */
> +int cnxk_nix_rx_descriptor_status(void *rxq, uint16_t offset);
> +int cnxk_nix_tx_descriptor_status(void *txq, uint16_t offset);
> +uint32_t cnxk_nix_rx_queue_count(void *rxq);
> +
>  /* Lookup configuration */
>  const uint32_t *cnxk_nix_supported_ptypes_get(struct rte_eth_dev *eth_dev);
>  void *cnxk_nix_fastpath_lookup_mem_get(void);
> diff --git a/drivers/net/cnxk/cnxk_ethdev_ops.c 
> b/drivers/net/cnxk/cnxk_ethdev_ops.c
> index ce5f1f7240..1255d6b40f 100644
> --- a/drivers/net/cnxk/cnxk_ethdev_ops.c
> +++ b/drivers/net/cnxk/cnxk_ethdev_ops.c
> @@ -694,6 +694,66 @@ cnxk_nix_txq_info_get(struct rte_eth_dev *eth_dev, 
> uint16_t qid,
> memcpy(&qinfo->conf, &txq_sp->qconf.conf.tx, sizeof(qinfo->conf));
>  }
>
> +uint32_t
> +cnxk_nix_rx_queue_count(void *rxq)
> +{
> +   struct cnxk_eth_rxq_sp *rxq_sp = cnxk_eth_rxq_to_sp(rxq);
> +   struct roc_nix *nix = &rxq_sp->dev->nix;
> +   uint32_t head, tail;
> +
> +   roc_nix_cq_head_tail_get(nix, rxq_sp->qid, &head, &tail);
> +   return (tail - head) % (rxq_sp->qconf.nb_desc);
> +}
> +
> +static inline int
> +nix_offset_has_packet(uint32_t head, uint32_t tail, uint16_t offset, bool 
> is_rx)
> +{
> +   /* Check given offset(queue index) has packet filled/xmit by HW
> +* in case of Rx or Tx.
> +* Also, checks for wrap around case.
> +*/
> +   return ((tail > head && offset <= tail && offset >= head) ||
> +   (head > tail && (offset >= head || offset <= tail))) ?
> +  is_rx :
> +  !is_rx;
> +}
> +
> +int
> +cnxk_nix_rx_descriptor_status(void *rxq, uint16_t offset)
> +{
> +   struct cnxk_eth_rxq_sp *rxq_sp = cnxk_eth_rxq_to_sp(rxq);
> +   struct roc_nix *nix = &rxq_sp->dev->nix;
> +   uint32_t head, tail;
> +
> +   if (rxq_sp->qconf.nb_desc <= offset)
> +   return -EINVAL;
> +
> +   roc_nix_cq_head_tail_get(nix, rxq_sp->qid, &head, &tail);
> +
> +   if (nix_offset_has_packet(head, tail, offset, 1))
> +   return RTE_ETH_RX_DESC_DONE;
> +   else
> +   return RTE_ETH_RX_DESC_AVAIL;
> +}
> +
> +int
> +cnxk_nix_tx_descriptor_status(void *txq, uint16_t offset)
> +{
> +   struct cnxk_eth_txq_sp *txq_sp = cnxk_eth_txq_to_sp(txq);
> +   struct roc_nix *nix = &txq_sp->dev->nix;
> +   uint32_t head = 0, tail = 0;
> +
> +   if (txq_sp->qconf.nb_desc <= offset)
> +   return -EINVAL;
> +
> +   roc_nix_sq_head_tail_get(nix, txq_sp->qid, &head, &tail);
> +
> +   if (nix_offset_has_packet(head, tail, offset, 0))
> +   return RTE_ETH_TX_DESC_DONE;
> +   else
> +   return RTE_ETH_TX_DESC_FULL;
> +}
> +
>  /* It is a NOP for cnxk as HW frees the buffer on xmit */
>  int
>  cnxk_nix_tx_done_cleanup(void *txq, uint32_t free_cnt)
> --
> 2.25.1
>


[PATCH v2] net/bonding: fix RSS not work for bonding

2022-01-19 Thread Yu Wenjun
RSS doesn't work when bond_ethdev_configure is called before
rte_eth_bond_slave_add.

e.g.:
doesn't work (examples/bond/main.c):
rte_eth_bond_create()
rte_eth_dev_configure()
rte_eth_bond_slave_add()
rte_eth_dev_start()

works (testpmd):
rte_eth_bond_create()
rte_eth_bond_slave_add()
rte_eth_dev_configure()
rte_eth_dev_start()

Fixes: 6b1a001ec546 ("net/bonding: fix RSS key length")
Cc: sta...@dpdk.org

Signed-off-by: Yu Wenjun 
---
v2:
- Fixed patch format.

 drivers/net/bonding/rte_eth_bond_pmd.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c 
b/drivers/net/bonding/rte_eth_bond_pmd.c
index 84f4900..31bcee1 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -3504,6 +3504,11 @@ struct bwg_slave {
if (dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS) {
struct rte_eth_rss_conf *rss_conf =
&dev->data->dev_conf.rx_adv_conf.rss_conf;
+
+   if (internals->rss_key_len == 0) {
+   internals->rss_key_len = sizeof(default_rss_key);
+   }
+
if (rss_conf->rss_key != NULL) {
if (internals->rss_key_len > rss_conf->rss_key_len) {
RTE_BOND_LOG(ERR, "Invalid rss key length(%u)",
-- 
1.8.3.1





RE: [PATCH v1 2/6] net/axgbe: toggle PLL settings during rate change

2022-01-19 Thread Namburu, Chandu-babu
[Public]

For series,
Acked-by: Chandubabu Namburu 

-Original Message-
From: sseba...@amd.com  
Sent: Monday, January 10, 2022 5:05 PM
To: dev@dpdk.org
Subject: [PATCH v1 2/6] net/axgbe: toggle PLL settings during rate change

From: Selwin Sebastian 

For each rate change command submission, the FW has to do a phy power off 
sequence internally. For this to happen correctly, the PLL re-initialization 
control setting has to be turned off before sending mailbox commands and 
re-enabled once the command submission is complete. Without the PLL control 
setting, the link up takes longer time in a fixed phy configuration.

Signed-off-by: Selwin Sebastian 
---
 drivers/net/axgbe/axgbe_common.h   |  9 +
 drivers/net/axgbe/axgbe_phy_impl.c | 22 --
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/drivers/net/axgbe/axgbe_common.h b/drivers/net/axgbe/axgbe_common.h
index df0aa21a9b..5a7ac35b6a 100644
--- a/drivers/net/axgbe/axgbe_common.h
+++ b/drivers/net/axgbe/axgbe_common.h
@@ -1314,6 +1314,11 @@
 #define MDIO_VEND2_PMA_CDR_CONTROL 0x8056
 #endif
 
+#ifndef MDIO_VEND2_PMA_MISC_CTRL0
+#define MDIO_VEND2_PMA_MISC_CTRL0  0x8090
+#endif
+
+
 #ifndef MDIO_CTRL1_SPEED1G
 #define MDIO_CTRL1_SPEED1G (MDIO_CTRL1_SPEED10G & ~BMCR_SPEED100)
 #endif
@@ -1392,6 +1397,10 @@ static inline uint32_t high32_value(uint64_t addr)
return (addr >> 32) & 0x0;
 }
 
+#define XGBE_PMA_PLL_CTRL_MASK BIT(15)
+#define XGBE_PMA_PLL_CTRL_SET  BIT(15)
+#define XGBE_PMA_PLL_CTRL_CLEAR 0x0000
+
 /*END*/
 
 /* Bit setting and getting macros
diff --git a/drivers/net/axgbe/axgbe_phy_impl.c 
b/drivers/net/axgbe/axgbe_phy_impl.c
index 02236ec192..dc9489f0aa 100644
--- a/drivers/net/axgbe/axgbe_phy_impl.c
+++ b/drivers/net/axgbe/axgbe_phy_impl.c
@@ -1196,8 +1196,22 @@ static void axgbe_phy_set_redrv_mode(struct axgbe_port 
*pdata)
axgbe_phy_put_comm_ownership(pdata);
 }
 
+static void axgbe_phy_pll_ctrl(struct axgbe_port *pdata, bool enable) {
+   XMDIO_WRITE_BITS(pdata, MDIO_MMD_PMAPMD, MDIO_VEND2_PMA_MISC_CTRL0,
+   XGBE_PMA_PLL_CTRL_MASK,
+   enable ? XGBE_PMA_PLL_CTRL_SET
+   : XGBE_PMA_PLL_CTRL_CLEAR);
+
+   /* Wait for command to complete */
+   rte_delay_us(150);
+}
+
 static void axgbe_phy_start_ratechange(struct axgbe_port *pdata)  {
+   /* Clear the PLL so that it helps in power down sequence */
+   axgbe_phy_pll_ctrl(pdata, false);
+
/* Log if a previous command did not complete */
if (XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS))
PMD_DRV_LOG(NOTICE, "firmware mailbox not ready for 
command\n"); @@ -1213,10 +1227,14 @@ static void 
axgbe_phy_complete_ratechange(struct axgbe_port *pdata)
wait = AXGBE_RATECHANGE_COUNT;
while (wait--) {
if (!XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS))
-   return;
-
+   goto reenable_pll;
rte_delay_us(1500);
}
+
+reenable_pll:
+/* Re-enable the PLL control */
+   axgbe_phy_pll_ctrl(pdata, true);
+
PMD_DRV_LOG(NOTICE, "firmware mailbox command did not complete\n");  }
 
--
2.25.1


[PATCH v2 1/2] common/cnxk: get head-tail of Rx and Tx queues

2022-01-19 Thread Rahul Bhansali
Adds roc APIs roc_nix_cq_head_tail_get, roc_nix_sq_head_tail_get
to get head-tail of receive and transmit queue respectively.

Signed-off-by: Rahul Bhansali 
---
v2 changes:
 - No change

 drivers/common/cnxk/roc_nix.h   |  4 +++
 drivers/common/cnxk/roc_nix_queue.c | 53 +
 drivers/common/cnxk/version.map |  2 ++
 3 files changed, 59 insertions(+)

diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h
index 69a5e8e7b4..d79abfef9f 100644
--- a/drivers/common/cnxk/roc_nix.h
+++ b/drivers/common/cnxk/roc_nix.h
@@ -795,8 +795,12 @@ int __roc_api roc_nix_rq_ena_dis(struct roc_nix_rq *rq, 
bool enable);
 int __roc_api roc_nix_rq_fini(struct roc_nix_rq *rq);
 int __roc_api roc_nix_cq_init(struct roc_nix *roc_nix, struct roc_nix_cq *cq);
 int __roc_api roc_nix_cq_fini(struct roc_nix_cq *cq);
+void __roc_api roc_nix_cq_head_tail_get(struct roc_nix *roc_nix, uint16_t qid,
+   uint32_t *head, uint32_t *tail);
 int __roc_api roc_nix_sq_init(struct roc_nix *roc_nix, struct roc_nix_sq *sq);
 int __roc_api roc_nix_sq_fini(struct roc_nix_sq *sq);
+void __roc_api roc_nix_sq_head_tail_get(struct roc_nix *roc_nix, uint16_t qid,
+   uint32_t *head, uint32_t *tail);

 /* PTP */
 int __roc_api roc_nix_ptp_rx_ena_dis(struct roc_nix *roc_nix, int enable);
diff --git a/drivers/common/cnxk/roc_nix_queue.c 
b/drivers/common/cnxk/roc_nix_queue.c
index c638cd43e4..67f83acdf2 100644
--- a/drivers/common/cnxk/roc_nix_queue.c
+++ b/drivers/common/cnxk/roc_nix_queue.c
@@ -969,3 +969,56 @@ roc_nix_sq_fini(struct roc_nix_sq *sq)

return rc;
 }
+
+void
+roc_nix_cq_head_tail_get(struct roc_nix *roc_nix, uint16_t qid, uint32_t *head,
+uint32_t *tail)
+{
+   struct nix *nix = roc_nix_to_nix_priv(roc_nix);
+   uint64_t reg, val;
+   int64_t *addr;
+
+   if (head == NULL || tail == NULL)
+   return;
+
+   reg = (((uint64_t)qid) << 32);
+   addr = (int64_t *)(nix->base + NIX_LF_CQ_OP_STATUS);
+   val = roc_atomic64_add_nosync(reg, addr);
+   if (val &
+   (BIT_ULL(NIX_CQ_OP_STAT_OP_ERR) | BIT_ULL(NIX_CQ_OP_STAT_CQ_ERR)))
+   val = 0;
+
+   *tail = (uint32_t)(val & 0xFFFFF);
+   *head = (uint32_t)((val >> 20) & 0xFFFFF);
+}
+
+void
+roc_nix_sq_head_tail_get(struct roc_nix *roc_nix, uint16_t qid, uint32_t *head,
+uint32_t *tail)
+{
+   struct nix *nix = roc_nix_to_nix_priv(roc_nix);
+   struct roc_nix_sq *sq = nix->sqs[qid];
+   uint16_t sqes_per_sqb, sqb_cnt;
+   uint64_t reg, val;
+   int64_t *addr;
+
+   if (head == NULL || tail == NULL)
+   return;
+
+   reg = (((uint64_t)qid) << 32);
+   addr = (int64_t *)(nix->base + NIX_LF_SQ_OP_STATUS);
+   val = roc_atomic64_add_nosync(reg, addr);
+   if (val & BIT_ULL(NIX_CQ_OP_STAT_OP_ERR)) {
+   val = 0;
+   return;
+   }
+
+   *tail = (uint32_t)((val >> 28) & 0x3F);
+   *head = (uint32_t)((val >> 20) & 0x3F);
+   sqb_cnt = (uint16_t)(val & 0xFFFF);
+
+   sqes_per_sqb = 1 << sq->sqes_per_sqb_log2;
+
+   /* Update tail index as per used sqb count */
+   *tail += (sqes_per_sqb * (sqb_cnt - 1));
+}
diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
index 07c6720f0c..a9dba47e0e 100644
--- a/drivers/common/cnxk/version.map
+++ b/drivers/common/cnxk/version.map
@@ -107,6 +107,7 @@ INTERNAL {
roc_nix_bpf_timeunit_get;
roc_nix_cq_dump;
roc_nix_cq_fini;
+   roc_nix_cq_head_tail_get;
roc_nix_cq_init;
roc_nix_cqe_dump;
roc_nix_dev_fini;
@@ -222,6 +223,7 @@ INTERNAL {
roc_nix_rx_queue_intr_enable;
roc_nix_sq_dump;
roc_nix_sq_fini;
+   roc_nix_sq_head_tail_get;
roc_nix_sq_init;
roc_nix_stats_get;
roc_nix_stats_queue_get;
--
2.25.1



[PATCH v2 2/2] net/cnxk: ethdev Rx/Tx queue status callbacks

2022-01-19 Thread Rahul Bhansali
Provides ethdev callback support of rx_queue_count,
rx_descriptor_status and tx_descriptor_status.

Signed-off-by: Rahul Bhansali 
---
v2 changes:
 - Updated doc/guides/nics/features/cnxk* for
   "Rx descriptor status" and "Tx descriptor status"

 doc/guides/nics/features/cnxk.ini |  2 +
 doc/guides/nics/features/cnxk_vec.ini |  2 +
 doc/guides/nics/features/cnxk_vf.ini  |  2 +
 drivers/net/cnxk/cnxk_ethdev.c|  3 ++
 drivers/net/cnxk/cnxk_ethdev.h|  5 +++
 drivers/net/cnxk/cnxk_ethdev_ops.c| 60 +++
 6 files changed, 74 insertions(+)

diff --git a/doc/guides/nics/features/cnxk.ini 
b/doc/guides/nics/features/cnxk.ini
index 1623a1803e..0eba334eb4 100644
--- a/doc/guides/nics/features/cnxk.ini
+++ b/doc/guides/nics/features/cnxk.ini
@@ -37,6 +37,8 @@ Inner L4 checksum= Y
 Packet type parsing  = Y
 Timesync = Y
 Timestamp offload= Y
+Rx descriptor status = Y
+Tx descriptor status = Y
 Basic stats  = Y
 Stats per queue  = Y
 Extended stats   = Y
diff --git a/doc/guides/nics/features/cnxk_vec.ini 
b/doc/guides/nics/features/cnxk_vec.ini
index 4b7c2bce4d..df5f358a3e 100644
--- a/doc/guides/nics/features/cnxk_vec.ini
+++ b/doc/guides/nics/features/cnxk_vec.ini
@@ -33,6 +33,8 @@ L4 checksum offload  = Y
 Inner L3 checksum= Y
 Inner L4 checksum= Y
 Packet type parsing  = Y
+Rx descriptor status = Y
+Tx descriptor status = Y
 Basic stats  = Y
 Stats per queue  = Y
 Extended stats   = Y
diff --git a/doc/guides/nics/features/cnxk_vf.ini 
b/doc/guides/nics/features/cnxk_vf.ini
index 0523be434d..a78fbcada0 100644
--- a/doc/guides/nics/features/cnxk_vf.ini
+++ b/doc/guides/nics/features/cnxk_vf.ini
@@ -29,6 +29,8 @@ L4 checksum offload  = Y
 Inner L3 checksum= Y
 Inner L4 checksum= Y
 Packet type parsing  = Y
+Rx descriptor status = Y
+Tx descriptor status = Y
 Basic stats  = Y
 Stats per queue  = Y
 Extended stats   = Y
diff --git a/drivers/net/cnxk/cnxk_ethdev.c b/drivers/net/cnxk/cnxk_ethdev.c
index 74f625553d..183fd241d8 100644
--- a/drivers/net/cnxk/cnxk_ethdev.c
+++ b/drivers/net/cnxk/cnxk_ethdev.c
@@ -1595,6 +1595,9 @@ cnxk_eth_dev_init(struct rte_eth_dev *eth_dev)
int rc, max_entries;

eth_dev->dev_ops = &cnxk_eth_dev_ops;
+   eth_dev->rx_queue_count = cnxk_nix_rx_queue_count;
+   eth_dev->rx_descriptor_status = cnxk_nix_rx_descriptor_status;
+   eth_dev->tx_descriptor_status = cnxk_nix_tx_descriptor_status;

/* Alloc security context */
sec_ctx = plt_zmalloc(sizeof(struct rte_security_ctx), 0);
diff --git a/drivers/net/cnxk/cnxk_ethdev.h b/drivers/net/cnxk/cnxk_ethdev.h
index 5bfda3d815..43814a81fc 100644
--- a/drivers/net/cnxk/cnxk_ethdev.h
+++ b/drivers/net/cnxk/cnxk_ethdev.h
@@ -559,6 +559,11 @@ void cnxk_nix_rxq_info_get(struct rte_eth_dev *eth_dev, 
uint16_t qid,
 void cnxk_nix_txq_info_get(struct rte_eth_dev *eth_dev, uint16_t qid,
   struct rte_eth_txq_info *qinfo);

+/* Queue status */
+int cnxk_nix_rx_descriptor_status(void *rxq, uint16_t offset);
+int cnxk_nix_tx_descriptor_status(void *txq, uint16_t offset);
+uint32_t cnxk_nix_rx_queue_count(void *rxq);
+
 /* Lookup configuration */
 const uint32_t *cnxk_nix_supported_ptypes_get(struct rte_eth_dev *eth_dev);
 void *cnxk_nix_fastpath_lookup_mem_get(void);
diff --git a/drivers/net/cnxk/cnxk_ethdev_ops.c 
b/drivers/net/cnxk/cnxk_ethdev_ops.c
index 34e4809650..f20f201db2 100644
--- a/drivers/net/cnxk/cnxk_ethdev_ops.c
+++ b/drivers/net/cnxk/cnxk_ethdev_ops.c
@@ -695,6 +695,66 @@ cnxk_nix_txq_info_get(struct rte_eth_dev *eth_dev, 
uint16_t qid,
memcpy(&qinfo->conf, &txq_sp->qconf.conf.tx, sizeof(qinfo->conf));
 }

+uint32_t
+cnxk_nix_rx_queue_count(void *rxq)
+{
+   struct cnxk_eth_rxq_sp *rxq_sp = cnxk_eth_rxq_to_sp(rxq);
+   struct roc_nix *nix = &rxq_sp->dev->nix;
+   uint32_t head, tail;
+
+   roc_nix_cq_head_tail_get(nix, rxq_sp->qid, &head, &tail);
+   return (tail - head) % (rxq_sp->qconf.nb_desc);
+}
+
+static inline int
+nix_offset_has_packet(uint32_t head, uint32_t tail, uint16_t offset, bool 
is_rx)
+{
+   /* Check given offset(queue index) has packet filled/xmit by HW
+* in case of Rx or Tx.
+* Also, checks for wrap around case.
+*/
+   return ((tail > head && offset <= tail && offset >= head) ||
+   (head > tail && (offset >= head || offset <= tail))) ?
+  is_rx :
+  !is_rx;
+}
+
+int
+cnxk_nix_rx_descriptor_status(void *rxq, uint16_t offset)
+{
+   struct cnxk_eth_rxq_sp *rxq_sp = cnxk_eth_rxq_to_sp(rxq);
+   struct roc_nix *nix = &rxq_sp->dev->nix;
+   uint32_t head, tail;
+
+   if (rxq_sp->qconf.nb_desc <= offset)
+   return -EINVAL;
+
+   roc_nix_cq_head_tail_get(nix, rxq_sp->qid, &head, &tail);
+
+   if (nix_offset_has_packet(head, tail, offset, 1))
+   return RTE_ETH_RX_DESC_DONE;
+   

RE: [PATCH] build: add missing arch define for Arm

2022-01-19 Thread Ruifeng Wang
> -Original Message-
> From: Juraj Linkeš 
> Sent: Monday, January 17, 2022 9:12 PM
> To: Ruifeng Wang ; tho...@monjalon.net
> Cc: dev@dpdk.org; sta...@dpdk.org; vikto...@rehivetech.com;
> bruce.richard...@intel.com; step...@networkplumber.org; Honnappa
> Nagarahalli ; nd ; nd
> 
> Subject: RE: [PATCH] build: add missing arch define for Arm
> 
> 
> 
> > -Original Message-
> > From: Ruifeng Wang 
> > Sent: Friday, January 14, 2022 10:05 AM
> > To: tho...@monjalon.net
> > Cc: dev@dpdk.org; sta...@dpdk.org; vikto...@rehivetech.com;
> > bruce.richard...@intel.com; step...@networkplumber.org; Juraj Linkeš
> > ; Honnappa Nagarahalli
> > ; nd ; nd 
> > Subject: RE: [PATCH] build: add missing arch define for Arm
> >
> > > -Original Message-
> > > From: Thomas Monjalon 
> > > Sent: Friday, January 14, 2022 1:33 AM
> > > To: Ruifeng Wang 
> > > Cc: dev@dpdk.org; sta...@dpdk.org; vikto...@rehivetech.com;
> > > bruce.richard...@intel.com; step...@networkplumber.org;
> > > juraj.lin...@pantheon.tech; Honnappa Nagarahalli
> > > ; nd 
> > > Subject: Re: [PATCH] build: add missing arch define for Arm
> > >
> > > 17/12/2021 09:54, Ruifeng Wang:
> > > > As per design document, RTE_ARCH is the name of the architecture.
> > > > However, the definition was missing on Arm with meson build.
> > > > It impacts applications that refers to this string.
> > > >
> > > > Added for Arm builds.
> > > >
> > > > Fixes: b1d48c41189a ("build: support ARM with meson")
> > > > Cc: sta...@dpdk.org
> > > >
> > > > Signed-off-by: Ruifeng Wang 
> > > > ---
> > > >  ['RTE_ARCH_ARMv8_AARCH32', true],
> > > > +['RTE_ARCH', 'arm64_aarch32'],
> > >
> > > Why not armv8_aarch32?
> >
> > Thanks for the comments.
> > Agreed. armv8_aarch32 is consistent with the RTE_ARCH_xx macro above.
> >
> > >
> > > [...]
> > > >  dpdk_conf.set('RTE_ARCH_ARMv7', true)
> > > > +dpdk_conf.set('RTE_ARCH', 'armv7')
> > > [...]
> > > >  # armv8 build
> > > > +dpdk_conf.set('RTE_ARCH', 'arm64')
> > >
> > > Why not armv8?
> > >
> > > What I prefer the most in silicon industry is the naming craziness
> > > :)
> >
> > While armv8 usually refers to one generation of the Arm architecture,
> > arm64 is more generic for 64-bit architectures.
> > And what defined for armv8 build is RTE_ARCH_ARM64. So for
> > consistency,
> > arm64 is better?
> >
> 
> Using armv8_aarch32 along with arm64 doesn't seem right. We should unite

Thanks for providing your thoughts.
I have no strong opinion on this. armv8 indeed is better aligned with 
armv8_aarch32.
I will change in next version.

> these and I think armv8 makes sense. As you mentioned armv8 is an arm64
> architecture and using the more precise identification is better in my opinion
> (as that gives more information). As for the consistency with
> RTE_ARCH_ARM64, I think the problem is that we don't have the
> RTE_ARCH_ARMv8 flag (which would provide the consistency, but won't be
> used):
> The current code is, accurately, written for 64bit arm architectures (all of
> them).
> There is currently no need to differentiate between 64bit arm architectures
> which is why RTE_ARCH_ARMv8 doesn't exist.
> However, armv8 exists and we know how to identify it which is why I think
> setting RTE_ARCH to armv8 is the way to go.
> 
> So my thinking is RTE_ARCH should be set to armv8, which implies
> RTE_ARCH_ARMv8 which in turn implies RTE_ARCH_ARM64. We're just
> missing the middle part since there's no use for it now.
> 
> And to be fully consistent, we could add RTE_ARCH_ARM32 to armv7 (as a
> superset of RTE_ARCH_ARMv7, but that likely won't be of much use).



RE: [PATCH v2] build: add missing arch define for Arm

2022-01-19 Thread Ruifeng Wang
> -Original Message-
> From: Thomas Monjalon 
> Sent: Wednesday, January 19, 2022 5:01 PM
> To: Ruifeng Wang 
> Cc: dev@dpdk.org; sta...@dpdk.org; vikto...@rehivetech.com;
> bruce.richard...@intel.com; step...@networkplumber.org;
> juraj.lin...@pantheon.tech; Honnappa Nagarahalli
> ; nd ; sta...@dpdk.org
> Subject: Re: [PATCH v2] build: add missing arch define for Arm
> 
> 17/01/2022 08:15, Ruifeng Wang:
> > --- a/config/arm/meson.build
> > +++ b/config/arm/meson.build
> > @@ -49,6 +49,7 @@ implementer_generic = {
> >  ['RTE_ARCH_ARM_NEON_MEMCPY', false],
> >  ['RTE_ARCH_STRICT_ALIGN', true],
> >  ['RTE_ARCH_ARMv8_AARCH32', true],
> > +['RTE_ARCH', 'armv8_aarch32'],
> >  ['RTE_CACHE_LINE_SIZE', 64]
> >  ]
> >  }
> > @@ -432,11 +433,13 @@ if dpdk_conf.get('RTE_ARCH_32')
> >  else
> >  # armv7 build
> >  dpdk_conf.set('RTE_ARCH_ARMv7', true)
> > +dpdk_conf.set('RTE_ARCH', 'armv7')
> >  # the minimum architecture supported, armv7-a, needs the following,
> >  machine_args += '-mfpu=neon'
> >  endif
> >  else
> >  # armv8 build
> > +dpdk_conf.set('RTE_ARCH', 'arm64')
> 
> Juraj commented on v1 that it should be armv8.
> 
Thanks, I will send out v3.



Re: [RFC 1/3] ethdev: support GRE optional fields

2022-01-19 Thread Ferruh Yigit

On 12/30/2021 3:08 AM, Sean Zhang wrote:

Add flow pattern items and header format for matching optional fields
(checksum/key/sequence) in GRE header. And the flags in gre item should
be correspondingly set with the new added items.

Signed-off-by: Sean Zhang 
---
  doc/guides/prog_guide/rte_flow.rst | 16 
  lib/ethdev/rte_flow.c  |  1 +
  lib/ethdev/rte_flow.h  | 18 ++
  3 files changed, 35 insertions(+)

diff --git a/doc/guides/prog_guide/rte_flow.rst 
b/doc/guides/prog_guide/rte_flow.rst
index c51ed88..48d5685 100644
--- a/doc/guides/prog_guide/rte_flow.rst
+++ b/doc/guides/prog_guide/rte_flow.rst
@@ -1113,6 +1113,22 @@ This should be preceded by item ``GRE``.
  - Value to be matched is a big-endian 32 bit integer.
  - When this item present it implicitly match K bit in default mask as "1"
  
+Item: ``GRE_OPTION``

+
+
+Matches a GRE optional fields (checksum/key/sequence).
+This should be preceded by item ``GRE``.
+
+- ``checksum``: checksum.
+- ``key``: key.
+- ``sequence``: sequence.
+- The items in GRE_OPTION do not change bit flags(c_bit/k_bit/s_bit) in GRE
+  item. The bit flags need be set with GRE item by application. When the items
+  present, the corresponding bits in GRE spec and mask should be set "1" by
+  application, it means to match specified value of the fields. When the items
+  are not present, but the corresponding bits in GRE spec and mask are "1", it means
+  to match any value of the fields.
+
  Item: ``FUZZY``
  ^^^
  
diff --git a/lib/ethdev/rte_flow.c b/lib/ethdev/rte_flow.c

index a93f68a..03bd1df 100644
--- a/lib/ethdev/rte_flow.c
+++ b/lib/ethdev/rte_flow.c
@@ -139,6 +139,7 @@ struct rte_flow_desc_data {
MK_FLOW_ITEM(META, sizeof(struct rte_flow_item_meta)),
MK_FLOW_ITEM(TAG, sizeof(struct rte_flow_item_tag)),
MK_FLOW_ITEM(GRE_KEY, sizeof(rte_be32_t)),
+   MK_FLOW_ITEM(GRE_OPTION, sizeof(struct rte_gre_hdr_option)),
MK_FLOW_ITEM(GTP_PSC, sizeof(struct rte_flow_item_gtp_psc)),
MK_FLOW_ITEM(PPPOES, sizeof(struct rte_flow_item_pppoe)),
MK_FLOW_ITEM(PPPOED, sizeof(struct rte_flow_item_pppoe)),
diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h
index 1031fb2..27b4140 100644
--- a/lib/ethdev/rte_flow.h
+++ b/lib/ethdev/rte_flow.h
@@ -660,6 +660,13 @@ enum rte_flow_item_type {
 * See struct rte_flow_item_ppp.
 */
RTE_FLOW_ITEM_TYPE_PPP,
+
+   /**
+* Matches GRE optional fields.
+*
+* See struct rte_gre_hdr_option.
+*/
+   RTE_FLOW_ITEM_TYPE_GRE_OPTION,
  };
  
  /**

@@ -1196,6 +1203,17 @@ struct rte_flow_item_gre {
  #endif
  
  /**

+ * RTE_FLOW_ITEM_TYPE_GRE_OPTION.
+ *
+ * Matches GRE optional fields in header.
+ */
+struct rte_gre_hdr_option {
+   rte_be16_t checksum;
+   rte_be32_t key;
+   rte_be32_t sequence;
+};
+


Hi Ori, Andrew,

The decision was to have protocol structs in the net library and flow structs
use from there, wasn't it?
(Btw, a deprecation notice is still pending to clear some existing ones)

So for the GRE optional fields, what about having a struct in the 'rte_gre.h'?
(Also perhaps a GRE extended protocol header can be defined combining
'rte_gre_hdr' and optional fields struct.)
Later flow API struct can embed that struct.


Re: [RFC 1/3] ethdev: support GRE optional fields

2022-01-19 Thread Thomas Monjalon
19/01/2022 10:53, Ferruh Yigit:
> On 12/30/2021 3:08 AM, Sean Zhang wrote:
> > --- a/lib/ethdev/rte_flow.h
> > +++ b/lib/ethdev/rte_flow.h
> >   /**
> > + * RTE_FLOW_ITEM_TYPE_GRE_OPTION.
> > + *
> > + * Matches GRE optional fields in header.
> > + */
> > +struct rte_gre_hdr_option {
> > +   rte_be16_t checksum;
> > +   rte_be32_t key;
> > +   rte_be32_t sequence;
> > +};
> > +
> 
> Hi Ori, Andrew,
> 
> The decision was to have protocol structs in the net library and flow structs
> use from there, wasn't it?
> (Btw, a deprecation notice is still pending to clear some existing ones)
> 
> So for the GRE optional fields, what about having a struct in the 'rte_gre.h'?
> (Also perhaps a GRE extended protocol header can be defined combining
> 'rte_gre_hdr' and optional fields struct.)
> Later flow API struct can embed that struct.

+1 for using librte_net.
This addition in rte_flow looks to be a mistake.
Please fix in the next version.




Re: [PATCH v5 1/2] eal: add API for bus close

2022-01-19 Thread Thomas Monjalon
Hi,

10/01/2022 06:26, rohit@nxp.com:
> From: Rohit Raj 
> 
> As per the current code we have API for bus probe, but the
> bus close API is missing. This breaks the multi process
> scenarios as objects are not cleaned while terminating the
> secondary processes.
> 
> This patch adds a new API rte_bus_close() for cleanup of
> bus objects which were acquired during probe.

I don't understand how closing all devices of a bus will help better
than just closing all devices.

As Ferruh already suggested in the past,
we could force closing all devices in rte_eal_cleanup().
And we already have the function rte_dev_remove().





RE: [PATCH v1] raw/ifpga: fix pthread cannot join

2022-01-19 Thread Xu, Rosen
Hi Wei,

Some ci/iol-intel-Functional issues, pls check. Thanks a lot.

> -Original Message-
> From: Huang, Wei 
> Sent: Wednesday, January 19, 2022 13:54
> To: dev@dpdk.org; Xu, Rosen ; Zhang, Qi Z
> 
> Cc: sta...@dpdk.org; Zhang, Tianfei ; Yigit, Ferruh
> 
> Subject: [PATCH v1] raw/ifpga: fix pthread cannot join
> 
> From: Tianfei Zhang 
> 
> When we want to close a thread, we should set a flag to thread handler
> function.
> 
> Fixes: 9c006c45 ("raw/ifpga: scan PCIe BDF device tree")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Tianfei Zhang 
> ---
>  drivers/raw/ifpga/ifpga_rawdev.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/raw/ifpga/ifpga_rawdev.c
> b/drivers/raw/ifpga/ifpga_rawdev.c
> index 8d9db58..9663b67 100644
> --- a/drivers/raw/ifpga/ifpga_rawdev.c
> +++ b/drivers/raw/ifpga/ifpga_rawdev.c
> @@ -497,7 +497,7 @@ static int set_surprise_link_check_aer(
>   int gsd_enable, ret;
>  #define MS 1000
> 
> - while (1) {
> + while (ifpga_monitor_start) {
>   gsd_enable = 0;
>   for (i = 0; i < IFPGA_RAWDEV_NUM; i++) {
>   ifpga_rdev = &ifpga_rawdevices[i];
> @@ -544,7 +544,9 @@ static int set_surprise_link_check_aer(  {
>   int ret;
> 
> - if (ifpga_monitor_start == 1) {
> + if (ifpga_monitor_start == 1 && ifpga_monitor_start_thread) {
> + ifpga_monitor_start = 0;
> +
>   ret = pthread_cancel(ifpga_monitor_start_thread);
>   if (ret)
>   IFPGA_RAWDEV_PMD_ERR("Can't cancel the
> thread"); @@ -553,8 +555,6 @@ static int set_surprise_link_check_aer(
>   if (ret)
>   IFPGA_RAWDEV_PMD_ERR("Can't join the thread");
> 
> - ifpga_monitor_start = 0;
> -
>   return ret;
>   }
> 
> --
> 1.8.3.1



RE: [PATCH v1] raw/ifpga/base: fix SPI transaction

2022-01-19 Thread Xu, Rosen
Hi,

> -Original Message-
> From: Huang, Wei 
> Sent: Wednesday, January 19, 2022 9:45
> To: dev@dpdk.org; Xu, Rosen ; Zhang, Qi Z
> 
> Cc: sta...@dpdk.org; Zhang, Tianfei ; Yigit, Ferruh
> 
> Subject: [PATCH v1] raw/ifpga/base: fix SPI transaction
> 
> From: Tianfei Zhang 
> 
> When EOP is detected, 2 more bytes should be received (may be a
> SPI_PACKET_ESC before last valid byte) then rx should be finished.
> 
> Fixes: 96ebfcf8 ("raw/ifpga/base: add SPI and MAX10 device driver")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Tianfei Zhang 
> ---
>  drivers/raw/ifpga/base/opae_spi.c |  12 ++
>  drivers/raw/ifpga/base/opae_spi.h |   4 +
>  drivers/raw/ifpga/base/opae_spi_transaction.c | 215 +++--
> -
>  3 files changed, 140 insertions(+), 91 deletions(-)
> 
> diff --git a/drivers/raw/ifpga/base/opae_spi.c
> b/drivers/raw/ifpga/base/opae_spi.c
> index 9efeecb..ca3d41f 100644
> --- a/drivers/raw/ifpga/base/opae_spi.c
> +++ b/drivers/raw/ifpga/base/opae_spi.c
> @@ -239,6 +239,18 @@ int spi_command(struct altera_spi_device *dev,
> unsigned int chip_select,
>   return 0;
>  }
> 
> +int spi_write(struct altera_spi_device *dev, unsigned int chip_select,
> + unsigned int wlen, void *wdata)
> +{
> + return spi_command(dev, chip_select, wlen, wdata, 0, NULL); }
> +
> +int spi_read(struct altera_spi_device *dev, unsigned int chip_select,
> + unsigned int rlen, void *rdata)
> +{
> + return spi_command(dev, chip_select, 0, NULL, rlen, rdata); }
> +
>  struct altera_spi_device *altera_spi_alloc(void *base, int type)  {
>   struct altera_spi_device *spi_dev =
> diff --git a/drivers/raw/ifpga/base/opae_spi.h
> b/drivers/raw/ifpga/base/opae_spi.h
> index af11656..bcff67d 100644
> --- a/drivers/raw/ifpga/base/opae_spi.h
> +++ b/drivers/raw/ifpga/base/opae_spi.h
> @@ -117,6 +117,10 @@ struct spi_tran_header {
>   u32 addr;
>  };
> 
> +int spi_read(struct altera_spi_device *dev, unsigned int chip_select,
> + unsigned int rlen, void *rdata);
> +int spi_write(struct altera_spi_device *dev, unsigned int chip_select,
> + unsigned int wlen, void *wdata);
>  int spi_command(struct altera_spi_device *dev, unsigned int chip_select,
>   unsigned int wlen, void *wdata, unsigned int rlen, void
> *rdata);  void spi_cs_deactivate(struct altera_spi_device *dev); diff --git
> a/drivers/raw/ifpga/base/opae_spi_transaction.c
> b/drivers/raw/ifpga/base/opae_spi_transaction.c
> index 006cdb4..cd50d40 100644
> --- a/drivers/raw/ifpga/base/opae_spi_transaction.c
> +++ b/drivers/raw/ifpga/base/opae_spi_transaction.c
> @@ -40,7 +40,7 @@ static void print_buffer(const char *string, void *buffer,
> int len)
>   printf("%s print buffer, len=%d\n", string, len);
> 
>   for (i = 0; i < len; i++)
> - printf("%x ", *(p+i));
> + printf("%02x ", *(p+i));
>   printf("\n");
>  }
>  #else
> @@ -72,43 +72,6 @@ static void reorder_phy_data(u8 bits_per_word,
>   }
>  }
> 
> -enum {
> - SPI_FOUND_SOP,
> - SPI_FOUND_EOP,
> - SPI_NOT_FOUND,
> -};
> -
> -static int resp_find_sop_eop(unsigned char *resp, unsigned int len,
> - int flags)
> -{
> - int ret = SPI_NOT_FOUND;
> -
> - unsigned char *b = resp;
> -
> - /* find SOP */
> - if (flags != SPI_FOUND_SOP) {
> - while (b < resp + len && *b != SPI_PACKET_SOP)
> - b++;
> -
> - if (*b != SPI_PACKET_SOP)
> - goto done;
> -
> - ret = SPI_FOUND_SOP;
> - }
> -
> - /* find EOP */
> - while (b < resp + len && *b != SPI_PACKET_EOP)
> - b++;
> -
> - if (*b != SPI_PACKET_EOP)
> - goto done;
> -
> - ret = SPI_FOUND_EOP;
> -
> -done:
> - return ret;
> -}
> -
>  static void phy_tx_pad(unsigned char *phy_buf, unsigned int phy_buf_len,
>   unsigned int *aligned_len)
>  {
> @@ -137,6 +100,104 @@ static void phy_tx_pad(unsigned char *phy_buf,
> unsigned int phy_buf_len,
>   *p++ = SPI_BYTE_IDLE;
>  }
> 
> +#define RX_ALL_IDLE_DATA (SPI_BYTE_IDLE << 24 | SPI_BYTE_IDLE << 16 |
>   \
> +  SPI_BYTE_IDLE << 8 | SPI_BYTE_IDLE)
> +
> +static bool all_idle_data(u8 *rxbuf)
> +{
> + return *(u32 *)rxbuf == RX_ALL_IDLE_DATA; }
> +
> +static unsigned char *find_eop(u8 *rxbuf, u32 BPW) {
> + return memchr(rxbuf, SPI_PACKET_EOP, BPW); }
> +
> +static int do_spi_txrx(struct spi_transaction_dev *dev,
> + unsigned char *tx_buffer,
> + unsigned int tx_len, unsigned char *rx_buffer,
> + unsigned int rx_len,
> + unsigned int *actual_rx)
> +{
> + unsigned int rx_cnt = 0;
> + int ret = 0;
> + unsigned int BPW = 4;
> + bool eop_found = false;
> + unsigned char *eop;
> + unsigned char *ptr;
> + unsigned char *rxbuf = rx_buffer;
> + int add_byte = 0;
> + unsigned long ticks;
> + unsigned long ti

[PATCH] net/cnxk: resolve mbuf data length update issue

2022-01-19 Thread Rahul Bhansali
If multi-segment is enabled and single segment/packet
is received, then mbuf data_len is not updated in
cn9k_nix_cqe_to_mbuf function.
Also, in case of timestamp is enabled, mbuf data_len
and pkt_len will be updated for all packets including
multi segmented packets.

Signed-off-by: Rahul Bhansali 
---
Depends-on: Series-21246 ("[v2,1/4] net/cnxk: avoid command copy from Tx
queue")

 drivers/event/cnxk/cn10k_worker.h |  2 --
 drivers/event/cnxk/cn9k_worker.h  |  2 --
 drivers/net/cnxk/cn10k_rx.h   |  9 +++--
 drivers/net/cnxk/cn9k_rx.h| 18 +++---
 drivers/net/cnxk/cnxk_ethdev.h|  9 +++--
 5 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/drivers/event/cnxk/cn10k_worker.h 
b/drivers/event/cnxk/cn10k_worker.h
index 1e61a6ddf0..5c36540a43 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -174,7 +174,6 @@ cn10k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const 
uint32_t flags,
   CNXK_SSO_WQE_SG_PTR);
cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp,
flags & NIX_RX_OFFLOAD_TSTAMP_F,
-   flags & NIX_RX_MULTI_SEG_F,
(uint64_t *)tstamp_ptr);
wqe[0] = (uint64_t *)mbuf;
non_vec--;
@@ -266,7 +265,6 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct 
rte_event *ev,
cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf,
ws->tstamp,
flags & NIX_RX_OFFLOAD_TSTAMP_F,
-   flags & NIX_RX_MULTI_SEG_F,
(uint64_t *)tstamp_ptr);
gw.u64[1] = mbuf;
} else if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index e44422ec25..368baae048 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -208,7 +208,6 @@ cn9k_sso_hws_dual_get_work(uint64_t base, uint64_t 
pair_base,
cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf,
dws->tstamp,
flags & NIX_RX_OFFLOAD_TSTAMP_F,
-   flags & NIX_RX_MULTI_SEG_F,
(uint64_t *)tstamp_ptr);
gw.u64[1] = mbuf;
}
@@ -285,7 +284,6 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct 
rte_event *ev,
cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf,
ws->tstamp,
flags & NIX_RX_OFFLOAD_TSTAMP_F,
-   flags & NIX_RX_MULTI_SEG_F,
(uint64_t *)tstamp_ptr);
gw.u64[1] = mbuf;
}
diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
index 9694a3080f..45b626b089 100644
--- a/drivers/net/cnxk/cn10k_rx.h
+++ b/drivers/net/cnxk/cn10k_rx.h
@@ -364,7 +364,13 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, 
const uint32_t tag,
*(uint64_t *)(&mbuf->rearm_data) = val;

if (flag & NIX_RX_MULTI_SEG_F)
-   nix_cqe_xtract_mseg(rx, mbuf, val, flag);
+   /*
+* For multi segment packets, mbuf length correction according
+* to Rx timestamp length will be handled later during
+* timestamp data process.
+* Hence, flag argument is not required.
+*/
+   nix_cqe_xtract_mseg(rx, mbuf, val, 0);
else
mbuf->next = NULL;
 }
@@ -452,7 +458,6 @@ cn10k_nix_recv_pkts(void *rx_queue, struct rte_mbuf 
**rx_pkts, uint16_t pkts,
  flags);
cnxk_nix_mbuf_to_tstamp(mbuf, rxq->tstamp,
(flags & NIX_RX_OFFLOAD_TSTAMP_F),
-   (flags & NIX_RX_MULTI_SEG_F),
(uint64_t *)((uint8_t *)mbuf
+ data_off));
rx_pkts[packets++] = mbuf;
diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h
index fa4efbf80a..c883eaeb07 100644
--- a/drivers/net/cnxk/cn9k_rx.h
+++ b/drivers/net/cnxk/cn9k_rx.h
@@ -345,13 +345,18 @@ cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, 
const uint32_t tag,
mbuf->ol_flags = ol_flags;
*(uint64_t *)(&mbuf->rearm_data) = val;
mbuf->pkt_len = len;
-
-   if (flag & NIX_RX_MULTI_SEG_F) {
-   nix_cqe_xtract_mseg(rx, mbuf, val, flag

Re: [PATCH v2 1/2] common/cnxk: get head-tail of Rx and Tx queues

2022-01-19 Thread Ray Kinsella


Rahul Bhansali  writes:

> Adds roc APIs roc_nix_cq_head_tail_get, roc_nix_sq_head_tail_get
> to get head-tail of receive and transmit queue respectively.
>
> Signed-off-by: Rahul Bhansali 
> ---
> v2 changes:
>  - No change
>
>  drivers/common/cnxk/roc_nix.h   |  4 +++
>  drivers/common/cnxk/roc_nix_queue.c | 53 +
>  drivers/common/cnxk/version.map |  2 ++
>  3 files changed, 59 insertions(+)
>

Acked-by: Ray Kinsella 

-- 
Regards, Ray K


RE: [RFC 1/3] ethdev: support GRE optional fields

2022-01-19 Thread Ori Kam
Hi,

> -Original Message-
> From: Thomas Monjalon 
> Subject: Re: [RFC 1/3] ethdev: support GRE optional fields
> 
> 19/01/2022 10:53, Ferruh Yigit:
> > On 12/30/2021 3:08 AM, Sean Zhang wrote:
> > > --- a/lib/ethdev/rte_flow.h
> > > +++ b/lib/ethdev/rte_flow.h
> > >   /**
> > > + * RTE_FLOW_ITEM_TYPE_GRE_OPTION.
> > > + *
> > > + * Matches GRE optional fields in header.
> > > + */
> > > +struct rte_gre_hdr_option {
> > > + rte_be16_t checksum;
> > > + rte_be32_t key;
> > > + rte_be32_t sequence;
> > > +};
> > > +
> >
> > Hi Ori, Andrew,
> >
> > The decision was to have protocol structs in the net library and flow 
> > structs
> > use from there, wasn't it?
> > (Btw, a deprecation notice is still pending to clear some existing ones)
> >
> > So for the GRE optional fields, what about having a struct in the 
> > 'rte_gre.h'?
> > (Also perhaps an GRE extended protocol header can be defined combining
> > 'rte_gre_hdr' and optional fields struct.)
> > Later flow API struct can embed that struct.
> 
> +1 for using librte_net.
> This addition in rte_flow looks to be a mistake.
> Please fix the next version.
> 
Nice idea,
but my main concern is how the header should be defined.
Since some of the fields are optional this will look something like this:
gre_hdr_option_checksum {
rte_be_16_t checksum;
}

gre_hdr_option_key {
rte_be_32_t key;
}

gre_hdr_option_ sequence {
rte_be_32_t sequence;
}

I don't want to have so many rte_flow_items.
As more and more protocols have optional data, it doesn't make sense to create 
an item for each.

Ideally, I would like the optional fields to be part of the original item.
For example, in testpmd I would like to write:
Eth / ipv4 / udp / gre flags is key & checksum checksum is yyy key is xxx / end
And not 
Eth / ipv4 / udp / gre flags is key & checksum / gre_option checksum is yyy key 
is xxx / end
This means that the structure will look like this:
struct rte_flow_item_gre {
union {
struct {
/**
* Checksum (1b), reserved 0 (12b), version (3b).
 * Refer to RFC 2784.
 */
rte_be16_t c_rsvd0_ver;
rte_be16_t protocol; /**< Protocol type. */
}
struct rte_gre_hdr hdr
}
rte_be_16_t checksum;
rte_be_32_t key;
rte_be_32_t sequence;
};
The main issue with this is that it breaks ABI,
Maybe to solve this we can create a new structure gre_ext?

In any case, I think we should consider how to allow adding members to 
structures without ABI breakage.

Best,
Ori



[PATCH] mempool: test performance with constant n

2022-01-19 Thread Morten Brørup
"What gets measured gets done."

This patch adds mempool performance tests where the number of objects to
put and get is constant at compile time, which may significantly improve
the performance of these functions. [*]

Also, it is ensured that the array holding the objects used for testing
is cache line aligned, for maximum performance.

And finally, the following entries are added to the list of tests:
- Number of kept objects: 512
- Number of objects to get and to put: The number of pointers fitting
  into a cache line, i.e. 8 or 16

[*] Some example performance test (with cache) results:

get_bulk=4 put_bulk=4 keep=128 constant_n=false rate_persec=280480972
get_bulk=4 put_bulk=4 keep=128 constant_n=true  rate_persec=622159462

get_bulk=8 put_bulk=8 keep=128 constant_n=false rate_persec=477967155
get_bulk=8 put_bulk=8 keep=128 constant_n=true  rate_persec=917582643

get_bulk=32 put_bulk=32 keep=32 constant_n=false rate_persec=871248691
get_bulk=32 put_bulk=32 keep=32 constant_n=true rate_persec=1134021836

Signed-off-by: Morten Brørup 
---
 app/test/test_mempool_perf.c | 120 +--
 1 file changed, 74 insertions(+), 46 deletions(-)

diff --git a/app/test/test_mempool_perf.c b/app/test/test_mempool_perf.c
index 87ad251367..ffefe934d5 100644
--- a/app/test/test_mempool_perf.c
+++ b/app/test/test_mempool_perf.c
@@ -1,5 +1,6 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  * Copyright(c) 2010-2014 Intel Corporation
+ * Copyright(c) 2022 SmartShare Systems
  */
 
 #include 
@@ -55,19 +56,24 @@
  *
  *  - Bulk get from 1 to 32
  *  - Bulk put from 1 to 32
+ *  - Bulk get and put from 1 to 32, compile time constant
  *
  *- Number of kept objects (*n_keep*)
  *
  *  - 32
  *  - 128
+ *  - 512
  */
 
 #define N 65536
 #define TIME_S 5
 #define MEMPOOL_ELT_SIZE 2048
-#define MAX_KEEP 128
+#define MAX_KEEP 512
 #define MEMPOOL_SIZE 
((rte_lcore_count()*(MAX_KEEP+RTE_MEMPOOL_CACHE_MAX_SIZE))-1)
 
+/* Number of pointers fitting into one cache line. */
+#define CACHE_LINE_BURST (RTE_CACHE_LINE_SIZE/sizeof(uintptr_t))
+
 #define LOG_ERR() printf("test failed at %s():%d\n", __func__, __LINE__)
 #define RET_ERR() do { \
LOG_ERR();  \
@@ -80,16 +86,16 @@
} while (0)
 
 static int use_external_cache;
-static unsigned external_cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;
+static unsigned int external_cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;
 
 static uint32_t synchro;
 
 /* number of objects in one bulk operation (get or put) */
-static unsigned n_get_bulk;
-static unsigned n_put_bulk;
+static int n_get_bulk;
+static int n_put_bulk;
 
 /* number of objects retrieved from mempool before putting them back */
-static unsigned n_keep;
+static int n_keep;
 
 /* number of enqueues / dequeues */
 struct mempool_test_stats {
@@ -104,20 +110,43 @@ static struct mempool_test_stats stats[RTE_MAX_LCORE];
  */
 static void
 my_obj_init(struct rte_mempool *mp, __rte_unused void *arg,
-   void *obj, unsigned i)
+   void *obj, unsigned int i)
 {
uint32_t *objnum = obj;
memset(obj, 0, mp->elt_size);
*objnum = i;
 }
 
+#define test_loop(x_keep, x_get_bulk, x_put_bulk) \
+   for (i = 0; likely(i < (N/x_keep)); i++) {\
+   /* get x_keep objects by bulk of x_get_bulk */  \
+   for (idx = 0; idx < x_keep; idx += x_get_bulk) {\
+   ret = rte_mempool_generic_get(mp,   \
+   &obj_table[idx],\
+   x_get_bulk,  \
+   cache);  \
+   if (unlikely(ret < 0)) {\
+   rte_mempool_dump(stdout, mp);   \
+   GOTO_ERR(ret, out);  \
+   }  \
+   }  \
+   \
+   /* put the objects back by bulk of x_put_bulk */\
+   for (idx = 0; idx < x_keep; idx += x_put_bulk) {\
+   rte_mempool_generic_put(mp,  \
+   &obj_table[idx],\
+   x_put_bulk,  \
+   cache);  \
+   }  \
+   }
+
 static int
 per_lcore_mempool_test(void *arg)
 {
-   void *obj_table[MAX_KEEP];
-   unsigned i, idx;
+   void *obj_table[MAX_KEEP] __rte_cache_aligned;
+   int i, idx;
struct rte_mempool

[dpdk-dev] [PATCH] net/nfp: free HW rings memzone on queue release

2022-01-19 Thread heinrich . kuhn
From: Heinrich Kuhn 

During rx/tx queue setup, memory is reserved for the hardware rings.
This memory zone should subsequently be freed in the queue release
logic. This commit also adds a call to the release logic in the
dev_close() callback so that the ring memzone may be freed during port
close too.

Fixes: b812daadad0d ("nfp: add Rx and Tx")
Cc: sta...@dpdk.org

Signed-off-by: Heinrich Kuhn 
Signed-off-by: Simon Horman 
---
 drivers/net/nfp/nfp_ethdev.c| 2 ++
 drivers/net/nfp/nfp_ethdev_vf.c | 2 ++
 drivers/net/nfp/nfp_rxtx.c  | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/drivers/net/nfp/nfp_ethdev.c b/drivers/net/nfp/nfp_ethdev.c
index 8e81cc498f..9166f65da3 100644
--- a/drivers/net/nfp/nfp_ethdev.c
+++ b/drivers/net/nfp/nfp_ethdev.c
@@ -302,11 +302,13 @@ nfp_net_close(struct rte_eth_dev *dev)
for (i = 0; i < dev->data->nb_tx_queues; i++) {
this_tx_q = (struct nfp_net_txq *)dev->data->tx_queues[i];
nfp_net_reset_tx_queue(this_tx_q);
+   nfp_net_tx_queue_release(dev, i);
}
 
for (i = 0; i < dev->data->nb_rx_queues; i++) {
this_rx_q = (struct nfp_net_rxq *)dev->data->rx_queues[i];
nfp_net_reset_rx_queue(this_rx_q);
+   nfp_net_rx_queue_release(dev, i);
}
 
/* Cancel possible impending LSC work here before releasing the port*/
diff --git a/drivers/net/nfp/nfp_ethdev_vf.c b/drivers/net/nfp/nfp_ethdev_vf.c
index 303ef72b1b..0034d68ea6 100644
--- a/drivers/net/nfp/nfp_ethdev_vf.c
+++ b/drivers/net/nfp/nfp_ethdev_vf.c
@@ -219,11 +219,13 @@ nfp_netvf_close(struct rte_eth_dev *dev)
for (i = 0; i < dev->data->nb_tx_queues; i++) {
this_tx_q =  (struct nfp_net_txq *)dev->data->tx_queues[i];
nfp_net_reset_tx_queue(this_tx_q);
+   nfp_net_tx_queue_release(dev, i);
}
 
for (i = 0; i < dev->data->nb_rx_queues; i++) {
this_rx_q =  (struct nfp_net_rxq *)dev->data->rx_queues[i];
nfp_net_reset_rx_queue(this_rx_q);
+   nfp_net_rx_queue_release(dev, i);
}
 
rte_intr_disable(pci_dev->intr_handle);
diff --git a/drivers/net/nfp/nfp_rxtx.c b/drivers/net/nfp/nfp_rxtx.c
index 0fe1415596..335a90b2c9 100644
--- a/drivers/net/nfp/nfp_rxtx.c
+++ b/drivers/net/nfp/nfp_rxtx.c
@@ -470,6 +470,7 @@ nfp_net_rx_queue_release(struct rte_eth_dev *dev, uint16_t 
queue_idx)
 
if (rxq) {
nfp_net_rx_queue_release_mbufs(rxq);
+   rte_eth_dma_zone_free(dev, "rx_ring", queue_idx);
rte_free(rxq->rxbufs);
rte_free(rxq);
}
@@ -660,6 +661,7 @@ nfp_net_tx_queue_release(struct rte_eth_dev *dev, uint16_t 
queue_idx)
 
if (txq) {
nfp_net_tx_queue_release_mbufs(txq);
+   rte_eth_dma_zone_free(dev, "tx_ring", queue_idx);
rte_free(txq->txbufs);
rte_free(txq);
}
-- 
2.30.1 (Apple Git-130)



Re: [PATCH] common/cnxk: enable lmtst burst for batch free

2022-01-19 Thread Jerin Jacob
On Sat, Dec 4, 2021 at 4:02 PM Ashwin Sekhar T K  wrote:
>
> Use lmtst burst when more than 15 pointers is requested
> to be freed.
>
> Signed-off-by: Ashwin Sekhar T K 

Acked-by: Jerin Jacob 
Applied to dpdk-next-net-mrvl/for-next-net. Thanks

Changed the commit as:

common/cnxk: enable LMTST burst for batch free

Use LMTST burst as a performance optimization when more than
15 pointers are requested to be freed.

Signed-off-by: Ashwin Sekhar T K 
Acked-by: Jerin Jacob 

> ---
>  drivers/common/cnxk/roc_npa.h | 86 +++
>  1 file changed, 68 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/common/cnxk/roc_npa.h b/drivers/common/cnxk/roc_npa.h
> index 46350fdb48..dfe6e5318f 100644
> --- a/drivers/common/cnxk/roc_npa.h
> +++ b/drivers/common/cnxk/roc_npa.h
> @@ -10,7 +10,8 @@
>
>  #define ROC_NPA_MAX_BLOCK_SZ  (128 * 1024)
>  #define ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS 512
> -#define ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS  15
> +#define ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS  15U
> +#define ROC_CN10K_NPA_BATCH_FREE_BURST_MAX 16U
>
>  /* This value controls how much of the present average resource level is 
> used to
>   * calculate the new resource level.
> @@ -362,9 +363,6 @@ roc_npa_aura_batch_free(uint64_t aura_handle, uint64_t 
> const *buf,
> volatile uint64_t *lmt_data;
> unsigned int i;
>
> -   if (num > ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS)
> -   return;
> -
> lmt_data = (uint64_t *)lmt_addr;
>
> addr = roc_npa_aura_handle_to_base(aura_handle) +
> @@ -379,10 +377,8 @@ roc_npa_aura_batch_free(uint64_t aura_handle, uint64_t 
> const *buf,
>  * -
>  */
> free0 = roc_npa_aura_handle_to_aura(aura_handle);
> -   if (fabs)
> -   free0 |= (0x1UL << 63);
> -   if (num & 0x1)
> -   free0 |= (0x1UL << 32);
> +   free0 |= ((uint64_t)!!fabs << 63);
> +   free0 |= ((uint64_t)(num & 0x1) << 32);
>
> /* tar_addr[4:6] is LMTST size-1 in units of 128b */
> tar_addr = addr | ((num >> 1) << 4);
> @@ -396,23 +392,77 @@ roc_npa_aura_batch_free(uint64_t aura_handle, uint64_t 
> const *buf,
>  }
>
>  static inline void
> -roc_npa_aura_op_batch_free(uint64_t aura_handle, uint64_t const *buf,
> -  unsigned int num, const int fabs, uint64_t 
> lmt_addr,
> -  uint64_t lmt_id)
> +roc_npa_aura_batch_free_burst(uint64_t aura_handle, uint64_t const *buf,
> + unsigned int num, const int fabs,
> + uint64_t lmt_addr, uint64_t lmt_id)
>  {
> -   unsigned int chunk;
> +   uint64_t addr, tar_addr, free0, send_data, lmtline;
> +   uint64_t *lmt_data;
> +
> +   /* 63   52 51  20 19   7 6   4 3  0
> +* 
> +* | RSVD | ADDR | RSVD | LMTST SZ(0) | 0 |
> +* 
> +*/
> +   addr = roc_npa_aura_handle_to_base(aura_handle) +
> +  NPA_LF_AURA_BATCH_FREE0;
> +   tar_addr = addr | (0x7 << 4);
> +
> +   /* 63   63 62  33 32   32 31  20 190
> +* -
> +* | FABS | Rsvd | COUNT_EOT | Rsvd | AURA |
> +* -
> +*/
> +   free0 = roc_npa_aura_handle_to_aura(aura_handle);
> +   free0 |= ((uint64_t)!!fabs << 63);
> +   free0 |= (0x1UL << 32);
>
> +   /* Fill the lmt lines */
> +   lmt_data = (uint64_t *)lmt_addr;
> +   lmtline = 0;
> while (num) {
> -   chunk = (num >= ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS) ?
> - ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS :
> - num;
> +   lmt_data[lmtline * 16] = free0;
> +   memcpy(&lmt_data[(lmtline * 16) + 1], buf,
> +  ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS * sizeof(uint64_t));
> +   lmtline++;
> +   num -= ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS;
> +   buf += ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS;
> +   }
>
> -   roc_npa_aura_batch_free(aura_handle, buf, chunk, fabs, 
> lmt_addr,
> -   lmt_id);
> +   /* 63   19 18  16 15   12 11  11 10  0
> +* ---
> +* | LMTST SZ(15) ... LMTST SZ(1) | Rsvd | CNTM1 | Rsvd | LMT_ID |
> +* ---
> +*/
> +   send_data = lmt_id | ((lmtline - 1) << 12) | (0x1FFFUL << 19);
> +   roc_lmt_submit_steorl(send_data, tar_addr);
> +   plt_io_wmb();
> +}
>
> +static inline void
> +roc_npa_aura_op_batch_free(uint64_t aura_handle, uint64_t const *buf,
> +  unsigned int num, cons

[Bug 921] meson build warnings with 0.61.1

2022-01-19 Thread bugzilla
https://bugs.dpdk.org/show_bug.cgi?id=921

Bug ID: 921
   Summary: meson build warnings with 0.61.1
   Product: DPDK
   Version: 21.11
  Hardware: All
OS: All
Status: UNCONFIRMED
  Severity: normal
  Priority: Normal
 Component: meson
  Assignee: dev@dpdk.org
  Reporter: jerinjac...@gmail.com
  Target Milestone: ---

Warnings:
1) WARNING: You should add the boolean check kwarg to the run_command call.
 It currently defaults to false,
 but it will default to true in future releases of meson.
 See also: https://github.com/mesonbuild/meson/issues/9300
2)WARNING: Python files installed by Meson might not be found by python
interpreter.
 This warning can be avoided by setting "python.platlibdir" option.
WARNING: Python files installed by Meson might not be found by python
interpreter.
 This warning can be avoided by setting "python.purelibdir" option.

3) config/meson.build:290: WARNING: Consider using the built-in warning_level
option instead of using "-Wextra".

Log:


The Meson build system
Version: 0.61.1
Source dir: /export/dpdk-next-eventdev
Build dir: /export/dpdk-next-eventdev/build
Build type: native build
Program cat found: YES (/usr/bin/cat)
WARNING: You should add the boolean check kwarg to the run_command call.
 It currently defaults to false,
 but it will default to true in future releases of meson.
 See also: https://github.com/mesonbuild/meson/issues/9300
Project name: DPDK
Project version: 22.03.0-rc0
C compiler for the host machine: ccache cc (gcc 11.1.0 "cc (GCC) 11.1.0")
C linker for the host machine: cc ld.bfd 2.36.1
Host machine cpu family: x86_64
Host machine cpu: x86_64
Message: ## Building in Developer Mode ##
Program pkg-config found: YES (/usr/bin/pkg-config)
Program check-symbols.sh found: YES
(/export/dpdk-next-eventdev/buildtools/check-symbols.sh)
Program options-ibverbs-static.sh found: YES
(/export/dpdk-next-eventdev/buildtools/options-ibverbs-static.sh)
Program objdump found: YES (/usr/bin/objdump)
Program python3 found: YES (/usr/bin/python)
WARNING: Python files installed by Meson might not be found by python
interpreter.
 This warning can be avoided by setting "python.platlibdir" option.
WARNING: Python files installed by Meson might not be found by python
interpreter.
 This warning can be avoided by setting "python.purelibdir" option.
Program cat found: YES (/usr/bin/cat)
Checking for size of "void *" : 8
Checking for size of "void *" : 8
Library m found: YES
Library numa found: YES
Has header "numaif.h" : YES
Library libfdt found: YES
Has header "fdt.h" : YES
Library libexecinfo found: NO
Found pkg-config: /usr/bin/pkg-config (1.8.0)
Run-time dependency libarchive found: YES 3.5.2
Run-time dependency libbsd found: YES 0.11.3
Run-time dependency jansson found: YES 2.14
Run-time dependency libpcap found: YES 1.10.1
Has header "pcap.h" with dependency libpcap: YES
Compiler for C supports arguments -Wextra: YES
config/meson.build:290: WARNING: Consider using the built-in warning_level
option instead of using "-Wextra".

-- 
You are receiving this mail because:
You are the assignee for the bug.

[PATCH v1] doc: fix KNI PMD name typo

2022-01-19 Thread Haiyue Wang
The KNI PMD name should be "net_kni".

Fixes: 75e2bc54c018 ("net/kni: add KNI PMD")
Cc: sta...@dpdk.org

Signed-off-by: Haiyue Wang 
---
 doc/guides/nics/kni.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/guides/nics/kni.rst b/doc/guides/nics/kni.rst
index 37c5411a32..2a23bb3f3b 100644
--- a/doc/guides/nics/kni.rst
+++ b/doc/guides/nics/kni.rst
@@ -33,7 +33,7 @@ Usage
 
 EAL ``--vdev`` argument can be used to create KNI device instance, like::
 
-dpdk-testpmd --vdev=net_kni0 --vdev=net_kn1 -- -i
+dpdk-testpmd --vdev=net_kni0 --vdev=net_kni1 -- -i
 
 Above command will create ``kni0`` and ``kni1`` Linux network interfaces,
 those interfaces can be controlled by standard Linux tools.
-- 
2.34.1



Re: [PATCH v2 01/10] ethdev: introduce flow pre-configuration hints

2022-01-19 Thread Ivan Malov

Hi,


+Rules management configuration
+--
+
+Configure flow rules management.


It is either "management OF ruleS" or "rule management".
Perhaps fix similar occurrences across the series.


+   /**
+* Number of counter actions pre-configured.
+* If set to 0, PMD will allocate counters dynamically.
+* @see RTE_FLOW_ACTION_TYPE_COUNT
+*/
+   uint32_t nb_counters;
+   /**
+* Number of aging actions pre-configured.
+* If set to 0, PMD will allocate aging dynamically.
+* @see RTE_FLOW_ACTION_TYPE_AGE
+*/
+   uint32_t nb_aging;
+   /**
+* Number of traffic metering actions pre-configured.
+* If set to 0, PMD will allocate meters dynamically.
+* @see RTE_FLOW_ACTION_TYPE_METER
+*/
+   uint32_t nb_meters;


If duplication of the same description is undesirable,
consider adding a common description for these fields:

/**
 * Resource preallocation settings. Use zero to
 * request that allocations be done on demand.
 */

Instead of "nb_aging", perhaps consider something like "nb_age_timers".


+ * Configure flow rules module.
+ * To pre-allocate resources as per the flow port attributes
+ * this configuration function must be called before any flow rule is created.
+ * Must be called only after Ethernet device is configured, but may be called
+ * before or after the device is started as long as there are no flow rules.
+ * No other rte_flow function should be called while this function is invoked.
+ * This function can be called again to change the configuration.
+ * Some PMDs may not support re-configuration at all,
+ * or may only allow increasing the number of resources allocated.


Consider:

* Pre-configure the port's flow API engine.
*
* This API can only be invoked before the application
* starts using the rest of the flow library functions.
*
* The API can be invoked multiple times to change the
* settings. The port, however, may reject the changes.

--
Ivan M.


[PATCH v1] gpu/cuda: add NVIDIA GPU A100 identifier for DPU

2022-01-19 Thread eagostini
From: Elena Agostini 

Add a new NVIDIA GPU identifier to let the
driver recognize the A100 on a DPU card.

Signed-off-by: Elena Agostini 
---
 drivers/gpu/cuda/cuda.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/cuda/cuda.c b/drivers/gpu/cuda/cuda.c
index 882df08e56..c295e7cc70 100644
--- a/drivers/gpu/cuda/cuda.c
+++ b/drivers/gpu/cuda/cuda.c
@@ -58,6 +58,7 @@ static int cuda_driver_version;
 /* NVIDIA GPU device IDs */
 #define NVIDIA_GPU_A100_40GB_DEVICE_ID (0x20f1)
 #define NVIDIA_GPU_A100_80GB_DEVICE_ID (0x20b5)
+#define NVIDIA_GPU_A100_80GB_DPU_DEVICE_ID (0x20b8)
 
 #define NVIDIA_GPU_A30_24GB_DEVICE_ID (0x20b7)
 #define NVIDIA_GPU_A10_24GB_DEVICE_ID (0x2236)
@@ -92,6 +93,10 @@ static const struct rte_pci_id pci_id_cuda_map[] = {
RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID,
NVIDIA_GPU_A100_80GB_DEVICE_ID)
},
+   {
+   RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID,
+   NVIDIA_GPU_A100_80GB_DPU_DEVICE_ID)
+   },
{
RTE_PCI_DEVICE(NVIDIA_GPU_VENDOR_ID,
NVIDIA_GPU_A30_24GB_DEVICE_ID)
-- 
2.17.1



Re: [PATCH 01/12] net: add string to IPv4 parse function

2022-01-19 Thread Thomas Monjalon
14/12/2021 15:12, Ronan Randles:
> --- a/lib/net/rte_ip.h
> +++ b/lib/net/rte_ip.h
> +/**
> + * IP address parser.
> + *
> + * @param src_ip
> + *   The IP address to be parsed.
> + * @param output_addr
> + *   The array in which the parsed digits will be saved.
> + *
> + * @retval 0
> + *   Success.
> + * @retval -1
> + *   Failure due to invalid input arguments.
> + */
> +
> +__rte_experimental
> +int32_t
> +rte_ip_parse_addr(const char *src_ip, uint32_t *output_addr);

Is it similar to inet_aton()?
Does it support IPv6? If not, why not add a number 4 in the name?





Re: [PATCH 02/12] net: add function to pretty print IPv4

2022-01-19 Thread Thomas Monjalon
15/12/2021 14:06, Ananyev, Konstantin:
> 
> > > From: Stephen Hemminger [mailto:step...@networkplumber.org]
> > > Sent: Wednesday, 15 December 2021 04.21
> > >
> > > On Wed, 15 Dec 2021 01:06:14 +
> > > "Ananyev, Konstantin"  wrote:
> > > > > > --- a/lib/net/rte_ip.h
> > > > > > +++ b/lib/net/rte_ip.h
> > > > > > @@ -444,6 +444,26 @@ __rte_experimental
> > > > > >  int32_t
> > > > > >  rte_ip_parse_addr(const char *src_ip, uint32_t *output_addr);
> > > > > >
> > > > > > +
> > > > > > +/**
> > > > > > + * Print IP address from 32 bit int into char * buffer.
> > > > > > + *
> > > > > > + * @param ip_addr
> > > > > > + *   ip address to be printed.
> > > > > > + * @param buffer
> > > > > > + *   The buffer the string will be saved into.
> > > > > > + * @param buffer_size
> > > > > > + *   size of buffer to be used.
> > > > > > + *
> > > > > > + * @retval 0
> > > > > > + *   Success.
> > > > > > + * @retval -1
> > > > > > + *   Failure due to invalid input arguments.
> > > > > > + */
> > > > > > +__rte_experimental
> > > > > > +int32_t
> > > > > > +rte_ip_print_addr(uint32_t ip_addr, char *buffer, uint32_t
> > > > > > buffer_size);
> > > > > > +
> > > > >
> > > > > In continuation of my email reply about the IPv4 parse function...
> > > > >
> > > > > I have a few suggestions to the IPv4 print function too:
> > > > >
> > > > > The return value should be the number of characters written to the
> > > output string, and still -1 on error. With this modification, you could
> > > > > use the return type ssize_t instead of int32_t.
> > > > >
> > > > > Furthermore, I would prefer having the parameters in the same order
> > > as snprintf(): char *str, size_t size, const uint32_t ip_addr. Please
> > > > > also notice the suggested changed type for the size, and the const
> > > added to the ip_addr.
> > > > >
> > > > Honestly, I don't understand why we need to introduce such functions
> > > > inside DPDK at all.
> > > > What's wrong with existing standard ones: inet_ntop() and
> > > inet_pton()?
> > >
> > > Agreed, I see no added value in reinventing here
> > 
> > I think that DPDK functions for converting all sorts of types to/from 
> > strings would be useful; not only IP addresses, but also MAC addresses,
> > TCP/UDP port numbers and VLAN IDs.
> 
> For MACs we already have:
> rte_ether_format_addr()/rte_ether_unformat_addr()
> 
> > 
> > If you don't like IP address string conversion functions in the net 
> > library, DPDK could have a string conversions library. That library could
> > expose a multitude of APIs for the same purpose, so the application can use 
> > the API that best fits each application use.
> 
> I don’t mind to add new functions into net lib, if they are useful ones.
> But for that particular case, I just don't see what is the reason to
> develop and maintain our own functions while existing analogues:
> - are well known, widely adopted and field proven
> - do provide the same or even more comprehensive functionality

+1
Waiting for an answer from the authors. One month silence so far.





mellanox connect x 5 drops when cache full

2022-01-19 Thread Yaron Illouz
Hi

I am using multiqueue with RSS to read from MT27800 Family [ConnectX-5] 1017

My application receives traffic and writes some data to disk. As a result, I/O 
writes are cached in Linux memory.
When the server memory is completely used by the cache (I know it is still 
available), I start seeing drops at the NIC.
If I delete the data from disk every minute, the cached memory is released to 
free and there are no drops at the NIC.
How can I avoid the drops at the NIC?

              total        used        free      shared  buff/cache   available
Mem:           125G         77G        325M         29M         47G         47G
Swap:          8.0G        256K        8.0G



RE: [PATCH 02/12] net: add function to pretty print IPv4

2022-01-19 Thread Van Haaren, Harry
> -Original Message-
> From: Thomas Monjalon 
> Sent: Wednesday, January 19, 2022 2:24 PM
> To: Morten Brørup ; Stephen Hemminger
> ; Randles, Ronan ;
> Van Haaren, Harry ; Ananyev, Konstantin
> 
> Cc: dev@dpdk.org
> Subject: Re: [PATCH 02/12] net: add function to pretty print IPv4
> 
> 15/12/2021 14:06, Ananyev, Konstantin:
> >
> > > > From: Stephen Hemminger [mailto:step...@networkplumber.org]
> > > > Sent: Wednesday, 15 December 2021 04.21
> > > >
> > > > On Wed, 15 Dec 2021 01:06:14 +
> > > > "Ananyev, Konstantin"  wrote:
> > > > > > > --- a/lib/net/rte_ip.h
> > > > > > > +++ b/lib/net/rte_ip.h
> > > > > > > @@ -444,6 +444,26 @@ __rte_experimental
> > > > > > >  int32_t
> > > > > > >  rte_ip_parse_addr(const char *src_ip, uint32_t *output_addr);
> > > > > > >
> > > > > > > +
> > > > > > > +/**
> > > > > > > + * Print IP address from 32 bit int into char * buffer.
> > > > > > > + *
> > > > > > > + * @param ip_addr
> > > > > > > + *   ip address to be printed.
> > > > > > > + * @param buffer
> > > > > > > + *   The buffer the string will be saved into.
> > > > > > > + * @param buffer_size
> > > > > > > + *   size of buffer to be used.
> > > > > > > + *
> > > > > > > + * @retval 0
> > > > > > > + *   Success.
> > > > > > > + * @retval -1
> > > > > > > + *   Failure due to invalid input arguments.
> > > > > > > + */
> > > > > > > +__rte_experimental
> > > > > > > +int32_t
> > > > > > > +rte_ip_print_addr(uint32_t ip_addr, char *buffer, uint32_t
> > > > > > > buffer_size);
> > > > > > > +
> > > > > >
> > > > > > In continuation of my email reply about the IPv4 parse function...
> > > > > >
> > > > > > I have a few suggestions to the IPv4 print function too:
> > > > > >
> > > > > > The return value should be the number of characters written to the
> > > > output string, and still -1 on error. With this modification, you could
> > > > > > use the return type ssize_t instead of int32_t.
> > > > > >
> > > > > > Furthermore, I would prefer having the parameters in the same order
> > > > as snprintf(): char *str, size_t size, const uint32_t ip_addr. Please
> > > > > > also notice the suggested changed type for the size, and the const
> > > > added to the ip_addr.
> > > > > >
> > > > > Honestly, I don't understand why we need to introduce such functions
> > > > > inside DPDK at all.
> > > > > What's wrong with existing standard ones: inet_ntop() and
> > > > inet_pton()?
> > > >
> > > > Agreed, I see no added value in reinventing here
> > >
> > > I think that DPDK functions for converting all sorts of types to/from 
> > > strings
> would be useful; not only IP addresses, but also MAC addresses,
> > > TCP/UDP port numbers and VLAN IDs.
> >
> > For MACs we already have:
> > rte_ether_format_addr()/rte_ether_unformat_addr()
> >
> > >
> > > If you don't like IP address string conversion functions in the net 
> > > library, DPDK
> could have a string conversions library. That library could
> > > expose a multitude of APIs for the same purpose, so the application can 
> > > use
> the API that best fits each application use.
> >
> > I don’t mind to add new functions into net lib, if they are useful ones.
> > But for that particular case, I just don't see what is the reason to
> > develop and maintain our own functions while existing analogues:
> > - are well known, widely adopted and field proven
> > - do provide the same or even more comprehensive functionality
> 
> +1
> Waiting for an answer from the authors. One month silence so far.

Hi All,

Ronan and I are working on a V2 patchset, and hope to share it in the next days.

Personally I think there is value in DPDK having various functionality "built 
in", and
if that functionality is available for e.g. Ether, I see no reason why IPv4 or 
other
protocols shouldn't have that functionality available. If I was a newcomer to 
DPDK,
I would find it difficult to understand why I can format/unformat Ether with 
DPDK,
but had to use Linux/Windows functions to format/unformat IP/UDP.

I liked Morten's feedback around real-world usage, however we have not got 
around
to implementing that functionality in the V2 patchset. We intended to propose 
that
change in signature/extra-functionality being added to V3.

I believe the gen library was discussed briefly at last tech-board call 
(unfortunately I was not
available to participate). Perhaps a quick chat/discussion at the next 
tech-board would be helpful
to identify community stance on the library? At that point the V2 should be 
available too.

Regards, -Harry



[PATCH] bus/pci: assign driver's pointer before mapping

2022-01-19 Thread Michal Krawczyk
The patch that changed the way the interrupt handle is accessed also changed
the order of the rte_pci_map_device() call and the rte_pci_device:driver
assignment. This caused issues with Write Combine mapping on the Linux
platform when it was used with the igb_uio module.

Linux implementation of pci_uio_map_resource_by_index(), which is called
by rte_pci_map_device(), needs access to the device's driver. Otherwise
it won't be able to check the driver's flags and won't respect them.

Fixes: d61138d4f0e2 ("drivers: remove direct access to interrupt handle")
Cc: hka...@marvell.com
Cc: sta...@dpdk.org

Signed-off-by: Michal Krawczyk 
---
 drivers/bus/pci/pci_common.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
index 4a3a87f24f..def372b67e 100644
--- a/drivers/bus/pci/pci_common.c
+++ b/drivers/bus/pci/pci_common.c
@@ -247,9 +247,12 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr,
return -ENOMEM;
}
 
+   dev->driver = dr;
+
if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) {
ret = rte_pci_map_device(dev);
if (ret != 0) {
+   dev->driver = NULL;

rte_intr_instance_free(dev->vfio_req_intr_handle);
dev->vfio_req_intr_handle = NULL;
rte_intr_instance_free(dev->intr_handle);
@@ -257,8 +260,6 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr,
return ret;
}
}
-
-   dev->driver = dr;
}
 
RTE_LOG(INFO, EAL, "Probe PCI driver: %s (%x:%x) device: "PCI_PRI_FMT" 
(socket %i)\n",
-- 
2.25.1



[PATCH v2] mempool: fix put objects to mempool with cache

2022-01-19 Thread Morten Brørup
This patch optimizes the rte_mempool_do_generic_put() caching algorithm,
and fixes a bug in it.

The existing algorithm was:
 1. Add the objects to the cache
 2. Anything greater than the cache size (if it crosses the cache flush
threshold) is flushed to the ring.

Please note that the description in the source code said that it kept
"cache min value" objects after flushing, but the function actually kept
"size" objects, which is reflected in the above description.

Now, the algorithm is:
 1. If the objects cannot be added to the cache without crossing the
flush threshold, flush the cache to the ring.
 2. Add the objects to the cache.

This patch changes these details:

1. Bug: The cache was still full after flushing.
In the opposite direction, i.e. when getting objects from the cache, the
cache is refilled to full level when it crosses the low watermark (which
happens to be zero).
Similarly, the cache should be flushed to empty level when it crosses
the high watermark (which happens to be 1.5 x the size of the cache).
The existing flushing behaviour was suboptimal for real applications,
because crossing the low or high watermark typically happens when the
application is in a state where the number of put/get events are out of
balance, e.g. when absorbing a burst of packets into a QoS queue
(getting more mbufs from the mempool), or when a burst of packets is
trickling out from the QoS queue (putting the mbufs back into the
mempool).
NB: When the application is in a state where put/get events are in
balance, the cache should remain within its low and high watermarks, and
the algorithms for refilling/flushing the cache should not come into
play.
Now, the mempool cache is completely flushed when crossing the flush
threshold, so only the newly put (hot) objects remain in the mempool
cache afterwards.

2. Minor bug: The flush threshold comparison has been corrected; it must
be "len > flushthresh", not "len >= flushthresh".
Reasoning: Consider a flush multiplier of 1 instead of 1.5; the cache
would be flushed already when reaching size elements, not when exceeding
size elements.
Now, flushing is triggered when the flush threshold is exceeded, not
when reached.

3. Optimization: The most recent (hot) objects are flushed, leaving the
oldest (cold) objects in the mempool cache.
This is bad for CPUs with a small L1 cache, because when they get
objects from the mempool after the mempool cache has been flushed, they
get cold objects instead of hot objects.
Now, the existing (cold) objects in the mempool cache are flushed before
the new (hot) objects are added to the mempool cache.

4. Optimization: Using the x86 variant of rte_memcpy() is inefficient
here, where n is relatively small and unknown at compile time.
Now, it has been replaced by an alternative copying method, optimized
for the fact that most Ethernet PMDs operate in bursts of 4 or 8 mbufs
or multiples thereof.

v2 changes:

- Not adding the new objects to the mempool cache before flushing it
also allows the memory allocated for the mempool cache to be reduced
from 3 x to 2 x RTE_MEMPOOL_CACHE_MAX_SIZE.
However, such a change would break the ABI, so it was removed in v2.

- The mempool cache should be cache line aligned for the benefit of the
copying method, which on some CPU architectures performs worse on data
crossing a cache boundary.
However, such a change would break the ABI, so it was removed in v2;
and yet another alternative copying method replaced the rte_memcpy().

Signed-off-by: Morten Brørup 
---
 lib/mempool/rte_mempool.h | 54 +--
 1 file changed, 40 insertions(+), 14 deletions(-)

diff --git a/lib/mempool/rte_mempool.h b/lib/mempool/rte_mempool.h
index 1e7a3c1527..8a7067ee5b 100644
--- a/lib/mempool/rte_mempool.h
+++ b/lib/mempool/rte_mempool.h
@@ -94,7 +94,8 @@ struct rte_mempool_cache {
 * Cache is allocated to this size to allow it to overflow in certain
 * cases to avoid needless emptying of cache.
 */
-   void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
+   void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 2] __rte_cache_aligned;
+   /**< Cache objects */
 } __rte_cache_aligned;
 
 /**
@@ -1334,6 +1335,7 @@ static __rte_always_inline void
 rte_mempool_do_generic_put(struct rte_mempool *mp, void * const *obj_table,
   unsigned int n, struct rte_mempool_cache *cache)
 {
+   uint32_t index;
void **cache_objs;
 
/* increment stat now, adding in mempool always success */
@@ -1344,31 +1346,56 @@ rte_mempool_do_generic_put(struct rte_mempool *mp, void 
* const *obj_table,
if (unlikely(cache == NULL || n > RTE_MEMPOOL_CACHE_MAX_SIZE))
goto ring_enqueue;
 
-   cache_objs = &cache->objs[cache->len];
+   /* If the request itself is too big for the cache */
+   if (unlikely(n > cache->flushthresh))
+   goto ring_enqueue;
 
/*
 * The cache follows t

Re: [PATCH 05/12] gen: add raw packet data API and tests

2022-01-19 Thread Thomas Monjalon
20/12/2021 11:21, Van Haaren, Harry:
> From: Thomas Monjalon 
> > 17/12/2021 12:40, Van Haaren, Harry:
> > > I could ramble on a bit more, but mostly diminishing returns I think...
> > > I'll just use this email as a reply to Thomas' tweet;
> > > https://twitter.com/tmonjalo/status/1337313985662771201
> > 
> > My original question was to know available applications,
> > not integrating such application in the DPDK repository.
> > 
> > I may be missing something obvious,
> > but I don't understand why trying to add a user app inside DPDK repo.
> 
> There are likely a few points-of-view on this particular topic; and I'm glad
> you mention it so we can discuss it clearly here.
> 
> There are two main parts to this patchset, the first is a packet generation 
> library,
> with an easy to use string-based syntax. The *library* is designed to be 
> extended in
> future to a range of "useful stuff" to do while generating packets.

The text syntax would be specific to this application
and not usable somewhere else, so it doesn't make sense as a lib.

> The packet generation
> *application* should have minimal features, and focus on ease-of-use (as 
> suggested below).

It would be either a limited application,
or an ever-growing application.
If the latter, it should not be in the main DPDK repository in my opinion.

By the way, I don't think it is the responsibility of DPDK to generate packets.
I would prefer having an application using the already known scapy
or a graphical interface like Ostinato.
There are tons of approaches to define packets to send (pCraft is another one).
DPDK should only manage the Tx part, and optionally Rx of forwarded packets.

> In order to test the DPDK code, we need a variety of unit tests, and a 
> sample-application to show
> users how to use the library (as well as docs etc). For me, the interesting 
> part is that it is a small
> step from a simple sample-app just for testing to a minimal tool for 
> high-rate packet generation.
> 
> I think many users of DPDK first install DPDK, then wish for a tool to 
> generate high traffic rates
> to test DPDK, and end up with a usability problem; DPDK does not include a 
> usable packet generator.

I don't see any usability problem in using an external well known tool.
Learning a new tool provided by DPDK *is* a usability difficulty.

> To highlight this point; our own DPDK Docs simply ignore the requirement of 
> packet-generation to
> actually have packets processed by skeleton: 
> http://doc.dpdk.org/guides/sample_app_ug/skeleton.html 
> Our "quick start" on the website uses PCAP vdevs (avoiding the problem)  
> https://core.dpdk.org/doc/quick-start/
> Even searching the entire docs for "generate packet" doesn't give any 
> relevant/useful results:
> http://doc.dpdk.org/guides/search.html?q=generate+packet&check_keywords=yes&area=default#
>  
> 
> Users could internet-search & find pktgen, moongen, trex, or similar tools. 
> These tools are fantastic for experienced
> developers such as devs on this mailing list - we should *NOT* replicate 
> these complex tools in DPDK itself. However,
> building any tool outside of DPDK repo requires more effort; another 
> git-clone, another set of dependencies to install,
> perhaps another build-system to get used to. Particularly for people starting 
> out with DPDK (who are likely finding
> it difficult to learn the various hugepage/PCI-binding etc), this is yet 
> another problem to solve, or to give up.
> 
> So my proposal is as follows; let us add a simple DPDK traffic generator to 
> DPDK. We can define its scope
> and its intended use, limiting the scope and capabilities. As before, I do 
> NOT think it a good idea to build a
> complex and feature-rich packet generator. I do feel it useful to have an 
> easy-to-use application in DPDK that
> is particularly designed for generating specific packets, at specific 
> line-rates, and reports mpps returned.
> 
> Thoughts on adding an small scope-limited application to DPDK enabling 
> ease-of-packet-generation for new users?

So you want a simple packet generator for simple benchmarks?
And for complex benchmarks, we use another tool?




[PATCH v3] mempool: fix put objects to mempool with cache

2022-01-19 Thread Morten Brørup
mempool: fix put objects to mempool with cache

This patch optimizes the rte_mempool_do_generic_put() caching algorithm,
and fixes a bug in it.

The existing algorithm was:
 1. Add the objects to the cache
 2. Anything greater than the cache size (if it crosses the cache flush
threshold) is flushed to the ring.

Please note that the description in the source code said that it kept
"cache min value" objects after flushing, but the function actually kept
"size" objects, which is reflected in the above description.

Now, the algorithm is:
 1. If the objects cannot be added to the cache without crossing the
flush threshold, flush the cache to the ring.
 2. Add the objects to the cache.

This patch changes these details:

1. Bug: The cache was still full after flushing.
In the opposite direction, i.e. when getting objects from the cache, the
cache is refilled to full level when it crosses the low watermark (which
happens to be zero).
Similarly, the cache should be flushed to empty level when it crosses
the high watermark (which happens to be 1.5 x the size of the cache).
The existing flushing behaviour was suboptimal for real applications,
because crossing the low or high watermark typically happens when the
application is in a state where the number of put/get events are out of
balance, e.g. when absorbing a burst of packets into a QoS queue
(getting more mbufs from the mempool), or when a burst of packets is
trickling out from the QoS queue (putting the mbufs back into the
mempool).
NB: When the application is in a state where put/get events are in
balance, the cache should remain within its low and high watermarks, and
the algorithms for refilling/flushing the cache should not come into
play.
Now, the mempool cache is completely flushed when crossing the flush
threshold, so only the newly put (hot) objects remain in the mempool
cache afterwards.

2. Minor bug: The flush threshold comparison has been corrected; it must
be "len > flushthresh", not "len >= flushthresh".
Reasoning: Consider a flush multiplier of 1 instead of 1.5; the cache
would be flushed already when reaching size elements, not when exceeding
size elements.
Now, flushing is triggered when the flush threshold is exceeded, not
when reached.

3. Optimization: The most recent (hot) objects are flushed, leaving the
oldest (cold) objects in the mempool cache.
This is bad for CPUs with a small L1 cache, because when they get
objects from the mempool after the mempool cache has been flushed, they
get cold objects instead of hot objects.
Now, the existing (cold) objects in the mempool cache are flushed before
the new (hot) objects are added to the mempool cache.

4. Optimization: Using the x86 variant of rte_memcpy() is inefficient
here, where n is relatively small and unknown at compile time.
Now, it has been replaced by an alternative copying method, optimized
for the fact that most Ethernet PMDs operate in bursts of 4 or 8 mbufs
or multiples thereof.

v2 changes:

- Not adding the new objects to the mempool cache before flushing it
also allows the memory allocated for the mempool cache to be reduced
from 3 x to 2 x RTE_MEMPOOL_CACHE_MAX_SIZE.
However, such a change would break the ABI, so it was removed in v2.

- The mempool cache should be cache line aligned for the benefit of the
copying method, which on some CPU architectures performs worse on data
crossing a cache boundary.
However, such a change would break the ABI, so it was removed in v2;
and yet another alternative copying method replaced the rte_memcpy().

v3 changes:

- Actually remove my modifications of the rte_mempool_cache structure.

Signed-off-by: Morten Brørup 
---
 lib/mempool/rte_mempool.h | 51 +--
 1 file changed, 38 insertions(+), 13 deletions(-)

diff --git a/lib/mempool/rte_mempool.h b/lib/mempool/rte_mempool.h
index 1e7a3c1527..7b364cfc74 100644
--- a/lib/mempool/rte_mempool.h
+++ b/lib/mempool/rte_mempool.h
@@ -1334,6 +1334,7 @@ static __rte_always_inline void
 rte_mempool_do_generic_put(struct rte_mempool *mp, void * const *obj_table,
   unsigned int n, struct rte_mempool_cache *cache)
 {
+   uint32_t index;
void **cache_objs;
 
/* increment stat now, adding in mempool always success */
@@ -1344,31 +1345,56 @@ rte_mempool_do_generic_put(struct rte_mempool *mp, void 
* const *obj_table,
if (unlikely(cache == NULL || n > RTE_MEMPOOL_CACHE_MAX_SIZE))
goto ring_enqueue;
 
-   cache_objs = &cache->objs[cache->len];
+   /* If the request itself is too big for the cache */
+   if (unlikely(n > cache->flushthresh))
+   goto ring_enqueue;
 
/*
 * The cache follows the following algorithm
-*   1. Add the objects to the cache
-*   2. Anything greater than the cache min value (if it crosses the
-*   cache flush threshold) is flushed to the ring.
+*   1. If the objects cannot be added to the cach

Re: [PATCH v2 02/10] ethdev: add flow item/action templates

2022-01-19 Thread Ivan Malov

Hi,


+Oftentimes in an application, many flow rules share a common structure
+(the same pattern and/or action list) so they can be grouped and 

classified
+together. This knowledge may be used as a source of optimization by a 

PMD/HW.

+The flow rule creation is done by selecting a table, an item template
+and an action template (which are bound to the table), and setting 

unique

+values for the items and actions. This API is not thread-safe.


Consider:

+Typically, flow rules generated by a given application conform to a small
+group of "shapes". What defines a "shape" is a set of specific item masks
+and action types. This knowledge facilitates optimisations in PMDs / HW.
+
+With such "shapes" (templates) being grouped in tables, a flow rule can
+be created by selecting a template (pattern, action list) within a given
+table and filling out specific match / action properties.


+ struct rte_flow_item_template *
+ rte_flow_item_template_create(uint16_t port_id,
+ const struct rte_flow_item_template_attr 

*it_attr,

+ const struct rte_flow_item items[],
+ struct rte_flow_error *error);


I'm afraid "it_attr" is hardly readable. Also, the API name can
trick users into thinking that it's all about creating a single
item template rather than a flow pattern template.

Perhaps rename to "rte_flow_pattern_template_create()"?
Use "tmpl" instead of "template"? Or "shape" maybe?

For sure, "const struct rte_flow_item items[]" would look better
when renamed to "const struct rte_flow_item pattern[]".

The same goes for "rte_flow_action_template_create()" and "at_attr".

Perhaps, "rte_flow_action_list_shape_create()" then?

+A table combines a number of item and action templates along with 

shared flow
+rule attributes (group ID, priority and traffic direction). This way a 

PMD/HW

Please consider:

+A template table consists of multiple pattern templates and action list
+templates associated with a single set of rule attributes (group ID,
+priority, etc).

Perhaps rename "item_templates[]" and "action_templates[]"
to "pattern_templates[]" and "action_list_templates[]".
Maybe make use of the term "shape" here as well...


+ /**
+  * Relaxed matching policy, PMD may match only on items
+  * with mask member set and skip matching on protocol
+  * layers specified without any masks.
+  * If not set, PMD will match on protocol layers
+  * specified without any masks as well.
+  * Packet data must be stacked in the same order as the
+  * protocol layers to match inside packets,
+  * starting from the lowest.
+  */
+ uint32_t relaxed_matching:1;


Consider rewording this to a bullet-formatted set of statements.
For brevity. For improved clarity.


+  * Flow attributes that will be used in the table.


Perhaps: "Flow attributes to be used in each rule generated from this
table". Something like that.


+   struct rte_flow_item_template *item_templates[],

Perhaps, "const struct"? The name could be "pattern_templates".


+   uint8_t nb_item_templates,

Why not "unsigned int"? The name could be "nb_pattern_templates".


+   struct rte_flow_action_template *action_templates[],
+   uint8_t nb_action_templates,

Same questions here.

--
Ivan M.


Re: [PATCH 1/1] mempool: implement index-based per core cache

2022-01-19 Thread Dharmik Thakkar
Hi Konstantin,

> On Jan 13, 2022, at 4:37 AM, Ananyev, Konstantin 
>  wrote:
> 
> 
> Hi Dharmik,
> 
>>> 
>>>> Current mempool per core cache implementation stores pointers to mbufs
>>>> On 64b architectures, each pointer consumes 8B
>>>> This patch replaces it with index-based implementation,
>>>> where in each buffer is addressed by (pool base address + index)
>>>> It reduces the amount of memory/cache required for per core cache
>>>>
>>>> L3Fwd performance testing reveals minor improvements in the cache
>>>> performance (L1 and L2 misses reduced by 0.60%)
>>>> with no change in throughput
>>> 
>>> I feel really sceptical about that patch and the whole idea in general:
>>> - From what I read above there is no real performance improvement observed.
>>> (In fact on my IA boxes mempool_perf_autotest reports ~20% slowdown,
>>> see below for more details).
>> 
>> Currently, the optimizations (loop unroll and vectorization) are only 
>> implemented for ARM64.
>> Similar optimizations can be implemented for x86 platforms which should 
>> close the performance gap
>> and in my understanding should give better performance for a bulk size of 32.
> 
> Might be, but I still don't see the reason for such effort.
> As you mentioned there is no performance improvement in 'real' apps: l3fwd, 
> etc.
> on ARM64 even with vectorized version of the code.
> 

IMO, even without performance improvement, it is advantageous because the same 
performance is being achieved
with less memory and cache utilization using the patch.

>>> - Space utilization difference looks negligible too.
>> 
>> Sorry, I did not understand this point.
> 
> As I understand one of the expectations from that patch was:
> reduce memory/cache required, which should improve cache utilization
> (less misses, etc.).
> Though I think such improvements would be negligible and wouldn't
> cause any real performance gain.

The cache utilization performance numbers are for the l3fwd app, which might 
not be bottlenecked at the mempool per core cache.
Theoretically, this patch enables storing twice the number of objects in the 
cache as compared to the original implementation.

> 
>>> - The change introduces a new build time config option with a major 
>>> limitation:
>>>  All memzones in a pool have to be within the same 4GB boundary.
>>>  To address it properly, extra changes will be required in init(/populate) 
>>> part of the code.
>> 
>> I agree to the above mentioned challenges and I am currently working on 
>> resolving these issues.
> 
> I still think that to justify such changes some really noticeable performance
> improvement needs to be demonstrated: double-digit speedup for 
> l3fwd/ipsec-secgw/...  
> Otherwise it's just not worth the hassle. 
> 

Like I mentioned earlier, the app might not be bottlenecked at the mempool per 
core cache.
That could be the reason the numbers with l3fwd don’t fully show the advantage 
of the patch.
I’m seeing double-digit improvement with mempool_perf_autotest which should not 
be ignored.

>>>  All that will complicate mempool code, will make it more error prone
>>>  and harder to maintain.
>>> But, as there is no real gain in return - no point to add such extra 
>>> complexity at all.
>>> 
>>> Konstantin
>>> 



Re: [PATCH 1/8] common/cnxk: fix shift offset for tl3 length disable

2022-01-19 Thread Jerin Jacob
On Thu, Dec 9, 2021 at 2:43 PM Nithin Dabilpuram
 wrote:
>
> Fix shift offset for length disable flag in NIXX_AF_TL3X_SHAPE
> register to be 24 instead of zero similar to other level SHAPE
> registers. Also mask unused bits in adjust value.
>
> Fixes: 0885429c3028 ("common/cnxk: add NIX TM hierarchy enable/disable")
>
> Signed-off-by: Nithin Dabilpuram 
> Signed-off-by: Satha Rao 


1) Fixed the following warning:
Is it candidate for Cc: sta...@dpdk.org backport?
common/cnxk: fix shift offset for tl3 length disable

2) Change tl3 to TL3.

Applied to dpdk-next-net-mrvl/for-next-net. Thanks


> ---
>  drivers/common/cnxk/roc_nix_tm_utils.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/common/cnxk/roc_nix_tm_utils.c 
> b/drivers/common/cnxk/roc_nix_tm_utils.c
> index 543adf9..9e80c2a 100644
> --- a/drivers/common/cnxk/roc_nix_tm_utils.c
> +++ b/drivers/common/cnxk/roc_nix_tm_utils.c
> @@ -642,6 +642,7 @@ nix_tm_shaper_reg_prep(struct nix_tm_node *node,
> else if (profile)
> adjust = profile->pkt_len_adj;
>
> +   adjust &= 0x1FF;
> plt_tm_dbg("Shaper config node %s(%u) lvl %u id %u, "
>"pir %" PRIu64 "(%" PRIu64 "B),"
>" cir %" PRIu64 "(%" PRIu64 "B)"
> @@ -708,7 +709,7 @@ nix_tm_shaper_reg_prep(struct nix_tm_node *node,
> /* Configure RED algo */
> reg[k] = NIX_AF_TL3X_SHAPE(schq);
> regval[k] = (adjust | (uint64_t)node->red_algo << 9 |
> -(uint64_t)node->pkt_mode);
> +(uint64_t)node->pkt_mode << 24);
> k++;
>
> break;
> --
> 2.8.4
>


Re: [PATCH 2/8] common/cnxk: use for loop in shaper profiles cleanup

2022-01-19 Thread Jerin Jacob
On Thu, Dec 9, 2021 at 2:44 PM Nithin Dabilpuram
 wrote:
>
> From: Gowrishankar Muthukrishnan 
>
> In shaper profiles cleanup, KW reports infinite loop although existing
> loop condition is alright. False positive may be due to tqh_first not
> checked in loop, hence switching to FOREACH_SAFE to make KW happy.
>
> Signed-off-by: Gowrishankar Muthukrishnan 
> Signed-off-by: Shijith Thotton 

Acked-by: Jerin Jacob 
Applied to dpdk-next-net-mrvl/for-next-net. Thanks

Changed the git log to:


common/cnxk: use for loop in shaper profiles cleanup

In shaper profiles cleanup, Klocwork static analyzer tool reports
infinite loop although existing loop condition is alright.
False positive may be due to tqh_first not checked in loop,
hence switching to FOREACH_SAFE to make Klocwork happy.

> ---
>  drivers/common/cnxk/roc_nix_tm.c   | 8 
>  drivers/common/cnxk/roc_platform.h | 2 ++
>  2 files changed, 6 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/common/cnxk/roc_nix_tm.c 
> b/drivers/common/cnxk/roc_nix_tm.c
> index b3d8ebd..fe9e83f 100644
> --- a/drivers/common/cnxk/roc_nix_tm.c
> +++ b/drivers/common/cnxk/roc_nix_tm.c
> @@ -17,16 +17,16 @@ bitmap_ctzll(uint64_t slab)
>  void
>  nix_tm_clear_shaper_profiles(struct nix *nix)
>  {
> -   struct nix_tm_shaper_profile *shaper_profile;
> +   struct nix_tm_shaper_profile *shaper_profile, *tmp;
> +   struct nix_tm_shaper_profile_list *list;
>
> -   shaper_profile = TAILQ_FIRST(&nix->shaper_profile_list);
> -   while (shaper_profile != NULL) {
> +   list = &nix->shaper_profile_list;
> +   PLT_TAILQ_FOREACH_SAFE(shaper_profile, list, shaper, tmp) {
> if (shaper_profile->ref_cnt)
> plt_warn("Shaper profile %u has non zero references",
>  shaper_profile->id);
> TAILQ_REMOVE(&nix->shaper_profile_list, shaper_profile, 
> shaper);
> nix_tm_shaper_profile_free(shaper_profile);
> -   shaper_profile = TAILQ_FIRST(&nix->shaper_profile_list);
> }
>  }
>
> diff --git a/drivers/common/cnxk/roc_platform.h 
> b/drivers/common/cnxk/roc_platform.h
> index 61d4781..3648e84 100644
> --- a/drivers/common/cnxk/roc_platform.h
> +++ b/drivers/common/cnxk/roc_platform.h
> @@ -19,6 +19,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>
>  #include "roc_bits.h"
> @@ -53,6 +54,7 @@
>  #define BITMASK_ULL GENMASK_ULL
>  #define PLT_ALIGN_CEIL  RTE_ALIGN_CEIL
>  #define PLT_INITRTE_INIT
> +#define PLT_TAILQ_FOREACH_SAFE  RTE_TAILQ_FOREACH_SAFE
>
>  /** Divide ceil */
>  #define PLT_DIV_CEIL(x, y) \
> --
> 2.8.4
>


Re: [PATCH 3/8] common/cnxk: change order of frag sizes and infos

2022-01-19 Thread Jerin Jacob
On Thu, Dec 9, 2021 at 2:44 PM Nithin Dabilpuram
 wrote:
>
> Change the order of frag sizes and infos to match HW
> implementation.
>
> Signed-off-by: Nithin Dabilpuram 
> Signed-off-by: Vidya Sagar Velumuri 


Since it is a fix, changed the git log to the following and applied to
dpdk-next-net-mrvl/for-next-net. Thanks


common/cnxk: fix byte order of frag sizes and infos

Change the byte order of frag sizes and infos to match HW
implementation.

Fixes: 64a73ebd87bd ("common/cnxk: add CPT hardware definitions")
Cc: sta...@dpdk.org

> ---
>  drivers/common/cnxk/hw/cpt.h | 12 ++--
>  1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/common/cnxk/hw/cpt.h b/drivers/common/cnxk/hw/cpt.h
> index 919f842..99a900c 100644
> --- a/drivers/common/cnxk/hw/cpt.h
> +++ b/drivers/common/cnxk/hw/cpt.h
> @@ -286,10 +286,10 @@ struct cpt_frag_info_s {
> union {
> uint64_t u64;
> struct {
> -   union cpt_frag_info f3;
> -   union cpt_frag_info f2;
> -   union cpt_frag_info f1;
> union cpt_frag_info f0;
> +   union cpt_frag_info f1;
> +   union cpt_frag_info f2;
> +   union cpt_frag_info f3;
> };
> } w0;
>
> @@ -297,10 +297,10 @@ struct cpt_frag_info_s {
> union {
> uint64_t u64;
> struct {
> -   uint16_t frag_size3;
> -   uint16_t frag_size2;
> -   uint16_t frag_size1;
> uint16_t frag_size0;
> +   uint16_t frag_size1;
> +   uint16_t frag_size2;
> +   uint16_t frag_size3;
> };
> } w1;
>  };
> --
> 2.8.4
>


Re: [PATCH 4/8] common/cnxk: reset stale values on error debug registers

2022-01-19 Thread Jerin Jacob
On Thu, Dec 9, 2021 at 2:44 PM Nithin Dabilpuram
 wrote:
>
> From: Harman Kalra 
>
> LF's error debug registers like NIX_LF_SQ_OP_ERR_DBG,
> NIX_LF_MNQ_ERR_DBG, NIX_LF_SEND_ERR_DBG captures debug
> info for an error detected during LMT operation or meta
> enqueue or after meta enqueue granted respectively. HW
> sets a valid bit when info is captured and SW is expected
> to clear this valid bit by writing 1, else these registers
> will show stale values of first interrupt when occurred and
> will never update with subsequent interrupts.
>
> Signed-off-by: Harman Kalra 

Acked-by: Jerin Jacob 
Applied to dpdk-next-net-mrvl/for-next-net. Thanks

> ---
>  drivers/common/cnxk/roc_nix_irq.c | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/common/cnxk/roc_nix_irq.c 
> b/drivers/common/cnxk/roc_nix_irq.c
> index a5cd9d4..7dcd533 100644
> --- a/drivers/common/cnxk/roc_nix_irq.c
> +++ b/drivers/common/cnxk/roc_nix_irq.c
> @@ -202,9 +202,12 @@ nix_lf_sq_debug_reg(struct nix *nix, uint32_t off)
> uint64_t reg;
>
> reg = plt_read64(nix->base + off);
> -   if (reg & BIT_ULL(44))
> +   if (reg & BIT_ULL(44)) {
> plt_err("SQ=%d err_code=0x%x", (int)((reg >> 8) & 0xf),
> (uint8_t)(reg & 0xff));
> +   /* Clear valid bit */
> +   plt_write64(BIT_ULL(44), nix->base + off);
> +   }
>  }
>
>  static void
> --
> 2.8.4
>


Re: [PATCH 5/8] common/cnxk: always use single qint with NIX

2022-01-19 Thread Jerin Jacob
On Thu, Dec 9, 2021 at 2:44 PM Nithin Dabilpuram
 wrote:
>
> From: Harman Kalra 
>
> An errata exists whereby, in certain cases NIX may use an
> incorrect QINT_IDX for SQ interrupts. As a result, the
> interrupt may not be delivered to software, or may not be
> associated with the correct SQ.
> When NIX uses an incorrect QINT_IDX :
> 1. NIX_LF_QINT(0..63)_CNT[COUNT] will be incremented for
> incorrect QINT.
> 2. NIX_LF_QINT(0..63)_INT[INTR] will be set for incorrect
> QINT.
>
> Signed-off-by: Harman Kalra 

Changed the subject to:
 common/cnxk: always use single interrupt ID with NIX

Acked-by: Jerin Jacob 
Applied to dpdk-next-net-mrvl/for-next-net. Thanks

> ---
>  drivers/common/cnxk/roc_nix_queue.c | 13 ++---
>  1 file changed, 10 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/common/cnxk/roc_nix_queue.c 
> b/drivers/common/cnxk/roc_nix_queue.c
> index c8c8401..4455fc1 100644
> --- a/drivers/common/cnxk/roc_nix_queue.c
> +++ b/drivers/common/cnxk/roc_nix_queue.c
> @@ -680,7 +680,11 @@ sq_cn9k_init(struct nix *nix, struct roc_nix_sq *sq, 
> uint32_t rr_quantum,
> aq->sq.sq_int_ena |= BIT(NIX_SQINT_MNQ_ERR);
>
> /* Many to one reduction */
> -   aq->sq.qint_idx = sq->qid % nix->qints;
> +   /* Assigning QINT 0 to all the SQs, an errata exists where NIXTX can
> +* send incorrect QINT_IDX when reporting queue interrupt (QINT). This
> +* might result in software missing the interrupt.
> +*/
> +   aq->sq.qint_idx = 0;
>  }
>
>  static int
> @@ -779,8 +783,11 @@ sq_init(struct nix *nix, struct roc_nix_sq *sq, uint32_t 
> rr_quantum,
> aq->sq.sq_int_ena |= BIT(NIX_SQINT_SEND_ERR);
> aq->sq.sq_int_ena |= BIT(NIX_SQINT_MNQ_ERR);
>
> -   /* Many to one reduction */
> -   aq->sq.qint_idx = sq->qid % nix->qints;
> +   /* Assigning QINT 0 to all the SQs, an errata exists where NIXTX can
> +* send incorrect QINT_IDX when reporting queue interrupt (QINT). This
> +* might result in software missing the interrupt.
> +*/
> +   aq->sq.qint_idx = 0;
>  }
>
>  static int
> --
> 2.8.4
>


Re: [PATCH 6/8] common/cnxk: handle issues from static analysis

2022-01-19 Thread Jerin Jacob
On Thu, Dec 9, 2021 at 2:44 PM Nithin Dabilpuram
 wrote:
>
> From: Gowrishankar Muthukrishnan 
>
> Handle issues reported by static analysis tool such as
> null pointer dereferences, variable initialization, etc.
>
> Signed-off-by: Gowrishankar Muthukrishnan 
> Signed-off-by: Nithin Dabilpuram 

Acked-by: Jerin Jacob 
Applied to dpdk-next-net-mrvl/for-next-net. Thanks

Since it is a fix, changed the git log to

common/cnxk: fix issues reported by Klocwork

Fix issues reported by Klocwork (static analysis tool) such as
null pointer dereferences, variable initialization, etc.

Fixes: c045d2e5cbbc ("common/cnxk: add CPT configuration")
Fixes: ed135040f0ab ("common/cnxk: add CPT LF configuration")
Fixes: 585bb3e538f9 ("common/cnxk: add VF support to base device class")
Fixes: 665ff1ccc2c4 ("common/cnxk: add base device class")
Fixes: da57d4589a6f ("common/cnxk: support NIX flow control")
Fixes: 218d022e1f3f ("common/cnxk: support NIX stats")
Fixes: 4efa6e82fe43 ("common/cnxk: support NIX extended stats")
Fixes: 0885429c3028 ("common/cnxk: add NIX TM hierarchy enable/disable")
Cc: sta...@dpdk.org

Signed-off-by: Gowrishankar Muthukrishnan 
Signed-off-by: Nithin Dabilpuram 
Acked-by: Jerin Jacob 

> ---
>  drivers/common/cnxk/roc_cpt.c   |  7 +++--
>  drivers/common/cnxk/roc_dev.c   | 21 -
>  drivers/common/cnxk/roc_nix_debug.c |  6 
>  drivers/common/cnxk/roc_nix_fc.c| 12 
>  drivers/common/cnxk/roc_nix_queue.c | 61 
> ++---
>  drivers/common/cnxk/roc_nix_stats.c | 18 +++
>  drivers/common/cnxk/roc_nix_tm.c|  8 -
>  7 files changed, 125 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/common/cnxk/roc_cpt.c b/drivers/common/cnxk/roc_cpt.c
> index 8f8e6d3..0e2dc45 100644
> --- a/drivers/common/cnxk/roc_cpt.c
> +++ b/drivers/common/cnxk/roc_cpt.c
> @@ -385,6 +385,9 @@ cpt_lfs_alloc(struct dev *dev, uint8_t eng_grpmsk, 
> uint8_t blkaddr,
> return -EINVAL;
>
> req = mbox_alloc_msg_cpt_lf_alloc(mbox);
> +   if (!req)
> +   return -ENOSPC;
> +
> req->nix_pf_func = 0;
> if (inl_dev_sso && nix_inl_dev_pffunc_get())
> req->sso_pf_func = nix_inl_dev_pffunc_get();
> @@ -812,9 +815,9 @@ roc_cpt_eng_grp_add(struct roc_cpt *roc_cpt, enum 
> cpt_eng_type eng_type)
>  void
>  roc_cpt_iq_disable(struct roc_cpt_lf *lf)
>  {
> +   volatile union cpt_lf_q_grp_ptr grp_ptr = {.u = 0x0};
> +   volatile union cpt_lf_inprog lf_inprog = {.u = 0x0};
> union cpt_lf_ctl lf_ctl = {.u = 0x0};
> -   union cpt_lf_q_grp_ptr grp_ptr;
> -   union cpt_lf_inprog lf_inprog;
> int timeout = 20;
> int cnt;
>
> diff --git a/drivers/common/cnxk/roc_dev.c b/drivers/common/cnxk/roc_dev.c
> index 926a916..9a86969 100644
> --- a/drivers/common/cnxk/roc_dev.c
> +++ b/drivers/common/cnxk/roc_dev.c
> @@ -57,7 +57,7 @@ pf_af_sync_msg(struct dev *dev, struct mbox_msghdr **rsp)
> struct mbox *mbox = dev->mbox;
> struct mbox_dev *mdev = &mbox->dev[0];
>
> -   volatile uint64_t int_status;
> +   volatile uint64_t int_status = 0;
> struct mbox_msghdr *msghdr;
> uint64_t off;
> int rc = 0;
> @@ -152,6 +152,11 @@ af_pf_wait_msg(struct dev *dev, uint16_t vf, int num_msg)
> /* Reserve PF/VF mbox message */
> size = PLT_ALIGN(size, MBOX_MSG_ALIGN);
> rsp = mbox_alloc_msg(&dev->mbox_vfpf, vf, size);
> +   if (!rsp) {
> +   plt_err("Failed to reserve VF%d message", vf);
> +   continue;
> +   }
> +
> mbox_rsp_init(msg->id, rsp);
>
> /* Copy message from AF<->PF mbox to PF<->VF mbox */
> @@ -236,6 +241,12 @@ vf_pf_process_msgs(struct dev *dev, uint16_t vf)
> BIT_ULL(vf % max_bits);
> rsp = (struct ready_msg_rsp *)mbox_alloc_msg(
> mbox, vf, sizeof(*rsp));
> +   if (!rsp) {
> +   plt_err("Failed to alloc VF%d READY message",
> +   vf);
> +   continue;
> +   }
> +
> mbox_rsp_init(msg->id, rsp);
>
> /* PF/VF function ID */
> @@ -988,6 +999,9 @@ dev_setup_shared_lmt_region(struct mbox *mbox, bool 
> valid_iova, uint64_t iova)
> struct lmtst_tbl_setup_req *req;
>
> req = mbox_alloc_msg_lmtst_tbl_setup(mbox);
> +   if (!req)
> +   return -ENOSPC;
> +
> /* This pcifunc is defined with primary pcifunc whose LMT address
>  * will be shared. If call contains valid IOVA, following pcifunc
>  * field is of no use.
> @@ -1061,6 +1075,11 @@ dev_lmt_setup(struct dev *dev)
>  */
> if (!dev->disable_shared_lmt) {

Re: [PATCH 8/8] net/cnxk: synchronize inline session create and destroy

2022-01-19 Thread Jerin Jacob
On Thu, Dec 9, 2021 at 2:44 PM Nithin Dabilpuram
 wrote:
>
> Synchronize inline session create and destroy using spinlock.
> Also move security related error prints outside the spinlock.
>
> Signed-off-by: Nithin Dabilpuram 

Fixed the following issue

CHECK:OPEN_ENDED_LINE: Lines should not end with a '('
#159: FILE: drivers/net/cnxk/cn9k_ethdev_sec.c:195:
+   inb_sa = (struct roc_onf_ipsec_inb_sa *)roc_nix_inl_inb_sa_get(

And Series applied to dpdk-next-net-mrvl/for-next-net. Thanks.


> ---
>  drivers/net/cnxk/cn10k_ethdev_sec.c | 35 ---
>  drivers/net/cnxk/cn9k_ethdev_sec.c  | 34 +++---
>  drivers/net/cnxk/cnxk_ethdev.c  |  7 +--
>  drivers/net/cnxk/cnxk_ethdev.h  |  6 ++
>  4 files changed, 66 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/net/cnxk/cn10k_ethdev_sec.c 
> b/drivers/net/cnxk/cn10k_ethdev_sec.c
> index 235c168..12cec0a 100644
> --- a/drivers/net/cnxk/cn10k_ethdev_sec.c
> +++ b/drivers/net/cnxk/cn10k_ethdev_sec.c
> @@ -238,6 +238,8 @@ cn10k_eth_sec_session_create(void *device,
> struct rte_crypto_sym_xform *crypto;
> struct cnxk_eth_sec_sess *eth_sec;
> bool inbound, inl_dev;
> +   rte_spinlock_t *lock;
> +   char tbuf[128] = {0};
> int rc = 0;
>
> if (conf->action_type != RTE_SECURITY_ACTION_TYPE_INLINE_PROTOCOL)
> @@ -272,6 +274,9 @@ cn10k_eth_sec_session_create(void *device,
> memset(eth_sec, 0, sizeof(struct cnxk_eth_sec_sess));
> sess_priv.u64 = 0;
>
> +   lock = inbound ? &dev->inb.lock : &dev->outb.lock;
> +   rte_spinlock_lock(lock);
> +
> /* Acquire lock on inline dev for inbound */
> if (inbound && inl_dev)
> roc_nix_inl_dev_lock();
> @@ -287,12 +292,14 @@ cn10k_eth_sec_session_create(void *device,
> /* Get Inbound SA from NIX_RX_IPSEC_SA_BASE */
> sa = roc_nix_inl_inb_sa_get(&dev->nix, inl_dev, ipsec->spi);
> if (!sa && dev->inb.inl_dev) {
> -   plt_err("Failed to create ingress sa, inline dev "
> -   "not found or spi not in range");
> +   snprintf(tbuf, sizeof(tbuf),
> +"Failed to create ingress sa, inline dev "
> +"not found or spi not in range");
> rc = -ENOTSUP;
> goto mempool_put;
> } else if (!sa) {
> -   plt_err("Failed to create ingress sa");
> +   snprintf(tbuf, sizeof(tbuf),
> +"Failed to create ingress sa");
> rc = -EFAULT;
> goto mempool_put;
> }
> @@ -301,8 +308,9 @@ cn10k_eth_sec_session_create(void *device,
>
> /* Check if SA is already in use */
> if (inb_sa->w2.s.valid) {
> -   plt_err("Inbound SA with SPI %u already in use",
> -   ipsec->spi);
> +   snprintf(tbuf, sizeof(tbuf),
> +"Inbound SA with SPI %u already in use",
> +ipsec->spi);
> rc = -EBUSY;
> goto mempool_put;
> }
> @@ -313,7 +321,8 @@ cn10k_eth_sec_session_create(void *device,
> /* Fill inbound sa params */
> rc = cnxk_ot_ipsec_inb_sa_fill(inb_sa_dptr, ipsec, crypto);
> if (rc) {
> -   plt_err("Failed to init inbound sa, rc=%d", rc);
> +   snprintf(tbuf, sizeof(tbuf),
> +"Failed to init inbound sa, rc=%d", rc);
> goto mempool_put;
> }
>
> @@ -371,7 +380,8 @@ cn10k_eth_sec_session_create(void *device,
> /* Fill outbound sa params */
> rc = cnxk_ot_ipsec_outb_sa_fill(outb_sa_dptr, ipsec, crypto);
> if (rc) {
> -   plt_err("Failed to init outbound sa, rc=%d", rc);
> +   snprintf(tbuf, sizeof(tbuf),
> +"Failed to init outbound sa, rc=%d", rc);
> rc |= cnxk_eth_outb_sa_idx_put(dev, sa_idx);
> goto mempool_put;
> }
> @@ -409,6 +419,7 @@ cn10k_eth_sec_session_create(void *device,
> }
> if (inbound && inl_dev)
> roc_nix_inl_dev_unlock();
> +   rte_spinlock_unlock(lock);
>
> plt_nix_dbg("Created %s session with spi=%u, sa_idx=%u inl_dev=%u",
> inbound ? "inbound" : "outbound", eth_sec->spi,
> @@ -422,7 +433,11 @@ cn10k_eth_sec_session_create(void *device,
>  mempool_put:
> if (inbound && inl_dev)
> roc_nix_inl_dev_unlock();
> +   rte_spinlock_unlock(lock);
> +
> rte_mempool_put

Re: [PATCH 2/6] app/test: link unit test binary against all available libs

2022-01-19 Thread David Marchand
On Thu, Jan 13, 2022 at 6:40 PM Bruce Richardson
 wrote:
>
> Rather than maintaining a list of the libraries the unit tests need, and
> having to conditionally include/omit optional libs from the list, we can
> just link against all available libraries, simplifying the code
> considerably.
>
> Signed-off-by: Bruce Richardson 
> ---
>  app/test/meson.build | 47 +---
>  1 file changed, 1 insertion(+), 46 deletions(-)
>
> diff --git a/app/test/meson.build b/app/test/meson.build
> index 344a609a4d..9919de4307 100644
> --- a/app/test/meson.build
> +++ b/app/test/meson.build
> @@ -157,39 +157,7 @@ test_sources = files(
>  'virtual_pmd.c',
>  )
>
> -test_deps = [
> -'acl',
> -'bus_pci',
> -'bus_vdev',

bus_pci and bus_vdev are not "libraries", but "drivers" dependencies
and must be kept.
This probably explains an error seen in UNH test report for a job that
disables all but those bus drivers and net/hns3:
http://mails.dpdk.org/archives/test-report/2022-January/251477.html


-- 
David Marchand



Re: [PATCH 0/6] allow more DPDK libraries to be disabled on build

2022-01-19 Thread David Marchand
On Thu, Jan 13, 2022 at 6:40 PM Bruce Richardson
 wrote:
>
> A common request on-list has been to allow more of the DPDK build to be 
> disabled by those who are
> doing their own builds and only use a subset of the libraries. To this end, 
> this patchset makes some
> infrastructure changes [first two patches] to make it easier to have 
> libraries disabled, and then
> adds six libraries to the "optional" list.
>
> Bruce Richardson (6):
>   lib: allow recursive disabling of libs in build
>   app/test: link unit test binary against all available libs
>   build: add node library to optional list
>   build: add flow classification library to optional list
>   build: add "packet framework" libs to optional list
>   build: add cfgfile library to optional list
>
>  app/test/meson.build | 74 
>  lib/meson.build  | 30 --
>  2 files changed, 40 insertions(+), 64 deletions(-)

Except an issue in patch 2, this series looks good to me.
Thanks Bruce.


-- 
David Marchand



Re: [PATCH] bus/ifpga: remove useless check while browsing devices

2022-01-19 Thread Thomas Monjalon
> > reported by code analysis tool C++test (version 10.4):
> > 
> > > /build/dpdk-20.11/drivers/bus/ifpga/ifpga_bus.c
> > > 67Condition "afu_dev" is always evaluated to true
> > > 81Condition "afu_dev" is always evaluated to true
> > 
> > The "for" loop already checks that afu_dev is not NULL.
> > 
> > Fixes: 05fa3d4a6539 ("bus/ifpga: add Intel FPGA bus library")
> > 
> > Signed-off-by: Maxime Gouin 
> > Reviewed-by: Olivier Matz 
> 
> Acked-by: Rosen Xu 

Applied, thanks




Re: [PATCH v6 00/26] Net/SPNIC: support SPNIC into DPDK 22.03

2022-01-19 Thread Ferruh Yigit

On 12/30/2021 6:08 AM, Yanling Song wrote:

This patchset introduces the SPNIC driver for Ramaxel's SPNxxx series NIC cards into
DPDK 22.03.
Ramaxel Memory Technology is a company which supplies a lot of electronic products:
storage, communication, PCB...
SPNxxx is a series of PCIe interface NIC cards:
SPN110: 2 PORTs *25G
SPN120: 4 PORTs *25G
SPN130: 2 PORTs *100G



Hi Yanling,

As far as I can see, the hinic driver (from Huawei) and this spnic driver are alike;
what is the relation between these two?


The following is main features of our SPNIC:
- TSO
- LRO
- Flow control
- SR-IOV(Partially supported)
- VLAN offload
- VLAN filter
- CRC offload
- Promiscuous mode
- RSS

v6->v5, No real changes:
1. Move the fix of RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS from patch 26 to patch 2;
2. Change the description of patch 26.

v5->v4:
1. Add prefix "spnic_" for external functions;
2. Remove temporary MACRO: RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS
3. Do not use void* for keeping the type information

v3->v4:
1. Fix ABI test failure;
2. Remove some descriptions in spnic.rst.

v2->v3:
1. Fix clang compiling failure.

v1->v2:
1. Fix coding style issues and compiling failures;
2. Only support linux in meson.build;
3. Use CLOCK_MONOTONIC_COARSE instead of CLOCK_MONOTONIC/CLOCK_MONOTONIC_RAW;
4. Fix time_before();
5. Remove redundant checks in spnic_dev_configure();

Yanling Song (26):
   drivers/net: introduce a new PMD driver
   net/spnic: initialize the HW interface
   net/spnic: add mbox message channel
   net/spnic: introduce event queue
   net/spnic: add mgmt module
   net/spnic: add cmdq and work queue
   net/spnic: add interface handling cmdq message
   net/spnic: add hardware info initialization
   net/spnic: support MAC and link event handling
   net/spnic: add function info initialization
   net/spnic: add queue pairs context initialization
   net/spnic: support mbuf handling of Tx/Rx
   net/spnic: support Rx congfiguration
   net/spnic: add port/vport enable
   net/spnic: support IO packets handling
   net/spnic: add device configure/version/info
   net/spnic: support RSS configuration update and get
   net/spnic: support VLAN filtering and offloading
   net/spnic: support promiscuous and allmulticast Rx modes
   net/spnic: support flow control
   net/spnic: support getting Tx/Rx queues info
   net/spnic: net/spnic: support xstats statistics
   net/spnic: support VFIO interrupt
   net/spnic: support Tx/Rx queue start/stop
   net/spnic: add doc infrastructure
   net/spnic: fixes unsafe C style code


<...>


Re: [PATCH v6 01/26] drivers/net: introduce a new PMD driver

2022-01-19 Thread Ferruh Yigit

On 12/30/2021 6:08 AM, Yanling Song wrote:

Introduce a new PMD driver which is named spnic.


PMD stands for "Poll mode driver", so "PMD driver" usage is wrong,
can you please update it in the patch title too?

Also domain for patch title can be "net/spnic: ".


Now, this driver only implements module entry
without doing anything else.

Signed-off-by: Yanling Song 
---
  drivers/net/meson.build   |   1 +
  drivers/net/spnic/base/meson.build|  26 
  drivers/net/spnic/base/spnic_compat.h | 184 ++
  drivers/net/spnic/meson.build |  17 +++
  drivers/net/spnic/spnic_ethdev.c  | 107 +++
  drivers/net/spnic/spnic_ethdev.h  |  28 
  drivers/net/spnic/version.map |   3 +
  7 files changed, 366 insertions(+)
  create mode 100644 drivers/net/spnic/base/meson.build
  create mode 100644 drivers/net/spnic/base/spnic_compat.h
  create mode 100644 drivers/net/spnic/meson.build
  create mode 100644 drivers/net/spnic/spnic_ethdev.c
  create mode 100644 drivers/net/spnic/spnic_ethdev.h
  create mode 100644 drivers/net/spnic/version.map



Can you please add/update following files in this first patch:
- MAINTAINERS (sorted by vendor name)
- doc/guides/nics/spnic.rst
- doc/guides/nics/features/spnic.ini
- doc/guides/nics/index.rst
- doc/guides/rel_notes/release_22_03.rst

Most of them are already in the patch 25/26, so please squash it in this patch,
and I put some comment on the documentation.



diff --git a/drivers/net/meson.build b/drivers/net/meson.build
index 2355d1cde8..a5c715f59c 100644
--- a/drivers/net/meson.build
+++ b/drivers/net/meson.build
@@ -53,6 +53,7 @@ drivers = [
  'ring',
  'sfc',
  'softnic',
+   'spnic',


The indentation in the meson files needs to be fixed, please run 
'check-meson.py'
Comment is valid for all meson file updates in other patches.

$ ./devtools/check-meson.py
Error parsing drivers/net/meson.build:54, got some tabulation
Error: Incorrect indent at drivers/net/meson.build:55
Error parsing drivers/net/spnic/meson.build:13, got some tabulation
Error: Incorrect indent at drivers/net/spnic/meson.build:14
Error parsing drivers/net/spnic/meson.build:14, got some tabulation
Error parsing drivers/net/spnic/base/meson.build:23, got some tabulation
Error parsing drivers/net/spnic/base/meson.build:24, got some tabulation



  'tap',
  'thunderx',
  'txgbe',
diff --git a/drivers/net/spnic/base/meson.build 
b/drivers/net/spnic/base/meson.build
new file mode 100644
index 00..e83a473881
--- /dev/null
+++ b/drivers/net/spnic/base/meson.build
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2021 Ramaxel Memory Technology, Ltd
+
+sources = [
+]
+
+extra_flags = []
+# The driver runs only on arch64 machine, remove 32bit warnings
+if not dpdk_conf.get('RTE_ARCH_64')
+extra_flags += ['-Wno-int-to-pointer-cast', '-Wno-pointer-to-int-cast']
+endif


Why disable compiler warnings when there is not even a source file yet?
Please start with an empty meson file, and develop it as needed.


+
+foreach flag: extra_flags
+if cc.has_argument(flag)
+cflags += flag
+endif
+endforeach
+
+deps += ['hash']
+cflags += ['-DHW_CONVERT_ENDIAN']
+c_args = cflags


ditto


+
+base_lib = static_library('spnic_base', sources,
+   dependencies: [static_rte_eal, static_rte_ethdev, static_rte_bus_pci, 
static_rte_hash],
+   c_args: c_args)
+base_objs = base_lib.extract_all_objects()
diff --git a/drivers/net/spnic/base/spnic_compat.h 
b/drivers/net/spnic/base/spnic_compat.h
new file mode 100644
index 00..97f817cba9
--- /dev/null
+++ b/drivers/net/spnic/base/spnic_compat.h
@@ -0,0 +1,184 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Ramaxel Memory Technology, Ltd
+ */
+
+#ifndef _SPNIC_COMPAT_H_
+#define _SPNIC_COMPAT_H_
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+


Do you need to include all these headers at this stage?
Can you please add them as needed?


+typedef uint8_t   u8;
+typedef int8_ts8;
+typedef uint16_t  u16;
+typedef uint32_t  u32;
+typedef int32_t   s32;
+typedef uint64_t  u64;
+
+#ifndef BIT
+#define BIT(n) (1 << (n))
+#endif
+


There is already RTE_BIT64 / RTE_BIT32 in DPDK, those can be used.



+#define upper_32_bits(n) ((u32)(((n) >> 16) >> 16))
+#define lower_32_bits(n) ((u32)(n))
+
+#define SPNIC_MEM_ALLOC_ALIGN_MIN  1
+
+#define SPNIC_DRIVER_NAME "spnic"
+
+extern int spnic_logtype;
+
+#define PMD_DRV_LOG(level, fmt, args...) \
+   rte_log(RTE_LOG_ ## level, spnic_logtype, \
+   SPNIC_DRIVER_NAME ": " fmt "\n", ##args)
+
+/* Bit order interface */
+#define cpu_to_be16(o) rte_cpu_to_be_16(o)
+#define cpu_to_be32(o) rte_cpu_to_be_32(o)
+#define cpu_to_be64(o) rte_cpu_to_be_64(o)
+#define cpu_to_le32(o) rte_cpu_to_le_32(o)
+#

iavf/ice seem to report incorrect ol_flags in certain cases

2022-01-19 Thread Thomas Tsakiris
Hi All,

iavf and ice seem to report ol_flags for inner checksum even when not
configured to do so.
This results in packet with good outer/wrong inner ip checksums having
flag RTE_MBUF_F_RX_IP_CKSUM_BAD
And packet with wrong outer/good inner checksums having both
RTE_MBUF_F_RX_IP_CKSUM_GOOD
and RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD
From what I understand in the documentation,
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD should only be set when
RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM is set on the port.

Test results below.

Am I missing something ?

testpmd is started this way:
$dpdk-testpmd -l 0-2 -a  -a  -- -i
testpmd>port stop all
testpmd>port config 1 rx_offload ipv4_cksum on
testpmd>port config 0 rx_offload ipv4_cksum on
testpmd>port start all
testpmd>set log global 10
testpmd>set verbose 1
testpmd>start

Sent packets are built this way in scapy:
a =  
IP(dst="10.200.0.1",src="10.100.0.1",proto=4)/IP(dst="10.125.0.1",src="10.125.0.2")/ICMP()
b =  
IP(dst="10.200.0.1",src="10.100.0.1",proto=4,chksum=0x)/IP(dst="10.125.0.1",src="10.125.0.2")/ICMP()
c =  
IP(dst="10.200.0.1",src="10.100.0.1",proto=4)/IP(dst="10.125.0.1",src="10.125.0.2",chksum=0x)/ICMP()

Here are the results by pmd:
ice_version=1.7.16 iavf_version=4.3.19 ddp_version=1.3.30.0 dpdk_version=21.11
#test on e810vf iavf
testpmd> show port summary 0
Number of available ports: 2
Port MAC Address   Name Driver Status   Link
040:A6:B7:19:00:32 :00:04.0 net_iavf   up   100 Gbps
testpmd> show port 0 rx_offload configuration
Rx Offloading Configuration of port 0 :
  Port : IPV4_CKSUM
  Queue[ 0] : IPV4_CKSUM
# packet a
port 0/queue 0: received 1 packets
  src=24:8A:07:A3:E3:34 - dst=FF:FF:FF:FF:FF:FF - type=0x0800 -
length=62 - nb_segs=1 - hw ptype: L2_ETHER L3_IPV4_EXT_UNKNOWN
TUNNEL_IP INNER_L3_IPV4_EXT_UNKNOWN INNER_L4_ICMP  - sw ptype:
L2_ETHER L3_IPV4 TUNNEL_IP INNER_L3_IPV4  - l2_len=14 - l3_len=20 -
tunnel_len=0 - inner_l3_len=20 - Receive queue=0x0
  ol_flags: RTE_MBUF_F_RX_L4_CKSUM_GOOD RTE_MBUF_F_RX_IP_CKSUM_GOOD
RTE_MBUF_F_RX_OUTER_L4_CKSUM_UNKNOWN
# packet b
  port 0/queue 0: received 1 packets
  src=24:8A:07:A3:E3:34 - dst=FF:FF:FF:FF:FF:FF - type=0x0800 -
length=62 - nb_segs=1 - hw ptype: L2_ETHER L3_IPV4_EXT_UNKNOWN
TUNNEL_IP INNER_L3_IPV4_EXT_UNKNOWN INNER_L4_ICMP  - sw ptype:
L2_ETHER L3_IPV4 TUNNEL_IP INNER_L3_IPV4  - l2_len=14 - l3_len=20 -
tunnel_len=0 - inner_l3_len=20 - Receive queue=0x0
  ol_flags: RTE_MBUF_F_RX_L4_CKSUM_GOOD RTE_MBUF_F_RX_IP_CKSUM_GOOD
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD RTE_MBUF_F_RX_OUTER_L4_CKSUM_UNKNOWN
# packet c
  port 0/queue 0: received 1 packets
  src=24:8A:07:A3:E3:34 - dst=FF:FF:FF:FF:FF:FF - type=0x0800 -
length=62 - nb_segs=1 - hw ptype: L2_ETHER L3_IPV4_EXT_UNKNOWN
TUNNEL_IP INNER_L3_IPV4_EXT_UNKNOWN INNER_L4_ICMP  - sw ptype:
L2_ETHER L3_IPV4 TUNNEL_IP INNER_L3_IPV4  - l2_len=14 - l3_len=20 -
tunnel_len=0 - inner_l3_len=20 - Receive queue=0x0
  ol_flags: RTE_MBUF_F_RX_L4_CKSUM_GOOD RTE_MBUF_F_RX_IP_CKSUM_BAD
RTE_MBUF_F_RX_OUTER_L4_CKSUM_UNKNOWN

#test on e810 ice
testpmd> show port summary 0
Number of available ports: 2
Port MAC Address   Name Driver Status   Link
040:A6:B7:19:04:60 :b3:00.0 net_iceup   100 Gbps
testpmd> show port 0 rx_offload configuration
Rx Offloading Configuration of port 0 :
  Port : IPV4_CKSUM
  Queue[ 0] : IPV4_CKSUM
# packet a
port 0/queue 0: received 1 packets
  src=24:8A:07:A3:E3:34 - dst=FF:FF:FF:FF:FF:FF - type=0x0800 -
length=62 - nb_segs=1 - hw ptype: L2_ETHER L3_IPV4_EXT_UNKNOWN
TUNNEL_IP INNER_L3_IPV4_EXT_UNKNOWN INNER_L4_ICMP  - sw ptype:
L2_ETHER L3_IPV4 TUNNEL_IP INNER_L3_IPV4  - l2_len=14 - l3_len=20 -
tunnel_len=0 - inner_l3_len=20 - Receive queue=0x0
  ol_flags: RTE_MBUF_F_RX_L4_CKSUM_GOOD RTE_MBUF_F_RX_IP_CKSUM_GOOD
RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD
# packet b
port 0/queue 0: received 1 packets
  src=24:8A:07:A3:E3:34 - dst=FF:FF:FF:FF:FF:FF - type=0x0800 -
length=62 - nb_segs=1 - hw ptype: L2_ETHER L3_IPV4_EXT_UNKNOWN
TUNNEL_IP INNER_L3_IPV4_EXT_UNKNOWN INNER_L4_ICMP  - sw ptype:
L2_ETHER L3_IPV4 TUNNEL_IP INNER_L3_IPV4  - l2_len=14 - l3_len=20 -
tunnel_len=0 - inner_l3_len=20 - Receive queue=0x0
  ol_flags: RTE_MBUF_F_RX_L4_CKSUM_GOOD RTE_MBUF_F_RX_IP_CKSUM_GOOD
RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD
# packet c
testpmd> port 0/queue 0: received 1 packets
  src=24:8A:07:A3:E3:34 - dst=FF:FF:FF:FF:FF:FF - type=0x0800 -
length=62 - nb_segs=1 - hw ptype: L2_ETHER L3_IPV4_EXT_UNKNOWN
TUNNEL_IP INNER_L3_IPV4_EXT_UNKNOWN INNER_L4_ICMP  - sw ptype:
L2_ETHER L3_IPV4 TUNNEL_IP INNER_L3_IPV4  - l2_len=14 - l3_len=20 -
tunnel_len=0 - inner_l3_len=20 - Receive queue=0x0
  ol_flags: RTE_MBUF_F_RX_L4_CKSUM_GOOD RTE_MBUF_F_RX_IP_CKSUM_BAD
RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD

#test on niantic ixgbe
testpmd> show port summary 0
Number of available ports: 2
Port MAC Address   Name Driver Status   Link
000:09:C0:2B:0F:4C :00:04.0 net_ixgbe_vf   up   10 Gbp

Re: [PATCH v6 02/26] net/spnic: initialize the HW interface

2022-01-19 Thread Ferruh Yigit

On 12/30/2021 6:08 AM, Yanling Song wrote:

Add HW interface registers and initialize the HW
interface.

Signed-off-by: Yanling Song 


<...>


diff --git a/drivers/net/spnic/base/spnic_hwdev.h 
b/drivers/net/spnic/base/spnic_hwdev.h
new file mode 100644
index 00..c89a4fa840
--- /dev/null
+++ b/drivers/net/spnic/base/spnic_hwdev.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Ramaxel Memory Technology, Ltd
+ */
+
+#ifndef _SPNIC_HWDEV_H_
+#define _SPNIC_HWDEV_H_
+
+#include 
+


Why is this header required in this file?


<...>


+#ifdef SPNIC_RELEASE
+static int wait_until_doorbell_flush_states(struct spnic_hwif *hwif,
+   enum spnic_doorbell_ctrl states)
+{
+   enum spnic_doorbell_ctrl db_ctrl;
+   u32 cnt = 0;
+
+   if (!hwif)
+   return -EINVAL;
+
+   while (cnt < SPNIC_WAIT_DOORBELL_AND_OUTBOUND_TIMEOUT) {
+   db_ctrl = spnic_get_doorbell_ctrl_status(hwif);
+   if (db_ctrl == states)
+   return 0;
+
+   rte_delay_ms(1);
+   cnt++;
+   }
+
+   return -EFAULT;
+}
+#endif


What is this 'SPNIC_RELEASE' macro and why does it exist?

Please get rid of all compile time macros; if the code is not required you
can delete it while upstreaming.

<...>


  struct spnic_nic_dev {
+   struct spnic_hwdev *hwdev; /* Hardware device */
+
+   struct spnic_txq **txqs;
+   struct spnic_rxq **rxqs;
+   struct rte_mempool *cpy_mpool;
+
+   u16 num_sqs;
+   u16 num_rqs;
+   u16 max_sqs;
+   u16 max_rqs;
+
+   u16 rx_buff_len;
+   u16 mtu_size;
+
+   u16 rss_state;
+   u8 num_rss;
+   u8 rsvd0;
+
+   u32 rx_mode;
+   u8 rx_queue_list[SPNIC_MAX_QUEUE_NUM];
+   rte_spinlock_t queue_list_lock;
+   pthread_mutex_t rx_mode_mutex;
+
+   u32 default_cos;
+   u32 rx_csum_en;
+
u32 dev_status;
+
+   bool pause_set;
+   pthread_mutex_t pause_mutuex;
+
+   struct rte_ether_addr default_addr;
+   struct rte_ether_addr *mc_list;
+
char dev_name[SPNIC_DEV_NAME_LEN];
+   u32 vfta[SPNIC_VFTA_SIZE]; /* VLAN bitmap */
  };



Most of these additions to the struct are not used at all; can you please add
them when they are used?


Re: [PATCH v6 05/26] net/spnic: add mgmt module

2022-01-19 Thread Ferruh Yigit

On 12/30/2021 6:08 AM, Yanling Song wrote:

The mgmt module manages the messages generated by the hardware.
This patch implements mgmt module initialization, related event
processing and message command definition.

Signed-off-by: Yanling Song 


<...>


+static void spnic_get_port_link_info(u8 link_state, struct rte_eth_link *link)
+{
+   if (!link_state) {
+   link->link_status = ETH_LINK_DOWN;
+   link->link_speed = ETH_SPEED_NUM_NONE;
+   link->link_duplex = ETH_LINK_HALF_DUPLEX;
+   link->link_autoneg = ETH_LINK_FIXED;
+   }


The driver is using deprecated macros. If you can rebase on top of latest
next-net, you will get the warnings.

Since it is hard to continue with a bunch of build errors, I will wait for
the next version and can continue the reviews with it.


Re: [PATCH v6 09/26] net/spnic: support MAC and link event handling

2022-01-19 Thread Ferruh Yigit

On 12/30/2021 6:08 AM, Yanling Song wrote:

This commit adds interfaces to add/remove MAC addresses
and registers related ops to struct eth_dev_ops. Furthermore,
this commit adds callback to handle link events.



The patch also adds the VF dev_ops.

It would be more clear to support PF first and add mbox support and VF later.
But VF support has crept into the code from early patches; I assume that is
because the driver is already complete and splitting it is hard at this stage.

Similarly the primary/secondary support seems spread through the patches,
hard to separate the feature.


The above are signs that the patches are not split logically, which makes it harder
to review them and detect any issues, and future fix references won't be
clear.

If you can clarify the split more, that would be great but I can see it
is hard with an existing driver.


Signed-off-by: Yanling Song 




Re: [PATCH 2/6] app/test: link unit test binary against all available libs

2022-01-19 Thread Bruce Richardson
On Wed, Jan 19, 2022 at 05:51:20PM +0100, David Marchand wrote:
> On Thu, Jan 13, 2022 at 6:40 PM Bruce Richardson
>  wrote:
> >
> > Rather than maintaining a list of the libraries the unit tests need, and
> > having to conditionally include/omit optional libs from the list, we can
> > just link against all available libraries, simplifying the code
> > considerably.
> >
> > Signed-off-by: Bruce Richardson 
> > ---
> >  app/test/meson.build | 47 +---
> >  1 file changed, 1 insertion(+), 46 deletions(-)
> >
> > diff --git a/app/test/meson.build b/app/test/meson.build
> > index 344a609a4d..9919de4307 100644
> > --- a/app/test/meson.build
> > +++ b/app/test/meson.build
> > @@ -157,39 +157,7 @@ test_sources = files(
> >  'virtual_pmd.c',
> >  )
> >
> > -test_deps = [
> > -'acl',
> > -'bus_pci',
> > -'bus_vdev',
> 
> bus_pci and bus_vdev are not "libraries", but "drivers" dependencies
> and must be kept.
> This probably explains an error seen in UNH test report for a job that
> disables all but those bus drivers and net/hns3:
> http://mails.dpdk.org/archives/test-report/2022-January/251477.html
>
Thanks for flagging this, I'll update the set.

/Bruce 


Re: [PATCH v6 25/26] net/spnic: add doc infrastructure

2022-01-19 Thread Ferruh Yigit

On 12/30/2021 6:09 AM, Yanling Song wrote:

This patch adds doc infrastructure for spnic PMD driver.

Signed-off-by: Yanling Song 


<...>


diff --git a/doc/guides/nics/spnic.rst b/doc/guides/nics/spnic.rst
new file mode 100644
index 00..fd04178f8a
--- /dev/null
+++ b/doc/guides/nics/spnic.rst
@@ -0,0 +1,55 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+Copyright(c) 2021 Ramaxel Memory Technology, Ltd
+
+
+SPNIC Poll Mode Driver
+==
+
+The spnic PMD (**librte_net_spnic**) provides poll mode driver support
+for 25Gbps/100Gbps SPNxxx Network Adapters.
+


Can you please provide more information about the device, and add a link to the
product page?


+
+Features
+
+
+- Multiple queues for TX and RX
+- Receiver Side Scaling(RSS)
+- RSS supports IPv4, IPv6, TCPv4, TCPv6, UDPv4 and UDPv6, use inner type for 
VXLAN as default
+- MAC/VLAN filtering
+- Checksum offload
+- TSO offload
+- LRO offload
+- Promiscuous mode
+- Port hardware statistics
+- Link state information
+- Link flow control(pause frame)
+- Scattered and gather for TX and RX
+- SR-IOV - Partially supported VFIO only
+- VLAN filter and VLAN offload
+- Allmulticast mode
+- MTU update
+- Unicast MAC filter
+- Multicast MAC filter
+- Set Link down or up
+- FW version
+- Multi arch support: x86_64, ARMv8.



Please build this list up patch by patch as these features are added.
So have the base documentation in the first patch; later, as each of the above
features is added, update this file in that patch.

Same for above .ini file.


+
+Prerequisites
+-
+
+- Follow the DPDK :ref:`Getting Started Guide for Linux ` to setup 
the basic DPDK environment.
+
+
+Driver compilation and testing
+--
+
+Refer to the document :ref:`compiling and testing a PMD for a NIC 
`
+for details.
+
+It is highly recommended to upgrade the spnic driver and firmware to avoid the 
compatibility issues,
+and check the work mode with the latest product documents.
+
+Limitations or Known issues
+---
+Build with ICC is not supported yet.
+X86-32, Power8, ARMv7 and BSD are not supported yet.




Re: [PATCH v6 26/26] net/spnic: fixes unsafe C style code

2022-01-19 Thread Ferruh Yigit

On 12/30/2021 6:09 AM, Yanling Song wrote:

Use the hardware structure instead of void* as parameter of
function to keep the type information


Hi Yanling,

This is a new driver and the first patchset for it. Instead of fixing it within the set,
why not update the earlier patches to introduce the code correctly in the first place?



Signed-off-by: Yanling Song 
---
  drivers/net/spnic/base/spnic_cmdq.c  |  14 +--
  drivers/net/spnic/base/spnic_cmdq.h  |   6 +-
  drivers/net/spnic/base/spnic_hw_cfg.c|  49 --
  drivers/net/spnic/base/spnic_hw_cfg.h|  16 ++--
  drivers/net/spnic/base/spnic_hw_comm.c   |  32 ---
  drivers/net/spnic/base/spnic_hw_comm.h   |  22 ++---
  drivers/net/spnic/base/spnic_hwdev.c |   8 +-
  drivers/net/spnic/base/spnic_hwif.c  |  52 +--
  drivers/net/spnic/base/spnic_hwif.h  |  22 ++---
  drivers/net/spnic/base/spnic_mgmt.c  |   9 +-
  drivers/net/spnic/base/spnic_mgmt.h  |   4 +-
  drivers/net/spnic/base/spnic_nic_cfg.c   | 110 +++
  drivers/net/spnic/base/spnic_nic_cfg.h   |  84 -
  drivers/net/spnic/base/spnic_nic_event.c |  30 +++
  drivers/net/spnic/base/spnic_nic_event.h |  10 +--
  drivers/net/spnic/base/spnic_wq.c|   3 +-
  drivers/net/spnic/base/spnic_wq.h|   2 +-
  drivers/net/spnic/spnic_ethdev.c |  10 +--
  drivers/net/spnic/spnic_io.c |  34 +++
  drivers/net/spnic/spnic_io.h |  10 +--
  drivers/net/spnic/spnic_rx.c |   4 +-
  drivers/net/spnic/spnic_tx.c |   4 +-
  22 files changed, 252 insertions(+), 283 deletions(-)



<...>



Re: [PATCH v1] doc: fix KNI PMD name typo

2022-01-19 Thread Ferruh Yigit

On 1/19/2022 12:26 PM, Haiyue Wang wrote:

The KNI PMD name should be "net_kni".

Fixes: 75e2bc54c018 ("net/kni: add KNI PMD")
Cc: sta...@dpdk.org

Signed-off-by: Haiyue Wang 


Acked-by: Ferruh Yigit 


[PATCH v2 0/6] allow more DPDK libs to be disabled on build

2022-01-19 Thread Bruce Richardson
A common request on-list has been to allow more of the DPDK build to be
disabled by those who are
doing their own builds and only use a subset of the libraries. To this end,
this patchset makes some
infrastructure changes [first two patches] to make it easier to have libraries
disabled, and then
adds six libraries to the "optional" list.

V2: fix missing PCI and vdev bus driver dependencies in patch 2.

Bruce Richardson (6):
  lib: allow recursive disabling of libs in build
  app/test: link unit test binary against all available libs
  build: add node library to optional list
  build: add flow classification library to optional list
  build: add "packet framework" libs to optional list
  build: add cfgfile library to optional list

 app/test/meson.build | 76 
 lib/meson.build  | 30 +++--
 2 files changed, 42 insertions(+), 64 deletions(-)

--
2.32.0



[PATCH v2 1/6] lib: allow recursive disabling of libs in build

2022-01-19 Thread Bruce Richardson
Align the code in lib/meson.build with that in drivers/meson.build to
enable recursive disabling of libraries, i.e. if library b depends on
library a, disable library b if a is disabled (either explicitly or
implicitly). This allows libraries to be optional even if other DPDK
libs depend on them, something that was not previously possible.

Signed-off-by: Bruce Richardson 
Acked-by: Stephen Hemminger 
Acked-by: Morten Brørup 
---
 lib/meson.build | 24 +++-
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/lib/meson.build b/lib/meson.build
index fbaa6ef7c2..af4662e942 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -134,23 +134,29 @@ foreach l:libraries
 warning('Library name, "@0@", and directory name, "@1@", do not 
match'.format(name, l))
 endif
 
-if not build
-dpdk_libs_disabled += name
-set_variable(name.underscorify() + '_disable_reason', reason)
-continue
-endif
-
 shared_deps = ext_deps
 static_deps = ext_deps
 foreach d:deps
+if not build
+break
+endif
 if not is_variable('shared_rte_' + d)
-error('Missing internal dependency "@0@" for @1@ [@2@]'
+build = false
+reason = 'missing internal dependency, "@0@"'.format(d)
+message('Disabling @1@ [@2@]: missing internal dependency "@0@"'
 .format(d, name, 'lib/' + l))
+else
+shared_deps += [get_variable('shared_rte_' + d)]
+static_deps += [get_variable('static_rte_' + d)]
 endif
-shared_deps += [get_variable('shared_rte_' + d)]
-static_deps += [get_variable('static_rte_' + d)]
 endforeach
 
+if not build
+dpdk_libs_disabled += name
+set_variable(name.underscorify() + '_disable_reason', reason)
+continue
+endif
+
 enabled_libs += name
 dpdk_conf.set('RTE_LIB_' + name.to_upper(), 1)
 install_headers(headers)
-- 
2.32.0



[PATCH v2 2/6] app/test: link unit test binary against all available libs

2022-01-19 Thread Bruce Richardson
Rather than maintaining a list of the libraries the unit tests need, and
having to conditionally include/omit optional libs from the list, we can
just link against all available libraries, simplifying the code
considerably.

Signed-off-by: Bruce Richardson 
Acked-by: Stephen Hemminger 
Acked-by: Morten Brørup 
---
 app/test/meson.build | 49 +++-
 1 file changed, 3 insertions(+), 46 deletions(-)

diff --git a/app/test/meson.build b/app/test/meson.build
index 344a609a4d..210e03fbff 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -157,39 +157,9 @@ test_sources = files(
 'virtual_pmd.c',
 )
 
-test_deps = [
-'acl',
-'bus_pci',
-'bus_vdev',
-'bpf',
-'cfgfile',
-'cmdline',
-'cryptodev',
-'distributor',
-'dmadev',
-'efd',
-'ethdev',
-'eventdev',
-'fib',
-'flow_classify',
-'graph',
-'hash',
-'ipsec',
-'lpm',
-'member',
-'node',
-'pipeline',
-'port',
-'rawdev',
-'rcu',
-'reorder',
-'rib',
-'ring',
-'security',
-'stack',
-'telemetry',
-'timer',
-]
+test_deps = enabled_libs
+# as well as libs, the pci and vdev bus drivers are needed for a lot of tests
+test_deps += ['bus_pci', 'bus_vdev']
 
 # Each test is marked with flag true/false
 # to indicate whether it can run in no-huge mode.
@@ -380,7 +350,6 @@ if dpdk_conf.has('RTE_EVENT_SKELETON')
 test_deps += 'event_skeleton'
 endif
 if dpdk_conf.has('RTE_LIB_METRICS')
-test_deps += 'metrics'
 test_sources += ['test_metrics.c']
 fast_tests += [['metrics_autotest', true]]
 endif
@@ -410,17 +379,14 @@ if dpdk_conf.has('RTE_NET_RING')
 perf_test_names += 'ring_pmd_perf_autotest'
 fast_tests += [['event_eth_tx_adapter_autotest', false]]
 if dpdk_conf.has('RTE_LIB_BITRATESTATS')
-test_deps += 'bitratestats'
 test_sources += 'test_bitratestats.c'
 fast_tests += [['bitratestats_autotest', true]]
 endif
 if dpdk_conf.has('RTE_LIB_LATENCYSTATS')
-test_deps += 'latencystats'
 test_sources += 'test_latencystats.c'
 fast_tests += [['latencystats_autotest', true]]
 endif
 if dpdk_conf.has('RTE_LIB_PDUMP')
-test_deps += 'pdump'
 test_sources += 'test_pdump.c'
 fast_tests += [['pdump_autotest', true]]
 endif
@@ -434,18 +400,10 @@ endif
 if dpdk_conf.has('RTE_HAS_LIBPCAP')
 ext_deps += pcap_dep
 if dpdk_conf.has('RTE_LIB_PCAPNG')
-test_deps += 'pcapng'
 test_sources += 'test_pcapng.c'
 endif
 endif
 
-if dpdk_conf.has('RTE_LIB_POWER')
-test_deps += 'power'
-endif
-if dpdk_conf.has('RTE_LIB_KNI')
-test_deps += 'kni'
-endif
-
 if cc.has_argument('-Wno-format-truncation')
 cflags += '-Wno-format-truncation'
 endif
@@ -462,7 +420,6 @@ if dpdk_conf.has('RTE_LIB_COMPRESSDEV')
 if compress_test_dep.found()
 test_dep_objs += compress_test_dep
 test_sources += 'test_compressdev.c'
-test_deps += 'compressdev'
 fast_tests += [['compressdev_autotest', false]]
 endif
 endif
-- 
2.32.0



[PATCH v2 3/6] build: add node library to optional list

2022-01-19 Thread Bruce Richardson
Allow the 'node' library to be disabled in builds

Signed-off-by: Bruce Richardson 
Acked-by: Stephen Hemminger 
Acked-by: Morten Brørup 
---
 lib/meson.build | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/meson.build b/lib/meson.build
index af4662e942..dd20fe70a6 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -74,6 +74,7 @@ optional_libs = [
 'jobstats',
 'latencystats',
 'metrics',
+'node',
 'pdump',
 'power',
 'vhost',
-- 
2.32.0



[PATCH v2 4/6] build: add flow classification library to optional list

2022-01-19 Thread Bruce Richardson
Add the flow_classify library to the list of optional libraries, and
ensure tests can build with it disabled.

Signed-off-by: Bruce Richardson 
Acked-by: Stephen Hemminger 
Acked-by: Morten Brørup 
---
 app/test/meson.build | 7 +--
 lib/meson.build  | 1 +
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/app/test/meson.build b/app/test/meson.build
index 210e03fbff..a39dd68934 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -65,7 +65,6 @@ test_sources = files(
 'test_fib6.c',
 'test_fib6_perf.c',
 'test_func_reentrancy.c',
-'test_flow_classify.c',
 'test_graph.c',
 'test_graph_perf.c',
 'test_hash.c',
@@ -196,7 +195,6 @@ fast_tests = [
 ['fib_autotest', true],
 ['fib6_autotest', true],
 ['func_reentrancy_autotest', false],
-['flow_classify_autotest', false],
 ['hash_autotest', true],
 ['interrupt_autotest', true],
 ['ipfrag_autotest', false],
@@ -349,6 +347,11 @@ endif
 if dpdk_conf.has('RTE_EVENT_SKELETON')
 test_deps += 'event_skeleton'
 endif
+
+if dpdk_conf.has('RTE_LIB_FLOW_CLASSIFY')
+test_sources += 'test_flow_classify.c'
+fast_tests += [['flow_classify_autotest', false]]
+endif
 if dpdk_conf.has('RTE_LIB_METRICS')
 test_sources += ['test_metrics.c']
 fast_tests += [['metrics_autotest', true]]
diff --git a/lib/meson.build b/lib/meson.build
index dd20fe70a6..ede5199374 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -67,6 +67,7 @@ libraries = [
 
 optional_libs = [
 'bitratestats',
+'flow_classify',
 'gpudev',
 'gro',
 'gso',
-- 
2.32.0



[PATCH v2 5/6] build: add "packet framework" libs to optional list

2022-01-19 Thread Bruce Richardson
Add port, table and pipeline libraries - collectively often known as
the "packet framework" -  to the list of optional libraries, and
ensure tests can build with them disabled.

Signed-off-by: Bruce Richardson 
Acked-by: Stephen Hemminger 
Acked-by: Morten Brørup 
---
 app/test/meson.build | 20 +---
 lib/meson.build  |  3 +++
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/app/test/meson.build b/app/test/meson.build
index a39dd68934..aac2b98800 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -135,12 +135,6 @@ test_sources = files(
 'test_stack.c',
 'test_stack_perf.c',
 'test_string_fns.c',
-'test_table.c',
-'test_table_acl.c',
-'test_table_combined.c',
-'test_table_pipeline.c',
-'test_table_ports.c',
-'test_table_tables.c',
 'test_tailq.c',
 'test_thash.c',
 'test_thash_perf.c',
@@ -229,7 +223,6 @@ fast_tests = [
 ['stack_autotest', false],
 ['stack_lf_autotest', false],
 ['string_autotest', true],
-['table_autotest', true],
 ['tailq_autotest', true],
 ['ticketlock_autotest', true],
 ['timer_autotest', false],
@@ -360,6 +353,19 @@ if dpdk_conf.has('RTE_LIB_TELEMETRY')
 test_sources += ['test_telemetry_json.c', 'test_telemetry_data.c']
 fast_tests += [['telemetry_json_autotest', true], 
['telemetry_data_autotest', true]]
 endif
+if dpdk_conf.has('RTE_LIB_PIPELINE')
+# pipeline lib depends on port and table libs, so those must be present
+# if pipeline library is.
+test_sources += [
+'test_table.c',
+'test_table_acl.c',
+'test_table_combined.c',
+'test_table_pipeline.c',
+'test_table_ports.c',
+'test_table_tables.c',
+]
+fast_tests += [['table_autotest', true]]
+endif
 
 # The following linkages of drivers are required because
 # they are used via a driver-specific API.
diff --git a/lib/meson.build b/lib/meson.build
index ede5199374..dcc1b4d835 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -77,7 +77,10 @@ optional_libs = [
 'metrics',
 'node',
 'pdump',
+'pipeline',
+'port',
 'power',
+'table',
 'vhost',
 ]
 
-- 
2.32.0



[PATCH v2 6/6] build: add cfgfile library to optional list

2022-01-19 Thread Bruce Richardson
Allow disabling of the cfgfile library in builds.

Signed-off-by: Bruce Richardson 
Acked-by: Stephen Hemminger 
Acked-by: Morten Brørup 
---
 lib/meson.build | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/meson.build b/lib/meson.build
index dcc1b4d835..8e5acd7819 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -67,6 +67,7 @@ libraries = [
 
 optional_libs = [
 'bitratestats',
+'cfgfile',
 'flow_classify',
 'gpudev',
 'gro',
-- 
2.32.0



[PATCH 1/4] net/ark: add device capabilities record

2022-01-19 Thread John Miller
Add static record of supported device capabilities.

Signed-off-by: John Miller 
---
 drivers/net/ark/ark_ethdev.c | 58 +---
 1 file changed, 48 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ark/ark_ethdev.c b/drivers/net/ark/ark_ethdev.c
index b618cba3f0..0414c78bb5 100644
--- a/drivers/net/ark/ark_ethdev.c
+++ b/drivers/net/ark/ark_ethdev.c
@@ -96,6 +96,26 @@ static const struct rte_pci_id pci_id_ark_map[] = {
{.vendor_id = 0, /* sentinel */ },
 };
 
+struct ark_caps {
+   bool rqpacing;
+};
+struct ark_dev_caps {
+   uint32_t  device_id;
+   struct ark_caps  caps;
+};
+static const struct ark_dev_caps
+ark_device_caps[] = {
+{0x100d, {.rqpacing = true} },
+{0x100e, {.rqpacing = true} },
+{0x100f, {.rqpacing = true} },
+{0x1010, {.rqpacing = false} },
+{0x1017, {.rqpacing = true} },
+{0x1018, {.rqpacing = true} },
+{0x1019, {.rqpacing = true} },
+{0x101e, {.rqpacing = false} },
+{.device_id = 0,}
+};
+
 static int
 eth_ark_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
struct rte_pci_device *pci_dev)
@@ -256,6 +276,7 @@ eth_ark_dev_init(struct rte_eth_dev *dev)
int ret;
int port_count = 1;
int p;
+   bool rqpacing = false;
 
ark->eth_dev = dev;
 
@@ -270,6 +291,15 @@ eth_ark_dev_init(struct rte_eth_dev *dev)
rte_eth_copy_pci_info(dev, pci_dev);
dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
 
+   p = 0;
+   while (ark_device_caps[p].device_id != 0) {
+   if (pci_dev->id.device_id == ark_device_caps[p].device_id) {
+   rqpacing = ark_device_caps[p].caps.rqpacing;
+   break;
+   }
+   p++;
+   }
+
/* Use dummy function until setup */
dev->rx_pkt_burst = &eth_ark_recv_pkts_noop;
dev->tx_pkt_burst = &eth_ark_xmit_pkts_noop;
@@ -288,8 +318,12 @@ eth_ark_dev_init(struct rte_eth_dev *dev)
ark->pktgen.v  = (void *)&ark->bar0[ARK_PKTGEN_BASE];
ark->pktchkr.v  = (void *)&ark->bar0[ARK_PKTCHKR_BASE];
 
-   ark->rqpacing =
-   (struct ark_rqpace_t *)(ark->bar0 + ARK_RCPACING_BASE);
+   if (rqpacing) {
+   ark->rqpacing =
+   (struct ark_rqpace_t *)(ark->bar0 + ARK_RCPACING_BASE);
+   } else {
+   ark->rqpacing = NULL;
+   }
ark->started = 0;
ark->pkt_dir_v = ARK_PKT_DIR_INIT_VAL;
 
@@ -309,13 +343,15 @@ eth_ark_dev_init(struct rte_eth_dev *dev)
return -1;
}
if (ark->sysctrl.t32[3] != 0) {
-   if (ark_rqp_lasped(ark->rqpacing)) {
-   ARK_PMD_LOG(ERR, "Arkville Evaluation System - "
-   "Timer has Expired\n");
-   return -1;
+   if (ark->rqpacing) {
+   if (ark_rqp_lasped(ark->rqpacing)) {
+   ARK_PMD_LOG(ERR, "Arkville Evaluation System - "
+   "Timer has Expired\n");
+   return -1;
+   }
+   ARK_PMD_LOG(WARNING, "Arkville Evaluation System - "
+   "Timer is Running\n");
}
-   ARK_PMD_LOG(WARNING, "Arkville Evaluation System - "
-   "Timer is Running\n");
}
 
ARK_PMD_LOG(DEBUG,
@@ -499,7 +535,8 @@ ark_config_device(struct rte_eth_dev *dev)
ark_ddm_stats_reset(ark->ddm.v);
 
ark_ddm_stop(ark->ddm.v, 0);
-   ark_rqp_stats_reset(ark->rqpacing);
+   if (ark->rqpacing)
+   ark_rqp_stats_reset(ark->rqpacing);
 
return 0;
 }
@@ -695,7 +732,8 @@ eth_ark_dev_close(struct rte_eth_dev *dev)
/*
 * TODO This should only be called once for the device during shutdown
 */
-   ark_rqp_dump(ark->rqpacing);
+   if (ark->rqpacing)
+   ark_rqp_dump(ark->rqpacing);
 
for (i = 0; i < dev->data->nb_tx_queues; i++) {
eth_ark_tx_queue_release(dev->data->tx_queues[i]);
-- 
2.25.1



[PATCH 2/4] net/ark: support arbitrary mbuf size

2022-01-19 Thread John Miller
Support arbitrary mbuf size per queue.

Signed-off-by: John Miller 
---
 drivers/net/ark/ark_ethdev.c|  8 
 drivers/net/ark/ark_ethdev_rx.c | 23 +++
 drivers/net/ark/ark_udm.h   |  2 +-
 3 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ark/ark_ethdev.c b/drivers/net/ark/ark_ethdev.c
index 0414c78bb5..b9843414b1 100644
--- a/drivers/net/ark/ark_ethdev.c
+++ b/drivers/net/ark/ark_ethdev.c
@@ -511,14 +511,6 @@ ark_config_device(struct rte_eth_dev *dev)
mpu = RTE_PTR_ADD(mpu, ARK_MPU_QOFFSET);
}
 
-   ark_udm_stop(ark->udm.v, 0);
-   ark_udm_configure(ark->udm.v,
- RTE_PKTMBUF_HEADROOM,
- RTE_MBUF_DEFAULT_DATAROOM,
- ARK_RX_WRITE_TIME_NS);
-   ark_udm_stats_reset(ark->udm.v);
-   ark_udm_stop(ark->udm.v, 0);
-
/* TX -- DDM */
if (ark_ddm_stop(ark->ddm.v, 1))
ARK_PMD_LOG(ERR, "Unable to stop DDM\n");
diff --git a/drivers/net/ark/ark_ethdev_rx.c b/drivers/net/ark/ark_ethdev_rx.c
index 98658ce621..1000f50be0 100644
--- a/drivers/net/ark/ark_ethdev_rx.c
+++ b/drivers/net/ark/ark_ethdev_rx.c
@@ -12,7 +12,6 @@
 
 #define ARK_RX_META_SIZE 32
 #define ARK_RX_META_OFFSET (RTE_PKTMBUF_HEADROOM - ARK_RX_META_SIZE)
-#define ARK_RX_MAX_NOCHAIN (RTE_MBUF_DEFAULT_DATAROOM)
 
 /* Forward declarations */
 struct ark_rx_queue;
@@ -41,6 +40,9 @@ struct ark_rx_queue {
rx_user_meta_hook_fn rx_user_meta_hook;
void *ext_user_data;
 
+   uint32_t dataroom;
+   uint32_t headroom;
+
uint32_t queue_size;
uint32_t queue_mask;
 
@@ -164,6 +166,9 @@ eth_ark_dev_rx_queue_setup(struct rte_eth_dev *dev,
 
/* NOTE zmalloc is used, no need to 0 indexes, etc. */
queue->mb_pool = mb_pool;
+   queue->dataroom = rte_pktmbuf_data_room_size(mb_pool) -
+   RTE_PKTMBUF_HEADROOM;
+   queue->headroom = RTE_PKTMBUF_HEADROOM;
queue->phys_qid = qidx;
queue->queue_index = queue_idx;
queue->queue_size = nb_desc;
@@ -196,6 +201,15 @@ eth_ark_dev_rx_queue_setup(struct rte_eth_dev *dev,
queue->udm = RTE_PTR_ADD(ark->udm.v, qidx * ARK_UDM_QOFFSET);
queue->mpu = RTE_PTR_ADD(ark->mpurx.v, qidx * ARK_MPU_QOFFSET);
 
+   /* Configure UDM per queue */
+   ark_udm_stop(queue->udm, 0);
+   ark_udm_configure(queue->udm,
+ RTE_PKTMBUF_HEADROOM,
+ queue->dataroom,
+ ARK_RX_WRITE_TIME_NS);
+   ark_udm_stats_reset(queue->udm);
+   ark_udm_stop(queue->udm, 0);
+
/* populate mbuf reserve */
status = eth_ark_rx_seed_mbufs(queue);
 
@@ -276,6 +290,7 @@ eth_ark_recv_pkts(void *rx_queue,
mbuf->data_len = meta->pkt_len;
 
if (ARK_DEBUG_CORE) {   /* debug sanity checks */
+
if ((meta->pkt_len > (1024 * 16)) ||
(meta->pkt_len == 0)) {
ARK_PMD_LOG(DEBUG, "RX: Bad Meta Q: %u"
@@ -304,7 +319,7 @@ eth_ark_recv_pkts(void *rx_queue,
}
}
 
-   if (unlikely(meta->pkt_len > ARK_RX_MAX_NOCHAIN))
+   if (unlikely(meta->pkt_len > queue->dataroom))
cons_index = eth_ark_rx_jumbo
(queue, meta, mbuf, cons_index + 1);
else
@@ -345,14 +360,14 @@ eth_ark_rx_jumbo(struct ark_rx_queue *queue,
/* first buf populated by called */
mbuf_prev = mbuf0;
segments = 1;
-   data_len = RTE_MIN(meta->pkt_len, RTE_MBUF_DEFAULT_DATAROOM);
+   data_len = RTE_MIN(meta->pkt_len, queue->dataroom);
remaining = meta->pkt_len - data_len;
mbuf0->data_len = data_len;
 
/* HW guarantees that the data does not exceed prod_index! */
while (remaining != 0) {
data_len = RTE_MIN(remaining,
-  RTE_MBUF_DEFAULT_DATAROOM);
+  queue->dataroom);
 
remaining -= data_len;
segments += 1;
diff --git a/drivers/net/ark/ark_udm.h b/drivers/net/ark/ark_udm.h
index 4e51a5e82c..1cbcd94a98 100644
--- a/drivers/net/ark/ark_udm.h
+++ b/drivers/net/ark/ark_udm.h
@@ -33,7 +33,7 @@ struct ark_rx_meta {
 #define ARK_RX_WRITE_TIME_NS 2500
 #define ARK_UDM_SETUP 0
 #define ARK_UDM_CONST2 0xbACECACE
-#define ARK_UDM_CONST3 0x334d4455
+#define ARK_UDM_CONST3 0x344d4455
 #define ARK_UDM_CONST ARK_UDM_CONST3
 struct ark_udm_setup_t {
uint32_t r0;
-- 
2.25.1



[PATCH 3/4] net/ark: publish include file for external access

2022-01-19 Thread John Miller
publish rte_pmd_ark.h for external access to extension

Signed-off-by: John Miller 
---
 doc/guides/nics/ark.rst  | 4 ++--
 drivers/net/ark/meson.build  | 2 ++
 drivers/net/ark/{ark_ext.h => rte_pmd_ark.h} | 8 ++--
 3 files changed, 10 insertions(+), 4 deletions(-)
 rename drivers/net/ark/{ark_ext.h => rte_pmd_ark.h} (97%)

diff --git a/doc/guides/nics/ark.rst b/doc/guides/nics/ark.rst
index da61814b5d..bcc3babd53 100644
--- a/doc/guides/nics/ark.rst
+++ b/doc/guides/nics/ark.rst
@@ -143,7 +143,7 @@ object file contains extension (or hook) functions that are 
registered
 and then called during PMD operations.
 
 The allowable set of extension functions are defined and documented in
-``ark_ext.h``, only the initialization function,
+``rte_pmd_ark.h``, only the initialization function,
 ``rte_pmd_ark_dev_init()``, is required; all others are optional. The
 following sections give a small extension example along with
 instructions for compiling and using the extension.
@@ -157,7 +157,7 @@ during RX from user meta data coming from FPGA hardware.
 
 .. code-block:: c
 
-   #include <ark_ext.h>
+   #include <rte_pmd_ark.h>
#include 
#include 
#include 
diff --git a/drivers/net/ark/meson.build b/drivers/net/ark/meson.build
index 8d87744c22..83488d87a8 100644
--- a/drivers/net/ark/meson.build
+++ b/drivers/net/ark/meson.build
@@ -7,6 +7,8 @@ if is_windows
 subdir_done()
 endif
 
+headers = files('rte_pmd_ark.h')
+
 sources = files(
 'ark_ddm.c',
 'ark_ethdev.c',
diff --git a/drivers/net/ark/ark_ext.h b/drivers/net/ark/rte_pmd_ark.h
similarity index 97%
rename from drivers/net/ark/ark_ext.h
rename to drivers/net/ark/rte_pmd_ark.h
index d235d0ff85..f77c36eb2d 100644
--- a/drivers/net/ark/ark_ext.h
+++ b/drivers/net/ark/rte_pmd_ark.h
@@ -5,7 +5,11 @@
 #ifndef _ARK_EXT_H_
 #define _ARK_EXT_H_
 
-#include 
+#include 
+struct rte_eth_dev;
+struct rte_mbuf;
+struct rte_ether_addr;
+struct rte_eth_stats;
 
 /* The following section lists function prototypes for Arkville's
  * dynamic PMD extension. User's who create an extension
@@ -55,7 +59,7 @@ void rte_pmd_ark_dev_uninit(struct rte_eth_dev *dev, void 
*user_data);
  *   user argument from dev_init() call.
  * @return (0) if successful.
  */
-uint8_t dev_get_port_count(struct rte_eth_dev *dev, void *user_data);
+uint8_t rte_pmd_ark_dev_get_port_count(struct rte_eth_dev *dev, void 
*user_data);
 
 /**
  * Extension prototype, optional implementation.
-- 
2.25.1



[PATCH 4/4] net/ark: support chunk DMA transfers

2022-01-19 Thread John Miller
Add support for chunk DMA transfers.

Signed-off-by: John Miller 
---
 drivers/net/ark/ark_ddm.c   |  1 +
 drivers/net/ark/ark_ethdev_rx.c | 16 +---
 drivers/net/ark/ark_mpu.c   |  1 +
 drivers/net/ark/ark_pktchkr.c   |  2 +-
 drivers/net/ark/ark_pktgen.c|  2 +-
 drivers/net/ark/ark_udm.c   |  3 +++
 6 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ark/ark_ddm.c b/drivers/net/ark/ark_ddm.c
index 2321371572..b16c739d50 100644
--- a/drivers/net/ark/ark_ddm.c
+++ b/drivers/net/ark/ark_ddm.c
@@ -55,6 +55,7 @@ ark_ddm_stop(struct ark_ddm_t *ddm, const int wait)
int cnt = 0;
 
ddm->cfg.command = 2;
+   rte_wmb();
while (wait && (ddm->cfg.stop_flushed & 0x01) == 0) {
if (cnt++ > 1000)
return 1;
diff --git a/drivers/net/ark/ark_ethdev_rx.c b/drivers/net/ark/ark_ethdev_rx.c
index 1000f50be0..49134ea08f 100644
--- a/drivers/net/ark/ark_ethdev_rx.c
+++ b/drivers/net/ark/ark_ethdev_rx.c
@@ -12,6 +12,7 @@
 
 #define ARK_RX_META_SIZE 32
 #define ARK_RX_META_OFFSET (RTE_PKTMBUF_HEADROOM - ARK_RX_META_SIZE)
+#define ARK_RX_MPU_CHUNK (64U)
 
 /* Forward declarations */
 struct ark_rx_queue;
@@ -104,7 +105,7 @@ static inline void
 eth_ark_rx_update_cons_index(struct ark_rx_queue *queue, uint32_t cons_index)
 {
queue->cons_index = cons_index;
-   if ((cons_index + queue->queue_size - queue->seed_index) >= 64U) {
+   if ((cons_index + queue->queue_size - queue->seed_index) >= 
ARK_RX_MPU_CHUNK) {
eth_ark_rx_seed_mbufs(queue);
ark_mpu_set_producer(queue->mpu, queue->seed_index);
}
@@ -179,12 +180,12 @@ eth_ark_dev_rx_queue_setup(struct rte_eth_dev *dev,
queue->reserve_q =
rte_zmalloc_socket("Ark_rx_queue mbuf",
   nb_desc * sizeof(struct rte_mbuf *),
-  64,
+  512,
   socket_id);
queue->paddress_q =
rte_zmalloc_socket("Ark_rx_queue paddr",
   nb_desc * sizeof(rte_iova_t),
-  64,
+  512,
   socket_id);
 
if (queue->reserve_q == 0 || queue->paddress_q == 0) {
@@ -455,7 +456,8 @@ eth_ark_rx_stop_queue(struct rte_eth_dev *dev, uint16_t 
queue_id)
 static inline int
 eth_ark_rx_seed_mbufs(struct ark_rx_queue *queue)
 {
-   uint32_t limit = queue->cons_index + queue->queue_size;
+   uint32_t limit = (queue->cons_index & ~(ARK_RX_MPU_CHUNK - 1)) +
+   queue->queue_size;
uint32_t seed_index = queue->seed_index;
 
uint32_t count = 0;
@@ -618,14 +620,14 @@ eth_ark_udm_force_close(struct rte_eth_dev *dev)
 
ark_mpu_start(queue->mpu);
/* Add some buffers */
-   index = 10 + queue->seed_index;
+   index = ARK_RX_MPU_CHUNK + queue->seed_index;
ark_mpu_set_producer(queue->mpu, index);
}
/* Wait to allow data to pass */
usleep(100);
 
-   ARK_PMD_LOG(DEBUG, "UDM forced flush attempt, stopped = %d\n",
-   ark_udm_is_flushed(ark->udm.v));
+   ARK_PMD_LOG(NOTICE, "UDM forced flush attempt, stopped = %d\n",
+   ark_udm_is_flushed(ark->udm.v));
}
ark_udm_reset(ark->udm.v);
 }
diff --git a/drivers/net/ark/ark_mpu.c b/drivers/net/ark/ark_mpu.c
index 8160c1de7b..b8e94b6ed3 100644
--- a/drivers/net/ark/ark_mpu.c
+++ b/drivers/net/ark/ark_mpu.c
@@ -68,6 +68,7 @@ ark_mpu_reset(struct ark_mpu_t *mpu)
int cnt = 0;
 
mpu->cfg.command = MPU_CMD_RESET;
+   rte_wmb();
 
while (mpu->cfg.command != MPU_CMD_IDLE) {
if (cnt++ > 1000)
diff --git a/drivers/net/ark/ark_pktchkr.c b/drivers/net/ark/ark_pktchkr.c
index 84bb567a41..12a5abb2f7 100644
--- a/drivers/net/ark/ark_pktchkr.c
+++ b/drivers/net/ark/ark_pktchkr.c
@@ -113,7 +113,7 @@ ark_pktchkr_stopped(ark_pkt_chkr_t handle)
struct ark_pkt_chkr_inst *inst = (struct ark_pkt_chkr_inst *)handle;
uint32_t r = inst->sregs->pkt_start_stop;
 
-   return (((r >> 16) & 1) == 1);
+   return (((r >> 16) & 1) == 1) || (r == 0);
 }
 
 void
diff --git a/drivers/net/ark/ark_pktgen.c b/drivers/net/ark/ark_pktgen.c
index 515bfe461c..6195ef997f 100644
--- a/drivers/net/ark/ark_pktgen.c
+++ b/drivers/net/ark/ark_pktgen.c
@@ -107,7 +107,7 @@ ark_pktgen_paused(ark_pkt_gen_t handle)
struct ark_pkt_gen_inst *inst = (struct ark_pkt_gen_inst *)handle;
uint32_t r = inst->regs->pkt_start_stop;
 
-   return (((r >> 16) & 1) == 1);
+   return (((r >> 24) & 1) == 1) || (((r >> 16) & 1) == 1)  || (r == 0);
 }
 
 void
diff --git a/drivers/net/ark/ark_udm.c b/drivers/net/ark/ark_udm.c
in

[PATCH v2 0/6] Fast restart with many hugepages

2022-01-19 Thread Dmitry Kozlyuk
This patchset is a new design and implementation of [1].

v2:
  * Fix hugepage file removal when they are no longer used.
Disable removal with --huge-unlink=never as intended.
Document this behavior difference. (Bruce)
  * Improve documentation, commit messages, and naming. (Thomas)

# Problem Statement

Large allocations that involve mapping new hugepages are slow.
This is problematic, for example, in the following use case.
A single-process application allocates ~1TB of mempools at startup.
Sometimes the app needs to restart as quickly as possible.
Allocating the hugepages anew takes as long as 15 seconds,
while the new process could just pick up all the memory
left by the old one (reinitializing the contents as needed).

Almost all of mmap(2) time spent in the kernel
is clearing the memory, i.e. filling it with zeros.
This is done if a file in hugetlbfs is mapped
for the first time system-wide, i.e. a hugepage is committed
to prevent data leaks from the previous users of the same hugepage.
For example, mapping 32 GB from a new file may take 2.16 seconds,
while mapping the same pages again takes only 0.3 ms.
Security put aside, e.g. when the environment is controlled,
this effort is wasted for the memory intended for DMA,
because its content will be overwritten anyway.

Linux EAL explicitly removes hugetlbfs files at initialization
and before mapping to force the kernel to clear the memory.
This allows the memory allocator to clean memory only on freeing.

# Solution

Add a new mode allowing EAL to remap existing hugepage files.
While it is intended to make restarts faster in the first place,
it makes any startup faster except the cold one
(with no existing files).

It is the administrator who accepts security risks
implied by reusing hugepages.
The new mode is an opt-in and a warning is logged.

The feature is Linux-only as it is related
to mapping hugepages from files which only Linux does.
It is inherently incompatible with --in-memory,
for --huge-unlink see below.

There is formally no breakage of API contract,
but there is a behavior change in the new mode:
rte_malloc*() and rte_memzone_reserve*() may return dirty memory
(previously they were returning clean memory from free heap elements).
Their contract has always explicitly allowed this,
but still there may be users relying on the traditional behavior.
Such users will need to fix their code to use the new mode.

# Implementation

## User Interface

There is a --huge-unlink switch in the same area to remove hugepage files
before mapping them. It is infeasible to use with the new mode,
because the point is to keep hugepage files for fast future restarts.
Extend --huge-unlink option to represent only valid combinations:

* --huge-unlink=existing OR no option (for compatibility):
  unlink files at initialization
  and before opening them as a precaution.

* --huge-unlink=always OR just --huge-unlink (for compatibility):
  same as above + unlink created files before mapping.

* --huge-unlink=never:
  the new mode, do not unlink hugepage files, reuse them.

This option was always Linux-only, but it is kept as common
in case there are users who expect it to be a no-op on other systems.
(Adding a separate --huge-reuse option was also considered,
but there is no obvious benefit and more combinations to test.)

## EAL

If a memseg is mapped dirty, it is marked with RTE_MEMSEG_FLAG_DIRTY
so that the memory allocator may clear the memory if need be.
See patch 5/6 description for details on how this is done
in different memory mapping modes.

The memory manager tracks whether an element is clean or dirty.
If rte_zmalloc*() allocates from a dirty element,
the memory is cleared before handing it to the user.
On freeing, the allocator joins adjacent free elements,
but in the new mode it may not be feasible to clear the free memory
if the joint element is dirty (contains dirty parts).
In any case, memory will be cleared only once,
either on freeing or on allocation.
See patch 3/6 for details.
Patch 2/6 adds a benchmark to see how time is distributed
between allocation and freeing in different modes.

Besides clearing memory, each mmap() call takes some time.
For example, 1024 calls for 1 TB may take ~300 ms.
The time of one call mapping N hugepages is O(N),
because inside the kernel hugepages are allocated one by one.
Syscall overhead is negligible even for one page.
Hence, it does not make sense to reduce the number of mmap() calls,
which would essentially move the loop over pages into the kernel.

[1]: http://inbox.dpdk.org/dev/20211011085644.2716490-3-dkozl...@nvidia.com/

Dmitry Kozlyuk (6):
  doc: add hugepage mapping details
  app/test: add allocator performance benchmark
  mem: add dirty malloc element support
  eal: refactor --huge-unlink storage
  eal/linux: allow hugepage file reuse
  eal: extend --huge-unlink for hugepage file reuse

 app/test/meson.build  |   2 +
 app/test/test_eal_flags.c |  25 +++
 app

[PATCH v2 2/6] app/test: add allocator performance benchmark

2022-01-19 Thread Dmitry Kozlyuk
Memory allocator performance is crucial to applications that deal
with large amounts of memory or allocate frequently. DPDK allocator
performance is affected by EAL options, API used and, at least,
allocation size. New autotest is intended to be run with different
EAL options. It measures performance with a range of sizes
for different APIs: rte_malloc, rte_zmalloc, and rte_memzone_reserve.

Work distribution between allocation and deallocation depends on EAL
options. The test prints both times and total time to ease comparison.

Memory can be filled with zeroes at different points of the allocation path,
but it always takes a considerable fraction of overall timing. This is why
the test measures filling speed and prints how long clearing takes
for each size as a reference (for rte_memzone_reserve estimations
are printed).

Signed-off-by: Dmitry Kozlyuk 
Reviewed-by: Viacheslav Ovsiienko 
Acked-by: Aaron Conole 
---
 app/test/meson.build|   2 +
 app/test/test_malloc_perf.c | 174 
 2 files changed, 176 insertions(+)
 create mode 100644 app/test/test_malloc_perf.c

diff --git a/app/test/meson.build b/app/test/meson.build
index 344a609a4d..50cf2602a9 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -88,6 +88,7 @@ test_sources = files(
 'test_lpm6_perf.c',
 'test_lpm_perf.c',
 'test_malloc.c',
+'test_malloc_perf.c',
 'test_mbuf.c',
 'test_member.c',
 'test_member_perf.c',
@@ -295,6 +296,7 @@ extra_test_names = [
 
 perf_test_names = [
 'ring_perf_autotest',
+'malloc_perf_autotest',
 'mempool_perf_autotest',
 'memcpy_perf_autotest',
 'hash_perf_autotest',
diff --git a/app/test/test_malloc_perf.c b/app/test/test_malloc_perf.c
new file mode 100644
index 00..9686fc8af5
--- /dev/null
+++ b/app/test/test_malloc_perf.c
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2021 NVIDIA Corporation & Affiliates
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "test.h"
+
+#define TEST_LOG(level, ...) RTE_LOG(level, USER1, __VA_ARGS__)
+
+typedef void * (alloc_t)(const char *name, size_t size, unsigned int align);
+typedef void (free_t)(void *addr);
+typedef void * (memset_t)(void *addr, int value, size_t size);
+
+static const uint64_t KB = 1 << 10;
+static const uint64_t GB = 1 << 30;
+
+static double
+tsc_to_us(uint64_t tsc, size_t runs)
+{
+   return (double)tsc / rte_get_tsc_hz() * US_PER_S / runs;
+}
+
+static int
+test_memset_perf(double *us_per_gb)
+{
+   static const size_t RUNS = 20;
+
+   void *ptr;
+   size_t i;
+   uint64_t tsc;
+
+   TEST_LOG(INFO, "Reference: memset\n");
+
+   ptr = rte_malloc(NULL, GB, 0);
+   if (ptr == NULL) {
+   TEST_LOG(ERR, "rte_malloc(size=%"PRIx64") failed\n", GB);
+   return -1;
+   }
+
+   tsc = rte_rdtsc_precise();
+   for (i = 0; i < RUNS; i++)
+   memset(ptr, 0, GB);
+   tsc = rte_rdtsc_precise() - tsc;
+
+   *us_per_gb = tsc_to_us(tsc, RUNS);
+   TEST_LOG(INFO, "Result: %f.3 GiB/s <=> %.2f us/MiB\n",
+   US_PER_S / *us_per_gb, *us_per_gb / KB);
+
+   rte_free(ptr);
+   TEST_LOG(INFO, "\n");
+   return 0;
+}
+
+static int
+test_alloc_perf(const char *name, alloc_t *alloc_fn, free_t *free_fn,
+   memset_t *memset_fn, double memset_gb_us, size_t max_runs)
+{
+   static const size_t SIZES[] = {
+   1 << 6, 1 << 7, 1 << 10, 1 << 12, 1 << 16, 1 << 20,
+   1 << 21, 1 << 22, 1 << 24, 1 << 30 };
+
+   size_t i, j;
+   void **ptrs;
+
+   TEST_LOG(INFO, "Performance: %s\n", name);
+
+   ptrs = calloc(max_runs, sizeof(ptrs[0]));
+   if (ptrs == NULL) {
+   TEST_LOG(ERR, "Cannot allocate memory for pointers");
+   return -1;
+   }
+
+   TEST_LOG(INFO, "%12s%8s%12s%12s%12s%17s\n", "Size (B)", "Runs",
+   "Alloc (us)", "Free (us)", "Total (us)",
+   memset_fn != NULL ? "memset (us)" : "est.memset (us)");
+   for (i = 0; i < RTE_DIM(SIZES); i++) {
+   size_t size = SIZES[i];
+   size_t runs_done;
+   uint64_t tsc_start, tsc_alloc, tsc_memset = 0, tsc_free;
+   double alloc_time, free_time, memset_time;
+
+   tsc_start = rte_rdtsc_precise();
+   for (j = 0; j < max_runs; j++) {
+   ptrs[j] = alloc_fn(NULL, size, 0);
+   if (ptrs[j] == NULL)
+   break;
+   }
+   tsc_alloc = rte_rdtsc_precise() - tsc_start;
+
+   if (j == 0) {
+   TEST_LOG(INFO, "%12zu Interrupted: out of memory.\n",
+   size);
+   break;
+   }
+   runs_done = j;
+
+

[PATCH v2 3/6] mem: add dirty malloc element support

2022-01-19 Thread Dmitry Kozlyuk
EAL malloc layer assumed the content of all free elements
is filled with zeros ("clean"), as opposed to uninitialized ("dirty").
This assumption was ensured in two ways:
1. EAL memalloc layer always returned clean memory.
2. Freed memory was cleared before returning into the heap.

Clearing the memory can be as slow as around 14 GiB/s.
To avoid this cost, the memalloc layer is allowed to return dirty memory.
Such segments are marked with RTE_MEMSEG_FLAG_DIRTY.
The allocator tracks elements that contain dirty memory
using the new flag in the element header.
When clean memory is requested via rte_zmalloc*()
and the suitable element is dirty, it is cleared on allocation.
When memory is deallocated, the freed element is joined
with adjacent free elements, and the dirty flag is updated:

a) If the joint element contains dirty parts, it is dirty:

dirty + freed + dirty = dirty  =>  no need to clean
freed + dirty = dirty  the freed memory

   Dirty parts may be large (e.g. initial allocation),
   so clearing them could create unpredictable slowdown.

b) If the only dirty part of the joint element
   is the freed memory, the joint element can be made clean:

clean + freed + clean = clean  =>  freed memory
clean + freed = clean  must be cleared
freed + clean = clean
freed = clean

   This logic naturally reproduces the old behavior
   and always applies in modes when EAL memalloc layer
   returns only clean segments.

As a result, memory is either cleared on free, as before,
or it will be cleared on allocation if need be, but never twice.

Signed-off-by: Dmitry Kozlyuk 
---
 lib/eal/common/malloc_elem.c | 22 +++---
 lib/eal/common/malloc_elem.h | 11 +--
 lib/eal/common/malloc_heap.c | 18 --
 lib/eal/common/rte_malloc.c  | 21 ++---
 lib/eal/include/rte_memory.h |  8 ++--
 5 files changed, 60 insertions(+), 20 deletions(-)

diff --git a/lib/eal/common/malloc_elem.c b/lib/eal/common/malloc_elem.c
index bdd20a162e..e04e0890fb 100644
--- a/lib/eal/common/malloc_elem.c
+++ b/lib/eal/common/malloc_elem.c
@@ -129,7 +129,7 @@ malloc_elem_find_max_iova_contig(struct malloc_elem *elem, 
size_t align)
 void
 malloc_elem_init(struct malloc_elem *elem, struct malloc_heap *heap,
struct rte_memseg_list *msl, size_t size,
-   struct malloc_elem *orig_elem, size_t orig_size)
+   struct malloc_elem *orig_elem, size_t orig_size, bool dirty)
 {
elem->heap = heap;
elem->msl = msl;
@@ -137,6 +137,7 @@ malloc_elem_init(struct malloc_elem *elem, struct 
malloc_heap *heap,
elem->next = NULL;
memset(&elem->free_list, 0, sizeof(elem->free_list));
elem->state = ELEM_FREE;
+   elem->dirty = dirty;
elem->size = size;
elem->pad = 0;
elem->orig_elem = orig_elem;
@@ -300,7 +301,7 @@ split_elem(struct malloc_elem *elem, struct malloc_elem 
*split_pt)
const size_t new_elem_size = elem->size - old_elem_size;
 
malloc_elem_init(split_pt, elem->heap, elem->msl, new_elem_size,
-elem->orig_elem, elem->orig_size);
+   elem->orig_elem, elem->orig_size, elem->dirty);
split_pt->prev = elem;
split_pt->next = next_elem;
if (next_elem)
@@ -506,6 +507,7 @@ join_elem(struct malloc_elem *elem1, struct malloc_elem 
*elem2)
else
elem1->heap->last = elem1;
elem1->next = next;
+   elem1->dirty |= elem2->dirty;
if (elem1->pad) {
struct malloc_elem *inner = RTE_PTR_ADD(elem1, elem1->pad);
inner->size = elem1->size - elem1->pad;
@@ -579,6 +581,14 @@ malloc_elem_free(struct malloc_elem *elem)
ptr = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN);
data_len = elem->size - MALLOC_ELEM_OVERHEAD;
 
+   /*
+* Consider the element clean for the purposes of joining.
+* If both neighbors are clean or non-existent,
+* the joint element will be clean,
+* which means the memory should be cleared.
+* There is no need to clear the memory if the joint element is dirty.
+*/
+   elem->dirty = false;
elem = malloc_elem_join_adjacent_free(elem);
 
malloc_elem_free_list_insert(elem);
@@ -588,8 +598,14 @@ malloc_elem_free(struct malloc_elem *elem)
/* decrease heap's count of allocated elements */
elem->heap->alloc_count--;
 
-   /* poison memory */
+#ifndef RTE_MALLOC_DEBUG
+   /* Normally clear the memory when needed. */
+   if (!elem->dirty)
+   memset(ptr, 0, data_len);
+#else
+   /* Always poison the memory in debug mode. */
memset(ptr, MALLOC_POISON, data_len);
+#endif
 
return elem;
 }
diff --git a/lib/eal/common/malloc_elem.h b/lib/eal/common/malloc_elem.h
index 15d8ba7af2..f2aa98821b 100644
--- a/lib/eal/common/malloc_elem.h
+++ b/lib/eal/common/malloc_elem.h
@@ -2

[PATCH v2 4/6] eal: refactor --huge-unlink storage

2022-01-19 Thread Dmitry Kozlyuk
In preparation to extend --huge-unlink option semantics,
refactor how it is stored in the internal configuration.
It makes future changes more isolated.

Signed-off-by: Dmitry Kozlyuk 
Acked-by: Thomas Monjalon 
---
 lib/eal/common/eal_common_options.c | 9 +
 lib/eal/common/eal_internal_cfg.h   | 8 +++-
 lib/eal/linux/eal_memalloc.c| 7 ---
 lib/eal/linux/eal_memory.c  | 2 +-
 4 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/lib/eal/common/eal_common_options.c 
b/lib/eal/common/eal_common_options.c
index 1cfdd75f3b..7520ebda8e 100644
--- a/lib/eal/common/eal_common_options.c
+++ b/lib/eal/common/eal_common_options.c
@@ -1737,7 +1737,7 @@ eal_parse_common_option(int opt, const char *optarg,
 
/* long options */
case OPT_HUGE_UNLINK_NUM:
-   conf->hugepage_unlink = 1;
+   conf->hugepage_file.unlink_before_mapping = true;
break;
 
case OPT_NO_HUGE_NUM:
@@ -1766,7 +1766,7 @@ eal_parse_common_option(int opt, const char *optarg,
conf->in_memory = 1;
/* in-memory is a superset of noshconf and huge-unlink */
conf->no_shconf = 1;
-   conf->hugepage_unlink = 1;
+   conf->hugepage_file.unlink_before_mapping = true;
break;
 
case OPT_PROC_TYPE_NUM:
@@ -2050,7 +2050,8 @@ eal_check_common_options(struct internal_config 
*internal_cfg)
"be specified together with --"OPT_NO_HUGE"\n");
return -1;
}
-   if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink &&
+   if (internal_cfg->no_hugetlbfs &&
+   internal_cfg->hugepage_file.unlink_before_mapping &&
!internal_cfg->in_memory) {
RTE_LOG(ERR, EAL, "Option --"OPT_HUGE_UNLINK" cannot "
"be specified together with --"OPT_NO_HUGE"\n");
@@ -2061,7 +2062,7 @@ eal_check_common_options(struct internal_config 
*internal_cfg)
" is only supported in non-legacy memory mode\n");
}
if (internal_cfg->single_file_segments &&
-   internal_cfg->hugepage_unlink &&
+   internal_cfg->hugepage_file.unlink_before_mapping &&
!internal_cfg->in_memory) {
RTE_LOG(ERR, EAL, "Option --"OPT_SINGLE_FILE_SEGMENTS" is "
"not compatible with --"OPT_HUGE_UNLINK"\n");
diff --git a/lib/eal/common/eal_internal_cfg.h 
b/lib/eal/common/eal_internal_cfg.h
index d6c0470eb8..b5e6942578 100644
--- a/lib/eal/common/eal_internal_cfg.h
+++ b/lib/eal/common/eal_internal_cfg.h
@@ -40,6 +40,12 @@ struct simd_bitwidth {
uint16_t bitwidth; /**< bitwidth value */
 };
 
+/** Hugepage backing files discipline. */
+struct hugepage_file_discipline {
+   /** Unlink files before mapping them to leave no trace in hugetlbfs. */
+   bool unlink_before_mapping;
+};
+
 /**
  * internal configuration
  */
@@ -48,7 +54,7 @@ struct internal_config {
volatile unsigned force_nchannel; /**< force number of channels */
volatile unsigned force_nrank;/**< force number of ranks */
volatile unsigned no_hugetlbfs;   /**< true to disable hugetlbfs */
-   unsigned hugepage_unlink; /**< true to unlink backing files */
+   struct hugepage_file_discipline hugepage_file;
volatile unsigned no_pci; /**< true to disable PCI */
volatile unsigned no_hpet;/**< true to disable HPET */
volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
diff --git a/lib/eal/linux/eal_memalloc.c b/lib/eal/linux/eal_memalloc.c
index 337f2bc739..56a1ddb32b 100644
--- a/lib/eal/linux/eal_memalloc.c
+++ b/lib/eal/linux/eal_memalloc.c
@@ -564,7 +564,7 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,
__func__, strerror(errno));
goto resized;
}
-   if (internal_conf->hugepage_unlink &&
+   if (internal_conf->hugepage_file.unlink_before_mapping 
&&
!internal_conf->in_memory) {
if (unlink(path)) {
RTE_LOG(DEBUG, EAL, "%s(): unlink() 
failed: %s\n",
@@ -697,7 +697,7 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,
close_hugefile(fd, path, list_idx);
} else {
/* only remove file if we can take out a write lock */
-   if (internal_conf->hugepage_unlink == 0 &&
+   if (!internal_conf->hugepage_file.unlink_before_mapping &&
internal_conf->in_memory == 0 &&
lock(fd, LOCK_EX) == 1)
unlink(path);
@@ -756,7 +756,8 @@ free_seg(struct rte_memseg *ms, struct hugepage_

[PATCH v2 1/6] doc: add hugepage mapping details

2022-01-19 Thread Dmitry Kozlyuk
Hugepage mapping is a layer that EAL malloc builds upon.
There were implicit references to its details,
like mentions of segment file descriptors,
but no explicit description of its modes and operation.
Add an overview of mechanics used on each supported OS.
Convert memory management subsections from list items
to level 4 headers: they are big and important enough.

Signed-off-by: Dmitry Kozlyuk 
---
 .../prog_guide/env_abstraction_layer.rst  | 95 +--
 1 file changed, 86 insertions(+), 9 deletions(-)

diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst 
b/doc/guides/prog_guide/env_abstraction_layer.rst
index c6accce701..fede7fe69d 100644
--- a/doc/guides/prog_guide/env_abstraction_layer.rst
+++ b/doc/guides/prog_guide/env_abstraction_layer.rst
@@ -86,7 +86,7 @@ See chapter
 Memory Mapping Discovery and Memory Reservation
 ~~~
 
-The allocation of large contiguous physical memory is done using the hugetlbfs 
kernel filesystem.
+The allocation of large contiguous physical memory is done using hugepages.
 The EAL provides an API to reserve named memory zones in this contiguous 
memory.
 The physical address of the reserved memory for that memory zone is also 
returned to the user by the memory zone reservation API.
 
@@ -95,11 +95,13 @@ and legacy mode. Both modes are explained below.
 
 .. note::
 
-Memory reservations done using the APIs provided by rte_malloc are also 
backed by pages from the hugetlbfs filesystem.
+Memory reservations done using the APIs provided by rte_malloc
+are also backed by hugepages unless ``--no-huge`` option is given.
 
-+ Dynamic memory mode
+Dynamic Memory Mode
+^^^
 
-Currently, this mode is only supported on Linux.
+Currently, this mode is only supported on Linux and Windows.
 
 In this mode, usage of hugepages by DPDK application will grow and shrink based
 on application's requests. Any memory allocation through ``rte_malloc()``,
@@ -155,7 +157,8 @@ of memory that can be used by DPDK application.
 :ref:`Multi-process Support ` for more details about
 DPDK IPC.
 
-+ Legacy memory mode
+Legacy Memory Mode
+^^
 
 This mode is enabled by specifying ``--legacy-mem`` command-line switch to the
 EAL. This switch will have no effect on FreeBSD as FreeBSD only supports
@@ -168,7 +171,8 @@ not allow acquiring or releasing hugepages from the system 
at runtime.
 If neither ``-m`` nor ``--socket-mem`` were specified, the entire available
 hugepage memory will be preallocated.
 
-+ Hugepage allocation matching
+Hugepage Allocation Matching
+
 
 This behavior is enabled by specifying the ``--match-allocations`` command-line
 switch to the EAL. This switch is Linux-only and not supported with
@@ -182,7 +186,8 @@ matching can be used by these types of applications to 
satisfy both of these
 requirements. This can result in some increased memory usage which is
 very dependent on the memory allocation patterns of the application.
 
-+ 32-bit support
+32-bit Support
+^^
 
 Additional restrictions are present when running in 32-bit mode. In dynamic
 memory mode, by default maximum of 2 gigabytes of VA space will be 
preallocated,
@@ -192,7 +197,8 @@ used.
 In legacy mode, VA space will only be preallocated for segments that were
 requested (plus padding, to keep IOVA-contiguousness).
 
-+ Maximum amount of memory
+Maximum Amount of Memory
+
 
 All possible virtual memory space that can ever be used for hugepage mapping in
 a DPDK process is preallocated at startup, thereby placing an upper limit on 
how
@@ -222,7 +228,77 @@ Normally, these options do not need to be changed.
 can later be mapped into that preallocated VA space (if dynamic memory mode
 is enabled), and can optionally be mapped into it at startup.
 
-+ Segment file descriptors
+Hugepage Mapping
+
+
+Below is an overview of methods used for each OS to obtain hugepages,
+explaining why certain limitations and options exist in EAL.
+See the user guide for a specific OS for configuration details.
+
+FreeBSD uses ``contigmem`` kernel module
+to reserve a fixed number of hugepages at system start,
+which are mapped by EAL at initialization using a specific ``sysctl()``.
+
+Windows EAL allocates hugepages from the OS as needed using Win32 API,
+so available amount depends on the system load.
+It uses ``virt2phys`` kernel module to obtain physical addresses,
+unless running in IOVA-as-VA mode (e.g. forced with ``--iova-mode=va``).
+
+Linux implements a variety of methods:
+
+* mapping each hugepage from its own file in hugetlbfs;
+* mapping multiple hugepages from a shared file in hugetlbfs;
+* anonymous mapping.
+
+Mapping hugepages from files in hugetlbfs is essential for multi-process,
+because secondary processes need to map the same hugepages.
+EAL creates files like ``rtemap_0``
+in directories specified with ``--huge-dir`` o

[PATCH v2 6/6] eal: extend --huge-unlink for hugepage file reuse

2022-01-19 Thread Dmitry Kozlyuk
Expose Linux EAL ability to reuse existing hugepage files
via --huge-unlink=never switch.
Default behavior is unchanged, it can also be specified
using --huge-unlink=existing for consistency.
Old --huge-unlink switch is kept,
it is an alias for --huge-unlink=always.
Add a test case for the --huge-unlink=never mode.

Signed-off-by: Dmitry Kozlyuk 
Acked-by: Thomas Monjalon 
---
 app/test/test_eal_flags.c | 25 
 doc/guides/linux_gsg/linux_eal_parameters.rst | 24 ++--
 .../prog_guide/env_abstraction_layer.rst  | 12 ++
 doc/guides/rel_notes/release_22_03.rst|  7 
 lib/eal/common/eal_common_options.c   | 39 +--
 5 files changed, 100 insertions(+), 7 deletions(-)

diff --git a/app/test/test_eal_flags.c b/app/test/test_eal_flags.c
index d7f4c2cd47..e2696cda63 100644
--- a/app/test/test_eal_flags.c
+++ b/app/test/test_eal_flags.c
@@ -1122,6 +1122,11 @@ test_file_prefix(void)
DEFAULT_MEM_SIZE, "--single-file-segments",
"--file-prefix=" memtest1 };
 
+   /* primary process with memtest1 and --huge-unlink=never mode */
+   const char * const argv9[] = {prgname, "-m",
+   DEFAULT_MEM_SIZE, "--huge-unlink=never",
+   "--file-prefix=" memtest1 };
+
/* check if files for current prefix are present */
if (process_hugefiles(prefix, HUGEPAGE_CHECK_EXISTS) != 1) {
printf("Error - hugepage files for %s were not created!\n", 
prefix);
@@ -1290,6 +1295,26 @@ test_file_prefix(void)
return -1;
}
 
+   /* this process will run with --huge-unlink=never,
+* so it should not remove hugepage files when it exits
+*/
+   if (launch_proc(argv9) != 0) {
+   printf("Error - failed to run with --huge-unlink=never\n");
+   return -1;
+   }
+
+   /* check if hugefiles for memtest1 are present */
+   if (process_hugefiles(memtest1, HUGEPAGE_CHECK_EXISTS) == 0) {
+   printf("Error - hugepage files for %s were deleted!\n",
+   memtest1);
+   return -1;
+   } else {
+   if (process_hugefiles(memtest1, HUGEPAGE_DELETE) != 1) {
+   printf("Error - deleting hugepages failed!\n");
+   return -1;
+   }
+   }
+
return 0;
 }
 
diff --git a/doc/guides/linux_gsg/linux_eal_parameters.rst 
b/doc/guides/linux_gsg/linux_eal_parameters.rst
index 74df2611b5..ea8f381391 100644
--- a/doc/guides/linux_gsg/linux_eal_parameters.rst
+++ b/doc/guides/linux_gsg/linux_eal_parameters.rst
@@ -84,10 +84,26 @@ Memory-related options
 Use specified hugetlbfs directory instead of autodetected ones. This can be
 a sub-directory within a hugetlbfs mountpoint.
 
-*   ``--huge-unlink``
-
-Unlink hugepage files after creating them (implies no secondary process
-support).
+*   ``--huge-unlink[=existing|always|never]``
+
+No ``--huge-unlink`` option or ``--huge-unlink=existing`` is the default:
+existing hugepage files are removed and re-created
+to ensure the kernel clears the memory and prevents any data leaks.
+
+With ``--huge-unlink`` (no value) or ``--huge-unlink=always``,
+hugepage files are also removed before mapping them,
+so that the application leaves no files in hugetlbfs.
+This mode implies no multi-process support.
+
+When ``--huge-unlink=never`` is specified, existing hugepage files
+are never removed, but are remapped instead, allowing hugepage reuse.
+This makes restart faster by saving time to clear memory at initialization,
+but it may slow down zeroed allocations later.
+Reused hugepages can contain data from previous processes that used them,
+which may be a security concern.
+Hugepage files created in this mode are also not removed
+when all the hugepages mapped from them are freed,
+which allows reusing these files after a restart.
 
 *   ``--match-allocations``
 
diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst 
b/doc/guides/prog_guide/env_abstraction_layer.rst
index fede7fe69d..b1eae592ab 100644
--- a/doc/guides/prog_guide/env_abstraction_layer.rst
+++ b/doc/guides/prog_guide/env_abstraction_layer.rst
@@ -282,6 +282,18 @@ to prevent data leaks from previous users of the same 
hugepage.
 EAL ensures this behavior by removing existing backing files at startup
 and by recreating them before opening for mapping (as a precaution).
 
+One exception is ``--huge-unlink=never`` mode.
+It is used to speed up EAL initialization, usually on application restart.
+Clearing memory constitutes more than 95% of hugepage mapping time.
+EAL can save it by remapping existing backing files
+with all the data left in the mapped hugepages ("dirty" memory).
+Such segments are marked with ``RTE_MEMSEG_FLAG_DIRTY``.
+Memory allocator detects dirty segments and handles them accordingly,
+in particular, it clears me

[PATCH v2 5/6] eal/linux: allow hugepage file reuse

2022-01-19 Thread Dmitry Kozlyuk
Linux EAL ensured that mapped hugepages are clean
by always mapping from newly created files:
existing hugepage backing files were always removed.
In this case, the kernel clears the page to prevent data leaks,
because the mapped memory may contain leftover data
from the previous process that was using this memory.
Clearing takes the bulk of the time spent in mmap(2),
increasing EAL initialization time.

Introduce a mode to keep existing files and reuse them
in order to speed up initial memory allocation in EAL.
Hugepages mapped from such files may contain data
left by the previous process that used this memory,
so RTE_MEMSEG_FLAG_DIRTY is set for their segments.
If multiple hugepages are mapped from the same file:
1. When fallocate(2) is used, all memory mapped from this file
   is considered dirty, because it is unknown
   which parts of the file are holes.
2. When ftruncate(3) is used, memory mapped from this file
   is considered dirty unless the file is extended
   to create a new mapping, which implies clean memory.

Signed-off-by: Dmitry Kozlyuk 
---
Coverity complains that "path" may be uninitialized in get_seg_fd()
at line 327, but it is always initialized with eal_get_hugefile_path()
at lines 309-316.

 lib/eal/common/eal_common_options.c |   2 +
 lib/eal/common/eal_internal_cfg.h   |   2 +
 lib/eal/linux/eal.c |   3 +-
 lib/eal/linux/eal_hugepage_info.c   | 118 
 lib/eal/linux/eal_memalloc.c| 166 +---
 5 files changed, 206 insertions(+), 85 deletions(-)

diff --git a/lib/eal/common/eal_common_options.c 
b/lib/eal/common/eal_common_options.c
index 7520ebda8e..cdd2284b0c 100644
--- a/lib/eal/common/eal_common_options.c
+++ b/lib/eal/common/eal_common_options.c
@@ -311,6 +311,8 @@ eal_reset_internal_config(struct internal_config 
*internal_cfg)
internal_cfg->force_nchannel = 0;
internal_cfg->hugefile_prefix = NULL;
internal_cfg->hugepage_dir = NULL;
+   internal_cfg->hugepage_file.unlink_before_mapping = false;
+   internal_cfg->hugepage_file.unlink_existing = true;
internal_cfg->force_sockets = 0;
/* zero out the NUMA config */
for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
diff --git a/lib/eal/common/eal_internal_cfg.h 
b/lib/eal/common/eal_internal_cfg.h
index b5e6942578..d2be7bfa57 100644
--- a/lib/eal/common/eal_internal_cfg.h
+++ b/lib/eal/common/eal_internal_cfg.h
@@ -44,6 +44,8 @@ struct simd_bitwidth {
 struct hugepage_file_discipline {
/** Unlink files before mapping them to leave no trace in hugetlbfs. */
bool unlink_before_mapping;
+   /** Unlink existing files at startup, re-create them before mapping. */
+   bool unlink_existing;
 };
 
 /**
diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
index 60b4924838..9c8395ab14 100644
--- a/lib/eal/linux/eal.c
+++ b/lib/eal/linux/eal.c
@@ -1360,7 +1360,8 @@ rte_eal_cleanup(void)
struct internal_config *internal_conf =
eal_get_internal_configuration();
 
-   if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+   if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
+   internal_conf->hugepage_file.unlink_existing)
rte_memseg_walk(mark_freeable, NULL);
rte_service_finalize();
rte_mp_channel_cleanup();
diff --git a/lib/eal/linux/eal_hugepage_info.c 
b/lib/eal/linux/eal_hugepage_info.c
index 9fb0e968db..ec172ef4b8 100644
--- a/lib/eal/linux/eal_hugepage_info.c
+++ b/lib/eal/linux/eal_hugepage_info.c
@@ -84,7 +84,7 @@ static int get_hp_sysfs_value(const char *subdir, const char 
*file, unsigned lon
 /* this function is only called from eal_hugepage_info_init which itself
  * is only called from a primary process */
 static uint32_t
-get_num_hugepages(const char *subdir, size_t sz)
+get_num_hugepages(const char *subdir, size_t sz, unsigned int reusable_pages)
 {
unsigned long resv_pages, num_pages, over_pages, surplus_pages;
const char *nr_hp_file = "free_hugepages";
@@ -116,7 +116,7 @@ get_num_hugepages(const char *subdir, size_t sz)
else
over_pages = 0;
 
-   if (num_pages == 0 && over_pages == 0)
+   if (num_pages == 0 && over_pages == 0 && reusable_pages)
RTE_LOG(WARNING, EAL, "No available %zu kB hugepages 
reported\n",
sz >> 10);
 
@@ -124,6 +124,10 @@ get_num_hugepages(const char *subdir, size_t sz)
if (num_pages < over_pages) /* overflow */
num_pages = UINT32_MAX;
 
+   num_pages += reusable_pages;
+   if (num_pages < reusable_pages) /* overflow */
+   num_pages = UINT32_MAX;
+
/* we want to return a uint32_t and more than this looks suspicious
 * anyway ... */
if (num_pages > UINT32_MAX)
@@ -297,20 +301,28 @@ get_hugepage_dir(uint64_t hugepage_sz, char *hugedir, int 
len)
return -1;
 }
 
+struct walk_hugedir_data {
+   int dir_fd;
+   int file_

RE: [PATCH v1 0/6] Fast restart with many hugepages

2022-01-19 Thread Dmitry Kozlyuk
Hi Bruce,

> From: Bruce Richardson 
> [...]
> this seems really interesting, but in the absense of TB of memory
> being
> used, is it easily possible to see the benefits of this work? I've
> been
> playing with adding large memory allocations to helloworld example and
> checking the runtime. Allocating 1GB using malloc per thread seems to
> show
> a small (<0.5 second at most) benefit, and using a fixed 10GB
> allocation
> using memzone_reserve at startup shows runtimes within the margin of
> error
> when run with --huge-unlink=existing vs huge-unlink=never. At what
> size of
> memory footprint is it expected to make a clear improvement?

Sorry, there was a bug in v1 that completely broke the testing.
I should've double-checked
after what I considered a quick rebase before sending.
Version 2 can be simply tested even without modifying the code:

time sh -c 'echo quit | sudo ../_build/dpdk/app/test/dpdk-test
--huge-unlink=never -m 8192 --single-file-segments --no-pci
2>/dev/null >/dev/null'

With --huge-unlink=existing:
real0m1.450s
user0m0.574s
sys 0m0.706s(1)

With --huge-unlink=never, first run (no hugepage files to reuse):
real0m0.892s
user0m0.002s
sys 0m0.718s(2)

With --huge-unlink=never, second run (hugepage files left):
real0m0.210s
user0m0.010s
sys 0m0.021s(3)

Notice that (1) and (2) are close since there is no reuse,
but (2) and (3) differ by 0.7 seconds for 8GB,
which correlates with 14 GB/sec memset() speed on this machine.
Results without --single-file-segments are nearly identical.


RE: [PATCH v1 2/6] app/test: add allocator performance benchmark

2022-01-19 Thread Dmitry Kozlyuk
> From: Bruce Richardson 
> [...]
> > What is the expected running time of this test? When I tried it out
> on my
> > machine it appears to hang after the following output:
> > [...]

It always runs within 50 seconds on my machine (E5-1650 v3 @ 3.50GHz).
Judging by the output, it runs faster than yours
(203 vs 811 total microseconds in 1M allocation case):

USER1: Reference: memset
USER1: Result: 14.557848.3 GiB/s <=> 67.08 us/MiB
USER1: 
USER1: Performance: rte_malloc
USER1: Size (B)Runs  Alloc (us)   Free (us)  Total (us)  memset (us)
USER1:   64   10.090.040.13 0.01
USER1:  128   10.090.040.13 0.01
USER1: 1024   10.120.090.21 0.11
USER1: 4096   10.150.400.55 0.27
USER1:65536   10.164.374.53 4.25
USER1:  1048576   1   73.85  129.23  203.0767.26
USER1:  20971527154  148.98  259.42  408.39   134.34
USER1:  41943043570  298.28  519.76  818.04   268.65
USER1: 16777216 882 1206.85 2093.46 3300.30  1074.25
USER1:   1073741824   6   188765.01   206544.04   395309.06 68739.83
[...]

Note that to see --huge-unlink effect you must run it twice:
the first run creates and leaves the files, the second reuses them.


DTS WG meeting minutes - 1/12/22

2022-01-19 Thread Honnappa Nagarahalli
Hello,
Please find the minutes below. The history of minutes is at [1].

Thanks,
Honnappa

[1] 
https://docs.google.com/document/d/1E2mkTHNQ5vyln1JvnJTil15cjacUTXP7LXb9K960Vxs/edit?usp=sharing

Attendees:
--
Honnappa Nagarahalli
Owen Hilyard
Lijuan Tu
Ali Alnubani
Vladislav Grishenko

Agenda:
--
1) Review/Carry forward pending action items
2) Rotating the chairperson for hosting the meeting
3) Makefile build removal from DTS
4) DTS files license to change to SPDX license
5) How do we merge the DTS into DPDK - directory structure, compilation time, 
documentation (tooling and directory structure for online documentation)
6) Build DTS documentation using Meson

Minutes:
---
1) The chairperson hosting the meeting will be rotated going forward. As of now 
it will be shared between Honnappa, Juraj, Lijuan, Lincoln (?), Owen. The 
rotation will be done in the alphabetical order of the first name. However, as 
agreed, the meeting on 1/18 would be hosted by Owen.
2) All the stable releases use Meson build. So the makefile build support from 
DTS can be removed. The makefile removal patch is in the DTS community review.
3) A quick scan reveals that DTS has some files with GPL2 license. A deeper 
look is required. Is there any process we need to follow to change to SPDX? 
Decided to follow up with the DPDK community.
4) Merging DTS into DPDK - Is a bigger discussion. Need to agree and fix the 
python version to use. Deferred the discussion to a future time as it needs more 
progress in DTS. Will be discussed when Lincoln and Juraj are available
5) The DTS user guide and DTS test plans are being built using makefile. This 
needs to be changed to using meson build.

Action Items:
-
1) Honnappa - Propose changes to DPDK to address changing the constants and 
printfs, refer to Lijuan's email.
2) All - Review Owen's DTS contributions guide
3) Owen - Send out the DTS contributions guide to DTS and DPDK mailing list. 2 
weeks of RFC followed by a patch.
4) Owen, Lijuan - Look for merging the makefile removal patch 
(http://patchwork.dpdk.org/project/dts/list/?series=20610)
5) Honnappa - Understand how the SPDX change was done for the DPDK repo
6) Honnappa - Talk to techboard about files with the GPL2 license in DTS. 
7) Owen - Take a deeper look at the license and copyright assignments for the 
files.
8) Juraj - Convert the makefile build for documentation to use meson build.

Any other business:
--
Next Meeting: January 19, 2022


[PATCH] net/cxgbe: rework mailbox access to fix gcc12 -Wdangling-pointer

2022-01-19 Thread Rahul Lakkireddy
Rework mailbox access serialization to dynamically allocate and
free mbox entry. Also remove unnecessary temp memory and macros.

Observed with: gcc-12.0 (GCC) 12.0.1 20220118 (experimental)

In file included from ../lib/eal/linux/include/rte_os.h:14,
 from ../lib/eal/include/rte_common.h:28,
 from ../lib/eal/include/rte_log.h:25,
 from ../lib/ethdev/rte_ethdev.h:164,
 from ../lib/ethdev/ethdev_driver.h:18,
 from ../drivers/net/cxgbe/base/t4vf_hw.c:6:
In function ‘t4_os_atomic_add_tail’,
inlined from ‘t4vf_wr_mbox_core’ at 
../drivers/net/cxgbe/base/t4vf_hw.c:115:2:
../drivers/net/cxgbe/base/adapter.h:742:9: warning: storing the address of 
local variable ‘entry’ in ‘((struct mbox_list *)adapter)[96].tqh_last’ 
[-Wdangling-pointer=]
  742 | TAILQ_INSERT_TAIL(head, entry, next);
  | ^
../drivers/net/cxgbe/base/t4vf_hw.c: In function ‘t4vf_wr_mbox_core’:
../drivers/net/cxgbe/base/t4vf_hw.c:86:27: note: ‘entry’ declared here
   86 | struct mbox_entry entry;
  |   ^
../drivers/net/cxgbe/base/t4vf_hw.c:86:27: note: ‘adapter’ declared here

Reported-by: Ferruh Yigit 
Signed-off-by: Rahul Lakkireddy 
---
 drivers/net/cxgbe/base/adapter.h |  2 -
 drivers/net/cxgbe/base/t4_hw.c   | 83 
 drivers/net/cxgbe/base/t4vf_hw.c | 28 +++
 3 files changed, 49 insertions(+), 64 deletions(-)

diff --git a/drivers/net/cxgbe/base/adapter.h b/drivers/net/cxgbe/base/adapter.h
index 1c7c8afe16..97963422bf 100644
--- a/drivers/net/cxgbe/base/adapter.h
+++ b/drivers/net/cxgbe/base/adapter.h
@@ -291,8 +291,6 @@ struct sge {
u32 fl_starve_thres;/* Free List starvation threshold */
 };
 
-#define T4_OS_NEEDS_MBOX_LOCKING 1
-
 /*
  * OS Lock/List primitives for those interfaces in the Common Code which
  * need this.
diff --git a/drivers/net/cxgbe/base/t4_hw.c b/drivers/net/cxgbe/base/t4_hw.c
index cdcd7e5510..645833765a 100644
--- a/drivers/net/cxgbe/base/t4_hw.c
+++ b/drivers/net/cxgbe/base/t4_hw.c
@@ -263,17 +263,6 @@ static void fw_asrt(struct adapter *adap, u32 mbox_addr)
 
 #define X_CIM_PF_NOACCESS 0x
 
-/*
- * If the Host OS Driver needs locking arround accesses to the mailbox, this
- * can be turned on via the T4_OS_NEEDS_MBOX_LOCKING CPP define ...
- */
-/* makes single-statement usage a bit cleaner ... */
-#ifdef T4_OS_NEEDS_MBOX_LOCKING
-#define T4_OS_MBOX_LOCKING(x) x
-#else
-#define T4_OS_MBOX_LOCKING(x) do {} while (0)
-#endif
-
 /**
  * t4_wr_mbox_meat_timeout - send a command to FW through the given mailbox
  * @adap: the adapter
@@ -314,28 +303,17 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int 
mbox,
1, 1, 3, 5, 10, 10, 20, 50, 100
};
 
-   u32 v;
-   u64 res;
-   int i, ms;
-   unsigned int delay_idx;
-   __be64 *temp = (__be64 *)malloc(size * sizeof(char));
-   __be64 *p = temp;
u32 data_reg = PF_REG(mbox, A_CIM_PF_MAILBOX_DATA);
u32 ctl_reg = PF_REG(mbox, A_CIM_PF_MAILBOX_CTRL);
-   u32 ctl;
-   struct mbox_entry entry;
-   u32 pcie_fw = 0;
-
-   if (!temp)
-   return -ENOMEM;
+   struct mbox_entry *entry;
+   u32 v, ctl, pcie_fw = 0;
+   unsigned int delay_idx;
+   const __be64 *p;
+   int i, ms, ret;
+   u64 res;
 
-   if ((size & 15) || size > MBOX_LEN) {
-   free(temp);
+   if ((size & 15) != 0 || size > MBOX_LEN)
return -EINVAL;
-   }
-
-   memset(p, 0, size);
-   memcpy(p, (const __be64 *)cmd, size);
 
/*
 * If we have a negative timeout, that implies that we can't sleep.
@@ -345,14 +323,17 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int 
mbox,
timeout = -timeout;
}
 
-#ifdef T4_OS_NEEDS_MBOX_LOCKING
+   entry = t4_os_alloc(sizeof(*entry));
+   if (entry == NULL)
+   return -ENOMEM;
+
/*
 * Queue ourselves onto the mailbox access list.  When our entry is at
 * the front of the list, we have rights to access the mailbox.  So we
 * wait [for a while] till we're at the front [or bail out with an
 * EBUSY] ...
 */
-   t4_os_atomic_add_tail(&entry, &adap->mbox_list, &adap->mbox_lock);
+   t4_os_atomic_add_tail(entry, &adap->mbox_list, &adap->mbox_lock);
 
delay_idx = 0;
ms = delay[0];
@@ -367,18 +348,18 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int 
mbox,
 */
pcie_fw = t4_read_reg(adap, A_PCIE_FW);
if (i > 4 * timeout || (pcie_fw & F_PCIE_FW_ERR)) {
-   t4_os_atomic_list_del(&entry, &adap->mbox_list,
+   t4_os_atomic_list_del(entry, &adap->mbox_list,
  &adap->mbox_lock);
t4_report_fw_error(adap);
-   free

Re: [PATCH] net/cxgbe: fix dangling pointer for gcc12

2022-01-19 Thread Rahul Lakkireddy
Hi Ferruh,

On Monday, January 01/17/22, 2022 at 14:36:30 +, Ferruh Yigit wrote:
> Observed with: gcc (GCC) 12.0.0 20220116 (experimental)
> 
> In file included from ../lib/eal/linux/include/rte_os.h:14,
>  from ../lib/eal/include/rte_common.h:28,
>  from ../lib/eal/include/rte_log.h:25,
>  from ../lib/ethdev/rte_ethdev.h:164,
>  from ../lib/ethdev/ethdev_driver.h:18,
>  from ../drivers/net/cxgbe/base/t4vf_hw.c:6:
> In function ‘t4_os_atomic_add_tail’,
> inlined from ‘t4vf_wr_mbox_core’ at 
> ../drivers/net/cxgbe/base/t4vf_hw.c:115:2:
> ../drivers/net/cxgbe/base/adapter.h:742:9: error: storing the address of 
> local variable ‘entry’ in ‘((struct mbox_list *)adapter)[96].tqh_last’ 
> [-Werror=dangling-pointer=]
>   742 | TAILQ_INSERT_TAIL(head, entry, next);
>   | ^
> ../drivers/net/cxgbe/base/t4vf_hw.c: In function ‘t4vf_wr_mbox_core’:
> ../drivers/net/cxgbe/base/t4vf_hw.c:86:27: note: ‘entry’ declared here
>86 | struct mbox_entry entry;
>   |   ^
> ../drivers/net/cxgbe/base/t4vf_hw.c:86:27: note: ‘adapter’ declared here
> 
> It is a valid compiler warning, make local variable a global one.
> 
> Signed-off-by: Ferruh Yigit 
> 
> ---
> 
> Fix is done in a quickest way, mainly to report the issue,
> please feel free to suggest another solution for the build error.
> ---

Thanks for reporting this issue.

The intention of this code is to use the address of the local variable
stored on stack to serialize access from multiple threads. This
address is used only within the scope of this function and is not
accessed from outside. I'm also stumped on why this warning is not
showing up inside t4_wr_mbox_meat_timeout() in t4_hw.c, which also
has similar code.

Nevertheless, I've sent a patch at [1] to dynamically allocate/free
the memory instead and fix the warning.

[1] https://mails.dpdk.org/archives/dev/2022-January/232816.html

Thanks,
Rahul

>  drivers/net/cxgbe/base/t4vf_hw.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/net/cxgbe/base/t4vf_hw.c 
> b/drivers/net/cxgbe/base/t4vf_hw.c
> index 561d759dbc0d..b42c4e78eba9 100644
> --- a/drivers/net/cxgbe/base/t4vf_hw.c
> +++ b/drivers/net/cxgbe/base/t4vf_hw.c
> @@ -83,7 +83,7 @@ int t4vf_wr_mbox_core(struct adapter *adapter,
>  
>   u32 mbox_ctl = T4VF_CIM_BASE_ADDR + A_CIM_VF_EXT_MAILBOX_CTRL;
>   __be64 cmd_rpl[MBOX_LEN / 8];
> - struct mbox_entry entry;
> + static struct mbox_entry entry;
>   unsigned int delay_idx;
>   u32 v, mbox_data;
>   const __be64 *p;
> -- 
> 2.34.1
> 


[PATCH v3] build: add missing arch define for Arm

2022-01-19 Thread Ruifeng Wang
As per design document, RTE_ARCH is the name of the architecture.
However, the definition was missing on Arm with meson build.
It impacts applications that refer to this string.

Added for Arm builds.

Fixes: b1d48c41189a ("build: support ARM with meson")
Cc: sta...@dpdk.org

Reported-by: Stephen Hemminger 
Signed-off-by: Ruifeng Wang 
---
v3:
Use armv8 instead of arm64. (Thomas, Juraj)
v2:
Renamed string for aarch32 to keep consistency with RTE_ARCH_xx defines. 
(Thomas)

 config/arm/meson.build | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/config/arm/meson.build b/config/arm/meson.build
index 16e808cdd5..c3a3f2faaf 100644
--- a/config/arm/meson.build
+++ b/config/arm/meson.build
@@ -49,6 +49,7 @@ implementer_generic = {
 ['RTE_ARCH_ARM_NEON_MEMCPY', false],
 ['RTE_ARCH_STRICT_ALIGN', true],
 ['RTE_ARCH_ARMv8_AARCH32', true],
+['RTE_ARCH', 'armv8_aarch32'],
 ['RTE_CACHE_LINE_SIZE', 64]
 ]
 }
@@ -432,11 +433,13 @@ if dpdk_conf.get('RTE_ARCH_32')
 else
 # armv7 build
 dpdk_conf.set('RTE_ARCH_ARMv7', true)
+dpdk_conf.set('RTE_ARCH', 'armv7')
 # the minimum architecture supported, armv7-a, needs the following,
 machine_args += '-mfpu=neon'
 endif
 else
 # armv8 build
+dpdk_conf.set('RTE_ARCH', 'armv8')
 update_flags = true
 soc_config = {}
 if not meson.is_cross_build()
-- 
2.25.1



[PATCH v2] raw/ifpga: fix pthread cannot join

2022-01-19 Thread Wei Huang
From: Tianfei Zhang 

When we want to close a thread, we should set a flag to notify
the thread handler function.

Fixes: 9c006c45 ("raw/ifpga: scan PCIe BDF device tree")
Cc: sta...@dpdk.org

Signed-off-by: Tianfei Zhang 
---
v2: update commit log
---
 drivers/raw/ifpga/ifpga_rawdev.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/raw/ifpga/ifpga_rawdev.c b/drivers/raw/ifpga/ifpga_rawdev.c
index 8d9db58..26752c4 100644
--- a/drivers/raw/ifpga/ifpga_rawdev.c
+++ b/drivers/raw/ifpga/ifpga_rawdev.c
@@ -497,7 +497,7 @@ static int set_surprise_link_check_aer(
int gsd_enable, ret;
 #define MS 1000
 
-   while (1) {
+   while (ifpga_monitor_start) {
gsd_enable = 0;
for (i = 0; i < IFPGA_RAWDEV_NUM; i++) {
ifpga_rdev = &ifpga_rawdevices[i];
@@ -544,7 +544,9 @@ static int set_surprise_link_check_aer(
 {
int ret;
 
-   if (ifpga_monitor_start == 1) {
+   if ((ifpga_monitor_start == 1) && ifpga_monitor_start_thread) {
+   ifpga_monitor_start = 0;
+
ret = pthread_cancel(ifpga_monitor_start_thread);
if (ret)
IFPGA_RAWDEV_PMD_ERR("Can't cancel the thread");
@@ -553,8 +555,6 @@ static int set_surprise_link_check_aer(
if (ret)
IFPGA_RAWDEV_PMD_ERR("Can't join the thread");
 
-   ifpga_monitor_start = 0;
-
return ret;
}
 
-- 
1.8.3.1



[PATCH 0/2] add module EEPROM ops for ice

2022-01-19 Thread Steve Yang
Added the following 2 items of ice_eth_dev_ops for ice:
- ice_get_module_info
- ice_get_module_eeprom

Fixed a stack overflow error when displaying large EEPROM info.

Steve Yang (2):
  net/ice: add module EEPROM ops for ice
  app/testpmd: fix stack overflow for EEPROM display

 app/test-pmd/config.c|  22 -
 drivers/net/ice/ice_ethdev.c | 160 +++
 drivers/net/ice/ice_ethdev.h |  25 ++
 3 files changed, 203 insertions(+), 4 deletions(-)

-- 
2.27.0



[PATCH 1/2] net/ice: add module EEPROM ops for ice

2022-01-19 Thread Steve Yang
Add new callbacks for eth_dev_ops of ice to get the information
and data of plugin module EEPROM.

Signed-off-by: Steve Yang 
---
 drivers/net/ice/ice_ethdev.c | 160 +++
 drivers/net/ice/ice_ethdev.h |  25 ++
 2 files changed, 185 insertions(+)

diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
index 13a7a9702a..70c162d0a8 100644
--- a/drivers/net/ice/ice_ethdev.c
+++ b/drivers/net/ice/ice_ethdev.c
@@ -139,6 +139,10 @@ static int ice_vlan_pvid_set(struct rte_eth_dev *dev,
 static int ice_get_eeprom_length(struct rte_eth_dev *dev);
 static int ice_get_eeprom(struct rte_eth_dev *dev,
  struct rte_dev_eeprom_info *eeprom);
+static int ice_get_module_info(struct rte_eth_dev *dev,
+  struct rte_eth_dev_module_info *modinfo);
+static int ice_get_module_eeprom(struct rte_eth_dev *dev,
+struct rte_dev_eeprom_info *info);
 static int ice_stats_get(struct rte_eth_dev *dev,
 struct rte_eth_stats *stats);
 static int ice_stats_reset(struct rte_eth_dev *dev);
@@ -238,6 +242,8 @@ static const struct eth_dev_ops ice_eth_dev_ops = {
.tx_burst_mode_get= ice_tx_burst_mode_get,
.get_eeprom_length= ice_get_eeprom_length,
.get_eeprom   = ice_get_eeprom,
+   .get_module_info  = ice_get_module_info,
+   .get_module_eeprom= ice_get_module_eeprom,
.stats_get= ice_stats_get,
.stats_reset  = ice_stats_reset,
.xstats_get   = ice_xstats_get,
@@ -4934,6 +4940,160 @@ ice_get_eeprom(struct rte_eth_dev *dev,
return 0;
 }
 
+static int
+ice_get_module_info(struct rte_eth_dev *dev,
+   struct rte_eth_dev_module_info *modinfo)
+{
+   struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+   enum ice_status status;
+   u8 sff8472_comp = 0;
+   u8 sff8472_swap = 0;
+   u8 sff8636_rev = 0;
+   u8 value = 0;
+
+   status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR, 0x00, 0x00,
+  0, &value, 1, 0, NULL);
+   if (status)
+   return -EIO;
+
+   switch (value) {
+   case ICE_MODULE_TYPE_SFP:
+   status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR,
+  ICE_MODULE_SFF_8472_COMP, 0x00, 0,
+  &sff8472_comp, 1, 0, NULL);
+   if (status)
+   return -EIO;
+   status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR,
+  ICE_MODULE_SFF_8472_SWAP, 0x00, 0,
+  &sff8472_swap, 1, 0, NULL);
+   if (status)
+   return -EIO;
+
+   if (sff8472_swap & ICE_MODULE_SFF_ADDR_MODE) {
+   modinfo->type = ICE_MODULE_SFF_8079;
+   modinfo->eeprom_len = ICE_MODULE_SFF_8079_LEN;
+   } else if (sff8472_comp &&
+  (sff8472_swap & ICE_MODULE_SFF_DIAG_CAPAB)) {
+   modinfo->type = ICE_MODULE_SFF_8472;
+   modinfo->eeprom_len = ICE_MODULE_SFF_8472_LEN;
+   } else {
+   modinfo->type = ICE_MODULE_SFF_8079;
+   modinfo->eeprom_len = ICE_MODULE_SFF_8079_LEN;
+   }
+   break;
+   case ICE_MODULE_TYPE_QSFP_PLUS:
+   case ICE_MODULE_TYPE_QSFP28:
+   status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR,
+  ICE_MODULE_REVISION_ADDR, 0x00, 0,
+  &sff8636_rev, 1, 0, NULL);
+   if (status)
+   return -EIO;
+   /* Check revision compliance */
+   if (sff8636_rev > 0x02) {
+   /* Module is SFF-8636 compliant */
+   modinfo->type = ICE_MODULE_SFF_8636;
+   modinfo->eeprom_len = ICE_MODULE_QSFP_MAX_LEN;
+   } else {
+   modinfo->type = ICE_MODULE_SFF_8436;
+   modinfo->eeprom_len = ICE_MODULE_QSFP_MAX_LEN;
+   }
+   break;
+   default:
+   PMD_DRV_LOG(WARNING, "SFF Module Type not recognized.\n");
+   return -EINVAL;
+   }
+   return 0;
+}
+
+static int
+ice_get_module_eeprom(struct rte_eth_dev *dev,
+ struct rte_dev_eeprom_info *info)
+{
+   struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+#define SFF_READ_BLOCK_SIZE 8
+#define I2C_BUSY_TRY_TIMES 4
+#define I2C_USLEEP_MIN_TIME 1500
+#define I2C_USLEEP_MAX_TIME 2500
+   uint8_t value[SFF_READ_BLOCK_SIZE] = {0};
+   uint8_t addr = ICE_I2C_EEPROM_DEV_ADDR;
+   

[PATCH 2/2] app/testpmd: fix stack overflow for EEPROM display

2022-01-19 Thread Steve Yang
When the size of the EEPROM exceeds the default thread stack size (8MB),
e.g. a 10MB EEPROM, the application crashes with a stack overflow.

Allocate the EEPROM data buffer on the heap.

Fixes: 6b67721dee2a ("app/testpmd: add EEPROM command")

Signed-off-by: Steve Yang 
---
 app/test-pmd/config.c | 22 ++
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index 1722d6c8f8..e812f57151 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -912,10 +912,15 @@ port_eeprom_display(portid_t port_id)
return;
}
 
-   char buf[len_eeprom];
einfo.offset = 0;
einfo.length = len_eeprom;
-   einfo.data = buf;
+   einfo.data = calloc(1, len_eeprom);
+   if (!einfo.data) {
+   fprintf(stderr,
+   "Allocation of port %u eeprom data failed\n",
+   port_id);
+   return;
+   }
 
ret = rte_eth_dev_get_eeprom(port_id, &einfo);
if (ret != 0) {
@@ -933,10 +938,12 @@ port_eeprom_display(portid_t port_id)
fprintf(stderr, "Unable to get EEPROM: %d\n", ret);
break;
}
+   free(einfo.data);
return;
}
rte_hexdump(stdout, "hexdump", einfo.data, einfo.length);
printf("Finish -- Port: %d EEPROM length: %d bytes\n", port_id, 
len_eeprom);
+   free(einfo.data);
 }
 
 void
@@ -972,10 +979,15 @@ port_module_eeprom_display(portid_t port_id)
return;
}
 
-   char buf[minfo.eeprom_len];
einfo.offset = 0;
einfo.length = minfo.eeprom_len;
-   einfo.data = buf;
+   einfo.data = calloc(1, minfo.eeprom_len);
+   if (!einfo.data) {
+   fprintf(stderr,
+   "Allocation of port %u eeprom data failed\n",
+   port_id);
+   return;
+   }
 
ret = rte_eth_dev_get_module_eeprom(port_id, &einfo);
if (ret != 0) {
@@ -994,11 +1006,13 @@ port_module_eeprom_display(portid_t port_id)
ret);
break;
}
+   free(einfo.data);
return;
}
 
rte_hexdump(stdout, "hexdump", einfo.data, einfo.length);
printf("Finish -- Port: %d MODULE EEPROM length: %d bytes\n", port_id, 
einfo.length);
+   free(einfo.data);
 }
 
 int
-- 
2.27.0



Re: [PATCH 2/8] net/cnxk: add CN9K template Rx functions to build

2022-01-19 Thread Jerin Jacob
On Mon, Dec 13, 2021 at 1:52 PM  wrote:
>
> From: Pavan Nikhilesh 
>
> Add CN9K seggeregated Rx and event dequeue functions to build,

Fix typo

> add macros to make future modifications simpler.
>
> Signed-off-by: Pavan Nikhilesh 

Please rebase to latest next-net-mrvl. There is a build issue now.

ccache gcc -Idrivers/libtmp_rte_net_cnxk.a.p -Idrivers -I../drivers
-Idrivers/net/cnxk -I../drivers/net/cnxk -Ilib/ethdev -I../lib/ethdev
-I. -I.. -Iconfig -I../config -Ilib/eal/include -I../lib/eal/include
-Ilib/eal/linux/include -I../lib/
eal/linux/include -Ilib/eal/x86/include -I../lib/eal/x86/include
-Ilib/eal/common -I../lib/eal/common -Ilib/eal -I../lib/eal
-Ilib/kvargs -I../lib/kvargs -Ilib/metrics -I../lib/metrics
-Ilib/telemetry -I../lib/telemetry -Ilib/net -I../lib/n
et -Ilib/mbuf -I../lib/mbuf -Ilib/mempool -I../lib/mempool -Ilib/ring
-I../lib/ring -Ilib/meter -I../lib/meter -Idrivers/bus/pci
-I../drivers/bus/pci -I../drivers/bus/pci/linux -Ilib/pci -I../lib/pci
-Idrivers/bus/vdev -I../drivers/bus/vdev
 -Ilib/cryptodev -I../lib/cryptodev -Ilib/rcu -I../lib/rcu
-Ilib/eventdev -I../lib/eventdev -Ilib/hash -I../lib/hash -Ilib/timer
-I../lib/timer -Ilib/security -I../lib/security -Idrivers/common/cnxk
-I../drivers/common/cnxk -Idrivers/mempoo
l/cnxk -I../drivers/mempool/cnxk -fdiagnostics-color=always
-D_FILE_OFFSET_BITS=64 -Wall -Winvalid-pch -Werror -O2 -g -include
rte_config.h -Wextra -Wcast-qual -Wdeprecated -Wformat
-Wformat-nonliteral -Wformat-security -Wmissing-declaratio
ns -Wmissing-prototypes -Wnested-externs -Wold-style-definition
-Wpointer-arith -Wsign-compare -Wstrict-prototypes -Wundef
-Wwrite-strings -Wno-address-of-packed-member -Wno-packed-not-aligned
-Wno-missing-field-initializers -Wno-zero-lengt
h-bounds -D_GNU_SOURCE -fPIC -march=native -DALLOW_EXPERIMENTAL_API
-DALLOW_INTERNAL_API -Wno-format-truncation -flax-vector-conversions
-Wno-strict-aliasing -DRTE_LOG_DEFAULT_LOGTYPE=pmd.net.cnxk -MD -MQ
drivers/libtmp_rte_net_cnxk.a.p/net
_cnxk_cn9k_rx_select.c.o -MF
drivers/libtmp_rte_net_cnxk.a.p/net_cnxk_cn9k_rx_select.c.o.d -o
drivers/libtmp_rte_net_cnxk.a.p/net_cnxk_cn9k_rx_select.c.o -c
../drivers/net/cnxk/cn9k_rx_select.c
../drivers/net/cnxk/cn9k_rx_select.c: In function ‘cn9k_eth_set_rx_function’:
../drivers/net/cnxk/cn9k_rx_select.c:59:27: error:
"DEV_RX_OFFLOAD_SCATTER" is deprecated [-Werror]
   59 | if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
  |   ^~~~
../drivers/net/cnxk/cn9k_rx_select.c:64:20: error:
"DEV_RX_OFFLOAD_SCATTER" is deprecated [-Werror]
   64 | if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
  |^~~


[PATCH] net/virtio: fix incorrect slots number when indirect feature on

2022-01-19 Thread Marvin Liu
The virtio driver occupies only one slot for enqueuing chained mbufs when
the indirect feature is on. The required slots calculation should depend on
the indirect feature status at the end.

Fixes: 0eaf7fc2fe8e ("net/virtio: separate AVX Rx/Tx")
Cc: sta...@dpdk.org

Signed-off-by: Marvin Liu 

diff --git a/drivers/net/virtio/virtio_rxtx_packed.h 
b/drivers/net/virtio/virtio_rxtx_packed.h
index d5c259a1f6..536112983c 100644
--- a/drivers/net/virtio/virtio_rxtx_packed.h
+++ b/drivers/net/virtio/virtio_rxtx_packed.h
@@ -125,13 +125,12 @@ virtqueue_enqueue_single_packed_vec(struct virtnet_tx 
*txvq,
 * any_layout => number of segments
 * default=> number of segments + 1
 */
-   slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
can_push = rte_mbuf_refcnt_read(txm) == 1 &&
   RTE_MBUF_DIRECT(txm) &&
   txm->nb_segs == 1 &&
   rte_pktmbuf_headroom(txm) >= hdr_size;
 
-   slots = txm->nb_segs + !can_push;
+   slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
need = slots - vq->vq_free_cnt;
 
/* Positive value indicates it need free vring descriptors */
-- 
2.17.1



[PATCH v3] raw/ifpga: fix pthread cannot join

2022-01-19 Thread Wei Huang
From: Tianfei Zhang 

When we want to close a thread, we should set a flag to notify
the thread handler function.

Fixes: 9c006c45 ("raw/ifpga: scan PCIe BDF device tree")
Cc: sta...@dpdk.org

Signed-off-by: Tianfei Zhang 
---
v2: update commit log
---
v3: set thread id to 0 after pthread_join
---
 drivers/raw/ifpga/ifpga_rawdev.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/raw/ifpga/ifpga_rawdev.c b/drivers/raw/ifpga/ifpga_rawdev.c
index 8d9db58..2bc569b 100644
--- a/drivers/raw/ifpga/ifpga_rawdev.c
+++ b/drivers/raw/ifpga/ifpga_rawdev.c
@@ -497,7 +497,7 @@ static int set_surprise_link_check_aer(
int gsd_enable, ret;
 #define MS 1000
 
-   while (1) {
+   while (ifpga_monitor_start) {
gsd_enable = 0;
for (i = 0; i < IFPGA_RAWDEV_NUM; i++) {
ifpga_rdev = &ifpga_rawdevices[i];
@@ -542,18 +542,21 @@ static int set_surprise_link_check_aer(
 static int
 ifpga_monitor_stop_func(void)
 {
+   void *status;
int ret;
 
-   if (ifpga_monitor_start == 1) {
+   if ((ifpga_monitor_start == 1) && ifpga_monitor_start_thread) {
+   ifpga_monitor_start = 0;
+
ret = pthread_cancel(ifpga_monitor_start_thread);
if (ret)
IFPGA_RAWDEV_PMD_ERR("Can't cancel the thread");
 
-   ret = pthread_join(ifpga_monitor_start_thread, NULL);
+   ret = pthread_join(ifpga_monitor_start_thread, &status);
if (ret)
IFPGA_RAWDEV_PMD_ERR("Can't join the thread");
 
-   ifpga_monitor_start = 0;
+   ifpga_monitor_start_thread = 0;
 
return ret;
}
-- 
1.8.3.1



Re: [PATCH v2 1/2] common/cnxk: get head-tail of Rx and Tx queues

2022-01-19 Thread Jerin Jacob
On Wed, Jan 19, 2022 at 3:14 PM Rahul Bhansali  wrote:
>
> Adds roc APIs roc_nix_cq_head_tail_get, roc_nix_sq_head_tail_get
> to get head-tail of receive and transmit queue respectively.
>
> Signed-off-by: Rahul Bhansali 

Series Acked-by: Jerin Jacob 
Series applied to dpdk-next-net-mrvl/for-next-net. Thanks.


Changed the git log to:

common/cnxk: get head and tail of Rx and Tx queues

Adds roc APIs roc_nix_cq_head_tail_get(), roc_nix_sq_head_tail_get()
to get tail and head of receive and transmit queue respectively.

Signed-off-by: Rahul Bhansali 
Acked-by: Ray Kinsella 
Acked-by: Jerin Jacob 


> ---
> v2 changes:
>  - No change
>
>  drivers/common/cnxk/roc_nix.h   |  4 +++
>  drivers/common/cnxk/roc_nix_queue.c | 53 +
>  drivers/common/cnxk/version.map |  2 ++
>  3 files changed, 59 insertions(+)
>
> diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h
> index 69a5e8e7b4..d79abfef9f 100644
> --- a/drivers/common/cnxk/roc_nix.h
> +++ b/drivers/common/cnxk/roc_nix.h
> @@ -795,8 +795,12 @@ int __roc_api roc_nix_rq_ena_dis(struct roc_nix_rq *rq, 
> bool enable);
>  int __roc_api roc_nix_rq_fini(struct roc_nix_rq *rq);
>  int __roc_api roc_nix_cq_init(struct roc_nix *roc_nix, struct roc_nix_cq 
> *cq);
>  int __roc_api roc_nix_cq_fini(struct roc_nix_cq *cq);
> +void __roc_api roc_nix_cq_head_tail_get(struct roc_nix *roc_nix, uint16_t 
> qid,
> +   uint32_t *head, uint32_t *tail);
>  int __roc_api roc_nix_sq_init(struct roc_nix *roc_nix, struct roc_nix_sq 
> *sq);
>  int __roc_api roc_nix_sq_fini(struct roc_nix_sq *sq);
> +void __roc_api roc_nix_sq_head_tail_get(struct roc_nix *roc_nix, uint16_t 
> qid,
> +   uint32_t *head, uint32_t *tail);
>
>  /* PTP */
>  int __roc_api roc_nix_ptp_rx_ena_dis(struct roc_nix *roc_nix, int enable);
> diff --git a/drivers/common/cnxk/roc_nix_queue.c 
> b/drivers/common/cnxk/roc_nix_queue.c
> index c638cd43e4..67f83acdf2 100644
> --- a/drivers/common/cnxk/roc_nix_queue.c
> +++ b/drivers/common/cnxk/roc_nix_queue.c
> @@ -969,3 +969,56 @@ roc_nix_sq_fini(struct roc_nix_sq *sq)
>
> return rc;
>  }
> +
> +void
> +roc_nix_cq_head_tail_get(struct roc_nix *roc_nix, uint16_t qid, uint32_t 
> *head,
> +uint32_t *tail)
> +{
> +   struct nix *nix = roc_nix_to_nix_priv(roc_nix);
> +   uint64_t reg, val;
> +   int64_t *addr;
> +
> +   if (head == NULL || tail == NULL)
> +   return;
> +
> +   reg = (((uint64_t)qid) << 32);
> +   addr = (int64_t *)(nix->base + NIX_LF_CQ_OP_STATUS);
> +   val = roc_atomic64_add_nosync(reg, addr);
> +   if (val &
> +   (BIT_ULL(NIX_CQ_OP_STAT_OP_ERR) | BIT_ULL(NIX_CQ_OP_STAT_CQ_ERR)))
> +   val = 0;
> +
> +   *tail = (uint32_t)(val & 0xF);
> +   *head = (uint32_t)((val >> 20) & 0xF);
> +}
> +
> +void
> +roc_nix_sq_head_tail_get(struct roc_nix *roc_nix, uint16_t qid, uint32_t 
> *head,
> +uint32_t *tail)
> +{
> +   struct nix *nix = roc_nix_to_nix_priv(roc_nix);
> +   struct roc_nix_sq *sq = nix->sqs[qid];
> +   uint16_t sqes_per_sqb, sqb_cnt;
> +   uint64_t reg, val;
> +   int64_t *addr;
> +
> +   if (head == NULL || tail == NULL)
> +   return;
> +
> +   reg = (((uint64_t)qid) << 32);
> +   addr = (int64_t *)(nix->base + NIX_LF_SQ_OP_STATUS);
> +   val = roc_atomic64_add_nosync(reg, addr);
> +   if (val & BIT_ULL(NIX_CQ_OP_STAT_OP_ERR)) {
> +   val = 0;
> +   return;
> +   }
> +
> +   *tail = (uint32_t)((val >> 28) & 0x3F);
> +   *head = (uint32_t)((val >> 20) & 0x3F);
> +   sqb_cnt = (uint16_t)(val & 0x);
> +
> +   sqes_per_sqb = 1 << sq->sqes_per_sqb_log2;
> +
> +   /* Update tail index as per used sqb count */
> +   *tail += (sqes_per_sqb * (sqb_cnt - 1));
> +}
> diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map
> index 07c6720f0c..a9dba47e0e 100644
> --- a/drivers/common/cnxk/version.map
> +++ b/drivers/common/cnxk/version.map
> @@ -107,6 +107,7 @@ INTERNAL {
> roc_nix_bpf_timeunit_get;
> roc_nix_cq_dump;
> roc_nix_cq_fini;
> +   roc_nix_cq_head_tail_get;
> roc_nix_cq_init;
> roc_nix_cqe_dump;
> roc_nix_dev_fini;
> @@ -222,6 +223,7 @@ INTERNAL {
> roc_nix_rx_queue_intr_enable;
> roc_nix_sq_dump;
> roc_nix_sq_fini;
> +   roc_nix_sq_head_tail_get;
> roc_nix_sq_init;
> roc_nix_stats_get;
> roc_nix_stats_queue_get;
> --
> 2.25.1
>


Re: [dpdk-dev] [PATCH v2 1/4] drivers: add support for switch header type pre L2

2022-01-19 Thread Jerin Jacob
On Mon, Jan 3, 2022 at 11:49 AM  wrote:
>
> From: Kiran Kumar K 
>
> Adding changes to configure switch header type pre L2 for cnxk.
> Along with switch header type user needs to provide the
> offset with in the custom header that holds the size of the
> custom header and mask for the size with in the size offset.

1) with in -> within
2) Describe what is pre L2 in the commit message.
3) Change the subject to net/cnxk: support  pre L2 switch header type
Also remove "add" in other patches in the series.

4) Please rebase to fix the following

[for-next-net]dell[dpdk-next-net-mrvl] $ git pw series apply 21048
Failed to apply patch:
Applying: drivers: add support for switch header type pre L2
Applying: common/cnxk: support custom pre L2 header parsing as raw
Applying: common/cnxk: support matching VLAN existence in RTE Flow
error: sha1 information is lacking or useless
(drivers/common/cnxk/roc_npc_priv.h).
error: could not build fake ancestor
hint: Use 'git am --show-current-patch=diff' to see the failed patch
Patch failed at 0003 common/cnxk: support matching VLAN existence in RTE Flow
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".

>
> Signed-off-by: Kiran Kumar K 
> Reviewed-by: Satheesh Paul 
> ---
> v2:
> * Fixed checkpatch errors in commit messages
>
>  doc/guides/nics/cnxk.rst   | 25 +++-
>  drivers/common/cnxk/hw/npc.h   | 11 ---
>  drivers/common/cnxk/roc_mbox.h |  1 +
>  drivers/common/cnxk/roc_nix.h  |  5 +++-
>  drivers/common/cnxk/roc_nix_ops.c  | 12 +++-
>  drivers/common/cnxk/roc_npc.h  |  8 +
>  drivers/net/cnxk/cnxk_ethdev.c |  7 +++--
>  drivers/net/cnxk/cnxk_ethdev_devargs.c | 41 ++
>  8 files changed, 99 insertions(+), 11 deletions(-)
>
> diff --git a/doc/guides/nics/cnxk.rst b/doc/guides/nics/cnxk.rst
> index 2927c6cb7e..7c82cb55e1 100644
> --- a/doc/guides/nics/cnxk.rst
> +++ b/doc/guides/nics/cnxk.rst
> @@ -167,7 +167,30 @@ Runtime Config Options
>
> With the above configuration, higig2 will be enabled on that port and the
> traffic on this port should be higig2 traffic only. Supported switch 
> header
> -   types are "chlen24b", "chlen90b", "dsa", "exdsa", "higig2" and 
> "vlan_exdsa".
> +   types are "chlen24b", "chlen90b", "dsa", "exdsa", "higig2", "vlan_exdsa" 
> and "pre_l2".
> +
> +- ``Flow pre l2 info`` (default ``0x0/0x0/0x0``)
> +
> +   In case of custom pre l2 headers, an offset, mask with in the offset and 
> shift direction

Please explain a bit about what a pre L2 header is.

> +   has to be provided within the custom header that holds the size of the 
> custom header.
> +   This is valid only with switch header pre l2. Maximum supported offset 
> range is 0 to 255

pre l2 -> ``pre_l2``

> +   and mask range is 1 to 255 and shift direction, 0: left shift, 1: right 
> shift.
> +   Info format will be "offset/mask/shift direction". All parameters has to 
> be in hexadecimal
> +   format and mask should be contiguous. Info can be configured using
> +   ``flow_pre_l2_info`` ``devargs`` parameter.
> +
> +   For example::
> +
> +  -a 0002:02:00.0,switch_header="pre_l2",flow_pre_l2_info=0x2/0x7e/0x1
> +
> +   With the above configuration, custom pre l2 header will be enabled on 
> that port and size
> +   of the header is placed at byte offset 0x2 in the packet with mask 0x7e 
> and right shift will
> +   be used to get the size. i.e size will be (pkt[0x2] & 0x7e) >> shift 
> count.
> +   Shift count will be calculated based on mask and shift direction. For 
> example if mask is 0x7c

For example -> For example,

> +   and shift direction is 1, i.e right shift, then the shift count will be 2 
> i.e absolute
> +   position of the right most set bit. If the mask is 0x7c and shift 
> direction is 0, i.e left
> +   shift, then the shift count will be 1, i.e 8-n, where n is the absolute 
> position of
> +   left most set bit.
>
>  - ``RSS tag as XOR`` (default ``0``)