[PATCH] event/cnxk: add SLMTST support to Tx adapter

2022-04-27 Thread Pavan Nikhilesh
Scheduled LMTST uses in-core LSW (LMTST scheduling widget) to
coordinate with SSO and send a LMTST to the destination
coprocessor without the need for the core to be the head of
the scheduling context it is currently holding.

Use SLMTST to send mbuf to NIX-TX for transmit. SLMTST only
supports transmitting a single WQE.
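
A minimal sketch of the difference at the submission point, assuming
lmt_arg and io_addr have already been prepared as for a regular LMTST
(both names illustrative, not from the patch):

    /* Regular LMTST: the core must first become the head of the
     * scheduling context it holds before the store is issued.
     */
    roc_lmt_submit_steorl(lmt_arg, io_addr);

    /* Scheduled LMTST: STSMAXL hands the request to the in-core LSW,
     * which coordinates with SSO, so head-of-context is not required.
     * Only a single WQE can be transmitted this way.
     */
    roc_lmt_submit_stsmaxl(lmt_arg, io_addr);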

Signed-off-by: Pavan Nikhilesh 
---
 Depends-on: Series-22634

 drivers/common/cnxk/hw/ssow.h|  7 +++
 drivers/common/cnxk/roc_dev_priv.h   |  6 ++
 drivers/common/cnxk/roc_io.h |  8 
 drivers/common/cnxk/roc_io_generic.h |  7 +++
 drivers/common/cnxk/roc_nix.c| 19 +++
 drivers/common/cnxk/roc_nix.h|  4 
 drivers/common/cnxk/roc_sso.c| 23 +++
 drivers/common/cnxk/roc_sso.h|  2 ++
 drivers/common/cnxk/version.map  |  2 ++
 drivers/event/cnxk/cn10k_eventdev.c  | 11 +++
 drivers/event/cnxk/cn10k_worker.h| 19 +--
 drivers/event/cnxk/cnxk_eventdev.h   |  2 +-
 12 files changed, 103 insertions(+), 7 deletions(-)

diff --git a/drivers/common/cnxk/hw/ssow.h b/drivers/common/cnxk/hw/ssow.h
index 618ab7973b..b40238bc6c 100644
--- a/drivers/common/cnxk/hw/ssow.h
+++ b/drivers/common/cnxk/hw/ssow.h
@@ -62,6 +62,13 @@
 #define SSOW_GW_RESULT_GW_NO_WORK (0x1ull) /* [CN10K, .) */
 #define SSOW_GW_RESULT_GW_ERROR  (0x2ull) /* [CN10K, .) */

+#define SSOW_LSW_MODE_NO_LSW (0x0)
+#define SSOW_LSW_MODE_WAIT   (0x1)
+#define SSOW_LSW_MODE_IMMED  (0x2)
+
+#define SSOW_LSW_WQE_RELEASE_WAIT_ACK (0x0)
+#define SSOW_LSW_WQE_RELEASE_IMMED(0x1)
+
 #define SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT 63
 #define SSOW_LF_GWS_TAG_PEND_SWITCH_BIT  62
 #define SSOW_LF_GWS_TAG_PEND_DESCHED_BIT  58
diff --git a/drivers/common/cnxk/roc_dev_priv.h b/drivers/common/cnxk/roc_dev_priv.h
index 302dc0feb0..e301487f4c 100644
--- a/drivers/common/cnxk/roc_dev_priv.h
+++ b/drivers/common/cnxk/roc_dev_priv.h
@@ -54,6 +54,12 @@ dev_get_pf(uint16_t pf_func)
return (pf_func >> RVU_PFVF_PF_SHIFT) & RVU_PFVF_PF_MASK;
 }

+static inline int
+dev_get_func(uint16_t pf_func)
+{
+   return (pf_func >> RVU_PFVF_FUNC_SHIFT) & RVU_PFVF_FUNC_MASK;
+}
+
 static inline int
 dev_pf_func(int pf, int vf)
 {
diff --git a/drivers/common/cnxk/roc_io.h b/drivers/common/cnxk/roc_io.h
index 62e98d9d00..6a76e3fa71 100644
--- a/drivers/common/cnxk/roc_io.h
+++ b/drivers/common/cnxk/roc_io.h
@@ -154,6 +154,14 @@ roc_lmt_submit_steorl(uint64_t data, plt_iova_t io_address)
 [rs] "r"(io_address));
 }

+static __plt_always_inline void
+roc_lmt_submit_stsmaxl(uint64_t data, plt_iova_t io_address)
+{
+   asm volatile(".cpu  generic+lse\n"
+"stsmaxl %x[d], [%[rs]]" ::[d] "r"(data),
+[rs] "r"(io_address));
+}
+
 static __plt_always_inline void
 roc_lmt_mov(void *out, const void *in, const uint32_t lmtext)
 {
diff --git a/drivers/common/cnxk/roc_io_generic.h b/drivers/common/cnxk/roc_io_generic.h
index 42764455cc..097ed8af09 100644
--- a/drivers/common/cnxk/roc_io_generic.h
+++ b/drivers/common/cnxk/roc_io_generic.h
@@ -98,6 +98,13 @@ roc_lmt_submit_steorl(uint64_t data, plt_iova_t io_address)
PLT_SET_USED(io_address);
 }

+static __plt_always_inline void
+roc_lmt_submit_stsmaxl(uint64_t data, plt_iova_t io_address)
+{
+   PLT_SET_USED(data);
+   PLT_SET_USED(io_address);
+}
+
 static __plt_always_inline void
 roc_lmt_mov(void *out, const void *in, const uint32_t lmtext)
 {
diff --git a/drivers/common/cnxk/roc_nix.c b/drivers/common/cnxk/roc_nix.c
index 151d8c3426..16d707b5ff 100644
--- a/drivers/common/cnxk/roc_nix.c
+++ b/drivers/common/cnxk/roc_nix.c
@@ -139,6 +139,25 @@ roc_nix_max_pkt_len(struct roc_nix *roc_nix)
return NIX_RPM_MAX_HW_FRS;
 }

+int
+roc_nix_sched_lmt_enable(struct roc_nix *roc_nix)
+{
+   struct nix *nix = roc_nix_to_nix_priv(roc_nix);
+   struct mbox *mbox = (&nix->dev)->mbox;
+   struct lmtst_tbl_setup_req *req;
+
+   req = mbox_alloc_msg_lmtst_tbl_setup(mbox);
+   if (req == NULL)
+   return -ENOSPC;
+   req->pcifunc = 0;
+   req->ssow_pf_func = dev_get_pf(idev_sso_pffunc_get()) << 8;
+   req->ssow_pf_func |=
+   (uint64_t)(dev_get_func(idev_sso_pffunc_get()) & 0xFF);
+   req->sched_ena = 1;
+
+   return mbox_process(mbox);
+}
+
 int
 roc_nix_lf_alloc(struct roc_nix *roc_nix, uint32_t nb_rxq, uint32_t nb_txq,
 uint64_t rx_cfg)
diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h
index dbb816d961..b985fb5df4 100644
--- a/drivers/common/cnxk/roc_nix.h
+++ b/drivers/common/cnxk/roc_nix.h
@@ -904,4 +904,8 @@ int __roc_api roc_nix_mcast_mcam_entry_write(struct roc_nix *roc_nix,
 uint64_t action);
 int __roc_api roc_nix_mcast_mcam_entry_ena_dis(struct roc_nix *roc_nix,
   uint32_t index, bool enable);
+
+/* SSO */
int __roc_api roc_nix_sched_lmt_enable(struct roc_nix *roc_nix);

[PATCH v2] net/iavf: fix segfaults when calling API after VF reset failed

2022-04-27 Thread Yiding Zhou
Some pointers are set to NULL when iavf_dev_reset() fails, for
example vf->vf_res, vf->vsi_res, vf->rss_key, etc. APIs that access
these NULL pointers will trigger a segfault.

This patch adds a closed flag to indicate that the VF is closed,
and rejects API calls in this state to avoid a coredump.
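
A minimal sketch of the guard pattern this patch applies at the top
of each dev op (the op name and body are illustrative):

    static int
    iavf_some_dev_op(struct rte_eth_dev *dev)
    {
            struct iavf_adapter *adapter =
                    IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);

            /* reject the call once the VF is closed */
            if (adapter->closed)
                    return -EIO;

            /* ... normal handling ... */
            return 0;
    }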

Fixes: e74e1bb6280d ("net/iavf: enable port reset")
Cc: sta...@dpdk.org

Signed-off-by: Yiding Zhou 
---
 drivers/net/iavf/iavf.h|  1 +
 drivers/net/iavf/iavf_ethdev.c | 57 +++---
 drivers/net/iavf/iavf_rxtx.c   | 10 ++
 drivers/net/iavf/iavf_vchnl.c  | 17 ++
 4 files changed, 81 insertions(+), 4 deletions(-)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index a01d18e61b..b3b582dd21 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -298,6 +298,7 @@ struct iavf_adapter {
bool tx_vec_allowed;
uint32_t ptype_tbl[IAVF_MAX_PKT_TYPE] __rte_cache_min_aligned;
bool stopped;
+   bool closed;
uint16_t fdir_ref_cnt;
struct iavf_devargs devargs;
 };
diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c
index d6190ac24a..91b6e64840 100644
--- a/drivers/net/iavf/iavf_ethdev.c
+++ b/drivers/net/iavf/iavf_ethdev.c
@@ -229,9 +229,15 @@ static const struct eth_dev_ops iavf_eth_dev_ops = {
 };
 
 static int
-iavf_tm_ops_get(struct rte_eth_dev *dev __rte_unused,
+iavf_tm_ops_get(struct rte_eth_dev *dev,
void *arg)
 {
+   struct iavf_adapter *adapter =
+   IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+
+   if (adapter->closed)
+   return -EIO;
+
if (!arg)
return -EINVAL;
 
@@ -342,6 +348,9 @@ iavf_set_mc_addr_list(struct rte_eth_dev *dev,
return -EINVAL;
}
 
+   if (adapter->closed)
+   return -EIO;
+
/* flush previous addresses */
err = iavf_add_del_mc_addr_list(adapter, vf->mc_addrs, vf->mc_addrs_num,
false);
@@ -613,6 +622,9 @@ iavf_dev_configure(struct rte_eth_dev *dev)
dev->data->nb_tx_queues);
int ret;
 
+   if (ad->closed)
+   return -EIO;
+
ad->rx_bulk_alloc_allowed = true;
/* Initialize to TRUE. If any of Rx queues doesn't meet the
 * vector Rx/Tx preconditions, it will be reset.
@@ -932,6 +944,9 @@ iavf_dev_start(struct rte_eth_dev *dev)
 
PMD_INIT_FUNC_TRACE();
 
+   if (adapter->closed)
+   return -1;
+
adapter->stopped = 0;
 
vf->max_pkt_len = dev->data->mtu + IAVF_ETH_OVERHEAD;
@@ -1009,6 +1024,9 @@ iavf_dev_stop(struct rte_eth_dev *dev)
 
PMD_INIT_FUNC_TRACE();
 
+   if (adapter->closed)
+   return -1;
+
if (!(vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_WB_ON_ITR) &&
dev->data->dev_conf.intr_conf.rxq != 0)
rte_intr_disable(intr_handle);
@@ -1046,6 +1064,9 @@ iavf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
struct iavf_info *vf = &adapter->vf;
 
+   if (adapter->closed)
+   return -EIO;
+
dev_info->max_rx_queues = IAVF_MAX_NUM_QUEUES_LV;
dev_info->max_tx_queues = IAVF_MAX_NUM_QUEUES_LV;
dev_info->min_rx_bufsize = IAVF_BUF_SIZE_MIN;
@@ -1286,6 +1307,9 @@ iavf_dev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
int err;
 
+   if (adapter->closed)
+   return -EIO;
+
if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN_V2) {
err = iavf_add_del_vlan_v2(adapter, vlan_id, on);
if (err)
@@ -1362,6 +1386,9 @@ iavf_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
int err;
 
+   if (adapter->closed)
+   return -EIO;
+
if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN_V2)
return iavf_dev_vlan_offload_set_v2(dev, mask);
 
@@ -1394,6 +1421,9 @@ iavf_dev_rss_reta_update(struct rte_eth_dev *dev,
uint16_t i, idx, shift;
int ret;
 
+   if (adapter->closed)
+   return -EIO;
+
if (!(vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF))
return -ENOTSUP;
 
@@ -1439,6 +1469,9 @@ iavf_dev_rss_reta_query(struct rte_eth_dev *dev,
struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
uint16_t i, idx, shift;
 
+   if (adapter->closed)
+   return -EIO;
+
if (!(vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF))
return -ENOTSUP;
 
@@ -1492,6 +1525,9 @@ iavf_dev_rss_hash_update(struct rte_eth_dev *dev,
 
adapter->dev_data->dev_conf.rx_adv_conf.rss_conf = *rss_conf;
 
+   if (adapter->closed)
+   return -EIO;

RE: [PATCH] net/mlx5: fix RSS hash types adjustment

2022-04-27 Thread Raslan Darawsheh
Hi,

> -Original Message-
> From: Dariusz Sosnowski 
> Sent: Thursday, April 21, 2022 11:37 PM
> To: Matan Azrad ; Slava Ovsiienko
> ; Jack Min 
> Cc: dev@dpdk.org; Raslan Darawsheh ;
> sta...@dpdk.org
> Subject: [PATCH] net/mlx5: fix RSS hash types adjustment
> 
> When an indirect action was created with an RSS action configured to
> hash on both source and destination L3 addresses (or L4 ports), it caused
> shared hrxq to be configured to hash only on destination address
> (or port).
> 
> This patch fixes this behavior by refining RSS types specified in
> configuration before calculating hash types used for hrxq. Refining RSS
> types removes *_SRC_ONLY and *_DST_ONLY flags if they are both set.
> 
> Fixes: 212d17b6a650 ("net/mlx5: fix missing shared RSS hash types")
> Cc: jack...@nvidia.com
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Dariusz Sosnowski 
> Acked-by: Viacheslav Ovsiienko 
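
A minimal sketch of the refinement described in the quoted commit
message, using the generic RSS flags from rte_ethdev.h (the helper name
is illustrative, not the actual mlx5 implementation):

    static uint64_t
    refine_rss_types(uint64_t types)
    {
            /* If both SRC_ONLY and DST_ONLY are set, clear both so
             * the hash covers source and destination fields alike.
             */
            if ((types & RTE_ETH_RSS_L3_SRC_ONLY) &&
                (types & RTE_ETH_RSS_L3_DST_ONLY))
                    types &= ~(RTE_ETH_RSS_L3_SRC_ONLY |
                               RTE_ETH_RSS_L3_DST_ONLY);
            if ((types & RTE_ETH_RSS_L4_SRC_ONLY) &&
                (types & RTE_ETH_RSS_L4_DST_ONLY))
                    types &= ~(RTE_ETH_RSS_L4_SRC_ONLY |
                               RTE_ETH_RSS_L4_DST_ONLY);
            return types;
    }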

Patch applied to next-net-mlx,

Kindest regards,
Raslan Darawsheh


RE: [PATCH 0/2] net/mlx5: LRO fixes

2022-04-27 Thread Raslan Darawsheh
Hi,

> -Original Message-
> From: Michael Baum 
> Sent: Monday, April 25, 2022 12:30 PM
> To: dev@dpdk.org
> Cc: Matan Azrad ; Raslan Darawsheh
> ; Slava Ovsiienko 
> Subject: [PATCH 0/2] net/mlx5: LRO fixes
> 
> Independent fixes about LRO supporting.
> 
> Michael Baum (2):
>   net/mlx5: fix miss LRO validation in RxQ setup
>   net/mlx5: fix LRO configuration in drop RxQ
> 
>  drivers/net/mlx5/mlx5_devx.c | 5 +++--
>  drivers/net/mlx5/mlx5_rxq.c  | 8 
>  2 files changed, 11 insertions(+), 2 deletions(-)
> 
> --
> 2.25.1

Series applied to next-net-mlx,

Kindest regards,
Raslan Darawsheh


[PATCH v4 0/3] cryptodev: move dh type from xform to dh op

2022-04-27 Thread Arek Kusztal
Operation type (PUBLIC_KEY_GENERATION, SHARED_SECRET) should
be free to choose for any operation. One xform/session should
be enough to perform both DH operations; if op_type were an xform
member, a session would have to be created twice for the same
group. A similar problem would be observed in the sessionless case.
Additionally, this will help extend DH to support Elliptic Curves.
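
A minimal sketch of the resulting usage, assuming a session already
created from a single DH xform for the group (op allocation and
enqueue/dequeue elided):

    /* first op: generate the public key with this session */
    op->asym->dh.op_type = RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE;
    /* ... enqueue op, dequeue result ... */

    /* second op: compute the shared secret, same session */
    op->asym->dh.op_type = RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE;
    /* ... enqueue op, dequeue result ... */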

v4:
- changed op_type comment
- added openssl fix

Arek Kusztal (3):
  cryptodev: move dh type from xform to dh op
  crypto/openssl: move dh type from xform to dh op
  test/crypto: move dh type from xform to dh op

 app/test/test_cryptodev_asym.c   | 11 +++---
 drivers/crypto/openssl/rte_openssl_pmd.c | 54 ++--
 drivers/crypto/openssl/rte_openssl_pmd_ops.c | 26 --
 lib/cryptodev/rte_crypto_asym.h  | 14 
 4 files changed, 16 insertions(+), 89 deletions(-)

-- 
2.13.6



[PATCH v4 1/3] cryptodev: move dh type from xform to dh op

2022-04-27 Thread Arek Kusztal
Operation type (PUBLIC_KEY_GENERATION, SHARED_SECRET) should
be free to choose for any operation. One xform/session should
be enough to perform both DH operations; if op_type were an xform
member, a session would have to be created twice for the same
group. A similar problem would be observed in the sessionless case.
Additionally, this will help extend DH to support Elliptic Curves.

Signed-off-by: Arek Kusztal 
---
 lib/cryptodev/rte_crypto_asym.h | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/lib/cryptodev/rte_crypto_asym.h b/lib/cryptodev/rte_crypto_asym.h
index cd24d4b07b..4697a7bc59 100644
--- a/lib/cryptodev/rte_crypto_asym.h
+++ b/lib/cryptodev/rte_crypto_asym.h
@@ -256,8 +256,6 @@ struct rte_crypto_modinv_xform {
  *
  */
 struct rte_crypto_dh_xform {
-   enum rte_crypto_asym_op_type type;
-   /**< Setup xform for key generate or shared secret compute */
rte_crypto_uint p;
/**< Prime modulus data */
rte_crypto_uint g;
@@ -391,27 +389,29 @@ struct rte_crypto_rsa_op_param {
  * @note:
  */
 struct rte_crypto_dh_op_param {
+   enum rte_crypto_asym_op_type op_type;
+   /**< Diffie-Hellman operation type */
rte_crypto_uint pub_key;
/**<
-* Output generated public key when xform type is
+* Output generated public key when op_type is
 * DH PUB_KEY_GENERATION.
-* Input peer public key when xform type is DH
+* Input peer public key when op_type is DH
 * SHARED_SECRET_COMPUTATION
 *
 */
 
rte_crypto_uint priv_key;
/**<
-* Output generated private key if xform type is
+* Output generated private key if op_type is
 * DH PRIVATE_KEY_GENERATION
-* Input when xform type is DH SHARED_SECRET_COMPUTATION.
+* Input when op_type is DH SHARED_SECRET_COMPUTATION.
 *
 */
 
rte_crypto_uint shared_secret;
/**<
 * Output with calculated shared secret
-* when dh xform set up with op type = SHARED_SECRET_COMPUTATION.
+* when dh op_type = SHARED_SECRET_COMPUTATION.
 *
 */
 };
-- 
2.13.6



[PATCH v4 2/3] crypto/openssl: move dh type from xform to dh op

2022-04-27 Thread Arek Kusztal
This commit reflects the API change of the operation type's
location in Diffie-Hellman.

Signed-off-by: Arek Kusztal 
---
 drivers/crypto/openssl/rte_openssl_pmd.c | 54 ++--
 drivers/crypto/openssl/rte_openssl_pmd_ops.c | 26 --
 2 files changed, 3 insertions(+), 77 deletions(-)

diff --git a/drivers/crypto/openssl/rte_openssl_pmd.c b/drivers/crypto/openssl/rte_openssl_pmd.c
index d80e1052e2..409711c097 100644
--- a/drivers/crypto/openssl/rte_openssl_pmd.c
+++ b/drivers/crypto/openssl/rte_openssl_pmd.c
@@ -1696,12 +1696,7 @@ process_openssl_dh_op(struct rte_crypto_op *cop,
BIGNUM *priv_key = NULL;
int ret = 0;
 
-   if (sess->u.dh.key_op &
-   (1 << RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE)) {
-   /* compute shared secret using peer public key
-* and current private key
-* shared secret = peer_key ^ priv_key mod p
-*/
+   if (op->op_type == RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE) {
BIGNUM *peer_key = NULL;
 
/* copy private key and peer key and compute shared secret */
@@ -1735,10 +1730,6 @@ process_openssl_dh_op(struct rte_crypto_op *cop,
if (ret < 0) {
cop->status = RTE_CRYPTO_OP_STATUS_ERROR;
BN_free(peer_key);
-   /* priv key is already loaded into dh,
-* let's not free that directly here.
-* DH_free() will auto free it later.
-*/
return 0;
}
cop->status = RTE_CRYPTO_OP_STATUS_SUCCESS;
@@ -1747,50 +1738,12 @@ process_openssl_dh_op(struct rte_crypto_op *cop,
return 0;
}
 
-   /*
-* other options are public and private key generations.
-*
-* if user provides private key,
-* then first set DH with user provided private key
-*/
-   if ((sess->u.dh.key_op &
-   (1 << RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE)) &&
-   !(sess->u.dh.key_op &
-   (1 << RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE))) {
-   /* generate public key using user-provided private key
-* pub_key = g ^ priv_key mod p
-*/
-
-   /* load private key into DH */
-   priv_key = BN_bin2bn(op->priv_key.data,
-   op->priv_key.length,
-   priv_key);
-   if (priv_key == NULL) {
-   cop->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
-   return -1;
-   }
-   ret = set_dh_priv_key(dh_key, priv_key);
-   if (ret) {
-   OPENSSL_LOG(ERR, "Failed to set private key\n");
-   cop->status = RTE_CRYPTO_OP_STATUS_ERROR;
-   BN_free(priv_key);
-   return 0;
-   }
-   }
-
-   /* generate public and private key pair.
-*
-* if private key already set, generates only public key.
-*
-* if private key is not already set, then set it to random value
-* and update internal private key.
-*/
if (!DH_generate_key(dh_key)) {
cop->status = RTE_CRYPTO_OP_STATUS_ERROR;
return 0;
}
 
-   if (sess->u.dh.key_op & (1 << RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE)) {
+   if (op->op_type == RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE) {
const BIGNUM *pub_key = NULL;
 
OPENSSL_LOG(DEBUG, "%s:%d update public key\n",
@@ -1804,8 +1757,7 @@ process_openssl_dh_op(struct rte_crypto_op *cop,
op->pub_key.data);
}
 
-   if (sess->u.dh.key_op &
-   (1 << RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE)) {
+   if (op->op_type == RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE) {
const BIGNUM *priv_key = NULL;
 
OPENSSL_LOG(DEBUG, "%s:%d updated priv key\n",
diff --git a/drivers/crypto/openssl/rte_openssl_pmd_ops.c b/drivers/crypto/openssl/rte_openssl_pmd_ops.c
index 1cb07794bd..02802ab0c2 100644
--- a/drivers/crypto/openssl/rte_openssl_pmd_ops.c
+++ b/drivers/crypto/openssl/rte_openssl_pmd_ops.c
@@ -1000,32 +1000,6 @@ static int openssl_set_asym_session_parameters(
goto err_dh;
}
 
-   /*
-* setup xfrom for
-* public key generate, or
-* DH Priv key generate, or both
-* public and private key generate
-*/
-   asym_session->u.dh.key_op = (1 << xform->dh.type);
-
-   if (xform->dh.type ==
-   RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE) {
-   /* check if next is pubkey */
-   

[PATCH v4 3/3] test/crypto: move dh type from xform to dh op

2022-04-27 Thread Arek Kusztal
This commit reflects API changes in Diffie-Hellman:
the crypto operation type is now set through asym_op
rather than through the xform.

Signed-off-by: Arek Kusztal 
---
 app/test/test_cryptodev_asym.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/app/test/test_cryptodev_asym.c b/app/test/test_cryptodev_asym.c
index 573af2a537..a5e385f4bd 100644
--- a/app/test/test_cryptodev_asym.c
+++ b/app/test/test_cryptodev_asym.c
@@ -1064,8 +1064,8 @@ test_dh_gen_shared_sec(struct rte_crypto_asym_xform *xfrm)
asym_op = op->asym;
 
/* Setup a xform and op to generate private key only */
-   xform.dh.type = RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE;
xform.next = NULL;
+   asym_op->dh.op_type = RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE;
asym_op->dh.priv_key.data = dh_test_params.priv_key.data;
asym_op->dh.priv_key.length = dh_test_params.priv_key.length;
asym_op->dh.pub_key.data = (uint8_t *)peer;
@@ -1146,7 +1146,7 @@ test_dh_gen_priv_key(struct rte_crypto_asym_xform *xfrm)
asym_op = op->asym;
 
/* Setup a xform and op to generate private key only */
-   xform.dh.type = RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE;
+   asym_op->dh.op_type = RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE;
xform.next = NULL;
asym_op->dh.priv_key.data = output;
asym_op->dh.priv_key.length = sizeof(output);
@@ -1229,7 +1229,7 @@ test_dh_gen_pub_key(struct rte_crypto_asym_xform *xfrm)
 * using test private key
 *
 */
-   xform.dh.type = RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE;
+   asym_op->dh.op_type = RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE;
xform.next = NULL;
 
asym_op->dh.pub_key.data = output;
@@ -1319,9 +1319,10 @@ test_dh_gen_kp(struct rte_crypto_asym_xform *xfrm)
/* Setup a xform chain to generate
 * private key first followed by
 * public key
-*/xform.dh.type = RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE;
+*/
+   asym_op->dh.op_type = RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE;
pub_key_xform.xform_type = RTE_CRYPTO_ASYM_XFORM_DH;
-   pub_key_xform.dh.type = RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE;
+   asym_op->dh.op_type = RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE;
xform.next = &pub_key_xform;
 
asym_op->dh.pub_key.data = out_pub_key;
-- 
2.13.6



RE: [PATCH v4 1/3] cryptodev: move dh type from xform to dh op

2022-04-27 Thread Zhang, Roy Fan
> -Original Message-
> From: Kusztal, ArkadiuszX 
> Sent: Wednesday, April 27, 2022 8:44 AM
> To: dev@dpdk.org
> Cc: gak...@marvell.com; Zhang, Roy Fan ; Kusztal,
> ArkadiuszX 
> Subject: [PATCH v4 1/3] cryptodev: move dh type from xform to dh op
> 
> Operation type (PUBLIC_KEY_GENERATION, SHARED_SECRET) should
> be free to choose for any operation. One xform/session should
> be enough to perform both DH operations, if op_type would be xform
> member, a session would have to be created twice for the same
> group. Similar problem would be observed in sessionless case.
> Additionally, it will help extend DH to support Elliptic Curves.
> 
> Signed-off-by: Arek Kusztal 
> ---
Acked-by: Fan Zhang 


RE: [PATCH v4 2/3] crypto/openssl: move dh type from xform to dh op

2022-04-27 Thread Zhang, Roy Fan
> -Original Message-
> From: Kusztal, ArkadiuszX 
> Sent: Wednesday, April 27, 2022 8:44 AM
> To: dev@dpdk.org
> Cc: gak...@marvell.com; Zhang, Roy Fan ; Kusztal,
> ArkadiuszX 
> Subject: [PATCH v4 2/3] crypto/openssl: move dh type from xform to dh op
> 
> This commit reflects the API change of the operation type's
> location in Diffie-Hellman.
> 
> Signed-off-by: Arek Kusztal 
> ---
Acked-by: Fan Zhang 


RE: [PATCH v4 3/3] test/crypto: move dh type from xform to dh op

2022-04-27 Thread Zhang, Roy Fan
> -Original Message-
> From: Kusztal, ArkadiuszX 
> Sent: Wednesday, April 27, 2022 8:44 AM
> To: dev@dpdk.org
> Cc: gak...@marvell.com; Zhang, Roy Fan ; Kusztal,
> ArkadiuszX 
> Subject: [PATCH v4 3/3] test/crypto: move dh type from xform to dh op
> 
> This commit reflects API changes in Diffie-Hellman:
> the crypto operation type is now set through asym_op
> rather than through the xform.
> 
> Signed-off-by: Arek Kusztal 
> ---
Acked-by: Fan Zhang 


RE: [PATCH v4 0/3] cryptodev: move dh type from xform to dh op

2022-04-27 Thread Zhang, Roy Fan
> -Original Message-
> From: Kusztal, ArkadiuszX 
> Sent: Wednesday, April 27, 2022 8:44 AM
> To: dev@dpdk.org
> Cc: gak...@marvell.com; Zhang, Roy Fan ; Kusztal,
> ArkadiuszX 
> Subject: [PATCH v4 0/3] cryptodev: move dh type from xform to dh op
> 
> Operation type (PUBLIC_KEY_GENERATION, SHARED_SECRET) should
> be free to choose for any operation. One xform/session should
> be enough to perform both DH operations, if op_type would be xform
> member, session would have to be to be created twice for the same
> group. Similar problem would be observed in sessionless case.
> Additionally, it will help extend DH to support Elliptic Curves.
> 
> v4:
> - changed op_type comment
> - added openssl fix
> 
> Arek Kusztal (3):
>   cryptodev: move dh type from xform to dh op
>   crypto/openssl: move dh type from xform to dh op
>   test/crypto: move dh type from xform to dh op
> 
>  app/test/test_cryptodev_asym.c   | 11 +++---
>  drivers/crypto/openssl/rte_openssl_pmd.c | 54 ++--
>  drivers/crypto/openssl/rte_openssl_pmd_ops.c | 26 --
>  lib/cryptodev/rte_crypto_asym.h  | 14 
>  4 files changed, 16 insertions(+), 89 deletions(-)
> 
> --
> 2.13.6
Series-acked-by: Fan Zhang 



RE: [RFC] eal: allow worker lcore stacks to be allocated from hugepage memory

2022-04-27 Thread Morten Brørup
+CC: EAL and Memory maintainers.

> From: Don Wallwork [mailto:d...@xsightlabs.com]
> Sent: Tuesday, 26 April 2022 23.26
> 
> On 4/26/2022 5:21 PM, Stephen Hemminger wrote:
> > On Tue, 26 Apr 2022 17:01:18 -0400
> > Don Wallwork  wrote:
> >
> >> On 4/26/2022 10:58 AM, Stephen Hemminger wrote:
> >>> On Tue, 26 Apr 2022 08:19:59 -0400
> >>> Don Wallwork  wrote:
> >>>
>  Add support for using hugepages for worker lcore stack memory.
> The
>  intent is to improve performance by reducing stack memory related
> TLB
>  misses and also by using memory local to the NUMA node of each
> lcore.

This certainly seems like a good idea!

However, I wonder: Does the O/S assign memory local to the NUMA node to an
lcore-pinned thread's stack when instantiating the thread? And does the DPDK EAL
ensure that the preconditions for the O/S to do that are present?

(Not relevant for this patch, but the same locality questions come to mind 
regarding Thread Local Storage.)

> 
>  Platforms desiring to make use of this capability must enable the
>  associated option flag and stack size settings in platform config
>  files.
>  ---
> lib/eal/linux/eal.c | 39
> +++
> 1 file changed, 39 insertions(+)
> 
> >>> Good idea but having a fixed size stack makes writing complex
> application
> >>> more difficult. Plus you lose the safety of guard pages.

Would it be possible to add a guard page or guard region by using the O/S 
memory allocator instead of rte_zmalloc_socket()? Since the stack is considered 
private to the process, i.e. not accessible from other processes, this patch 
does not need to provide remote access to stack memory from secondary processes 
- and thus it is not a requirement for this feature to use DPDK managed memory.
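
A minimal sketch of that idea using plain O/S calls, assuming STACK_SZ
and GUARD_SZ are multiples of the underlying (huge)page size (all names
illustrative, not from the patch):

    #include <sys/mman.h>
    #include <pthread.h>

    /* map the stack plus one extra region for the guard */
    void *base = mmap(NULL, GUARD_SZ + STACK_SZ, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
    if (base != MAP_FAILED) {
            /* make the lowest region inaccessible: an overflow now
             * faults instead of silently corrupting adjacent memory
             */
            mprotect(base, GUARD_SZ, PROT_NONE);

            pthread_attr_t attr;
            pthread_attr_init(&attr);
            pthread_attr_setstack(&attr, (char *)base + GUARD_SZ, STACK_SZ);
            /* pthread_create(&tid, &attr, worker_fn, arg); */
    }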

> >> Thanks for the quick reply.
> >>
> >> The expectation is that use of this optional feature would be
> limited to
> >> cases where
> >> the performance gains justify the implications of these tradeoffs.
> For
> >> example, a specific
> >> data plane application may be okay with limited stack size and could
> be
> >> tested to ensure
> >> stack usage remains within limits.

How to identify the required stack size and verify it... If aiming for small 
stacks, some instrumentation would be nice, like rte_mempool_audit() and 
rte_mempool_list_dump().

Alternatively, just assume that the stack is "always big enough", and don't 
worry about it - like the default O/S stack size. And as Stephen already 
mentioned: Regardless of stack size, overflowing the stack will cause memory 
corruption instead of a segmentation fault.

Keep in mind that the required stack size not only depends on the application, 
but also on DPDK and other libraries being used by the application.

> >>
> >> Also, since this applies only to worker threads, the main thread
> would
> >> not be impacted
> >> by this change.
> >>
> >>
> > I would prefer it as a runtime, not compile time option.
> > That way distributions could ship DPDK and application could opt in
> if it wanted.
> Good point..  I'll work on a v2 and will post that when it's ready.

May I suggest using the stack size configured in the O/S, from 
pthread_attr_getstacksize() or similar, instead of choosing the stack size 
manually? If you want it to be configurable, use the default size unless 
explicitly specified otherwise.
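
For reference, a sketch of reading the O/S default stack size via
pthreads:

    pthread_attr_t attr;
    size_t def_sz = 0;

    pthread_attr_init(&attr);                  /* filled with defaults */
    pthread_attr_getstacksize(&attr, &def_sz); /* e.g. 8 MiB on glibc */
    pthread_attr_destroy(&attr);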

Do the worker threads need a different stack size than the main thread? In my 
opinion: "Nice to have", not "must have".

Do the worker threads need different stack sizes individually? In my opinion: 
Perhaps "nice to have", certainly not "must have".



RE: [PATCH] security: fix comments

2022-04-27 Thread Morten Brørup
> From: Anoob Joseph [mailto:ano...@marvell.com]
> Sent: Wednesday, 27 April 2022 08.02
> 
> Fix comments to reflect the field.
> 
> Fixes: ad7515a39f2a ("security: add SA lifetime configuration")
> Cc: ano...@marvell.com
> 
> Reported-by: Thomas Monjalon 
> Signed-off-by: Anoob Joseph 
> ---
>  lib/security/rte_security.h | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/security/rte_security.h b/lib/security/rte_security.h
> index 2a43cbe..675db94 100644
> --- a/lib/security/rte_security.h
> +++ b/lib/security/rte_security.h
> @@ -311,9 +311,9 @@ struct rte_security_ipsec_lifetime {
>   uint64_t bytes_soft_limit;
>   /**< Soft expiry limit in bytes */
>   uint64_t packets_hard_limit;
> - /**< Soft expiry limit in number of packets */
> + /**< Hard expiry limit in number of packets */
>   uint64_t bytes_hard_limit;
> - /**< Soft expiry limit in bytes */
> + /**< Hard expiry limit in bytes */
>  };
> 
>  /**
> --
> 2.7.4
> 

Reviewed-by: Morten Brørup 



Re: [PATCH v2] net/nfp: update how MAX MTU is read

2022-04-27 Thread Niklas Söderlund
Hello,

I have a question about the Checks that ran on this patch in patchwork 
[1]. It appears the job ci/iol-x86_64-compile-testing, 
dpdk_mingw64_compile has failed on a Windows Server 2019 build. But the 
logs from the job appear to be incomplete, as they contain only 19 lines 
of output and stop without an error in the configuration part of meson. 

The failure is only flagged as a warning and not as an error in 
patchwork. Is it possible that the job in question fails to capture 
all output, or that it fails to complete sometimes?

What can we do on our end to remedy this? My concern is that the 
patch is blocked due to the warning and I'm unclear on how to move 
forward; sorry if the case is that I'm just impatient.

1. https://patchwork.dpdk.org/project/dpdk/patch/20220420134638.24010-1-walter.heym...@corigine.com/

On 2022-04-20 15:46:39 +0200, Walter Heymans wrote:
> The 'max_rx_pktlen' value was previously read from hardware, which was
> set by the running firmware. This caused confusion due to different
> meanings of 'MAX_MTU'. This patch updates the 'max_rx_pktlen' to the
> maximum value that the NFP NIC can support. The 'max_mtu' value that is
> read from hardware, is assigned to the 'dev_info->max_mtu' variable.
> 
> If more layer 2 metadata must be used, the firmware can be updated to
> report a smaller 'max_mtu' value.
> 
> The constant defined for NFP_FRAME_SIZE_MAX is derived from the maximum
> supported buffer size of 10240, minus 136 bytes that are reserved by the
> hardware and another 56 bytes reserved for expansion in firmware. This
> results in a usable maximum packet length of 10048 bytes.
> 
> Signed-off-by: Walter Heymans 
> Signed-off-by: Niklas Söderlund 
> Reviewed-by: Louis Peens 
> Reviewed-by: Chaoyong He 
> Reviewed-by: Richard Donkin 
> ---
>  drivers/net/nfp/nfp_common.c | 11 ++-
>  drivers/net/nfp/nfp_common.h |  3 +++
>  2 files changed, 13 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/nfp/nfp_common.c b/drivers/net/nfp/nfp_common.c
> index b26770dbfb..52fbda1a79 100644
> --- a/drivers/net/nfp/nfp_common.c
> +++ b/drivers/net/nfp/nfp_common.c
> @@ -692,7 +692,16 @@ nfp_net_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
>   dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues;
>   dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues;
>   dev_info->min_rx_bufsize = RTE_ETHER_MIN_MTU;
> - dev_info->max_rx_pktlen = hw->max_mtu;
> + /*
> +  * The maximum rx packet length (max_rx_pktlen) is set to the
> +  * maximum supported frame size that the NFP can handle. This
> +  * includes layer 2 headers, CRC and other metadata that can
> +  * optionally be used.
> +  * The maximum layer 3 MTU (max_mtu) is read from hardware,
> +  * which was set by the firmware loaded onto the card.
> +  */
> + dev_info->max_rx_pktlen = NFP_FRAME_SIZE_MAX;
> + dev_info->max_mtu = hw->max_mtu;
>   /* Next should change when PF support is implemented */
>   dev_info->max_mac_addrs = 1;
>  
> diff --git a/drivers/net/nfp/nfp_common.h b/drivers/net/nfp/nfp_common.h
> index 8b35fa119c..8db5ec23f8 100644
> --- a/drivers/net/nfp/nfp_common.h
> +++ b/drivers/net/nfp/nfp_common.h
> @@ -98,6 +98,9 @@ struct nfp_net_adapter;
>  /* Number of supported physical ports */
>  #define NFP_MAX_PHYPORTS 12
>  
> +/* Maximum supported NFP frame size (MTU + layer 2 headers) */
> +#define NFP_FRAME_SIZE_MAX   10048
> +
>  #include 
>  #include 
>  
> -- 
> 2.25.1
> 

-- 
Kind Regards,
Niklas Söderlund


[PATCH v3] sched: enable/disable TC OV at runtime

2022-04-27 Thread Marcin Danilewicz
Added a new API to enable or disable TC oversubscription for the best
effort traffic class at the subport level.
Added changes after review and increased throughput.

By default TC OV is disabled.
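
A minimal usage sketch of the new API (port and subport_id are
illustrative):

    /* enable best-effort TC oversubscription on one subport;
     * returns -EINVAL on a bad port pointer or subport id
     */
    int ret = rte_sched_subport_tc_ov_config(port, subport_id, true);
    if (ret != 0)
            RTE_LOG(ERR, SCHED, "TC OV enable failed: %d\n", ret);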

Signed-off-by: Marcin Danilewicz 
---
 lib/sched/rte_sched.c | 189 +++---
 lib/sched/rte_sched.h |  18 
 lib/sched/version.map |   3 +
 3 files changed, 178 insertions(+), 32 deletions(-)

diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index ec74bee939..6e7d81df46 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -213,6 +213,9 @@ struct rte_sched_subport {
uint8_t *bmp_array;
struct rte_mbuf **queue_array;
uint8_t memory[0] __rte_cache_aligned;
+
+   /* TC oversubscription activation */
+   int is_tc_ov_enabled;
 } __rte_cache_aligned;
 
 struct rte_sched_port {
@@ -1165,6 +1168,45 @@ rte_sched_cman_config(struct rte_sched_port *port,
 }
 #endif
 
+int
+rte_sched_subport_tc_ov_config(struct rte_sched_port *port,
+   uint32_t subport_id,
+   bool tc_ov_enable)
+{
+   struct rte_sched_subport *s;
+   struct rte_sched_subport_profile *profile;
+
+   if (port == NULL) {
+   RTE_LOG(ERR, SCHED,
+   "%s: Incorrect value for parameter port\n", __func__);
+   return -EINVAL;
+   }
+
+   if (subport_id >= port->n_subports_per_port) {
+   RTE_LOG(ERR, SCHED,
+   "%s: Incorrect value for parameter subport id\n", 
__func__);
+   return  -EINVAL;
+   }
+
+   s = port->subports[subport_id];
+   s->is_tc_ov_enabled = tc_ov_enable ? 1 : 0;
+
+   if (s->is_tc_ov_enabled) {
+   /* TC oversubscription */
+   s->tc_ov_wm_min = port->mtu;
+   s->tc_ov_period_id = 0;
+   s->tc_ov = 0;
+   s->tc_ov_n = 0;
+   s->tc_ov_rate = 0;
+
+   profile = port->subport_profiles + s->profile;
+   s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile->tc_period,
+   s->pipe_tc_be_rate_max);
+   s->tc_ov_wm = s->tc_ov_wm_max;
+   }
+   return 0;
+}
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
uint32_t subport_id,
@@ -1254,6 +1296,9 @@ rte_sched_subport_config(struct rte_sched_port *port,
s->n_pipe_profiles = params->n_pipe_profiles;
s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
+   /* TC over-subscription is disabled by default */
+   s->is_tc_ov_enabled = 0;
+
 #ifdef RTE_SCHED_CMAN
if (params->cman_params != NULL) {
s->cman_enabled = true;
@@ -1316,13 +1361,6 @@ rte_sched_subport_config(struct rte_sched_port *port,
 
for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++)
s->grinder_base_bmp_pos[i] = RTE_SCHED_PIPE_INVALID;
-
-   /* TC oversubscription */
-   s->tc_ov_wm_min = port->mtu;
-   s->tc_ov_period_id = 0;
-   s->tc_ov = 0;
-   s->tc_ov_n = 0;
-   s->tc_ov_rate = 0;
}
 
{
@@ -1342,9 +1380,6 @@ rte_sched_subport_config(struct rte_sched_port *port,
else
profile->tc_credits_per_period[i] = 0;
 
-   s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile->tc_period,
-   s->pipe_tc_be_rate_max);
-   s->tc_ov_wm = s->tc_ov_wm_max;
s->profile = subport_profile_id;
 
}
@@ -1417,17 +1452,20 @@ rte_sched_pipe_config(struct rte_sched_port *port,
double pipe_tc_be_rate =
(double) params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
/ (double) params->tc_period;
-   uint32_t tc_be_ov = s->tc_ov;
 
-   /* Unplug pipe from its subport */
-   s->tc_ov_n -= params->tc_ov_weight;
-   s->tc_ov_rate -= pipe_tc_be_rate;
-   s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
+   if (s->is_tc_ov_enabled) {
+   uint32_t tc_be_ov = s->tc_ov;
 
-   if (s->tc_ov != tc_be_ov) {
-   RTE_LOG(DEBUG, SCHED,
-   "Subport %u Best-effort TC oversubscription is 
OFF (%.4lf >= %.4lf)\n",
-   subport_id, subport_tc_be_rate, s->tc_ov_rate);
+   /* Unplug pipe from its subport */
+   s->tc_ov_n -= params->tc_ov_weight;
+   s->tc_ov_rate -= pipe_tc_be_rate;
+   s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
+
+   if (s->tc_ov != tc_be_ov) {
+   RTE_LOG(DEBUG, SCHED,
+   "Subport %u Best-effort TC 
oversubscription is OFF 

Re: [PATCH] event/cnxk: add SLMTST support to Tx adapter

2022-04-27 Thread Ray Kinsella


Pavan Nikhilesh  writes:

> Scheduled LMTST uses in-core LSW (LMTST scheduling widget) to
> coordinate with SSO and send a LMTST to the destination
> coprocessor without the need for the core to be the head of
> the scheduling context it is currently holding.
>
> Use SLMTST to send mbuf to NIX-TX for transmit. SLMTST only
> supports transmitting a single WQE.
>
> Signed-off-by: Pavan Nikhilesh 
> ---
>  Depends-on: Series-22634
>
>  drivers/common/cnxk/hw/ssow.h|  7 +++
>  drivers/common/cnxk/roc_dev_priv.h   |  6 ++
>  drivers/common/cnxk/roc_io.h |  8 
>  drivers/common/cnxk/roc_io_generic.h |  7 +++
>  drivers/common/cnxk/roc_nix.c| 19 +++
>  drivers/common/cnxk/roc_nix.h|  4 
>  drivers/common/cnxk/roc_sso.c| 23 +++
>  drivers/common/cnxk/roc_sso.h|  2 ++
>  drivers/common/cnxk/version.map  |  2 ++
>  drivers/event/cnxk/cn10k_eventdev.c  | 11 +++
>  drivers/event/cnxk/cn10k_worker.h| 19 +--
>  drivers/event/cnxk/cnxk_eventdev.h   |  2 +-
>  12 files changed, 103 insertions(+), 7 deletions(-)
>
Acked-by: Ray Kinsella 

-- 
Regards, Ray K


[PATCH v7 00/18] add virtio_blk device support to vdpa/ifc

2022-04-27 Thread Andy Pei
This patch set adds virtio_blk device support to the vdpa/ifc driver.
With a lot of similarities, I re-use part of the vdpa/ifc driver.
The virtio net and blk devices are distinguished by device id, and
device-specific features and ops are implemented.
An example is added to vdpa to support the virtio_blk device.
To support blk device live migration, some modifications are made to
the vhost lib. The dev_conf op is performed only under the
VHOST_USER_SET_VRING_CALL msg.

v7:
 Check on expected fd num in new vhost msg handler.
 Sanity check on vhost msg size.
 Fix typo.
 Add commit log to help understand code.
 Remove duplicated code.
 Add new API to get vDPA device type.
v6:
 Fix some commit log.
 Add vhost socket in log output to make it more user-friendly.
 When driver ops fail, just output some log, do not break message handler.
 Check vhost msg size in msg handler.
v5:
 Fix some coding style issues.
v4:
 Add args "isblk" to vdpa example to specify a block device, fix some
 Issue in example.
 Make sure code specify for block device does not affect net device.
v3:
 Fix some compile issues.
v2:
 Fix some coding style issues.

Andy Pei (18):
  vdpa/ifc: add support for virtio blk device
  vhost: add vDPA ops for blk device
  vhost: add vhost msg support
  vdpa/ifc: add blk ops for ifc device
  vdpa/ifc: add vDPA interrupt for blk device
  vdpa/ifc: add block device SW live-migration
  vhost: add API to get vDPA device type
  vdpa/ifc: add get device type ops to ifc driver
  examples/vdpa: add vDPA blk support in example
  usertools: add support for virtio blk device
  vdpa/ifc: add set vring state for blk device
  vdpa/ifc: add some log at vDPA launch before qemu connect
  vdpa/ifc: read virtio max queues from hardware
  vdpa/ifc: add interrupt and handle for virtio blk
  vdpa/ifc: add is blk flag to ifcvf HW struct
  vdpa/ifc/base: access correct register for blk device
  vdpa/ifc: blk device pause without no inflight IO
  vhost: make sure each queue callfd is configured

 drivers/vdpa/ifc/base/ifcvf.c|  36 +++-
 drivers/vdpa/ifc/base/ifcvf.h|  20 +-
 drivers/vdpa/ifc/ifcvf_vdpa.c| 392 +--
 examples/vdpa/main.c |  57 ++
 examples/vdpa/vdpa_blk_compact.h |  65 +++
 lib/vhost/rte_vhost.h|  17 ++
 lib/vhost/socket.c   |  39 
 lib/vhost/vdpa_driver.h  |  11 +-
 lib/vhost/version.map|   2 +
 lib/vhost/vhost_user.c   |  97 ++
 lib/vhost/vhost_user.h   |  13 ++
 usertools/dpdk-devbind.py|   5 +-
 12 files changed, 730 insertions(+), 24 deletions(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.h

-- 
1.8.3.1



[PATCH v7 01/18] vdpa/ifc: add support for virtio blk device

2022-04-27 Thread Andy Pei
Re-use the vdpa/ifc code, distinguishing blk and net devices by pci_device_id.
Blk and net devices are implemented with the proper features and ops.

Signed-off-by: Andy Pei 
Reviewed-by: Maxime Coquelin 
---
 drivers/vdpa/ifc/base/ifcvf.h | 16 +++-
 drivers/vdpa/ifc/ifcvf_vdpa.c | 92 +++
 2 files changed, 98 insertions(+), 10 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 573a35f..01522c6 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -5,8 +5,17 @@
 #ifndef _IFCVF_H_
 #define _IFCVF_H_
 
+#include 
 #include "ifcvf_osdep.h"
 
+#define IFCVF_NET  0
+#define IFCVF_BLK  1
+
+/* for BLK */
+#define IFCVF_BLK_TRANSITIONAL_DEVICE_ID0x1001
+#define IFCVF_BLK_MODERN_DEVICE_ID  0x1042
+#define IFCVF_BLK_DEVICE_ID 0x0002
+
 #define IFCVF_VENDOR_ID0x1AF4
 #define IFCVF_DEVICE_ID0x1041
 #define IFCVF_SUBSYS_VENDOR_ID 0x8086
@@ -57,7 +66,6 @@
 
 #define IFCVF_32_BIT_MASK  0x
 
-
 struct ifcvf_pci_cap {
u8 cap_vndr;/* Generic PCI field: PCI_CAP_ID_VNDR */
u8 cap_next;/* Generic PCI field: next ptr. */
@@ -126,7 +134,11 @@ struct ifcvf_hw {
u8 notify_region;
u32notify_off_multiplier;
struct ifcvf_pci_common_cfg *common_cfg;
-   struct ifcvf_net_config *dev_cfg;
+   union {
+   struct ifcvf_net_config *net_cfg;
+   struct virtio_blk_config *blk_cfg;
+   void *dev_cfg;
+   };
u8 *isr;
u16*notify_base;
u16*notify_addr[IFCVF_MAX_QUEUES * 2];
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 9f05595..e3210a8 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -58,6 +58,7 @@ struct ifcvf_internal {
struct rte_vdpa_device *vdev;
uint16_t max_queues;
uint64_t features;
+   int device_type;
rte_atomic32_t started;
rte_atomic32_t dev_attached;
rte_atomic32_t running;
@@ -75,6 +76,12 @@ struct internal_list {
struct ifcvf_internal *internal;
 };
 
+/* vdpa device info includes device features and device operations. */
+struct rte_vdpa_dev_info {
+   uint64_t features;
+   struct rte_vdpa_dev_ops *ops;
+};
+
 TAILQ_HEAD(internal_list_head, internal_list);
 static struct internal_list_head internal_list =
TAILQ_HEAD_INITIALIZER(internal_list);
@@ -1167,6 +1174,48 @@ struct internal_list {
return 0;
 }
 
+static int16_t
+ifcvf_pci_get_device_type(struct rte_pci_device *pci_dev)
+{
+   uint16_t pci_device_id = pci_dev->id.device_id;
+   uint16_t device_id;
+
+   if (pci_device_id < 0x1000 || pci_device_id > 0x107f) {
+   DRV_LOG(ERR, "Probe device is not a virtio device\n");
+   return -1;
+   }
+
+   if (pci_device_id < 0x1040) {
+   /* Transitional devices: use the PCI subsystem device id as
+* virtio device id, same as legacy driver always did.
+*/
+   device_id = pci_dev->id.subsystem_device_id;
+   } else {
+   /* Modern devices: simply use PCI device id,
+* but start from 0x1040.
+*/
+   device_id = pci_device_id - 0x1040;
+   }
+
+   return device_id;
+}
+
+struct rte_vdpa_dev_info dev_info[] = {
+   {
+   .features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
+   (1ULL << VIRTIO_NET_F_CTRL_VQ) |
+   (1ULL << VIRTIO_NET_F_STATUS) |
+   (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+   (1ULL << VHOST_F_LOG_ALL),
+   .ops = &ifcvf_ops,
+   },
+   {
+   .features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+   (1ULL << VHOST_F_LOG_ALL),
+   .ops = NULL,
+   },
+};
+
 static int
 ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
struct rte_pci_device *pci_dev)
@@ -1178,6 +1227,7 @@ struct internal_list {
int sw_fallback_lm = 0;
struct rte_kvargs *kvlist = NULL;
int ret = 0;
+   int16_t device_id;
 
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
return 0;
@@ -1227,13 +1277,24 @@ struct internal_list {
internal->configured = 0;
internal->max_queues = IFCVF_MAX_QUEUES;
features = ifcvf_get_features(&internal->hw);
-   internal->features = (features &
-   ~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) |
-   (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
-   (1ULL << VIRTIO_NET_F_CTRL_VQ) |
-   (1ULL << VIRTIO_NET_F_STATUS) |
-   (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
-   (1ULL << VHOST_F_LOG_ALL);
+
	device_id = ifcvf_pci_get_device_type(pci_dev);

[PATCH v7 02/18] vhost: add vDPA ops for blk device

2022-04-27 Thread Andy Pei
get_config and set_config are necessary ops for the blk device.
Add get_config and set_config ops to the vDPA ops.

Signed-off-by: Andy Pei 
Reviewed-by: Maxime Coquelin 
---
 lib/vhost/vdpa_driver.h | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index 88138be..e59a834 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -72,8 +72,12 @@ struct rte_vdpa_dev_ops {
/** Reset statistics of the queue */
int (*reset_stats)(struct rte_vdpa_device *dev, int qid);
 
-   /** Reserved for future extension */
-   void *reserved[2];
+   /** Get the device configuration space */
+   int (*get_config)(int vid, uint8_t *config, uint32_t len);
+
+   /** Set the device configuration space */
+   int (*set_config)(int vid, uint8_t *config, uint32_t offset,
+ uint32_t size, uint32_t flags);
 };
 
 /**
-- 
1.8.3.1



[PATCH v7 03/18] vhost: add vhost msg support

2022-04-27 Thread Andy Pei
Add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG.
The VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG messages are only
supported by the virtio blk vDPA device.

Signed-off-by: Andy Pei 
---
 lib/vhost/vhost_user.c | 83 ++
 lib/vhost/vhost_user.h | 13 
 2 files changed, 96 insertions(+)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 1d39067..e925428 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -80,6 +80,8 @@
[VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
[VHOST_USER_SET_SLAVE_REQ_FD]  = "VHOST_USER_SET_SLAVE_REQ_FD",
[VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
+   [VHOST_USER_GET_CONFIG]  = "VHOST_USER_GET_CONFIG",
+   [VHOST_USER_SET_CONFIG]  = "VHOST_USER_SET_CONFIG",
[VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS",
[VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
[VHOST_USER_POSTCOPY_ADVISE]  = "VHOST_USER_POSTCOPY_ADVISE",
@@ -2542,6 +2544,85 @@ static int is_vring_iotlb(struct virtio_net *dev,
 }
 
 static int
+vhost_user_get_config(struct virtio_net **pdev,
+   struct vhu_msg_context *ctx,
+   int main_fd __rte_unused)
+{
+   struct virtio_net *dev = *pdev;
+   struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
+   int ret = 0;
+
+   if (validate_msg_fds(dev, ctx, 0) != 0)
+   return RTE_VHOST_MSG_RESULT_ERR;
+
+   if (vdpa_dev->ops->get_config) {
+   ret = vdpa_dev->ops->get_config(dev->vid,
+  ctx->msg.payload.cfg.region,
+  ctx->msg.payload.cfg.size);
+   if (ret != 0) {
+   ctx->msg.size = 0;
+   VHOST_LOG_CONFIG(ERR,
+"(%s) get_config() return error!\n",
+dev->ifname);
+   }
+   } else {
+   VHOST_LOG_CONFIG(ERR, "(%s) get_config() not supported!\n",
+dev->ifname);
+   }
+
+   return RTE_VHOST_MSG_RESULT_REPLY;
+}
+
+static int
+vhost_user_set_config(struct virtio_net **pdev,
+   struct vhu_msg_context *ctx,
+   int main_fd __rte_unused)
+{
+   struct virtio_net *dev = *pdev;
+   struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
+   int ret = 0;
+
+   if (validate_msg_fds(dev, ctx, 0) != 0)
+   return RTE_VHOST_MSG_RESULT_ERR;
+
+   if (ctx->msg.size != sizeof(struct vhost_user_config)) {
+   VHOST_LOG_CONFIG(ERR,
+   "(%s) invalid set config msg size: %"PRIu32" != %d\n",
+   dev->ifname, ctx->msg.size,
+   (int)sizeof(struct vhost_user_config));
+   goto out;
+   }
+
+   if (ctx->msg.payload.cfg.size > VHOST_USER_MAX_CONFIG_SIZE) {
+   VHOST_LOG_CONFIG(ERR,
+   "(%s) vhost_user_config size: %"PRIu32", should not be 
larger than %d\n",
+   dev->ifname, ctx->msg.payload.cfg.size,
+   VHOST_USER_MAX_CONFIG_SIZE);
+   goto out;
+   }
+
+   if (vdpa_dev->ops->set_config) {
+   ret = vdpa_dev->ops->set_config(dev->vid,
+   ctx->msg.payload.cfg.region,
+   ctx->msg.payload.cfg.offset,
+   ctx->msg.payload.cfg.size,
+   ctx->msg.payload.cfg.flags);
+   if (ret)
+   VHOST_LOG_CONFIG(ERR,
+"(%s) set_config() return error!\n",
+dev->ifname);
+   } else {
+   VHOST_LOG_CONFIG(ERR, "(%s) set_config() not supported!\n",
+dev->ifname);
+   }
+
+   return RTE_VHOST_MSG_RESULT_OK;
+
+out:
+   return RTE_VHOST_MSG_RESULT_ERR;
+}
+
+static int
 vhost_user_iotlb_msg(struct virtio_net **pdev,
struct vhu_msg_context *ctx,
int main_fd __rte_unused)
@@ -2782,6 +2863,8 @@ typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
[VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
[VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
[VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
+   [VHOST_USER_GET_CONFIG] = vhost_user_get_config,
+   [VHOST_USER_SET_CONFIG] = vhost_user_set_config,
[VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise,
[VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
[VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end,
diff --git a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h
index c946cc2..97cfb2f 100644
--- a/lib/vhost/vhost_user.h
+++ b/lib/vhost/vhost_user.h
@@ -50,6 +50,8 @@
VHOST_USE

[PATCH v7 04/18] vdpa/ifc: add blk ops for ifc device

2022-04-27 Thread Andy Pei
For the virtio blk device, re-use part of the ifc driver ops.
Implement ifcvf_blk_get_config for the virtio blk device.
Support the VHOST_USER_PROTOCOL_F_CONFIG feature for the virtio
blk device.

Signed-off-by: Andy Pei 
Reviewed-by: Maxime Coquelin 
---
 drivers/vdpa/ifc/base/ifcvf.h |  4 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 85 ++-
 2 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 01522c6..769c603 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -66,6 +66,10 @@
 
 #define IFCVF_32_BIT_MASK  0x
 
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG   9
+#endif
+
 struct ifcvf_pci_cap {
u8 cap_vndr;/* Generic PCI field: PCI_CAP_ID_VNDR */
u8 cap_next;/* Generic PCI field: next ptr. */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index e3210a8..8ee041f 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1088,6 +1088,10 @@ struct rte_vdpa_dev_info {
 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
+
+#define VDPA_BLK_PROTOCOL_FEATURES \
+   (1ULL << VHOST_USER_PROTOCOL_F_CONFIG)
+
 static int
 ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features)
 {
@@ -1200,6 +1204,85 @@ struct rte_vdpa_dev_info {
return device_id;
 }
 
+static int
+ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t len)
+{
+   struct virtio_blk_config *dev_cfg;
+   struct ifcvf_internal *internal;
+   struct rte_vdpa_device *vdev;
+   struct internal_list *list;
+   uint32_t i;
+   uint64_t capacity = 0;
+   uint8_t *byte;
+
+   if (len < sizeof(struct virtio_blk_config)) {
+   DRV_LOG(ERR, "Invalid len: %u, required: %u",
+   len, (uint32_t)sizeof(struct virtio_blk_config));
+   return -1;
+   }
+
+   vdev = rte_vhost_get_vdpa_device(vid);
+   list = find_internal_resource_by_vdev(vdev);
+   if (list == NULL) {
+   DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+   return -1;
+   }
+
+   internal = list->internal;
+
+   for (i = 0; i < sizeof(struct virtio_blk_config); i++)
+   config[i] = *((u8 *)internal->hw.blk_cfg + i);
+
+   dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg;
+
+   /* cannot read 64-bit register in one attempt, so read byte by byte. */
+   for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+   byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+   capacity |= (uint64_t)*byte << (i * 8);
+   }
+   DRV_LOG(INFO, "capacity  : %"PRIu64"G", capacity >> 21);
+
+   DRV_LOG(INFO, "size_max  : 0x%08x", dev_cfg->size_max);
+   DRV_LOG(INFO, "seg_max   : 0x%08x", dev_cfg->seg_max);
+   DRV_LOG(INFO, "blk_size  : 0x%08x", dev_cfg->blk_size);
+   DRV_LOG(INFO, "geometry");
+   DRV_LOG(INFO, "  cylinders: %u", dev_cfg->geometry.cylinders);
+   DRV_LOG(INFO, "  heads: %u", dev_cfg->geometry.heads);
+   DRV_LOG(INFO, "  sectors  : %u", dev_cfg->geometry.sectors);
+   DRV_LOG(INFO, "num_queues: 0x%08x", dev_cfg->num_queues);
+
+   DRV_LOG(INFO, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]\n",
+   config[0], config[1], config[2], config[3], config[4],
+   config[5], config[6], config[7]);
+   return 0;
+}
+
+static int
+ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
+   uint64_t *features)
+{
+   RTE_SET_USED(vdev);
+
+   *features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
+   *features |= VDPA_BLK_PROTOCOL_FEATURES;
+   return 0;
+}
+
+static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
+   .get_queue_num = ifcvf_get_queue_num,
+   .get_features = ifcvf_get_vdpa_features,
+   .set_features = ifcvf_set_features,
+   .get_protocol_features = ifcvf_blk_get_protocol_features,
+   .dev_conf = ifcvf_dev_config,
+   .dev_close = ifcvf_dev_close,
+   .set_vring_state = NULL,
+   .migration_done = NULL,
+   .get_vfio_group_fd = ifcvf_get_vfio_group_fd,
+   .get_vfio_device_fd = ifcvf_get_vfio_device_fd,
+   .get_notify_area = ifcvf_get_notify_area,
+   .get_config = ifcvf_blk_get_config,
+};
+
 struct rte_vdpa_dev_info dev_info[] = {
{
.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
@@ -1212,7 +1295,7 @@ struct rte_vdpa_dev_info dev_info[] = {
{
.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
(1ULL << VHOST_F_LOG_ALL),
-   .ops = NULL,
+   .ops = &ifcvf_blk_ops,
},
 };
 
-- 
1.8.3.1



[PATCH v7 05/18] vdpa/ifc: add vDPA interrupt for blk device

2022-04-27 Thread Andy Pei
For the block device type, we use one queue to transfer
both read and write requests, so we have to relay commands
on all queues.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 8ee041f..07fc3ca 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -370,6 +370,7 @@ struct rte_vdpa_dev_info {
irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
irq_set->start = 0;
fd_ptr = (int *)&irq_set->data;
+   /* The first interrupt is for the config space change notification */
fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
rte_intr_fd_get(internal->pdev->intr_handle);
 
@@ -379,7 +380,13 @@ struct rte_vdpa_dev_info {
for (i = 0; i < nr_vring; i++) {
rte_vhost_get_vhost_vring(internal->vid, i, &vring);
fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
-   if ((i & 1) == 0 && m_rx == true) {
+   if (m_rx == true &&
+   ((i & 1) == 0 || internal->device_type == IFCVF_BLK)) {
+   /* For the net device we only need to relay the rx queue,
+* which will change the memory of the VM.
+* For the blk device we need to relay all the read commands
+* of each queue.
+*/
fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
if (fd < 0) {
DRV_LOG(ERR, "can't setup eventfd: %s",
-- 
1.8.3.1



[PATCH v7 06/18] vdpa/ifc: add block device SW live-migration

2022-04-27 Thread Andy Pei
Add SW live-migration support to block device.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 33 +
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 07fc3ca..8a260b7 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -312,6 +312,7 @@ struct rte_vdpa_dev_info {
 vdpa_ifcvf_stop(struct ifcvf_internal *internal)
 {
struct ifcvf_hw *hw = &internal->hw;
+   struct rte_vhost_vring vq;
uint32_t i;
int vid;
uint64_t features = 0;
@@ -319,6 +320,22 @@ struct rte_vdpa_dev_info {
uint64_t len;
 
vid = internal->vid;
+
+   /* to make sure no packet is lost for blk device
+* do not stop until last_avail_idx == last_used_idx
+*/
+   if (internal->device_type == IFCVF_BLK) {
+   for (i = 0; i < hw->nr_vring; i++) {
+   rte_vhost_get_vhost_vring(internal->vid, i, &vq);
+   while (vq.avail->idx != vq.used->idx) {
+   ifcvf_notify_queue(hw, i);
+   usleep(10);
+   }
+   hw->vring[i].last_avail_idx = vq.avail->idx;
+   hw->vring[i].last_used_idx = vq.used->idx;
+   }
+   }
+
ifcvf_stop_hw(hw);
 
for (i = 0; i < hw->nr_vring; i++)
@@ -642,8 +659,10 @@ struct rte_vdpa_dev_info {
}
hw->vring[i].avail = gpa;
 
-   /* Direct I/O for Tx queue, relay for Rx queue */
-   if (i & 1) {
+   /* NET: Direct I/O for Tx queue, relay for Rx queue
+* BLK: relay every queue
+*/
+   if ((internal->device_type == IFCVF_NET) && (i & 1)) {
gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
if (gpa == 0) {
DRV_LOG(ERR, "Fail to get GPA for used ring.");
@@ -693,8 +712,12 @@ struct rte_vdpa_dev_info {
 
for (i = 0; i < hw->nr_vring; i++) {
/* synchronize remaining new used entries if any */
-   if ((i & 1) == 0)
+   if (internal->device_type == IFCVF_NET) {
+   if ((i & 1) == 0)
+   update_used_ring(internal, i);
+   } else if (internal->device_type == IFCVF_BLK) {
update_used_ring(internal, i);
+   }
 
rte_vhost_get_vhost_vring(vid, i, &vq);
len = IFCVF_USED_RING_LEN(vq.size);
@@ -756,7 +779,9 @@ struct rte_vdpa_dev_info {
}
}
 
-   for (qid = 0; qid < q_num; qid += 2) {
+   for (qid = 0; qid < q_num; qid += 1) {
+   if ((internal->device_type == IFCVF_NET) && (qid & 1))
+   continue;
ev.events = EPOLLIN | EPOLLPRI;
/* leave a flag to mark it's for interrupt */
ev.data.u64 = 1 | qid << 1 |
-- 
1.8.3.1



[PATCH v7 07/18] vhost: add API to get vDPA device type

2022-04-27 Thread Andy Pei
Vhost backends of different devices have different features.
Add an API to get the vDPA device type (currently net device or
blk device), so users can set different features for different
kinds of devices.
Signed-off-by: Andy Pei 
---
 lib/vhost/rte_vhost.h   | 17 +
 lib/vhost/socket.c  | 39 +++
 lib/vhost/vdpa_driver.h |  3 +++
 lib/vhost/version.map   |  2 ++
 4 files changed, 61 insertions(+)

diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h
index c733f85..c977a24 100644
--- a/lib/vhost/rte_vhost.h
+++ b/lib/vhost/rte_vhost.h
@@ -117,6 +117,9 @@
 
 #define RTE_MAX_VHOST_DEVICE   1024
 
+#define VDPA_DEVICE_TYPE_NET 0
+#define VDPA_DEVICE_TYPE_BLK 1
+
 struct rte_vdpa_device;
 
 /**
@@ -486,6 +489,20 @@ struct rte_vdpa_device *
 rte_vhost_driver_get_vdpa_device(const char *path);
 
 /**
+ * Get the device type of the vdpa device.
+ *
+ * @param path
+ *  The vhost-user socket file path
+ * @param type
+ *  the device type of the vdpa device
+ * @return
+ *  0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type);
+
+/**
  * Set the feature bits the vhost-user driver supports.
  *
  * @param path
diff --git a/lib/vhost/socket.c b/lib/vhost/socket.c
index b304339..7da90e8 100644
--- a/lib/vhost/socket.c
+++ b/lib/vhost/socket.c
@@ -619,6 +619,45 @@ struct rte_vdpa_device *
 }
 
 int
+rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type)
+{
+   struct vhost_user_socket *vsocket;
+   struct rte_vdpa_device *vdpa_dev;
+   uint32_t vdpa_type = 0;
+   int ret = 0;
+
+   pthread_mutex_lock(&vhost_user.mutex);
+   vsocket = find_vhost_user_socket(path);
+   if (!vsocket) {
+   VHOST_LOG_CONFIG(ERR,
+"(%s) socket file is not registered yet.\n",
+path);
+   ret = -1;
+   goto unlock_exit;
+   }
+
+   vdpa_dev = vsocket->vdpa_dev;
+   if (!vdpa_dev) {
+   ret = -1;
+   goto unlock_exit;
+   }
+
+   if (vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type) < 0) {
+   VHOST_LOG_CONFIG(ERR,
+   "(%s) failed to get vdpa dev type for socket file.\n",
+   path);
+   ret = -1;
+   goto unlock_exit;
+   }
+
+   *type = vdpa_type;
+
+unlock_exit:
+   pthread_mutex_unlock(&vhost_user.mutex);
+   return ret;
+}
+
+int
 rte_vhost_driver_disable_features(const char *path, uint64_t features)
 {
struct vhost_user_socket *vsocket;
diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index e59a834..9cbd7cd 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -78,6 +78,9 @@ struct rte_vdpa_dev_ops {
/** Set the device configuration space */
int (*set_config)(int vid, uint8_t *config, uint32_t offset,
  uint32_t size, uint32_t flags);
+
+   /** get device type: net device, blk device... */
+   int (*get_dev_type)(struct rte_vdpa_device *dev, uint32_t *type);
 };
 
 /**
diff --git a/lib/vhost/version.map b/lib/vhost/version.map
index 0a66c58..fe4e8de 100644
--- a/lib/vhost/version.map
+++ b/lib/vhost/version.map
@@ -87,6 +87,8 @@ EXPERIMENTAL {
 
# added in 22.03
rte_vhost_async_dma_configure;
+
+   rte_vhost_driver_get_vdpa_dev_type;
 };
 
 INTERNAL {
-- 
1.8.3.1



[PATCH v7 08/18] vdpa/ifc: add get device type ops to ifc driver

2022-04-27 Thread Andy Pei
Add get device type ops to ifc driver.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 8a260b7..99a6ab0 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1300,6 +1300,15 @@ struct rte_vdpa_dev_info {
return 0;
 }
 
+static int
+ifcvf_blk_get_device_type(struct rte_vdpa_device *vdev,
+   uint32_t *type)
+{
+   RTE_SET_USED(vdev);
+   *type = VDPA_DEVICE_TYPE_BLK;
+   return 0;
+}
+
 static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
.get_queue_num = ifcvf_get_queue_num,
.get_features = ifcvf_get_vdpa_features,
@@ -1313,6 +1322,7 @@ struct rte_vdpa_dev_info {
.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
.get_notify_area = ifcvf_get_notify_area,
.get_config = ifcvf_blk_get_config,
+   .get_dev_type = ifcvf_blk_get_device_type,
 };
 
 struct rte_vdpa_dev_info dev_info[] = {
-- 
1.8.3.1



[PATCH v7 09/18] examples/vdpa: add vDPA blk support in example

2022-04-27 Thread Andy Pei
Add virtio blk device support to vDPA example.

Signed-off-by: Andy Pei 
---
 examples/vdpa/main.c | 57 +++
 examples/vdpa/vdpa_blk_compact.h | 65 
 2 files changed, 122 insertions(+)
 create mode 100644 examples/vdpa/vdpa_blk_compact.h

diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
index 5ab0765..2544141 100644
--- a/examples/vdpa/main.c
+++ b/examples/vdpa/main.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include "vdpa_blk_compact.h"
 
 #define MAX_PATH_LEN 128
 #define MAX_VDPA_SAMPLE_PORTS 1024
@@ -159,8 +160,54 @@ struct vdpa_port {
 };
 
 static int
+vdpa_blk_device_set_features_and_protocol(const char *path)
+{
+   uint64_t protocol_features = 0;
+   int ret;
+
+   ret = rte_vhost_driver_set_features(path, VHOST_BLK_FEATURES);
+   if (ret != 0) {
+   RTE_LOG(ERR, VDPA,
+   "rte_vhost_driver_set_features for %s failed.\n",
+   path);
+   goto out;
+   }
+
+   ret = rte_vhost_driver_disable_features(path,
+   VHOST_BLK_DISABLED_FEATURES);
+   if (ret != 0) {
+   RTE_LOG(ERR, VDPA,
+   "rte_vhost_driver_disable_features for %s failed.\n",
+   path);
+   goto out;
+   }
+
+   ret = rte_vhost_driver_get_protocol_features(path, &protocol_features);
+   if (ret != 0) {
+   RTE_LOG(ERR, VDPA,
+   "rte_vhost_driver_get_protocol_features for %s 
failed.\n",
+   path);
+   goto out;
+   }
+
+   protocol_features |= VHOST_BLK_PROTOCOL_FEATURES;
+
+   ret = rte_vhost_driver_set_protocol_features(path, protocol_features);
+   if (ret != 0) {
+   RTE_LOG(ERR, VDPA,
+   "rte_vhost_driver_set_protocol_features for %s 
failed.\n",
+   path);
+   goto out;
+   }
+
+out:
+   return ret;
+}
+
+static int
 start_vdpa(struct vdpa_port *vport)
 {
+   uint32_t device_type = 0;
int ret;
char *socket_path = vport->ifname;
 
@@ -192,6 +239,16 @@ struct vdpa_port {
"attach vdpa device failed: %s\n",
socket_path);
 
+   ret = rte_vhost_driver_get_vdpa_dev_type(socket_path, &device_type);
+   if (ret == 0 && device_type == VDPA_DEVICE_TYPE_BLK) {
+   RTE_LOG(NOTICE, VDPA, "is a blk device\n");
+   ret = vdpa_blk_device_set_features_and_protocol(socket_path);
+   if (ret != 0)
+   rte_exit(EXIT_FAILURE,
+   "set vhost blk driver features and protocol 
features failed: %s\n",
+   socket_path);
+   }
+
if (rte_vhost_driver_start(socket_path) < 0)
rte_exit(EXIT_FAILURE,
"start vhost driver failed: %s\n",
diff --git a/examples/vdpa/vdpa_blk_compact.h b/examples/vdpa/vdpa_blk_compact.h
new file mode 100644
index 000..136c3f6
--- /dev/null
+++ b/examples/vdpa/vdpa_blk_compact.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _VDPA_BLK_COMPACT_H_
+#define _VDPA_BLK_COMPACT_H_
+
+/**
+ * @file
+ *
+ * Device specific vhost lib
+ */
+
+#include 
+
+#include 
+#include 
+
+/* Feature bits */
+#define VIRTIO_BLK_F_SIZE_MAX 1/* Indicates maximum segment size */
+#define VIRTIO_BLK_F_SEG_MAX  2/* Indicates maximum # of segments */
+#define VIRTIO_BLK_F_GEOMETRY 4/* Legacy geometry available  */
+#define VIRTIO_BLK_F_BLK_SIZE 6/* Block size of disk is available */
+#define VIRTIO_BLK_F_TOPOLOGY 10   /* Topology information is available */
+#define VIRTIO_BLK_F_MQ   12   /* support more than one vq */
+
+/* Legacy feature bits */
+#ifndef VIRTIO_BLK_NO_LEGACY
+#define VIRTIO_BLK_F_BARRIER  0/* Does host support barriers? */
+#define VIRTIO_BLK_F_SCSI 7/* Supports scsi command passthru */
+#define VIRTIO_BLK_F_CONFIG_WCE   11   /* Writeback mode available in config */
+#endif /* !VIRTIO_BLK_NO_LEGACY */
+
+#ifndef VHOST_USER_F_PROTOCOL_FEATURES
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+#endif
+
+#define VHOST_BLK_FEATURES_BASE ((1ULL << VHOST_F_LOG_ALL) | \
+   (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
+   (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \
+   (1ULL << VIRTIO_RING_F_EVENT_IDX) | \
+   (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
+   (1ULL << VIRTIO_F_VERSION_1))
+
+#define VHOST_BLK_DISABLED_FEATURES_BASE ((1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
+   (1ULL << VIRTIO_RING_F_EVENT_IDX))
+
+#define VHOST_BLK_FEATURES (VHOST_BLK_FEATURES_BASE | \
+   (1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
+   (1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
+   (1ULL << VIRTIO_BL

[PATCH v7 10/18] usertools: add support for virtio blk device

2022-04-27 Thread Andy Pei
Add virtio blk device support to devbind.

Signed-off-by: Andy Pei 
---
 usertools/dpdk-devbind.py | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
index ace4627..7231be4 100755
--- a/usertools/dpdk-devbind.py
+++ b/usertools/dpdk-devbind.py
@@ -72,6 +72,9 @@
 cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4',
  'SVendor': None, 'SDevice': None}
 
+virtio_blk = {'Class': '01', 'Vendor': "1af4", 'Device': '1001',
+'SVendor': None, 'SDevice': None}
+
 network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class]
 baseband_devices = [acceleration_class]
 crypto_devices = [encryption_class, intel_processor_class]
@@ -82,7 +85,7 @@
 compress_devices = [cavium_zip]
 regex_devices = [cn9k_ree]
 misc_devices = [cnxk_bphy, cnxk_bphy_cgx, cnxk_inl_dev,
-intel_ntb_skx, intel_ntb_icx]
+intel_ntb_skx, intel_ntb_icx, virtio_blk]
 
 # global dict ethernet devices present. Dictionary indexed by PCI address.
 # Each device within this is itself a dictionary of device properties
-- 
1.8.3.1



[PATCH v7 11/18] vdpa/ifc: add set vring state for blk device

2022-04-27 Thread Andy Pei
The set_vring_state op is mandatory, so add one for the blk device.
Currently set_vring_state for the blk device is a stub and performs
no action.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 99a6ab0..ca49bc3 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1290,6 +1290,16 @@ struct rte_vdpa_dev_info {
 }
 
 static int
+ifcvf_blk_set_vring_state(int vid, int vring, int state)
+{
+   RTE_SET_USED(vid);
+   RTE_SET_USED(vring);
+   RTE_SET_USED(state);
+
+   return 0;
+}
+
+static int
 ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
uint64_t *features)
 {
@@ -1316,7 +1326,7 @@ struct rte_vdpa_dev_info {
.get_protocol_features = ifcvf_blk_get_protocol_features,
.dev_conf = ifcvf_dev_config,
.dev_close = ifcvf_dev_close,
-   .set_vring_state = NULL,
+   .set_vring_state = ifcvf_blk_set_vring_state,
.migration_done = NULL,
.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
-- 
1.8.3.1



[PATCH v7 12/18] vdpa/ifc: add some log at vDPA launch before qemu connect

2022-04-27 Thread Andy Pei
Log the virtio blk device config space information at vDPA launch,
before QEMU connects.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 28 
 1 file changed, 28 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index ca49bc3..4060a44 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1363,6 +1363,9 @@ struct rte_vdpa_dev_info dev_info[] = {
struct rte_kvargs *kvlist = NULL;
int ret = 0;
int16_t device_id;
+   uint64_t capacity = 0;
+   uint8_t *byte;
+   uint32_t i;
 
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
return 0;
@@ -1429,6 +1432,31 @@ struct rte_vdpa_dev_info dev_info[] = {
internal->features = features &
~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
internal->features |= dev_info[IFCVF_BLK].features;
+
+   /* cannot read 64-bit register in one attempt,
+* so read byte by byte.
+*/
+   for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+   byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+   capacity |= (uint64_t)*byte << (i * 8);
+   }
+   DRV_LOG(INFO, "capacity  : %"PRIu64"G", capacity >> 21);
+
+   DRV_LOG(INFO, "size_max  : 0x%08x",
+   internal->hw.blk_cfg->size_max);
+   DRV_LOG(INFO, "seg_max   : 0x%08x",
+   internal->hw.blk_cfg->seg_max);
+   DRV_LOG(INFO, "blk_size  : 0x%08x",
+   internal->hw.blk_cfg->blk_size);
+   DRV_LOG(INFO, "geometry");
+   DRV_LOG(INFO, "cylinders: %u",
+   internal->hw.blk_cfg->geometry.cylinders);
+   DRV_LOG(INFO, "heads: %u",
+   internal->hw.blk_cfg->geometry.heads);
+   DRV_LOG(INFO, "sectors  : %u",
+   internal->hw.blk_cfg->geometry.sectors);
+   DRV_LOG(INFO, "num_queues: 0x%08x",
+   internal->hw.blk_cfg->num_queues);
}
 
list->internal = internal;
-- 
1.8.3.1



[PATCH v7 13/18] vdpa/ifc: read virtio max queues from hardware

2022-04-27 Thread Andy Pei
Previously, max_queues was hard-coded to IFCVF_MAX_QUEUES.
Now max_queues is the minimum of IFCVF_MAX_QUEUES and the hardware
num_queues.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 4060a44..5a8cf1c 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1457,6 +1457,10 @@ struct rte_vdpa_dev_info dev_info[] = {
internal->hw.blk_cfg->geometry.sectors);
DRV_LOG(INFO, "num_queues: 0x%08x",
internal->hw.blk_cfg->num_queues);
+
+   /* reset max_queues here, to keep the modification minimal */
+   internal->max_queues = RTE_MIN(IFCVF_MAX_QUEUES,
+   internal->hw.blk_cfg->num_queues);
}
 
list->internal = internal;
-- 
1.8.3.1



[PATCH v7 14/18] vdpa/ifc: add interrupt and handle for virtio blk

2022-04-27 Thread Andy Pei
Create a thread to poll and relay config space change interrupts.
Use VHOST_USER_SLAVE_CONFIG_CHANGE_MSG to inform QEMU.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 112 ++
 1 file changed, 112 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 5a8cf1c..0e94e1f 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -53,7 +53,9 @@ struct ifcvf_internal {
int vfio_group_fd;
int vfio_dev_fd;
pthread_t tid;  /* thread for notify relay */
+   pthread_t intr_tid; /* thread for intr relay */
int epfd;
+   int csc_fd;
int vid;
struct rte_vdpa_device *vdev;
uint16_t max_queues;
@@ -558,6 +560,107 @@ struct rte_vdpa_dev_info {
return 0;
 }
 
+static void
+virtio_interrupt_handler(struct ifcvf_internal *internal)
+{
+   int vid = internal->vid;
+   int ret;
+
+   ret = rte_vhost_slave_config_change(vid, 1);
+   if (ret)
+   DRV_LOG(ERR, "failed to notify the guest about configuration 
space change.");
+}
+
+static void *
+intr_relay(void *arg)
+{
+   struct ifcvf_internal *internal = (struct ifcvf_internal *)arg;
+   struct epoll_event csc_event;
+   struct epoll_event ev;
+   uint64_t buf;
+   int nbytes;
+   int csc_fd, csc_val = 0;
+
+   csc_fd = epoll_create(1);
+   if (csc_fd < 0) {
+   DRV_LOG(ERR, "failed to create epoll for config space change.");
+   return NULL;
+   }
+
+   ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
+   ev.data.fd = rte_intr_fd_get(internal->pdev->intr_handle);
+   if (epoll_ctl(csc_fd, EPOLL_CTL_ADD,
+   rte_intr_fd_get(internal->pdev->intr_handle), &ev) < 0) {
+   DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
+   return NULL;
+   }
+
+   internal->csc_fd = csc_fd;
+
+   for (;;) {
+   csc_val = epoll_wait(csc_fd, &csc_event, 1, -1);
+   if (csc_val < 0) {
+   if (errno == EINTR)
+   continue;
+   DRV_LOG(ERR, "epoll_wait return fail\n");
+   return NULL;
+   } else if (csc_val == 0) {
+   continue;
+   } else {
+   /* csc_val > 0 */
+   nbytes = read(csc_event.data.fd, &buf, 8);
+   if (nbytes < 0) {
+   if (errno == EINTR || errno == EWOULDBLOCK)
+   continue;
+   DRV_LOG(ERR, "Error reading from file 
descriptor %d: %s\n",
+   csc_event.data.fd,
+   strerror(errno));
+   return NULL;
+   } else if (nbytes == 0) {
+   DRV_LOG(ERR, "Read nothing from file descriptor 
%d\n",
+   csc_event.data.fd);
+   continue;
+   } else {
+   virtio_interrupt_handler(internal);
+   }
+   }
+   }
+
+   return NULL;
+}
+
+static int
+setup_intr_relay(struct ifcvf_internal *internal)
+{
+   int ret;
+
+   ret = pthread_create(&internal->intr_tid, NULL, intr_relay,
+   (void *)internal);
+   if (ret) {
+   DRV_LOG(ERR, "failed to create notify relay pthread.");
+   return -1;
+   }
+   return 0;
+}
+
+static int
+unset_intr_relay(struct ifcvf_internal *internal)
+{
+   void *status;
+
+   if (internal->intr_tid) {
+   pthread_cancel(internal->intr_tid);
+   pthread_join(internal->intr_tid, &status);
+   }
+   internal->intr_tid = 0;
+
+   if (internal->csc_fd >= 0)
+   close(internal->csc_fd);
+   internal->csc_fd = -1;
+
+   return 0;
+}
+
 static int
 update_datapath(struct ifcvf_internal *internal)
 {
@@ -584,10 +687,16 @@ struct rte_vdpa_dev_info {
if (ret)
goto err;
 
+   ret = setup_intr_relay(internal);
+   if (ret)
+   goto err;
+
rte_atomic32_set(&internal->running, 1);
} else if (rte_atomic32_read(&internal->running) &&
   (!rte_atomic32_read(&internal->started) ||
!rte_atomic32_read(&internal->dev_attached))) {
+   ret = unset_intr_relay(internal);
+
ret = unset_notify_relay(internal);
if (ret)
goto err;
@@ -880,6 +989,9 @@ struct rte_vdpa_dev_info {
/* stop the direct IO data path */
unset_notify_relay(internal);
vdpa_ifcvf_stop(internal);
+
+   unset_intr_relay(internal);

[PATCH v7 15/18] vdpa/ifc: add is blk flag to ifcvf HW struct

2022-04-27 Thread Andy Pei
Add is_blk flag to ifcvf_hw, and init is_blk during probe.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/base/ifcvf.h | 1 +
 drivers/vdpa/ifc/ifcvf_vdpa.c | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 769c603..8591ef1 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -149,6 +149,7 @@ struct ifcvf_hw {
u8 *lm_cfg;
struct vring_info vring[IFCVF_MAX_QUEUES * 2];
u8 nr_vring;
+   u8 is_blk;
struct ifcvf_pci_mem_resource mem_resource[IFCVF_PCI_MAX_RESOURCE];
 };
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 0e94e1f..4923bc1 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1536,11 +1536,13 @@ struct rte_vdpa_dev_info dev_info[] = {
 
if (device_id == VIRTIO_ID_NET) {
internal->device_type = IFCVF_NET;
+   internal->hw.is_blk = IFCVF_NET;
internal->features = features &
~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
internal->features |= dev_info[IFCVF_NET].features;
} else if (device_id == VIRTIO_ID_BLOCK) {
internal->device_type = IFCVF_BLK;
+   internal->hw.is_blk = IFCVF_BLK;
internal->features = features &
~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
internal->features |= dev_info[IFCVF_BLK].features;
-- 
1.8.3.1



[PATCH v7 16/18] vdpa/ifc/base: access correct register for blk device

2022-04-27 Thread Andy Pei
1. For the blk device, last_avail_idx is in the lower 16 bits of the register.
2. The address of the ring_state register differs between net and blk devices.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/base/ifcvf.c | 36 +---
 drivers/vdpa/ifc/base/ifcvf.h |  1 +
 2 files changed, 30 insertions(+), 7 deletions(-)
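
The addressing rule this patch encodes: a blk device owns one full
IFCVF_LM_CFG_SIZE slot per queue, while a net device packs a queue
pair into each slot, 4 bytes apart. A hedged helper sketch capturing
that rule (the function name is illustrative, not part of this patch):

    /* Compute the ring-state word address for queue i. */
    static u32 *
    ring_state_addr(struct ifcvf_hw *hw, u32 i)
    {
            u8 *base = hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET;

            if (hw->is_blk == IFCVF_BLK)
                    return (u32 *)(base + i * IFCVF_LM_CFG_SIZE);
            /* IFCVF_NET: rx/tx of a queue pair share one slot */
            return (u32 *)(base + (i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4);
    }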

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index d10c1fd..4d5881a 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -218,10 +218,18 @@
&cfg->queue_used_hi);
IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
 
-   *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-   (i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) =
-   (u32)hw->vring[i].last_avail_idx |
-   ((u32)hw->vring[i].last_used_idx << 16);
+   if (hw->is_blk == IFCVF_BLK) {
+   *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+   i * IFCVF_LM_CFG_SIZE) =
+   (u32)hw->vring[i].last_avail_idx |
+   ((u32)hw->vring[i].last_used_idx << 16);
+   } else if (hw->is_blk == IFCVF_NET) {
+   *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+   (i / 2) * IFCVF_LM_CFG_SIZE +
+   (i % 2) * 4) =
+   (u32)hw->vring[i].last_avail_idx |
+   ((u32)hw->vring[i].last_used_idx << 16);
+   }
 
IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
if (IFCVF_READ_REG16(&cfg->queue_msix_vector) ==
@@ -254,9 +262,23 @@
IFCVF_WRITE_REG16(i, &cfg->queue_select);
IFCVF_WRITE_REG16(0, &cfg->queue_enable);
IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector);
-   ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-   (i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4);
-   hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
+
+   if (hw->is_blk == IFCVF_BLK) {
+   ring_state = *(u32 *)(hw->lm_cfg +
+   IFCVF_LM_RING_STATE_OFFSET +
+   i * IFCVF_LM_CFG_SIZE);
+   } else if (hw->is_blk == IFCVF_NET) {
+   ring_state = *(u32 *)(hw->lm_cfg +
+   IFCVF_LM_RING_STATE_OFFSET +
+   (i / 2) * IFCVF_LM_CFG_SIZE +
+   (i % 2) * 4);
+   }
+
+   if (hw->is_blk == IFCVF_BLK)
+   hw->vring[i].last_avail_idx =
+   (u16)(ring_state & IFCVF_16_BIT_MASK);
+   else if (hw->is_blk == IFCVF_NET)
+   hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
}
 }
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 8591ef1..ff11b12 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -65,6 +65,7 @@
 #define IFCVF_MEDIATED_VRING   0x2000
 
 #define IFCVF_32_BIT_MASK  0x
+#define IFCVF_16_BIT_MASK  0x
 
 #ifndef VHOST_USER_PROTOCOL_F_CONFIG
 #define VHOST_USER_PROTOCOL_F_CONFIG   9
-- 
1.8.3.1



[PATCH v7 17/18] vdpa/ifc: blk device pause with no inflight IO

2022-04-27 Thread Andy Pei
When the virtio blk device is paused, make sure the hardware
last_avail_idx and last_used_idx are the same.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 25 +
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 4923bc1..def6adf 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -314,12 +314,12 @@ struct rte_vdpa_dev_info {
 vdpa_ifcvf_stop(struct ifcvf_internal *internal)
 {
struct ifcvf_hw *hw = &internal->hw;
-   struct rte_vhost_vring vq;
uint32_t i;
int vid;
uint64_t features = 0;
uint64_t log_base = 0, log_size = 0;
uint64_t len;
+   u32 ring_state = 0;
 
vid = internal->vid;
 
@@ -328,13 +328,22 @@ struct rte_vdpa_dev_info {
 */
if (internal->device_type == IFCVF_BLK) {
for (i = 0; i < hw->nr_vring; i++) {
-   rte_vhost_get_vhost_vring(internal->vid, i, &vq);
-   while (vq.avail->idx != vq.used->idx) {
-   ifcvf_notify_queue(hw, i);
-   usleep(10);
-   }
-   hw->vring[i].last_avail_idx = vq.avail->idx;
-   hw->vring[i].last_used_idx = vq.used->idx;
+   do {
+   if (hw->lm_cfg != NULL)
+   ring_state = *(u32 *)(hw->lm_cfg +
+   IFCVF_LM_RING_STATE_OFFSET +
+   i * IFCVF_LM_CFG_SIZE);
+   hw->vring[i].last_avail_idx =
+   (u16)(ring_state & IFCVF_16_BIT_MASK);
+   hw->vring[i].last_used_idx =
+   (u16)(ring_state >> 16);
+   if (hw->vring[i].last_avail_idx !=
+   hw->vring[i].last_used_idx) {
+   ifcvf_notify_queue(hw, i);
+   usleep(10);
+   }
+   } while (hw->vring[i].last_avail_idx !=
+   hw->vring[i].last_used_idx);
}
}
 
-- 
1.8.3.1



[PATCH v7 18/18] vhost: make sure each queue callfd is configured

2022-04-27 Thread Andy Pei
During the vhost data path setup, QEMU first creates an initial
call fd and creates another call fd at the end. The final call fd
is the one used to relay notifications.
In the original code, dev_conf is invoked once the kick fd is set,
so it captures the first call fd. Even though the actual call fd
is set afterwards, the data path will not work correctly.

Signed-off-by: Andy Pei 
---
 lib/vhost/vhost_user.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index e925428..82122b6 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -3230,12 +3230,26 @@ typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
if (!vdpa_dev)
goto out;
 
+   if (request != VHOST_USER_SET_VRING_CALL)
+   goto out;
+
if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) {
if (vdpa_dev->ops->dev_conf(dev->vid))
VHOST_LOG_CONFIG(ERR, "(%s) failed to configure vDPA 
device\n",
dev->ifname);
else
dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED;
+   } else {
+   /* When VIRTIO_DEV_VDPA_CONFIGURED is already set,
+* close the device and configure it again to make sure
+* the call fd of each queue is configured correctly.
+*/
+   if (vdpa_dev->ops->dev_close(dev->vid))
+   VHOST_LOG_CONFIG(ERR,
+"Failed to close vDPA device\n");
+   if (vdpa_dev->ops->dev_conf(dev->vid))
+   VHOST_LOG_CONFIG(ERR,
+"Failed to re-config vDPA device\n");
}
 
 out:
-- 
1.8.3.1



[PATCH 1/1] examples/ipsec-secgw: create lookaside sessions at init

2022-04-27 Thread Volodymyr Fialko
In event lookaside mode the same session can be handled by multiple
cores, and session creation in the datapath would lead to multiple
cores trying to create the same session simultaneously.
To avoid that case and to enable event lookaside mode in the future,
lookaside sessions are now created at initialization in sa_add_rules().

All sessions (inline and lookaside) are now created during the init
process, so the session pool information was removed from the ipsec
context. The core id was added to obtain the correct crypto device
queue pair for the current core.

Signed-off-by: Volodymyr Fialko 
---
Depends-on: series-22265 ("examples/ipsec-secgw: destroy lookaside sessions")
Depends-on: series-22593 ("examples/ipsec-secgw: move fast path helper functions")

 examples/ipsec-secgw/ipsec-secgw.c   |  27 +++
 examples/ipsec-secgw/ipsec.c | 101 +--
 examples/ipsec-secgw/ipsec.h |  13 ++--
 examples/ipsec-secgw/ipsec_process.c |  33 +++--
 examples/ipsec-secgw/ipsec_worker.c  |   8 +--
 examples/ipsec-secgw/sa.c|  45 +++-
 6 files changed, 121 insertions(+), 106 deletions(-)
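
A hedged sketch of how a worker resolves its crypto device queue pair
from the per-lcore cdev map at runtime (struct cdev_key follows the
existing example code; the SA algorithm field is illustrative):

    struct cdev_key key = { 0 };
    void *data;

    key.lcore_id = ipsec_ctx->lcore_id;
    key.aead_algo = sa->aead_algo;
    /* One lookup per (lcore, algorithm); sessions are no longer
     * created on the datapath.
     */
    if (rte_hash_lookup_data(ipsec_ctx->cdev_map, &key, &data) < 0)
            rte_exit(EXIT_FAILURE, "no cdev mapping for lcore %u\n",
                    ipsec_ctx->lcore_id);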

diff --git a/examples/ipsec-secgw/ipsec-secgw.c b/examples/ipsec-secgw/ipsec-secgw.c
index 57acc01e3b..05b57ce3f5 100644
--- a/examples/ipsec-secgw/ipsec-secgw.c
+++ b/examples/ipsec-secgw/ipsec-secgw.c
@@ -684,16 +684,12 @@ ipsec_poll_mode_worker(void)
qconf->inbound.sp6_ctx = socket_ctx[socket_id].sp_ip6_in;
qconf->inbound.sa_ctx = socket_ctx[socket_id].sa_in;
qconf->inbound.cdev_map = cdev_map_in;
-   qconf->inbound.session_pool = socket_ctx[socket_id].session_pool;
-   qconf->inbound.session_priv_pool =
-   socket_ctx[socket_id].session_priv_pool;
+   qconf->inbound.lcore_id = lcore_id;
qconf->outbound.sp4_ctx = socket_ctx[socket_id].sp_ip4_out;
qconf->outbound.sp6_ctx = socket_ctx[socket_id].sp_ip6_out;
qconf->outbound.sa_ctx = socket_ctx[socket_id].sa_out;
qconf->outbound.cdev_map = cdev_map_out;
-   qconf->outbound.session_pool = socket_ctx[socket_id].session_pool;
-   qconf->outbound.session_priv_pool =
-   socket_ctx[socket_id].session_priv_pool;
+   qconf->outbound.lcore_id = lcore_id;
qconf->frag.pool_indir = socket_ctx[socket_id].mbuf_pool_indir;
 
rc = ipsec_sad_lcore_cache_init(app_sa_prm.cache_sz);
@@ -1458,7 +1454,7 @@ check_all_ports_link_status(uint32_t port_mask)
 }
 
 static int32_t
-add_mapping(struct rte_hash *map, const char *str, uint16_t cdev_id,
+add_mapping(const char *str, uint16_t cdev_id,
uint16_t qp, struct lcore_params *params,
struct ipsec_ctx *ipsec_ctx,
const struct rte_cryptodev_capabilities *cipher,
@@ -1477,7 +1473,7 @@ add_mapping(struct rte_hash *map, const char *str, uint16_t cdev_id,
if (aead)
key.aead_algo = aead->sym.aead.algo;
 
-   ret = rte_hash_lookup(map, &key);
+   ret = rte_hash_lookup(ipsec_ctx->cdev_map, &key);
if (ret != -ENOENT)
return 0;
 
@@ -1499,7 +1495,7 @@ add_mapping(struct rte_hash *map, const char *str, uint16_t cdev_id,
cdev_id, qp, i);
}
 
-   ret = rte_hash_add_key_data(map, &key, (void *)i);
+   ret = rte_hash_add_key_data(ipsec_ctx->cdev_map, &key, (void *)i);
if (ret < 0) {
printf("Faled to insert cdev mapping for (lcore %u, "
"cdev %u, qp %u), errno %d\n",
@@ -1517,20 +1513,19 @@ add_cdev_mapping(struct rte_cryptodev_info *dev_info, uint16_t cdev_id,
 {
int32_t ret = 0;
const struct rte_cryptodev_capabilities *i, *j;
-   struct rte_hash *map;
struct lcore_conf *qconf;
struct ipsec_ctx *ipsec_ctx;
const char *str;
 
qconf = &lcore_conf[params->lcore_id];
 
-   if ((unprotected_port_mask & (1 << params->port_id)) == 0) {
-   map = cdev_map_out;
+   if (!is_unprotected_port(params->port_id)) {
ipsec_ctx = &qconf->outbound;
+   ipsec_ctx->cdev_map = cdev_map_out;
str = "Outbound";
} else {
-   map = cdev_map_in;
ipsec_ctx = &qconf->inbound;
+   ipsec_ctx->cdev_map = cdev_map_in;
str = "Inbound";
}
 
@@ -1545,7 +1540,7 @@ add_cdev_mapping(struct rte_cryptodev_info *dev_info, uint16_t cdev_id,
continue;
 
if (i->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AEAD) {
-   ret |= add_mapping(map, str, cdev_id, qp, params,
+   ret |= add_mapping(str, cdev_id, qp, params,
ipsec_ctx, NULL, NULL, i);
continue;
}
@@ -1561,7 +1556,7 @@ add_cdev_mapping(struct rte_cryptodev_info *dev_info, uint16_t cdev_id,
if (j->sym.x

[PATCH v3] sched: enable/disable TC OV at runtime

2022-04-27 Thread Marcin Danilewicz
Added a new API to enable or disable TC oversubscription for the
best-effort traffic class at the subport level.
Added changes after review and increased throughput.

By default TC OV is disabled.

Signed-off-by: Marcin Danilewicz 
---
 lib/sched/rte_sched.c | 189 +++---
 lib/sched/rte_sched.h |  18 
 lib/sched/version.map |   3 +
 3 files changed, 178 insertions(+), 32 deletions(-)
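
A usage sketch of the new API ("port" is an already-configured
rte_sched_port; subport 0 is illustrative). It should be called after
rte_sched_subport_config(), since enabling TC OV reads the subport
profile:

    /* Enable best-effort TC oversubscription at runtime. */
    int ret = rte_sched_subport_tc_ov_config(port, 0, true);

    if (ret != 0)
            RTE_LOG(ERR, SCHED, "enabling TC OV failed: %d\n", ret);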

diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index ec74bee939..6e7d81df46 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -213,6 +213,9 @@ struct rte_sched_subport {
uint8_t *bmp_array;
struct rte_mbuf **queue_array;
uint8_t memory[0] __rte_cache_aligned;
+
+   /* TC oversubscription activation */
+   int is_tc_ov_enabled;
 } __rte_cache_aligned;
 
 struct rte_sched_port {
@@ -1165,6 +1168,45 @@ rte_sched_cman_config(struct rte_sched_port *port,
 }
 #endif
 
+int
+rte_sched_subport_tc_ov_config(struct rte_sched_port *port,
+   uint32_t subport_id,
+   bool tc_ov_enable)
+{
+   struct rte_sched_subport *s;
+   struct rte_sched_subport_profile *profile;
+
+   if (port == NULL) {
+   RTE_LOG(ERR, SCHED,
+   "%s: Incorrect value for parameter port\n", __func__);
+   return -EINVAL;
+   }
+
+   if (subport_id >= port->n_subports_per_port) {
+   RTE_LOG(ERR, SCHED,
+   "%s: Incorrect value for parameter subport id\n", 
__func__);
+   return  -EINVAL;
+   }
+
+   s = port->subports[subport_id];
+   s->is_tc_ov_enabled = tc_ov_enable ? 1 : 0;
+
+   if (s->is_tc_ov_enabled) {
+   /* TC oversubscription */
+   s->tc_ov_wm_min = port->mtu;
+   s->tc_ov_period_id = 0;
+   s->tc_ov = 0;
+   s->tc_ov_n = 0;
+   s->tc_ov_rate = 0;
+
+   profile = port->subport_profiles + s->profile;
+   s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile->tc_period,
+   s->pipe_tc_be_rate_max);
+   s->tc_ov_wm = s->tc_ov_wm_max;
+   }
+   return 0;
+}
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
uint32_t subport_id,
@@ -1254,6 +1296,9 @@ rte_sched_subport_config(struct rte_sched_port *port,
s->n_pipe_profiles = params->n_pipe_profiles;
s->n_max_pipe_profiles = params->n_max_pipe_profiles;

+   /* TC over-subscription is disabled by default */
+   s->is_tc_ov_enabled = 0;
+
 #ifdef RTE_SCHED_CMAN
if (params->cman_params != NULL) {
s->cman_enabled = true;
@@ -1316,13 +1361,6 @@ rte_sched_subport_config(struct rte_sched_port *port,
 
for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++)
s->grinder_base_bmp_pos[i] = RTE_SCHED_PIPE_INVALID;
-
-   /* TC oversubscription */
-   s->tc_ov_wm_min = port->mtu;
-   s->tc_ov_period_id = 0;
-   s->tc_ov = 0;
-   s->tc_ov_n = 0;
-   s->tc_ov_rate = 0;
}
 
{
@@ -1342,9 +1380,6 @@ rte_sched_subport_config(struct rte_sched_port *port,
else
profile->tc_credits_per_period[i] = 0;
 
-   s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile->tc_period,
-   s->pipe_tc_be_rate_max);
-   s->tc_ov_wm = s->tc_ov_wm_max;
s->profile = subport_profile_id;
 
}
@@ -1417,17 +1452,20 @@ rte_sched_pipe_config(struct rte_sched_port *port,
double pipe_tc_be_rate =
(double) 
params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
/ (double) params->tc_period;
-   uint32_t tc_be_ov = s->tc_ov;
 
-   /* Unplug pipe from its subport */
-   s->tc_ov_n -= params->tc_ov_weight;
-   s->tc_ov_rate -= pipe_tc_be_rate;
-   s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
+   if (s->is_tc_ov_enabled) {
+   uint32_t tc_be_ov = s->tc_ov;

-   if (s->tc_ov != tc_be_ov) {
-   RTE_LOG(DEBUG, SCHED,
-   "Subport %u Best-effort TC oversubscription is 
OFF (%.4lf >= %.4lf)\n",
-   subport_id, subport_tc_be_rate, s->tc_ov_rate);
+   /* Unplug pipe from its subport */
+   s->tc_ov_n -= params->tc_ov_weight;
+   s->tc_ov_rate -= pipe_tc_be_rate;
+   s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
+
+   if (s->tc_ov != tc_be_ov) {
+   RTE_LOG(DEBUG, SCHED,
+   "Subport %u Best-effort TC 
oversubscription is OFF (%

RE: [PATCH v3] sched: enable/disable TC OV at runtime

2022-04-27 Thread Dumitrescu, Cristian
Marcin,

Every time you send a new version, you need to copy the maintainers and the 
other relevant people, otherwise there is a high chance we are not going to see 
your patch, thanks! I only saw this one due to pure chance ;)

Regards,
Cristian

> -Original Message-
> From: Marcin Danilewicz 
> Sent: Wednesday, April 27, 2022 9:59 AM
> To: dev@dpdk.org
> Subject: [PATCH v3] sched: enable/disable TC OV at runtime
> 
> Added a new API to enable or disable TC oversubscription for the
> best-effort traffic class at the subport level.
> Added changes after review and increased throughput.
> 
> By default TC OV is disabled.
> 
> Signed-off-by: Marcin Danilewicz 
> ---
>  lib/sched/rte_sched.c | 189 +++---
>  lib/sched/rte_sched.h |  18 
>  lib/sched/version.map |   3 +
>  3 files changed, 178 insertions(+), 32 deletions(-)
> [...]

RE: [PATCH v3] sched: enable/disable TC OV at runtime

2022-04-27 Thread Dumitrescu, Cristian
Adding Jasvinder

> -Original Message-
> From: Dumitrescu, Cristian
> Sent: Wednesday, April 27, 2022 10:37 AM
> To: Marcin Danilewicz ; dev@dpdk.org
> Subject: RE: [PATCH v3] sched: enable/disable TC OV at runtime
> 
> Marcin,
> 
> Every time you send a new version, you need to copy the maintainers and the
> other relevant people, otherwise there is a high chance we are not going to 
> see
> your patch, thanks! I only saw this one due to pure chance ;)
> 
> Regards,
> Cristian
> 
> > -Original Message-
> > From: Marcin Danilewicz 
> > Sent: Wednesday, April 27, 2022 9:59 AM
> > To: dev@dpdk.org
> > Subject: [PATCH v3] sched: enable/disable TC OV at runtime
> >
> > Added a new API to enable or disable TC oversubscription for the
> > best-effort traffic class at the subport level.
> > Added changes after review and increased throughput.
> >
> > By default TC OV is disabled.
> >
> > Signed-off-by: Marcin Danilewicz 
> > ---
> >  lib/sched/rte_sched.c | 189 +++---
> >  lib/sched/rte_sched.h |  18 
> >  lib/sched/version.map |   3 +
> >  3 files changed, 178 insertions(+), 32 deletions(-)
> >
> > [...]
Re: [PATCH v2] net/nfp: update how MAX MTU is read

2022-04-27 Thread Ferruh Yigit

On 4/27/2022 9:37 AM, Niklas Söderlund wrote:

Hello,

I have a question about the Checks that ran on this patch in patchwork
[1]. It appears the job ci/iol-x86_64-compile-testing,
dpdk_mingw64_compile has failed on a Windows Server 2019 build. But the
log from the job appears to be incomplete, as it contains only 19 lines
of output and stops without an error in the configuration part of meson.



It is not clear why it failed; the patch contains nothing specific to
Windows. I have triggered a new build (on top of next-net); please allow
~15 minutes.


The failure is only flagged as a warning and not as an error in
patchwork. Is it possible that the job in question fails to capture
all output, or that it fails to complete sometimes?



The patchwork warning is there to highlight that a new version of a
patch needs to be sent as a reply to the previous version. This keeps
all versions in the same email thread, which helps reviewers see
previous versions and the comments/changes made to them. It also makes
all versions and their history visible in one place in the mailing
list archives.


There is nothing to do for this version, but please use the
'--in-reply-to' option of 'git send-email' for new patches.



What can we do on our end to remedy this? My concern is that the
patch is blocked due to the warning and I'm unclear on how to move
forward; sorry if I'm just being impatient.



The patch is not blocked for the above reasons; it is in the queue
(which is moving a little slowly in this release for some operational
reasons).



1. https://patchwork.dpdk.org/project/dpdk/patch/20220420134638.24010-1-walter.heym...@corigine.com/

On 2022-04-20 15:46:39 +0200, Walter Heymans wrote:

The 'max_rx_pktlen' value was previously read from hardware, which was
set by the running firmware. This caused confusion due to different
meanings of 'MAX_MTU'. This patch updates the 'max_rx_pktlen' to the
maximum value that the NFP NIC can support. The 'max_mtu' value that is
read from hardware, is assigned to the 'dev_info->max_mtu' variable.

If more layer 2 metadata must be used, the firmware can be updated to
report a smaller 'max_mtu' value.

The constant defined for NFP_FRAME_SIZE_MAX is derived from the maximum
supported buffer size of 10240 bytes, minus 136 bytes reserved by the
hardware and another 56 bytes reserved for expansion in firmware. This
results in a usable maximum packet length of 10048 bytes.

Signed-off-by: Walter Heymans 
Signed-off-by: Niklas Söderlund 
Reviewed-by: Louis Peens 
Reviewed-by: Chaoyong He 
Reviewed-by: Richard Donkin 
---
  drivers/net/nfp/nfp_common.c | 11 ++-
  drivers/net/nfp/nfp_common.h |  3 +++
  2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/net/nfp/nfp_common.c b/drivers/net/nfp/nfp_common.c
index b26770dbfb..52fbda1a79 100644
--- a/drivers/net/nfp/nfp_common.c
+++ b/drivers/net/nfp/nfp_common.c
@@ -692,7 +692,16 @@ nfp_net_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues;
dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues;
dev_info->min_rx_bufsize = RTE_ETHER_MIN_MTU;
-   dev_info->max_rx_pktlen = hw->max_mtu;
+   /*
+* The maximum rx packet length (max_rx_pktlen) is set to the
+* maximum supported frame size that the NFP can handle. This
+* includes layer 2 headers, CRC and other metadata that can
+* optionally be used.
+* The maximum layer 3 MTU (max_mtu) is read from hardware,
+* which was set by the firmware loaded onto the card.
+*/
+   dev_info->max_rx_pktlen = NFP_FRAME_SIZE_MAX;
+   dev_info->max_mtu = hw->max_mtu;
/* Next should change when PF support is implemented */
dev_info->max_mac_addrs = 1;
  
diff --git a/drivers/net/nfp/nfp_common.h b/drivers/net/nfp/nfp_common.h
index 8b35fa119c..8db5ec23f8 100644
--- a/drivers/net/nfp/nfp_common.h
+++ b/drivers/net/nfp/nfp_common.h
@@ -98,6 +98,9 @@ struct nfp_net_adapter;
  /* Number of supported physical ports */
  #define NFP_MAX_PHYPORTS  12
  
+/* Maximum supported NFP frame size (MTU + layer 2 headers) */
+#define NFP_FRAME_SIZE_MAX 10048
+
  #include 
  #include 
  
-- 
2.25.1







[PATCH v2] cryptodev: add elliptic curve diffie hellman

2022-04-27 Thread Arek Kusztal
This commit adds an Elliptic Curve Diffie-Hellman (ECDH) option to
Cryptodev. This could be achieved with EC point multiplication, but:
1) Phase 1 of DH uses the EC generator implicitly, whereas point
multiplication expects the generator to be set manually.
2) It unifies the usage of DH.
3) It can easily be extended to support X25519 and X448.

Signed-off-by: Arek Kusztal 
---
v2:
- added ecdh comments to operation types

Depends-on: series-22684 ("cryptodev: move dh type from xform to dh op")

 lib/cryptodev/rte_crypto_asym.h | 46 +++--
 1 file changed, 26 insertions(+), 20 deletions(-)
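
A hedged sketch of filling the reworked op for an ECDH shared-secret
computation (the buffer variables are placeholders; xform and session
setup are omitted):

    struct rte_crypto_dh_op_param dh_op = { 0 };

    dh_op.op_type = RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE;
    dh_op.priv_key.data = my_priv;        /* big-endian integer */
    dh_op.priv_key.length = my_priv_len;
    dh_op.pub_point.x.data = peer_x;      /* peer public point */
    dh_op.pub_point.x.length = peer_x_len;
    dh_op.pub_point.y.data = peer_y;
    dh_op.pub_point.y.length = peer_y_len;
    dh_op.shared_point.x.data = out_x;    /* output buffers */
    dh_op.shared_point.x.length = out_x_len;
    dh_op.shared_point.y.data = out_y;
    dh_op.shared_point.y.length = out_y_len;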

diff --git a/lib/cryptodev/rte_crypto_asym.h b/lib/cryptodev/rte_crypto_asym.h
index 4697a7bc59..64d97ae054 100644
--- a/lib/cryptodev/rte_crypto_asym.h
+++ b/lib/cryptodev/rte_crypto_asym.h
@@ -91,6 +91,8 @@ enum rte_crypto_asym_xform_type {
/**< Elliptic Curve Digital Signature Algorithm
 * Perform Signature Generation and Verification.
 */
+   RTE_CRYPTO_ASYM_XFORM_ECDH,
+   /**< Elliptic Curve Diffie Hellman */
RTE_CRYPTO_ASYM_XFORM_ECPM,
/**< Elliptic Curve Point Multiplication */
RTE_CRYPTO_ASYM_XFORM_TYPE_LIST_END
@@ -112,9 +114,9 @@ enum rte_crypto_asym_op_type {
RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE,
/**< DH Private Key generation operation */
RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE,
-   /**< DH Public Key generation operation */
+   /**< DH/ECDH Public Key generation operation */
RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE,
-   /**< DH Shared Secret compute operation */
+   /**< DH/ECDH Shared Secret compute operation */
RTE_CRYPTO_ASYM_OP_LIST_END
 };
 
@@ -385,34 +387,38 @@ struct rte_crypto_rsa_op_param {
 };
 
 /**
- * Diffie-Hellman Operations params.
+ * Diffie-Hellman/Elliptic Curve Diffie-Hellman operation.
  * @note:
  */
 struct rte_crypto_dh_op_param {
enum rte_crypto_asym_op_type op_type;
/**< Diffie-Hellman operation type */
-   rte_crypto_uint pub_key;
+   rte_crypto_param priv_key;
/**<
-* Output generated public key when op_type is
-* DH PUB_KEY_GENERATION.
-* Input peer public key when op_type is DH
-* SHARED_SECRET_COMPUTATION
-*
+* Diffie-Hellman private part
+* For DH and ECDH it is big-endian integer.
+* Input for both phases of Diffie-Hellman
 */
-
-   rte_crypto_uint priv_key;
+   union {
+   rte_crypto_uint pub_key;
+   struct rte_crypto_ec_point pub_point;
+   };
/**<
-* Output generated private key if op_type is
-* DH PRIVATE_KEY_GENERATION
-* Input when op_type is DH SHARED_SECRET_COMPUTATION.
-*
+* Diffie-Hellman public part
+* For DH it is big-endian unsigned integer.
+* For ECDH it is a point on the curve.
+* Output for RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE
+* Input for RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE
 */
-
-   rte_crypto_uint shared_secret;
+   union {
+   rte_crypto_uint shared_secret;
+   struct rte_crypto_ec_point shared_point;
+   };
/**<
-* Output with calculated shared secret
-* when dh op_type = SHARED_SECRET_COMPUTATION.
-*
+* Diffie-Hellman shared secret
+* For DH it is big-endian unsigned integer.
+* For ECDH it is a point on the curve.
+* Output for RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE
 */
 };
 
-- 
2.13.6



[PATCH v6 00/12] complete common VF features for DCF

2022-04-27 Thread Kevin Liu
The DCF PMD supports the below dev ops:
dev_supported_ptypes_get
dev_link_update
xstats_get
xstats_get_names
xstats_reset
promiscuous_enable
promiscuous_disable
allmulticast_enable
allmulticast_disable
mac_addr_add
mac_addr_remove
set_mc_addr_list
vlan_filter_set
vlan_offload_set
mac_addr_set
reta_update
reta_query
rss_hash_update
rss_hash_conf_get
rxq_info_get
txq_info_get
mtu_set
tx_done_cleanup
get_monitor_addr

v6:
* add patch:
1.net/ice: support DCF new VLAN capabilities
* remove patch:
1.doc: update for ice DCF datapath configuration
* Split doc into specific patch.

v5:
* remove patch:
1.complete common VF features for DCF
2.net/ice: enable CVL DCF device reset API
3.net/ice: support IPv6 NVGRE tunnel
4.net/ice: support new pattern of IPv4
5.net/ice: treat unknown package as OS default package
6.net/ice: handle virtchnl event message without interrupt
7.net/ice: add DCF request queues function
8.net/ice: negotiate large VF and request more queues
9.net/ice: enable multiple queues configurations for large VF
10.net/ice: enable IRQ mapping configuration for large VF
11.net/ice: add enable/disable queues for DCF large VF

v4:
* remove patch:
1.testpmd: force flow flush
2.net/ice: fix DCF ACL flow engine
3.net/ice: fix DCF reset
* add patch:
1.net/ice: add extended stats
2.net/ice: support queue information getting
3.net/ice: implement power management
4.doc: update for ice DCF datapath configuration

v3:
* remove patch:
1.net/ice/base: add VXLAN support for switch filter
2.net/ice: add VXLAN support for switch filter
3.common/iavf: support flushing rules and reporting DCF id
4.net/ice/base: fix ethertype filter input set
5.net/ice/base: support IPv6 GRE UDP pattern
6.net/ice/base: support new patterns of TCP and UDP
7.net/ice: support new patterns of TCP and UDP
8.net/ice/base: support IPv4 GRE tunnel
9.net/ice: support IPv4 GRE raw pattern type
10.net/ice/base: update Profile ID table for VXLAN
11.net/ice/base: update Protocol ID table to match DVM DDP

v2:
* remove patch:
1.net/iavf: support checking if device is an MDCF instance
2.net/ice: support MDCF(multi-DCF) instance
3.net/ice/base: support custom DDP buildin recipe
4.net/ice: support buildin recipe configuration
5.net/ice/base: support custom ddp package version
6.net/ice: disable ACL function for MDCF instance

Alvin Zhang (3):
  net/ice: support dcf promisc configuration
  net/ice: support dcf VLAN filter and offload configuration
  net/ice: support DCF new VLAN capabilities

Jie Wang (2):
  net/ice: add ops MTU-SET to dcf
  net/ice: add ops dev-supported-ptypes-get to dcf

Kevin Liu (4):
  net/ice: support dcf MAC configuration
  net/ice: add extended stats
  net/ice: support queue information getting
  net/ice: implement power management

Robin Zhang (1):
  net/ice: cleanup Tx buffers

Steve Yang (2):
  net/ice: enable RSS RETA ops for DCF hardware
  net/ice: enable RSS HASH ops for DCF hardware

 doc/guides/nics/features/ice_dcf.ini   |  10 +
 doc/guides/rel_notes/release_22_07.rst |  14 +
 drivers/net/ice/ice_dcf.c  |  40 +-
 drivers/net/ice/ice_dcf.h  |  29 +-
 drivers/net/ice/ice_dcf_ethdev.c   | 820 -
 drivers/net/ice/ice_dcf_ethdev.h   |  10 +
 6 files changed, 885 insertions(+), 38 deletions(-)

-- 
2.33.1



[PATCH v6 01/12] net/ice: enable RSS RETA ops for DCF hardware

2022-04-27 Thread Kevin Liu
From: Steve Yang 

RSS RETA should be updated and queried by the application;
add the related ops ('.reta_update', '.reta_query') for DCF.

Signed-off-by: Steve Yang 
Signed-off-by: Kevin Liu 
---
 doc/guides/nics/features/ice_dcf.ini   |  1 +
 doc/guides/rel_notes/release_22_07.rst |  3 +
 drivers/net/ice/ice_dcf.c  |  2 +-
 drivers/net/ice/ice_dcf.h  |  1 +
 drivers/net/ice/ice_dcf_ethdev.c   | 77 ++
 5 files changed, 83 insertions(+), 1 deletion(-)
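
Application-side usage follows the standard ethdev RETA convention; a
hedged sketch (assumes reta_size is a multiple of
RTE_ETH_RETA_GROUP_SIZE and redirects every entry to queue 0):

    struct rte_eth_rss_reta_entry64 reta_conf[reta_size /
                    RTE_ETH_RETA_GROUP_SIZE];
    uint16_t i;
    int ret;

    memset(reta_conf, 0, sizeof(reta_conf));
    for (i = 0; i < reta_size; i++) {
            /* mask selects which entries of each 64-entry group to write */
            reta_conf[i / RTE_ETH_RETA_GROUP_SIZE].mask |=
                    1ULL << (i % RTE_ETH_RETA_GROUP_SIZE);
            reta_conf[i / RTE_ETH_RETA_GROUP_SIZE].reta[i %
                    RTE_ETH_RETA_GROUP_SIZE] = 0;
    }
    ret = rte_eth_dev_rss_reta_update(port_id, reta_conf, reta_size);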

diff --git a/doc/guides/nics/features/ice_dcf.ini b/doc/guides/nics/features/ice_dcf.ini
index 54073f0b88..5221c99a9c 100644
--- a/doc/guides/nics/features/ice_dcf.ini
+++ b/doc/guides/nics/features/ice_dcf.ini
@@ -15,6 +15,7 @@ L3 checksum offload  = P
 L4 checksum offload  = P
 Inner L3 checksum= P
 Inner L4 checksum= P
+RSS reta update  = Y
 Basic stats  = Y
 Linux= Y
 x86-32   = Y
diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst
index 90123bb807..cbdc90760c 100644
--- a/doc/guides/rel_notes/release_22_07.rst
+++ b/doc/guides/rel_notes/release_22_07.rst
@@ -60,6 +60,9 @@ New Features
   * Added Tx QoS queue rate limitation support.
   * Added quanta size configuration support.
 
+* **Updated Intel ice driver.**
+
+ * Added enable RSS RETA ops for DCF hardware.
 
 Removed Items
 -
diff --git a/drivers/net/ice/ice_dcf.c b/drivers/net/ice/ice_dcf.c
index 7f0c074b01..070d1b71ac 100644
--- a/drivers/net/ice/ice_dcf.c
+++ b/drivers/net/ice/ice_dcf.c
@@ -790,7 +790,7 @@ ice_dcf_configure_rss_key(struct ice_dcf_hw *hw)
return err;
 }
 
-static int
+int
 ice_dcf_configure_rss_lut(struct ice_dcf_hw *hw)
 {
struct virtchnl_rss_lut *rss_lut;
diff --git a/drivers/net/ice/ice_dcf.h b/drivers/net/ice/ice_dcf.h
index 6ec766ebda..b2c6aa2684 100644
--- a/drivers/net/ice/ice_dcf.h
+++ b/drivers/net/ice/ice_dcf.h
@@ -122,6 +122,7 @@ int ice_dcf_send_aq_cmd(void *dcf_hw, struct ice_aq_desc *desc,
 int ice_dcf_handle_vsi_update_event(struct ice_dcf_hw *hw);
 int ice_dcf_init_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw);
 void ice_dcf_uninit_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw);
+int ice_dcf_configure_rss_lut(struct ice_dcf_hw *hw);
 int ice_dcf_init_rss(struct ice_dcf_hw *hw);
 int ice_dcf_configure_queues(struct ice_dcf_hw *hw);
 int ice_dcf_config_irq_map(struct ice_dcf_hw *hw);
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index 59610e058f..1ac66ed990 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -761,6 +761,81 @@ ice_dcf_dev_flow_ops_get(struct rte_eth_dev *dev,
return 0;
 }
 
+static int
+ice_dcf_dev_rss_reta_update(struct rte_eth_dev *dev,
+   struct rte_eth_rss_reta_entry64 *reta_conf,
+   uint16_t reta_size)
+{
+   struct ice_dcf_adapter *adapter = dev->data->dev_private;
+   struct ice_dcf_hw *hw = &adapter->real_hw;
+   uint8_t *lut;
+   uint16_t i, idx, shift;
+   int ret;
+
+   if (!(hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF))
+   return -ENOTSUP;
+
+   if (reta_size != hw->vf_res->rss_lut_size) {
+   PMD_DRV_LOG(ERR, "The size of hash lookup table configured "
+   "(%d) doesn't match the number of hardware can "
+   "support (%d)", reta_size, hw->vf_res->rss_lut_size);
+   return -EINVAL;
+   }
+
+   lut = rte_zmalloc("rss_lut", reta_size, 0);
+   if (!lut) {
+   PMD_DRV_LOG(ERR, "No memory can be allocated");
+   return -ENOMEM;
+   }
+   /* store the old lut table temporarily */
+   rte_memcpy(lut, hw->rss_lut, reta_size);
+
+   for (i = 0; i < reta_size; i++) {
+   idx = i / RTE_ETH_RETA_GROUP_SIZE;
+   shift = i % RTE_ETH_RETA_GROUP_SIZE;
+   if (reta_conf[idx].mask & (1ULL << shift))
+   lut[i] = reta_conf[idx].reta[shift];
+   }
+
+   rte_memcpy(hw->rss_lut, lut, reta_size);
+   /* send virtchnnl ops to configure rss*/
+   ret = ice_dcf_configure_rss_lut(hw);
+   if (ret) /* revert back */
+   rte_memcpy(hw->rss_lut, lut, reta_size);
+   rte_free(lut);
+
+   return ret;
+}
+
+static int
+ice_dcf_dev_rss_reta_query(struct rte_eth_dev *dev,
+  struct rte_eth_rss_reta_entry64 *reta_conf,
+  uint16_t reta_size)
+{
+   struct ice_dcf_adapter *adapter = dev->data->dev_private;
+   struct ice_dcf_hw *hw = &adapter->real_hw;
+   uint16_t i, idx, shift;
+
+   if (!(hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF))
+   return -ENOTSUP;
+
+   if (reta_size != hw->vf_res->rss_lut_size) {
+   PMD_DRV_LOG(ERR, "The size of hash lookup table configured "
+   "(%d) doesn't match the number of 

[PATCH v6 02/12] net/ice: enable RSS HASH ops for DCF hardware

2022-04-27 Thread Kevin Liu
From: Steve Yang 

RSS hash should be updated and queried by the application;
add the related ops ('.rss_hash_update', '.rss_hash_conf_get') for DCF.

Because DCF doesn't support configuring the RSS hash function, only
the hash key can be updated within the '.rss_hash_update' op.

Signed-off-by: Steve Yang 
Signed-off-by: Kevin Liu 
---
 doc/guides/nics/features/ice_dcf.ini   |  1 +
 doc/guides/rel_notes/release_22_07.rst |  1 +
 drivers/net/ice/ice_dcf.c  |  2 +-
 drivers/net/ice/ice_dcf.h  |  1 +
 drivers/net/ice/ice_dcf_ethdev.c   | 51 ++
 5 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/doc/guides/nics/features/ice_dcf.ini 
b/doc/guides/nics/features/ice_dcf.ini
index 5221c99a9c..d9c1b25407 100644
--- a/doc/guides/nics/features/ice_dcf.ini
+++ b/doc/guides/nics/features/ice_dcf.ini
@@ -16,6 +16,7 @@ L4 checksum offload  = P
 Inner L3 checksum= P
 Inner L4 checksum= P
 RSS reta update  = Y
+RSS key update   = Y
 Basic stats  = Y
 Linux= Y
 x86-32   = Y
diff --git a/doc/guides/rel_notes/release_22_07.rst 
b/doc/guides/rel_notes/release_22_07.rst
index cbdc90760c..cc2c243e81 100644
--- a/doc/guides/rel_notes/release_22_07.rst
+++ b/doc/guides/rel_notes/release_22_07.rst
@@ -63,6 +63,7 @@ New Features
 * **Updated Intel ice driver.**
 
  * Added enable RSS RETA ops for DCF hardware.
+ * Added enable RSS HASH ops for DCF hardware.
 
 Removed Items
 -
diff --git a/drivers/net/ice/ice_dcf.c b/drivers/net/ice/ice_dcf.c
index 070d1b71ac..89c0203ba3 100644
--- a/drivers/net/ice/ice_dcf.c
+++ b/drivers/net/ice/ice_dcf.c
@@ -758,7 +758,7 @@ ice_dcf_uninit_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw)
hw->ets_config = NULL;
 }
 
-static int
+int
 ice_dcf_configure_rss_key(struct ice_dcf_hw *hw)
 {
struct virtchnl_rss_key *rss_key;
diff --git a/drivers/net/ice/ice_dcf.h b/drivers/net/ice/ice_dcf.h
index b2c6aa2684..f0b45af5ae 100644
--- a/drivers/net/ice/ice_dcf.h
+++ b/drivers/net/ice/ice_dcf.h
@@ -122,6 +122,7 @@ int ice_dcf_send_aq_cmd(void *dcf_hw, struct ice_aq_desc *desc,
 int ice_dcf_handle_vsi_update_event(struct ice_dcf_hw *hw);
 int ice_dcf_init_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw);
 void ice_dcf_uninit_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw);
+int ice_dcf_configure_rss_key(struct ice_dcf_hw *hw);
 int ice_dcf_configure_rss_lut(struct ice_dcf_hw *hw);
 int ice_dcf_init_rss(struct ice_dcf_hw *hw);
 int ice_dcf_configure_queues(struct ice_dcf_hw *hw);
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index 1ac66ed990..ccad7fc304 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -836,6 +836,55 @@ ice_dcf_dev_rss_reta_query(struct rte_eth_dev *dev,
return 0;
 }
 
+static int
+ice_dcf_dev_rss_hash_update(struct rte_eth_dev *dev,
+   struct rte_eth_rss_conf *rss_conf)
+{
+   struct ice_dcf_adapter *adapter = dev->data->dev_private;
+   struct ice_dcf_hw *hw = &adapter->real_hw;
+
+   if (!(hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF))
+   return -ENOTSUP;
+
+   /* HENA setting, it is enabled by default, no change */
+   if (!rss_conf->rss_key || rss_conf->rss_key_len == 0) {
+   PMD_DRV_LOG(DEBUG, "No key to be configured");
+   return 0;
+   } else if (rss_conf->rss_key_len != hw->vf_res->rss_key_size) {
+   PMD_DRV_LOG(ERR, "The size of hash key configured "
+   "(%d) doesn't match the size of hardware can "
+   "support (%d)", rss_conf->rss_key_len,
+   hw->vf_res->rss_key_size);
+   return -EINVAL;
+   }
+
+   rte_memcpy(hw->rss_key, rss_conf->rss_key, rss_conf->rss_key_len);
+
+   return ice_dcf_configure_rss_key(hw);
+}
+
+static int
+ice_dcf_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
+ struct rte_eth_rss_conf *rss_conf)
+{
+   struct ice_dcf_adapter *adapter = dev->data->dev_private;
+   struct ice_dcf_hw *hw = &adapter->real_hw;
+
+   if (!(hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF))
+   return -ENOTSUP;
+
+   /* Just set it to default value now. */
+   rss_conf->rss_hf = ICE_RSS_OFFLOAD_ALL;
+
+   if (!rss_conf->rss_key)
+   return 0;
+
+   rss_conf->rss_key_len = hw->vf_res->rss_key_size;
+   rte_memcpy(rss_conf->rss_key, hw->rss_key, rss_conf->rss_key_len);
+
+   return 0;
+}
+
 #define ICE_DCF_32_BIT_WIDTH (CHAR_BIT * 4)
 #define ICE_DCF_48_BIT_WIDTH (CHAR_BIT * 6)
 #define ICE_DCF_48_BIT_MASK  RTE_LEN2MASK(ICE_DCF_48_BIT_WIDTH, uint64_t)
@@ -1184,6 +1233,8 @@ static const struct eth_dev_ops ice_dcf_eth_dev_ops = {
.tm_ops_get  = ice_dcf_tm_ops_get,
.reta_update = ice_dcf_dev_rss_reta_update,
.reta_query  = ice_dcf_dev_rss_reta_query,

[PATCH v6 03/12] net/ice: cleanup Tx buffers

2022-04-27 Thread Kevin Liu
From: Robin Zhang 

Add support for the rte_eth_tx_done_cleanup op in DCF.
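
For illustration, an application invokes it like this (a sketch; port_id
is a placeholder):

	/* ask the PMD to free up to 64 already-transmitted mbufs on queue 0 */
	int freed = rte_eth_tx_done_cleanup(port_id, 0, 64);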

Signed-off-by: Robin Zhang 
Signed-off-by: Kevin Liu 
---
 doc/guides/rel_notes/release_22_07.rst | 1 +
 drivers/net/ice/ice_dcf_ethdev.c   | 1 +
 2 files changed, 2 insertions(+)

diff --git a/doc/guides/rel_notes/release_22_07.rst 
b/doc/guides/rel_notes/release_22_07.rst
index cc2c243e81..bbd3d296de 100644
--- a/doc/guides/rel_notes/release_22_07.rst
+++ b/doc/guides/rel_notes/release_22_07.rst
@@ -64,6 +64,7 @@ New Features
 
  * Added enable RSS RETA ops for DCF hardware.
  * Added enable RSS HASH ops for DCF hardware.
+ * Added cleanup Tx buffers.
 
 Removed Items
 -
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index ccad7fc304..d8b5961514 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -1235,6 +1235,7 @@ static const struct eth_dev_ops ice_dcf_eth_dev_ops = {
.reta_query  = ice_dcf_dev_rss_reta_query,
.rss_hash_update = ice_dcf_dev_rss_hash_update,
.rss_hash_conf_get   = ice_dcf_dev_rss_hash_conf_get,
+   .tx_done_cleanup = ice_tx_done_cleanup,
 };
 
 static int
-- 
2.33.1



[PATCH v6 04/12] net/ice: add ops MTU-SET to dcf

2022-04-27 Thread Kevin Liu
From: Jie Wang 

add API "mtu_set" to dcf, and it can configure the port mtu through
cmdline.
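
For illustration, the op is reached through the generic API, and the port
must be stopped first (a sketch; port_id is a placeholder):

	int ret = rte_eth_dev_stop(port_id); /* mtu_set returns -EBUSY otherwise */
	if (ret == 0)
		ret = rte_eth_dev_set_mtu(port_id, 1500);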

Signed-off-by: Jie Wang 
Signed-off-by: Kevin Liu 
---
 doc/guides/nics/features/ice_dcf.ini   |  1 +
 doc/guides/rel_notes/release_22_07.rst |  1 +
 drivers/net/ice/ice_dcf_ethdev.c   | 14 ++
 drivers/net/ice/ice_dcf_ethdev.h   |  6 ++
 4 files changed, 22 insertions(+)

diff --git a/doc/guides/nics/features/ice_dcf.ini 
b/doc/guides/nics/features/ice_dcf.ini
index d9c1b25407..be34ab4692 100644
--- a/doc/guides/nics/features/ice_dcf.ini
+++ b/doc/guides/nics/features/ice_dcf.ini
@@ -17,6 +17,7 @@ Inner L3 checksum= P
 Inner L4 checksum= P
 RSS reta update  = Y
 RSS key update   = Y
+MTU update   = Y
 Basic stats  = Y
 Linux= Y
 x86-32   = Y
diff --git a/doc/guides/rel_notes/release_22_07.rst 
b/doc/guides/rel_notes/release_22_07.rst
index bbd3d296de..dc37de85f3 100644
--- a/doc/guides/rel_notes/release_22_07.rst
+++ b/doc/guides/rel_notes/release_22_07.rst
@@ -65,6 +65,7 @@ New Features
  * Added enable RSS RETA ops for DCF hardware.
  * Added enable RSS HASH ops for DCF hardware.
  * Added cleanup Tx buffers.
+ * Added add ops MTU-SET to dcf.
 
 Removed Items
 -
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index d8b5961514..06d752fd61 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -1081,6 +1081,19 @@ ice_dcf_link_update(struct rte_eth_dev *dev,
return rte_eth_linkstatus_set(dev, &new_link);
 }
 
+static int
+ice_dcf_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu __rte_unused)
+{
+   /* MTU setting is forbidden if the port is started */
+   if (dev->data->dev_started != 0) {
+   PMD_DRV_LOG(ERR, "port %d must be stopped before configuration",
+   dev->data->port_id);
+   return -EBUSY;
+   }
+
+   return 0;
+}
+
 bool
 ice_dcf_adminq_need_retry(struct ice_adapter *ad)
 {
@@ -1236,6 +1249,7 @@ static const struct eth_dev_ops ice_dcf_eth_dev_ops = {
.rss_hash_update = ice_dcf_dev_rss_hash_update,
.rss_hash_conf_get   = ice_dcf_dev_rss_hash_conf_get,
.tx_done_cleanup = ice_tx_done_cleanup,
+   .mtu_set = ice_dcf_dev_mtu_set,
 };
 
 static int
diff --git a/drivers/net/ice/ice_dcf_ethdev.h b/drivers/net/ice/ice_dcf_ethdev.h
index 11a1305038..f2faf26f58 100644
--- a/drivers/net/ice/ice_dcf_ethdev.h
+++ b/drivers/net/ice/ice_dcf_ethdev.h
@@ -15,6 +15,12 @@
 
 #define ICE_DCF_MAX_RINGS  1
 
+#define ICE_DCF_FRAME_SIZE_MAX   9728
+#define ICE_DCF_VLAN_TAG_SIZE   4
+#define ICE_DCF_ETH_OVERHEAD \
+   (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + ICE_DCF_VLAN_TAG_SIZE * 2)
+#define ICE_DCF_ETH_MAX_LEN (RTE_ETHER_MTU + ICE_DCF_ETH_OVERHEAD)
+
 struct ice_dcf_queue {
uint64_t dummy;
 };
-- 
2.33.1



[PATCH v6 05/12] net/ice: add ops dev-supported-ptypes-get to dcf

2022-04-27 Thread Kevin Liu
From: Jie Wang 

add API "dev_supported_ptypes_get" to dcf, that dcf pmd can get
ptypes through the new API.
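
For illustration, an application queries the ptypes like this (a sketch;
port_id is a placeholder):

	uint32_t ptypes[16];
	int num = rte_eth_dev_get_supported_ptypes(port_id, RTE_PTYPE_ALL_MASK,
						   ptypes, RTE_DIM(ptypes));
	/* on success, ptypes[0..num-1] hold e.g. RTE_PTYPE_L4_TCP */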

Signed-off-by: Jie Wang 
Signed-off-by: Kevin Liu 
---
 doc/guides/rel_notes/release_22_07.rst |  1 +
 drivers/net/ice/ice_dcf_ethdev.c   | 80 --
 2 files changed, 50 insertions(+), 31 deletions(-)

diff --git a/doc/guides/rel_notes/release_22_07.rst 
b/doc/guides/rel_notes/release_22_07.rst
index dc37de85f3..a39196c605 100644
--- a/doc/guides/rel_notes/release_22_07.rst
+++ b/doc/guides/rel_notes/release_22_07.rst
@@ -66,6 +66,7 @@ New Features
  * Added enable RSS HASH ops for DCF hardware.
  * Added cleanup Tx buffers.
  * Added add ops MTU-SET to dcf.
+ * Added add ops dev-supported-ptypes-get to dcf.
 
 Removed Items
 -
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index 06d752fd61..6a577a6582 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -1218,38 +1218,56 @@ ice_dcf_dev_reset(struct rte_eth_dev *dev)
return ret;
 }
 
+static const uint32_t *
+ice_dcf_dev_supported_ptypes_get(struct rte_eth_dev *dev __rte_unused)
+{
+   static const uint32_t ptypes[] = {
+   RTE_PTYPE_L2_ETHER,
+   RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
+   RTE_PTYPE_L4_FRAG,
+   RTE_PTYPE_L4_ICMP,
+   RTE_PTYPE_L4_NONFRAG,
+   RTE_PTYPE_L4_SCTP,
+   RTE_PTYPE_L4_TCP,
+   RTE_PTYPE_L4_UDP,
+   RTE_PTYPE_UNKNOWN
+   };
+   return ptypes;
+}
+
 static const struct eth_dev_ops ice_dcf_eth_dev_ops = {
-   .dev_start   = ice_dcf_dev_start,
-   .dev_stop= ice_dcf_dev_stop,
-   .dev_close   = ice_dcf_dev_close,
-   .dev_reset   = ice_dcf_dev_reset,
-   .dev_configure   = ice_dcf_dev_configure,
-   .dev_infos_get   = ice_dcf_dev_info_get,
-   .rx_queue_setup  = ice_rx_queue_setup,
-   .tx_queue_setup  = ice_tx_queue_setup,
-   .rx_queue_release= ice_dev_rx_queue_release,
-   .tx_queue_release= ice_dev_tx_queue_release,
-   .rx_queue_start  = ice_dcf_rx_queue_start,
-   .tx_queue_start  = ice_dcf_tx_queue_start,
-   .rx_queue_stop   = ice_dcf_rx_queue_stop,
-   .tx_queue_stop   = ice_dcf_tx_queue_stop,
-   .link_update = ice_dcf_link_update,
-   .stats_get   = ice_dcf_stats_get,
-   .stats_reset = ice_dcf_stats_reset,
-   .promiscuous_enable  = ice_dcf_dev_promiscuous_enable,
-   .promiscuous_disable = ice_dcf_dev_promiscuous_disable,
-   .allmulticast_enable = ice_dcf_dev_allmulticast_enable,
-   .allmulticast_disable= ice_dcf_dev_allmulticast_disable,
-   .flow_ops_get= ice_dcf_dev_flow_ops_get,
-   .udp_tunnel_port_add = ice_dcf_dev_udp_tunnel_port_add,
-   .udp_tunnel_port_del = ice_dcf_dev_udp_tunnel_port_del,
-   .tm_ops_get  = ice_dcf_tm_ops_get,
-   .reta_update = ice_dcf_dev_rss_reta_update,
-   .reta_query  = ice_dcf_dev_rss_reta_query,
-   .rss_hash_update = ice_dcf_dev_rss_hash_update,
-   .rss_hash_conf_get   = ice_dcf_dev_rss_hash_conf_get,
-   .tx_done_cleanup = ice_tx_done_cleanup,
-   .mtu_set = ice_dcf_dev_mtu_set,
+   .dev_start= ice_dcf_dev_start,
+   .dev_stop = ice_dcf_dev_stop,
+   .dev_close= ice_dcf_dev_close,
+   .dev_reset= ice_dcf_dev_reset,
+   .dev_configure= ice_dcf_dev_configure,
+   .dev_infos_get= ice_dcf_dev_info_get,
+   .dev_supported_ptypes_get = ice_dcf_dev_supported_ptypes_get,
+   .rx_queue_setup   = ice_rx_queue_setup,
+   .tx_queue_setup   = ice_tx_queue_setup,
+   .rx_queue_release = ice_dev_rx_queue_release,
+   .tx_queue_release = ice_dev_tx_queue_release,
+   .rx_queue_start   = ice_dcf_rx_queue_start,
+   .tx_queue_start   = ice_dcf_tx_queue_start,
+   .rx_queue_stop= ice_dcf_rx_queue_stop,
+   .tx_queue_stop= ice_dcf_tx_queue_stop,
+   .link_update  = ice_dcf_link_update,
+   .stats_get= ice_dcf_stats_get,
+   .stats_reset  = ice_dcf_stats_reset,
+   .promiscuous_enable   = ice_dcf_dev_promiscuous_enable,
+   .promiscuous_disable  = ice_dcf_dev_promiscuous_disable,
+   .allmulticast_enable  = ice_dcf_dev_allmulticast_enable,
+   .allmulticast_disable = ice_dcf_dev_allmulticast_disable,
+   .flow_ops_get = ice_dcf_dev_flow_ops_get,
+   .udp_tunnel_port_add  = ice_dcf_dev_udp_tunnel_port_add,
+   .udp_tunnel_port_del  = ice_dcf_dev_udp_tunnel_port_del,

[PATCH v6 06/12] net/ice: support dcf promisc configuration

2022-04-27 Thread Kevin Liu
From: Alvin Zhang 

Support configuration of unicast and multicast promiscuous mode on DCF.
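
For illustration, these map to the generic ethdev calls (a sketch;
port_id is a placeholder):

	int ret = rte_eth_promiscuous_enable(port_id);
	ret = rte_eth_promiscuous_disable(port_id);
	ret = rte_eth_allmulticast_enable(port_id);
	ret = rte_eth_allmulticast_disable(port_id);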

Signed-off-by: Alvin Zhang 
Signed-off-by: Kevin Liu 
---
 doc/guides/nics/features/ice_dcf.ini   |  2 +
 doc/guides/rel_notes/release_22_07.rst |  1 +
 drivers/net/ice/ice_dcf_ethdev.c   | 77 --
 drivers/net/ice/ice_dcf_ethdev.h   |  3 +
 4 files changed, 79 insertions(+), 4 deletions(-)

diff --git a/doc/guides/nics/features/ice_dcf.ini 
b/doc/guides/nics/features/ice_dcf.ini
index be34ab4692..fe3ada8733 100644
--- a/doc/guides/nics/features/ice_dcf.ini
+++ b/doc/guides/nics/features/ice_dcf.ini
@@ -18,6 +18,8 @@ Inner L4 checksum= P
 RSS reta update  = Y
 RSS key update   = Y
 MTU update   = Y
+Promiscuous mode = Y
+Allmulticast mode= Y
 Basic stats  = Y
 Linux= Y
 x86-32   = Y
diff --git a/doc/guides/rel_notes/release_22_07.rst 
b/doc/guides/rel_notes/release_22_07.rst
index a39196c605..c7ba4453ff 100644
--- a/doc/guides/rel_notes/release_22_07.rst
+++ b/doc/guides/rel_notes/release_22_07.rst
@@ -67,6 +67,7 @@ New Features
  * Added cleanup Tx buffers.
  * Added add ops MTU-SET to dcf.
  * Added add ops dev-supported-ptypes-get to dcf.
+ * Added support dcf promisc configuration.
 
 Removed Items
 -
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index 6a577a6582..87d281ee93 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -727,27 +727,95 @@ ice_dcf_dev_info_get(struct rte_eth_dev *dev,
 }
 
 static int
-ice_dcf_dev_promiscuous_enable(__rte_unused struct rte_eth_dev *dev)
+dcf_config_promisc(struct ice_dcf_adapter *adapter,
+  bool enable_unicast,
+  bool enable_multicast)
 {
+   struct ice_dcf_hw *hw = &adapter->real_hw;
+   struct virtchnl_promisc_info promisc;
+   struct dcf_virtchnl_cmd args;
+   int err;
+
+   promisc.flags = 0;
+   promisc.vsi_id = hw->vsi_res->vsi_id;
+
+   if (enable_unicast)
+   promisc.flags |= FLAG_VF_UNICAST_PROMISC;
+
+   if (enable_multicast)
+   promisc.flags |= FLAG_VF_MULTICAST_PROMISC;
+
+   memset(&args, 0, sizeof(args));
+   args.v_op = VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE;
+   args.req_msg = (uint8_t *)&promisc;
+   args.req_msglen = sizeof(promisc);
+
+   err = ice_dcf_execute_virtchnl_cmd(hw, &args);
+   if (err) {
+   PMD_DRV_LOG(ERR,
+   "fail to execute command 
VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE");
+   return err;
+   }
+
+   adapter->promisc_unicast_enabled = enable_unicast;
+   adapter->promisc_multicast_enabled = enable_multicast;
return 0;
 }
 
+static int
+ice_dcf_dev_promiscuous_enable(__rte_unused struct rte_eth_dev *dev)
+{
+   struct ice_dcf_adapter *adapter = dev->data->dev_private;
+
+   if (adapter->promisc_unicast_enabled) {
+   PMD_DRV_LOG(INFO, "promiscuous has been enabled");
+   return 0;
+   }
+
+   return dcf_config_promisc(adapter, true,
+ adapter->promisc_multicast_enabled);
+}
+
 static int
 ice_dcf_dev_promiscuous_disable(__rte_unused struct rte_eth_dev *dev)
 {
-   return 0;
+   struct ice_dcf_adapter *adapter = dev->data->dev_private;
+
+   if (!adapter->promisc_unicast_enabled) {
+   PMD_DRV_LOG(INFO, "promiscuous has been disabled");
+   return 0;
+   }
+
+   return dcf_config_promisc(adapter, false,
+ adapter->promisc_multicast_enabled);
 }
 
 static int
 ice_dcf_dev_allmulticast_enable(__rte_unused struct rte_eth_dev *dev)
 {
-   return 0;
+   struct ice_dcf_adapter *adapter = dev->data->dev_private;
+
+   if (adapter->promisc_multicast_enabled) {
+   PMD_DRV_LOG(INFO, "allmulticast has been enabled");
+   return 0;
+   }
+
+   return dcf_config_promisc(adapter, adapter->promisc_unicast_enabled,
+ true);
 }
 
 static int
 ice_dcf_dev_allmulticast_disable(__rte_unused struct rte_eth_dev *dev)
 {
-   return 0;
+   struct ice_dcf_adapter *adapter = dev->data->dev_private;
+
+   if (!adapter->promisc_multicast_enabled) {
+   PMD_DRV_LOG(INFO, "allmulticast has been disabled");
+   return 0;
+   }
+
+   return dcf_config_promisc(adapter, adapter->promisc_unicast_enabled,
+ false);
 }
 
 static int
@@ -1299,6 +1367,7 @@ ice_dcf_dev_init(struct rte_eth_dev *eth_dev)
return -1;
}
 
+   dcf_config_promisc(adapter, false, false);
return 0;
 }
 
diff --git a/drivers/net/ice/ice_dcf_ethdev.h b/drivers/net/ice/ice_dcf_ethdev.h
index f2faf26f58..22e450527b 100644
--- a/drivers/net/ice/ice_dcf_ethdev.h
+++ b/drivers/net/ice/ice_dcf_ethdev.h
@@ -33,6 +33,9 @@ struct ice_dcf_adapter

[PATCH v6 07/12] net/ice: support dcf MAC configuration

2022-04-27 Thread Kevin Liu
Below PMD ops are supported in this patch; a usage sketch follows the list:
.mac_addr_add  = dcf_dev_add_mac_addr
.mac_addr_remove   = dcf_dev_del_mac_addr
.set_mc_addr_list  = dcf_set_mc_addr_list
.mac_addr_set  = dcf_dev_set_default_mac_addr
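
For illustration, these ops map to the generic ethdev calls (a sketch;
port_id is a placeholder and the address is a dummy locally administered
MAC):

	struct rte_ether_addr mac = {
		.addr_bytes = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 }
	};

	int ret = rte_eth_dev_mac_addr_add(port_id, &mac, 0);   /* .mac_addr_add */
	ret = rte_eth_dev_default_mac_addr_set(port_id, &mac);  /* .mac_addr_set */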

Signed-off-by: Alvin Zhang 
Signed-off-by: Kevin Liu 
---
 doc/guides/nics/features/ice_dcf.ini   |   1 +
 doc/guides/rel_notes/release_22_07.rst |   1 +
 drivers/net/ice/ice_dcf.c  |   9 +-
 drivers/net/ice/ice_dcf.h  |   4 +-
 drivers/net/ice/ice_dcf_ethdev.c   | 218 -
 drivers/net/ice/ice_dcf_ethdev.h   |   5 +-
 6 files changed, 228 insertions(+), 10 deletions(-)

diff --git a/doc/guides/nics/features/ice_dcf.ini 
b/doc/guides/nics/features/ice_dcf.ini
index fe3ada8733..c9bdbcd6cc 100644
--- a/doc/guides/nics/features/ice_dcf.ini
+++ b/doc/guides/nics/features/ice_dcf.ini
@@ -20,6 +20,7 @@ RSS key update   = Y
 MTU update   = Y
 Promiscuous mode = Y
 Allmulticast mode= Y
+Unicast MAC filter   = Y
 Basic stats  = Y
 Linux= Y
 x86-32   = Y
diff --git a/doc/guides/rel_notes/release_22_07.rst 
b/doc/guides/rel_notes/release_22_07.rst
index c7ba4453ff..e29ec16720 100644
--- a/doc/guides/rel_notes/release_22_07.rst
+++ b/doc/guides/rel_notes/release_22_07.rst
@@ -68,6 +68,7 @@ New Features
  * Added add ops MTU-SET to dcf.
  * Added add ops dev-supported-ptypes-get to dcf.
  * Added support dcf promisc configuration.
+ * Added support dcf MAC configuration.
 
 Removed Items
 -
diff --git a/drivers/net/ice/ice_dcf.c b/drivers/net/ice/ice_dcf.c
index 89c0203ba3..55ae68c456 100644
--- a/drivers/net/ice/ice_dcf.c
+++ b/drivers/net/ice/ice_dcf.c
@@ -1089,10 +1089,11 @@ ice_dcf_query_stats(struct ice_dcf_hw *hw,
 }
 
 int
-ice_dcf_add_del_all_mac_addr(struct ice_dcf_hw *hw, bool add)
+ice_dcf_add_del_all_mac_addr(struct ice_dcf_hw *hw,
+struct rte_ether_addr *addr,
+bool add, uint8_t type)
 {
struct virtchnl_ether_addr_list *list;
-   struct rte_ether_addr *addr;
struct dcf_virtchnl_cmd args;
int len, err = 0;
 
@@ -1105,7 +1106,6 @@ ice_dcf_add_del_all_mac_addr(struct ice_dcf_hw *hw, bool add)
}
 
len = sizeof(struct virtchnl_ether_addr_list);
-   addr = hw->eth_dev->data->mac_addrs;
len += sizeof(struct virtchnl_ether_addr);
 
list = rte_zmalloc(NULL, len, 0);
@@ -1116,9 +1116,10 @@ ice_dcf_add_del_all_mac_addr(struct ice_dcf_hw *hw, bool add)
 
rte_memcpy(list->list[0].addr, addr->addr_bytes,
sizeof(addr->addr_bytes));
+
PMD_DRV_LOG(DEBUG, "add/rm mac:" RTE_ETHER_ADDR_PRT_FMT,
RTE_ETHER_ADDR_BYTES(addr));
-
+   list->list[0].type = type;
list->vsi_id = hw->vsi_res->vsi_id;
list->num_elements = 1;
 
diff --git a/drivers/net/ice/ice_dcf.h b/drivers/net/ice/ice_dcf.h
index f0b45af5ae..78df202a77 100644
--- a/drivers/net/ice/ice_dcf.h
+++ b/drivers/net/ice/ice_dcf.h
@@ -131,7 +131,9 @@ int ice_dcf_switch_queue(struct ice_dcf_hw *hw, uint16_t qid, bool rx, bool on);
 int ice_dcf_disable_queues(struct ice_dcf_hw *hw);
 int ice_dcf_query_stats(struct ice_dcf_hw *hw,
struct virtchnl_eth_stats *pstats);
-int ice_dcf_add_del_all_mac_addr(struct ice_dcf_hw *hw, bool add);
+int ice_dcf_add_del_all_mac_addr(struct ice_dcf_hw *hw,
+struct rte_ether_addr *addr, bool add,
+uint8_t type);
 int ice_dcf_link_update(struct rte_eth_dev *dev,
__rte_unused int wait_to_complete);
 void ice_dcf_tm_conf_init(struct rte_eth_dev *dev);
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index 87d281ee93..0d944f9fd2 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -26,6 +26,12 @@
 #include "ice_dcf_ethdev.h"
 #include "ice_rxtx.h"
 
+#define DCF_NUM_MACADDR_MAX  64
+
+static int dcf_add_del_mc_addr_list(struct ice_dcf_hw *hw,
+   struct rte_ether_addr *mc_addrs,
+   uint32_t mc_addrs_num, bool add);
+
 static int
 ice_dcf_dev_udp_tunnel_port_add(struct rte_eth_dev *dev,
struct rte_eth_udp_tunnel *udp_tunnel);
@@ -561,12 +567,22 @@ ice_dcf_dev_start(struct rte_eth_dev *dev)
return ret;
}
 
-   ret = ice_dcf_add_del_all_mac_addr(hw, true);
+   ret = ice_dcf_add_del_all_mac_addr(hw, hw->eth_dev->data->mac_addrs,
+  true, VIRTCHNL_ETHER_ADDR_PRIMARY);
if (ret) {
PMD_DRV_LOG(ERR, "Failed to add mac addr");
return ret;
}
 
+   if (dcf_ad->mc_addrs_num) {
+   /* flush previous addresses */
+   ret = dcf_add_del_mc_addr_list(hw, dcf_ad->mc_addrs,

[PATCH v6 08/12] net/ice: support dcf VLAN filter and offload configuration

2022-04-27 Thread Kevin Liu
From: Alvin Zhang 

Below PMD ops are supported in this patch; a usage sketch follows the list:
.vlan_filter_set  = dcf_dev_vlan_filter_set
.vlan_offload_set = dcf_dev_vlan_offload_set
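
For illustration, these ops map to the generic ethdev calls (a sketch;
port_id and VLAN ID 100 are placeholders):

	int ret = rte_eth_dev_vlan_filter(port_id, 100, 1); /* .vlan_filter_set */
	ret = rte_eth_dev_set_vlan_offload(port_id,         /* .vlan_offload_set */
					   RTE_ETH_VLAN_STRIP_OFFLOAD);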

Signed-off-by: Alvin Zhang 
Signed-off-by: Kevin Liu 
---
 doc/guides/nics/features/ice_dcf.ini   |   2 +
 doc/guides/rel_notes/release_22_07.rst |   1 +
 drivers/net/ice/ice_dcf_ethdev.c   | 101 +
 3 files changed, 104 insertions(+)

diff --git a/doc/guides/nics/features/ice_dcf.ini 
b/doc/guides/nics/features/ice_dcf.ini
index c9bdbcd6cc..01e7527915 100644
--- a/doc/guides/nics/features/ice_dcf.ini
+++ b/doc/guides/nics/features/ice_dcf.ini
@@ -21,6 +21,8 @@ MTU update   = Y
 Promiscuous mode = Y
 Allmulticast mode= Y
 Unicast MAC filter   = Y
+VLAN filter  = Y
+VLAN offload = Y
 Basic stats  = Y
 Linux= Y
 x86-32   = Y
diff --git a/doc/guides/rel_notes/release_22_07.rst 
b/doc/guides/rel_notes/release_22_07.rst
index e29ec16720..268f3bba9a 100644
--- a/doc/guides/rel_notes/release_22_07.rst
+++ b/doc/guides/rel_notes/release_22_07.rst
@@ -69,6 +69,7 @@ New Features
  * Added add ops dev-supported-ptypes-get to dcf.
  * Added support dcf promisc configuration.
  * Added support dcf MAC configuration.
+ * Added support dcf VLAN filter and offload configuration.
 
 Removed Items
 -
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index 0d944f9fd2..e58cdf47d2 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -1026,6 +1026,105 @@ dcf_dev_set_default_mac_addr(struct rte_eth_dev *dev,
return 0;
 }
 
+static int
+dcf_add_del_vlan(struct ice_dcf_hw *hw, uint16_t vlanid, bool add)
+{
+   struct virtchnl_vlan_filter_list *vlan_list;
+   uint8_t cmd_buffer[sizeof(struct virtchnl_vlan_filter_list) +
+   sizeof(uint16_t)];
+   struct dcf_virtchnl_cmd args;
+   int err;
+
+   vlan_list = (struct virtchnl_vlan_filter_list *)cmd_buffer;
+   vlan_list->vsi_id = hw->vsi_res->vsi_id;
+   vlan_list->num_elements = 1;
+   vlan_list->vlan_id[0] = vlanid;
+
+   memset(&args, 0, sizeof(args));
+   args.v_op = add ? VIRTCHNL_OP_ADD_VLAN : VIRTCHNL_OP_DEL_VLAN;
+   args.req_msg = cmd_buffer;
+   args.req_msglen = sizeof(cmd_buffer);
+   err = ice_dcf_execute_virtchnl_cmd(hw, &args);
+   if (err)
+   PMD_DRV_LOG(ERR, "fail to execute command %s",
+   add ? "OP_ADD_VLAN" :  "OP_DEL_VLAN");
+
+   return err;
+}
+
+static int
+dcf_enable_vlan_strip(struct ice_dcf_hw *hw)
+{
+   struct dcf_virtchnl_cmd args;
+   int ret;
+
+   memset(&args, 0, sizeof(args));
+   args.v_op = VIRTCHNL_OP_ENABLE_VLAN_STRIPPING;
+   ret = ice_dcf_execute_virtchnl_cmd(hw, &args);
+   if (ret)
+   PMD_DRV_LOG(ERR,
+   "Failed to execute command of 
OP_ENABLE_VLAN_STRIPPING");
+
+   return ret;
+}
+
+static int
+dcf_disable_vlan_strip(struct ice_dcf_hw *hw)
+{
+   struct dcf_virtchnl_cmd args;
+   int ret;
+
+   memset(&args, 0, sizeof(args));
+   args.v_op = VIRTCHNL_OP_DISABLE_VLAN_STRIPPING;
+   ret = ice_dcf_execute_virtchnl_cmd(hw, &args);
+   if (ret)
+   PMD_DRV_LOG(ERR,
+   "Failed to execute command of 
OP_DISABLE_VLAN_STRIPPING");
+
+   return ret;
+}
+
+static int
+dcf_dev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
+{
+   struct ice_dcf_adapter *adapter = dev->data->dev_private;
+   struct ice_dcf_hw *hw = &adapter->real_hw;
+   int err;
+
+   if (!(hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN))
+   return -ENOTSUP;
+
+   err = dcf_add_del_vlan(hw, vlan_id, on);
+   if (err)
+   return -EIO;
+   return 0;
+}
+
+static int
+dcf_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
+{
+   struct ice_dcf_adapter *adapter = dev->data->dev_private;
+   struct ice_dcf_hw *hw = &adapter->real_hw;
+   struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
+   int err;
+
+   if (!(hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN))
+   return -ENOTSUP;
+
+   /* Vlan stripping setting */
+   if (mask & RTE_ETH_VLAN_STRIP_MASK) {
+   /* Enable or disable VLAN stripping */
+   if (dev_conf->rxmode.offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP)
+   err = dcf_enable_vlan_strip(hw);
+   else
+   err = dcf_disable_vlan_strip(hw);
+
+   if (err)
+   return -EIO;
+   }
+   return 0;
+}
+
 static int
 ice_dcf_dev_flow_ops_get(struct rte_eth_dev *dev,
 const struct rte_flow_ops **ops)
@@ -1538,6 +1637,8 @@ static const struct eth_dev_ops ice_dcf_eth_dev_ops = {
.mac_addr_remove  = dcf_dev_del_mac_addr,

[PATCH v6 09/12] net/ice: add extended stats

2022-04-27 Thread Kevin Liu
Add an implementation of the xstats functions to the DCF PMD.
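
For illustration, an application retrieves them through the generic xstats
API (a sketch; port_id is a placeholder):

	int n = rte_eth_xstats_get(port_id, NULL, 0);      /* query the count */
	struct rte_eth_xstat *xs = calloc(n, sizeof(*xs));
	struct rte_eth_xstat_name *names = calloc(n, sizeof(*names));

	rte_eth_xstats_get_names(port_id, names, n);       /* e.g. "rx_bytes" */
	rte_eth_xstats_get(port_id, xs, n);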

Signed-off-by: Kevin Liu 
---
 doc/guides/nics/features/ice_dcf.ini   |  1 +
 doc/guides/rel_notes/release_22_07.rst |  1 +
 drivers/net/ice/ice_dcf.h  | 22 
 drivers/net/ice/ice_dcf_ethdev.c   | 75 ++
 4 files changed, 99 insertions(+)

diff --git a/doc/guides/nics/features/ice_dcf.ini 
b/doc/guides/nics/features/ice_dcf.ini
index 01e7527915..54ea7f150c 100644
--- a/doc/guides/nics/features/ice_dcf.ini
+++ b/doc/guides/nics/features/ice_dcf.ini
@@ -23,6 +23,7 @@ Allmulticast mode= Y
 Unicast MAC filter   = Y
 VLAN filter  = Y
 VLAN offload = Y
+Extended stats   = Y
 Basic stats  = Y
 Linux= Y
 x86-32   = Y
diff --git a/doc/guides/rel_notes/release_22_07.rst 
b/doc/guides/rel_notes/release_22_07.rst
index 268f3bba9a..1f404a6ee5 100644
--- a/doc/guides/rel_notes/release_22_07.rst
+++ b/doc/guides/rel_notes/release_22_07.rst
@@ -70,6 +70,7 @@ New Features
  * Added support dcf promisc configuration.
  * Added support dcf MAC configuration.
  * Added support dcf VLAN filter and offload configuration.
+ * Added add extended stats.
 
 Removed Items
 -
diff --git a/drivers/net/ice/ice_dcf.h b/drivers/net/ice/ice_dcf.h
index 78df202a77..44a61404c3 100644
--- a/drivers/net/ice/ice_dcf.h
+++ b/drivers/net/ice/ice_dcf.h
@@ -15,6 +15,12 @@
 #include "base/ice_type.h"
 #include "ice_logs.h"
 
+/* ICE_DCF_DEV_PRIVATE_TO */
+#define ICE_DCF_DEV_PRIVATE_TO_ADAPTER(adapter) \
+   ((struct ice_dcf_adapter *)adapter)
+#define ICE_DCF_DEV_PRIVATE_TO_VF(adapter) \
+   (&((struct ice_dcf_adapter *)adapter)->vf)
+
 struct dcf_virtchnl_cmd {
TAILQ_ENTRY(dcf_virtchnl_cmd) next;
 
@@ -74,6 +80,22 @@ struct ice_dcf_tm_conf {
bool committed;
 };
 
+struct ice_dcf_eth_stats {
+   u64 rx_bytes;   /* gorc */
+   u64 rx_unicast; /* uprc */
+   u64 rx_multicast;   /* mprc */
+   u64 rx_broadcast;   /* bprc */
+   u64 rx_discards;/* rdpc */
+   u64 rx_unknown_protocol;/* rupp */
+   u64 tx_bytes;   /* gotc */
+   u64 tx_unicast; /* uptc */
+   u64 tx_multicast;   /* mptc */
+   u64 tx_broadcast;   /* bptc */
+   u64 tx_discards;/* tdpc */
+   u64 tx_errors;  /* tepc */
+   u64 rx_no_desc; /* repc */
+   u64 rx_errors;  /* repc */
+};
 struct ice_dcf_hw {
struct iavf_hw avf;
 
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index e58cdf47d2..6503700e02 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -45,6 +45,30 @@ ice_dcf_dev_init(struct rte_eth_dev *eth_dev);
 static int
 ice_dcf_dev_uninit(struct rte_eth_dev *eth_dev);
 
+struct rte_ice_dcf_xstats_name_off {
+   char name[RTE_ETH_XSTATS_NAME_SIZE];
+   unsigned int offset;
+};
+
+static const struct rte_ice_dcf_xstats_name_off rte_ice_dcf_stats_strings[] = {
+   {"rx_bytes", offsetof(struct ice_dcf_eth_stats, rx_bytes)},
+   {"rx_unicast_packets", offsetof(struct ice_dcf_eth_stats, rx_unicast)},
+   {"rx_multicast_packets", offsetof(struct ice_dcf_eth_stats, 
rx_multicast)},
+   {"rx_broadcast_packets", offsetof(struct ice_dcf_eth_stats, 
rx_broadcast)},
+   {"rx_dropped_packets", offsetof(struct ice_dcf_eth_stats, rx_discards)},
+   {"rx_unknown_protocol_packets", offsetof(struct ice_dcf_eth_stats,
+   rx_unknown_protocol)},
+   {"tx_bytes", offsetof(struct ice_dcf_eth_stats, tx_bytes)},
+   {"tx_unicast_packets", offsetof(struct ice_dcf_eth_stats, tx_unicast)},
+   {"tx_multicast_packets", offsetof(struct ice_dcf_eth_stats, 
tx_multicast)},
+   {"tx_broadcast_packets", offsetof(struct ice_dcf_eth_stats, 
tx_broadcast)},
+   {"tx_dropped_packets", offsetof(struct ice_dcf_eth_stats, tx_discards)},
+   {"tx_error_packets", offsetof(struct ice_dcf_eth_stats, tx_errors)},
+};
+
+#define ICE_DCF_NB_XSTATS (sizeof(rte_ice_dcf_stats_strings) / \
+   sizeof(rte_ice_dcf_stats_strings[0]))
+
 static uint16_t
 ice_dcf_recv_pkts(__rte_unused void *rx_queue,
  __rte_unused struct rte_mbuf **bufs,
@@ -1358,6 +1382,54 @@ ice_dcf_stats_reset(struct rte_eth_dev *dev)
return 0;
 }
 
+static int ice_dcf_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
+ struct rte_eth_xstat_name *xstats_names,
+ __rte_unused unsigned int limit)
+{
+   unsigned int i;
+
+   if (xstats_names != NULL)
+   for (i = 0; i < ICE_DCF_NB_XSTATS; i++) {
+   snprintf(xstats_names[i].name,
+   sizeof(xstats_names[i].name),
+   "%s", rt

[PATCH v6 10/12] net/ice: support queue information getting

2022-04-27 Thread Kevin Liu
Add the below ops; a usage sketch follows the list:
rxq_info_get
txq_info_get
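
For illustration (a sketch; port_id and queue 0 are placeholders):

	struct rte_eth_rxq_info rx_qinfo;
	struct rte_eth_txq_info tx_qinfo;

	int ret = rte_eth_rx_queue_info_get(port_id, 0, &rx_qinfo);
	ret = rte_eth_tx_queue_info_get(port_id, 0, &tx_qinfo);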

Signed-off-by: Kevin Liu 
---
 doc/guides/rel_notes/release_22_07.rst | 1 +
 drivers/net/ice/ice_dcf_ethdev.c   | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/doc/guides/rel_notes/release_22_07.rst 
b/doc/guides/rel_notes/release_22_07.rst
index 1f404a6ee5..0d6577cd74 100644
--- a/doc/guides/rel_notes/release_22_07.rst
+++ b/doc/guides/rel_notes/release_22_07.rst
@@ -71,6 +71,7 @@ New Features
  * Added support dcf MAC configuration.
  * Added support dcf VLAN filter and offload configuration.
  * Added add extended stats.
+ * Added support queue information getting.
 
 Removed Items
 -
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index 6503700e02..9217392d04 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -1698,6 +1698,8 @@ static const struct eth_dev_ops ice_dcf_eth_dev_ops = {
.tx_queue_start   = ice_dcf_tx_queue_start,
.rx_queue_stop= ice_dcf_rx_queue_stop,
.tx_queue_stop= ice_dcf_tx_queue_stop,
+   .rxq_info_get = ice_rxq_info_get,
+   .txq_info_get = ice_txq_info_get,
.link_update  = ice_dcf_link_update,
.stats_get= ice_dcf_stats_get,
.stats_reset  = ice_dcf_stats_reset,
-- 
2.33.1



[PATCH v6 11/12] net/ice: implement power management

2022-04-27 Thread Kevin Liu
Implement support for the power management API by implementing a
'get_monitor_addr' function that returns the address of an RX ring's
status bit.
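
For illustration, an application typically consumes this through the PMD
power management helpers, which use 'get_monitor_addr' internally (a
sketch, assuming the generic rte_power_pmd_mgmt API; lcore_id and port_id
are placeholders):

	#include <rte_power_pmd_mgmt.h>

	int ret = rte_power_ethdev_pmgmt_queue_enable(lcore_id, port_id, 0,
						      RTE_POWER_MGMT_TYPE_MONITOR);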

Signed-off-by: Kevin Liu 
---
 doc/guides/nics/features/ice_dcf.ini   | 1 +
 doc/guides/rel_notes/release_22_07.rst | 1 +
 drivers/net/ice/ice_dcf_ethdev.c   | 1 +
 3 files changed, 3 insertions(+)

diff --git a/doc/guides/nics/features/ice_dcf.ini 
b/doc/guides/nics/features/ice_dcf.ini
index 54ea7f150c..3b11622d4c 100644
--- a/doc/guides/nics/features/ice_dcf.ini
+++ b/doc/guides/nics/features/ice_dcf.ini
@@ -25,6 +25,7 @@ VLAN filter  = Y
 VLAN offload = Y
 Extended stats   = Y
 Basic stats  = Y
+Power mgmt address monitor = Y
 Linux= Y
 x86-32   = Y
 x86-64   = Y
diff --git a/doc/guides/rel_notes/release_22_07.rst 
b/doc/guides/rel_notes/release_22_07.rst
index 0d6577cd74..004a6d3343 100644
--- a/doc/guides/rel_notes/release_22_07.rst
+++ b/doc/guides/rel_notes/release_22_07.rst
@@ -72,6 +72,7 @@ New Features
  * Added support dcf VLAN filter and offload configuration.
  * Added add extended stats.
  * Added support queue information getting.
+ * Added implement power management.
 
 Removed Items
 -
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index 9217392d04..236c0395e0 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -1700,6 +1700,7 @@ static const struct eth_dev_ops ice_dcf_eth_dev_ops = {
.tx_queue_stop= ice_dcf_tx_queue_stop,
.rxq_info_get = ice_rxq_info_get,
.txq_info_get = ice_txq_info_get,
+   .get_monitor_addr = ice_get_monitor_addr,
.link_update  = ice_dcf_link_update,
.stats_get= ice_dcf_stats_get,
.stats_reset  = ice_dcf_stats_reset,
-- 
2.33.1



[PATCH v6 12/12] net/ice: support DCF new VLAN capabilities

2022-04-27 Thread Kevin Liu
From: Alvin Zhang 

The new VLAN virtchnl opcodes introduce new capabilities like VLAN
filtering, stripping and insertion.

The DCF needs to query the VLAN capabilities based on the current device
configuration first.

DCF is able to configure the inner VLAN filter when port VLAN is enabled,
based on negotiation; and DCF is able to configure the outer VLAN (0x8100)
when port VLAN is disabled, to stay compatible with legacy mode.

When the port VLAN is updated by DCF, the DCF needs to reset to query the
new VLAN capabilities.

Signed-off-by: Alvin Zhang 
Signed-off-by: Kevin Liu 
---
 doc/guides/rel_notes/release_22_07.rst |   1 +
 drivers/net/ice/ice_dcf.c  |  27 
 drivers/net/ice/ice_dcf.h  |   1 +
 drivers/net/ice/ice_dcf_ethdev.c   | 171 ++---
 4 files changed, 183 insertions(+), 17 deletions(-)

diff --git a/doc/guides/rel_notes/release_22_07.rst 
b/doc/guides/rel_notes/release_22_07.rst
index 004a6d3343..7c932a7c8a 100644
--- a/doc/guides/rel_notes/release_22_07.rst
+++ b/doc/guides/rel_notes/release_22_07.rst
@@ -73,6 +73,7 @@ New Features
  * Added add extended stats.
  * Added support queue information getting.
  * Added implement power management.
+ * Added support DCF new VLAN capabilities.
 
 Removed Items
 -
diff --git a/drivers/net/ice/ice_dcf.c b/drivers/net/ice/ice_dcf.c
index 55ae68c456..885d58c0f4 100644
--- a/drivers/net/ice/ice_dcf.c
+++ b/drivers/net/ice/ice_dcf.c
@@ -587,6 +587,29 @@ ice_dcf_get_supported_rxdid(struct ice_dcf_hw *hw)
return 0;
 }
 
+static int
+dcf_get_vlan_offload_caps_v2(struct ice_dcf_hw *hw)
+{
+   struct virtchnl_vlan_caps vlan_v2_caps;
+   struct dcf_virtchnl_cmd args;
+   int ret;
+
+   memset(&args, 0, sizeof(args));
+   args.v_op = VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS;
+   args.rsp_msgbuf = (uint8_t *)&vlan_v2_caps;
+   args.rsp_buflen = sizeof(vlan_v2_caps);
+
+   ret = ice_dcf_execute_virtchnl_cmd(hw, &args);
+   if (ret) {
+   PMD_DRV_LOG(ERR,
+   "Failed to execute command of 
VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS");
+   return ret;
+   }
+
+   rte_memcpy(&hw->vlan_v2_caps, &vlan_v2_caps, sizeof(vlan_v2_caps));
+   return 0;
+}
+
 int
 ice_dcf_init_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw)
 {
@@ -701,6 +724,10 @@ ice_dcf_init_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw)
rte_intr_enable(pci_dev->intr_handle);
ice_dcf_enable_irq0(hw);
 
+   if ((hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN_V2) &&
+   dcf_get_vlan_offload_caps_v2(hw))
+   goto err_rss;
+
return 0;
 
 err_rss:
diff --git a/drivers/net/ice/ice_dcf.h b/drivers/net/ice/ice_dcf.h
index 44a61404c3..7f42ebabe9 100644
--- a/drivers/net/ice/ice_dcf.h
+++ b/drivers/net/ice/ice_dcf.h
@@ -129,6 +129,7 @@ struct ice_dcf_hw {
uint16_t nb_msix;
uint16_t rxq_map[16];
struct virtchnl_eth_stats eth_stats_offset;
+   struct virtchnl_vlan_caps vlan_v2_caps;
 
/* Link status */
bool link_up;
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index 236c0395e0..8005eb2ab8 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -1050,6 +1050,46 @@ dcf_dev_set_default_mac_addr(struct rte_eth_dev *dev,
return 0;
 }
 
+static int
+dcf_add_del_vlan_v2(struct ice_dcf_hw *hw, uint16_t vlanid, bool add)
+{
+   struct virtchnl_vlan_supported_caps *supported_caps =
+   &hw->vlan_v2_caps.filtering.filtering_support;
+   struct virtchnl_vlan *vlan_setting;
+   struct virtchnl_vlan_filter_list_v2 vlan_filter;
+   struct dcf_virtchnl_cmd args;
+   uint32_t filtering_caps;
+   int err;
+
+   if (supported_caps->outer) {
+   filtering_caps = supported_caps->outer;
+   vlan_setting = &vlan_filter.filters[0].outer;
+   } else {
+   filtering_caps = supported_caps->inner;
+   vlan_setting = &vlan_filter.filters[0].inner;
+   }
+
+   if (!(filtering_caps & VIRTCHNL_VLAN_ETHERTYPE_8100))
+   return -ENOTSUP;
+
+   memset(&vlan_filter, 0, sizeof(vlan_filter));
+   vlan_filter.vport_id = hw->vsi_res->vsi_id;
+   vlan_filter.num_elements = 1;
+   vlan_setting->tpid = RTE_ETHER_TYPE_VLAN;
+   vlan_setting->tci = vlanid;
+
+   memset(&args, 0, sizeof(args));
+   args.v_op = add ? VIRTCHNL_OP_ADD_VLAN_V2 : VIRTCHNL_OP_DEL_VLAN_V2;
+   args.req_msg = (uint8_t *)&vlan_filter;
+   args.req_msglen = sizeof(vlan_filter);
+   err = ice_dcf_execute_virtchnl_cmd(hw, &args);
+   if (err)
+   PMD_DRV_LOG(ERR, "fail to execute command %s",
+   add ? "OP_ADD_VLAN_V2" :  "OP_DEL_VLAN_V2");
+
+   return err;
+}
+
 static int
 dcf_add_del_vlan(struct ice_dcf_hw *hw, uint16_t vlanid, bool add)
 {
@@ -1076,6 +1116,116 @@ dcf_

RE: [PATCH v6 01/12] net/ice: enable RSS RETA ops for DCF hardware

2022-04-27 Thread Zhang, Qi Z



> -Original Message-
> From: Liu, KevinX 
> Sent: Thursday, April 28, 2022 2:13 AM
> To: dev@dpdk.org
> Cc: Yang, Qiming ; Zhang, Qi Z
> ; Yang, SteveX ; Liu, KevinX
> 
> Subject: [PATCH v6 01/12] net/ice: enable RSS RETA ops for DCF hardware
> 
> From: Steve Yang 
> 
> RSS RETA should be updated and queried by application, Add related ops
> ('.reta_update', '.reta_query') for DCF.
> 
> Signed-off-by: Steve Yang 
> Signed-off-by: Kevin Liu 
> ---
>  doc/guides/nics/features/ice_dcf.ini   |  1 +
>  doc/guides/rel_notes/release_22_07.rst |  3 +
>  drivers/net/ice/ice_dcf.c  |  2 +-
>  drivers/net/ice/ice_dcf.h  |  1 +
>  drivers/net/ice/ice_dcf_ethdev.c   | 77 ++
>  5 files changed, 83 insertions(+), 1 deletion(-)
> 
> diff --git a/doc/guides/nics/features/ice_dcf.ini
> b/doc/guides/nics/features/ice_dcf.ini
> index 54073f0b88..5221c99a9c 100644
> --- a/doc/guides/nics/features/ice_dcf.ini
> +++ b/doc/guides/nics/features/ice_dcf.ini
> @@ -15,6 +15,7 @@ L3 checksum offload  = P
>  L4 checksum offload  = P
>  Inner L3 checksum= P
>  Inner L4 checksum= P
> +RSS reta update  = Y
>  Basic stats  = Y
>  Linux= Y
>  x86-32   = Y
> diff --git a/doc/guides/rel_notes/release_22_07.rst
> b/doc/guides/rel_notes/release_22_07.rst
> index 90123bb807..cbdc90760c 100644
> --- a/doc/guides/rel_notes/release_22_07.rst
> +++ b/doc/guides/rel_notes/release_22_07.rst
> @@ -60,6 +60,9 @@ New Features
>* Added Tx QoS queue rate limitation support.
>* Added quanta size configuration support.
> 
> +* **Updated Intel ice driver.**
> +
> + * Added enable RSS RETA ops for DCF hardware.

There is no DCF hardware; better to change it to:

Added support for RSS RETA configuration in DCF mode.



RE: [PATCH v6 03/12] net/ice: cleanup Tx buffers

2022-04-27 Thread Zhang, Qi Z



> -Original Message-
> From: Liu, KevinX 
> Sent: Thursday, April 28, 2022 2:13 AM
> To: dev@dpdk.org
> Cc: Yang, Qiming ; Zhang, Qi Z
> ; Yang, SteveX ; Zhang,
> RobinX ; Liu, KevinX 
> Subject: [PATCH v6 03/12] net/ice: cleanup Tx buffers
> 
> From: Robin Zhang 
> 
> Add support for ops rte_eth_tx_done_cleanup in dcf
> 
> Signed-off-by: Robin Zhang 
> Signed-off-by: Kevin Liu 
> ---
>  doc/guides/rel_notes/release_22_07.rst | 1 +
>  drivers/net/ice/ice_dcf_ethdev.c   | 1 +
>  2 files changed, 2 insertions(+)
> 
> diff --git a/doc/guides/rel_notes/release_22_07.rst
> b/doc/guides/rel_notes/release_22_07.rst
> index cc2c243e81..bbd3d296de 100644
> --- a/doc/guides/rel_notes/release_22_07.rst
> +++ b/doc/guides/rel_notes/release_22_07.rst
> @@ -64,6 +64,7 @@ New Features
> 
>   * Added enable RSS RETA ops for DCF hardware.
>   * Added enable RSS HASH ops for DCF hardware.
> + * Added cleanup Tx buffers.

Please keep the pattern consistent:

Added support for Tx buffer cleanup in DCF mode.

Anyway, this is not worth a release note update; you can remove it.

> 
>  Removed Items
>  -
> diff --git a/drivers/net/ice/ice_dcf_ethdev.c 
> b/drivers/net/ice/ice_dcf_ethdev.c
> index ccad7fc304..d8b5961514 100644
> --- a/drivers/net/ice/ice_dcf_ethdev.c
> +++ b/drivers/net/ice/ice_dcf_ethdev.c
> @@ -1235,6 +1235,7 @@ static const struct eth_dev_ops
> ice_dcf_eth_dev_ops = {
>   .reta_query  = ice_dcf_dev_rss_reta_query,
>   .rss_hash_update = ice_dcf_dev_rss_hash_update,
>   .rss_hash_conf_get   = ice_dcf_dev_rss_hash_conf_get,
> + .tx_done_cleanup = ice_tx_done_cleanup,
>  };
> 
>  static int
> --
> 2.33.1



RE: [PATCH v6 05/12] net/ice: add ops dev-supported-ptypes-get to dcf

2022-04-27 Thread Zhang, Qi Z



> -Original Message-
> From: Liu, KevinX 
> Sent: Thursday, April 28, 2022 2:13 AM
> To: dev@dpdk.org
> Cc: Yang, Qiming ; Zhang, Qi Z
> ; Yang, SteveX ; Wang, Jie1X
> ; Liu, KevinX 
> Subject: [PATCH v6 05/12] net/ice: add ops dev-supported-ptypes-get to dcf
> 
> From: Jie Wang 
> 
> add API "dev_supported_ptypes_get" to dcf, that dcf pmd can get ptypes
> through the new API.
> 
> Signed-off-by: Jie Wang 
> Signed-off-by: Kevin Liu 
> ---
>  doc/guides/rel_notes/release_22_07.rst |  1 +
>  drivers/net/ice/ice_dcf_ethdev.c   | 80 --
>  2 files changed, 50 insertions(+), 31 deletions(-)
> 
> diff --git a/doc/guides/rel_notes/release_22_07.rst
> b/doc/guides/rel_notes/release_22_07.rst
> index dc37de85f3..a39196c605 100644
> --- a/doc/guides/rel_notes/release_22_07.rst
> +++ b/doc/guides/rel_notes/release_22_07.rst
> @@ -66,6 +66,7 @@ New Features
>   * Added enable RSS HASH ops for DCF hardware.
>   * Added cleanup Tx buffers.
>   * Added add ops MTU-SET to dcf.
> + * Added add ops dev-supported-ptypes-get to dcf.

A misc feature like this does not need a release notes update; please remove it.

> 
>  Removed Items
>  -
> diff --git a/drivers/net/ice/ice_dcf_ethdev.c 
> b/drivers/net/ice/ice_dcf_ethdev.c
> index 06d752fd61..6a577a6582 100644
> --- a/drivers/net/ice/ice_dcf_ethdev.c
> +++ b/drivers/net/ice/ice_dcf_ethdev.c
> @@ -1218,38 +1218,56 @@ ice_dcf_dev_reset(struct rte_eth_dev *dev)
>   return ret;
>  }
> 
> +static const uint32_t *
> +ice_dcf_dev_supported_ptypes_get(struct rte_eth_dev *dev __rte_unused)
> +{
> + static const uint32_t ptypes[] = {
> + RTE_PTYPE_L2_ETHER,
> + RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
> + RTE_PTYPE_L4_FRAG,
> + RTE_PTYPE_L4_ICMP,
> + RTE_PTYPE_L4_NONFRAG,
> + RTE_PTYPE_L4_SCTP,
> + RTE_PTYPE_L4_TCP,
> + RTE_PTYPE_L4_UDP,
> + RTE_PTYPE_UNKNOWN
> + };
> + return ptypes;
> +}
> +
>  static const struct eth_dev_ops ice_dcf_eth_dev_ops = {
> - .dev_start   = ice_dcf_dev_start,
> - .dev_stop= ice_dcf_dev_stop,
> - .dev_close   = ice_dcf_dev_close,
> - .dev_reset   = ice_dcf_dev_reset,
> - .dev_configure   = ice_dcf_dev_configure,
> - .dev_infos_get   = ice_dcf_dev_info_get,
> - .rx_queue_setup  = ice_rx_queue_setup,
> - .tx_queue_setup  = ice_tx_queue_setup,
> - .rx_queue_release= ice_dev_rx_queue_release,
> - .tx_queue_release= ice_dev_tx_queue_release,
> - .rx_queue_start  = ice_dcf_rx_queue_start,
> - .tx_queue_start  = ice_dcf_tx_queue_start,
> - .rx_queue_stop   = ice_dcf_rx_queue_stop,
> - .tx_queue_stop   = ice_dcf_tx_queue_stop,
> - .link_update = ice_dcf_link_update,
> - .stats_get   = ice_dcf_stats_get,
> - .stats_reset = ice_dcf_stats_reset,
> - .promiscuous_enable  = ice_dcf_dev_promiscuous_enable,
> - .promiscuous_disable = ice_dcf_dev_promiscuous_disable,
> - .allmulticast_enable = ice_dcf_dev_allmulticast_enable,
> - .allmulticast_disable= ice_dcf_dev_allmulticast_disable,
> - .flow_ops_get= ice_dcf_dev_flow_ops_get,
> - .udp_tunnel_port_add = ice_dcf_dev_udp_tunnel_port_add,
> - .udp_tunnel_port_del = ice_dcf_dev_udp_tunnel_port_del,
> - .tm_ops_get  = ice_dcf_tm_ops_get,
> - .reta_update = ice_dcf_dev_rss_reta_update,
> - .reta_query  = ice_dcf_dev_rss_reta_query,
> - .rss_hash_update = ice_dcf_dev_rss_hash_update,
> - .rss_hash_conf_get   = ice_dcf_dev_rss_hash_conf_get,
> - .tx_done_cleanup = ice_tx_done_cleanup,
> - .mtu_set = ice_dcf_dev_mtu_set,
> + .dev_start= ice_dcf_dev_start,
> + .dev_stop = ice_dcf_dev_stop,
> + .dev_close= ice_dcf_dev_close,
> + .dev_reset= ice_dcf_dev_reset,
> + .dev_configure= ice_dcf_dev_configure,
> + .dev_infos_get= ice_dcf_dev_info_get,
> + .dev_supported_ptypes_get = ice_dcf_dev_supported_ptypes_get,
> + .rx_queue_setup   = ice_rx_queue_setup,
> + .tx_queue_setup   = ice_tx_queue_setup,
> + .rx_queue_release = ice_dev_rx_queue_release,
> + .tx_queue_release = ice_dev_tx_queue_release,
> + .rx_queue_start   = ice_dcf_rx_queue_start,
> + .tx_queue_start   = ice_dcf_tx_queue_start,
> + .rx_queue_stop= ice_dcf_rx_queue_stop,
> + .tx_queue_stop= ice_dcf_tx_queue_stop,
> + .link_update  = ice_dcf_link_update,
> + .stats_get= ice_dcf_stats_get,
> + .stats_reset  = ice_dcf_stats_reset,
> + .promiscuous_enable   = ice_dcf_dev_promiscuous_enable,

RE: [PATCH v6 12/12] net/ice: support DCF new VLAN capabilities

2022-04-27 Thread Zhang, Qi Z



> -Original Message-
> From: Liu, KevinX 
> Sent: Thursday, April 28, 2022 2:13 AM
> To: dev@dpdk.org
> Cc: Yang, Qiming ; Zhang, Qi Z
> ; Yang, SteveX ; Alvin Zhang
> ; Liu, KevinX 
> Subject: [PATCH v6 12/12] net/ice: support DCF new VLAN capabilities
> 
> From: Alvin Zhang 
> 
> The new VLAN virtchnl opcodes introduce new capabilities like VLAN filtering,
> stripping and insertion.
> 
> The DCF needs to query the VLAN capabilities based on current device
> configuration firstly.
> 
> DCF is able to configure inner VLAN filter when port VLAN is enabled base on
> negotiation; and DCF is able to configure outer VLAN (0x8100) if port VLAN is
> disabled to be compatible with legacy mode.
> 
> When port VLAN is updated by DCF, the DCF needs to reset to query the new
> VLAN capabilities.
> 
> Signed-off-by: Alvin Zhang 
> Signed-off-by: Kevin Liu 
> ---
>  doc/guides/rel_notes/release_22_07.rst |   1 +
>  drivers/net/ice/ice_dcf.c  |  27 
>  drivers/net/ice/ice_dcf.h  |   1 +
>  drivers/net/ice/ice_dcf_ethdev.c   | 171 ++---
>  4 files changed, 183 insertions(+), 17 deletions(-)
> 
> diff --git a/doc/guides/rel_notes/release_22_07.rst
> b/doc/guides/rel_notes/release_22_07.rst
> index 004a6d3343..7c932a7c8a 100644
> --- a/doc/guides/rel_notes/release_22_07.rst
> +++ b/doc/guides/rel_notes/release_22_07.rst
> @@ -73,6 +73,7 @@ New Features
>   * Added add extended stats.
>   * Added support queue information getting.
>   * Added implement power management.
> + * Added support DCF new VLAN capabilities.

This feature is not exposed to the user, so no release note update is needed.

> 
>  Removed Items
>  -
> diff --git a/drivers/net/ice/ice_dcf.c b/drivers/net/ice/ice_dcf.c index
> 55ae68c456..885d58c0f4 100644
> --- a/drivers/net/ice/ice_dcf.c
> +++ b/drivers/net/ice/ice_dcf.c
> @@ -587,6 +587,29 @@ ice_dcf_get_supported_rxdid(struct ice_dcf_hw *hw)
>   return 0;
>  }
> 
> +static int
> +dcf_get_vlan_offload_caps_v2(struct ice_dcf_hw *hw) {
> + struct virtchnl_vlan_caps vlan_v2_caps;
> + struct dcf_virtchnl_cmd args;
> + int ret;
> +
> + memset(&args, 0, sizeof(args));
> + args.v_op = VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS;
> + args.rsp_msgbuf = (uint8_t *)&vlan_v2_caps;
> + args.rsp_buflen = sizeof(vlan_v2_caps);
> +
> + ret = ice_dcf_execute_virtchnl_cmd(hw, &args);
> + if (ret) {
> + PMD_DRV_LOG(ERR,
> + "Failed to execute command of
> VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS");
> + return ret;
> + }
> +
> + rte_memcpy(&hw->vlan_v2_caps, &vlan_v2_caps,
> sizeof(vlan_v2_caps));
> + return 0;
> +}
> +
>  int
>  ice_dcf_init_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw)  { @@ -
> 701,6 +724,10 @@ ice_dcf_init_hw(struct rte_eth_dev *eth_dev, struct
> ice_dcf_hw *hw)
>   rte_intr_enable(pci_dev->intr_handle);
>   ice_dcf_enable_irq0(hw);
> 
> + if ((hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN_V2)
> &&
> + dcf_get_vlan_offload_caps_v2(hw))
> + goto err_rss;
> +
>   return 0;
> 
>  err_rss:
> diff --git a/drivers/net/ice/ice_dcf.h b/drivers/net/ice/ice_dcf.h index
> 44a61404c3..7f42ebabe9 100644
> --- a/drivers/net/ice/ice_dcf.h
> +++ b/drivers/net/ice/ice_dcf.h
> @@ -129,6 +129,7 @@ struct ice_dcf_hw {
>   uint16_t nb_msix;
>   uint16_t rxq_map[16];
>   struct virtchnl_eth_stats eth_stats_offset;
> + struct virtchnl_vlan_caps vlan_v2_caps;
> 
>   /* Link status */
>   bool link_up;
> diff --git a/drivers/net/ice/ice_dcf_ethdev.c 
> b/drivers/net/ice/ice_dcf_ethdev.c
> index 236c0395e0..8005eb2ab8 100644
> --- a/drivers/net/ice/ice_dcf_ethdev.c
> +++ b/drivers/net/ice/ice_dcf_ethdev.c
> @@ -1050,6 +1050,46 @@ dcf_dev_set_default_mac_addr(struct
> rte_eth_dev *dev,
>   return 0;
>  }
> 
> +static int
> +dcf_add_del_vlan_v2(struct ice_dcf_hw *hw, uint16_t vlanid, bool add) {
> + struct virtchnl_vlan_supported_caps *supported_caps =
> + &hw->vlan_v2_caps.filtering.filtering_support;
> + struct virtchnl_vlan *vlan_setting;
> + struct virtchnl_vlan_filter_list_v2 vlan_filter;
> + struct dcf_virtchnl_cmd args;
> + uint32_t filtering_caps;
> + int err;
> +
> + if (supported_caps->outer) {
> + filtering_caps = supported_caps->outer;
> + vlan_setting = &vlan_filter.filters[0].outer;
> + } else {
> + filtering_caps = supported_caps->inner;
> + vlan_setting = &vlan_filter.filters[0].inner;
> + }
> +
> + if (!(filtering_caps & VIRTCHNL_VLAN_ETHERTYPE_8100))
> + return -ENOTSUP;
> +
> + memset(&vlan_filter, 0, sizeof(vlan_filter));
> + vlan_filter.vport_id = hw->vsi_res->vsi_id;
> + vlan_filter.num_elements = 1;
> + vlan_setting->tpid = RTE_ETHER_TYPE_VLAN;
> + vlan_setting->tci = vlanid;
> +
> + memset(&args, 0, sizeof(args));
> + args.v_op = add ? VIRTCHNL_OP_ADD_VLAN_V2 : VIRTCHNL_OP_DEL_VLAN_V2;

Re: [PATCH v3 2/3] examples/dma: fix Tx drop statistic is not collected

2022-04-27 Thread Kevin Laatz

On 24/04/2022 07:07, Chengwen Feng wrote:

The Tx drop statistic was designed to be collected by the
rte_eth_dev_tx_buffer mechanism, but the application uses
rte_eth_tx_burst to send packets, so the Tx drop statistic
was never collected.

This patch removes the rte_eth_dev_tx_buffer mechanism to fix the problem.
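
For context, the removed mechanism looked roughly like this (a sketch;
tx_buffer and port_stats are placeholder names), and drops were only
counted through the buffer's error callback, which rte_eth_tx_burst()
never invokes:

	/* struct rte_eth_dev_tx_buffer *tx_buffer, allocated by the app */
	rte_eth_tx_buffer_init(tx_buffer, MAX_PKT_BURST);
	rte_eth_tx_buffer_set_err_callback(tx_buffer,
		rte_eth_tx_buffer_count_callback, &port_stats.tx_dropped);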

Fixes: 632bcd9b5d4f ("examples/ioat: print statistics")
Cc: sta...@dpdk.org

Signed-off-by: Chengwen Feng 
Acked-by: Bruce Richardson 
---
  examples/dma/dmafwd.c | 27 +--
  1 file changed, 5 insertions(+), 22 deletions(-)


Acked-by: Kevin Laatz 




Re: [PATCH v3 3/3] examples/dma: add force minimal copy size parameter

2022-04-27 Thread Kevin Laatz

On 24/04/2022 07:07, Chengwen Feng wrote:

This patch adds a force minimal copy size parameter
(-m/--force-min-copy-size), so that when copying by CPU or DMA, the real
copy size will be the maximum of the mbuf's data_len and this parameter.

This parameter was designed to compare the performance between CPU copy
and DMA copy. The user could send small packets at a high rate to drive
the performance test.
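
As a sketch of the described behaviour (copy_len, m and
force_min_copy_size are placeholder names):

	/* effective copy length per packet */
	copy_len = RTE_MAX((uint32_t)m->data_len, force_min_copy_size);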

Signed-off-by: Chengwen Feng 
Acked-by: Bruce Richardson 
---
  examples/dma/dmafwd.c | 30 +++---
  1 file changed, 27 insertions(+), 3 deletions(-)


Acked-by: Kevin Laatz 




[PATCH v2 1/2] event/cnxk: add additional checks in OP_RELEASE

2022-04-27 Thread Pavan Nikhilesh
Add additional checks while performing RTE_EVENT_OP_RELEASE to
ensure that there are no pending SWTAGs and FLUSHEs in flight.
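
For reference, RTE_EVENT_OP_RELEASE is what an application enqueues to
drop the current scheduling context, e.g. (a sketch; dev_id and port_id
are placeholders):

	struct rte_event ev = { .op = RTE_EVENT_OP_RELEASE };

	rte_event_enqueue_burst(dev_id, port_id, &ev, 1);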

Signed-off-by: Pavan Nikhilesh 
---
 v2 Changes:
 - Fix compilation with RTE_LIBRTE_MEMPOOL_DEBUG enabled.

 drivers/event/cnxk/cn10k_eventdev.c |  4 +---
 drivers/event/cnxk/cn10k_worker.c   |  8 ++--
 drivers/event/cnxk/cn9k_eventdev.c  |  4 +---
 drivers/event/cnxk/cn9k_worker.c| 16 
 drivers/event/cnxk/cn9k_worker.h|  3 +--
 drivers/event/cnxk/cnxk_worker.h| 17 ++---
 6 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/drivers/event/cnxk/cn10k_eventdev.c 
b/drivers/event/cnxk/cn10k_eventdev.c
index 9b4d2895ec..2fa2cd31c2 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -137,9 +137,7 @@ cn10k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base,
if (fn != NULL && ev.u64 != 0)
fn(arg, ev);
if (ev.sched_type != SSO_TT_EMPTY)
-   cnxk_sso_hws_swtag_flush(
-   ws->base + SSOW_LF_GWS_WQE0,
-   ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+   cnxk_sso_hws_swtag_flush(ws->base);
do {
val = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
} while (val & BIT_ULL(56));
diff --git a/drivers/event/cnxk/cn10k_worker.c 
b/drivers/event/cnxk/cn10k_worker.c
index 975a22336a..0d99b4c5e5 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -18,8 +18,12 @@ cn10k_sso_hws_enq(void *port, const struct rte_event *ev)
cn10k_sso_hws_forward_event(ws, ev);
break;
case RTE_EVENT_OP_RELEASE:
-   cnxk_sso_hws_swtag_flush(ws->base + SSOW_LF_GWS_WQE0,
-ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+   if (ws->swtag_req) {
+   cnxk_sso_hws_desched(ev->u64, ws->base);
+   ws->swtag_req = 0;
+   break;
+   }
+   cnxk_sso_hws_swtag_flush(ws->base);
break;
default:
return 0;
diff --git a/drivers/event/cnxk/cn9k_eventdev.c 
b/drivers/event/cnxk/cn9k_eventdev.c
index 4bba477dd1..41bbe3cb22 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -156,9 +156,7 @@ cn9k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base,
if (fn != NULL && ev.u64 != 0)
fn(arg, ev);
if (ev.sched_type != SSO_TT_EMPTY)
-   cnxk_sso_hws_swtag_flush(
-   ws_base + SSOW_LF_GWS_TAG,
-   ws_base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+   cnxk_sso_hws_swtag_flush(ws_base);
do {
val = plt_read64(ws_base + SSOW_LF_GWS_PENDSTATE);
} while (val & BIT_ULL(56));
diff --git a/drivers/event/cnxk/cn9k_worker.c b/drivers/event/cnxk/cn9k_worker.c
index a981bc986f..41dbe6cafb 100644
--- a/drivers/event/cnxk/cn9k_worker.c
+++ b/drivers/event/cnxk/cn9k_worker.c
@@ -19,8 +19,12 @@ cn9k_sso_hws_enq(void *port, const struct rte_event *ev)
cn9k_sso_hws_forward_event(ws, ev);
break;
case RTE_EVENT_OP_RELEASE:
-   cnxk_sso_hws_swtag_flush(ws->base + SSOW_LF_GWS_TAG,
-ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+   if (ws->swtag_req) {
+   cnxk_sso_hws_desched(ev->u64, ws->base);
+   ws->swtag_req = 0;
+   break;
+   }
+   cnxk_sso_hws_swtag_flush(ws->base);
break;
default:
return 0;
@@ -78,8 +82,12 @@ cn9k_sso_hws_dual_enq(void *port, const struct rte_event *ev)
cn9k_sso_hws_dual_forward_event(dws, base, ev);
break;
case RTE_EVENT_OP_RELEASE:
-   cnxk_sso_hws_swtag_flush(base + SSOW_LF_GWS_TAG,
-base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+   if (dws->swtag_req) {
+   cnxk_sso_hws_desched(ev->u64, base);
+   dws->swtag_req = 0;
+   break;
+   }
+   cnxk_sso_hws_swtag_flush(base);
break;
default:
return 0;
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index 917d1e0b40..88eb4e9cf9 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -841,8 +841,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, 
uint64_t *cmd,
return 1;
}

-   cnxk_sso_hws_swtag_flush(base + SSOW_LF_GWS_TAG,
-base + SSOW_LF_GWS_OP_SWTAG_FLUSH);

[PATCH v2 2/2] event/cnxk: move post-processing to separate function

2022-04-27 Thread Pavan Nikhilesh
Move event post-processing to a separate function.
Do complete event post-processing in the tear-down functions to prevent
incorrect memory frees.

Signed-off-by: Pavan Nikhilesh 
---
 drivers/event/cnxk/cn10k_eventdev.c |   5 +-
 drivers/event/cnxk/cn10k_worker.h   | 189 +---
 drivers/event/cnxk/cn9k_eventdev.c  |   9 +-
 drivers/event/cnxk/cn9k_worker.h| 114 ++---
 4 files changed, 137 insertions(+), 180 deletions(-)

diff --git a/drivers/event/cnxk/cn10k_eventdev.c 
b/drivers/event/cnxk/cn10k_eventdev.c
index 2fa2cd31c2..94829e789c 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -133,7 +133,10 @@ cn10k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base,
 
while (aq_cnt || cq_ds_cnt || ds_cnt) {
plt_write64(req, ws->base + SSOW_LF_GWS_OP_GET_WORK0);
-   cn10k_sso_hws_get_work_empty(ws, &ev);
+   cn10k_sso_hws_get_work_empty(
+   ws, &ev,
+   (NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F |
+   NIX_RX_MULTI_SEG_F | CPT_RX_WQE_F);
if (fn != NULL && ev.u64 != 0)
fn(arg, ev);
if (ev.sched_type != SSO_TT_EMPTY)
diff --git a/drivers/event/cnxk/cn10k_worker.h 
b/drivers/event/cnxk/cn10k_worker.h
index c96048f47d..034f508dd8 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -196,15 +196,87 @@ cn10k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags,
}
 }
 
+static __rte_always_inline void
+cn10k_sso_hws_post_process(struct cn10k_sso_hws *ws, uint64_t *u64,
+  const uint32_t flags)
+{
+   uint64_t tstamp_ptr;
+
+   u64[0] = (u64[0] & (0x3ull << 32)) << 6 |
+(u64[0] & (0x3FFull << 36)) << 4 | (u64[0] & 0x);
+   if ((flags & CPT_RX_WQE_F) &&
+   (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == RTE_EVENT_TYPE_CRYPTODEV)) {
+   u64[1] = cn10k_cpt_crypto_adapter_dequeue(u64[1]);
+   } else if (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == RTE_EVENT_TYPE_ETHDEV) {
+   uint8_t port = CNXK_SUB_EVENT_FROM_TAG(u64[0]);
+   uint64_t mbuf;
+
+   mbuf = u64[1] - sizeof(struct rte_mbuf);
+   rte_prefetch0((void *)mbuf);
+   if (flags & NIX_RX_OFFLOAD_SECURITY_F) {
+   const uint64_t mbuf_init =
+   0x10001ULL | RTE_PKTMBUF_HEADROOM |
+   (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+   struct rte_mbuf *m;
+   uintptr_t sa_base;
+   uint64_t iova = 0;
+   uint8_t loff = 0;
+   uint16_t d_off;
+   uint64_t cq_w1;
+   uint64_t cq_w5;
+
+   m = (struct rte_mbuf *)mbuf;
+   d_off = (uintptr_t)(m->buf_addr) - (uintptr_t)m;
+   d_off += RTE_PKTMBUF_HEADROOM;
+
+   cq_w1 = *(uint64_t *)(u64[1] + 8);
+   cq_w5 = *(uint64_t *)(u64[1] + 40);
+
+   sa_base = cnxk_nix_sa_base_get(port, ws->lookup_mem);
+   sa_base &= ~(ROC_NIX_INL_SA_BASE_ALIGN - 1);
+
+   mbuf = (uint64_t)nix_sec_meta_to_mbuf_sc(
+   cq_w1, cq_w5, sa_base, (uintptr_t)&iova, &loff,
+   (struct rte_mbuf *)mbuf, d_off, flags,
+   mbuf_init | ((uint64_t)port) << 48);
+   if (loff)
+   roc_npa_aura_op_free(m->pool->pool_id, 0, iova);
+   }
+
+   u64[0] = CNXK_CLR_SUB_EVENT(u64[0]);
+   cn10k_wqe_to_mbuf(u64[1], mbuf, port, u64[0] & 0xF, flags,
+ ws->lookup_mem);
+   /* Extracting tstamp, if PTP enabled*/
+   tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)u64[1]) +
+  CNXK_SSO_WQE_SG_PTR);
+   cn10k_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, ws->tstamp,
+flags & NIX_RX_OFFLOAD_TSTAMP_F,
+(uint64_t *)tstamp_ptr);
+   u64[1] = mbuf;
+   } else if (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) ==
+  RTE_EVENT_TYPE_ETHDEV_VECTOR) {
+   uint8_t port = CNXK_SUB_EVENT_FROM_TAG(u64[0]);
+   __uint128_t vwqe_hdr = *(__uint128_t *)u64[1];
+
+   vwqe_hdr = ((vwqe_hdr >> 64) & 0xFFF) | BIT_ULL(31) |
+  ((vwqe_hdr & 0x) << 48) | ((uint64_t)port << 32);
+   *(uint64_t *)u64[1] = (uint64_t)vwqe_hdr;
+   cn10k_process_vwqe(u64[1], port, flags, ws->lookup_mem,
+  ws->tstamp, ws->lmt_base

RE: [PATCH] doc: fix support table for ETH and VLAN flow items

2022-04-27 Thread Asaf Penso
>-Original Message-
>From: Ferruh Yigit 
>Sent: Tuesday, April 26, 2022 1:48 PM
>To: Asaf Penso ; Ilya Maximets ;
>dev@dpdk.org; Ori Kam 
>Cc: Ajit Khaparde ; Rahul Lakkireddy
>; Hemant Agrawal
>; Haiyue Wang ; John
>Daley ; Guoyang Zhou ;
>Min Hu (Connor) ; Beilei Xing
>; Jingjing Wu ; Qi Zhang
>; Rosen Xu ; Matan Azrad
>; Slava Ovsiienko ; Liron Himi
>; Jiawen Wu ; Dekel Peled
>; NBU-Contact-Thomas Monjalon (EXTERNAL)
>; sta...@dpdk.org
>Subject: Re: [PATCH] doc: fix support table for ETH and VLAN flow items
>
>On 4/26/2022 9:55 AM, Asaf Penso wrote:
>>> -Original Message-
>>> From: Ferruh Yigit 
>>> Sent: Wednesday, April 20, 2022 8:52 PM
>>> To: Ilya Maximets ; dev@dpdk.org; Asaf Penso
>>> 
>>> Cc: Ajit Khaparde ; Rahul Lakkireddy
>>> ; Hemant Agrawal
>>> ; Haiyue Wang ;
>John
>>> Daley ; Guoyang Zhou
>;
>>> Min Hu (Connor) ; Beilei Xing
>>> ; Jingjing Wu ; Qi
>>> Zhang ; Rosen Xu ; Matan
>>> Azrad ; Slava Ovsiienko ;
>>> Liron Himi ; Jiawen Wu
>;
>>> Ori Kam ; Dekel Peled ;
>>> NBU-Contact- Thomas Monjalon (EXTERNAL) ;
>>> sta...@dpdk.org; NBU-Contact-Thomas Monjalon (EXTERNAL)
>>> 
>>> Subject: Re: [PATCH] doc: fix support table for ETH and VLAN flow
>>> items
>>>
>>> On 3/16/2022 12:01 PM, Ilya Maximets wrote:
 'has_vlan' attribute is only supported by sfc, mlx5 and cnxk.
 Other drivers doesn't support it.  Most of them (like i40e) just
 ignore it silently.  Some drivers (like mlx4) never had a full
 support of the eth item even before introduction of 'has_vlan'
 (mlx4 allows to match on the destination MAC only).

 Same for the 'has_more_vlan' flag of the vlan item.

 Changing the support level to 'partial' for all such drivers.
 This doesn't solve the issue, but at least marks the problematic
 drivers.

>>>
>>> Hi Asaf,
>>>
>>> This was the kind of maintenance issue I was referring to have this
>>> kind of capability documentation for flow API.
>>>
>> Are you referring to the fact that fields like has_vlan are not part of the
>table?
>> If so, you are right, but IMHO having the high level items still allows the
>users to understand what is supported quickly.
>> We can have another level of tables per each relevant item to address this
>specific issue.
>> In this case, we'll have a table for ETH that elaborates the different 
>> fields'
>support, like has_vlan.
>> If you are referring to a different issue, please elaborate.
>>
>
>'vlan' in the .ini file is already to document the flow API VLAN support, so I 
>am
>not suggesting adding more to the table.
>
>My point was it is hard to make this kind documentation correct.
>

Yes, and I think that between having none and having everything, what we 
currently have is closer to everything, and is very useful for the users.
 

>>
>>> All below drivers are using 'RTE_FLOW_ITEM_TYPE_VLAN', the script
>>> verifies this, but are they actually supporting VLAN filter and in which 
>>> case?
>>>
>>> We need comment from driver maintainers about the support level.
>>
>> @Ori Kam, please comment for mlx driver.
>>
>>>
 Some details are available in:
 https://bugs.dpdk.org/show_bug.cgi?id=958

 Fixes: 09315fc83861 ("ethdev: add VLAN attributes to ethernet and
 VLAN
 items")
 Cc: sta...@dpdk.org

 Signed-off-by: Ilya Maximets 
 ---

 I added the stable in CC, but the patch should be extended while
 backporting.  For 21.11 the cnxk driver should be also updated, for
 20.11, sfc driver should also be included.

doc/guides/nics/features/bnxt.ini   | 4 ++--
doc/guides/nics/features/cxgbe.ini  | 4 ++--
doc/guides/nics/features/dpaa2.ini  | 4 ++--
doc/guides/nics/features/e1000.ini  | 2 +-
doc/guides/nics/features/enic.ini   | 4 ++--
doc/guides/nics/features/hinic.ini  | 2 +-
doc/guides/nics/features/hns3.ini   | 4 ++--
doc/guides/nics/features/i40e.ini   | 4 ++--
doc/guides/nics/features/iavf.ini   | 4 ++--
doc/guides/nics/features/ice.ini| 4 ++--
doc/guides/nics/features/igc.ini| 2 +-
doc/guides/nics/features/ipn3ke.ini | 4 ++--
doc/guides/nics/features/ixgbe.ini  | 4 ++--
doc/guides/nics/features/mlx4.ini   | 4 ++--
doc/guides/nics/features/mvpp2.ini  | 4 ++--
doc/guides/nics/features/tap.ini| 4 ++--
doc/guides/nics/features/txgbe.ini  | 4 ++--
17 files changed, 31 insertions(+), 31 deletions(-)

 diff --git a/doc/guides/nics/features/bnxt.ini
 b/doc/guides/nics/features/bnxt.ini
 index afb5414b49..ac682c5779 100644
 --- a/doc/guides/nics/features/bnxt.ini
 +++ b/doc/guides/nics/features/bnxt.ini
 @@ -57,7 +57,7 @@ Perf doc = Y

[rte_flow items]
any  = Y
 -eth  = Y
 +eth  = P
ipv4 = Y
ipv6 = Y
gre  = Y
>>

[Bug 996] DPDK:20.11.1: net/ena crash while fetching xstats

2022-04-27 Thread bugzilla
https://bugs.dpdk.org/show_bug.cgi?id=996

Michal Krawczyk (m...@semihalf.com) changed:

   What|Removed |Added

 Resolution|WONTFIX |---
 Status|RESOLVED|UNCONFIRMED

--- Comment #3 from Michal Krawczyk (m...@semihalf.com) ---
Hey Amiya,

sorry for the late reply, I was OOO for one week.

Thank you for providing us with more details. 

If you aren't calling any API that needs to use the ENA admin queue from the
secondary process, the situation you're seeing shouldn't happen.

I just executed simple application on DPDK v20.11.1 in MP mode - the main
process is fetching the xstats, the secondary process is simply performing the
packets forwarding. The application is not crashing for my case.

From what I understand, the crash happens because:

1. The ENA admin queue is not using the shared memory
2. The secondary process sends the request and saves it in the secondary
process memory
3. The primary process receives the interrupt and executes the completion
handler 
4. The completion handler cannot find the relevant request (as it's in the
secondary process memory) and the app crashes.

Please double check if:

1. The xstats aren't being fetched from the secondary process
2. You aren't calling any of API below from the secondary process, which also
uses the ENA admin queue:
  - rte_eth_dev_set_mtu()
  - rte_eth_dev_rss_reta_update()
  - rte_eth_dev_rss_reta_query()

Point 1 is much more likely, as you've described this as a regression in
v20.11, and indeed the xstats were extended after the v19.11 release.
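
To quickly rule out point 1, the xstats path can be guarded on the EAL
process type. A minimal sketch (the wrapper function and the fixed-size
array are hypothetical; the EAL/ethdev calls are standard DPDK API):

#include <stdio.h>

#include <rte_eal.h>
#include <rte_ethdev.h>

static void
dump_xstats_primary_only(uint16_t port_id)
{
	struct rte_eth_xstat xstats[256];
	int n;

	/* The ENA admin queue is serviced by the primary process, so
	 * only fetch xstats from there. */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return;

	n = rte_eth_xstats_get(port_id, xstats, RTE_DIM(xstats));
	if (n < 0)
		printf("rte_eth_xstats_get failed: %d\n", n);
}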

If none of the above is true, any other information that could potentially
get us closer to the root of the issue would be helpful (we can't reproduce
this on our side).

Thanks,
Michal

-- 
You are receiving this mail because:
You are the assignee for the bug.

[PATCH 1/3] eventdev: add function to quiesce an event port

2022-04-27 Thread Pavan Nikhilesh
Add a function to quiesce any core-specific resources consumed by
the event port.

When the application decides to migrate the event port to another lcore,
or to tear down the current lcore, it may call `rte_event_port_quiesce`
to make sure that all the data associated with the event port is released
from the lcore; this might also include any prefetched events.

While releasing the event port from the lcore, this function calls the
user-provided flush callback once per event.
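
For illustration, a worker's teardown path could use the new API roughly
as follows (a minimal sketch; `flush_cb`, `worker_teardown` and the
mempool argument are hypothetical, and the callback mirrors the
mempool-based flush used in patch 2/3):

#include <rte_eventdev.h>
#include <rte_mempool.h>

static void
flush_cb(uint8_t dev_id __rte_unused, struct rte_event ev, void *args)
{
	/* Return the flushed event's buffer to its mempool. */
	rte_mempool_put(args, ev.event_ptr);
}

static void
worker_teardown(uint8_t dev_id, uint8_t port_id, struct rte_mempool *pool)
{
	/* Drain all port-held state, including prefetched events. */
	rte_event_port_quiesce(dev_id, port_id, flush_cb, pool);
}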

Signed-off-by: Pavan Nikhilesh 
---
 lib/eventdev/eventdev_pmd.h | 19 +++
 lib/eventdev/rte_eventdev.c | 19 +++
 lib/eventdev/rte_eventdev.h | 33 +
 lib/eventdev/version.map|  3 +++
 4 files changed, 74 insertions(+)

diff --git a/lib/eventdev/eventdev_pmd.h b/lib/eventdev/eventdev_pmd.h
index ce469d47a6..cf9f2146a1 100644
--- a/lib/eventdev/eventdev_pmd.h
+++ b/lib/eventdev/eventdev_pmd.h
@@ -381,6 +381,23 @@ typedef int (*eventdev_port_setup_t)(struct rte_eventdev 
*dev,
  */
 typedef void (*eventdev_port_release_t)(void *port);
 
+/**
+ * Quiesce any core specific resources consumed by the event port
+ *
+ * @param dev
+ *   Event device pointer.
+ * @param port
+ *   Event port pointer.
+ * @param flush_cb
+ *   User-provided event flush function.
+ * @param args
+ *   Arguments to be passed to the user-provided event flush function.
+ *
+ */
+typedef void (*eventdev_port_quiesce_t)(struct rte_eventdev *dev, void *port,
+   eventdev_port_flush_t flush_cb,
+   void *args);
+
 /**
  * Link multiple source event queues to destination event port.
  *
@@ -1218,6 +1235,8 @@ struct eventdev_ops {
/**< Set up an event port. */
eventdev_port_release_t port_release;
/**< Release an event port. */
+   eventdev_port_quiesce_t port_quiesce;
+   /**< Quiesce an event port. */
 
eventdev_port_link_t port_link;
/**< Link event queues to an event port. */
diff --git a/lib/eventdev/rte_eventdev.c b/lib/eventdev/rte_eventdev.c
index 532a253553..541fa5dc61 100644
--- a/lib/eventdev/rte_eventdev.c
+++ b/lib/eventdev/rte_eventdev.c
@@ -730,6 +730,25 @@ rte_event_port_setup(uint8_t dev_id, uint8_t port_id,
return 0;
 }
 
+void
+rte_event_port_quiesce(uint8_t dev_id, uint8_t port_id,
+  eventdev_port_flush_t release_cb, void *args)
+{
+   struct rte_eventdev *dev;
+
+   RTE_EVENTDEV_VALID_DEVID_OR_RET(dev_id);
+   dev = &rte_eventdevs[dev_id];
+
+   if (!is_valid_port(dev, port_id)) {
+   RTE_EDEV_LOG_ERR("Invalid port_id=%" PRIu8, port_id);
+   return;
+   }
+
+   if (dev->dev_ops->port_quiesce)
+   (*dev->dev_ops->port_quiesce)(dev, dev->data->ports[port_id],
+ release_cb, args);
+}
+
 int
 rte_event_dev_attr_get(uint8_t dev_id, uint32_t attr_id,
   uint32_t *attr_value)
diff --git a/lib/eventdev/rte_eventdev.h b/lib/eventdev/rte_eventdev.h
index 42a5660169..c86d8a5576 100644
--- a/lib/eventdev/rte_eventdev.h
+++ b/lib/eventdev/rte_eventdev.h
@@ -830,6 +830,39 @@ int
 rte_event_port_setup(uint8_t dev_id, uint8_t port_id,
 const struct rte_event_port_conf *port_conf);
 
+typedef void (*eventdev_port_flush_t)(uint8_t dev_id, struct rte_event event,
+ void *arg);
+/**< Callback function prototype that can be passed during
+ * rte_event_port_quiesce(), invoked once per released event.
+ */
+
+/**
+ * Quiesce any core specific resources consumed by the event port.
+ *
+ * Event ports are generally coupled with lcores, and a given hardware
+ * implementation might require the PMD to store port-specific data in the
+ * lcore.
+ * When the application decides to migrate the event port to another lcore
+ * or tear down the current lcore, it may call `rte_event_port_quiesce`
+ * to make sure that all the data associated with the event port is released
+ * from the lcore; this might also include any prefetched events.
+ * While releasing the event port from the lcore, this function calls the
+ * user-provided flush callback once per event.
+ *
+ * The event port specific config is not reset.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param port_id
+ *   The index of the event port to quiesce. The value must be in the range
+ *   [0, nb_event_ports - 1] previously supplied to rte_event_dev_configure().
+ * @param release_cb
+ *   Callback function invoked once per flushed event.
+ */
+__rte_experimental
+void rte_event_port_quiesce(uint8_t dev_id, uint8_t port_id,
+   eventdev_port_flush_t release_cb, void *args);
+
 /**
  * The queue depth of the port on the enqueue side
  */
diff --git a/lib/eventdev/version.map b/lib/eventdev/version.map
index cd5dada07f..1907093539 100644
--- a/lib/eventdev/version.map
+++ b/lib/eventdev/versio

[PATCH 2/3] eventdev: update examples to use port quiesce

2022-04-27 Thread Pavan Nikhilesh
Quiesce event ports used by the worker cores on exit to free up
any outstanding resources.

Signed-off-by: Pavan Nikhilesh 
Change-Id: Iea1f933d4f4926630d82a9883fbe3f1e75876097
---
 Depends-on: Series-22677

 app/test-eventdev/test_perf_common.c |  8 
 app/test-eventdev/test_pipeline_common.c | 12 
 examples/eventdev_pipeline/pipeline_common.h |  9 +
 examples/ipsec-secgw/ipsec_worker.c  | 13 +
 examples/l2fwd-event/l2fwd_common.c  | 13 +
 examples/l3fwd/l3fwd_event.c | 13 +
 6 files changed, 68 insertions(+)

diff --git a/app/test-eventdev/test_perf_common.c 
b/app/test-eventdev/test_perf_common.c
index f673a9fddd..2016583979 100644
--- a/app/test-eventdev/test_perf_common.c
+++ b/app/test-eventdev/test_perf_common.c
@@ -985,6 +985,13 @@ perf_opt_dump(struct evt_options *opt, uint8_t nb_queues)
evt_dump("prod_enq_burst_sz", "%d", opt->prod_enq_burst_sz);
 }

+static void
+perf_event_port_flush(uint8_t dev_id __rte_unused, struct rte_event ev,
+ void *args)
+{
+   rte_mempool_put(args, ev.event_ptr);
+}
+
 void
 perf_worker_cleanup(struct rte_mempool *const pool, uint8_t dev_id,
uint8_t port_id, struct rte_event events[], uint16_t nb_enq,
@@ -1000,6 +1007,7 @@ perf_worker_cleanup(struct rte_mempool *const pool, 
uint8_t dev_id,
events[i].op = RTE_EVENT_OP_RELEASE;
rte_event_enqueue_burst(dev_id, port_id, events, nb_deq);
}
+   rte_event_port_quiesce(dev_id, port_id, perf_event_port_flush, pool);
 }

 void
diff --git a/app/test-eventdev/test_pipeline_common.c 
b/app/test-eventdev/test_pipeline_common.c
index a8dd07..82e5745071 100644
--- a/app/test-eventdev/test_pipeline_common.c
+++ b/app/test-eventdev/test_pipeline_common.c
@@ -518,6 +518,16 @@ pipeline_vector_array_free(struct rte_event events[], 
uint16_t num)
}
 }

+static void
+pipeline_event_port_flush(uint8_t dev_id __rte_unused, struct rte_event ev,
+ void *args __rte_unused)
+{
+   if (ev.event_type & RTE_EVENT_TYPE_VECTOR)
+   pipeline_vector_array_free(&ev, 1);
+   else
+   rte_pktmbuf_free(ev.mbuf);
+}
+
 void
 pipeline_worker_cleanup(uint8_t dev, uint8_t port, struct rte_event ev[],
uint16_t enq, uint16_t deq)
@@ -542,6 +552,8 @@ pipeline_worker_cleanup(uint8_t dev, uint8_t port, struct 
rte_event ev[],

rte_event_enqueue_burst(dev, port, ev, deq);
}
+
+   rte_event_port_quiesce(dev, port, pipeline_event_port_flush, NULL);
 }

 void
diff --git a/examples/eventdev_pipeline/pipeline_common.h 
b/examples/eventdev_pipeline/pipeline_common.h
index 9899b257b0..28b6ab85ff 100644
--- a/examples/eventdev_pipeline/pipeline_common.h
+++ b/examples/eventdev_pipeline/pipeline_common.h
@@ -140,6 +140,13 @@ schedule_devices(unsigned int lcore_id)
}
 }

+static void
+event_port_flush(uint8_t dev_id __rte_unused, struct rte_event ev,
+void *args __rte_unused)
+{
+   rte_mempool_put(args, ev.event_ptr);
+}
+
 static inline void
 worker_cleanup(uint8_t dev_id, uint8_t port_id, struct rte_event events[],
   uint16_t nb_enq, uint16_t nb_deq)
@@ -160,6 +167,8 @@ worker_cleanup(uint8_t dev_id, uint8_t port_id, struct 
rte_event events[],
events[i].op = RTE_EVENT_OP_RELEASE;
rte_event_enqueue_burst(dev_id, port_id, events, nb_deq);
}
+
+   rte_event_port_quiesce(dev_id, port_id, event_port_flush, NULL);
 }

 void set_worker_generic_setup_data(struct setup_data *caps, bool burst);
diff --git a/examples/ipsec-secgw/ipsec_worker.c 
b/examples/ipsec-secgw/ipsec_worker.c
index 3df5acf384..7f259e4cf3 100644
--- a/examples/ipsec-secgw/ipsec_worker.c
+++ b/examples/ipsec-secgw/ipsec_worker.c
@@ -737,6 +737,13 @@ ipsec_ev_vector_drv_mode_process(struct eh_event_link_info 
*links,
  * selected.
  */

+static void
+ipsec_event_port_flush(uint8_t eventdev_id __rte_unused, struct rte_event ev,
+  void *args __rte_unused)
+{
+   rte_pktmbuf_free(ev.mbuf);
+}
+
 /* Workers registered */
 #define IPSEC_EVENTMODE_WORKERS 2

@@ -861,6 +868,9 @@ ipsec_wrkr_non_burst_int_port_drv_mode(struct 
eh_event_link_info *links,
rte_event_enqueue_burst(links[0].eventdev_id,
links[0].event_port_id, &ev, 1);
}
+
+   rte_event_port_quiesce(links[0].eventdev_id, links[0].event_port_id,
+  ipsec_event_port_flush, NULL);
 }

 /*
@@ -974,6 +984,9 @@ ipsec_wrkr_non_burst_int_port_app_mode(struct 
eh_event_link_info *links,
rte_event_enqueue_burst(links[0].eventdev_id,
links[0].event_port_id, &ev, 1);
}
+
+   rte_event_port_quiesce(links[0].eventdev_id, links[0].event_port_id,
+

[PATCH 3/3] event/cnxk: implement event port quiesce function

2022-04-27 Thread Pavan Nikhilesh
Implement event port quiesce function to clean up any lcore
resources used.
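
For context, a PMD advertises the new callback by filling the ops slot
added in patch 1/3. A hypothetical sketch (the names are made up; the
cn9k/cn10k diffs below do the real equivalent):

static struct eventdev_ops my_sso_ops = {
	/* ... existing ops ... */
	.port_release = my_sso_port_release,
	.port_quiesce = my_sso_port_quiesce, /* new op */
};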

Signed-off-by: Pavan Nikhilesh 
Change-Id: I7dda3d54dc698645d25ebbfbabd81760940fe649
---
 drivers/event/cnxk/cn10k_eventdev.c | 78 ++---
 drivers/event/cnxk/cn9k_eventdev.c  | 60 +-
 2 files changed, 130 insertions(+), 8 deletions(-)

diff --git a/drivers/event/cnxk/cn10k_eventdev.c 
b/drivers/event/cnxk/cn10k_eventdev.c
index 94829e789c..d84c5d2d1e 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -167,15 +167,23 @@ cn10k_sso_hws_reset(void *arg, void *hws)
uint64_t u64[2];
} gw;
uint8_t pend_tt;
+   bool is_pend;
 
plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
/* Wait till getwork/swtp/waitw/desched completes. */
+   is_pend = false;
+   /* Work in WQE0 is always consumed, unless it's a SWTAG. */
+   pend_state = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+   if (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(54)) ||
+   ws->swtag_req)
+   is_pend = true;
+
do {
pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
} while (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(58) |
   BIT_ULL(56) | BIT_ULL(54)));
pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
-   if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */
+   if (is_pend && pend_tt != SSO_TT_EMPTY) { /* Work was pending */
if (pend_tt == SSO_TT_ATOMIC || pend_tt == SSO_TT_ORDERED)
cnxk_sso_hws_swtag_untag(base +
 SSOW_LF_GWS_OP_SWTAG_UNTAG);
@@ -189,15 +197,10 @@ cn10k_sso_hws_reset(void *arg, void *hws)
 
switch (dev->gw_mode) {
case CN10K_GW_MODE_PREF:
+   case CN10K_GW_MODE_PREF_WFE:
while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
;
break;
-   case CN10K_GW_MODE_PREF_WFE:
-   while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) &
-  SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT)
-   continue;
-   plt_write64(0, base + SSOW_LF_GWS_OP_GWC_INVAL);
-   break;
case CN10K_GW_MODE_NONE:
default:
break;
@@ -533,6 +536,66 @@ cn10k_sso_port_release(void *port)
rte_free(gws_cookie);
 }
 
+static void
+cn10k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
+  eventdev_port_flush_t flush_cb, void *args)
+{
+   struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+   struct cn10k_sso_hws *ws = port;
+   struct rte_event ev;
+   uint64_t ptag;
+   bool is_pend;
+
+   is_pend = false;
+   /* Work in WQE0 is always consumed, unless it's a SWTAG. */
+   ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+   if (ptag & (BIT_ULL(62) | BIT_ULL(54)) || ws->swtag_req)
+   is_pend = true;
+   do {
+   ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+   } while (ptag &
+(BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54)));
+
+   cn10k_sso_hws_get_work_empty(ws, &ev,
+(NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F |
+NIX_RX_MULTI_SEG_F | CPT_RX_WQE_F);
+   if (is_pend && ev.u64) {
+   if (flush_cb)
+   flush_cb(event_dev->data->dev_id, ev, args);
+   cnxk_sso_hws_swtag_flush(ws->base);
+   }
+
+   /* Check if we have work in PRF_WQE0, if so extract it. */
+   switch (dev->gw_mode) {
+   case CN10K_GW_MODE_PREF:
+   case CN10K_GW_MODE_PREF_WFE:
+   while (plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0) &
+  BIT_ULL(63))
+   ;
+   break;
+   case CN10K_GW_MODE_NONE:
+   default:
+   break;
+   }
+
+   if (CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0)) !=
+   SSO_TT_EMPTY) {
+   plt_write64(BIT_ULL(16) | 1,
+   ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+   cn10k_sso_hws_get_work_empty(
+   ws, &ev,
+   (NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F |
+   NIX_RX_MULTI_SEG_F | CPT_RX_WQE_F);
+   if (ev.u64) {
+   if (flush_cb)
+   flush_cb(event_dev->data->dev_id, ev, args);
+   cnxk_sso_hws_swtag_flush(ws->base);
+   }
+   }
+   ws->swtag_req = 0;
+   plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
+}
+
 static int
 cn10k_sso_port_link(struct rte_eventdev *event_dev, void *port,
const uint8_t queues[], const uint8_t priorities[],
@@ 

[PATCH 2/3 v2] eventdev: update examples to use port quiesce

2022-04-27 Thread Pavan Nikhilesh
Quiesce event ports used by the worker cores on exit to free up
any outstanding resources.

Signed-off-by: Pavan Nikhilesh 
---
 Depends-on: Series-22677

 app/test-eventdev/test_perf_common.c |  8 
 app/test-eventdev/test_pipeline_common.c | 12 
 examples/eventdev_pipeline/pipeline_common.h |  9 +
 examples/ipsec-secgw/ipsec_worker.c  | 13 +
 examples/l2fwd-event/l2fwd_common.c  | 13 +
 examples/l3fwd/l3fwd_event.c | 13 +
 6 files changed, 68 insertions(+)

diff --git a/app/test-eventdev/test_perf_common.c 
b/app/test-eventdev/test_perf_common.c
index f673a9fddd..2016583979 100644
--- a/app/test-eventdev/test_perf_common.c
+++ b/app/test-eventdev/test_perf_common.c
@@ -985,6 +985,13 @@ perf_opt_dump(struct evt_options *opt, uint8_t nb_queues)
evt_dump("prod_enq_burst_sz", "%d", opt->prod_enq_burst_sz);
 }

+static void
+perf_event_port_flush(uint8_t dev_id __rte_unused, struct rte_event ev,
+ void *args)
+{
+   rte_mempool_put(args, ev.event_ptr);
+}
+
 void
 perf_worker_cleanup(struct rte_mempool *const pool, uint8_t dev_id,
uint8_t port_id, struct rte_event events[], uint16_t nb_enq,
@@ -1000,6 +1007,7 @@ perf_worker_cleanup(struct rte_mempool *const pool, 
uint8_t dev_id,
events[i].op = RTE_EVENT_OP_RELEASE;
rte_event_enqueue_burst(dev_id, port_id, events, nb_deq);
}
+   rte_event_port_quiesce(dev_id, port_id, perf_event_port_flush, pool);
 }

 void
diff --git a/app/test-eventdev/test_pipeline_common.c 
b/app/test-eventdev/test_pipeline_common.c
index a8dd07..82e5745071 100644
--- a/app/test-eventdev/test_pipeline_common.c
+++ b/app/test-eventdev/test_pipeline_common.c
@@ -518,6 +518,16 @@ pipeline_vector_array_free(struct rte_event events[], 
uint16_t num)
}
 }

+static void
+pipeline_event_port_flush(uint8_t dev_id __rte_unused, struct rte_event ev,
+ void *args __rte_unused)
+{
+   if (ev.event_type & RTE_EVENT_TYPE_VECTOR)
+   pipeline_vector_array_free(&ev, 1);
+   else
+   rte_pktmbuf_free(ev.mbuf);
+}
+
 void
 pipeline_worker_cleanup(uint8_t dev, uint8_t port, struct rte_event ev[],
uint16_t enq, uint16_t deq)
@@ -542,6 +552,8 @@ pipeline_worker_cleanup(uint8_t dev, uint8_t port, struct 
rte_event ev[],

rte_event_enqueue_burst(dev, port, ev, deq);
}
+
+   rte_event_port_quiesce(dev, port, pipeline_event_port_flush, NULL);
 }

 void
diff --git a/examples/eventdev_pipeline/pipeline_common.h 
b/examples/eventdev_pipeline/pipeline_common.h
index 9899b257b0..28b6ab85ff 100644
--- a/examples/eventdev_pipeline/pipeline_common.h
+++ b/examples/eventdev_pipeline/pipeline_common.h
@@ -140,6 +140,13 @@ schedule_devices(unsigned int lcore_id)
}
 }

+static void
+event_port_flush(uint8_t dev_id __rte_unused, struct rte_event ev,
+void *args __rte_unused)
+{
+   rte_mempool_put(args, ev.event_ptr);
+}
+
 static inline void
 worker_cleanup(uint8_t dev_id, uint8_t port_id, struct rte_event events[],
   uint16_t nb_enq, uint16_t nb_deq)
@@ -160,6 +167,8 @@ worker_cleanup(uint8_t dev_id, uint8_t port_id, struct 
rte_event events[],
events[i].op = RTE_EVENT_OP_RELEASE;
rte_event_enqueue_burst(dev_id, port_id, events, nb_deq);
}
+
+   rte_event_port_quiesce(dev_id, port_id, event_port_flush, NULL);
 }

 void set_worker_generic_setup_data(struct setup_data *caps, bool burst);
diff --git a/examples/ipsec-secgw/ipsec_worker.c 
b/examples/ipsec-secgw/ipsec_worker.c
index 3df5acf384..7f259e4cf3 100644
--- a/examples/ipsec-secgw/ipsec_worker.c
+++ b/examples/ipsec-secgw/ipsec_worker.c
@@ -737,6 +737,13 @@ ipsec_ev_vector_drv_mode_process(struct eh_event_link_info 
*links,
  * selected.
  */

+static void
+ipsec_event_port_flush(uint8_t eventdev_id __rte_unused, struct rte_event ev,
+  void *args __rte_unused)
+{
+   rte_pktmbuf_free(ev.mbuf);
+}
+
 /* Workers registered */
 #define IPSEC_EVENTMODE_WORKERS 2

@@ -861,6 +868,9 @@ ipsec_wrkr_non_burst_int_port_drv_mode(struct 
eh_event_link_info *links,
rte_event_enqueue_burst(links[0].eventdev_id,
links[0].event_port_id, &ev, 1);
}
+
+   rte_event_port_quiesce(links[0].eventdev_id, links[0].event_port_id,
+  ipsec_event_port_flush, NULL);
 }

 /*
@@ -974,6 +984,9 @@ ipsec_wrkr_non_burst_int_port_app_mode(struct 
eh_event_link_info *links,
rte_event_enqueue_burst(links[0].eventdev_id,
links[0].event_port_id, &ev, 1);
}
+
+   rte_event_port_quiesce(links[0].eventdev_id, links[0].event_port_id,
+  ipsec_event_port_flush, NULL);
 }


[PATCH 1/3 v2] eventdev: add function to quiesce an event port

2022-04-27 Thread Pavan Nikhilesh
Add a function to quiesce any core-specific resources consumed by
the event port.

When the application decides to migrate the event port to another lcore,
or to tear down the current lcore, it may call `rte_event_port_quiesce`
to make sure that all the data associated with the event port is released
from the lcore; this might also include any prefetched events.

While releasing the event port from the lcore, this function calls the
user-provided flush callback once per event.

Signed-off-by: Pavan Nikhilesh 
---
 v2 Changes:
 - Remove internal Change-Id tag from commit messages.

 lib/eventdev/eventdev_pmd.h | 19 +++
 lib/eventdev/rte_eventdev.c | 19 +++
 lib/eventdev/rte_eventdev.h | 33 +
 lib/eventdev/version.map|  3 +++
 4 files changed, 74 insertions(+)

diff --git a/lib/eventdev/eventdev_pmd.h b/lib/eventdev/eventdev_pmd.h
index ce469d47a6..cf9f2146a1 100644
--- a/lib/eventdev/eventdev_pmd.h
+++ b/lib/eventdev/eventdev_pmd.h
@@ -381,6 +381,23 @@ typedef int (*eventdev_port_setup_t)(struct rte_eventdev 
*dev,
  */
 typedef void (*eventdev_port_release_t)(void *port);

+/**
+ * Quiesce any core specific resources consumed by the event port
+ *
+ * @param dev
+ *   Event device pointer.
+ * @param port
+ *   Event port pointer.
+ * @param flush_cb
+ *   User-provided event flush function.
+ * @param args
+ *   Arguments to be passed to the user-provided event flush function.
+ *
+ */
+typedef void (*eventdev_port_quiesce_t)(struct rte_eventdev *dev, void *port,
+   eventdev_port_flush_t flush_cb,
+   void *args);
+
 /**
  * Link multiple source event queues to destination event port.
  *
@@ -1218,6 +1235,8 @@ struct eventdev_ops {
/**< Set up an event port. */
eventdev_port_release_t port_release;
/**< Release an event port. */
+   eventdev_port_quiesce_t port_quiesce;
+   /**< Quiesce an event port. */

eventdev_port_link_t port_link;
/**< Link event queues to an event port. */
diff --git a/lib/eventdev/rte_eventdev.c b/lib/eventdev/rte_eventdev.c
index 532a253553..541fa5dc61 100644
--- a/lib/eventdev/rte_eventdev.c
+++ b/lib/eventdev/rte_eventdev.c
@@ -730,6 +730,25 @@ rte_event_port_setup(uint8_t dev_id, uint8_t port_id,
return 0;
 }

+void
+rte_event_port_quiesce(uint8_t dev_id, uint8_t port_id,
+  eventdev_port_flush_t release_cb, void *args)
+{
+   struct rte_eventdev *dev;
+
+   RTE_EVENTDEV_VALID_DEVID_OR_RET(dev_id);
+   dev = &rte_eventdevs[dev_id];
+
+   if (!is_valid_port(dev, port_id)) {
+   RTE_EDEV_LOG_ERR("Invalid port_id=%" PRIu8, port_id);
+   return;
+   }
+
+   if (dev->dev_ops->port_quiesce)
+   (*dev->dev_ops->port_quiesce)(dev, dev->data->ports[port_id],
+ release_cb, args);
+}
+
 int
 rte_event_dev_attr_get(uint8_t dev_id, uint32_t attr_id,
   uint32_t *attr_value)
diff --git a/lib/eventdev/rte_eventdev.h b/lib/eventdev/rte_eventdev.h
index 42a5660169..c86d8a5576 100644
--- a/lib/eventdev/rte_eventdev.h
+++ b/lib/eventdev/rte_eventdev.h
@@ -830,6 +830,39 @@ int
 rte_event_port_setup(uint8_t dev_id, uint8_t port_id,
 const struct rte_event_port_conf *port_conf);

+typedef void (*eventdev_port_flush_t)(uint8_t dev_id, struct rte_event event,
+ void *arg);
+/**< Callback function prototype that can be passed during
+ * rte_event_port_quiesce(), invoked once per released event.
+ */
+
+/**
+ * Quiesce any core specific resources consumed by the event port.
+ *
+ * Event ports are generally coupled with lcores, and a given hardware
+ * implementation might require the PMD to store port-specific data in the
+ * lcore.
+ * When the application decides to migrate the event port to another lcore
+ * or tear down the current lcore, it may call `rte_event_port_quiesce`
+ * to make sure that all the data associated with the event port is released
+ * from the lcore; this might also include any prefetched events.
+ * While releasing the event port from the lcore, this function calls the
+ * user-provided flush callback once per event.
+ *
+ * The event port specific config is not reset.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ * @param port_id
+ *   The index of the event port to quiesce. The value must be in the range
+ *   [0, nb_event_ports - 1] previously supplied to rte_event_dev_configure().
+ * @param release_cb
+ *   Callback function invoked once per flushed event.
+ */
+__rte_experimental
+void rte_event_port_quiesce(uint8_t dev_id, uint8_t port_id,
+   eventdev_port_flush_t release_cb, void *args);
+
 /**
  * The queue depth of the port on the enqueue side
  */
diff --git a/lib/eventdev/version.map b/lib/eventdev/version.map
index cd5dada07f..190709353

[PATCH 3/3 v2] event/cnxk: implement event port quiesce function

2022-04-27 Thread Pavan Nikhilesh
Implement event port quiesce function to clean up any lcore
resources used.

Signed-off-by: Pavan Nikhilesh 
---
 drivers/event/cnxk/cn10k_eventdev.c | 78 ++---
 drivers/event/cnxk/cn9k_eventdev.c  | 60 +-
 2 files changed, 130 insertions(+), 8 deletions(-)

diff --git a/drivers/event/cnxk/cn10k_eventdev.c 
b/drivers/event/cnxk/cn10k_eventdev.c
index 94829e789c..d84c5d2d1e 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -167,15 +167,23 @@ cn10k_sso_hws_reset(void *arg, void *hws)
uint64_t u64[2];
} gw;
uint8_t pend_tt;
+   bool is_pend;

plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
/* Wait till getwork/swtp/waitw/desched completes. */
+   is_pend = false;
+   /* Work in WQE0 is always consumed, unless it's a SWTAG. */
+   pend_state = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+   if (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(54)) ||
+   ws->swtag_req)
+   is_pend = true;
+
do {
pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE);
} while (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(58) |
   BIT_ULL(56) | BIT_ULL(54)));
pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0));
-   if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */
+   if (is_pend && pend_tt != SSO_TT_EMPTY) { /* Work was pending */
if (pend_tt == SSO_TT_ATOMIC || pend_tt == SSO_TT_ORDERED)
cnxk_sso_hws_swtag_untag(base +
 SSOW_LF_GWS_OP_SWTAG_UNTAG);
@@ -189,15 +197,10 @@ cn10k_sso_hws_reset(void *arg, void *hws)

switch (dev->gw_mode) {
case CN10K_GW_MODE_PREF:
+   case CN10K_GW_MODE_PREF_WFE:
while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63))
;
break;
-   case CN10K_GW_MODE_PREF_WFE:
-   while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) &
-  SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT)
-   continue;
-   plt_write64(0, base + SSOW_LF_GWS_OP_GWC_INVAL);
-   break;
case CN10K_GW_MODE_NONE:
default:
break;
@@ -533,6 +536,66 @@ cn10k_sso_port_release(void *port)
rte_free(gws_cookie);
 }

+static void
+cn10k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port,
+  eventdev_port_flush_t flush_cb, void *args)
+{
+   struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+   struct cn10k_sso_hws *ws = port;
+   struct rte_event ev;
+   uint64_t ptag;
+   bool is_pend;
+
+   is_pend = false;
+   /* Work in WQE0 is always consumed, unless it's a SWTAG. */
+   ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+   if (ptag & (BIT_ULL(62) | BIT_ULL(54)) || ws->swtag_req)
+   is_pend = true;
+   do {
+   ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE);
+   } while (ptag &
+(BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54)));
+
+   cn10k_sso_hws_get_work_empty(ws, &ev,
+(NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F |
+NIX_RX_MULTI_SEG_F | CPT_RX_WQE_F);
+   if (is_pend && ev.u64) {
+   if (flush_cb)
+   flush_cb(event_dev->data->dev_id, ev, args);
+   cnxk_sso_hws_swtag_flush(ws->base);
+   }
+
+   /* Check if we have work in PRF_WQE0, if so extract it. */
+   switch (dev->gw_mode) {
+   case CN10K_GW_MODE_PREF:
+   case CN10K_GW_MODE_PREF_WFE:
+   while (plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0) &
+  BIT_ULL(63))
+   ;
+   break;
+   case CN10K_GW_MODE_NONE:
+   default:
+   break;
+   }
+
+   if (CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0)) !=
+   SSO_TT_EMPTY) {
+   plt_write64(BIT_ULL(16) | 1,
+   ws->base + SSOW_LF_GWS_OP_GET_WORK0);
+   cn10k_sso_hws_get_work_empty(
+   ws, &ev,
+   (NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F |
+   NIX_RX_MULTI_SEG_F | CPT_RX_WQE_F);
+   if (ev.u64) {
+   if (flush_cb)
+   flush_cb(event_dev->data->dev_id, ev, args);
+   cnxk_sso_hws_swtag_flush(ws->base);
+   }
+   }
+   ws->swtag_req = 0;
+   plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL);
+}
+
 static int
 cn10k_sso_port_link(struct rte_eventdev *event_dev, void *port,
const uint8_t queues[], const uint8_t priorities[],
@@ -852,6 +915,7 @@ static struct eventdev_ops cn10k_sso_de

Re: [PATCH 1/2] app/testpmd: fix stats get when display fwd stats

2022-04-27 Thread Singh, Aman Deep




On 4/6/2022 2:15 PM, Min Hu (Connor) wrote:

In the function 'fwd_stats_display', if 'rte_eth_stats_get' fails,
'stats' holds an indeterminate value and the displayed result will be
abnormal.

This patch checks the return value of 'rte_eth_stats_get' to avoid
displaying abnormal stats.

Fixes: 53324971a14e ("app/testpmd: display/clear forwarding stats on demand")
Cc: sta...@dpdk.org

Signed-off-by: Min Hu (Connor) 
---
  app/test-pmd/config.c  | 10 --
  app/test-pmd/testpmd.c | 16 ++--
  2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index cc8e7aa138..bd689f9f86 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -249,14 +249,20 @@ nic_stats_display(portid_t port_id)
diff_ns;
uint64_t mpps_rx, mpps_tx, mbps_rx, mbps_tx;
struct rte_eth_stats stats;
-
static const char *nic_stats_border = "";
+   int ret;
  
  	if (port_id_is_invalid(port_id, ENABLED_WARN)) {

print_valid_ports();
return;
}
-   rte_eth_stats_get(port_id, &stats);
+   ret = rte_eth_stats_get(port_id, &stats);
+   if (ret != 0) {
+   fprintf(stderr,
+   "%s: Error: failed to get stats (port %u): %d",
+   __func__, port_id, ret);
+   return;
+   }
printf("\n  %s NIC statistics for port %-2d %s\n",
   nic_stats_border, port_id, nic_stats_border);
  
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c

index fe2ce19f99..79bb23264b 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -1982,6 +1982,7 @@ fwd_stats_display(void)
struct rte_port *port;
streamid_t sm_id;
portid_t pt_id;
+   int ret;
int i;
  
  	memset(ports_stats, 0, sizeof(ports_stats));

@@ -2013,7 +2014,13 @@ fwd_stats_display(void)
pt_id = fwd_ports_ids[i];
port = &ports[pt_id];
  
-		rte_eth_stats_get(pt_id, &stats);

+   ret = rte_eth_stats_get(pt_id, &stats);
+   if (ret != 0) {
+   fprintf(stderr,
+   "%s: Error: failed to get stats (port %u): %d",
+   __func__, pt_id, ret);
+   continue;
+   }
stats.ipackets -= port->stats.ipackets;
stats.opackets -= port->stats.opackets;
stats.ibytes -= port->stats.ibytes;
@@ -2108,11 +2115,16 @@ fwd_stats_reset(void)
  {
streamid_t sm_id;
portid_t pt_id;
+   int ret;
int i;
  
  	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {

pt_id = fwd_ports_ids[i];
-   rte_eth_stats_get(pt_id, &ports[pt_id].stats);
+   ret = rte_eth_stats_get(pt_id, &ports[pt_id].stats);
+   if (ret != 0)
+   fprintf(stderr,
+   "%s: Error: failed to clear stats (port %u):%d",
+   __func__, pt_id, ret);

Should we clear "ports[pt_id].stats" in this condition?

}
for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
struct fwd_stream *fs = fwd_streams[sm_id];

As such LGTM
Acked-by: Aman Singh 


[PATCH v6 1/2] hash: split x86 and SW hash CRC intrinsics

2022-04-27 Thread Pavan Nikhilesh
Split the x86 and SW hash CRC intrinsics into separate files.

Signed-off-by: Pavan Nikhilesh 
---
 v6 Changes:
 - Simplify rte_hash_crc_set_alg function. (Ruifeng)
 v5 Changes:
 - Move CRC functions to implementation specific files to remove
   ifdef clutter. (Ruifeng)
 lib/hash/hash_crc_sw.h  | 419 
 lib/hash/hash_crc_x86.h |  62 ++
 lib/hash/rte_hash_crc.h | 396 +
 3 files changed, 483 insertions(+), 394 deletions(-)
 create mode 100644 lib/hash/hash_crc_sw.h
 create mode 100644 lib/hash/hash_crc_x86.h

diff --git a/lib/hash/hash_crc_sw.h b/lib/hash/hash_crc_sw.h
new file mode 100644
index 00..4790a0970b
--- /dev/null
+++ b/lib/hash/hash_crc_sw.h
@@ -0,0 +1,419 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#ifndef _HASH_CRC_SW_H_
+#define _HASH_CRC_SW_H_
+
+/* Lookup tables for software implementation of CRC32C */
+static const uint32_t crc32c_tables[8][256] = {
+   {0x, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C,
+0x26A1E7E8, 0xD4CA64EB, 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
+0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24, 0x105EC76F, 0xE235446C,
+0xF165B798, 0x030E349B, 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
+0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, 0x5D1D08BF, 0xAF768BBC,
+0xBC267848, 0x4E4DFB4B, 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,
+0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35, 0xAA64D611, 0x580F5512,
+0x4B5FA6E6, 0xB93425E5, 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
+0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, 0xF779DEAE, 0x05125DAD,
+0x1642AE59, 0xE4292D5A, 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,
+0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595, 0x417B1DBC, 0xB3109EBF,
+0xA0406D4B, 0x522BEE48, 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
+0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, 0x0C38D26C, 0xFE53516F,
+0xED03A29B, 0x1F682198, 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,
+0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38, 0xDBFC821C, 0x2997011F,
+0x3AC7F2EB, 0xC8AC71E8, 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
+0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, 0xA65C047D, 0x5437877E,
+0x4767748A, 0xB50CF789, 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,
+0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46, 0x7198540D, 0x83F3D70E,
+0x90A324FA, 0x62C8A7F9, 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
+0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, 0x3CDB9BDD, 0xCEB018DE,
+0xDDE0EB2A, 0x2F8B6829, 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,
+0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93, 0x082F63B7, 0xFA44E0B4,
+0xE9141340, 0x1B7F9043, 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
+0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, 0x55326B08, 0xA759E80B,
+0xB4091BFF, 0x466298FC, 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,
+0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033, 0xA24BB5A6, 0x502036A5,
+0x4370C551, 0xB11B4652, 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
+0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D, 0xEF087A76, 0x1D63F975,
+0x0E330A81, 0xFC588982, 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,
+0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622, 0x38CC2A06, 0xCAA7A905,
+0xD9F75AF1, 0x2B9CD9F2, 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
+0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, 0x0417B1DB, 0xF67C32D8,
+0xE52CC12C, 0x1747422F, 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,
+0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0, 0xD3D3E1AB, 0x21B862A8,
+0x32E8915C, 0xC083125F, 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
+0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, 0x9E902E7B, 0x6CFBAD78,
+0x7FAB5E8C, 0x8DC0DD8F, 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,
+0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1, 0x69E9F0D5, 0x9B8273D6,
+0x88D28022, 0x7AB90321, 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
+0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, 0x34F4F86A, 0xC69F7B69,
+0xD5CF889D, 0x27A40B9E, 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
+0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351},
+   {0x, 0x13A29877, 0x274530EE, 0x34E7A899, 0x4E8A61DC, 0x5D28F9AB,
+0x69CF5132, 0x7A6DC945, 0x9D14C3B8, 0x8EB65BCF, 0xBA51F356, 0xA9F36B21,
+0xD39EA264, 0xC03C3A13, 0xF4DB928A, 0xE7790AFD, 0x3FC5F181, 0x2C6769F6,
+0x1880C16F, 0x0B225918, 0x714F905D, 0x62ED082A, 0x560AA0B3, 0x45A838C4,
+0xA2D13239, 0xB173AA4E, 0x859402D7, 0x96369AA0, 0xEC5B53E5, 0xFFF9CB92,
+0xCB1E630B, 0xD8BCFB7C, 0x7F8BE302, 0x6C297B75, 0x58CED3EC, 0x4B6C4B9B,
+0x310182DE, 0x22A31AA9, 0x16

[PATCH v6 2/2] hash: unify crc32 selection for x86 and Arm

2022-04-27 Thread Pavan Nikhilesh
Merge the crc32 hash calculation public API implementation for x86 and Arm.
Select the best available CRC32 algorithm when an application requests an
algorithm that is unsupported on the given CPU architecture.

Previously, if an application directly included `rte_crc_arm64.h`
without including `rte_hash_crc.h`, it would fail to compile.
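
For reference, application-side usage is unchanged by the merge; a minimal
sketch (the helper and its arguments are hypothetical):

#include <rte_hash_crc.h>

static uint32_t
checksum_buf(const void *buf, uint32_t len)
{
	/* CRC32_SW is always available; when a hardware algorithm is
	 * requested but unsupported, rte_hash_crc_set_alg() now picks
	 * the best available one on every architecture. */
	rte_hash_crc_set_alg(CRC32_SW);
	return rte_hash_crc(buf, len, 0xFFFFFFFF);
}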

Signed-off-by: Pavan Nikhilesh 
---
 .../{rte_crc_arm64.h => hash_crc_arm64.h} |  69 ++---
 lib/hash/hash_crc_x86.h   |  89 +++
 lib/hash/meson.build  |   1 -
 lib/hash/rte_hash_crc.h   | 145 +-
 4 files changed, 136 insertions(+), 168 deletions(-)
 rename lib/hash/{rte_crc_arm64.h => hash_crc_arm64.h} (65%)

diff --git a/lib/hash/rte_crc_arm64.h b/lib/hash/hash_crc_arm64.h
similarity index 65%
rename from lib/hash/rte_crc_arm64.h
rename to lib/hash/hash_crc_arm64.h
index b4628cfc09..172894335f 100644
--- a/lib/hash/rte_crc_arm64.h
+++ b/lib/hash/hash_crc_arm64.h
@@ -2,23 +2,8 @@
  * Copyright(c) 2015 Cavium, Inc
  */
 
-#ifndef _RTE_CRC_ARM64_H_
-#define _RTE_CRC_ARM64_H_
-
-/**
- * @file
- *
- * RTE CRC arm64 Hash
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include 
-#include 
-#include 
-#include 
+#ifndef _HASH_CRC_ARM64_H_
+#define _HASH_CRC_ARM64_H_
 
 static inline uint32_t
 crc32c_arm64_u8(uint8_t data, uint32_t init_val)
@@ -61,40 +46,8 @@ crc32c_arm64_u64(uint64_t data, uint32_t init_val)
 }
 
 /**
- * Allow or disallow use of arm64 SIMD instrinsics for CRC32 hash
- * calculation.
- *
- * @param alg
- *   An OR of following flags:
- *   - (CRC32_SW) Don't use arm64 crc intrinsics
- *   - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available
- *
- */
-static inline void
-rte_hash_crc_set_alg(uint8_t alg)
-{
-   switch (alg) {
-   case CRC32_ARM64:
-   if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
-   alg = CRC32_SW;
-   /* fall-through */
-   case CRC32_SW:
-   crc32_alg = alg;
-   /* fall-through */
-   default:
-   break;
-   }
-}
-
-/* Setting the best available algorithm */
-RTE_INIT(rte_hash_crc_init_alg)
-{
-   rte_hash_crc_set_alg(CRC32_ARM64);
-}
-
-/**
- * Use single crc32 instruction to perform a hash on a 1 byte value.
- * Fall back to software crc32 implementation in case arm64 crc intrinsics is
+ * Use single crc32 instruction to perform a hash on a byte value.
+ * Fall back to software crc32 implementation in case ARM CRC is
  * not supported
  *
  * @param data
@@ -115,7 +68,7 @@ rte_hash_crc_1byte(uint8_t data, uint32_t init_val)
 
 /**
  * Use single crc32 instruction to perform a hash on a 2 bytes value.
- * Fall back to software crc32 implementation in case arm64 crc intrinsics is
+ * Fall back to software crc32 implementation in case ARM CRC is
  * not supported
  *
  * @param data
@@ -136,7 +89,7 @@ rte_hash_crc_2byte(uint16_t data, uint32_t init_val)
 
 /**
  * Use single crc32 instruction to perform a hash on a 4 byte value.
- * Fall back to software crc32 implementation in case arm64 crc intrinsics is
+ * Fall back to software crc32 implementation in case ARM CRC is
  * not supported
  *
  * @param data
@@ -157,7 +110,7 @@ rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
 
 /**
  * Use single crc32 instruction to perform a hash on a 8 byte value.
- * Fall back to software crc32 implementation in case arm64 crc intrinsics is
+ * Fall back to software crc32 implementation in case ARM CRC is
  * not supported
  *
  * @param data
@@ -170,14 +123,10 @@ rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
 static inline uint32_t
 rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
 {
-   if (likely(crc32_alg == CRC32_ARM64))
+   if (likely(crc32_alg & CRC32_ARM64))
return crc32c_arm64_u64(data, init_val);
 
return crc32c_2words(data, init_val);
 }
 
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _RTE_CRC_ARM64_H_ */
+#endif /* _HASH_CRC_ARM64_H_ */
diff --git a/lib/hash/hash_crc_x86.h b/lib/hash/hash_crc_x86.h
index b80a742afa..19eb3584e7 100644
--- a/lib/hash/hash_crc_x86.h
+++ b/lib/hash/hash_crc_x86.h
@@ -59,4 +59,93 @@ crc32c_sse42_u64(uint64_t data, uint64_t init_val)
return (uint32_t)init_val;
 }
 
+/**
+ * Use single crc32 instruction to perform a hash on a byte value.
+ * Fall back to software crc32 implementation in case SSE4.2 is
+ * not supported
+ *
+ * @param data
+ *   Data to perform hash on.
+ * @param init_val
+ *   Value to initialise hash generator.
+ * @return
+ *   32bit calculated hash value.
+ */
+static inline uint32_t
+rte_hash_crc_1byte(uint8_t data, uint32_t init_val)
+{
+   if (likely(crc32_alg & CRC32_SSE42))
+   return crc32c_sse42_u8(data, init_val);
+
+   return crc32c_1byte(data, init_val);
+}
+
+/**
+ * Use single crc32 instruction to perform a hash on a 2 bytes value.
+ * Fall back to software crc32 implementation in case SSE4.2 is
+ * not supported
+ *
+ 

[PATCH] doc: describe ixgbe devargs fiber_sdp3_no_tx_disable

2022-04-27 Thread Jeff Daly
A devargs option is introduced for the IXGBE driver to tell it to skip
checking SDP3 as an indicator of laser enable/disable for SFP modules.

Signed-off-by: Jeff Daly 
---
 doc/guides/nics/ixgbe.rst | 17 +
 1 file changed, 17 insertions(+)

diff --git a/doc/guides/nics/ixgbe.rst b/doc/guides/nics/ixgbe.rst
index 82fa453fa28e..ad1a3da6101e 100644
--- a/doc/guides/nics/ixgbe.rst
+++ b/doc/guides/nics/ixgbe.rst
@@ -101,6 +101,23 @@ To guarantee the constraint, capabilities in 
dev_conf.rxmode.offloads will be ch
 
 fdir_conf->mode will also be checked.
 
+Disable SDP3 TX_DISABLE for Fiber Links
+^^^
+
+The following ``devargs`` option can be enabled at runtime.  It must
+be appended to the device's PCI address in the EAL arguments. For example,
+
+.. code-block:: console
+
+   dpdk-testpmd -a <PCI_BDF>,fiber_sdp3_no_tx_disable=1 -- -i
+
+- ``fiber_sdp3_no_tx_disable`` (default **0**)
+
+  Not all IXGBE implementations with SFP cages use the SDP3 signal as
+  TX_DISABLE to turn off the laser on fiber SFP modules.
+  This option informs the driver that, in such cases, SDP3 must not be
+  used as a link-up check by testing whether the laser is on or off.
+
 VF Runtime Options
 ^^
 
-- 
2.25.1



RE: OVS DPDK DMA-Dev library/Design Discussion

2022-04-27 Thread Mcnamara, John


> -Original Message-
> From: Ilya Maximets 
> Sent: Monday, April 25, 2022 10:46 PM
> To: Mcnamara, John ; Hu, Jiayu
> ; Maxime Coquelin ; Van
> Haaren, Harry ; Morten Brørup
> ; Richardson, Bruce
> 
> Cc: i.maxim...@ovn.org; Pai G, Sunil ; Stokes,
> Ian ; Ferriter, Cian ;
> ovs-...@openvswitch.org; dev@dpdk.org; O'Driscoll, Tim
> ; Finn, Emma 
> Subject: Re: OVS DPDK DMA-Dev library/Design Discussion
> 
> ...
> 
> FWIW, I think it makes sense to PoC and test options that are going to
> be simply unavailable going forward if not explored now.
> Especially because we don't have any good solutions anyway ("Deferral
> of Work" is architecturally wrong solution for OVS).

I agree that there is value in doing PoCs, and we have been doing that for
over a year based on different proposals; none of them shows the potential
of the Deferral of Work approach. It isn't productive to keep building PoCs
indefinitely; at some point we need to make progress on merging a specific
solution upstream.


> > Let's have another call so that we can move towards a single solution
> that the DPDK and OVS communities agree on. I'll set up a call for next
> week in a similar time slot to the previous one.
> 
> Is there any particular reason we can't use a mailing list to discuss
> that topic further?

The discussion can continue on the mailing list. It just seemed more efficient 
and interactive to discuss this in a meeting.

John
-- 




[dpdk-dev] [PATCH 00/17] bnxt PMD fixes

2022-04-27 Thread Kalesh A P
From: Kalesh AP 

This patch set contains bug fixes in bnxt PMD. Please apply.

Kalesh AP (12):
  net/bnxt: update HWRM structures
  net/bnxt: fix device capability reporting
  net/bnxt: fix to remove an unused macro
  net/bnxt: fix Rxq configure
  net/bnxt: fix support for tunnel stateless offloads
  net/bnxt: fix RSS action support
  net/bnxt: add check for duplicate queue ids
  net/bnxt: avoid unnecessary endianness conversion
  net/bnxt: fix setting autoneg speed
  net/bnxt: force PHY update on certain configurations
  net/bnxt: fix reporting link status when port is stopped
  net/bnxt: recheck FW readiness if FW is in reset process

Somnath Kotur (5):
  net/bnxt: remove support for COUNT action
  net/bnxt: fix to reconfigure the VNIC's default receive ring
  net/bnxt: fix to handle queue stop during RSS flow create
  net/bnxt: fix freeing of VNIC filters
  net/bnxt: don't wait for link up completion in dev start

 drivers/net/bnxt/bnxt.h|   29 +-
 drivers/net/bnxt/bnxt_ethdev.c |   58 +-
 drivers/net/bnxt/bnxt_filter.c |2 +
 drivers/net/bnxt/bnxt_flow.c   |   92 +-
 drivers/net/bnxt/bnxt_hwrm.c   |   15 +-
 drivers/net/bnxt/bnxt_hwrm.h   |   20 +
 drivers/net/bnxt/bnxt_reps.c   |6 +-
 drivers/net/bnxt/bnxt_rxq.c|   75 +-
 drivers/net/bnxt/bnxt_rxq.h|1 +
 drivers/net/bnxt/bnxt_txq.c|   29 +
 drivers/net/bnxt/bnxt_txq.h|1 +
 drivers/net/bnxt/hsi_struct_def_dpdk.h | 4025 
 12 files changed, 3809 insertions(+), 544 deletions(-)

-- 
2.10.1



[PATCH 02/17] net/bnxt: fix device capability reporting

2022-04-27 Thread Kalesh A P
From: Kalesh AP 

1. Added two functions bnxt_get_tx_port_offloads() and
   bnxt_get_rx_port_offloads() to report the device
   tx/rx offload capabilities to the application.
2. This avoids some duplicated code in the driver and makes the
   VF-rep capabilities the same as the VF's.
3. This will help in selectively reporting offload capabilities
   based on FW support.
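
For context, applications consume these capabilities through the standard
ethdev info query; a minimal sketch (`port_id` and the local `conf` are
hypothetical):

	struct rte_eth_dev_info dev_info;
	struct rte_eth_conf conf = { 0 };

	if (rte_eth_dev_info_get(port_id, &dev_info) == 0 &&
	    (dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_TCP_LRO))
		/* Enable LRO only if the PMD reported support for it. */
		conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_TCP_LRO;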

Fixes: 0a6d2a720078 ("net/bnxt: get device infos")
Cc: sta...@dpdk.org

Signed-off-by: Kalesh AP 
Reviewed-by: Ajit Khaparde 
Reviewed-by: Somnath Kotur 
---
 drivers/net/bnxt/bnxt.h| 24 
 drivers/net/bnxt/bnxt_ethdev.c | 10 ++
 drivers/net/bnxt/bnxt_reps.c   |  6 ++
 drivers/net/bnxt/bnxt_rxq.c| 25 +
 drivers/net/bnxt/bnxt_rxq.h|  1 +
 drivers/net/bnxt/bnxt_txq.c| 23 +++
 drivers/net/bnxt/bnxt_txq.h|  1 +
 7 files changed, 54 insertions(+), 36 deletions(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 44724a9..5eddb4f 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -580,30 +580,6 @@ struct bnxt_rep_info {
RTE_ETH_RSS_NONFRAG_IPV6_UDP |  \
RTE_ETH_RSS_LEVEL_MASK)
 
-#define BNXT_DEV_TX_OFFLOAD_SUPPORT (RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | \
-RTE_ETH_TX_OFFLOAD_TCP_CKSUM | \
-RTE_ETH_TX_OFFLOAD_UDP_CKSUM | \
-RTE_ETH_TX_OFFLOAD_TCP_TSO | \
-RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM | \
-RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO | \
-RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO | \
-RTE_ETH_TX_OFFLOAD_IPIP_TNL_TSO | \
-RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO | \
-RTE_ETH_TX_OFFLOAD_QINQ_INSERT | \
-RTE_ETH_TX_OFFLOAD_MULTI_SEGS)
-
-#define BNXT_DEV_RX_OFFLOAD_SUPPORT (RTE_ETH_RX_OFFLOAD_VLAN_FILTER | \
-RTE_ETH_RX_OFFLOAD_IPV4_CKSUM | \
-RTE_ETH_RX_OFFLOAD_UDP_CKSUM | \
-RTE_ETH_RX_OFFLOAD_TCP_CKSUM | \
-RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM | \
-RTE_ETH_RX_OFFLOAD_OUTER_UDP_CKSUM | \
-RTE_ETH_RX_OFFLOAD_KEEP_CRC | \
-RTE_ETH_RX_OFFLOAD_VLAN_EXTEND | \
-RTE_ETH_RX_OFFLOAD_TCP_LRO | \
-RTE_ETH_RX_OFFLOAD_SCATTER | \
-RTE_ETH_RX_OFFLOAD_RSS_HASH)
-
 #define BNXT_HWRM_SHORT_REQ_LENsizeof(struct hwrm_short_input)
 
 struct bnxt_flow_stat_info {
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 261fe0b..fac3925 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -971,16 +971,10 @@ static int bnxt_dev_info_get_op(struct rte_eth_dev 
*eth_dev,
dev_info->min_rx_bufsize = 1;
dev_info->max_rx_pktlen = BNXT_MAX_PKT_LEN;
 
-   dev_info->rx_offload_capa = BNXT_DEV_RX_OFFLOAD_SUPPORT;
-   if (bp->flags & BNXT_FLAG_PTP_SUPPORTED)
-   dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_TIMESTAMP;
-   if (bp->vnic_cap_flags & BNXT_VNIC_CAP_VLAN_RX_STRIP)
-   dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
+   dev_info->rx_offload_capa = bnxt_get_rx_port_offloads(bp);
dev_info->tx_queue_offload_capa = RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
-   dev_info->tx_offload_capa = BNXT_DEV_TX_OFFLOAD_SUPPORT |
+   dev_info->tx_offload_capa = bnxt_get_tx_port_offloads(bp) |
dev_info->tx_queue_offload_capa;
-   if (bp->fw_cap & BNXT_FW_CAP_VLAN_TX_INSERT)
-   dev_info->tx_offload_capa |= RTE_ETH_TX_OFFLOAD_VLAN_INSERT;
dev_info->flow_type_rss_offloads = BNXT_ETH_RSS_SUPPORT;
 
dev_info->speed_capa = bnxt_get_speed_capabilities(bp);
diff --git a/drivers/net/bnxt/bnxt_reps.c b/drivers/net/bnxt/bnxt_reps.c
index e773932..8a5b777 100644
--- a/drivers/net/bnxt/bnxt_reps.c
+++ b/drivers/net/bnxt/bnxt_reps.c
@@ -567,10 +567,8 @@ int bnxt_rep_dev_info_get_op(struct rte_eth_dev *eth_dev,
dev_info->min_rx_bufsize = 1;
dev_info->max_rx_pktlen = BNXT_MAX_PKT_LEN;
 
-   dev_info->rx_offload_capa = BNXT_DEV_RX_OFFLOAD_SUPPORT;
-   if (parent_bp->flags & BNXT_FLAG_PTP_SUPPORTED)
-   dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_TIMESTAMP;
-   dev_info->tx_offload_capa = BNXT_DEV_TX_OFFLOAD_SUPPORT;
+   dev_info->rx_offload_capa = bnxt_get_rx_port_offloads(parent_bp);
+   dev_info->tx_offload_capa = bnxt_get_tx_port_offloads(parent_bp);
dev_info->flow_type_rss_offloads = BNXT_ETH_RSS_SUPPORT;
 
  

[PATCH 03/17] net/bnxt: fix to remove an unused macro

2022-04-27 Thread Kalesh A P
From: Kalesh AP 

BNXT_FLAG_UPDATE_HASH is redundant now, remove it.

Fixes: 1ebb765090a6 ("net/bnxt: fix config RSS update")
Cc: sta...@dpdk.org

Signed-off-by: Kalesh AP 
Reviewed-by: Ajit Khaparde 
Reviewed-by: Somnath Kotur 
---
 drivers/net/bnxt/bnxt.h| 1 -
 drivers/net/bnxt/bnxt_ethdev.c | 2 --
 drivers/net/bnxt/bnxt_rxq.c| 3 ---
 3 files changed, 6 deletions(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 5eddb4f..9e5ff74 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -648,7 +648,6 @@ struct bnxt {
 #define BNXT_FLAG_PORT_STATS   BIT(2)
 #define BNXT_FLAG_JUMBOBIT(3)
 #define BNXT_FLAG_SHORT_CMDBIT(4)
-#define BNXT_FLAG_UPDATE_HASH  BIT(5)
 #define BNXT_FLAG_PTP_SUPPORTEDBIT(6)
 #define BNXT_FLAG_MULTI_HOST   BIT(7)
 #define BNXT_FLAG_EXT_RX_PORT_STATSBIT(8)
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index fac3925..181de42 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -2125,8 +2125,6 @@ static int bnxt_rss_hash_update_op(struct rte_eth_dev *eth_dev,
return -EINVAL;
}
 
-   bp->flags |= BNXT_FLAG_UPDATE_HASH;
-
/* Update the default RSS VNIC(s) */
vnic = BNXT_GET_DEFAULT_VNIC(bp);
vnic->hash_type = bnxt_rte_to_hwrm_hash_types(rss_conf->rss_hf);
diff --git a/drivers/net/bnxt/bnxt_rxq.c b/drivers/net/bnxt/bnxt_rxq.c
index 0cfd0e5..3c2283b 100644
--- a/drivers/net/bnxt/bnxt_rxq.c
+++ b/drivers/net/bnxt/bnxt_rxq.c
@@ -175,9 +175,6 @@ int bnxt_mq_rx_configure(struct bnxt *bp)
if (dev_conf->rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) {
struct rte_eth_rss_conf *rss = &bp->rss_conf;
 
-   if (bp->flags & BNXT_FLAG_UPDATE_HASH)
-   bp->flags &= ~BNXT_FLAG_UPDATE_HASH;
-
for (i = 0; i < bp->nr_vnics; i++) {
uint32_t lvl = RTE_ETH_RSS_LEVEL(rss->rss_hf);
 
-- 
2.10.1



[PATCH 04/17] net/bnxt: fix Rxq configure

2022-04-27 Thread Kalesh A P
From: Kalesh AP 

We are currently not handling the RX/RSS modes correctly.
After launching testpmd with multiple RXQs, if the user tries to set
the number of RXQs to 1, the driver does not update the "hash_type"
and "hash_mode" values of the VNICs. As a result, the driver issues
bnxt_vnic_rss_configure() unnecessarily and the FW command fails.

Fixed bnxt_mq_rx_configure() to update the VNIC RSS fields unconditionally.

Fixes: 4191bc8f79a8 ("net/bnxt: handle multi queue mode properly")
Cc: sta...@dpdk.org

Signed-off-by: Kalesh AP 
Reviewed-by: Ajit Khaparde 
Reviewed-by: Somnath Kotur 
---
 drivers/net/bnxt/bnxt_rxq.c | 37 ++---
 1 file changed, 14 insertions(+), 23 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_rxq.c b/drivers/net/bnxt/bnxt_rxq.c
index 3c2283b..8977138 100644
--- a/drivers/net/bnxt/bnxt_rxq.c
+++ b/drivers/net/bnxt/bnxt_rxq.c
@@ -65,6 +65,7 @@ void bnxt_free_rxq_stats(struct bnxt_rx_queue *rxq)
 int bnxt_mq_rx_configure(struct bnxt *bp)
 {
struct rte_eth_conf *dev_conf = &bp->eth_dev->data->dev_conf;
+   struct rte_eth_rss_conf *rss = &bp->rss_conf;
const struct rte_eth_vmdq_rx_conf *conf =
&dev_conf->rx_adv_conf.vmdq_rx_conf;
unsigned int i, j, nb_q_per_grp = 1, ring_idx = 0;
@@ -172,29 +173,19 @@ int bnxt_mq_rx_configure(struct bnxt *bp)
 
bp->rx_num_qs_per_vnic = nb_q_per_grp;
 
-   if (dev_conf->rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) {
-   struct rte_eth_rss_conf *rss = &bp->rss_conf;
-
-   for (i = 0; i < bp->nr_vnics; i++) {
-   uint32_t lvl = RTE_ETH_RSS_LEVEL(rss->rss_hf);
-
-   vnic = &bp->vnic_info[i];
-   vnic->hash_type =
-   bnxt_rte_to_hwrm_hash_types(rss->rss_hf);
-   vnic->hash_mode =
-   bnxt_rte_to_hwrm_hash_level(bp,
-   rss->rss_hf,
-   lvl);
-
-   /*
-* Use the supplied key if the key length is
-* acceptable and the rss_key is not NULL
-*/
-   if (rss->rss_key &&
-   rss->rss_key_len <= HW_HASH_KEY_SIZE)
-   memcpy(vnic->rss_hash_key,
-  rss->rss_key, rss->rss_key_len);
-   }
+   for (i = 0; i < bp->nr_vnics; i++) {
+   uint32_t lvl = RTE_ETH_RSS_LEVEL(rss->rss_hf);
+
+   vnic = &bp->vnic_info[i];
+   vnic->hash_type = bnxt_rte_to_hwrm_hash_types(rss->rss_hf);
+   vnic->hash_mode = bnxt_rte_to_hwrm_hash_level(bp, rss->rss_hf, lvl);
+
+   /*
+* Use the supplied key if the key length is
+* acceptable and the rss_key is not NULL
+*/
+   if (rss->rss_key && rss->rss_key_len <= HW_HASH_KEY_SIZE)
+   memcpy(vnic->rss_hash_key, rss->rss_key, rss->rss_key_len);
}
 
return rc;
-- 
2.10.1



[PATCH 05/17] net/bnxt: fix support for tunnel stateless offloads

2022-04-27 Thread Kalesh A P
From: Kalesh AP 

The HW only supports tunnel header parsing globally for supported tunnel
types. When a function uses one default VNIC to receive both tunnel
and non-tunnel packets, applying the same stateless offload operation to
both tunnel and non-tunnel packets can cause problems in certain scenarios.
To work around these problems, the firmware advertises no tunnel header
parsing capability to the driver in the HWRM_FUNC_QCAPS response.
The driver must check this flag and accordingly not advertise tunnel
packet stateless offload capabilities to the stack.

If the device supports VXLAN, GRE, IPIP and GENEVE tunnel parsing,
then report RX_OFFLOAD_OUTER_IPV4_CKSUM, RX_OFFLOAD_OUTER_UDP_CKSUM
and TX_OFFLOAD_OUTER_IPV4_CKSUM in the Rx/Tx offload capabilities of
the device.
Also, advertise tunnel TSO capabilities based on FW support.

Fixes: 0a6d2a720078 ("net/bnxt: get device infos")
Cc: sta...@dpdk.org

Signed-off-by: Kalesh AP 
Reviewed-by: Ajit Khaparde 
Reviewed-by: Somnath Kotur 
---
 drivers/net/bnxt/bnxt.h  |  1 +
 drivers/net/bnxt/bnxt_hwrm.c |  5 +
 drivers/net/bnxt/bnxt_hwrm.h | 20 
 drivers/net/bnxt/bnxt_rxq.c  |  7 ---
 drivers/net/bnxt/bnxt_txq.c  | 18 --
 5 files changed, 42 insertions(+), 9 deletions(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 9e5ff74..e4e8e8e 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -871,6 +871,7 @@ struct bnxt {
 	uint32_t		max_mcast_addr; /* maximum number of mcast filters supported */
 
struct rte_eth_rss_conf rss_conf; /* RSS configuration. */
+	uint16_t		tunnel_disable_flag; /* tunnel stateless offloads status */
 };
 
 static
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index bff73a9..178a112 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -939,6 +939,11 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
bp->fw_cap |= BNXT_FW_CAP_VLAN_TX_INSERT;
PMD_DRV_LOG(DEBUG, "VLAN acceleration for TX is enabled\n");
}
+
+   bp->tunnel_disable_flag = rte_le_to_cpu_16(resp->tunnel_disable_flag);
+   if (bp->tunnel_disable_flag)
+   PMD_DRV_LOG(DEBUG, "Tunnel parsing capability is disabled, flags : %#x\n",
+   bp->tunnel_disable_flag);
 unlock:
HWRM_UNLOCK();
 
diff --git a/drivers/net/bnxt/bnxt_hwrm.h b/drivers/net/bnxt/bnxt_hwrm.h
index 63f8d8c..77f8521 100644
--- a/drivers/net/bnxt/bnxt_hwrm.h
+++ b/drivers/net/bnxt/bnxt_hwrm.h
@@ -121,6 +121,26 @@ struct bnxt_pf_resource_info {
 
 #define BNXT_CTX_VAL_INVAL 0x
 
+#define BNXT_TUNNELED_OFFLOADS_CAP_VXLAN_EN(bp)\
+   (!((bp)->tunnel_disable_flag & HWRM_FUNC_QCAPS_OUTPUT_TUNNEL_DISABLE_FLAG_DISABLE_VXLAN))
+#define BNXT_TUNNELED_OFFLOADS_CAP_NGE_EN(bp)  \
+   (!((bp)->tunnel_disable_flag & HWRM_FUNC_QCAPS_OUTPUT_TUNNEL_DISABLE_FLAG_DISABLE_NGE))
+#define BNXT_TUNNELED_OFFLOADS_CAP_GRE_EN(bp)  \
+   (!((bp)->tunnel_disable_flag & HWRM_FUNC_QCAPS_OUTPUT_TUNNEL_DISABLE_FLAG_DISABLE_GRE))
+#define BNXT_TUNNELED_OFFLOADS_CAP_IPINIP_EN(bp)   \
+   (!((bp)->tunnel_disable_flag & HWRM_FUNC_QCAPS_OUTPUT_TUNNEL_DISABLE_FLAG_DISABLE_IPINIP))
+
+/*
+ * If the device supports VXLAN, GRE, IPIP and GENEVE tunnel parsing, then report
+ * RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM, RTE_ETH_RX_OFFLOAD_OUTER_UDP_CKSUM and
+ * RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM in the Rx/Tx offload capabilities of the device.
+ */
+#define BNXT_TUNNELED_OFFLOADS_CAP_ALL_EN(bp)  \
+   (BNXT_TUNNELED_OFFLOADS_CAP_VXLAN_EN(bp) && \
+BNXT_TUNNELED_OFFLOADS_CAP_NGE_EN(bp)   && \
+BNXT_TUNNELED_OFFLOADS_CAP_GRE_EN(bp)   && \
+BNXT_TUNNELED_OFFLOADS_CAP_IPINIP_EN(bp))
+
 int bnxt_hwrm_cfa_l2_clear_rx_mask(struct bnxt *bp,
   struct bnxt_vnic_info *vnic);
 int bnxt_hwrm_cfa_l2_set_rx_mask(struct bnxt *bp, struct bnxt_vnic_info *vnic,
diff --git a/drivers/net/bnxt/bnxt_rxq.c b/drivers/net/bnxt/bnxt_rxq.c
index 8977138..8147404 100644
--- a/drivers/net/bnxt/bnxt_rxq.c
+++ b/drivers/net/bnxt/bnxt_rxq.c
@@ -34,14 +34,15 @@ uint64_t bnxt_get_rx_port_offloads(struct bnxt *bp)
  RTE_ETH_RX_OFFLOAD_SCATTER |
  RTE_ETH_RX_OFFLOAD_RSS_HASH;
 
-   rx_offload_capa |= RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM |
-  RTE_ETH_RX_OFFLOAD_OUTER_UDP_CKSUM;
-
if (bp->flags & BNXT_FLAG_PTP_SUPPORTED)
rx_offload_capa |= RTE_ETH_RX_OFFLOAD_TIMESTAMP;
if (bp->vnic_cap_flags & BNXT_VNIC_CAP_VLAN_RX_STRIP)
rx_offload_capa |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
 
+   if (BNXT_TUNNELED_OFFLOADS_CAP_ALL_EN(bp))
+   rx_offload_capa |= RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM |
+  RTE_ETH_RX_OFFLOAD_OUTER_UDP_CKSUM;
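
The bnxt_txq.c hunk of this patch is not shown above. As a rough sketch
only (not the actual hunk), the Tx-side gating described in the commit
message could look like the following; the base offload list and the
exact tunnel TSO flag set are assumptions:

uint64_t bnxt_get_tx_port_offloads(struct bnxt *bp)
{
	uint64_t tx_offload_capa = RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |
				   RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
				   RTE_ETH_TX_OFFLOAD_TCP_CKSUM |
				   RTE_ETH_TX_OFFLOAD_TCP_TSO |
				   RTE_ETH_TX_OFFLOAD_MULTI_SEGS;

	if (bp->fw_cap & BNXT_FW_CAP_VLAN_TX_INSERT)
		tx_offload_capa |= RTE_ETH_TX_OFFLOAD_VLAN_INSERT;

	/* Advertise outer checksum and tunnel TSO only when the FW
	 * reports tunnel parsing support for all tunnel types.
	 */
	if (BNXT_TUNNELED_OFFLOADS_CAP_ALL_EN(bp))
		tx_offload_capa |= RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM |
				   RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO |
				   RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO |
				   RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO;

	return tx_offload_capa;
}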

[PATCH 06/17] net/bnxt: remove support for COUNT action

2022-04-27 Thread Kalesh A P
From: Somnath Kotur 

The 'count' action was never really implemented in the legacy/AFM model,
but there was some placeholder code. Remove it so that the user sees a
failure when a flow with the 'count' action is created.

Signed-off-by: Somnath Kotur 
Reviewed-by: Kalesh AP 
---
 drivers/net/bnxt/bnxt_flow.c | 17 -
 1 file changed, 17 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_flow.c b/drivers/net/bnxt/bnxt_flow.c
index f7c90c4..71a8edd 100644
--- a/drivers/net/bnxt/bnxt_flow.c
+++ b/drivers/net/bnxt/bnxt_flow.c
@@ -1405,23 +1405,6 @@ bnxt_validate_and_parse_flow(struct rte_eth_dev *dev,
 
bnxt_update_filter_flags_en(filter, filter1, use_ntuple);
break;
-   case RTE_FLOW_ACTION_TYPE_COUNT:
-   vnic0 = &bp->vnic_info[0];
-   filter1 = bnxt_get_l2_filter(bp, filter, vnic0);
-   if (filter1 == NULL) {
-   rte_flow_error_set(error,
-  ENOSPC,
-  RTE_FLOW_ERROR_TYPE_ACTION,
-  act,
-  "New filter not available");
-   rc = -rte_errno;
-   goto ret;
-   }
-
-   filter->fw_l2_filter_id = filter1->fw_l2_filter_id;
-   filter->flow_id = filter1->flow_id;
-   filter->flags = HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_FLAGS_METER;
-   break;
case RTE_FLOW_ACTION_TYPE_VF:
act_vf = (const struct rte_flow_action_vf *)act->conf;
vf = act_vf->id;
-- 
2.10.1



[PATCH 07/17] net/bnxt: fix RSS action support

2022-04-27 Thread Kalesh A P
From: Kalesh AP 

Specifying a subset of the Rx queues created by the application in
the "flow create" command is invalid. The user must either specify
all the Rx queues created or no queues.

Also removed an incorrect comment, as the RSS action is not supported
if the user or application specifies the MARK or COUNT action.

Fixes: 239695f754cb ("net/bnxt: enhance RSS action support")
Cc: sta...@dpdk.org

Signed-off-by: Kalesh AP 
Reviewed-by: Somnath Kotur 
---
 drivers/net/bnxt/bnxt_flow.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/net/bnxt/bnxt_flow.c b/drivers/net/bnxt/bnxt_flow.c
index 71a8edd..bd96bba 100644
--- a/drivers/net/bnxt/bnxt_flow.c
+++ b/drivers/net/bnxt/bnxt_flow.c
@@ -1074,7 +1074,6 @@ bnxt_update_filter_flags_en(struct bnxt_filter_info *filter,
filter1, filter->fw_l2_filter_id, filter->l2_ref_cnt);
 }
 
-/* Valid actions supported along with RSS are count and mark. */
 static int
 bnxt_validate_rss_action(const struct rte_flow_action actions[])
 {
@@ -1123,6 +1122,17 @@ bnxt_vnic_rss_cfg_update(struct bnxt *bp,
 
rss = (const struct rte_flow_action_rss *)act->conf;
 
+   /* must specify either all the Rx queues created by application or zero queues */
+   if (rss->queue_num && vnic->rx_queue_cnt != rss->queue_num) {
+   rte_flow_error_set(error,
+  EINVAL,
+  RTE_FLOW_ERROR_TYPE_ACTION,
+  act,
+  "Incorrect RXQ count");
+   rc = -rte_errno;
+   goto ret;
+   }
+
/* Currently only Toeplitz hash is supported. */
if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
-- 
2.10.1
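
As an illustration with testpmd (assuming a session started with four
Rx queues; the port and pattern are arbitrary), a rule naming all
created queues is accepted, while one naming only a subset is now
rejected with the "Incorrect RXQ count" error set above:

testpmd> flow create 0 ingress pattern eth / ipv4 / end actions rss queues 0 1 2 3 end / end
testpmd> flow create 0 ingress pattern eth / ipv4 / end actions rss queues 0 1 end / end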



[PATCH 08/17] net/bnxt: fix to reconfigure the VNIC's default receive ring

2022-04-27 Thread Kalesh A P
From: Somnath Kotur 

When an Rx queue is stopped and restarted, as part of that workflow,
for cards that have ring groups, we free and reallocate the ring group.
This new ring group is not communicated to the VNIC, though, via the
HWRM_VNIC_CFG command.
Fix by issuing the HWRM_VNIC_CFG command on all adapters in this scenario.

Fixes: ed0ae3502fc9 ("net/bnxt: update ring group after ring stop start")

Signed-off-by: Somnath Kotur 
Reviewed-by: Ajit Khaparde 
Reviewed-by: Kalesh AP 
---
 drivers/net/bnxt/bnxt_rxq.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_rxq.c b/drivers/net/bnxt/bnxt_rxq.c
index 8147404..9b5ff4c 100644
--- a/drivers/net/bnxt/bnxt_rxq.c
+++ b/drivers/net/bnxt/bnxt_rxq.c
@@ -488,10 +488,11 @@ int bnxt_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
if (rc)
return rc;
 
-   if (BNXT_CHIP_P5(bp)) {
-   /* Reconfigure default receive ring and MRU. */
-   bnxt_hwrm_vnic_cfg(bp, rxq->vnic);
-   }
+   if (BNXT_HAS_RING_GRPS(bp))
+   rxq->vnic->dflt_ring_grp = bp->grp_info[rx_queue_id].fw_grp_id;
+   /* Reconfigure default receive ring and MRU. */
+   bnxt_hwrm_vnic_cfg(bp, rxq->vnic);
+
PMD_DRV_LOG(INFO, "Rx queue started %d\n", rx_queue_id);
 
if (dev_conf->rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) {
-- 
2.10.1



[PATCH 09/17] net/bnxt: add check for duplicate queue IDs

2022-04-27 Thread Kalesh A P
From: Kalesh AP 

Currently the driver does not have a check for duplicate queue IDs.
The user must either specify all the Rx queues created or no queues
in the flow create command. Repeating a queue index is invalid.

Also, moved the check for invalid queues to the beginning of the function.

Fixes: 239695f754cb ("net/bnxt: enhance RSS action support")
Cc: sta...@dpdk.org

Signed-off-by: Kalesh AP 
Reviewed-by: Ajit Khaparde 
Reviewed-by: Somnath Kotur 
---
 drivers/net/bnxt/bnxt_flow.c | 49 +---
 1 file changed, 32 insertions(+), 17 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_flow.c b/drivers/net/bnxt/bnxt_flow.c
index bd96bba..fc5bacf 100644
--- a/drivers/net/bnxt/bnxt_flow.c
+++ b/drivers/net/bnxt/bnxt_flow.c
@@ -1115,7 +1115,7 @@ bnxt_vnic_rss_cfg_update(struct bnxt *bp,
 struct rte_flow_error *error)
 {
const struct rte_flow_action_rss *rss;
-   unsigned int rss_idx, i;
+   unsigned int rss_idx, i, j;
uint16_t hash_type;
uint64_t types;
int rc;
@@ -1133,6 +1133,37 @@ bnxt_vnic_rss_cfg_update(struct bnxt *bp,
goto ret;
}
 
+   /* Validate Rx queues */
+   for (i = 0; i < rss->queue_num; i++) {
+   PMD_DRV_LOG(DEBUG, "RSS action Queue %d\n", rss->queue[i]);
+
+   if (rss->queue[i] >= bp->rx_nr_rings ||
+   !bp->rx_queues[rss->queue[i]]) {
+   rte_flow_error_set(error,
+  EINVAL,
+  RTE_FLOW_ERROR_TYPE_ACTION,
+  act,
+  "Invalid queue ID for RSS");
+   rc = -rte_errno;
+   goto ret;
+   }
+   }
+
+   /* Duplicate queue ids are not supported. */
+   for (i = 0; i < rss->queue_num; i++) {
+   for (j = i + 1; j < rss->queue_num; j++) {
+   if (rss->queue[i] == rss->queue[j]) {
+   rte_flow_error_set(error,
+  EINVAL,
+  RTE_FLOW_ERROR_TYPE_ACTION,
+  act,
+  "Duplicate queue ID for RSS");
+   rc = -rte_errno;
+   goto ret;
+   }
+   }
+   }
+
/* Currently only Toeplitz hash is supported. */
if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
@@ -1200,22 +1231,6 @@ bnxt_vnic_rss_cfg_update(struct bnxt *bp,
if (rss->queue_num == 0)
goto skip_rss_table;
 
-   /* Validate Rx queues */
-   for (i = 0; i < rss->queue_num; i++) {
-   PMD_DRV_LOG(DEBUG, "RSS action Queue %d\n", rss->queue[i]);
-
-   if (rss->queue[i] >= bp->rx_nr_rings ||
-   !bp->rx_queues[rss->queue[i]]) {
-   rte_flow_error_set(error,
-  EINVAL,
-  RTE_FLOW_ERROR_TYPE_ACTION,
-  act,
-  "Invalid queue ID for RSS");
-   rc = -rte_errno;
-   goto ret;
-   }
-   }
-
/* Prepare the indirection table */
for (rss_idx = 0; rss_idx < HW_HASH_INDEX_SIZE; rss_idx++) {
struct bnxt_rx_queue *rxq;
-- 
2.10.1



[PATCH 10/17] net/bnxt: fix to handle queue stop during RSS flow create

2022-04-27 Thread Kalesh A P
From: Somnath Kotur 

The programming of the RSS table did not take into account whether
any of the queues in the set were stopped prior to the flow creation,
leading to a VNIC RSS config command failure from the FW.
Fix by programming only the active queues in the RSS action queue
set.

Fixes: 239695f754cb ("net/bnxt: enhance RSS action support")
Cc: sta...@dpdk.org

Signed-off-by: Somnath Kotur 
Reviewed-by: Ajit Khaparde 
Reviewed-by: Kalesh AP 
---
 drivers/net/bnxt/bnxt_flow.c | 16 +---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_flow.c b/drivers/net/bnxt/bnxt_flow.c
index fc5bacf..f8e1096 100644
--- a/drivers/net/bnxt/bnxt_flow.c
+++ b/drivers/net/bnxt/bnxt_flow.c
@@ -1115,7 +1115,7 @@ bnxt_vnic_rss_cfg_update(struct bnxt *bp,
 struct rte_flow_error *error)
 {
const struct rte_flow_action_rss *rss;
-   unsigned int rss_idx, i, j;
+   unsigned int rss_idx, i, j, fw_idx;
uint16_t hash_type;
uint64_t types;
int rc;
@@ -1232,11 +1232,21 @@ bnxt_vnic_rss_cfg_update(struct bnxt *bp,
goto skip_rss_table;
 
/* Prepare the indirection table */
-   for (rss_idx = 0; rss_idx < HW_HASH_INDEX_SIZE; rss_idx++) {
+   for (rss_idx = 0, fw_idx = 0; rss_idx < HW_HASH_INDEX_SIZE;
+rss_idx++, fw_idx++) {
+   uint8_t *rxq_state = bp->eth_dev->data->rx_queue_state;
struct bnxt_rx_queue *rxq;
uint32_t idx;
 
-   idx = rss->queue[rss_idx % rss->queue_num];
+   for (i = 0; i < bp->rx_cp_nr_rings; i++) {
+   idx = rss->queue[fw_idx % rss->queue_num];
+   if (rxq_state[idx] != RTE_ETH_QUEUE_STATE_STOPPED)
+   break;
+   fw_idx++;
+   }
+
+   if (i == bp->rx_cp_nr_rings)
+   return 0;
 
if (BNXT_CHIP_P5(bp)) {
rxq = bp->rx_queues[idx];
-- 
2.10.1
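
A worked illustration of the new indirection-table loop, with a
hypothetical queue set:

/*
 * rss->queue = {0, 1, 2}, queue 1 stopped. The inner scan advances
 * fw_idx past the stopped ring each time it comes up, so the table
 * is built only from active queues:
 *
 *   rss_idx: 0  1  2  3  4  5 ...
 *   queue:   0  2  0  2  0  2 ...
 *
 * If every queue in the set is stopped, the scan exhausts
 * bp->rx_cp_nr_rings attempts and the function returns early.
 */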



[PATCH 11/17] net/bnxt: avoid unnecessary endianness conversion

2022-04-27 Thread Kalesh A P
From: Kalesh AP 

The "active_fec_signal_mode" in the HWRM_PORT_PHY_QCFG response is a
uint8_t, so there is no need for an endianness conversion while
parsing the response. Also, signal_mode is the lower 4 bits of
"active_fec_signal_mode".

Fixes: c23f9ded0391 ("net/bnxt: support 200G PAM4 link")
Cc: sta...@dpdk.org

Signed-off-by: Kalesh AP 
Reviewed-by: Ajit Khaparde 
Reviewed-by: Somnath Kotur 
---
 drivers/net/bnxt/bnxt_hwrm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index 178a112..d87f0c3 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -1506,7 +1506,7 @@ static int bnxt_hwrm_port_phy_qcfg(struct bnxt *bp,
link_info->phy_ver[1] = resp->phy_min;
link_info->phy_ver[2] = resp->phy_bld;
link_info->link_signal_mode =
-   rte_le_to_cpu_16(resp->active_fec_signal_mode);
+   resp->active_fec_signal_mode & HWRM_PORT_PHY_QCFG_OUTPUT_SIGNAL_MODE_MASK;
link_info->force_pam4_link_speed =
rte_le_to_cpu_16(resp->force_pam4_link_speed);
link_info->support_pam4_speeds =
-- 
2.10.1
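
To see why the conversion was wrong, consider a big-endian host (the
value is illustrative; the mask is the one used in the fix):

	uint8_t v = resp->active_fec_signal_mode;	/* e.g. 0x21 */

	/* rte_le_to_cpu_16() byte-swaps on big-endian CPUs after
	 * integer promotion: 0x0021 becomes 0x2100, losing the mode.
	 */
	uint16_t wrong = rte_le_to_cpu_16(v);

	/* Masking the low nibble of the single byte needs no
	 * conversion and yields signal_mode directly.
	 */
	uint8_t mode = v & HWRM_PORT_PHY_QCFG_OUTPUT_SIGNAL_MODE_MASK;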



[PATCH 12/17] net/bnxt: fix setting autoneg speed

2022-04-27 Thread Kalesh A P
From: Kalesh AP 

The "active_fec_signal_mode" in the HWRM_PORT_PHY_QCFG response
does not return the correct value until the link is up. The driver
cannot rely on active_fec_signal_mode while setting the autoneg speed.

While setting the autoneg speed, the driver currently checks only
"auto_link_speed_mask". Fixed to check "auto_pam4_link_speed_mask"
as well. Also, while setting auto mode and the speed mask, the
driver has to set both the NRZ and PAM4 masks.

Fixes: c23f9ded0391 ("net/bnxt: support 200G PAM4 link")
Cc: sta...@dpdk.org

Signed-off-by: Kalesh AP 
Reviewed-by: Ajit Khaparde 
Reviewed-by: Somnath Kotur 
---
 drivers/net/bnxt/bnxt_hwrm.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index d87f0c3..9eb8b8d 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -1424,17 +1424,17 @@ static int bnxt_hwrm_port_phy_cfg(struct bnxt *bp, struct bnxt_link_info *conf)
}
}
/* AutoNeg - Advertise speeds specified. */
-   if (conf->auto_link_speed_mask &&
+   if ((conf->auto_link_speed_mask || conf->auto_pam4_link_speed_mask) &&
!(conf->phy_flags & HWRM_PORT_PHY_CFG_INPUT_FLAGS_FORCE)) {
req.auto_mode =
HWRM_PORT_PHY_CFG_INPUT_AUTO_MODE_SPEED_MASK;
-   if (conf->auto_pam4_link_speed_mask &&
-   bp->link_info->link_signal_mode) {
+   if (conf->auto_pam4_link_speed_mask) {
enables |=
HWRM_PORT_PHY_CFG_IN_EN_AUTO_PAM4_LINK_SPD_MASK;
 			req.auto_link_pam4_speed_mask =
 				rte_cpu_to_le_16(conf->auto_pam4_link_speed_mask);
-   } else {
+   }
+   if (conf->auto_link_speed_mask) {
enables |=
HWRM_PORT_PHY_CFG_IN_EN_AUTO_LINK_SPEED_MASK;
req.auto_link_speed_mask =
-- 
2.10.1



[PATCH 14/17] net/bnxt: fix reporting link status when port is stopped

2022-04-27 Thread Kalesh A P
From: Kalesh AP 

The driver forces the link down during port stop. But the device is
not obliged to bring the link down in certain scenarios, even when
forced. In that case, subsequent link queries return the link as up.
Fixed to return the link status as down when the port is stopped.
The driver already does this for VF/NPAR/MH functions.

Fixes: c09f57b49c13 ("net/bnxt: add start/stop/link update operations")
Cc: sta...@dpdk.org

Signed-off-by: Kalesh AP 
Reviewed-by: Ajit Khaparde 
Reviewed-by: Somnath Kotur 
---
 drivers/net/bnxt/bnxt_ethdev.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 1904db9..69f1117 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -1826,6 +1826,14 @@ int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete)
if (bp->link_info == NULL)
goto out;
 
+   /* Only single function PF can bring the phy down.
+* In certain scenarios, device is not obliged link down even when forced.
+* When port is stopped, report link down in those cases.
+*/
+   if (!eth_dev->data->dev_started &&
+   (!BNXT_SINGLE_PF(bp) || bnxt_force_link_config(bp)))
+   goto out;
+
do {
/* Retrieve link info from hardware */
rc = bnxt_get_hwrm_link_config(bp, &new);
@@ -1843,12 +1851,6 @@ int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete)
rte_delay_ms(BNXT_LINK_WAIT_INTERVAL);
} while (cnt--);
 
-   /* Only single function PF can bring phy down.
-* When port is stopped, report link down for VF/MH/NPAR functions.
-*/
-   if (!BNXT_SINGLE_PF(bp) && !eth_dev->data->dev_started)
-   memset(&new, 0, sizeof(new));
-
 out:
/* Timed out or success */
if (new.link_status != eth_dev->data->dev_link.link_status ||
-- 
2.10.1



[PATCH 13/17] net/bnxt: force PHY update on certain configurations

2022-04-27 Thread Kalesh A P
From: Kalesh AP 

The device is not obliged to bring the link down in certain
scenarios, even when forced. When the FW does not allow any user
other than the BMC to shut down the port, the
bnxt_get_hwrm_link_config() call always returns link up. Force the
phy update always in that case, else the user configuration for
speed/autoneg would not get applied correctly.

Fixes: 7bc8e9a227cc ("net/bnxt: support async link notification")
Cc: sta...@dpdk.org

Signed-off-by: Kalesh AP 
Reviewed-by: Ajit Khaparde 
Reviewed-by: Somnath Kotur 
---
 drivers/net/bnxt/bnxt.h|  3 +--
 drivers/net/bnxt/bnxt_ethdev.c | 22 ++
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index e4e8e8e..e86e51e 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -72,8 +72,7 @@
 #define BROADCOM_DEV_ID_58818_VF   0xd82e
 
 #define BROADCOM_DEV_957508_N2100  0x5208
-#define IS_BNXT_DEV_957508_N2100(bp)   \
-   ((bp)->pdev->id.subsystem_device_id == BROADCOM_DEV_957508_N2100)
+#define BROADCOM_DEV_957414_N225   0x4145
 
 #define BNXT_MAX_MTU   9574
 #define BNXT_NUM_VLANS 2
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 181de42..1904db9 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -659,6 +659,19 @@ static int bnxt_init_ctx_mem(struct bnxt *bp)
return rc;
 }
 
+static inline bool bnxt_force_link_config(struct bnxt *bp)
+{
+   uint16_t subsystem_device_id = bp->pdev->id.subsystem_device_id;
+
+   switch (subsystem_device_id) {
+   case BROADCOM_DEV_957508_N2100:
+   case BROADCOM_DEV_957414_N225:
+   return true;
+   default:
+   return false;
+   }
+}
+
 static int bnxt_update_phy_setting(struct bnxt *bp)
 {
struct rte_eth_link new;
@@ -671,11 +684,12 @@ static int bnxt_update_phy_setting(struct bnxt *bp)
}
 
/*
-* On BCM957508-N2100 adapters, FW will not allow any user other
-* than BMC to shutdown the port. bnxt_get_hwrm_link_config() call
-* always returns link up. Force phy update always in that case.
+* Device is not obliged link down in certain scenarios, even
+* when forced. When FW does not allow any user other than BMC
+* to shutdown the port, bnxt_get_hwrm_link_config() call always
+* returns link up. Force phy update always in that case.
 */
-   if (!new.link_status || IS_BNXT_DEV_957508_N2100(bp)) {
+   if (!new.link_status || bnxt_force_link_config(bp)) {
rc = bnxt_set_hwrm_link_config(bp, true);
if (rc) {
PMD_DRV_LOG(ERR, "Failed to update PHY settings\n");
-- 
2.10.1



[PATCH 15/17] net/bnxt: recheck FW readiness if FW is in reset process

2022-04-27 Thread Kalesh A P
From: Kalesh AP 

If the firmware is still in the reset process and returns the error
HWRM_ERR_CODE_HOT_RESET_PROGRESS, retry the VER_GET command.
This has to be done in bnxt_handle_if_change_status().

Fixes: 0b533591238f ("net/bnxt: inform firmware about IF state changes")
Cc: sta...@dpdk.org

Signed-off-by: Kalesh AP 
Reviewed-by: Somnath Kotur 
Reviewed-by: Ajit Khaparde 
---
 drivers/net/bnxt/bnxt_ethdev.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 69f1117..abcb534 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -177,6 +177,7 @@ static int bnxt_restore_vlan_filters(struct bnxt *bp);
 static void bnxt_dev_recover(void *arg);
 static void bnxt_free_error_recovery_info(struct bnxt *bp);
 static void bnxt_free_rep_info(struct bnxt *bp);
+static int bnxt_check_fw_ready(struct bnxt *bp);
 
 int is_bnxt_in_error(struct bnxt *bp)
 {
@@ -1350,6 +1351,11 @@ static int bnxt_handle_if_change_status(struct bnxt *bp)
 
/* clear fatal flag so that re-init happens */
bp->flags &= ~BNXT_FLAG_FATAL_ERROR;
+
+   rc = bnxt_check_fw_ready(bp);
+   if (rc)
+   return rc;
+
rc = bnxt_init_resources(bp, true);
 
bp->flags &= ~BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE;
@@ -4324,7 +4330,7 @@ static int bnxt_restore_filters(struct bnxt *bp)
 
 static int bnxt_check_fw_ready(struct bnxt *bp)
 {
-   int timeout = bp->fw_reset_max_msecs;
+   int timeout = bp->fw_reset_max_msecs ? : BNXT_MAX_FW_RESET_TIMEOUT;
int rc = 0;
 
do {
-- 
2.10.1
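
For reference, a minimal sketch of the poll bnxt_check_fw_ready()
performs (the loop body is cut off above; the wait-interval macro and
the exact bnxt_hwrm_ver_get() signature are assumptions here):

	do {
		/* Re-issue VER_GET; success means the FW is back. */
		rc = bnxt_hwrm_ver_get(bp, SHORT_HWRM_CMD_TIMEOUT);
		if (rc == 0)
			break;
		rte_delay_ms(BNXT_FW_READY_WAIT_INTERVAL);
		timeout -= BNXT_FW_READY_WAIT_INTERVAL;
	} while (timeout > 0);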



[PATCH 16/17] net/bnxt: fix freeing of VNIC filters

2022-04-27 Thread Kalesh A P
From: Somnath Kotur 

In bnxt_free_all_filters(), all the filters attached to a vnic are removed.
But each of these filters holds a back-reference pointer to the vnic, and
these need to be reset to NULL now. Otherwise, during a normal testpmd
quit, as part of dev_close_op(), bnxt_free_all_filters() is first invoked
in dev_stop, followed by bnxt_free_filter_mem() from
bnxt_uninit_resources(), which finds a filter with a vnic back-reference
pointer; bnxt_hwrm_clean_up_l2_filter() then also tries to remove the
filter from the vnic's filter list, which was already done as part of
bnxt_free_all_filters().

Fixes: f0f6b5e6cf9 ("net/bnxt: fix reusing L2 filter")
Cc: sta...@dpdk.org

Signed-off-by: Somnath Kotur 
Reviewed-by: Kalesh AP 
---
 drivers/net/bnxt/bnxt_filter.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/bnxt/bnxt_filter.c b/drivers/net/bnxt/bnxt_filter.c
index 1d08e03..b0c3bbd 100644
--- a/drivers/net/bnxt/bnxt_filter.c
+++ b/drivers/net/bnxt/bnxt_filter.c
@@ -99,6 +99,8 @@ void bnxt_free_all_filters(struct bnxt *bp)
bnxt_filter_info, next);
STAILQ_INSERT_TAIL(&bp->free_filter_list,
filter, next);
+   if (filter->vnic)
+   filter->vnic = NULL;
filter = temp_filter;
}
STAILQ_INIT(&vnic->filter);
-- 
2.10.1



[PATCH 17/17] net/bnxt: don't wait for link up completion in dev start

2022-04-27 Thread Kalesh A P
From: Somnath Kotur 

Invoking bnxt_link_update_op() with wait_for_completion set would
result in the driver waiting for 10 seconds, when the port link is
down, to complete port initialization (dev_start_op()).
Change it to not wait for the completion when invoking it in
dev_start_op().

Signed-off-by: Somnath Kotur 
Reviewed-by: Kalesh AP 
---
 drivers/net/bnxt/bnxt_ethdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index abcb534..0f0f40b 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -1608,7 +1608,7 @@ int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
 
eth_dev->data->dev_started = 1;
 
-   bnxt_link_update_op(eth_dev, 1);
+   bnxt_link_update_op(eth_dev, 0);
 
if (rx_offloads & RTE_ETH_RX_OFFLOAD_VLAN_FILTER)
vlan_mask |= RTE_ETH_VLAN_FILTER_MASK;
-- 
2.10.1



[PATCH v5 0/7] app/test: add inline IPsec and reassembly cases

2022-04-27 Thread Akhil Goyal
IP reassembly offload was added in the last release.
The test app for unit testing IP reassembly of inline
inbound IPsec flows is added in this patchset.
For testing IP reassembly, base inline IPsec is also
added. The app is enhanced in v4 to handle more functional
unit test cases for inline IPsec, similar to Lookaside IPsec.
The functions from Lookaside mode are reused to verify the
functional cases.

Changes in v5:
- removed soft/hard expiry patches which are deferred for next release
- skipped tests if no port is added.
- added release notes.
Changes in v4:
- rebased over next-crypto
- updated app to take benefit from Lookaside protocol
test functions.
- Added more functional cases
- Added soft and hard expiry event subtypes in ethdev
for testing SA soft and hard pkt/byte expiry events.
- reassembly cases are squashed in a single patch

Changes in v3:
- incorporated latest ethdev changes for reassembly.
- skipped build on windows as it needs rte_ipsec lib which is not
  compiled on windows.
Changes in v2:
- added IPsec burst mode case
- updated as per the latest ethdev changes.


Akhil Goyal (6):
  app/test: add unit cases for inline IPsec offload
  test/security: add inline inbound IPsec cases
  test/security: add combined mode inline IPsec cases
  test/security: add inline IPsec reassembly cases
  test/security: add more inline IPsec functional cases
  test/security: add ESN and anti-replay cases for inline

Vamsi Attunuru (1):
  test/security: add inline IPsec IPv6 flow label cases

 MAINTAINERS   |2 +-
 app/test/meson.build  |1 +
 app/test/test_cryptodev_security_ipsec.c  |   35 +-
 app/test/test_cryptodev_security_ipsec.h  |   10 +
 app/test/test_security_inline_proto.c | 2372 +
 app/test/test_security_inline_proto_vectors.h |  704 +
 doc/guides/rel_notes/release_22_07.rst|5 +
 7 files changed, 3127 insertions(+), 2 deletions(-)
 create mode 100644 app/test/test_security_inline_proto.c
 create mode 100644 app/test/test_security_inline_proto_vectors.h

-- 
2.25.1



[PATCH v5 1/7] app/test: add unit cases for inline IPsec offload

2022-04-27 Thread Akhil Goyal
A new test suite is added to the test app to test inline IPsec protocol
offload. In this patch, predefined vectors from the Lookaside IPsec tests
are used to verify the IPsec functionality without the need for external
traffic generators. The sent packet is looped back on the same interface,
where it is received and matched with the expected output.
The test suite can be updated further with other functional test cases.
In this patch, encap-only cases are added.
The test suite can be run using:
RTE> inline_ipsec_autotest

Signed-off-by: Akhil Goyal 
Signed-off-by: Nithin Dabilpuram 
---
 MAINTAINERS   |   2 +-
 app/test/meson.build  |   1 +
 app/test/test_security_inline_proto.c | 882 ++
 app/test/test_security_inline_proto_vectors.h |  20 +
 doc/guides/rel_notes/release_22_07.rst|   5 +
 5 files changed, 909 insertions(+), 1 deletion(-)
 create mode 100644 app/test/test_security_inline_proto.c
 create mode 100644 app/test/test_security_inline_proto_vectors.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 15008c03bc..89affa08ff 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -440,7 +440,7 @@ M: Akhil Goyal 
 T: git://dpdk.org/next/dpdk-next-crypto
 F: lib/security/
 F: doc/guides/prog_guide/rte_security.rst
-F: app/test/test_security.c
+F: app/test/test_security*
 
 Compression API - EXPERIMENTAL
 M: Fan Zhang 
diff --git a/app/test/meson.build b/app/test/meson.build
index 5fc1dd1b7b..39952c6c4f 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -125,6 +125,7 @@ test_sources = files(
 'test_rwlock.c',
 'test_sched.c',
 'test_security.c',
+'test_security_inline_proto.c',
 'test_service_cores.c',
 'test_spinlock.c',
 'test_stack.c',
diff --git a/app/test/test_security_inline_proto.c b/app/test/test_security_inline_proto.c
new file mode 100644
index 00..249474be91
--- /dev/null
+++ b/app/test/test_security_inline_proto.c
@@ -0,0 +1,882 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell.
+ */
+
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+#include "test.h"
+#include "test_security_inline_proto_vectors.h"
+
+#ifdef RTE_EXEC_ENV_WINDOWS
+static int
+test_inline_ipsec(void)
+{
+   printf("Inline ipsec not supported on Windows, skipping test\n");
+   return TEST_SKIPPED;
+}
+
+#else
+
+#define NB_ETHPORTS_USED   1
+#define MEMPOOL_CACHE_SIZE 32
+#define MAX_PKT_BURST  32
+#define RTE_TEST_RX_DESC_DEFAULT   1024
+#define RTE_TEST_TX_DESC_DEFAULT   1024
+#define RTE_PORT_ALL   (~(uint16_t)0x0)
+
+#define RX_PTHRESH 8 /**< Default values of RX prefetch threshold reg. */
+#define RX_HTHRESH 8 /**< Default values of RX host threshold reg. */
+#define RX_WTHRESH 0 /**< Default values of RX write-back threshold reg. */
+
+#define TX_PTHRESH 32 /**< Default values of TX prefetch threshold reg. */
+#define TX_HTHRESH 0  /**< Default values of TX host threshold reg. */
+#define TX_WTHRESH 0  /**< Default values of TX write-back threshold reg. */
+
+#define MAX_TRAFFIC_BURST  2048
+#define NB_MBUF			10240
+
+extern struct ipsec_test_data pkt_aes_128_gcm;
+extern struct ipsec_test_data pkt_aes_192_gcm;
+extern struct ipsec_test_data pkt_aes_256_gcm;
+extern struct ipsec_test_data pkt_aes_128_gcm_frag;
+extern struct ipsec_test_data pkt_aes_128_cbc_null;
+extern struct ipsec_test_data pkt_null_aes_xcbc;
+extern struct ipsec_test_data pkt_aes_128_cbc_hmac_sha384;
+extern struct ipsec_test_data pkt_aes_128_cbc_hmac_sha512;
+
+static struct rte_mempool *mbufpool;
+static struct rte_mempool *sess_pool;
+static struct rte_mempool *sess_priv_pool;
+/* ethernet addresses of ports */
+static struct rte_ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
+
+static struct rte_eth_conf port_conf = {
+   .rxmode = {
+   .mq_mode = RTE_ETH_MQ_RX_NONE,
+   .split_hdr_size = 0,
+   .offloads = RTE_ETH_RX_OFFLOAD_CHECKSUM |
+   RTE_ETH_RX_OFFLOAD_SECURITY,
+   },
+   .txmode = {
+   .mq_mode = RTE_ETH_MQ_TX_NONE,
+   .offloads = RTE_ETH_TX_OFFLOAD_SECURITY |
+   RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE,
+   },
+   .lpbk_mode = 1,  /* enable loopback */
+};
+
+static struct rte_eth_rxconf rx_conf = {
+   .rx_thresh = {
+   .pthresh = RX_PTHRESH,
+   .hthresh = RX_HTHRESH,
+   .wthresh = RX_WTHRESH,
+   },
+   .rx_free_thresh = 32,
+};
+
+static struct rte_eth_txconf tx_conf = {
+   .tx_thresh = {
+   .pthresh = TX_PTHRESH,
+   .hthresh = TX_HTHRESH,
+   .wthresh = TX_WTHRESH,
+   },
+   .tx_free_thresh = 32, /* Use PMD default values */
+   .tx_rs_thresh = 32, /* Use PMD default values */
+};
+
+uint16_t port_id;
+
+static uint64_t link_mbps;
+
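
A typical way to run the suite (the build path and PCI address are
illustrative; any port with inline IPsec capability bound to DPDK
will do):

  $ ./build/app/test/dpdk-test -a 0002:02:00.0
  RTE> inline_ipsec_autotest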

[PATCH v5 2/7] test/security: add inline inbound IPsec cases

2022-04-27 Thread Akhil Goyal
Added test cases for inline inbound protocol offload
verification with known test vectors from Lookaside mode.

Signed-off-by: Akhil Goyal 
---
 app/test/test_security_inline_proto.c | 65 +++
 1 file changed, 65 insertions(+)

diff --git a/app/test/test_security_inline_proto.c b/app/test/test_security_inline_proto.c
index 249474be91..7dd9ba7aff 100644
--- a/app/test/test_security_inline_proto.c
+++ b/app/test/test_security_inline_proto.c
@@ -819,6 +819,24 @@ test_ipsec_inline_proto_known_vec(const void *test_data)
false, &flags);
 }
 
+static int
+test_ipsec_inline_proto_known_vec_inb(const void *test_data)
+{
+   const struct ipsec_test_data *td = test_data;
+   struct ipsec_test_flags flags;
+   struct ipsec_test_data td_inb;
+
+   memset(&flags, 0, sizeof(flags));
+
+   if (td->ipsec_xform.direction == RTE_SECURITY_IPSEC_SA_DIR_EGRESS)
+   test_ipsec_td_in_from_out(td, &td_inb);
+   else
+   memcpy(&td_inb, td, sizeof(td_inb));
+
+   return test_ipsec_inline_proto_process(&td_inb, NULL, 1, false, &flags);
+}
+
+
 static struct unit_test_suite inline_ipsec_testsuite  = {
.suite_name = "Inline IPsec Ethernet Device Unit Test Suite",
.setup = inline_ipsec_testsuite_setup,
@@ -865,6 +883,53 @@ static struct unit_test_suite inline_ipsec_testsuite  = {
ut_setup_inline_ipsec, ut_teardown_inline_ipsec,
test_ipsec_inline_proto_known_vec,
&pkt_null_aes_xcbc),
+   TEST_CASE_NAMED_WITH_DATA(
+   "Inbound known vector (ESP tunnel mode IPv4 AES-GCM 128)",
+   ut_setup_inline_ipsec, ut_teardown_inline_ipsec,
+   test_ipsec_inline_proto_known_vec_inb, &pkt_aes_128_gcm),
+   TEST_CASE_NAMED_WITH_DATA(
+   "Inbound known vector (ESP tunnel mode IPv4 AES-GCM 192)",
+   ut_setup_inline_ipsec, ut_teardown_inline_ipsec,
+   test_ipsec_inline_proto_known_vec_inb, &pkt_aes_192_gcm),
+   TEST_CASE_NAMED_WITH_DATA(
+   "Inbound known vector (ESP tunnel mode IPv4 AES-GCM 256)",
+   ut_setup_inline_ipsec, ut_teardown_inline_ipsec,
+   test_ipsec_inline_proto_known_vec_inb, &pkt_aes_256_gcm),
+   TEST_CASE_NAMED_WITH_DATA(
+   "Inbound known vector (ESP tunnel mode IPv4 AES-CBC 128)",
+   ut_setup_inline_ipsec, ut_teardown_inline_ipsec,
+   test_ipsec_inline_proto_known_vec_inb, &pkt_aes_128_cbc_null),
+   TEST_CASE_NAMED_WITH_DATA(
+   "Inbound known vector (ESP tunnel mode IPv4 AES-CBC 128 HMAC-SHA256 [16B ICV])",
+   ut_setup_inline_ipsec, ut_teardown_inline_ipsec,
+   test_ipsec_inline_proto_known_vec_inb,
+   &pkt_aes_128_cbc_hmac_sha256),
+   TEST_CASE_NAMED_WITH_DATA(
+   "Inbound known vector (ESP tunnel mode IPv4 AES-CBC 128 HMAC-SHA384 [24B ICV])",
+   ut_setup_inline_ipsec, ut_teardown_inline_ipsec,
+   test_ipsec_inline_proto_known_vec_inb,
+   &pkt_aes_128_cbc_hmac_sha384),
+   TEST_CASE_NAMED_WITH_DATA(
+   "Inbound known vector (ESP tunnel mode IPv4 AES-CBC 128 HMAC-SHA512 [32B ICV])",
+   ut_setup_inline_ipsec, ut_teardown_inline_ipsec,
+   test_ipsec_inline_proto_known_vec_inb,
+   &pkt_aes_128_cbc_hmac_sha512),
+   TEST_CASE_NAMED_WITH_DATA(
+   "Inbound known vector (ESP tunnel mode IPv6 AES-GCM 128)",
+   ut_setup_inline_ipsec, ut_teardown_inline_ipsec,
+   test_ipsec_inline_proto_known_vec_inb, &pkt_aes_256_gcm_v6),
+   TEST_CASE_NAMED_WITH_DATA(
+   "Inbound known vector (ESP tunnel mode IPv6 AES-CBC 128 HMAC-SHA256 [16B ICV])",
+   ut_setup_inline_ipsec, ut_teardown_inline_ipsec,
+   test_ipsec_inline_proto_known_vec_inb,
+   &pkt_aes_128_cbc_hmac_sha256_v6),
+   TEST_CASE_NAMED_WITH_DATA(
+   "Inbound known vector (ESP tunnel mode IPv4 NULL AES-XCBC-MAC [12B ICV])",
+   ut_setup_inline_ipsec, ut_teardown_inline_ipsec,
+   test_ipsec_inline_proto_known_vec_inb,
+   &pkt_null_aes_xcbc),
+
+
 
TEST_CASES_END() /**< NULL terminate unit test array */
},
-- 
2.25.1



[PATCH v5 3/7] test/security: add combined mode inline IPsec cases

2022-04-27 Thread Akhil Goyal
Added combined encap and decap test cases for various algorithm
combinations.

Signed-off-by: Akhil Goyal 
---
 app/test/test_security_inline_proto.c | 102 ++
 1 file changed, 102 insertions(+)

diff --git a/app/test/test_security_inline_proto.c b/app/test/test_security_inline_proto.c
index 7dd9ba7aff..ea36d1188c 100644
--- a/app/test/test_security_inline_proto.c
+++ b/app/test/test_security_inline_proto.c
@@ -660,6 +660,92 @@ test_ipsec_inline_proto_process(struct ipsec_test_data *td,
return ret;
 }
 
+static int
+test_ipsec_inline_proto_all(const struct ipsec_test_flags *flags)
+{
+   struct ipsec_test_data td_outb;
+   struct ipsec_test_data td_inb;
+   unsigned int i, nb_pkts = 1, pass_cnt = 0, fail_cnt = 0;
+   int ret;
+
+   if (flags->iv_gen || flags->sa_expiry_pkts_soft ||
+   flags->sa_expiry_pkts_hard)
+   nb_pkts = IPSEC_TEST_PACKETS_MAX;
+
+   for (i = 0; i < RTE_DIM(alg_list); i++) {
+   test_ipsec_td_prepare(alg_list[i].param1,
+ alg_list[i].param2,
+ flags, &td_outb, 1);
+
+   if (!td_outb.aead) {
+   enum rte_crypto_cipher_algorithm cipher_alg;
+   enum rte_crypto_auth_algorithm auth_alg;
+
+   cipher_alg = td_outb.xform.chain.cipher.cipher.algo;
+   auth_alg = td_outb.xform.chain.auth.auth.algo;
+
+   if (td_outb.aes_gmac && cipher_alg != RTE_CRYPTO_CIPHER_NULL)
+   continue;
+
+   /* ICV is not applicable for NULL auth */
+   if (flags->icv_corrupt &&
+   auth_alg == RTE_CRYPTO_AUTH_NULL)
+   continue;
+
+   /* IV is not applicable for NULL cipher */
+   if (flags->iv_gen &&
+   cipher_alg == RTE_CRYPTO_CIPHER_NULL)
+   continue;
+   }
+
+   if (flags->udp_encap)
+   td_outb.ipsec_xform.options.udp_encap = 1;
+
+   ret = test_ipsec_inline_proto_process(&td_outb, &td_inb, nb_pkts,
+   false, flags);
+   if (ret == TEST_SKIPPED)
+   continue;
+
+   if (ret == TEST_FAILED) {
+   printf("\n TEST FAILED");
+   test_ipsec_display_alg(alg_list[i].param1,
+  alg_list[i].param2);
+   fail_cnt++;
+   continue;
+   }
+
+   test_ipsec_td_update(&td_inb, &td_outb, 1, flags);
+
+   ret = test_ipsec_inline_proto_process(&td_inb, NULL, nb_pkts,
+   false, flags);
+   if (ret == TEST_SKIPPED)
+   continue;
+
+   if (ret == TEST_FAILED) {
+   printf("\n TEST FAILED");
+   test_ipsec_display_alg(alg_list[i].param1,
+  alg_list[i].param2);
+   fail_cnt++;
+   continue;
+   }
+
+   if (flags->display_alg)
+   test_ipsec_display_alg(alg_list[i].param1,
+  alg_list[i].param2);
+
+   pass_cnt++;
+   }
+
+   printf("Tests passed: %d, failed: %d", pass_cnt, fail_cnt);
+   if (fail_cnt > 0)
+   return TEST_FAILED;
+   if (pass_cnt > 0)
+   return TEST_SUCCESS;
+   else
+   return TEST_SKIPPED;
+}
+
+
 static int
 ut_setup_inline_ipsec(void)
 {
@@ -836,6 +922,17 @@ test_ipsec_inline_proto_known_vec_inb(const void *test_data)
return test_ipsec_inline_proto_process(&td_inb, NULL, 1, false, &flags);
 }
 
+static int
+test_ipsec_inline_proto_display_list(const void *data __rte_unused)
+{
+   struct ipsec_test_flags flags;
+
+   memset(&flags, 0, sizeof(flags));
+
+   flags.display_alg = true;
+
+   return test_ipsec_inline_proto_all(&flags);
+}
 
 static struct unit_test_suite inline_ipsec_testsuite  = {
.suite_name = "Inline IPsec Ethernet Device Unit Test Suite",
@@ -929,6 +1026,11 @@ static struct unit_test_suite inline_ipsec_testsuite  = {
test_ipsec_inline_proto_known_vec_inb,
&pkt_null_aes_xcbc),
 
+   TEST_CASE_NAMED_ST(
+   "Combined test alg list",
+   ut_setup_inline_ipsec, ut_teardown_inline_ipsec,
+   test_ipsec_inline_proto_display_list),
+
 
 
TEST_CASES_END() /**< NULL terminate unit test array */
-- 
2.25.1



[PATCH v5 4/7] test/security: add inline IPsec reassembly cases

2022-04-27 Thread Akhil Goyal
Added unit test cases for IP reassembly of inline IPsec
inbound scenarios.
In these cases, known test vectors of fragments are first
processed for inline outbound processing and then received
back on the loopback interface for inbound processing, along
with IP reassembly of the corresponding decrypted packets.
The resultant reassembled plain-text packet is compared with
the original unfragmented packet.

In this patch, cases are added for 2/4/5 fragments, for both
IPv4 and IPv6 packets. A few negative test cases are also added,
such as incomplete fragments, out-of-order fragments, and
duplicate fragments.

Signed-off-by: Akhil Goyal 
---
 app/test/test_security_inline_proto.c | 421 ++-
 app/test/test_security_inline_proto_vectors.h | 684 ++
 2 files changed, 1104 insertions(+), 1 deletion(-)

diff --git a/app/test/test_security_inline_proto.c b/app/test/test_security_inline_proto.c
index ea36d1188c..46636af072 100644
--- a/app/test/test_security_inline_proto.c
+++ b/app/test/test_security_inline_proto.c
@@ -41,6 +41,9 @@ test_inline_ipsec(void)
 #define MAX_TRAFFIC_BURST  2048
 #define NB_MBUF10240
 
+#define ENCAP_DECAP_BURST_SZ   33
+#define APP_REASS_TIMEOUT  10
+
 extern struct ipsec_test_data pkt_aes_128_gcm;
 extern struct ipsec_test_data pkt_aes_192_gcm;
 extern struct ipsec_test_data pkt_aes_256_gcm;
@@ -94,6 +97,8 @@ uint16_t port_id;
 
 static uint64_t link_mbps;
 
+static int ip_reassembly_dynfield_offset = -1;
+
 static struct rte_flow *default_flow[RTE_MAX_ETHPORTS];
 
 /* Create Inline IPsec session */
@@ -527,6 +532,347 @@ destroy_default_flow(uint16_t portid)
 struct rte_mbuf **tx_pkts_burst;
 struct rte_mbuf **rx_pkts_burst;
 
+static int
+compare_pkt_data(struct rte_mbuf *m, uint8_t *ref, unsigned int tot_len)
+{
+   unsigned int len;
+   unsigned int nb_segs = m->nb_segs;
+   unsigned int matched = 0;
+   struct rte_mbuf *save = m;
+
+   while (m) {
+   len = tot_len;
+   if (len > m->data_len)
+   len = m->data_len;
+   if (len != 0) {
+   if (memcmp(rte_pktmbuf_mtod(m, char *),
+   ref + matched, len)) {
+   printf("\nReassembly case failed: Data Mismatch");
+   rte_hexdump(stdout, "Reassembled",
+   rte_pktmbuf_mtod(m, char *),
+   len);
+   rte_hexdump(stdout, "reference",
+   ref + matched,
+   len);
+   return TEST_FAILED;
+   }
+   }
+   tot_len -= len;
+   matched += len;
+   m = m->next;
+   }
+
+   if (tot_len) {
+   printf("\nReassembly case failed: Data Missing %u",
+  tot_len);
+   printf("\nnb_segs %u, tot_len %u", nb_segs, tot_len);
+   rte_pktmbuf_dump(stderr, save, -1);
+   return TEST_FAILED;
+   }
+   return TEST_SUCCESS;
+}
+
+static inline bool
+is_ip_reassembly_incomplete(struct rte_mbuf *mbuf)
+{
+   static uint64_t ip_reassembly_dynflag;
+   int ip_reassembly_dynflag_offset;
+
+   if (ip_reassembly_dynflag == 0) {
+   ip_reassembly_dynflag_offset = rte_mbuf_dynflag_lookup(
+   RTE_MBUF_DYNFLAG_IP_REASSEMBLY_INCOMPLETE_NAME, NULL);
+   if (ip_reassembly_dynflag_offset < 0)
+   return false;
+   ip_reassembly_dynflag = RTE_BIT64(ip_reassembly_dynflag_offset);
+   }
+
+   return (mbuf->ol_flags & ip_reassembly_dynflag) != 0;
+}
+
+static void
+free_mbuf(struct rte_mbuf *mbuf)
+{
+   rte_eth_ip_reassembly_dynfield_t dynfield;
+
+   if (!mbuf)
+   return;
+
+   if (!is_ip_reassembly_incomplete(mbuf)) {
+   rte_pktmbuf_free(mbuf);
+   } else {
+   if (ip_reassembly_dynfield_offset < 0)
+   return;
+
+   while (mbuf) {
+   dynfield = *RTE_MBUF_DYNFIELD(mbuf,
+   ip_reassembly_dynfield_offset,
+   rte_eth_ip_reassembly_dynfield_t *);
+   rte_pktmbuf_free(mbuf);
+   mbuf = dynfield.next_frag;
+   }
+   }
+}
+
+
+static int
+get_and_verify_incomplete_frags(struct rte_mbuf *mbuf,
+   struct reassembly_vector *vector)
+{
+   rte_eth_ip_reassembly_dynfield_t *dynfield[MAX_PKT_BURST];
+   int j = 0, ret;
+   /**
+* IP reassembly offload is incomplete, and fragments are listed in
+* dynfield which can be reassembled in SW.
+*/
+   printf("\nHW IP Reassembly is not comple
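
The rest of this hunk is cut off above. For context, a minimal sketch
of how an application programs the reassembly offload before port
start, using the ethdev IP reassembly API (the clamping policy shown
here is an assumption):

	struct rte_eth_ip_reassembly_params reass_capa;

	/* Query PMD capability, then apply a possibly reduced config. */
	if (rte_eth_ip_reassembly_capability_get(port_id, &reass_capa) == 0) {
		if (reass_capa.timeout_ms > APP_REASS_TIMEOUT)
			reass_capa.timeout_ms = APP_REASS_TIMEOUT;
		rte_eth_ip_reassembly_conf_set(port_id, &reass_capa);
	}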

[PATCH v5 5/7] test/security: add more inline IPsec functional cases

2022-04-27 Thread Akhil Goyal
Added more inline IPsec functional verification cases.
These cases do not have known vectors but are verified
using encap + decap tests for all the algo combinations.

Signed-off-by: Akhil Goyal 
---
 app/test/test_security_inline_proto.c | 517 ++
 1 file changed, 517 insertions(+)

diff --git a/app/test/test_security_inline_proto.c b/app/test/test_security_inline_proto.c
index 46636af072..055b753634 100644
--- a/app/test/test_security_inline_proto.c
+++ b/app/test/test_security_inline_proto.c
@@ -1314,6 +1314,394 @@ test_ipsec_inline_proto_display_list(const void *data __rte_unused)
return test_ipsec_inline_proto_all(&flags);
 }
 
+static int
+test_ipsec_inline_proto_udp_encap(const void *data __rte_unused)
+{
+   struct ipsec_test_flags flags;
+
+   memset(&flags, 0, sizeof(flags));
+
+   flags.udp_encap = true;
+
+   return test_ipsec_inline_proto_all(&flags);
+}
+
+static int
+test_ipsec_inline_proto_udp_ports_verify(const void *data __rte_unused)
+{
+   struct ipsec_test_flags flags;
+
+   memset(&flags, 0, sizeof(flags));
+
+   flags.udp_encap = true;
+   flags.udp_ports_verify = true;
+
+   return test_ipsec_inline_proto_all(&flags);
+}
+
+static int
+test_ipsec_inline_proto_err_icv_corrupt(const void *data __rte_unused)
+{
+   struct ipsec_test_flags flags;
+
+   memset(&flags, 0, sizeof(flags));
+
+   flags.icv_corrupt = true;
+
+   return test_ipsec_inline_proto_all(&flags);
+}
+
+static int
+test_ipsec_inline_proto_tunnel_dst_addr_verify(const void *data __rte_unused)
+{
+   struct ipsec_test_flags flags;
+
+   memset(&flags, 0, sizeof(flags));
+
+   flags.tunnel_hdr_verify = RTE_SECURITY_IPSEC_TUNNEL_VERIFY_DST_ADDR;
+
+   return test_ipsec_inline_proto_all(&flags);
+}
+
+static int
+test_ipsec_inline_proto_tunnel_src_dst_addr_verify(const void *data __rte_unused)
+{
+   struct ipsec_test_flags flags;
+
+   memset(&flags, 0, sizeof(flags));
+
+   flags.tunnel_hdr_verify = RTE_SECURITY_IPSEC_TUNNEL_VERIFY_SRC_DST_ADDR;
+
+   return test_ipsec_inline_proto_all(&flags);
+}
+
+static int
+test_ipsec_inline_proto_inner_ip_csum(const void *data __rte_unused)
+{
+   struct ipsec_test_flags flags;
+
+   memset(&flags, 0, sizeof(flags));
+
+   flags.ip_csum = true;
+
+   return test_ipsec_inline_proto_all(&flags);
+}
+
+static int
+test_ipsec_inline_proto_inner_l4_csum(const void *data __rte_unused)
+{
+   struct ipsec_test_flags flags;
+
+   memset(&flags, 0, sizeof(flags));
+
+   flags.l4_csum = true;
+
+   return test_ipsec_inline_proto_all(&flags);
+}
+
+static int
+test_ipsec_inline_proto_tunnel_v4_in_v4(const void *data __rte_unused)
+{
+   struct ipsec_test_flags flags;
+
+   memset(&flags, 0, sizeof(flags));
+
+   flags.ipv6 = false;
+   flags.tunnel_ipv6 = false;
+
+   return test_ipsec_inline_proto_all(&flags);
+}
+
+static int
+test_ipsec_inline_proto_tunnel_v6_in_v6(const void *data __rte_unused)
+{
+   struct ipsec_test_flags flags;
+
+   memset(&flags, 0, sizeof(flags));
+
+   flags.ipv6 = true;
+   flags.tunnel_ipv6 = true;
+
+   return test_ipsec_inline_proto_all(&flags);
+}
+
+static int
+test_ipsec_inline_proto_tunnel_v4_in_v6(const void *data __rte_unused)
+{
+   struct ipsec_test_flags flags;
+
+   memset(&flags, 0, sizeof(flags));
+
+   flags.ipv6 = false;
+   flags.tunnel_ipv6 = true;
+
+   return test_ipsec_inline_proto_all(&flags);
+}
+
+static int
+test_ipsec_inline_proto_tunnel_v6_in_v4(const void *data __rte_unused)
+{
+   struct ipsec_test_flags flags;
+
+   memset(&flags, 0, sizeof(flags));
+
+   flags.ipv6 = true;
+   flags.tunnel_ipv6 = false;
+
+   return test_ipsec_inline_proto_all(&flags);
+}
+
+static int
+test_ipsec_inline_proto_transport_v4(const void *data __rte_unused)
+{
+   struct ipsec_test_flags flags;
+
+   memset(&flags, 0, sizeof(flags));
+
+   flags.ipv6 = false;
+   flags.transport = true;
+
+   return test_ipsec_inline_proto_all(&flags);
+}
+
+static int
+test_ipsec_inline_proto_transport_l4_csum(const void *data __rte_unused)
+{
+   struct ipsec_test_flags flags = {
+   .l4_csum = true,
+   .transport = true,
+   };
+
+   return test_ipsec_inline_proto_all(&flags);
+}
+
+static int
+test_ipsec_inline_proto_stats(const void *data __rte_unused)
+{
+   struct ipsec_test_flags flags;
+
+   memset(&flags, 0, sizeof(flags));
+
+   flags.stats_success = true;
+
+   return test_ipsec_inline_proto_all(&flags);
+}
+
+static int
+test_ipsec_inline_proto_pkt_fragment(const void *data __rte_unused)
+{
+   struct ipsec_test_flags flags;
+
+   memset(&flags, 0, sizeof(flags));
+
+   flags.fragment = true;
+
+   return test_ipsec_inline_proto_all(&flags);
+
+}
+
+static int
+test_ipsec_inline_proto_copy_df_inner_0(const void *data __rte_unused)
+{
+   st

[PATCH v5 6/7] test/security: add ESN and anti-replay cases for inline

2022-04-27 Thread Akhil Goyal
Added cases to test anti-replay for inline IPsec processing
with and without extended sequence number support.

Signed-off-by: Akhil Goyal 
---
 app/test/test_security_inline_proto.c | 308 ++
 1 file changed, 308 insertions(+)

diff --git a/app/test/test_security_inline_proto.c b/app/test/test_security_inline_proto.c
index 055b753634..009405f403 100644
--- a/app/test/test_security_inline_proto.c
+++ b/app/test/test_security_inline_proto.c
@@ -1091,6 +1091,136 @@ test_ipsec_inline_proto_all(const struct ipsec_test_flags *flags)
return TEST_SKIPPED;
 }
 
+static int
+test_ipsec_inline_proto_process_with_esn(struct ipsec_test_data td[],
+   struct ipsec_test_data res_d[],
+   int nb_pkts,
+   bool silent,
+   const struct ipsec_test_flags *flags)
+{
+   struct rte_security_session_conf sess_conf = {0};
+   struct ipsec_test_data *res_d_tmp = NULL;
+   struct rte_crypto_sym_xform cipher = {0};
+   struct rte_crypto_sym_xform auth = {0};
+   struct rte_crypto_sym_xform aead = {0};
+   struct rte_mbuf *rx_pkt = NULL;
+   struct rte_mbuf *tx_pkt = NULL;
+   int nb_rx, nb_sent;
+   struct rte_security_session *ses;
+   struct rte_security_ctx *ctx;
+   uint32_t ol_flags;
+   int i, ret;
+
+   if (td[0].aead) {
+   sess_conf.crypto_xform = &aead;
+   } else {
+   if (td[0].ipsec_xform.direction ==
+   RTE_SECURITY_IPSEC_SA_DIR_EGRESS) {
+   sess_conf.crypto_xform = &cipher;
+   sess_conf.crypto_xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER;
+   sess_conf.crypto_xform->next = &auth;
+   sess_conf.crypto_xform->next->type = RTE_CRYPTO_SYM_XFORM_AUTH;
+   } else {
+   sess_conf.crypto_xform = &auth;
+   sess_conf.crypto_xform->type = RTE_CRYPTO_SYM_XFORM_AUTH;
+   sess_conf.crypto_xform->next = &cipher;
+   sess_conf.crypto_xform->next->type = RTE_CRYPTO_SYM_XFORM_CIPHER;
+   }
+   }
+
+   /* Create Inline IPsec session. */
+   ret = create_inline_ipsec_session(&td[0], port_id, &ses, &ctx,
+ &ol_flags, flags, &sess_conf);
+   if (ret)
+   return ret;
+
+   if (td[0].ipsec_xform.direction == RTE_SECURITY_IPSEC_SA_DIR_INGRESS)
+   create_default_flow(port_id);
+
+   for (i = 0; i < nb_pkts; i++) {
+   tx_pkt = init_packet(mbufpool, td[i].input_text.data,
+   td[i].input_text.len);
+   if (tx_pkt == NULL) {
+   ret = TEST_FAILED;
+   goto out;
+   }
+
+   if (test_ipsec_pkt_update(rte_pktmbuf_mtod_offset(tx_pkt,
+   uint8_t *, RTE_ETHER_HDR_LEN), flags)) {
+   ret = TEST_FAILED;
+   goto out;
+   }
+
+   if (td[i].ipsec_xform.direction ==
+   RTE_SECURITY_IPSEC_SA_DIR_EGRESS) {
+   if (flags->antireplay) {
+   sess_conf.ipsec.esn.value =
+   td[i].ipsec_xform.esn.value;
+   ret = rte_security_session_update(ctx, ses,
+   &sess_conf);
+   if (ret) {
+   printf("Could not update ESN in session\n");
+   rte_pktmbuf_free(tx_pkt);
+   goto out;
+   }
+   }
+   if (ol_flags & RTE_SECURITY_TX_OLOAD_NEED_MDATA)
+   rte_security_set_pkt_metadata(ctx, ses,
+   tx_pkt, NULL);
+   tx_pkt->ol_flags |= RTE_MBUF_F_TX_SEC_OFFLOAD;
+   }
+   /* Send packet to ethdev for inline IPsec processing. */
+   nb_sent = rte_eth_tx_burst(port_id, 0, &tx_pkt, 1);
+   if (nb_sent != 1) {
+   printf("\nUnable to TX packets");
+   rte_pktmbuf_free(tx_pkt);
+   ret = TEST_FAILED;
+   goto out;
+   }
+
+   rte_pause();
+
+   /* Receive back packet on loopback interface. */
+   do {
+   rte_delay_ms(1);
+   nb_rx = rte_eth_rx_burst(port_id, 0, &rx_pkt, 1);
+   } while (nb_rx == 0);
+
+   rte_pktmbuf_adj(rx_pkt, RTE_ETHER_HDR_LEN);
+
+   if (res_d != NULL)
+   res_d_tmp = &res_d[i];
+
+   ret = test_ipsec_post_proces
