[PATCH] event/cnxk: add SLMTST support to Tx adapter
Scheduled LMTST uses the in-core LSW (LMTST scheduling widget) to
coordinate with SSO and send an LMTST to the destination coprocessor
without the need for the core to be the head of the scheduling context
it is currently holding.

Use SLMTST to send mbuf to NIX-TX for transmit. SLMTST only supports
transmitting a single WQE.

Signed-off-by: Pavan Nikhilesh
---
Depends-on: Series-22634

 drivers/common/cnxk/hw/ssow.h        |  7 +++
 drivers/common/cnxk/roc_dev_priv.h   |  6 ++
 drivers/common/cnxk/roc_io.h         |  8 ++
 drivers/common/cnxk/roc_io_generic.h |  7 +++
 drivers/common/cnxk/roc_nix.c        | 19 +++
 drivers/common/cnxk/roc_nix.h        |  4 ++
 drivers/common/cnxk/roc_sso.c        | 23 +++
 drivers/common/cnxk/roc_sso.h        |  2 ++
 drivers/common/cnxk/version.map      |  2 ++
 drivers/event/cnxk/cn10k_eventdev.c  | 11 +++
 drivers/event/cnxk/cn10k_worker.h    | 19 +-
 drivers/event/cnxk/cnxk_eventdev.h   |  2 +-
 12 files changed, 103 insertions(+), 7 deletions(-)

diff --git a/drivers/common/cnxk/hw/ssow.h b/drivers/common/cnxk/hw/ssow.h
index 618ab7973b..b40238bc6c 100644
--- a/drivers/common/cnxk/hw/ssow.h
+++ b/drivers/common/cnxk/hw/ssow.h
@@ -62,6 +62,13 @@
 #define SSOW_GW_RESULT_GW_NO_WORK (0x1ull) /* [CN10K, .) */
 #define SSOW_GW_RESULT_GW_ERROR   (0x2ull) /* [CN10K, .) */
 
+#define SSOW_LSW_MODE_NO_LSW (0x0)
+#define SSOW_LSW_MODE_WAIT   (0x1)
+#define SSOW_LSW_MODE_IMMED  (0x2)
+
+#define SSOW_LSW_WQE_RELEASE_WAIT_ACK (0x0)
+#define SSOW_LSW_WQE_RELEASE_IMMED    (0x1)
+
 #define SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT 63
 #define SSOW_LF_GWS_TAG_PEND_SWITCH_BIT   62
 #define SSOW_LF_GWS_TAG_PEND_DESCHED_BIT  58
diff --git a/drivers/common/cnxk/roc_dev_priv.h b/drivers/common/cnxk/roc_dev_priv.h
index 302dc0feb0..e301487f4c 100644
--- a/drivers/common/cnxk/roc_dev_priv.h
+++ b/drivers/common/cnxk/roc_dev_priv.h
@@ -54,6 +54,12 @@ dev_get_pf(uint16_t pf_func)
 	return (pf_func >> RVU_PFVF_PF_SHIFT) & RVU_PFVF_PF_MASK;
 }
 
+static inline int
+dev_get_func(uint16_t pf_func)
+{
+	return (pf_func >> RVU_PFVF_FUNC_SHIFT) & RVU_PFVF_FUNC_MASK;
+}
+
 static inline int
 dev_pf_func(int pf, int vf)
 {
diff --git a/drivers/common/cnxk/roc_io.h b/drivers/common/cnxk/roc_io.h
index 62e98d9d00..6a76e3fa71 100644
--- a/drivers/common/cnxk/roc_io.h
+++ b/drivers/common/cnxk/roc_io.h
@@ -154,6 +154,14 @@ roc_lmt_submit_steorl(uint64_t data, plt_iova_t io_address)
 		     [rs] "r"(io_address));
 }
 
+static __plt_always_inline void
+roc_lmt_submit_stsmaxl(uint64_t data, plt_iova_t io_address)
+{
+	asm volatile(".cpu generic+lse\n"
+		     "stsmaxl %x[d], [%[rs]]" ::[d] "r"(data),
+		     [rs] "r"(io_address));
+}
+
 static __plt_always_inline void
 roc_lmt_mov(void *out, const void *in, const uint32_t lmtext)
 {
diff --git a/drivers/common/cnxk/roc_io_generic.h b/drivers/common/cnxk/roc_io_generic.h
index 42764455cc..097ed8af09 100644
--- a/drivers/common/cnxk/roc_io_generic.h
+++ b/drivers/common/cnxk/roc_io_generic.h
@@ -98,6 +98,13 @@ roc_lmt_submit_steorl(uint64_t data, plt_iova_t io_address)
 	PLT_SET_USED(io_address);
 }
 
+static __plt_always_inline void
+roc_lmt_submit_stsmaxl(uint64_t data, plt_iova_t io_address)
+{
+	PLT_SET_USED(data);
+	PLT_SET_USED(io_address);
+}
+
 static __plt_always_inline void
 roc_lmt_mov(void *out, const void *in, const uint32_t lmtext)
 {
diff --git a/drivers/common/cnxk/roc_nix.c b/drivers/common/cnxk/roc_nix.c
index 151d8c3426..16d707b5ff 100644
--- a/drivers/common/cnxk/roc_nix.c
+++ b/drivers/common/cnxk/roc_nix.c
@@ -139,6 +139,25 @@ roc_nix_max_pkt_len(struct roc_nix *roc_nix)
 	return NIX_RPM_MAX_HW_FRS;
 }
 
+int
+roc_nix_sched_lmt_enable(struct roc_nix *roc_nix)
+{
+	struct nix *nix = roc_nix_to_nix_priv(roc_nix);
+	struct mbox *mbox = (&nix->dev)->mbox;
+	struct lmtst_tbl_setup_req *req;
+
+	req = mbox_alloc_msg_lmtst_tbl_setup(mbox);
+	if (req == NULL)
+		return -ENOSPC;
+	req->pcifunc = 0;
+	req->ssow_pf_func = dev_get_pf(idev_sso_pffunc_get()) << 8;
+	req->ssow_pf_func |=
+		(uint64_t)(dev_get_func(idev_sso_pffunc_get()) & 0xFF);
+	req->sched_ena = 1;
+
+	return mbox_process(mbox);
+}
+
 int
 roc_nix_lf_alloc(struct roc_nix *roc_nix, uint32_t nb_rxq, uint32_t nb_txq,
 		 uint64_t rx_cfg)
diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h
index dbb816d961..b985fb5df4 100644
--- a/drivers/common/cnxk/roc_nix.h
+++ b/drivers/common/cnxk/roc_nix.h
@@ -904,4 +904,8 @@ int __roc_api roc_nix_mcast_mcam_entry_write(struct roc_nix *roc_nix,
 					     uint64_t action);
 int __roc_api roc_nix_mcast_mcam_entry_ena_dis(struct roc_nix *roc_nix,
 					       uint32_t index, bool enable);
+
+/* SSO */
+in
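For readers unfamiliar with the LSW path, here is a minimal standalone
restatement of the submit primitive added above. It is an illustrative
sketch, not driver code: the function name is hypothetical, only the
inline assembly (STSMAXL enabled via ".cpu generic+lse", exactly as in
the roc_io.h hunk) comes from the patch, and the non-ARM branch mirrors
the empty stub in roc_io_generic.h.

#include <stdint.h>

/* Submit LMTST data to an I/O address. On aarch64 this uses STSMAXL
 * (an LSE atomic store), which the LSW can order against the SSO
 * scheduling context, unlike the plain STEORL submit. */
static inline void
lmt_submit_stsmaxl(uint64_t data, uint64_t io_address)
{
#if defined(__aarch64__)
	asm volatile(".cpu generic+lse\n"
		     "stsmaxl %x[d], [%[rs]]" ::[d] "r"(data),
		     [rs] "r"(io_address));
#else
	/* Generic build: no-op, as in roc_io_generic.h */
	(void)data;
	(void)io_address;
#endif
}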
[PATCH v2] net/iavf: fix segfaults when calling API after VF reset failed
Some pointers will be set to NULL when iavf_dev_reset() fails, for
example vf->vf_res, vf->vsi_res, vf->rss_key, etc. APIs that access
these NULL pointers will trigger a segfault.

This patch adds a "closed" flag to indicate that the VF is closed, and
rejects API calls in this state to avoid a coredump.

Fixes: e74e1bb6280d ("net/iavf: enable port reset")
Cc: sta...@dpdk.org

Signed-off-by: Yiding Zhou
---
 drivers/net/iavf/iavf.h        |  1 +
 drivers/net/iavf/iavf_ethdev.c | 57 +++---
 drivers/net/iavf/iavf_rxtx.c   | 10 ++
 drivers/net/iavf/iavf_vchnl.c  | 17 ++
 4 files changed, 81 insertions(+), 4 deletions(-)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index a01d18e61b..b3b582dd21 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -298,6 +298,7 @@ struct iavf_adapter {
 	bool tx_vec_allowed;
 	uint32_t ptype_tbl[IAVF_MAX_PKT_TYPE] __rte_cache_min_aligned;
 	bool stopped;
+	bool closed;
 	uint16_t fdir_ref_cnt;
 	struct iavf_devargs devargs;
 };
diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c
index d6190ac24a..91b6e64840 100644
--- a/drivers/net/iavf/iavf_ethdev.c
+++ b/drivers/net/iavf/iavf_ethdev.c
@@ -229,9 +229,15 @@ static const struct eth_dev_ops iavf_eth_dev_ops = {
 };
 
 static int
-iavf_tm_ops_get(struct rte_eth_dev *dev __rte_unused,
+iavf_tm_ops_get(struct rte_eth_dev *dev,
 		void *arg)
 {
+	struct iavf_adapter *adapter =
+		IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+
+	if (adapter->closed)
+		return -EIO;
+
 	if (!arg)
 		return -EINVAL;
 
@@ -342,6 +348,9 @@ iavf_set_mc_addr_list(struct rte_eth_dev *dev,
 		return -EINVAL;
 	}
 
+	if (adapter->closed)
+		return -EIO;
+
 	/* flush previous addresses */
 	err = iavf_add_del_mc_addr_list(adapter, vf->mc_addrs,
 					vf->mc_addrs_num, false);
@@ -613,6 +622,9 @@ iavf_dev_configure(struct rte_eth_dev *dev)
 		dev->data->nb_tx_queues);
 	int ret;
 
+	if (ad->closed)
+		return -EIO;
+
 	ad->rx_bulk_alloc_allowed = true;
 	/* Initialize to TRUE. If any of Rx queues doesn't meet the
 	 * vector Rx/Tx preconditions, it will be reset.
@@ -932,6 +944,9 @@ iavf_dev_start(struct rte_eth_dev *dev)
 
 	PMD_INIT_FUNC_TRACE();
 
+	if (adapter->closed)
+		return -1;
+
 	adapter->stopped = 0;
 
 	vf->max_pkt_len = dev->data->mtu + IAVF_ETH_OVERHEAD;
@@ -1009,6 +1024,9 @@ iavf_dev_stop(struct rte_eth_dev *dev)
 
 	PMD_INIT_FUNC_TRACE();
 
+	if (adapter->closed)
+		return -1;
+
 	if (!(vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_WB_ON_ITR) &&
 	    dev->data->dev_conf.intr_conf.rxq != 0)
 		rte_intr_disable(intr_handle);
@@ -1046,6 +1064,9 @@ iavf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 		IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	struct iavf_info *vf = &adapter->vf;
 
+	if (adapter->closed)
+		return -EIO;
+
 	dev_info->max_rx_queues = IAVF_MAX_NUM_QUEUES_LV;
 	dev_info->max_tx_queues = IAVF_MAX_NUM_QUEUES_LV;
 	dev_info->min_rx_bufsize = IAVF_BUF_SIZE_MIN;
@@ -1286,6 +1307,9 @@ iavf_dev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
 	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
 	int err;
 
+	if (adapter->closed)
+		return -EIO;
+
 	if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN_V2) {
 		err = iavf_add_del_vlan_v2(adapter, vlan_id, on);
 		if (err)
@@ -1362,6 +1386,9 @@ iavf_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
 	struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
 	int err;
 
+	if (adapter->closed)
+		return -EIO;
+
 	if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN_V2)
 		return iavf_dev_vlan_offload_set_v2(dev, mask);
 
@@ -1394,6 +1421,9 @@ iavf_dev_rss_reta_update(struct rte_eth_dev *dev,
 	uint16_t i, idx, shift;
 	int ret;
 
+	if (adapter->closed)
+		return -EIO;
+
 	if (!(vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF))
 		return -ENOTSUP;
 
@@ -1439,6 +1469,9 @@ iavf_dev_rss_reta_query(struct rte_eth_dev *dev,
 	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
 	uint16_t i, idx, shift;
 
+	if (adapter->closed)
+		return -EIO;
+
 	if (!(vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF))
 		return -ENOTSUP;
 
@@ -1492,6 +1525,9 @@ iavf_dev_rss_hash_update(struct rte_eth_dev *dev,
 
 	adapter->dev_data->dev_conf.rx_adv_conf.rss_conf = *rss_conf;
 
+	if (adapter->closed)
+		return -
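The guard pattern this patch applies to each entry point is easy to see
in isolation. A minimal sketch (the struct and function names here are
hypothetical; the real driver uses struct iavf_adapter and returns -EIO
from most ops):

#include <errno.h>
#include <stdbool.h>

/* Hypothetical adapter; the "closed" flag is set once the VF is closed
 * or a reset has failed and freed its resources. */
struct adapter {
	bool stopped;
	bool closed;
};

/* Every control-path op bails out before touching resources such as
 * vf->vf_res or vf->rss_key, which may already be NULL. */
static int
dev_op_example(struct adapter *ad)
{
	if (ad->closed)
		return -EIO; /* reject the call instead of dereferencing NULL */

	/* ... normal op using resources that exist only while open ... */
	return 0;
}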
RE: [PATCH] net/mlx5: fix RSS hash types adjustment
Hi,

> -----Original Message-----
> From: Dariusz Sosnowski
> Sent: Thursday, April 21, 2022 11:37 PM
> To: Matan Azrad; Slava Ovsiienko; Jack Min
> Cc: dev@dpdk.org; Raslan Darawsheh; sta...@dpdk.org
> Subject: [PATCH] net/mlx5: fix RSS hash types adjustment
>
> When an indirect action was created with an RSS action configured to
> hash on both source and destination L3 addresses (or L4 ports), it
> caused the shared hrxq to be configured to hash only on the
> destination address (or port).
>
> This patch fixes this behavior by refining the RSS types specified in
> the configuration before calculating the hash types used for the hrxq.
> Refining RSS types removes the *_SRC_ONLY and *_DST_ONLY flags if they
> are both set.
>
> Fixes: 212d17b6a650 ("net/mlx5: fix missing shared RSS hash types")
> Cc: jack...@nvidia.com
> Cc: sta...@dpdk.org
>
> Signed-off-by: Dariusz Sosnowski
> Acked-by: Viacheslav Ovsiienko

Patch applied to next-net-mlx,

Kindest regards,
Raslan Darawsheh
RE: [PATCH 0/2] net/mlx5: LRO fixes
Hi,

> -----Original Message-----
> From: Michael Baum
> Sent: Monday, April 25, 2022 12:30 PM
> To: dev@dpdk.org
> Cc: Matan Azrad; Raslan Darawsheh; Slava Ovsiienko
> Subject: [PATCH 0/2] net/mlx5: LRO fixes
>
> Independent fixes about LRO support.
>
> Michael Baum (2):
>   net/mlx5: fix miss LRO validation in RxQ setup
>   net/mlx5: fix LRO configuration in drop RxQ
>
>  drivers/net/mlx5/mlx5_devx.c | 5 +++--
>  drivers/net/mlx5/mlx5_rxq.c  | 8 ++++++++
>  2 files changed, 11 insertions(+), 2 deletions(-)
>
> --
> 2.25.1

Series applied to next-net-mlx,

Kindest regards,
Raslan Darawsheh
[PATCH v4 0/3] cryptodev: move dh type from xform to dh op
Operation type (PUBLIC_KEY_GENERATION, SHARED_SECRET) should be free to
choose for any operation. One xform/session should be enough to perform
both DH operations; if op_type were an xform member, a session would
have to be created twice for the same group. A similar problem would be
observed in the sessionless case. Additionally, this will help extend
DH to support Elliptic Curves.

v4:
- changed op_type comment
- added openssl fix

Arek Kusztal (3):
  cryptodev: move dh type from xform to dh op
  crypto/openssl: move dh type from xform to dh op
  test/crypto: move dh type from xform to dh op

 app/test/test_cryptodev_asym.c               | 11 +++---
 drivers/crypto/openssl/rte_openssl_pmd.c     | 54 ++--
 drivers/crypto/openssl/rte_openssl_pmd_ops.c | 26 --
 lib/cryptodev/rte_crypto_asym.h              | 14 ++--
 4 files changed, 16 insertions(+), 89 deletions(-)

--
2.13.6
[PATCH v4 1/3] cryptodev: move dh type from xform to dh op
Operation type (PUBLIC_KEY_GENERATION, SHARED_SECRET) should be free to
choose for any operation. One xform/session should be enough to perform
both DH operations; if op_type were an xform member, a session would
have to be created twice for the same group. A similar problem would be
observed in the sessionless case. Additionally, this will help extend
DH to support Elliptic Curves.

Signed-off-by: Arek Kusztal
---
 lib/cryptodev/rte_crypto_asym.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/lib/cryptodev/rte_crypto_asym.h b/lib/cryptodev/rte_crypto_asym.h
index cd24d4b07b..4697a7bc59 100644
--- a/lib/cryptodev/rte_crypto_asym.h
+++ b/lib/cryptodev/rte_crypto_asym.h
@@ -256,8 +256,6 @@ struct rte_crypto_modinv_xform {
  *
  */
 struct rte_crypto_dh_xform {
-	enum rte_crypto_asym_op_type type;
-	/**< Setup xform for key generate or shared secret compute */
 	rte_crypto_uint p;
 	/**< Prime modulus data */
 	rte_crypto_uint g;
@@ -391,27 +389,29 @@ struct rte_crypto_rsa_op_param {
  * @note:
  */
 struct rte_crypto_dh_op_param {
+	enum rte_crypto_asym_op_type op_type;
+	/**< Diffie-Hellman operation type */
 	rte_crypto_uint pub_key;
 	/**<
-	 * Output generated public key when xform type is
+	 * Output generated public key when op_type is
 	 * DH PUB_KEY_GENERATION.
-	 * Input peer public key when xform type is DH
+	 * Input peer public key when op_type is DH
 	 * SHARED_SECRET_COMPUTATION
 	 *
 	 */
 
 	rte_crypto_uint priv_key;
 	/**<
-	 * Output generated private key if xform type is
+	 * Output generated private key if op_type is
 	 * DH PRIVATE_KEY_GENERATION
-	 * Input when xform type is DH SHARED_SECRET_COMPUTATION.
+	 * Input when op_type is DH SHARED_SECRET_COMPUTATION.
 	 *
 	 */
 
 	rte_crypto_uint shared_secret;
 	/**<
 	 * Output with calculated shared secret
-	 * when dh xform set up with op type = SHARED_SECRET_COMPUTATION.
+	 * when dh op_type = SHARED_SECRET_COMPUTATION.
 	 *
 	 */
 };

--
2.13.6
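A short caller-side sketch of the changed structure, assuming an
asymmetric crypto op already allocated from an op mempool (the helper
name and buffer handling here are illustrative, not part of the patch):

#include <rte_crypto_asym.h>

/* With this change the DH operation type lives in the op, so one
 * xform/session can serve both directions. priv, peer and secret are
 * caller-owned buffers. */
static void
fill_dh_shared_secret_op(struct rte_crypto_op *op,
			 uint8_t *priv, size_t priv_len,
			 uint8_t *peer, size_t peer_len,
			 uint8_t *secret, size_t secret_len)
{
	struct rte_crypto_asym_op *asym_op = op->asym;

	asym_op->dh.op_type = RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE;
	asym_op->dh.priv_key.data = priv;          /* input */
	asym_op->dh.priv_key.length = priv_len;
	asym_op->dh.pub_key.data = peer;           /* input: peer public key */
	asym_op->dh.pub_key.length = peer_len;
	asym_op->dh.shared_secret.data = secret;   /* output */
	asym_op->dh.shared_secret.length = secret_len;
}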
[PATCH v4 2/3] crypto/openssl: move dh type from xform to dh op
This commit reflects the API change that moves the operation type in
Diffie-Hellman from the xform to the op.

Signed-off-by: Arek Kusztal
---
 drivers/crypto/openssl/rte_openssl_pmd.c     | 54 ++--
 drivers/crypto/openssl/rte_openssl_pmd_ops.c | 26 --
 2 files changed, 3 insertions(+), 77 deletions(-)

diff --git a/drivers/crypto/openssl/rte_openssl_pmd.c b/drivers/crypto/openssl/rte_openssl_pmd.c
index d80e1052e2..409711c097 100644
--- a/drivers/crypto/openssl/rte_openssl_pmd.c
+++ b/drivers/crypto/openssl/rte_openssl_pmd.c
@@ -1696,12 +1696,7 @@ process_openssl_dh_op(struct rte_crypto_op *cop,
 	BIGNUM *priv_key = NULL;
 	int ret = 0;
 
-	if (sess->u.dh.key_op &
-	    (1 << RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE)) {
-		/* compute shared secret using peer public key
-		 * and current private key
-		 * shared secret = peer_key ^ priv_key mod p
-		 */
+	if (op->op_type == RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE) {
 		BIGNUM *peer_key = NULL;
 
 		/* copy private key and peer key and compute shared secret */
@@ -1735,10 +1730,6 @@ process_openssl_dh_op(struct rte_crypto_op *cop,
 		if (ret < 0) {
 			cop->status = RTE_CRYPTO_OP_STATUS_ERROR;
 			BN_free(peer_key);
-			/* priv key is already loaded into dh,
-			 * let's not free that directly here.
-			 * DH_free() will auto free it later.
-			 */
 			return 0;
 		}
 		cop->status = RTE_CRYPTO_OP_STATUS_SUCCESS;
@@ -1747,50 +1738,12 @@ process_openssl_dh_op(struct rte_crypto_op *cop,
 		return 0;
 	}
 
-	/*
-	 * other options are public and private key generations.
-	 *
-	 * if user provides private key,
-	 * then first set DH with user provided private key
-	 */
-	if ((sess->u.dh.key_op &
-	    (1 << RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE)) &&
-	    !(sess->u.dh.key_op &
-	    (1 << RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE))) {
-		/* generate public key using user-provided private key
-		 * pub_key = g ^ priv_key mod p
-		 */
-
-		/* load private key into DH */
-		priv_key = BN_bin2bn(op->priv_key.data,
-				op->priv_key.length,
-				priv_key);
-		if (priv_key == NULL) {
-			cop->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
-			return -1;
-		}
-		ret = set_dh_priv_key(dh_key, priv_key);
-		if (ret) {
-			OPENSSL_LOG(ERR, "Failed to set private key\n");
-			cop->status = RTE_CRYPTO_OP_STATUS_ERROR;
-			BN_free(priv_key);
-			return 0;
-		}
-	}
-
-	/* generate public and private key pair.
-	 *
-	 * if private key already set, generates only public key.
-	 *
-	 * if private key is not already set, then set it to random value
-	 * and update internal private key.
-	 */
 	if (!DH_generate_key(dh_key)) {
 		cop->status = RTE_CRYPTO_OP_STATUS_ERROR;
 		return 0;
 	}
 
-	if (sess->u.dh.key_op & (1 << RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE)) {
+	if (op->op_type == RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE) {
 		const BIGNUM *pub_key = NULL;
 
 		OPENSSL_LOG(DEBUG, "%s:%d update public key\n",
@@ -1804,8 +1757,7 @@ process_openssl_dh_op(struct rte_crypto_op *cop,
 				op->pub_key.data);
 	}
 
-	if (sess->u.dh.key_op &
-	    (1 << RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE)) {
+	if (op->op_type == RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE) {
 		const BIGNUM *priv_key = NULL;
 
 		OPENSSL_LOG(DEBUG, "%s:%d updated priv key\n",
diff --git a/drivers/crypto/openssl/rte_openssl_pmd_ops.c b/drivers/crypto/openssl/rte_openssl_pmd_ops.c
index 1cb07794bd..02802ab0c2 100644
--- a/drivers/crypto/openssl/rte_openssl_pmd_ops.c
+++ b/drivers/crypto/openssl/rte_openssl_pmd_ops.c
@@ -1000,32 +1000,6 @@ static int openssl_set_asym_session_parameters(
 			goto err_dh;
 		}
 
-		/*
-		 * setup xfrom for
-		 * public key generate, or
-		 * DH Priv key generate, or both
-		 * public and private key generate
-		 */
-		asym_session->u.dh.key_op = (1 << xform->dh.type);
-
-		if (xform->dh.type ==
-			RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE) {
-			/* check if next is pubkey */
-
[PATCH v4 3/3] test/crypto: move dh type from xform to dh op
This commit reflects the API change in Diffie-Hellman: the crypto
operation type is now set in asym_op, not in the xform.

Signed-off-by: Arek Kusztal
---
 app/test/test_cryptodev_asym.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/app/test/test_cryptodev_asym.c b/app/test/test_cryptodev_asym.c
index 573af2a537..a5e385f4bd 100644
--- a/app/test/test_cryptodev_asym.c
+++ b/app/test/test_cryptodev_asym.c
@@ -1064,8 +1064,8 @@ test_dh_gen_shared_sec(struct rte_crypto_asym_xform *xfrm)
 	asym_op = op->asym;
 
 	/* Setup a xform and op to generate private key only */
-	xform.dh.type = RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE;
 	xform.next = NULL;
+	asym_op->dh.op_type = RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE;
 	asym_op->dh.priv_key.data = dh_test_params.priv_key.data;
 	asym_op->dh.priv_key.length = dh_test_params.priv_key.length;
 	asym_op->dh.pub_key.data = (uint8_t *)peer;
@@ -1146,7 +1146,7 @@ test_dh_gen_priv_key(struct rte_crypto_asym_xform *xfrm)
 	asym_op = op->asym;
 
 	/* Setup a xform and op to generate private key only */
-	xform.dh.type = RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE;
+	asym_op->dh.op_type = RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE;
 	xform.next = NULL;
 	asym_op->dh.priv_key.data = output;
 	asym_op->dh.priv_key.length = sizeof(output);
@@ -1229,7 +1229,7 @@ test_dh_gen_pub_key(struct rte_crypto_asym_xform *xfrm)
 	 * using test private key
 	 *
 	 */
-	xform.dh.type = RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE;
+	asym_op->dh.op_type = RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE;
 	xform.next = NULL;
 
 	asym_op->dh.pub_key.data = output;
@@ -1319,9 +1319,10 @@ test_dh_gen_kp(struct rte_crypto_asym_xform *xfrm)
 	/* Setup a xform chain to generate
 	 * private key first followed by
 	 * public key
-	 */xform.dh.type = RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE;
+	 */
+	asym_op->dh.op_type = RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE;
 	pub_key_xform.xform_type = RTE_CRYPTO_ASYM_XFORM_DH;
-	pub_key_xform.dh.type = RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE;
+	asym_op->dh.op_type = RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE;
 	xform.next = &pub_key_xform;
 
 	asym_op->dh.pub_key.data = out_pub_key;

--
2.13.6
RE: [PATCH v4 1/3] cryptodev: move dh type from xform to dh op
> -----Original Message-----
> From: Kusztal, ArkadiuszX
> Sent: Wednesday, April 27, 2022 8:44 AM
> To: dev@dpdk.org
> Cc: gak...@marvell.com; Zhang, Roy Fan; Kusztal, ArkadiuszX
> Subject: [PATCH v4 1/3] cryptodev: move dh type from xform to dh op
>
> Operation type (PUBLIC_KEY_GENERATION, SHARED_SECRET) should be free
> to choose for any operation. One xform/session should be enough to
> perform both DH operations; if op_type were an xform member, a session
> would have to be created twice for the same group. A similar problem
> would be observed in the sessionless case. Additionally, it will help
> extend DH to support Elliptic Curves.
>
> Signed-off-by: Arek Kusztal
> ---

Acked-by: Fan Zhang
RE: [PATCH v4 2/3] crypto/openssl: move dh type from xform to dh op
> -----Original Message-----
> From: Kusztal, ArkadiuszX
> Sent: Wednesday, April 27, 2022 8:44 AM
> To: dev@dpdk.org
> Cc: gak...@marvell.com; Zhang, Roy Fan; Kusztal, ArkadiuszX
> Subject: [PATCH v4 2/3] crypto/openssl: move dh type from xform to dh op
>
> This commit reflects the API change that moves the operation type in
> Diffie-Hellman from the xform to the op.
>
> Signed-off-by: Arek Kusztal
> ---

Acked-by: Fan Zhang
RE: [PATCH v4 3/3] test/crypto: move dh type from xform to dh op
> -----Original Message-----
> From: Kusztal, ArkadiuszX
> Sent: Wednesday, April 27, 2022 8:44 AM
> To: dev@dpdk.org
> Cc: gak...@marvell.com; Zhang, Roy Fan; Kusztal, ArkadiuszX
> Subject: [PATCH v4 3/3] test/crypto: move dh type from xform to dh op
>
> This commit reflects the API change in Diffie-Hellman: the crypto
> operation type is now set in asym_op, not in the xform.
>
> Signed-off-by: Arek Kusztal
> ---

Acked-by: Fan Zhang
RE: [PATCH v4 0/3] cryptodev: move dh type from xform to dh op
> -----Original Message-----
> From: Kusztal, ArkadiuszX
> Sent: Wednesday, April 27, 2022 8:44 AM
> To: dev@dpdk.org
> Cc: gak...@marvell.com; Zhang, Roy Fan; Kusztal, ArkadiuszX
> Subject: [PATCH v4 0/3] cryptodev: move dh type from xform to dh op
>
> Operation type (PUBLIC_KEY_GENERATION, SHARED_SECRET) should be free
> to choose for any operation. One xform/session should be enough to
> perform both DH operations; if op_type were an xform member, a session
> would have to be created twice for the same group. A similar problem
> would be observed in the sessionless case. Additionally, it will help
> extend DH to support Elliptic Curves.
>
> v4:
> - changed op_type comment
> - added openssl fix
>
> Arek Kusztal (3):
>   cryptodev: move dh type from xform to dh op
>   crypto/openssl: move dh type from xform to dh op
>   test/crypto: move dh type from xform to dh op
>
>  app/test/test_cryptodev_asym.c               | 11 +++---
>  drivers/crypto/openssl/rte_openssl_pmd.c     | 54 ++--
>  drivers/crypto/openssl/rte_openssl_pmd_ops.c | 26 --
>  lib/cryptodev/rte_crypto_asym.h              | 14 ++--
>  4 files changed, 16 insertions(+), 89 deletions(-)
>
> --
> 2.13.6

Series-acked-by: Fan Zhang
RE: [RFC] eal: allow worker lcore stacks to be allocated from hugepage memory
+CC: EAL and Memory maintainers.

> From: Don Wallwork [mailto:d...@xsightlabs.com]
> Sent: Tuesday, 26 April 2022 23.26
>
> On 4/26/2022 5:21 PM, Stephen Hemminger wrote:
> > On Tue, 26 Apr 2022 17:01:18 -0400
> > Don Wallwork wrote:
> >
> >> On 4/26/2022 10:58 AM, Stephen Hemminger wrote:
> >>> On Tue, 26 Apr 2022 08:19:59 -0400
> >>> Don Wallwork wrote:
> >>>
> >>>> Add support for using hugepages for worker lcore stack memory. The
> >>>> intent is to improve performance by reducing stack memory related
> >>>> TLB misses and also by using memory local to the NUMA node of each
> >>>> lcore.

This certainly seems like a good idea!

However, I wonder: Does the O/S assign memory local to the NUMA node to
an lcore-pinned thread's stack when instantiating the thread? And does
the DPDK EAL ensure that the preconditions for the O/S to do that are
present?

(Not relevant for this patch, but the same locality questions come to
mind regarding Thread Local Storage.)

> >>>> Platforms desiring to make use of this capability must enable the
> >>>> associated option flag and stack size settings in platform config
> >>>> files.
> >>>> ---
> >>>>  lib/eal/linux/eal.c | 39 +++++++++++++++++++++++++++++++++++++++
> >>>>  1 file changed, 39 insertions(+)
> >>>>
> >>> Good idea but having a fixed size stack makes writing complex
> >>> applications more difficult. Plus you lose the safety of guard
> >>> pages.

Would it be possible to add a guard page or guard region by using the
O/S memory allocator instead of rte_zmalloc_socket()?

Since the stack is considered private to the process, i.e. not
accessible from other processes, this patch does not need to provide
remote access to stack memory from secondary processes - and thus it is
not a requirement for this feature to use DPDK managed memory.

> >> Thanks for the quick reply.
> >>
> >> The expectation is that use of this optional feature would be
> >> limited to cases where the performance gains justify the
> >> implications of these tradeoffs. For example, a specific data plane
> >> application may be okay with limited stack size and could be tested
> >> to ensure stack usage remains within limits.

How to identify the required stack size and verify it... If aiming for
small stacks, some instrumentation would be nice, like
rte_mempool_audit() and rte_mempool_list_dump().

Alternatively, just assume that the stack is "always big enough", and
don't worry about it - like the default O/S stack size.

And as Stephen already mentioned: Regardless of stack size, overflowing
the stack will cause memory corruption instead of a segmentation fault.

Keep in mind that the required stack size not only depends on the
application, but also on DPDK and other libraries being used by the
application.

> >>
> >> Also, since this applies only to worker threads, the main thread
> >> would not be impacted by this change.
> >>
> >>
> > I would prefer it as a runtime, not compile time option.
> > That way distributions could ship DPDK and application could opt in
> > if it wanted.
> Good point.. I'll work on a v2 and will post that when it's ready.

May I suggest using the stack size configured in the O/S, from
pthread_attr_getstacksize() or similar, instead of choosing the stack
size manually? If you want it to be configurable, use the default size
unless explicitly specified otherwise.

Do the worker threads need a different stack size than the main thread?
In my opinion: "Nice to have", not "must have".

Do the worker threads need different stack sizes individually? In my
opinion: Perhaps "nice to have", certainly not "must have".
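For reference, a rough sketch of the mechanism under discussion:
carving a worker stack out of hugepage memory and installing it with
pthread_attr_setstack(). The 2 MB hugepage assumption, the use of
MAP_HUGETLB instead of DPDK managed memory, and the omission of NUMA
pinning are choices of this sketch, not of the RFC; a PROT_NONE guard
region could additionally be placed below the stack with mprotect().

#define _GNU_SOURCE
#include <pthread.h>
#include <stddef.h>
#include <sys/mman.h>

/* Prepare a pthread attribute whose stack lives in hugepage memory.
 * stack_sz must be a multiple of the hugepage size (e.g. 2 MB). */
static int
worker_attr_with_huge_stack(pthread_attr_t *attr, size_t stack_sz)
{
	void *stack = mmap(NULL, stack_sz, PROT_READ | PROT_WRITE,
			   MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (stack == MAP_FAILED)
		return -1;

	if (pthread_attr_init(attr) != 0 ||
	    pthread_attr_setstack(attr, stack, stack_sz) != 0) {
		munmap(stack, stack_sz);
		return -1;
	}
	return 0; /* pass attr to pthread_create() for the worker */
}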
RE: [PATCH] security: fix comments
> From: Anoob Joseph [mailto:ano...@marvell.com]
> Sent: Wednesday, 27 April 2022 08.02
>
> Fix comments to reflect the field.
>
> Fixes: ad7515a39f2a ("security: add SA lifetime configuration")
> Cc: ano...@marvell.com
>
> Reported-by: Thomas Monjalon
> Signed-off-by: Anoob Joseph
> ---
>  lib/security/rte_security.h | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/lib/security/rte_security.h b/lib/security/rte_security.h
> index 2a43cbe..675db94 100644
> --- a/lib/security/rte_security.h
> +++ b/lib/security/rte_security.h
> @@ -311,9 +311,9 @@ struct rte_security_ipsec_lifetime {
>  	uint64_t bytes_soft_limit;
>  	/**< Soft expiry limit in bytes */
>  	uint64_t packets_hard_limit;
> -	/**< Soft expiry limit in number of packets */
> +	/**< Hard expiry limit in number of packets */
>  	uint64_t bytes_hard_limit;
> -	/**< Soft expiry limit in bytes */
> +	/**< Hard expiry limit in bytes */
>  };
>
>  /**
> --
> 2.7.4

Reviewed-by: Morten Brørup
Re: [PATCH v2] net/nfp: update how MAX MTU is read
Hello,

I have a question about the Checks that ran on this patch in
patchwork [1].

It appears the ci/iol-x86_64-compile-testing job (dpdk_mingw64_compile)
has failed on a Windows Server 2019 build. But the log from the job
appears to be incomplete, as it contains only 19 lines of output and
stops without an error in the configuration part of meson.

The failure is only flagged as a warning and not as an error in
patchwork. Is it possible that the job in question fails to capture all
output, or that it fails to complete sometimes? What can we do on our
end to remedy this?

My concern is that the patch is blocked due to the warning, and I'm
unclear on how to move forward; sorry if the case is that I'm just
impatient.

1. https://patchwork.dpdk.org/project/dpdk/patch/20220420134638.24010-1-walter.heym...@corigine.com/

On 2022-04-20 15:46:39 +0200, Walter Heymans wrote:
> The 'max_rx_pktlen' value was previously read from hardware, which was
> set by the running firmware. This caused confusion due to different
> meanings of 'MAX_MTU'. This patch updates the 'max_rx_pktlen' to the
> maximum value that the NFP NIC can support. The 'max_mtu' value that
> is read from hardware is assigned to the 'dev_info->max_mtu' variable.
>
> If more layer 2 metadata must be used, the firmware can be updated to
> report a smaller 'max_mtu' value.
>
> The constant defined for NFP_FRAME_SIZE_MAX is derived from the
> maximum supported buffer size of 10240, minus 136 bytes that is
> reserved by the hardware and another 56 bytes reserved for expansion
> in firmware. This results in a usable maximum packet length of 10048
> bytes.
>
> Signed-off-by: Walter Heymans
> Signed-off-by: Niklas Söderlund
> Reviewed-by: Louis Peens
> Reviewed-by: Chaoyong He
> Reviewed-by: Richard Donkin
> ---
>  drivers/net/nfp/nfp_common.c | 11 ++++++++++-
>  drivers/net/nfp/nfp_common.h |  3 +++
>  2 files changed, 13 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/net/nfp/nfp_common.c b/drivers/net/nfp/nfp_common.c
> index b26770dbfb..52fbda1a79 100644
> --- a/drivers/net/nfp/nfp_common.c
> +++ b/drivers/net/nfp/nfp_common.c
> @@ -692,7 +692,16 @@ nfp_net_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
>  	dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues;
>  	dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues;
>  	dev_info->min_rx_bufsize = RTE_ETHER_MIN_MTU;
> -	dev_info->max_rx_pktlen = hw->max_mtu;
> +	/*
> +	 * The maximum rx packet length (max_rx_pktlen) is set to the
> +	 * maximum supported frame size that the NFP can handle. This
> +	 * includes layer 2 headers, CRC and other metadata that can
> +	 * optionally be used.
> +	 * The maximum layer 3 MTU (max_mtu) is read from hardware,
> +	 * which was set by the firmware loaded onto the card.
> +	 */
> +	dev_info->max_rx_pktlen = NFP_FRAME_SIZE_MAX;
> +	dev_info->max_mtu = hw->max_mtu;
>  	/* Next should change when PF support is implemented */
>  	dev_info->max_mac_addrs = 1;
>
> diff --git a/drivers/net/nfp/nfp_common.h b/drivers/net/nfp/nfp_common.h
> index 8b35fa119c..8db5ec23f8 100644
> --- a/drivers/net/nfp/nfp_common.h
> +++ b/drivers/net/nfp/nfp_common.h
> @@ -98,6 +98,9 @@ struct nfp_net_adapter;
>  /* Number of supported physical ports */
>  #define NFP_MAX_PHYPORTS	12
>
> +/* Maximum supported NFP frame size (MTU + layer 2 headers) */
> +#define NFP_FRAME_SIZE_MAX	10048
> +
>  #include
>  #include
>
> --
> 2.25.1

--
Kind Regards,
Niklas Söderlund
[PATCH v3] sched: enable/disable TC OV at runtime
Added a new API to enable or disable TC oversubscription for the
best-effort traffic class at subport level. Added changes after review
and increased throughput. By default, TC OV is disabled.

Signed-off-by: Marcin Danilewicz
---
 lib/sched/rte_sched.c | 189 +++++++++++++++++++++++++++++++++---------
 lib/sched/rte_sched.h |  18 +++++
 lib/sched/version.map |   3 +
 3 files changed, 178 insertions(+), 32 deletions(-)

diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index ec74bee939..6e7d81df46 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -213,6 +213,9 @@ struct rte_sched_subport {
 	uint8_t *bmp_array;
 	struct rte_mbuf **queue_array;
 	uint8_t memory[0] __rte_cache_aligned;
+
+	/* TC oversubscription activation */
+	int is_tc_ov_enabled;
 } __rte_cache_aligned;
 
 struct rte_sched_port {
@@ -1165,6 +1168,45 @@ rte_sched_cman_config(struct rte_sched_port *port,
 }
 #endif
 
+int
+rte_sched_subport_tc_ov_config(struct rte_sched_port *port,
+	uint32_t subport_id,
+	bool tc_ov_enable)
+{
+	struct rte_sched_subport *s;
+	struct rte_sched_subport_profile *profile;
+
+	if (port == NULL) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for parameter port\n", __func__);
+		return -EINVAL;
+	}
+
+	if (subport_id >= port->n_subports_per_port) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for parameter subport id\n", __func__);
+		return -EINVAL;
+	}
+
+	s = port->subports[subport_id];
+	s->is_tc_ov_enabled = tc_ov_enable ? 1 : 0;
+
+	if (s->is_tc_ov_enabled) {
+		/* TC oversubscription */
+		s->tc_ov_wm_min = port->mtu;
+		s->tc_ov_period_id = 0;
+		s->tc_ov = 0;
+		s->tc_ov_n = 0;
+		s->tc_ov_rate = 0;
+
+		profile = port->subport_profiles + s->profile;
+		s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile->tc_period,
+			s->pipe_tc_be_rate_max);
+		s->tc_ov_wm = s->tc_ov_wm_max;
+	}
+	return 0;
+}
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1254,6 +1296,9 @@ rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
+		/* TC over-subscription is disabled by default */
+		s->is_tc_ov_enabled = 0;
+
 #ifdef RTE_SCHED_CMAN
 		if (params->cman_params != NULL) {
 			s->cman_enabled = true;
@@ -1316,13 +1361,6 @@ rte_sched_subport_config(struct rte_sched_port *port,
 
 		for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++)
 			s->grinder_base_bmp_pos[i] = RTE_SCHED_PIPE_INVALID;
-
-		/* TC oversubscription */
-		s->tc_ov_wm_min = port->mtu;
-		s->tc_ov_period_id = 0;
-		s->tc_ov = 0;
-		s->tc_ov_n = 0;
-		s->tc_ov_rate = 0;
 	}
 
 	{
@@ -1342,9 +1380,6 @@ rte_sched_subport_config(struct rte_sched_port *port,
 			else
 				profile->tc_credits_per_period[i] = 0;
 
-		s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile->tc_period,
-			s->pipe_tc_be_rate_max);
-		s->tc_ov_wm = s->tc_ov_wm_max;
 		s->profile = subport_profile_id;
 	}
 
@@ -1417,17 +1452,20 @@ rte_sched_pipe_config(struct rte_sched_port *port,
 		double pipe_tc_be_rate =
 			(double) params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
 			/ (double) params->tc_period;
-		uint32_t tc_be_ov = s->tc_ov;
 
-		/* Unplug pipe from its subport */
-		s->tc_ov_n -= params->tc_ov_weight;
-		s->tc_ov_rate -= pipe_tc_be_rate;
-		s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
+		if (s->is_tc_ov_enabled) {
+			uint32_t tc_be_ov = s->tc_ov;
 
-		if (s->tc_ov != tc_be_ov) {
-			RTE_LOG(DEBUG, SCHED,
-				"Subport %u Best-effort TC oversubscription is OFF (%.4lf >= %.4lf)\n",
-				subport_id, subport_tc_be_rate, s->tc_ov_rate);
+			/* Unplug pipe from its subport */
+			s->tc_ov_n -= params->tc_ov_weight;
+			s->tc_ov_rate -= pipe_tc_be_rate;
+			s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
+
+			if (s->tc_ov != tc_be_ov) {
+				RTE_LOG(DEBUG, SCHED,
+					"Subport %u Best-effort TC oversubscription is OFF
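A minimal usage sketch of the new API, assuming "port" and "subport_id"
come from the usual rte_sched_port_config()/rte_sched_subport_config()
flow (the wrapper function is illustrative, not part of the patch):

#include <stdbool.h>
#include <rte_sched.h>

/* Enable best-effort TC oversubscription on one subport at runtime;
 * TC OV stays disabled everywhere else by default. */
static int
enable_be_tc_ov(struct rte_sched_port *port, uint32_t subport_id)
{
	int ret = rte_sched_subport_tc_ov_config(port, subport_id, true);

	if (ret != 0)
		return ret; /* -EINVAL on bad port/subport id, per the patch */
	return 0;
}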
Re: [PATCH] event/cnxk: add SLMTST support to Tx adapter
Pavan Nikhilesh writes:

> Scheduled LMTST uses the in-core LSW (LMTST scheduling widget) to
> coordinate with SSO and send an LMTST to the destination coprocessor
> without the need for the core to be the head of the scheduling context
> it is currently holding.
>
> Use SLMTST to send mbuf to NIX-TX for transmit. SLMTST only supports
> transmitting a single WQE.
>
> Signed-off-by: Pavan Nikhilesh
> ---
> Depends-on: Series-22634
>
>  drivers/common/cnxk/hw/ssow.h        |  7 +++
>  drivers/common/cnxk/roc_dev_priv.h   |  6 ++
>  drivers/common/cnxk/roc_io.h         |  8 ++
>  drivers/common/cnxk/roc_io_generic.h |  7 +++
>  drivers/common/cnxk/roc_nix.c        | 19 +++
>  drivers/common/cnxk/roc_nix.h        |  4 ++
>  drivers/common/cnxk/roc_sso.c        | 23 +++
>  drivers/common/cnxk/roc_sso.h        |  2 ++
>  drivers/common/cnxk/version.map      |  2 ++
>  drivers/event/cnxk/cn10k_eventdev.c  | 11 +++
>  drivers/event/cnxk/cn10k_worker.h    | 19 +-
>  drivers/event/cnxk/cnxk_eventdev.h   |  2 +-
>  12 files changed, 103 insertions(+), 7 deletions(-)
>

Acked-by: Ray Kinsella

--
Regards,
Ray K
[PATCH v7 00/18] add virtio_blk device support to vdpa/ifc
This patch set adds virtio_blk device support to the vdpa/ifc driver.

With a lot of similarities, I re-use part of the vdpa/ifc driver. The
virtio net and blk devices are distinguished by device id, and
device-specific features and ops are implemented.

An example is added to vdpa to support the virtio_blk device.

To support blk device live migration, some modifications to the vhost
lib are needed. The dev_conf op is performed only under the
VHOST_USER_SET_VRING_CALL msg.

v7:
 Check on expected fd num in new vhost msg handler.
 Sanity check on vhost msg size.
 Fix typo.
 Add commit log to help understand code.
 Remove duplicated code.
 Add new API to get vDPA device type.

v6:
 Fix some commit logs.
 Add vhost socket in log output to make it more user-friendly.
 When driver ops fail, just output some log, do not break the message
 handler.
 Check vhost msg size in msg handler.

v5:
 Fix some coding style issues.

v4:
 Add arg "isblk" to the vdpa example to specify a block device, fix
 some issues in the example.
 Make sure code specific to the block device does not affect the net
 device.

v3:
 Fix some compile issues.

v2:
 Fix some coding style issues.

Andy Pei (18):
  vdpa/ifc: add support for virtio blk device
  vhost: add vDPA ops for blk device
  vhost: add vhost msg support
  vdpa/ifc: add blk ops for ifc device
  vdpa/ifc: add vDPA interrupt for blk device
  vdpa/ifc: add block device SW live-migration
  vhost: add API to get vDPA device type
  vdpa/ifc: add get device type ops to ifc driver
  examples/vdpa: add vDPA blk support in example
  usertools: add support for virtio blk device
  vdpa/ifc: add set vring state for blk device
  vdpa/ifc: add some log at vDPA launch before qemu connect
  vdpa/ifc: read virtio max queues from hardware
  vdpa/ifc: add interrupt and handle for virtio blk
  vdpa/ifc: add is blk flag to ifcvf HW struct
  vdpa/ifc/base: access correct register for blk device
  vdpa/ifc: blk device pause without no inflight IO
  vhost: make sure each queue callfd is configured

 drivers/vdpa/ifc/base/ifcvf.c    |  36 +++-
 drivers/vdpa/ifc/base/ifcvf.h    |  20 +-
 drivers/vdpa/ifc/ifcvf_vdpa.c    | 392 +++++++++++++++++++++++++++---
 examples/vdpa/main.c             |  57 ++++++
 examples/vdpa/vdpa_blk_compact.h |  65 +++++++
 lib/vhost/rte_vhost.h            |  17 ++
 lib/vhost/socket.c               |  39 ++++
 lib/vhost/vdpa_driver.h          |  11 +-
 lib/vhost/version.map            |   2 +
 lib/vhost/vhost_user.c           |  97 ++++++++++
 lib/vhost/vhost_user.h           |  13 ++
 usertools/dpdk-devbind.py        |   5 +-
 12 files changed, 730 insertions(+), 24 deletions(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.h

--
1.8.3.1
[PATCH v7 01/18] vdpa/ifc: add support for virtio blk device
Re-use the vdpa/ifc code, distinguishing blk and net devices by
pci_device_id. Blk and net devices are implemented with their proper
features and ops.

Signed-off-by: Andy Pei
Reviewed-by: Maxime Coquelin
---
 drivers/vdpa/ifc/base/ifcvf.h | 16 +++++++++++++---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 92 +++++++++++++++++++++++++++++------
 2 files changed, 98 insertions(+), 10 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 573a35f..01522c6 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -5,8 +5,17 @@
 #ifndef _IFCVF_H_
 #define _IFCVF_H_
 
+#include
 #include "ifcvf_osdep.h"
 
+#define IFCVF_NET	0
+#define IFCVF_BLK	1
+
+/* for BLK */
+#define IFCVF_BLK_TRANSITIONAL_DEVICE_ID	0x1001
+#define IFCVF_BLK_MODERN_DEVICE_ID		0x1042
+#define IFCVF_BLK_DEVICE_ID			0x0002
+
 #define IFCVF_VENDOR_ID		0x1AF4
 #define IFCVF_DEVICE_ID		0x1041
 #define IFCVF_SUBSYS_VENDOR_ID	0x8086
@@ -57,7 +66,6 @@
 
 #define IFCVF_32_BIT_MASK	0xFFFFFFFF
 
-
 struct ifcvf_pci_cap {
 	u8 cap_vndr;	/* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;	/* Generic PCI field: next ptr. */
@@ -126,7 +134,11 @@ struct ifcvf_hw {
 	u8     notify_region;
 	u32    notify_off_multiplier;
 	struct ifcvf_pci_common_cfg *common_cfg;
-	struct ifcvf_net_config *dev_cfg;
+	union {
+		struct ifcvf_net_config *net_cfg;
+		struct virtio_blk_config *blk_cfg;
+		void *dev_cfg;
+	};
 	u8     *isr;
 	u16    *notify_base;
 	u16    *notify_addr[IFCVF_MAX_QUEUES * 2];
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 9f05595..e3210a8 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -58,6 +58,7 @@ struct ifcvf_internal {
 	struct rte_vdpa_device *vdev;
 	uint16_t max_queues;
 	uint64_t features;
+	int device_type;
 	rte_atomic32_t started;
 	rte_atomic32_t dev_attached;
 	rte_atomic32_t running;
@@ -75,6 +76,12 @@ struct internal_list {
 	struct ifcvf_internal *internal;
 };
 
+/* vdpa device info includes device features and device operation. */
+struct rte_vdpa_dev_info {
+	uint64_t features;
+	struct rte_vdpa_dev_ops *ops;
+};
+
 TAILQ_HEAD(internal_list_head, internal_list);
 static struct internal_list_head internal_list =
 	TAILQ_HEAD_INITIALIZER(internal_list);
@@ -1167,6 +1174,48 @@ struct internal_list {
 	return 0;
 }
 
+static int16_t
+ifcvf_pci_get_device_type(struct rte_pci_device *pci_dev)
+{
+	uint16_t pci_device_id = pci_dev->id.device_id;
+	uint16_t device_id;
+
+	if (pci_device_id < 0x1000 || pci_device_id > 0x107f) {
+		DRV_LOG(ERR, "Probe device is not a virtio device\n");
+		return -1;
+	}
+
+	if (pci_device_id < 0x1040) {
+		/* Transitional devices: use the PCI subsystem device id as
+		 * virtio device id, same as legacy driver always did.
+		 */
+		device_id = pci_dev->id.subsystem_device_id;
+	} else {
+		/* Modern devices: simply use PCI device id,
+		 * but start from 0x1040.
+		 */
+		device_id = pci_device_id - 0x1040;
+	}
+
+	return device_id;
+}
+
+struct rte_vdpa_dev_info dev_info[] = {
+	{
+		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
+			    (1ULL << VIRTIO_NET_F_CTRL_VQ) |
+			    (1ULL << VIRTIO_NET_F_STATUS) |
+			    (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = &ifcvf_ops,
+	},
+	{
+		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+			    (1ULL << VHOST_F_LOG_ALL),
+		.ops = NULL,
+	},
+};
+
 static int
 ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		struct rte_pci_device *pci_dev)
@@ -1178,6 +1227,7 @@ struct internal_list {
 	int sw_fallback_lm = 0;
 	struct rte_kvargs *kvlist = NULL;
 	int ret = 0;
+	int16_t device_id;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
@@ -1227,13 +1277,24 @@ struct internal_list {
 	internal->configured = 0;
 	internal->max_queues = IFCVF_MAX_QUEUES;
 	features = ifcvf_get_features(&internal->hw);
-	internal->features = (features &
-		~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) |
-		(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
-		(1ULL << VIRTIO_NET_F_CTRL_VQ) |
-		(1ULL << VIRTIO_NET_F_STATUS) |
-		(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
-		(1ULL << VHOST_F_LOG_ALL);
+
+	device_id = ifcvf_pci_get_devi
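To make the ID mapping concrete, here is a standalone restatement with
the two virtio-blk cases worked out (values from the patch): a modern
device with PCI id 0x1042 maps to 0x1042 - 0x1040 = 0x0002, while a
transitional device (PCI id 0x1001) reports 0x0002 through its PCI
subsystem device id; 0x0002 is IFCVF_BLK_DEVICE_ID, the virtio-blk
device type. The helper name is illustrative:

#include <stdint.h>

/* Map a virtio PCI id (0x1000..0x107f) to the virtio device type. */
static uint16_t
virtio_device_id(uint16_t pci_device_id, uint16_t subsystem_device_id)
{
	if (pci_device_id < 0x1040)
		return subsystem_device_id;   /* transitional device */
	return pci_device_id - 0x1040;        /* modern device */
}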
[PATCH v7 02/18] vhost: add vDPA ops for blk device
Get_config and set_config are necessary ops for blk device. Add
get_config and set_config ops to vDPA ops.

Signed-off-by: Andy Pei
Reviewed-by: Maxime Coquelin
---
 lib/vhost/vdpa_driver.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index 88138be..e59a834 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -72,8 +72,12 @@ struct rte_vdpa_dev_ops {
 	/** Reset statistics of the queue */
 	int (*reset_stats)(struct rte_vdpa_device *dev, int qid);
 
-	/** Reserved for future extension */
-	void *reserved[2];
+	/** Get the device configuration space */
+	int (*get_config)(int vid, uint8_t *config, uint32_t len);
+
+	/** Set the device configuration space */
+	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
+		      uint32_t size, uint32_t flags);
 };
 
 /**

--
1.8.3.1
[PATCH v7 03/18] vhost: add vhost msg support
Add support for VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG. The
VHOST_USER_GET_CONFIG and VHOST_USER_SET_CONFIG messages are only
supported by virtio blk vDPA devices.

Signed-off-by: Andy Pei
---
 lib/vhost/vhost_user.c | 83 ++++++++++++++++++++++++++++++++++++++++++
 lib/vhost/vhost_user.h | 13 +++++++
 2 files changed, 96 insertions(+)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 1d39067..e925428 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -80,6 +80,8 @@
 	[VHOST_USER_NET_SET_MTU]  = "VHOST_USER_NET_SET_MTU",
 	[VHOST_USER_SET_SLAVE_REQ_FD]  = "VHOST_USER_SET_SLAVE_REQ_FD",
 	[VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
+	[VHOST_USER_GET_CONFIG] = "VHOST_USER_GET_CONFIG",
+	[VHOST_USER_SET_CONFIG] = "VHOST_USER_SET_CONFIG",
 	[VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS",
 	[VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
 	[VHOST_USER_POSTCOPY_ADVISE] = "VHOST_USER_POSTCOPY_ADVISE",
@@ -2542,6 +2544,85 @@ static int is_vring_iotlb(struct virtio_net *dev,
 }
 
 static int
+vhost_user_get_config(struct virtio_net **pdev,
+		      struct vhu_msg_context *ctx,
+		      int main_fd __rte_unused)
+{
+	struct virtio_net *dev = *pdev;
+	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
+	int ret = 0;
+
+	if (validate_msg_fds(dev, ctx, 0) != 0)
+		return RTE_VHOST_MSG_RESULT_ERR;
+
+	if (vdpa_dev->ops->get_config) {
+		ret = vdpa_dev->ops->get_config(dev->vid,
+					   ctx->msg.payload.cfg.region,
+					   ctx->msg.payload.cfg.size);
+		if (ret != 0) {
+			ctx->msg.size = 0;
+			VHOST_LOG_CONFIG(ERR,
+					 "(%s) get_config() return error!\n",
+					 dev->ifname);
+		}
+	} else {
+		VHOST_LOG_CONFIG(ERR, "(%s) get_config() not supported!\n",
+				 dev->ifname);
+	}
+
+	return RTE_VHOST_MSG_RESULT_REPLY;
+}
+
+static int
+vhost_user_set_config(struct virtio_net **pdev,
+		      struct vhu_msg_context *ctx,
+		      int main_fd __rte_unused)
+{
+	struct virtio_net *dev = *pdev;
+	struct rte_vdpa_device *vdpa_dev = dev->vdpa_dev;
+	int ret = 0;
+
+	if (validate_msg_fds(dev, ctx, 0) != 0)
+		return RTE_VHOST_MSG_RESULT_ERR;
+
+	if (ctx->msg.size != sizeof(struct vhost_user_config)) {
+		VHOST_LOG_CONFIG(ERR,
+			"(%s) invalid set config msg size: %"PRIu32" != %d\n",
+			dev->ifname, ctx->msg.size,
+			(int)sizeof(struct vhost_user_config));
+		goto out;
+	}
+
+	if (ctx->msg.payload.cfg.size > VHOST_USER_MAX_CONFIG_SIZE) {
+		VHOST_LOG_CONFIG(ERR,
+			"(%s) vhost_user_config size: %"PRIu32", should not be larger than %d\n",
+			dev->ifname, ctx->msg.payload.cfg.size,
+			VHOST_USER_MAX_CONFIG_SIZE);
+		goto out;
+	}
+
+	if (vdpa_dev->ops->set_config) {
+		ret = vdpa_dev->ops->set_config(dev->vid,
+			ctx->msg.payload.cfg.region,
+			ctx->msg.payload.cfg.offset,
+			ctx->msg.payload.cfg.size,
+			ctx->msg.payload.cfg.flags);
+		if (ret)
+			VHOST_LOG_CONFIG(ERR,
+					 "(%s) set_config() return error!\n",
+					 dev->ifname);
+	} else {
+		VHOST_LOG_CONFIG(ERR, "(%s) set_config() not supported!\n",
+				 dev->ifname);
+	}
+
+	return RTE_VHOST_MSG_RESULT_OK;
+
+out:
+	return RTE_VHOST_MSG_RESULT_ERR;
+}
+
+static int
 vhost_user_iotlb_msg(struct virtio_net **pdev,
 			struct vhu_msg_context *ctx,
 			int main_fd __rte_unused)
@@ -2782,6 +2863,8 @@ typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
 	[VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
 	[VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
 	[VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
+	[VHOST_USER_GET_CONFIG] = vhost_user_get_config,
+	[VHOST_USER_SET_CONFIG] = vhost_user_set_config,
 	[VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise,
 	[VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
 	[VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end,
diff --git a/lib/vhost/vhost_user.h b/lib/vhost/vhost_user.h
index c946cc2..97cfb2f 100644
--- a/lib/vhost/vhost_user.h
+++ b/lib/vhost/vhost_user.h
@@ -50,6 +50,8 @@ VHOST_USE
[PATCH v7 04/18] vdpa/ifc: add blk ops for ifc device
For virtio blk device, re-use part of ifc driver ops. Implement
ifcvf_blk_get_config for virtio blk device. Support the
VHOST_USER_PROTOCOL_F_CONFIG feature for virtio blk device.

Signed-off-by: Andy Pei
Reviewed-by: Maxime Coquelin
---
 drivers/vdpa/ifc/base/ifcvf.h |  4 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 85 ++++++++++++++++++++++++++++++++++-
 2 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 01522c6..769c603 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -66,6 +66,10 @@
 
 #define IFCVF_32_BIT_MASK	0xFFFFFFFF
 
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG	9
+#endif
+
 struct ifcvf_pci_cap {
 	u8 cap_vndr;	/* Generic PCI field: PCI_CAP_ID_VNDR */
 	u8 cap_next;	/* Generic PCI field: next ptr. */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index e3210a8..8ee041f 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1088,6 +1088,10 @@ struct rte_vdpa_dev_info {
 	 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
 	 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
 	 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
+
+#define VDPA_BLK_PROTOCOL_FEATURES \
+	(1ULL << VHOST_USER_PROTOCOL_F_CONFIG)
+
 static int
 ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features)
 {
@@ -1200,6 +1204,85 @@ struct rte_vdpa_dev_info {
 	return device_id;
 }
 
+static int
+ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t len)
+{
+	struct virtio_blk_config *dev_cfg;
+	struct ifcvf_internal *internal;
+	struct rte_vdpa_device *vdev;
+	struct internal_list *list;
+	uint32_t i;
+	uint64_t capacity = 0;
+	uint8_t *byte;
+
+	if (len < sizeof(struct virtio_blk_config)) {
+		DRV_LOG(ERR, "Invalid len: %u, required: %u",
+			len, (uint32_t)sizeof(struct virtio_blk_config));
+		return -1;
+	}
+
+	vdev = rte_vhost_get_vdpa_device(vid);
+	list = find_internal_resource_by_vdev(vdev);
+	if (list == NULL) {
+		DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+		return -1;
+	}
+
+	internal = list->internal;
+
+	for (i = 0; i < sizeof(struct virtio_blk_config); i++)
+		config[i] = *((u8 *)internal->hw.blk_cfg + i);
+
+	dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg;
+
+	/* cannot read 64-bit register in one attempt, so read byte by byte.
+	 */
+	for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+		byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+		capacity |= (uint64_t)*byte << (i * 8);
+	}
+	DRV_LOG(INFO, "capacity  : %"PRIu64"G", capacity >> 21);
+
+	DRV_LOG(INFO, "size_max  : 0x%08x", dev_cfg->size_max);
+	DRV_LOG(INFO, "seg_max   : 0x%08x", dev_cfg->seg_max);
+	DRV_LOG(INFO, "blk_size  : 0x%08x", dev_cfg->blk_size);
+	DRV_LOG(INFO, "geometry");
+	DRV_LOG(INFO, "    cylinders: %u", dev_cfg->geometry.cylinders);
+	DRV_LOG(INFO, "    heads    : %u", dev_cfg->geometry.heads);
+	DRV_LOG(INFO, "    sectors  : %u", dev_cfg->geometry.sectors);
+	DRV_LOG(INFO, "num_queues: 0x%08x", dev_cfg->num_queues);
+
+	DRV_LOG(INFO, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]\n",
+		config[0], config[1], config[2], config[3], config[4],
+		config[5], config[6], config[7]);
+	return 0;
+}
+
+static int
+ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
+	uint64_t *features)
+{
+	RTE_SET_USED(vdev);
+
+	*features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
+	*features |= VDPA_BLK_PROTOCOL_FEATURES;
+	return 0;
+}
+
+static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
+	.get_queue_num = ifcvf_get_queue_num,
+	.get_features = ifcvf_get_vdpa_features,
+	.set_features = ifcvf_set_features,
+	.get_protocol_features = ifcvf_blk_get_protocol_features,
+	.dev_conf = ifcvf_dev_config,
+	.dev_close = ifcvf_dev_close,
+	.set_vring_state = NULL,
+	.migration_done = NULL,
+	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
+	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
+	.get_notify_area = ifcvf_get_notify_area,
+	.get_config = ifcvf_blk_get_config,
+};
+
 struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
@@ -1212,7 +1295,7 @@ struct rte_vdpa_dev_info dev_info[] = {
 	{
 		.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
 			    (1ULL << VHOST_F_LOG_ALL),
-		.ops = NULL,
+		.ops = &ifcvf_blk_ops,
 	},
 };

--
1.8.3.1
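The byte-by-byte capacity read above generalizes to any 64-bit config
field that cannot be fetched in a single access; a generic restatement
of that little-endian assembly loop (helper name is illustrative):

#include <stdint.h>

/* Assemble a 64-bit value from eight single-byte reads of a config
 * window that does not support wider accesses. */
static uint64_t
read_u64_bytewise(const volatile uint8_t *reg)
{
	uint64_t val = 0;
	uint32_t i;

	for (i = 0; i < sizeof(uint64_t); i++)
		val |= (uint64_t)reg[i] << (i * 8);
	return val;
}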
[PATCH v7 05/18] vdpa/ifc: add vDPA interrupt for blk device
For the block device type, we use one queue to transfer both read and
write requests, so we have to relay commands on all queues.

Signed-off-by: Andy Pei
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 8ee041f..07fc3ca 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -370,6 +370,7 @@ struct rte_vdpa_dev_info {
 	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
 	irq_set->start = 0;
 	fd_ptr = (int *)&irq_set->data;
+	/* The first interrupt is for the configure space change notification */
 	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
 		rte_intr_fd_get(internal->pdev->intr_handle);
 
@@ -379,7 +380,13 @@ struct rte_vdpa_dev_info {
 	for (i = 0; i < nr_vring; i++) {
 		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
 		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
-		if ((i & 1) == 0 && m_rx == true) {
+		if (m_rx == true &&
+		    ((i & 1) == 0 || internal->device_type == IFCVF_BLK)) {
+			/* For the net we only need to relay rx queue,
+			 * which will change the mem of VM.
+			 * For the blk we need to relay all the read cmd
+			 * of each queue
+			 */
 			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
 			if (fd < 0) {
 				DRV_LOG(ERR, "can't setup eventfd: %s",

--
1.8.3.1
[PATCH v7 06/18] vdpa/ifc: add block device SW live-migration
Add SW live-migration support to block device.

Signed-off-by: Andy Pei
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 33 +++++++++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 07fc3ca..8a260b7 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -312,6 +312,7 @@ struct rte_vdpa_dev_info {
 vdpa_ifcvf_stop(struct ifcvf_internal *internal)
 {
 	struct ifcvf_hw *hw = &internal->hw;
+	struct rte_vhost_vring vq;
 	uint32_t i;
 	int vid;
 	uint64_t features = 0;
@@ -319,6 +320,22 @@ struct rte_vdpa_dev_info {
 	uint64_t len;
 
 	vid = internal->vid;
+
+	/* to make sure no packet is lost for blk device
+	 * do not stop until last_avail_idx == last_used_idx
+	 */
+	if (internal->device_type == IFCVF_BLK) {
+		for (i = 0; i < hw->nr_vring; i++) {
+			rte_vhost_get_vhost_vring(internal->vid, i, &vq);
+			while (vq.avail->idx != vq.used->idx) {
+				ifcvf_notify_queue(hw, i);
+				usleep(10);
+			}
+			hw->vring[i].last_avail_idx = vq.avail->idx;
+			hw->vring[i].last_used_idx = vq.used->idx;
+		}
+	}
+
 	ifcvf_stop_hw(hw);
 
 	for (i = 0; i < hw->nr_vring; i++)
@@ -642,8 +659,10 @@ struct rte_vdpa_dev_info {
 		}
 		hw->vring[i].avail = gpa;
 
-		/* Direct I/O for Tx queue, relay for Rx queue */
-		if (i & 1) {
+		/* NET: Direct I/O for Tx queue, relay for Rx queue
+		 * BLK: relay every queue
+		 */
+		if ((internal->device_type == IFCVF_NET) && (i & 1)) {
 			gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
 			if (gpa == 0) {
 				DRV_LOG(ERR, "Fail to get GPA for used ring.");
@@ -693,8 +712,12 @@ struct rte_vdpa_dev_info {
 
 	for (i = 0; i < hw->nr_vring; i++) {
 		/* synchronize remaining new used entries if any */
-		if ((i & 1) == 0)
+		if (internal->device_type == IFCVF_NET) {
+			if ((i & 1) == 0)
+				update_used_ring(internal, i);
+		} else if (internal->device_type == IFCVF_BLK) {
 			update_used_ring(internal, i);
+		}
 
 		rte_vhost_get_vhost_vring(vid, i, &vq);
 		len = IFCVF_USED_RING_LEN(vq.size);
@@ -756,7 +779,9 @@ struct rte_vdpa_dev_info {
 		}
 	}
 
-	for (qid = 0; qid < q_num; qid += 2) {
+	for (qid = 0; qid < q_num; qid += 1) {
+		if ((internal->device_type == IFCVF_NET) && (qid & 1))
+			continue;
 		ev.events = EPOLLIN | EPOLLPRI;
 		/* leave a flag to mark it's for interrupt */
 		ev.data.u64 = 1 | qid << 1 |

--
1.8.3.1
[PATCH v7 07/18] vhost: add API to get vDPA device type
Vhost backends of different devices have different features. Add an API
to get the vDPA device type (net device or blk device currently), so
users can set different features for different kinds of devices.

Signed-off-by: Andy Pei
---
 lib/vhost/rte_vhost.h   | 17 +++++++++++++++++
 lib/vhost/socket.c      | 39 +++++++++++++++++++++++++++++++++++++++
 lib/vhost/vdpa_driver.h |  3 +++
 lib/vhost/version.map   |  2 ++
 4 files changed, 61 insertions(+)

diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h
index c733f85..c977a24 100644
--- a/lib/vhost/rte_vhost.h
+++ b/lib/vhost/rte_vhost.h
@@ -117,6 +117,9 @@
 
 #define RTE_MAX_VHOST_DEVICE	1024
 
+#define VDPA_DEVICE_TYPE_NET 0
+#define VDPA_DEVICE_TYPE_BLK 1
+
 struct rte_vdpa_device;
 
 /**
@@ -486,6 +489,20 @@ struct rte_vdpa_device *
 rte_vhost_driver_get_vdpa_device(const char *path);
 
 /**
+ * Get the device type of the vdpa device.
+ *
+ * @param path
+ *  The vhost-user socket file path
+ * @param type
+ *  the device type of the vdpa device
+ * @return
+ *  0 on success, -1 on failure
+ */
+__rte_experimental
+int
+rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type);
+
+/**
 * Set the feature bits the vhost-user driver supports.
 *
 * @param path
diff --git a/lib/vhost/socket.c b/lib/vhost/socket.c
index b304339..7da90e8 100644
--- a/lib/vhost/socket.c
+++ b/lib/vhost/socket.c
@@ -619,6 +619,45 @@ struct rte_vdpa_device *
 }
 
 int
+rte_vhost_driver_get_vdpa_dev_type(const char *path, uint32_t *type)
+{
+	struct vhost_user_socket *vsocket;
+	struct rte_vdpa_device *vdpa_dev;
+	uint32_t vdpa_type = 0;
+	int ret = 0;
+
+	pthread_mutex_lock(&vhost_user.mutex);
+	vsocket = find_vhost_user_socket(path);
+	if (!vsocket) {
+		VHOST_LOG_CONFIG(ERR,
+				 "(%s) socket file is not registered yet.\n",
+				 path);
+		ret = -1;
+		goto unlock_exit;
+	}
+
+	vdpa_dev = vsocket->vdpa_dev;
+	if (!vdpa_dev) {
+		ret = -1;
+		goto unlock_exit;
+	}
+
+	if (vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type) < 0) {
+		VHOST_LOG_CONFIG(ERR,
+			"(%s) failed to get vdpa dev type for socket file.\n",
+			path);
+		ret = -1;
+		goto unlock_exit;
+	}
+
+	*type = vdpa_type;
+
+unlock_exit:
+	pthread_mutex_unlock(&vhost_user.mutex);
+	return ret;
+}
+
+int
 rte_vhost_driver_disable_features(const char *path, uint64_t features)
 {
 	struct vhost_user_socket *vsocket;
diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index e59a834..9cbd7cd 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -78,6 +78,9 @@ struct rte_vdpa_dev_ops {
 	/** Set the device configuration space */
 	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
 		      uint32_t size, uint32_t flags);
+
+	/** get device type: net device, blk device... */
+	int (*get_dev_type)(struct rte_vdpa_device *dev, uint32_t *type);
 };
 
 /**
diff --git a/lib/vhost/version.map b/lib/vhost/version.map
index 0a66c58..fe4e8de 100644
--- a/lib/vhost/version.map
+++ b/lib/vhost/version.map
@@ -87,6 +87,8 @@ EXPERIMENTAL {
 
 	# added in 22.03
 	rte_vhost_async_dma_configure;
+
+	rte_vhost_driver_get_vdpa_dev_type;
 };
 
 INTERNAL {

--
1.8.3.1
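A minimal caller-side sketch of the new API (the helper name is
illustrative; patch 09/18 in this series uses the same call in the vdpa
example before selecting blk features):

#include <rte_vhost.h>

/* Return non-zero if the vDPA device behind a vhost-user socket is a
 * block device. */
static int
is_blk_vdpa_socket(const char *path)
{
	uint32_t type = 0;

	if (rte_vhost_driver_get_vdpa_dev_type(path, &type) != 0)
		return 0; /* socket not registered or no vDPA device */
	return type == VDPA_DEVICE_TYPE_BLK;
}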
[PATCH v7 08/18] vdpa/ifc: add get device type ops to ifc driver
Add get device type ops to ifc driver. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/ifcvf_vdpa.c | 10 ++ 1 file changed, 10 insertions(+) diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 8a260b7..99a6ab0 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1300,6 +1300,15 @@ struct rte_vdpa_dev_info { return 0; } +static int +ifcvf_blk_get_device_type(struct rte_vdpa_device *vdev, + uint32_t *type) +{ + RTE_SET_USED(vdev); + *type = VDPA_DEVICE_TYPE_BLK; + return 0; +} + static struct rte_vdpa_dev_ops ifcvf_blk_ops = { .get_queue_num = ifcvf_get_queue_num, .get_features = ifcvf_get_vdpa_features, @@ -1313,6 +1322,7 @@ struct rte_vdpa_dev_info { .get_vfio_device_fd = ifcvf_get_vfio_device_fd, .get_notify_area = ifcvf_get_notify_area, .get_config = ifcvf_blk_get_config, + .get_dev_type = ifcvf_blk_get_device_type, }; struct rte_vdpa_dev_info dev_info[] = { -- 1.8.3.1
[PATCH v7 09/18] examples/vdpa: add vDPA blk support in example
Add virtio blk device support to vDPA example. Signed-off-by: Andy Pei --- examples/vdpa/main.c | 57 +++ examples/vdpa/vdpa_blk_compact.h | 65 2 files changed, 122 insertions(+) create mode 100644 examples/vdpa/vdpa_blk_compact.h diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c index 5ab0765..2544141 100644 --- a/examples/vdpa/main.c +++ b/examples/vdpa/main.c @@ -20,6 +20,7 @@ #include #include #include +#include "vdpa_blk_compact.h" #define MAX_PATH_LEN 128 #define MAX_VDPA_SAMPLE_PORTS 1024 @@ -159,8 +160,54 @@ struct vdpa_port { }; static int +vdpa_blk_device_set_features_and_protocol(const char *path) +{ + uint64_t protocol_features = 0; + int ret; + + ret = rte_vhost_driver_set_features(path, VHOST_BLK_FEATURES); + if (ret != 0) { + RTE_LOG(ERR, VDPA, + "rte_vhost_driver_set_features for %s failed.\n", + path); + goto out; + } + + ret = rte_vhost_driver_disable_features(path, + VHOST_BLK_DISABLED_FEATURES); + if (ret != 0) { + RTE_LOG(ERR, VDPA, + "rte_vhost_driver_disable_features for %s failed.\n", + path); + goto out; + } + + ret = rte_vhost_driver_get_protocol_features(path, &protocol_features); + if (ret != 0) { + RTE_LOG(ERR, VDPA, + "rte_vhost_driver_get_protocol_features for %s failed.\n", + path); + goto out; + } + + protocol_features |= VHOST_BLK_PROTOCOL_FEATURES; + + ret = rte_vhost_driver_set_protocol_features(path, protocol_features); + if (ret != 0) { + RTE_LOG(ERR, VDPA, + "rte_vhost_driver_set_protocol_features for %s failed.\n", + path); + goto out; + } + +out: + return ret; +} + +static int start_vdpa(struct vdpa_port *vport) { + uint32_t device_type = 0; int ret; char *socket_path = vport->ifname; @@ -192,6 +239,16 @@ struct vdpa_port { "attach vdpa device failed: %s\n", socket_path); + ret = rte_vhost_driver_get_vdpa_dev_type(socket_path, &device_type); + if (ret == 0 && device_type == VDPA_DEVICE_TYPE_BLK) { + RTE_LOG(NOTICE, VDPA, "is a blk device\n"); + ret = vdpa_blk_device_set_features_and_protocol(socket_path); + if (ret != 0) + rte_exit(EXIT_FAILURE, + "set vhost blk driver features and protocol features failed: %s\n", + socket_path); + } + if (rte_vhost_driver_start(socket_path) < 0) rte_exit(EXIT_FAILURE, "start vhost driver failed: %s\n", diff --git a/examples/vdpa/vdpa_blk_compact.h b/examples/vdpa/vdpa_blk_compact.h new file mode 100644 index 000..136c3f6 --- /dev/null +++ b/examples/vdpa/vdpa_blk_compact.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2022 Intel Corporation + */ + +#ifndef _VDPA_BLK_COMPACT_H_ +#define _VDPA_BLK_COMPACT_H_ + +/** + * @file + * + * Device specific vhost lib + */ + +#include + +#include +#include + +/* Feature bits */ +#define VIRTIO_BLK_F_SIZE_MAX 1/* Indicates maximum segment size */ +#define VIRTIO_BLK_F_SEG_MAX 2/* Indicates maximum # of segments */ +#define VIRTIO_BLK_F_GEOMETRY 4/* Legacy geometry available */ +#define VIRTIO_BLK_F_BLK_SIZE 6/* Block size of disk is available */ +#define VIRTIO_BLK_F_TOPOLOGY 10 /* Topology information is available */ +#define VIRTIO_BLK_F_MQ 12 /* support more than one vq */ + +/* Legacy feature bits */ +#ifndef VIRTIO_BLK_NO_LEGACY +#define VIRTIO_BLK_F_BARRIER 0/* Does host support barriers? 
*/ +#define VIRTIO_BLK_F_SCSI 7/* Supports scsi command passthru */ +#define VIRTIO_BLK_F_CONFIG_WCE 11 /* Writeback mode available in config */ +#endif /* !VIRTIO_BLK_NO_LEGACY */ + +#ifndef VHOST_USER_F_PROTOCOL_FEATURES +#define VHOST_USER_F_PROTOCOL_FEATURES 30 +#endif + +#define VHOST_BLK_FEATURES_BASE ((1ULL << VHOST_F_LOG_ALL) | \ + (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \ + (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \ + (1ULL << VIRTIO_RING_F_EVENT_IDX) | \ + (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \ + (1ULL << VIRTIO_F_VERSION_1)) + +#define VHOST_BLK_DISABLED_FEATURES_BASE ((1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \ + (1ULL << VIRTIO_RING_F_EVENT_IDX)) + +#define VHOST_BLK_FEATURES (VHOST_BLK_FEATURES_BASE | \ + (1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \ + (1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \ + (1ULL << VIRTIO_BL
[PATCH v7 10/18] usertools: add support for virtio blk device
Add virtio blk device support to devbind. Signed-off-by: Andy Pei --- usertools/dpdk-devbind.py | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py index ace4627..7231be4 100755 --- a/usertools/dpdk-devbind.py +++ b/usertools/dpdk-devbind.py @@ -72,6 +72,9 @@ cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4', 'SVendor': None, 'SDevice': None} +virtio_blk = {'Class': '01', 'Vendor': "1af4", 'Device': '1001', +'SVendor': None, 'SDevice': None} + network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class] baseband_devices = [acceleration_class] crypto_devices = [encryption_class, intel_processor_class] @@ -82,7 +85,7 @@ compress_devices = [cavium_zip] regex_devices = [cn9k_ree] misc_devices = [cnxk_bphy, cnxk_bphy_cgx, cnxk_inl_dev, -intel_ntb_skx, intel_ntb_icx] +intel_ntb_skx, intel_ntb_icx, virtio_blk] # global dict ethernet devices present. Dictionary indexed by PCI address. # Each device within this is itself a dictionary of device properties -- 1.8.3.1
[PATCH v7 11/18] vdpa/ifc: add set vring state for blk device
The set_vring_state op is mandatory, so add a set_vring_state callback for the blk device. For now it is a stub that accepts the call and performs no action. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/ifcvf_vdpa.c | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 99a6ab0..ca49bc3 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1290,6 +1290,16 @@ struct rte_vdpa_dev_info { } static int +ifcvf_blk_set_vring_state(int vid, int vring, int state) +{ + RTE_SET_USED(vid); + RTE_SET_USED(vring); + RTE_SET_USED(state); + + return 0; +} + +static int ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features) { @@ -1316,7 +1326,7 @@ struct rte_vdpa_dev_info { .get_protocol_features = ifcvf_blk_get_protocol_features, .dev_conf = ifcvf_dev_config, .dev_close = ifcvf_dev_close, - .set_vring_state = NULL, + .set_vring_state = ifcvf_blk_set_vring_state, .migration_done = NULL, .get_vfio_group_fd = ifcvf_get_vfio_group_fd, .get_vfio_device_fd = ifcvf_get_vfio_device_fd, -- 1.8.3.1
[PATCH v7 12/18] vdpa/ifc: add some log at vDPA launch before QEMU connects
Log the virtio blk device config space information at vDPA launch, before QEMU connects. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/ifcvf_vdpa.c | 28 1 file changed, 28 insertions(+) diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index ca49bc3..4060a44 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1363,6 +1363,9 @@ struct rte_vdpa_dev_info dev_info[] = { struct rte_kvargs *kvlist = NULL; int ret = 0; int16_t device_id; + uint64_t capacity = 0; + uint8_t *byte; + uint32_t i; if (rte_eal_process_type() != RTE_PROC_PRIMARY) return 0; @@ -1429,6 +1432,31 @@ struct rte_vdpa_dev_info dev_info[] = { internal->features = features & ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); internal->features |= dev_info[IFCVF_BLK].features; + + /* cannot read 64-bit register in one attempt, +* so read byte by byte. +*/ + for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) { + byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i; + capacity |= (uint64_t)*byte << (i * 8); + } + DRV_LOG(INFO, "capacity : %"PRIu64"G", capacity >> 21); + + DRV_LOG(INFO, "size_max : 0x%08x", + internal->hw.blk_cfg->size_max); + DRV_LOG(INFO, "seg_max : 0x%08x", + internal->hw.blk_cfg->seg_max); + DRV_LOG(INFO, "blk_size : 0x%08x", + internal->hw.blk_cfg->blk_size); + DRV_LOG(INFO, "geometry"); + DRV_LOG(INFO, "cylinders: %u", + internal->hw.blk_cfg->geometry.cylinders); + DRV_LOG(INFO, "heads: %u", + internal->hw.blk_cfg->geometry.heads); + DRV_LOG(INFO, "sectors : %u", + internal->hw.blk_cfg->geometry.sectors); + DRV_LOG(INFO, "num_queues: 0x%08x", + internal->hw.blk_cfg->num_queues); } list->internal = internal; -- 1.8.3.1
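The byte-by-byte capacity read above generalizes to any 64-bit device register that cannot be read in a single access. A standalone sketch of the same little-endian reassembly, with hypothetical names:

#include <stdint.h>

/* Sketch: reassemble a 64-bit little-endian register value from
 * single-byte reads, for devices that reject wider accesses.
 */
static uint64_t
read_u64_bytewise(const volatile uint8_t *reg)
{
	uint64_t val = 0;
	unsigned int i;

	for (i = 0; i < sizeof(val); i++)
		val |= (uint64_t)reg[i] << (i * 8);
	return val;
}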
[PATCH v7 13/18] vdpa/ifc: read virtio max queues from hardware
Previously, max_queues was fixed at IFCVF_MAX_QUEUES. Now max_queues is the minimum of IFCVF_MAX_QUEUES and the num_queues value read from hardware. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/ifcvf_vdpa.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 4060a44..5a8cf1c 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1457,6 +1457,10 @@ struct rte_vdpa_dev_info dev_info[] = { internal->hw.blk_cfg->geometry.sectors); DRV_LOG(INFO, "num_queues: 0x%08x", internal->hw.blk_cfg->num_queues); + + /* reset max_queues here, to keep the modification minimal */ + internal->max_queues = RTE_MIN(IFCVF_MAX_QUEUES, + internal->hw.blk_cfg->num_queues); } list->internal = internal; -- 1.8.3.1
[PATCH v7 14/18] vdpa/ifc: add interrupt and handle for virtio blk
Create a thread to poll and relay config space change interrupts. Use VHOST_USER_SLAVE_CONFIG_CHANGE_MSG to inform QEMU. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/ifcvf_vdpa.c | 112 ++ 1 file changed, 112 insertions(+) diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 5a8cf1c..0e94e1f 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -53,7 +53,9 @@ struct ifcvf_internal { int vfio_group_fd; int vfio_dev_fd; pthread_t tid; /* thread for notify relay */ + pthread_t intr_tid; /* thread for intr relay */ int epfd; + int csc_fd; int vid; struct rte_vdpa_device *vdev; uint16_t max_queues; @@ -558,6 +560,107 @@ struct rte_vdpa_dev_info { return 0; } +static void +virtio_interrupt_handler(struct ifcvf_internal *internal) +{ + int vid = internal->vid; + int ret; + + ret = rte_vhost_slave_config_change(vid, 1); + if (ret) + DRV_LOG(ERR, "failed to notify the guest about configuration space change."); +} + +static void * +intr_relay(void *arg) +{ + struct ifcvf_internal *internal = (struct ifcvf_internal *)arg; + struct epoll_event csc_event; + struct epoll_event ev; + uint64_t buf; + int nbytes; + int csc_fd, csc_val = 0; + + csc_fd = epoll_create(1); + if (csc_fd < 0) { + DRV_LOG(ERR, "failed to create epoll for config space change."); + return NULL; + } + + ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP; + ev.data.fd = rte_intr_fd_get(internal->pdev->intr_handle); + if (epoll_ctl(csc_fd, EPOLL_CTL_ADD, + rte_intr_fd_get(internal->pdev->intr_handle), &ev) < 0) { + DRV_LOG(ERR, "epoll add error: %s", strerror(errno)); + return NULL; + } + + internal->csc_fd = csc_fd; + + for (;;) { + csc_val = epoll_wait(csc_fd, &csc_event, 1, -1); + if (csc_val < 0) { + if (errno == EINTR) + continue; + DRV_LOG(ERR, "epoll_wait return fail\n"); + return NULL; + } else if (csc_val == 0) { + continue; + } else { + /* csc_val > 0 */ + nbytes = read(csc_event.data.fd, &buf, 8); + if (nbytes < 0) { + if (errno == EINTR || errno == EWOULDBLOCK) + continue; + DRV_LOG(ERR, "Error reading from file descriptor %d: %s\n", + csc_event.data.fd, + strerror(errno)); + return NULL; + } else if (nbytes == 0) { + DRV_LOG(ERR, "Read nothing from file descriptor %d\n", + csc_event.data.fd); + continue; + } else { + virtio_interrupt_handler(internal); + } + } + } + + return NULL; +} + +static int +setup_intr_relay(struct ifcvf_internal *internal) +{ + int ret; + + ret = pthread_create(&internal->intr_tid, NULL, intr_relay, + (void *)internal); + if (ret) { + DRV_LOG(ERR, "failed to create intr relay pthread."); + return -1; + } + return 0; +} + +static int +unset_intr_relay(struct ifcvf_internal *internal) +{ + void *status; + + if (internal->intr_tid) { + pthread_cancel(internal->intr_tid); + pthread_join(internal->intr_tid, &status); + } + internal->intr_tid = 0; + + if (internal->csc_fd >= 0) + close(internal->csc_fd); + internal->csc_fd = -1; + + return 0; +} + static int update_datapath(struct ifcvf_internal *internal) { @@ -584,10 +687,16 @@ struct rte_vdpa_dev_info { if (ret) goto err; + ret = setup_intr_relay(internal); + if (ret) + goto err; + rte_atomic32_set(&internal->running, 1); } else if (rte_atomic32_read(&internal->running) && (!rte_atomic32_read(&internal->started) || !rte_atomic32_read(&internal->dev_attached))) { + ret = unset_intr_relay(internal); + ret = unset_notify_relay(internal); if (ret) goto err; @@ -880,6 +989,9 @@ struct rte_vdpa_dev_info { /* stop the direct IO data path */ unset_notify_relay(internal); vdpa_ifcvf_stop(internal); +
unset_intr_relay(internal);
[PATCH v7 15/18] vdpa/ifc: add is blk flag to ifcvf HW struct
Add is_blk flag to ifcvf_hw, and init is_blk during probe. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/base/ifcvf.h | 1 + drivers/vdpa/ifc/ifcvf_vdpa.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index 769c603..8591ef1 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -149,6 +149,7 @@ struct ifcvf_hw { u8 *lm_cfg; struct vring_info vring[IFCVF_MAX_QUEUES * 2]; u8 nr_vring; + u8 is_blk; struct ifcvf_pci_mem_resource mem_resource[IFCVF_PCI_MAX_RESOURCE]; }; diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 0e94e1f..4923bc1 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1536,11 +1536,13 @@ struct rte_vdpa_dev_info dev_info[] = { if (device_id == VIRTIO_ID_NET) { internal->device_type = IFCVF_NET; + internal->hw.is_blk = IFCVF_NET; internal->features = features & ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); internal->features |= dev_info[IFCVF_NET].features; } else if (device_id == VIRTIO_ID_BLOCK) { internal->device_type = IFCVF_BLK; + internal->hw.is_blk = IFCVF_BLK; internal->features = features & ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); internal->features |= dev_info[IFCVF_BLK].features; -- 1.8.3.1
[PATCH v7 16/18] vdpa/ifc/base: access correct register for blk device
1. last_avail_idx is the lower 16 bits of the register. 2. The address of the ring_state register differs between net and blk devices. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/base/ifcvf.c | 36 +--- drivers/vdpa/ifc/base/ifcvf.h | 1 + 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index d10c1fd..4d5881a 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -218,10 +218,18 @@ &cfg->queue_used_hi); IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size); - *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET + - (i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) = - (u32)hw->vring[i].last_avail_idx | - ((u32)hw->vring[i].last_used_idx << 16); + if (hw->is_blk == IFCVF_BLK) { + *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET + + i * IFCVF_LM_CFG_SIZE) = + (u32)hw->vring[i].last_avail_idx | + ((u32)hw->vring[i].last_used_idx << 16); + } else if (hw->is_blk == IFCVF_NET) { + *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET + + (i / 2) * IFCVF_LM_CFG_SIZE + + (i % 2) * 4) = + (u32)hw->vring[i].last_avail_idx | + ((u32)hw->vring[i].last_used_idx << 16); + } IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector); if (IFCVF_READ_REG16(&cfg->queue_msix_vector) == @@ -254,9 +262,23 @@ IFCVF_WRITE_REG16(i, &cfg->queue_select); IFCVF_WRITE_REG16(0, &cfg->queue_enable); IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector); - ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET + - (i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4); - hw->vring[i].last_avail_idx = (u16)(ring_state >> 16); + + if (hw->is_blk == IFCVF_BLK) { + ring_state = *(u32 *)(hw->lm_cfg + + IFCVF_LM_RING_STATE_OFFSET + + i * IFCVF_LM_CFG_SIZE); + } else if (hw->is_blk == IFCVF_NET) { + ring_state = *(u32 *)(hw->lm_cfg + + IFCVF_LM_RING_STATE_OFFSET + + (i / 2) * IFCVF_LM_CFG_SIZE + + (i % 2) * 4); + } + + if (hw->is_blk == IFCVF_BLK) + hw->vring[i].last_avail_idx = + (u16)(ring_state & IFCVF_16_BIT_MASK); + else if (hw->is_blk == IFCVF_NET) + hw->vring[i].last_avail_idx = (u16)(ring_state >> 16); hw->vring[i].last_used_idx = (u16)(ring_state >> 16); } } diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index 8591ef1..ff11b12 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -65,6 +65,7 @@ #define IFCVF_MEDIATED_VRING 0x2000 #define IFCVF_32_BIT_MASK 0xFFFFFFFF +#define IFCVF_16_BIT_MASK 0xFFFF #ifndef VHOST_USER_PROTOCOL_F_CONFIG #define VHOST_USER_PROTOCOL_F_CONFIG 9 -- 1.8.3.1
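To make the layout difference explicit: for blk devices each ring gets its own IFCVF_LM_CFG_SIZE slot, while for net devices the two rings of a queue pair share one slot, 4 bytes apart. A sketch of the offset computation implied by the hunks above, assuming the ifcvf base definitions are in scope:

#include <stdint.h>

/* Sketch: byte offset of the 32-bit ring state word for ring i,
 * mirroring the addressing in the patch above.
 */
static uint32_t
ring_state_offset(int is_blk, uint32_t i)
{
	if (is_blk == IFCVF_BLK)	/* blk: one slot per ring */
		return IFCVF_LM_RING_STATE_OFFSET + i * IFCVF_LM_CFG_SIZE;
	/* net: two rings share a slot, the odd ring 4 bytes in */
	return IFCVF_LM_RING_STATE_OFFSET +
	       (i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4;
}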
[PATCH v7 17/18] vdpa/ifc: blk device pause with no inflight IO
When the virtio blk device is paused, make sure the hardware last_avail_idx and last_used_idx are the same. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/ifcvf_vdpa.c | 25 + 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 4923bc1..def6adf 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -314,12 +314,12 @@ struct rte_vdpa_dev_info { vdpa_ifcvf_stop(struct ifcvf_internal *internal) { struct ifcvf_hw *hw = &internal->hw; - struct rte_vhost_vring vq; uint32_t i; int vid; uint64_t features = 0; uint64_t log_base = 0, log_size = 0; uint64_t len; + u32 ring_state = 0; vid = internal->vid; @@ -328,13 +328,22 @@ struct rte_vdpa_dev_info { */ if (internal->device_type == IFCVF_BLK) { for (i = 0; i < hw->nr_vring; i++) { - rte_vhost_get_vhost_vring(internal->vid, i, &vq); - while (vq.avail->idx != vq.used->idx) { - ifcvf_notify_queue(hw, i); - usleep(10); - } - hw->vring[i].last_avail_idx = vq.avail->idx; - hw->vring[i].last_used_idx = vq.used->idx; + do { + if (hw->lm_cfg != NULL) + ring_state = *(u32 *)(hw->lm_cfg + + IFCVF_LM_RING_STATE_OFFSET + + i * IFCVF_LM_CFG_SIZE); + hw->vring[i].last_avail_idx = + (u16)(ring_state & IFCVF_16_BIT_MASK); + hw->vring[i].last_used_idx = + (u16)(ring_state >> 16); + if (hw->vring[i].last_avail_idx != + hw->vring[i].last_used_idx) { + ifcvf_notify_queue(hw, i); + usleep(10); + } + } while (hw->vring[i].last_avail_idx != + hw->vring[i].last_used_idx); } } -- 1.8.3.1
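The drain loop above spins until the hardware indices converge, which can hang indefinitely if the device stalls. A bounded variant, purely as a sketch assuming the ifcvf base definitions are in scope; MAX_DRAIN_RETRIES is an assumed budget, not a value from the driver:

#include <unistd.h>

#define MAX_DRAIN_RETRIES 1000

/* Sketch: bounded drain-wait for one blk vring before stopping. */
static void
drain_blk_vring(struct ifcvf_hw *hw, uint32_t i)
{
	u32 ring_state = 0;
	int retries = 0;

	do {
		if (hw->lm_cfg != NULL)
			ring_state = *(u32 *)(hw->lm_cfg +
				IFCVF_LM_RING_STATE_OFFSET +
				i * IFCVF_LM_CFG_SIZE);
		hw->vring[i].last_avail_idx = (u16)(ring_state & IFCVF_16_BIT_MASK);
		hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
		if (hw->vring[i].last_avail_idx == hw->vring[i].last_used_idx)
			break;
		ifcvf_notify_queue(hw, i);
		usleep(10);
	} while (++retries < MAX_DRAIN_RETRIES);
}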
[PATCH v7 18/18] vhost: make sure each queue callfd is configured
While building the vhost data path, QEMU creates one call fd at first and another call fd at the end; the final call fd is the one used to relay notifications. In the original code, dev_conf was invoked once the kick fd was set, so the vDPA device was configured with the first call fd. Even though the actual call fd is set later, the data path does not work correctly. Signed-off-by: Andy Pei --- lib/vhost/vhost_user.c | 14 ++ 1 file changed, 14 insertions(+) diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index e925428..82122b6 100644 --- a/lib/vhost/vhost_user.c +++ b/lib/vhost/vhost_user.c @@ -3230,12 +3230,26 @@ typedef int (*vhost_message_handler_t)(struct virtio_net **pdev, if (!vdpa_dev) goto out; + if (request != VHOST_USER_SET_VRING_CALL) + goto out; + if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) { if (vdpa_dev->ops->dev_conf(dev->vid)) VHOST_LOG_CONFIG(ERR, "(%s) failed to configure vDPA device\n", dev->ifname); else dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED; + } else { + /* when VIRTIO_DEV_VDPA_CONFIGURED is already set, +* close the device and configure it again to +* make sure the call fd of each queue is configured correctly. +*/ + if (vdpa_dev->ops->dev_close(dev->vid)) + VHOST_LOG_CONFIG(ERR, +"Failed to close vDPA device\n"); + if (vdpa_dev->ops->dev_conf(dev->vid)) + VHOST_LOG_CONFIG(ERR, +"Failed to re-config vDPA device\n"); } out: -- 1.8.3.1
[PATCH 1/1] examples/ipsec-secgw: create lookaside sessions at init
In event lookaside mode the same session can be handled by multiple cores, so session creation in the datapath would lead to multiple cores trying to create the same session simultaneously. To avoid this case and to enable event lookaside mode in the future, lookaside sessions are now created at initialization in sa_add_rules(). All sessions (inline and lookaside) are now created during the init process, so the session pool information was removed from the ipsec context. A core id was added to obtain the correct crypto device queue pair for the current core. Signed-off-by: Volodymyr Fialko --- Depends-on: series-22265 ("examples/ipsec-secgw: examples/ipsec-secgw: destroy lookaside sessions") Depends-on: series-22593 ("examples/ipsec-secgw: move fast path helper functions") examples/ipsec-secgw/ipsec-secgw.c | 27 +++ examples/ipsec-secgw/ipsec.c | 101 +-- examples/ipsec-secgw/ipsec.h | 13 ++-- examples/ipsec-secgw/ipsec_process.c | 33 +++-- examples/ipsec-secgw/ipsec_worker.c | 8 +-- examples/ipsec-secgw/sa.c| 45 +++- 6 files changed, 121 insertions(+), 106 deletions(-) diff --git a/examples/ipsec-secgw/ipsec-secgw.c b/examples/ipsec-secgw/ipsec-secgw.c index 57acc01e3b..05b57ce3f5 100644 --- a/examples/ipsec-secgw/ipsec-secgw.c +++ b/examples/ipsec-secgw/ipsec-secgw.c @@ -684,16 +684,12 @@ ipsec_poll_mode_worker(void) qconf->inbound.sp6_ctx = socket_ctx[socket_id].sp_ip6_in; qconf->inbound.sa_ctx = socket_ctx[socket_id].sa_in; qconf->inbound.cdev_map = cdev_map_in; - qconf->inbound.session_pool = socket_ctx[socket_id].session_pool; - qconf->inbound.session_priv_pool = - socket_ctx[socket_id].session_priv_pool; + qconf->inbound.lcore_id = lcore_id; qconf->outbound.sp4_ctx = socket_ctx[socket_id].sp_ip4_out; qconf->outbound.sp6_ctx = socket_ctx[socket_id].sp_ip6_out; qconf->outbound.sa_ctx = socket_ctx[socket_id].sa_out; qconf->outbound.cdev_map = cdev_map_out; - qconf->outbound.session_pool = socket_ctx[socket_id].session_pool; - qconf->outbound.session_priv_pool = - socket_ctx[socket_id].session_priv_pool; + qconf->outbound.lcore_id = lcore_id; qconf->frag.pool_indir = socket_ctx[socket_id].mbuf_pool_indir; rc = ipsec_sad_lcore_cache_init(app_sa_prm.cache_sz); @@ -1458,7 +1454,7 @@ check_all_ports_link_status(uint32_t port_mask) } static int32_t -add_mapping(struct rte_hash *map, const char *str, uint16_t cdev_id, +add_mapping(const char *str, uint16_t cdev_id, uint16_t qp, struct lcore_params *params, struct ipsec_ctx *ipsec_ctx, const struct rte_cryptodev_capabilities *cipher, @@ -1477,7 +1473,7 @@ add_mapping(struct rte_hash *map, const char *str, uint16_t cdev_id, if (aead) key.aead_algo = aead->sym.aead.algo; - ret = rte_hash_lookup(map, &key); + ret = rte_hash_lookup(ipsec_ctx->cdev_map, &key); if (ret != -ENOENT) return 0; @@ -1499,7 +1495,7 @@ add_mapping(struct rte_hash *map, const char *str, uint16_t cdev_id, cdev_id, qp, i); } - ret = rte_hash_add_key_data(map, &key, (void *)i); + ret = rte_hash_add_key_data(ipsec_ctx->cdev_map, &key, (void *)i); if (ret < 0) { printf("Faled to insert cdev mapping for (lcore %u, " "cdev %u, qp %u), errno %d\n", @@ -1517,20 +1513,19 @@ add_cdev_mapping(struct rte_cryptodev_info *dev_info, uint16_t cdev_id, { int32_t ret = 0; const struct rte_cryptodev_capabilities *i, *j; - struct rte_hash *map; struct lcore_conf *qconf; struct ipsec_ctx *ipsec_ctx; const char *str; qconf = &lcore_conf[params->lcore_id]; - if ((unprotected_port_mask & (1 << params->port_id)) == 0) { - map = cdev_map_out; + if (!is_unprotected_port(params->port_id)) { ipsec_ctx = &qconf->outbound; +
ipsec_ctx->cdev_map = cdev_map_out; str = "Outbound"; } else { - map = cdev_map_in; ipsec_ctx = &qconf->inbound; + ipsec_ctx->cdev_map = cdev_map_in; str = "Inbound"; } @@ -1545,7 +1540,7 @@ add_cdev_mapping(struct rte_cryptodev_info *dev_info, uint16_t cdev_id, continue; if (i->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AEAD) { - ret |= add_mapping(map, str, cdev_id, qp, params, + ret |= add_mapping(str, cdev_id, qp, params, ipsec_ctx, NULL, NULL, i); continue; } @@ -1561,7 +1556,7 @@ add_cdev_mapping(struct rte_cryptodev_info *dev_info, uint16_t cdev_id, if (j->sym.x
[PATCH v3] sched: enable/disable TC OV at runtime
Added new API to enable or disable TC over subscription for best effort traffic class at subport level. Added changes after review and increased throughput. By default TC OV is disabled. Signed-off-by: Marcin Danilewicz --- lib/sched/rte_sched.c | 189 +++--- lib/sched/rte_sched.h | 18 lib/sched/version.map | 3 + 3 files changed, 178 insertions(+), 32 deletions(-) diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c index ec74bee939..6e7d81df46 100644 --- a/lib/sched/rte_sched.c +++ b/lib/sched/rte_sched.c @@ -213,6 +213,9 @@ struct rte_sched_subport { uint8_t *bmp_array; struct rte_mbuf **queue_array; uint8_t memory[0] __rte_cache_aligned; + + /* TC oversubscription activation */ + int is_tc_ov_enabled; } __rte_cache_aligned; struct rte_sched_port { @@ -1165,6 +1168,45 @@ rte_sched_cman_config(struct rte_sched_port *port, } #endif +int +rte_sched_subport_tc_ov_config(struct rte_sched_port *port, + uint32_t subport_id, + bool tc_ov_enable) +{ + struct rte_sched_subport *s; + struct rte_sched_subport_profile *profile; + + if (port == NULL) { + RTE_LOG(ERR, SCHED, + "%s: Incorrect value for parameter port\n", __func__); + return -EINVAL; + } + + if (subport_id >= port->n_subports_per_port) { + RTE_LOG(ERR, SCHED, + "%s: Incorrect value for parameter subport id\n", __func__); + return -EINVAL; + } + + s = port->subports[subport_id]; + s->is_tc_ov_enabled = tc_ov_enable ? 1 : 0; + + if (s->is_tc_ov_enabled) { + /* TC oversubscription */ + s->tc_ov_wm_min = port->mtu; + s->tc_ov_period_id = 0; + s->tc_ov = 0; + s->tc_ov_n = 0; + s->tc_ov_rate = 0; + + profile = port->subport_profiles + s->profile; + s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile->tc_period, + s->pipe_tc_be_rate_max); + s->tc_ov_wm = s->tc_ov_wm_max; + } + return 0; +} + int rte_sched_subport_config(struct rte_sched_port *port, uint32_t subport_id, @@ -1254,6 +1296,9 @@ rte_sched_subport_config(struct rte_sched_port *port, s->n_pipe_profiles = params->n_pipe_profiles; s->n_max_pipe_profiles = params->n_max_pipe_profiles; + /* TC over-subscription is disabled by default */ + s->is_tc_ov_enabled = 0; + #ifdef RTE_SCHED_CMAN if (params->cman_params != NULL) { s->cman_enabled = true; @@ -1316,13 +1361,6 @@ rte_sched_subport_config(struct rte_sched_port *port, for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++) s->grinder_base_bmp_pos[i] = RTE_SCHED_PIPE_INVALID; - - /* TC oversubscription */ - s->tc_ov_wm_min = port->mtu; - s->tc_ov_period_id = 0; - s->tc_ov = 0; - s->tc_ov_n = 0; - s->tc_ov_rate = 0; } { @@ -1342,9 +1380,6 @@ rte_sched_subport_config(struct rte_sched_port *port, else profile->tc_credits_per_period[i] = 0; - s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile->tc_period, - s->pipe_tc_be_rate_max); - s->tc_ov_wm = s->tc_ov_wm_max; s->profile = subport_profile_id; } @@ -1417,17 +1452,20 @@ rte_sched_pipe_config(struct rte_sched_port *port, double pipe_tc_be_rate = (double) params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] / (double) params->tc_period; - uint32_t tc_be_ov = s->tc_ov; - /* Unplug pipe from its subport */ - s->tc_ov_n -= params->tc_ov_weight; - s->tc_ov_rate -= pipe_tc_be_rate; - s->tc_ov = s->tc_ov_rate > subport_tc_be_rate; + if (s->is_tc_ov_enabled) { + uint32_t tc_be_ov = s->tc_ov; - if (s->tc_ov != tc_be_ov) { - RTE_LOG(DEBUG, SCHED, - "Subport %u Best-effort TC oversubscription is OFF (%.4lf >= %.4lf)\n", - subport_id, subport_tc_be_rate, s->tc_ov_rate); + /* Unplug pipe from its subport */ + s->tc_ov_n -= params->tc_ov_weight; + s->tc_ov_rate -= pipe_tc_be_rate; + s->tc_ov = 
s->tc_ov_rate > subport_tc_be_rate; + + if (s->tc_ov != tc_be_ov) { + RTE_LOG(DEBUG, SCHED, + "Subport %u Best-effort TC oversubscription is OFF (%
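For reference, a minimal sketch of how an application would use the new API after configuring a subport; "port" and "subport_id" are assumed to come from the usual rte_sched_port_config()/rte_sched_subport_config() flow:

#include <rte_sched.h>

/* Sketch: enable best-effort TC oversubscription on one subport. */
static int
enable_tc_ov(struct rte_sched_port *port, uint32_t subport_id)
{
	return rte_sched_subport_tc_ov_config(port, subport_id, true);
}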
RE: [PATCH v3] sched: enable/disable TC OV at runtime
Marcin, Every time you send a new version, you need to copy the maintainers and the other relevant people, otherwise there is a high chance we are not going to see your patch, thanks! I only saw this one due to pure chance ;) Regards, Cristian > -Original Message- > From: Marcin Danilewicz > Sent: Wednesday, April 27, 2022 9:59 AM > To: dev@dpdk.org > Subject: [PATCH v3] sched: enable/disable TC OV at runtime > > Added new API to enable or disable TC over subscription for best > effort traffic class at subport level. > Added changes after review and increased throughput. > > By default TC OV is disabled. > > Signed-off-by: Marcin Danilewicz > --- > lib/sched/rte_sched.c | 189 +++--- > lib/sched/rte_sched.h | 18 > lib/sched/version.map | 3 + > 3 files changed, 178 insertions(+), 32 deletions(-) > > diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c > index ec74bee939..6e7d81df46 100644 > --- a/lib/sched/rte_sched.c > +++ b/lib/sched/rte_sched.c > @@ -213,6 +213,9 @@ struct rte_sched_subport { > uint8_t *bmp_array; > struct rte_mbuf **queue_array; > uint8_t memory[0] __rte_cache_aligned; > + > + /* TC oversubscription activation */ > + int is_tc_ov_enabled; > } __rte_cache_aligned; > > struct rte_sched_port { > @@ -1165,6 +1168,45 @@ rte_sched_cman_config(struct rte_sched_port > *port, > } > #endif > > +int > +rte_sched_subport_tc_ov_config(struct rte_sched_port *port, > + uint32_t subport_id, > + bool tc_ov_enable) > +{ > + struct rte_sched_subport *s; > + struct rte_sched_subport_profile *profile; > + > + if (port == NULL) { > + RTE_LOG(ERR, SCHED, > + "%s: Incorrect value for parameter port\n", __func__); > + return -EINVAL; > + } > + > + if (subport_id >= port->n_subports_per_port) { > + RTE_LOG(ERR, SCHED, > + "%s: Incorrect value for parameter subport id\n", > __func__); > + return -EINVAL; > + } > + > + s = port->subports[subport_id]; > + s->is_tc_ov_enabled = tc_ov_enable ? 
1 : 0; > + > + if (s->is_tc_ov_enabled) { > + /* TC oversubscription */ > + s->tc_ov_wm_min = port->mtu; > + s->tc_ov_period_id = 0; > + s->tc_ov = 0; > + s->tc_ov_n = 0; > + s->tc_ov_rate = 0; > + > + profile = port->subport_profiles + s->profile; > + s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile- > >tc_period, > + s->pipe_tc_be_rate_max); > + s->tc_ov_wm = s->tc_ov_wm_max; > + } > + return 0; > +} > + > int > rte_sched_subport_config(struct rte_sched_port *port, > uint32_t subport_id, > @@ -1254,6 +1296,9 @@ rte_sched_subport_config(struct rte_sched_port > *port, > s->n_pipe_profiles = params->n_pipe_profiles; > s->n_max_pipe_profiles = params->n_max_pipe_profiles; > > + /* TC over-subscription is disabled by default */ > + s->is_tc_ov_enabled = 0; > + > #ifdef RTE_SCHED_CMAN > if (params->cman_params != NULL) { > s->cman_enabled = true; > @@ -1316,13 +1361,6 @@ rte_sched_subport_config(struct rte_sched_port > *port, > > for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++) > s->grinder_base_bmp_pos[i] = > RTE_SCHED_PIPE_INVALID; > - > - /* TC oversubscription */ > - s->tc_ov_wm_min = port->mtu; > - s->tc_ov_period_id = 0; > - s->tc_ov = 0; > - s->tc_ov_n = 0; > - s->tc_ov_rate = 0; > } > > { > @@ -1342,9 +1380,6 @@ rte_sched_subport_config(struct rte_sched_port > *port, > else > profile->tc_credits_per_period[i] = 0; > > - s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile- > >tc_period, > - s- > >pipe_tc_be_rate_max); > - s->tc_ov_wm = s->tc_ov_wm_max; > s->profile = subport_profile_id; > > } > @@ -1417,17 +1452,20 @@ rte_sched_pipe_config(struct rte_sched_port > *port, > double pipe_tc_be_rate = > (double) params- > >tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] > / (double) params->tc_period; > - uint32_t tc_be_ov = s->tc_ov; > > - /* Unplug pipe from its subport */ > - s->tc_ov_n -= params->tc_ov_weight; > - s->tc_ov_rate -= pipe_tc_be_rate; > - s->tc_ov = s->tc_ov_rate > subport_tc_be_rate; > + if (s->is_tc_ov_enabled) { > + uint32_t tc_be_ov = s->tc_ov; > > - if (s->tc_ov != tc_be_ov) { > - RTE_LOG(DEBUG, SCHED, > - "Subport %u Best-effort TC oversubscription is > OFF
RE: [PATCH v3] sched: enable/disable TC OV at runtime
Adding Jasvinder > -Original Message- > From: Dumitrescu, Cristian > Sent: Wednesday, April 27, 2022 10:37 AM > To: Marcin Danilewicz ; dev@dpdk.org > Subject: RE: [PATCH v3] sched: enable/disable TC OV at runtime > > Marcin, > > Every time you send a new version, you need to copy the maintainers and the > other relevant people, otherwise there is a high chance we are not going to > see > your patch, thanks! I only saw this one due to pure chance ;) > > Regards, > Cristian > > > -Original Message- > > From: Marcin Danilewicz > > Sent: Wednesday, April 27, 2022 9:59 AM > > To: dev@dpdk.org > > Subject: [PATCH v3] sched: enable/disable TC OV at runtime > > > > Added new API to enable or disable TC over subscription for best > > effort traffic class at subport level. > > Added changes after review and increased throughput. > > > > By default TC OV is disabled. > > > > Signed-off-by: Marcin Danilewicz > > --- > > lib/sched/rte_sched.c | 189 +++--- > > lib/sched/rte_sched.h | 18 > > lib/sched/version.map | 3 + > > 3 files changed, 178 insertions(+), 32 deletions(-) > > > > diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c > > index ec74bee939..6e7d81df46 100644 > > --- a/lib/sched/rte_sched.c > > +++ b/lib/sched/rte_sched.c > > @@ -213,6 +213,9 @@ struct rte_sched_subport { > > uint8_t *bmp_array; > > struct rte_mbuf **queue_array; > > uint8_t memory[0] __rte_cache_aligned; > > + > > + /* TC oversubscription activation */ > > + int is_tc_ov_enabled; > > } __rte_cache_aligned; > > > > struct rte_sched_port { > > @@ -1165,6 +1168,45 @@ rte_sched_cman_config(struct rte_sched_port > > *port, > > } > > #endif > > > > +int > > +rte_sched_subport_tc_ov_config(struct rte_sched_port *port, > > + uint32_t subport_id, > > + bool tc_ov_enable) > > +{ > > + struct rte_sched_subport *s; > > + struct rte_sched_subport_profile *profile; > > + > > + if (port == NULL) { > > + RTE_LOG(ERR, SCHED, > > + "%s: Incorrect value for parameter port\n", __func__); > > + return -EINVAL; > > + } > > + > > + if (subport_id >= port->n_subports_per_port) { > > + RTE_LOG(ERR, SCHED, > > + "%s: Incorrect value for parameter subport id\n", > > __func__); > > + return -EINVAL; > > + } > > + > > + s = port->subports[subport_id]; > > + s->is_tc_ov_enabled = tc_ov_enable ? 
1 : 0; > > + > > + if (s->is_tc_ov_enabled) { > > + /* TC oversubscription */ > > + s->tc_ov_wm_min = port->mtu; > > + s->tc_ov_period_id = 0; > > + s->tc_ov = 0; > > + s->tc_ov_n = 0; > > + s->tc_ov_rate = 0; > > + > > + profile = port->subport_profiles + s->profile; > > + s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile- > > >tc_period, > > + s->pipe_tc_be_rate_max); > > + s->tc_ov_wm = s->tc_ov_wm_max; > > + } > > + return 0; > > +} > > + > > int > > rte_sched_subport_config(struct rte_sched_port *port, > > uint32_t subport_id, > > @@ -1254,6 +1296,9 @@ rte_sched_subport_config(struct rte_sched_port > > *port, > > s->n_pipe_profiles = params->n_pipe_profiles; > > s->n_max_pipe_profiles = params->n_max_pipe_profiles; > > > > + /* TC over-subscription is disabled by default */ > > + s->is_tc_ov_enabled = 0; > > + > > #ifdef RTE_SCHED_CMAN > > if (params->cman_params != NULL) { > > s->cman_enabled = true; > > @@ -1316,13 +1361,6 @@ rte_sched_subport_config(struct rte_sched_port > > *port, > > > > for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++) > > s->grinder_base_bmp_pos[i] = > > RTE_SCHED_PIPE_INVALID; > > - > > - /* TC oversubscription */ > > - s->tc_ov_wm_min = port->mtu; > > - s->tc_ov_period_id = 0; > > - s->tc_ov = 0; > > - s->tc_ov_n = 0; > > - s->tc_ov_rate = 0; > > } > > > > { > > @@ -1342,9 +1380,6 @@ rte_sched_subport_config(struct rte_sched_port > > *port, > > else > > profile->tc_credits_per_period[i] = 0; > > > > - s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile- > > >tc_period, > > - s- > > >pipe_tc_be_rate_max); > > - s->tc_ov_wm = s->tc_ov_wm_max; > > s->profile = subport_profile_id; > > > > } > > @@ -1417,17 +1452,20 @@ rte_sched_pipe_config(struct rte_sched_port > > *port, > > double pipe_tc_be_rate = > > (double) params- > > >tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] > > / (double) params->tc_period; > > - uint32_t tc_be_ov = s->tc_ov; > > > > - /* Unplug pipe from its subport */ > > - s->tc_ov_n -= params->tc_ov_weight; > > - s-
Re: [PATCH v2] net/nfp: update how MAX MTU is read
On 4/27/2022 9:37 AM, Niklas Söderlund wrote: Hello, I have a question about the Checks that ran on this patch in patchwork [1]. It appears the job ci/iol-x86_64-compile-testing, dpdk_mingw64_compile have failed on a Windows Server 2019 build. But the logs from the job appears to be incomplete as it contains only 19 lines of output and stops without an error in the configuration part of meson. It is not clear why it failed, patch looks nothing specific to Windows. I have triggered a new build (on top of next-net), please give ~15 minutes. The failure is only flagged as a warning and not as an error in patchwork, is it it possible that the job in question fails to capture all output or that it fails to complete sometimes? The patchwork warning is to highlight new version of patches needs to be send as reply to previous version. This enables all versions are in same email thread, and this helps reviewer to see previous versions and comments/changes to previous versions easily. Also this makes possible to see all versions and history in one place in mail list archives. There is nothing to do for this version, but please use 'git send-email', '--in-reply-to' option for new patches. What can we do to on our end to remedy this? My concern is that that the patch is blocked due to the warning and I'm unclear on how move forward, sorry if the case is that I'm just impatient. The patch is not blocked for above reasons, it is in the queue (which is moving a little slow in this release for some operational reasons). 1. https://patchwork.dpdk.org/project/dpdk/patch/20220420134638.24010-1-walter.heym...@corigine.com/ On 2022-04-20 15:46:39 +0200, Walter Heymans wrote: The 'max_rx_pktlen' value was previously read from hardware, which was set by the running firmware. This caused confusion due to different meanings of 'MAX_MTU'. This patch updates the 'max_rx_pktlen' to the maximum value that the NFP NIC can support. The 'max_mtu' value that is read from hardware, is assigned to the 'dev_info->max_mtu' variable. If more layer 2 metadata must be used, the firmware can be updated to report a smaller 'max_mtu' value. The constant defined for NFP_FRAME_SIZE_MAX is derived for the maximum supported buffer size of 10240, minus 136 bytes that is reserved by the hardware and another 56 bytes reserved for expansion in firmware. This results in a usable maximum packet length of 10048 bytes. Signed-off-by: Walter Heymans Signed-off-by: Niklas Söderlund Reviewed-by: Louis Peens Reviewed-by: Chaoyong He Reviewed-by: Richard Donkin --- drivers/net/nfp/nfp_common.c | 11 ++- drivers/net/nfp/nfp_common.h | 3 +++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/nfp/nfp_common.c b/drivers/net/nfp/nfp_common.c index b26770dbfb..52fbda1a79 100644 --- a/drivers/net/nfp/nfp_common.c +++ b/drivers/net/nfp/nfp_common.c @@ -692,7 +692,16 @@ nfp_net_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues; dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues; dev_info->min_rx_bufsize = RTE_ETHER_MIN_MTU; - dev_info->max_rx_pktlen = hw->max_mtu; + /* +* The maximum rx packet length (max_rx_pktlen) is set to the +* maximum supported frame size that the NFP can handle. This +* includes layer 2 headers, CRC and other metadata that can +* optionally be used. +* The maximum layer 3 MTU (max_mtu) is read from hardware, +* which was set by the firmware loaded onto the card. 
+*/ + dev_info->max_rx_pktlen = NFP_FRAME_SIZE_MAX; + dev_info->max_mtu = hw->max_mtu; /* Next should change when PF support is implemented */ dev_info->max_mac_addrs = 1; diff --git a/drivers/net/nfp/nfp_common.h b/drivers/net/nfp/nfp_common.h index 8b35fa119c..8db5ec23f8 100644 --- a/drivers/net/nfp/nfp_common.h +++ b/drivers/net/nfp/nfp_common.h @@ -98,6 +98,9 @@ struct nfp_net_adapter; /* Number of supported physical ports */ #define NFP_MAX_PHYPORTS 12 +/* Maximum supported NFP frame size (MTU + layer 2 headers) */ +#define NFP_FRAME_SIZE_MAX 10048 + #include #include -- 2.25.1
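On the application side, both values discussed in this thread are visible through the standard dev info query; a quick sketch for checking what the PMD reports:

#include <stdio.h>
#include <rte_ethdev.h>

/* Sketch: print the frame-size limits a port reports. */
static void
print_frame_limits(uint16_t port_id)
{
	struct rte_eth_dev_info dev_info;

	if (rte_eth_dev_info_get(port_id, &dev_info) != 0)
		return;
	printf("max_rx_pktlen=%u max_mtu=%u\n",
	       dev_info.max_rx_pktlen, dev_info.max_mtu);
}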
[PATCH v2] cryptodev: add elliptic curve diffie hellman
This commit adds an Elliptic Curve Diffie-Hellman option to Cryptodev. The same result could be achieved with EC point multiplication, but: 1) Phase 1 of DH uses the EC generator implicitly, whereas point multiplication expects the generator to be set manually. 2) It unifies the usage of DH. 3) It can easily be extended to support X25519 and X448. Signed-off-by: Arek Kusztal --- v2: - added ecdh comments to operation types Depends-on: series-22684 ("cryptodev: move dh type from xform to dh op") lib/cryptodev/rte_crypto_asym.h | 46 +++-- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/lib/cryptodev/rte_crypto_asym.h b/lib/cryptodev/rte_crypto_asym.h index 4697a7bc59..64d97ae054 100644 --- a/lib/cryptodev/rte_crypto_asym.h +++ b/lib/cryptodev/rte_crypto_asym.h @@ -91,6 +91,8 @@ enum rte_crypto_asym_xform_type { /**< Elliptic Curve Digital Signature Algorithm * Perform Signature Generation and Verification. */ + RTE_CRYPTO_ASYM_XFORM_ECDH, + /**< Elliptic Curve Diffie Hellman */ RTE_CRYPTO_ASYM_XFORM_ECPM, /**< Elliptic Curve Point Multiplication */ RTE_CRYPTO_ASYM_XFORM_TYPE_LIST_END @@ -112,9 +114,9 @@ enum rte_crypto_asym_op_type { RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE, /**< DH Private Key generation operation */ RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE, - /**< DH Public Key generation operation */ + /**< DH/ECDH Public Key generation operation */ RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE, - /**< DH Shared Secret compute operation */ + /**< DH/ECDH Shared Secret compute operation */ RTE_CRYPTO_ASYM_OP_LIST_END }; @@ -385,34 +387,38 @@ struct rte_crypto_rsa_op_param { }; /** - * Diffie-Hellman Operations params. + * Diffie-Hellman/Elliptic Curve Diffie-Hellman operation. * @note: */ struct rte_crypto_dh_op_param { enum rte_crypto_asym_op_type op_type; /**< Diffie-Hellman operation type */ - rte_crypto_uint pub_key; + rte_crypto_param priv_key; /**< -* Output generated public key when op_type is -* DH PUB_KEY_GENERATION. -* Input peer public key when op_type is DH -* SHARED_SECRET_COMPUTATION -* +* Diffie-Hellman private part +* For DH and ECDH it is big-endian integer. +* Input for both phases of Diffie-Hellman */ - - rte_crypto_uint priv_key; + union { + rte_crypto_uint pub_key; + struct rte_crypto_ec_point pub_point; + }; /**< -* Output generated private key if op_type is -* DH PRIVATE_KEY_GENERATION -* Input when op_type is DH SHARED_SECRET_COMPUTATION. -* +* Diffie-Hellman public part +* For DH it is big-endian unsigned integer. +* For ECDH it is a point on the curve. +* Output for RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE +* Input for RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE */ - - rte_crypto_uint shared_secret; + union { + rte_crypto_uint shared_secret; + struct rte_crypto_ec_point shared_point; + }; /**< -* Output with calculated shared secret -* when dh op_type = SHARED_SECRET_COMPUTATION. -* +* Diffie-Hellman shared secret +* For DH it is big-endian unsigned integer. +* For ECDH it is a point on the curve. +* Output for RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE */ }; -- 2.13.6
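A rough sketch of how phase 2 (shared-secret computation) would be filled in with the new layout; all buffers and lengths are assumed to be prepared by the caller, and the dh pointer is assumed to come from an already-allocated asymmetric crypto op:

#include <rte_crypto_asym.h>

/* Sketch: fill the phase-2 (shared secret) fields for ECDH. */
static void
fill_ecdh_phase2(struct rte_crypto_dh_op_param *dh,
		 rte_crypto_param priv_key,
		 struct rte_crypto_ec_point peer_pub,
		 struct rte_crypto_ec_point out_shared)
{
	dh->op_type = RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE;
	dh->priv_key = priv_key;       /* big-endian integer (input) */
	dh->pub_point = peer_pub;      /* peer's public point (input) */
	dh->shared_point = out_shared; /* result point (output) */
}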
[PATCH v6 00/12] complete common VF features for DCF
The DCF PMD support the below dev ops, dev_supported_ptypes_get dev_link_update xstats_get xstats_get_names xstats_reset promiscuous_enable promiscuous_disable allmulticast_enable allmulticast_disable mac_addr_add mac_addr_remove set_mc_addr_list vlan_filter_set vlan_offload_set mac_addr_set reta_update reta_query rss_hash_update rss_hash_conf_get rxq_info_get txq_info_get mtu_set tx_done_cleanup get_monitor_addr v6: * add patch: 1.net/ice: support DCF new VLAN capabilities * remove patch: 1.doc: update for ice DCF datapath configuration * Split doc into specific patch. v5: * remove patch: 1.complete common VF features for DCF 2.net/ice: enable CVL DCF device reset API 3.net/ice: support IPv6 NVGRE tunnel 4.net/ice: support new pattern of IPv4 5.net/ice: treat unknown package as OS default package 6.net/ice: handle virtchnl event message without interrupt 7.net/ice: add DCF request queues function 8.net/ice: negotiate large VF and request more queues 9.net/ice: enable multiple queues configurations for large VF 10.net/ice: enable IRQ mapping configuration for large VF 11.net/ice: add enable/disable queues for DCF large VF v4: * remove patch: 1.testpmd: force flow flush 2.net/ice: fix DCF ACL flow engine 3.net/ice: fix DCF reset * add patch: 1.net/ice: add extended stats 2.net/ice: support queue information getting 3.net/ice: implement power management 4.doc: update for ice DCF datapath configuration v3: * remove patch: 1.net/ice/base: add VXLAN support for switch filter 2.net/ice: add VXLAN support for switch filter 3.common/iavf: support flushing rules and reporting DCF id 4.net/ice/base: fix ethertype filter input set 5.net/ice/base: support IPv6 GRE UDP pattern 6.net/ice/base: support new patterns of TCP and UDP 7.net/ice: support new patterns of TCP and UDP 8.net/ice/base: support IPv4 GRE tunnel 9.net/ice: support IPv4 GRE raw pattern type 10.net/ice/base: update Profile ID table for VXLAN 11.net/ice/base: update Protocol ID table to match DVM DDP v2: * remove patch: 1.net/iavf: support checking if device is an MDCF instance 2.net/ice: support MDCF(multi-DCF) instance 3.net/ice/base: support custom DDP buildin recipe 4.net/ice: support buildin recipe configuration 5.net/ice/base: support custom ddp package version 6.net/ice: disable ACL function for MDCF instance Alvin Zhang (3): net/ice: support dcf promisc configuration net/ice: support dcf VLAN filter and offload configuration net/ice: support DCF new VLAN capabilities Jie Wang (2): net/ice: add ops MTU-SET to dcf net/ice: add ops dev-supported-ptypes-get to dcf Kevin Liu (4): net/ice: support dcf MAC configuration net/ice: add extended stats net/ice: support queue information getting net/ice: implement power management Robin Zhang (1): net/ice: cleanup Tx buffers Steve Yang (2): net/ice: enable RSS RETA ops for DCF hardware net/ice: enable RSS HASH ops for DCF hardware doc/guides/nics/features/ice_dcf.ini | 10 + doc/guides/rel_notes/release_22_07.rst | 14 + drivers/net/ice/ice_dcf.c | 40 +- drivers/net/ice/ice_dcf.h | 29 +- drivers/net/ice/ice_dcf_ethdev.c | 820 - drivers/net/ice/ice_dcf_ethdev.h | 10 + 6 files changed, 885 insertions(+), 38 deletions(-) -- 2.33.1
[PATCH v6 01/12] net/ice: enable RSS RETA ops for DCF hardware
From: Steve Yang RSS RETA should be updated and queried by application, Add related ops ('.reta_update', '.reta_query') for DCF. Signed-off-by: Steve Yang Signed-off-by: Kevin Liu --- doc/guides/nics/features/ice_dcf.ini | 1 + doc/guides/rel_notes/release_22_07.rst | 3 + drivers/net/ice/ice_dcf.c | 2 +- drivers/net/ice/ice_dcf.h | 1 + drivers/net/ice/ice_dcf_ethdev.c | 77 ++ 5 files changed, 83 insertions(+), 1 deletion(-) diff --git a/doc/guides/nics/features/ice_dcf.ini b/doc/guides/nics/features/ice_dcf.ini index 54073f0b88..5221c99a9c 100644 --- a/doc/guides/nics/features/ice_dcf.ini +++ b/doc/guides/nics/features/ice_dcf.ini @@ -15,6 +15,7 @@ L3 checksum offload = P L4 checksum offload = P Inner L3 checksum= P Inner L4 checksum= P +RSS reta update = Y Basic stats = Y Linux= Y x86-32 = Y diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst index 90123bb807..cbdc90760c 100644 --- a/doc/guides/rel_notes/release_22_07.rst +++ b/doc/guides/rel_notes/release_22_07.rst @@ -60,6 +60,9 @@ New Features * Added Tx QoS queue rate limitation support. * Added quanta size configuration support. +* **Updated Intel ice driver.** + + * Added enable RSS RETA ops for DCF hardware. Removed Items - diff --git a/drivers/net/ice/ice_dcf.c b/drivers/net/ice/ice_dcf.c index 7f0c074b01..070d1b71ac 100644 --- a/drivers/net/ice/ice_dcf.c +++ b/drivers/net/ice/ice_dcf.c @@ -790,7 +790,7 @@ ice_dcf_configure_rss_key(struct ice_dcf_hw *hw) return err; } -static int +int ice_dcf_configure_rss_lut(struct ice_dcf_hw *hw) { struct virtchnl_rss_lut *rss_lut; diff --git a/drivers/net/ice/ice_dcf.h b/drivers/net/ice/ice_dcf.h index 6ec766ebda..b2c6aa2684 100644 --- a/drivers/net/ice/ice_dcf.h +++ b/drivers/net/ice/ice_dcf.h @@ -122,6 +122,7 @@ int ice_dcf_send_aq_cmd(void *dcf_hw, struct ice_aq_desc *desc, int ice_dcf_handle_vsi_update_event(struct ice_dcf_hw *hw); int ice_dcf_init_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw); void ice_dcf_uninit_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw); +int ice_dcf_configure_rss_lut(struct ice_dcf_hw *hw); int ice_dcf_init_rss(struct ice_dcf_hw *hw); int ice_dcf_configure_queues(struct ice_dcf_hw *hw); int ice_dcf_config_irq_map(struct ice_dcf_hw *hw); diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c index 59610e058f..1ac66ed990 100644 --- a/drivers/net/ice/ice_dcf_ethdev.c +++ b/drivers/net/ice/ice_dcf_ethdev.c @@ -761,6 +761,81 @@ ice_dcf_dev_flow_ops_get(struct rte_eth_dev *dev, return 0; } +static int +ice_dcf_dev_rss_reta_update(struct rte_eth_dev *dev, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size) +{ + struct ice_dcf_adapter *adapter = dev->data->dev_private; + struct ice_dcf_hw *hw = &adapter->real_hw; + uint8_t *lut; + uint16_t i, idx, shift; + int ret; + + if (!(hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF)) + return -ENOTSUP; + + if (reta_size != hw->vf_res->rss_lut_size) { + PMD_DRV_LOG(ERR, "The size of hash lookup table configured " + "(%d) doesn't match the number of hardware can " + "support (%d)", reta_size, hw->vf_res->rss_lut_size); + return -EINVAL; + } + + lut = rte_zmalloc("rss_lut", reta_size, 0); + if (!lut) { + PMD_DRV_LOG(ERR, "No memory can be allocated"); + return -ENOMEM; + } + /* store the old lut table temporarily */ + rte_memcpy(lut, hw->rss_lut, reta_size); + + for (i = 0; i < reta_size; i++) { + idx = i / RTE_ETH_RETA_GROUP_SIZE; + shift = i % RTE_ETH_RETA_GROUP_SIZE; + if (reta_conf[idx].mask & (1ULL << shift)) + lut[i] = 
reta_conf[idx].reta[shift]; + } + + rte_memcpy(hw->rss_lut, lut, reta_size); + /* send virtchnnl ops to configure rss*/ + ret = ice_dcf_configure_rss_lut(hw); + if (ret) /* revert back */ + rte_memcpy(hw->rss_lut, lut, reta_size); + rte_free(lut); + + return ret; +} + +static int +ice_dcf_dev_rss_reta_query(struct rte_eth_dev *dev, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size) +{ + struct ice_dcf_adapter *adapter = dev->data->dev_private; + struct ice_dcf_hw *hw = &adapter->real_hw; + uint16_t i, idx, shift; + + if (!(hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF)) + return -ENOTSUP; + + if (reta_size != hw->vf_res->rss_lut_size) { + PMD_DRV_LOG(ERR, "The size of hash lookup table configured " + "(%d) doesn't match the number of
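From the application's perspective these new ops are reached through the generic ethdev call; a small sketch that steers every RETA entry to queue 0, assuming reta_size matches the port's dev_info and is a multiple of RTE_ETH_RETA_GROUP_SIZE:

#include <string.h>
#include <rte_ethdev.h>

/* Sketch: point all RETA entries at queue 0 via the ethdev API. */
static int
reta_all_to_queue0(uint16_t port_id, uint16_t reta_size)
{
	struct rte_eth_rss_reta_entry64 conf[reta_size / RTE_ETH_RETA_GROUP_SIZE];
	uint16_t i;

	memset(conf, 0, sizeof(conf));	/* reta[] entries default to queue 0 */
	for (i = 0; i < reta_size; i++)
		conf[i / RTE_ETH_RETA_GROUP_SIZE].mask |=
			1ULL << (i % RTE_ETH_RETA_GROUP_SIZE);
	return rte_eth_dev_rss_reta_update(port_id, conf, reta_size);
}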
[PATCH v6 02/12] net/ice: enable RSS HASH ops for DCF hardware
From: Steve Yang RSS HASH should be updated and queried by application, Add related ops ('.rss_hash_update', '.rss_hash_conf_get') for DCF. Because DCF doesn't support configure RSS HASH, only HASH key can be updated within ops '.rss_hash_update'. Signed-off-by: Steve Yang Signed-off-by: Kevin Liu --- doc/guides/nics/features/ice_dcf.ini | 1 + doc/guides/rel_notes/release_22_07.rst | 1 + drivers/net/ice/ice_dcf.c | 2 +- drivers/net/ice/ice_dcf.h | 1 + drivers/net/ice/ice_dcf_ethdev.c | 51 ++ 5 files changed, 55 insertions(+), 1 deletion(-) diff --git a/doc/guides/nics/features/ice_dcf.ini b/doc/guides/nics/features/ice_dcf.ini index 5221c99a9c..d9c1b25407 100644 --- a/doc/guides/nics/features/ice_dcf.ini +++ b/doc/guides/nics/features/ice_dcf.ini @@ -16,6 +16,7 @@ L4 checksum offload = P Inner L3 checksum= P Inner L4 checksum= P RSS reta update = Y +RSS key update = Y Basic stats = Y Linux= Y x86-32 = Y diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst index cbdc90760c..cc2c243e81 100644 --- a/doc/guides/rel_notes/release_22_07.rst +++ b/doc/guides/rel_notes/release_22_07.rst @@ -63,6 +63,7 @@ New Features * **Updated Intel ice driver.** * Added enable RSS RETA ops for DCF hardware. + * Added enable RSS HASH ops for DCF hardware. Removed Items - diff --git a/drivers/net/ice/ice_dcf.c b/drivers/net/ice/ice_dcf.c index 070d1b71ac..89c0203ba3 100644 --- a/drivers/net/ice/ice_dcf.c +++ b/drivers/net/ice/ice_dcf.c @@ -758,7 +758,7 @@ ice_dcf_uninit_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw) hw->ets_config = NULL; } -static int +int ice_dcf_configure_rss_key(struct ice_dcf_hw *hw) { struct virtchnl_rss_key *rss_key; diff --git a/drivers/net/ice/ice_dcf.h b/drivers/net/ice/ice_dcf.h index b2c6aa2684..f0b45af5ae 100644 --- a/drivers/net/ice/ice_dcf.h +++ b/drivers/net/ice/ice_dcf.h @@ -122,6 +122,7 @@ int ice_dcf_send_aq_cmd(void *dcf_hw, struct ice_aq_desc *desc, int ice_dcf_handle_vsi_update_event(struct ice_dcf_hw *hw); int ice_dcf_init_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw); void ice_dcf_uninit_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw); +int ice_dcf_configure_rss_key(struct ice_dcf_hw *hw); int ice_dcf_configure_rss_lut(struct ice_dcf_hw *hw); int ice_dcf_init_rss(struct ice_dcf_hw *hw); int ice_dcf_configure_queues(struct ice_dcf_hw *hw); diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c index 1ac66ed990..ccad7fc304 100644 --- a/drivers/net/ice/ice_dcf_ethdev.c +++ b/drivers/net/ice/ice_dcf_ethdev.c @@ -836,6 +836,55 @@ ice_dcf_dev_rss_reta_query(struct rte_eth_dev *dev, return 0; } +static int +ice_dcf_dev_rss_hash_update(struct rte_eth_dev *dev, + struct rte_eth_rss_conf *rss_conf) +{ + struct ice_dcf_adapter *adapter = dev->data->dev_private; + struct ice_dcf_hw *hw = &adapter->real_hw; + + if (!(hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF)) + return -ENOTSUP; + + /* HENA setting, it is enabled by default, no change */ + if (!rss_conf->rss_key || rss_conf->rss_key_len == 0) { + PMD_DRV_LOG(DEBUG, "No key to be configured"); + return 0; + } else if (rss_conf->rss_key_len != hw->vf_res->rss_key_size) { + PMD_DRV_LOG(ERR, "The size of hash key configured " + "(%d) doesn't match the size of hardware can " + "support (%d)", rss_conf->rss_key_len, + hw->vf_res->rss_key_size); + return -EINVAL; + } + + rte_memcpy(hw->rss_key, rss_conf->rss_key, rss_conf->rss_key_len); + + return ice_dcf_configure_rss_key(hw); +} + +static int +ice_dcf_dev_rss_hash_conf_get(struct 
rte_eth_dev *dev, + struct rte_eth_rss_conf *rss_conf) +{ + struct ice_dcf_adapter *adapter = dev->data->dev_private; + struct ice_dcf_hw *hw = &adapter->real_hw; + + if (!(hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF)) + return -ENOTSUP; + + /* Just set it to default value now. */ + rss_conf->rss_hf = ICE_RSS_OFFLOAD_ALL; + + if (!rss_conf->rss_key) + return 0; + + rss_conf->rss_key_len = hw->vf_res->rss_key_size; + rte_memcpy(rss_conf->rss_key, hw->rss_key, rss_conf->rss_key_len); + + return 0; +} + #define ICE_DCF_32_BIT_WIDTH (CHAR_BIT * 4) #define ICE_DCF_48_BIT_WIDTH (CHAR_BIT * 6) #define ICE_DCF_48_BIT_MASK RTE_LEN2MASK(ICE_DCF_48_BIT_WIDTH, uint64_t) @@ -1184,6 +1233,8 @@ static const struct eth_dev_ops ice_dcf_eth_dev_ops = { .tm_ops_get = ice_dcf_tm_ops_get, .reta_update = ice_dcf_dev_rss_reta_update, .reta_query = ice_dcf_dev_rss_reta_
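Since only the key is configurable on DCF, an application updating the hash would go through the usual ethdev entry point; a sketch where the key length is assumed to have been read from dev_info.hash_key_size beforehand:

#include <rte_ethdev.h>

/* Sketch: update only the RSS key; DCF ignores rss_hf changes. */
static int
update_rss_key(uint16_t port_id, uint8_t *key, uint8_t key_len)
{
	struct rte_eth_rss_conf rss_conf = {
		.rss_key = key,
		.rss_key_len = key_len,	/* must match dev_info.hash_key_size */
		.rss_hf = 0,
	};

	return rte_eth_dev_rss_hash_update(port_id, &rss_conf);
}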
[PATCH v6 03/12] net/ice: cleanup Tx buffers
From: Robin Zhang Add support for ops rte_eth_tx_done_cleanup in dcf Signed-off-by: Robin Zhang Signed-off-by: Kevin Liu --- doc/guides/rel_notes/release_22_07.rst | 1 + drivers/net/ice/ice_dcf_ethdev.c | 1 + 2 files changed, 2 insertions(+) diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst index cc2c243e81..bbd3d296de 100644 --- a/doc/guides/rel_notes/release_22_07.rst +++ b/doc/guides/rel_notes/release_22_07.rst @@ -64,6 +64,7 @@ New Features * Added enable RSS RETA ops for DCF hardware. * Added enable RSS HASH ops for DCF hardware. + * Added cleanup Tx buffers. Removed Items - diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c index ccad7fc304..d8b5961514 100644 --- a/drivers/net/ice/ice_dcf_ethdev.c +++ b/drivers/net/ice/ice_dcf_ethdev.c @@ -1235,6 +1235,7 @@ static const struct eth_dev_ops ice_dcf_eth_dev_ops = { .reta_query = ice_dcf_dev_rss_reta_query, .rss_hash_update = ice_dcf_dev_rss_hash_update, .rss_hash_conf_get = ice_dcf_dev_rss_hash_conf_get, + .tx_done_cleanup = ice_tx_done_cleanup, }; static int -- 2.33.1
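Applications reach this op through rte_eth_tx_done_cleanup(); a one-liner sketch that reclaims up to 64 already-transmitted mbufs on one Tx queue (passing 0 as free_cnt asks the driver to free all it can):

#include <rte_ethdev.h>

/* Sketch: reclaim completed Tx mbufs on one queue. */
static int
reclaim_tx_mbufs(uint16_t port_id, uint16_t queue_id)
{
	return rte_eth_tx_done_cleanup(port_id, queue_id, 64);
}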
[PATCH v6 04/12] net/ice: add ops MTU-SET to dcf
From: Jie Wang add API "mtu_set" to dcf, and it can configure the port mtu through cmdline. Signed-off-by: Jie Wang Signed-off-by: Kevin Liu --- doc/guides/nics/features/ice_dcf.ini | 1 + doc/guides/rel_notes/release_22_07.rst | 1 + drivers/net/ice/ice_dcf_ethdev.c | 14 ++ drivers/net/ice/ice_dcf_ethdev.h | 6 ++ 4 files changed, 22 insertions(+) diff --git a/doc/guides/nics/features/ice_dcf.ini b/doc/guides/nics/features/ice_dcf.ini index d9c1b25407..be34ab4692 100644 --- a/doc/guides/nics/features/ice_dcf.ini +++ b/doc/guides/nics/features/ice_dcf.ini @@ -17,6 +17,7 @@ Inner L3 checksum= P Inner L4 checksum= P RSS reta update = Y RSS key update = Y +MTU update = Y Basic stats = Y Linux= Y x86-32 = Y diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst index bbd3d296de..dc37de85f3 100644 --- a/doc/guides/rel_notes/release_22_07.rst +++ b/doc/guides/rel_notes/release_22_07.rst @@ -65,6 +65,7 @@ New Features * Added enable RSS RETA ops for DCF hardware. * Added enable RSS HASH ops for DCF hardware. * Added cleanup Tx buffers. + * Added add ops MTU-SET to dcf. Removed Items - diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c index d8b5961514..06d752fd61 100644 --- a/drivers/net/ice/ice_dcf_ethdev.c +++ b/drivers/net/ice/ice_dcf_ethdev.c @@ -1081,6 +1081,19 @@ ice_dcf_link_update(struct rte_eth_dev *dev, return rte_eth_linkstatus_set(dev, &new_link); } +static int +ice_dcf_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu __rte_unused) +{ + /* mtu setting is forbidden if port is start */ + if (dev->data->dev_started != 0) { + PMD_DRV_LOG(ERR, "port %d must be stopped before configuration", + dev->data->port_id); + return -EBUSY; + } + + return 0; +} + bool ice_dcf_adminq_need_retry(struct ice_adapter *ad) { @@ -1236,6 +1249,7 @@ static const struct eth_dev_ops ice_dcf_eth_dev_ops = { .rss_hash_update = ice_dcf_dev_rss_hash_update, .rss_hash_conf_get = ice_dcf_dev_rss_hash_conf_get, .tx_done_cleanup = ice_tx_done_cleanup, + .mtu_set = ice_dcf_dev_mtu_set, }; static int diff --git a/drivers/net/ice/ice_dcf_ethdev.h b/drivers/net/ice/ice_dcf_ethdev.h index 11a1305038..f2faf26f58 100644 --- a/drivers/net/ice/ice_dcf_ethdev.h +++ b/drivers/net/ice/ice_dcf_ethdev.h @@ -15,6 +15,12 @@ #define ICE_DCF_MAX_RINGS 1 +#define ICE_DCF_FRAME_SIZE_MAX 9728 +#define ICE_DCF_VLAN_TAG_SIZE 4 +#define ICE_DCF_ETH_OVERHEAD \ + (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + ICE_DCF_VLAN_TAG_SIZE * 2) +#define ICE_DCF_ETH_MAX_LEN (RTE_ETHER_MTU + ICE_DCF_ETH_OVERHEAD) + struct ice_dcf_queue { uint64_t dummy; }; -- 2.33.1
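A sketch of the order of operations this implies for an application; the stop/start around the MTU change reflects the -EBUSY check in ice_dcf_dev_mtu_set() above (helper name illustrative):

#include <rte_ethdev.h>

static int
dcf_set_mtu(uint16_t port_id, uint16_t mtu)
{
    int ret;

    /* The DCF op rejects MTU changes on a started port with -EBUSY. */
    ret = rte_eth_dev_stop(port_id);
    if (ret != 0)
        return ret;

    ret = rte_eth_dev_set_mtu(port_id, mtu);
    if (ret != 0)
        return ret;

    return rte_eth_dev_start(port_id);
}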
[PATCH v6 05/12] net/ice: add ops dev-supported-ptypes-get to dcf
From: Jie Wang add API "dev_supported_ptypes_get" to dcf, that dcf pmd can get ptypes through the new API. Signed-off-by: Jie Wang Signed-off-by: Kevin Liu --- doc/guides/rel_notes/release_22_07.rst | 1 + drivers/net/ice/ice_dcf_ethdev.c | 80 -- 2 files changed, 50 insertions(+), 31 deletions(-) diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst index dc37de85f3..a39196c605 100644 --- a/doc/guides/rel_notes/release_22_07.rst +++ b/doc/guides/rel_notes/release_22_07.rst @@ -66,6 +66,7 @@ New Features * Added enable RSS HASH ops for DCF hardware. * Added cleanup Tx buffers. * Added add ops MTU-SET to dcf. + * Added add ops dev-supported-ptypes-get to dcf. Removed Items - diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c index 06d752fd61..6a577a6582 100644 --- a/drivers/net/ice/ice_dcf_ethdev.c +++ b/drivers/net/ice/ice_dcf_ethdev.c @@ -1218,38 +1218,56 @@ ice_dcf_dev_reset(struct rte_eth_dev *dev) return ret; } +static const uint32_t * +ice_dcf_dev_supported_ptypes_get(struct rte_eth_dev *dev __rte_unused) +{ + static const uint32_t ptypes[] = { + RTE_PTYPE_L2_ETHER, + RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, + RTE_PTYPE_L4_FRAG, + RTE_PTYPE_L4_ICMP, + RTE_PTYPE_L4_NONFRAG, + RTE_PTYPE_L4_SCTP, + RTE_PTYPE_L4_TCP, + RTE_PTYPE_L4_UDP, + RTE_PTYPE_UNKNOWN + }; + return ptypes; +} + static const struct eth_dev_ops ice_dcf_eth_dev_ops = { - .dev_start = ice_dcf_dev_start, - .dev_stop= ice_dcf_dev_stop, - .dev_close = ice_dcf_dev_close, - .dev_reset = ice_dcf_dev_reset, - .dev_configure = ice_dcf_dev_configure, - .dev_infos_get = ice_dcf_dev_info_get, - .rx_queue_setup = ice_rx_queue_setup, - .tx_queue_setup = ice_tx_queue_setup, - .rx_queue_release= ice_dev_rx_queue_release, - .tx_queue_release= ice_dev_tx_queue_release, - .rx_queue_start = ice_dcf_rx_queue_start, - .tx_queue_start = ice_dcf_tx_queue_start, - .rx_queue_stop = ice_dcf_rx_queue_stop, - .tx_queue_stop = ice_dcf_tx_queue_stop, - .link_update = ice_dcf_link_update, - .stats_get = ice_dcf_stats_get, - .stats_reset = ice_dcf_stats_reset, - .promiscuous_enable = ice_dcf_dev_promiscuous_enable, - .promiscuous_disable = ice_dcf_dev_promiscuous_disable, - .allmulticast_enable = ice_dcf_dev_allmulticast_enable, - .allmulticast_disable= ice_dcf_dev_allmulticast_disable, - .flow_ops_get= ice_dcf_dev_flow_ops_get, - .udp_tunnel_port_add = ice_dcf_dev_udp_tunnel_port_add, - .udp_tunnel_port_del = ice_dcf_dev_udp_tunnel_port_del, - .tm_ops_get = ice_dcf_tm_ops_get, - .reta_update = ice_dcf_dev_rss_reta_update, - .reta_query = ice_dcf_dev_rss_reta_query, - .rss_hash_update = ice_dcf_dev_rss_hash_update, - .rss_hash_conf_get = ice_dcf_dev_rss_hash_conf_get, - .tx_done_cleanup = ice_tx_done_cleanup, - .mtu_set = ice_dcf_dev_mtu_set, + .dev_start= ice_dcf_dev_start, + .dev_stop = ice_dcf_dev_stop, + .dev_close= ice_dcf_dev_close, + .dev_reset= ice_dcf_dev_reset, + .dev_configure= ice_dcf_dev_configure, + .dev_infos_get= ice_dcf_dev_info_get, + .dev_supported_ptypes_get = ice_dcf_dev_supported_ptypes_get, + .rx_queue_setup = ice_rx_queue_setup, + .tx_queue_setup = ice_tx_queue_setup, + .rx_queue_release = ice_dev_rx_queue_release, + .tx_queue_release = ice_dev_tx_queue_release, + .rx_queue_start = ice_dcf_rx_queue_start, + .tx_queue_start = ice_dcf_tx_queue_start, + .rx_queue_stop= ice_dcf_rx_queue_stop, + .tx_queue_stop= ice_dcf_tx_queue_stop, + .link_update = ice_dcf_link_update, + .stats_get= ice_dcf_stats_get, + .stats_reset = ice_dcf_stats_reset, + .promiscuous_enable = 
ice_dcf_dev_promiscuous_enable, + .promiscuous_disable = ice_dcf_dev_promiscuous_disable, + .allmulticast_enable = ice_dcf_dev_allmulticast_enable, + .allmulticast_disable = ice_dcf_dev_allmulticast_disable, + .flow_ops_get = ice_dcf_dev_flow_ops_get, + .udp_tunnel_port_add = ice_dcf_dev_udp_tunnel_port_add, + .udp_tunnel_port_del = ice_dcf_dev_udp
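A sketch of retrieving the list through the generic API; the array size of 16 comfortably covers the ptypes table added above:

#include <stdio.h>
#include <rte_common.h>
#include <rte_ethdev.h>
#include <rte_mbuf_ptype.h>

static void
print_supported_ptypes(uint16_t port_id)
{
    uint32_t ptypes[16];
    int i, n;

    n = rte_eth_dev_get_supported_ptypes(port_id, RTE_PTYPE_ALL_MASK,
                                         ptypes, RTE_DIM(ptypes));
    if (n > (int)RTE_DIM(ptypes))
        n = RTE_DIM(ptypes);
    for (i = 0; i < n; i++)
        printf("ptype: 0x%08x\n", ptypes[i]);
}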
[PATCH v6 06/12] net/ice: support dcf promisc configuration
From: Alvin Zhang Support configuration of unicast and multicast promisc on dcf. Signed-off-by: Alvin Zhang Signed-off-by: Kevin Liu --- doc/guides/nics/features/ice_dcf.ini | 2 + doc/guides/rel_notes/release_22_07.rst | 1 + drivers/net/ice/ice_dcf_ethdev.c | 77 -- drivers/net/ice/ice_dcf_ethdev.h | 3 + 4 files changed, 79 insertions(+), 4 deletions(-) diff --git a/doc/guides/nics/features/ice_dcf.ini b/doc/guides/nics/features/ice_dcf.ini index be34ab4692..fe3ada8733 100644 --- a/doc/guides/nics/features/ice_dcf.ini +++ b/doc/guides/nics/features/ice_dcf.ini @@ -18,6 +18,8 @@ Inner L4 checksum= P RSS reta update = Y RSS key update = Y MTU update = Y +Promiscuous mode = Y +Allmulticast mode= Y Basic stats = Y Linux= Y x86-32 = Y diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst index a39196c605..c7ba4453ff 100644 --- a/doc/guides/rel_notes/release_22_07.rst +++ b/doc/guides/rel_notes/release_22_07.rst @@ -67,6 +67,7 @@ New Features * Added cleanup Tx buffers. * Added add ops MTU-SET to dcf. * Added add ops dev-supported-ptypes-get to dcf. + * Added support dcf promisc configuration. Removed Items - diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c index 6a577a6582..87d281ee93 100644 --- a/drivers/net/ice/ice_dcf_ethdev.c +++ b/drivers/net/ice/ice_dcf_ethdev.c @@ -727,27 +727,95 @@ ice_dcf_dev_info_get(struct rte_eth_dev *dev, } static int -ice_dcf_dev_promiscuous_enable(__rte_unused struct rte_eth_dev *dev) +dcf_config_promisc(struct ice_dcf_adapter *adapter, + bool enable_unicast, + bool enable_multicast) { + struct ice_dcf_hw *hw = &adapter->real_hw; + struct virtchnl_promisc_info promisc; + struct dcf_virtchnl_cmd args; + int err; + + promisc.flags = 0; + promisc.vsi_id = hw->vsi_res->vsi_id; + + if (enable_unicast) + promisc.flags |= FLAG_VF_UNICAST_PROMISC; + + if (enable_multicast) + promisc.flags |= FLAG_VF_MULTICAST_PROMISC; + + memset(&args, 0, sizeof(args)); + args.v_op = VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE; + args.req_msg = (uint8_t *)&promisc; + args.req_msglen = sizeof(promisc); + + err = ice_dcf_execute_virtchnl_cmd(hw, &args); + if (err) { + PMD_DRV_LOG(ERR, + "fail to execute command VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE"); + return err; + } + + adapter->promisc_unicast_enabled = enable_unicast; + adapter->promisc_multicast_enabled = enable_multicast; return 0; } +static int +ice_dcf_dev_promiscuous_enable(__rte_unused struct rte_eth_dev *dev) +{ + struct ice_dcf_adapter *adapter = dev->data->dev_private; + + if (adapter->promisc_unicast_enabled) { + PMD_DRV_LOG(INFO, "promiscuous has been enabled"); + return 0; + } + + return dcf_config_promisc(adapter, true, + adapter->promisc_multicast_enabled); +} + static int ice_dcf_dev_promiscuous_disable(__rte_unused struct rte_eth_dev *dev) { - return 0; + struct ice_dcf_adapter *adapter = dev->data->dev_private; + + if (!adapter->promisc_unicast_enabled) { + PMD_DRV_LOG(INFO, "promiscuous has been disabled"); + return 0; + } + + return dcf_config_promisc(adapter, false, + adapter->promisc_multicast_enabled); } static int ice_dcf_dev_allmulticast_enable(__rte_unused struct rte_eth_dev *dev) { - return 0; + struct ice_dcf_adapter *adapter = dev->data->dev_private; + + if (adapter->promisc_multicast_enabled) { + PMD_DRV_LOG(INFO, "allmulticast has been enabled"); + return 0; + } + + return dcf_config_promisc(adapter, adapter->promisc_unicast_enabled, + true); } static int ice_dcf_dev_allmulticast_disable(__rte_unused struct rte_eth_dev *dev) { - return 0; + 
struct ice_dcf_adapter *adapter = dev->data->dev_private; + + if (!adapter->promisc_multicast_enabled) { + PMD_DRV_LOG(INFO, "allmulticast has been disabled"); + return 0; + } + + return dcf_config_promisc(adapter, adapter->promisc_unicast_enabled, + false); } static int @@ -1299,6 +1367,7 @@ ice_dcf_dev_init(struct rte_eth_dev *eth_dev) return -1; } + dcf_config_promisc(adapter, false, false); return 0; } diff --git a/drivers/net/ice/ice_dcf_ethdev.h b/drivers/net/ice/ice_dcf_ethdev.h index f2faf26f58..22e450527b 100644 --- a/drivers/net/ice/ice_dcf_ethdev.h +++ b/drivers/net/ice/ice_dcf_ethdev.h @@ -33,6 +33,9 @@ struct ice_dcf_adapter
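From the application side the usual ethdev calls now take effect; a sketch (error handling abbreviated, helper name illustrative):

#include <rte_ethdev.h>

/* Each state change results in one VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE
 * message carrying the combined unicast + multicast promiscuous state.
 */
static int
enable_promisc(uint16_t port_id)
{
    int ret = rte_eth_promiscuous_enable(port_id);

    if (ret != 0)
        return ret;
    return rte_eth_allmulticast_enable(port_id);
}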
[PATCH v6 07/12] net/ice: support dcf MAC configuration
Below PMD ops are supported in this patch: .mac_addr_add = dcf_dev_add_mac_addr .mac_addr_remove = dcf_dev_del_mac_addr .set_mc_addr_list = dcf_set_mc_addr_list .mac_addr_set = dcf_dev_set_default_mac_addr Signed-off-by: Alvin Zhang Signed-off-by: Kevin Liu --- doc/guides/nics/features/ice_dcf.ini | 1 + doc/guides/rel_notes/release_22_07.rst | 1 + drivers/net/ice/ice_dcf.c | 9 +- drivers/net/ice/ice_dcf.h | 4 +- drivers/net/ice/ice_dcf_ethdev.c | 218 - drivers/net/ice/ice_dcf_ethdev.h | 5 +- 6 files changed, 228 insertions(+), 10 deletions(-) diff --git a/doc/guides/nics/features/ice_dcf.ini b/doc/guides/nics/features/ice_dcf.ini index fe3ada8733..c9bdbcd6cc 100644 --- a/doc/guides/nics/features/ice_dcf.ini +++ b/doc/guides/nics/features/ice_dcf.ini @@ -20,6 +20,7 @@ RSS key update = Y MTU update = Y Promiscuous mode = Y Allmulticast mode= Y +Unicast MAC filter = Y Basic stats = Y Linux= Y x86-32 = Y diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst index c7ba4453ff..e29ec16720 100644 --- a/doc/guides/rel_notes/release_22_07.rst +++ b/doc/guides/rel_notes/release_22_07.rst @@ -68,6 +68,7 @@ New Features * Added add ops MTU-SET to dcf. * Added add ops dev-supported-ptypes-get to dcf. * Added support dcf promisc configuration. + * Added support dcf MAC configuration. Removed Items - diff --git a/drivers/net/ice/ice_dcf.c b/drivers/net/ice/ice_dcf.c index 89c0203ba3..55ae68c456 100644 --- a/drivers/net/ice/ice_dcf.c +++ b/drivers/net/ice/ice_dcf.c @@ -1089,10 +1089,11 @@ ice_dcf_query_stats(struct ice_dcf_hw *hw, } int -ice_dcf_add_del_all_mac_addr(struct ice_dcf_hw *hw, bool add) +ice_dcf_add_del_all_mac_addr(struct ice_dcf_hw *hw, +struct rte_ether_addr *addr, +bool add, uint8_t type) { struct virtchnl_ether_addr_list *list; - struct rte_ether_addr *addr; struct dcf_virtchnl_cmd args; int len, err = 0; @@ -1105,7 +1106,6 @@ ice_dcf_add_del_all_mac_addr(struct ice_dcf_hw *hw, bool add) } len = sizeof(struct virtchnl_ether_addr_list); - addr = hw->eth_dev->data->mac_addrs; len += sizeof(struct virtchnl_ether_addr); list = rte_zmalloc(NULL, len, 0); @@ -1116,9 +1116,10 @@ ice_dcf_add_del_all_mac_addr(struct ice_dcf_hw *hw, bool add) rte_memcpy(list->list[0].addr, addr->addr_bytes, sizeof(addr->addr_bytes)); + PMD_DRV_LOG(DEBUG, "add/rm mac:" RTE_ETHER_ADDR_PRT_FMT, RTE_ETHER_ADDR_BYTES(addr)); - + list->list[0].type = type; list->vsi_id = hw->vsi_res->vsi_id; list->num_elements = 1; diff --git a/drivers/net/ice/ice_dcf.h b/drivers/net/ice/ice_dcf.h index f0b45af5ae..78df202a77 100644 --- a/drivers/net/ice/ice_dcf.h +++ b/drivers/net/ice/ice_dcf.h @@ -131,7 +131,9 @@ int ice_dcf_switch_queue(struct ice_dcf_hw *hw, uint16_t qid, bool rx, bool on); int ice_dcf_disable_queues(struct ice_dcf_hw *hw); int ice_dcf_query_stats(struct ice_dcf_hw *hw, struct virtchnl_eth_stats *pstats); -int ice_dcf_add_del_all_mac_addr(struct ice_dcf_hw *hw, bool add); +int ice_dcf_add_del_all_mac_addr(struct ice_dcf_hw *hw, +struct rte_ether_addr *addr, bool add, +uint8_t type); int ice_dcf_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete); void ice_dcf_tm_conf_init(struct rte_eth_dev *dev); diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c index 87d281ee93..0d944f9fd2 100644 --- a/drivers/net/ice/ice_dcf_ethdev.c +++ b/drivers/net/ice/ice_dcf_ethdev.c @@ -26,6 +26,12 @@ #include "ice_dcf_ethdev.h" #include "ice_rxtx.h" +#define DCF_NUM_MACADDR_MAX 64 + +static int dcf_add_del_mc_addr_list(struct ice_dcf_hw *hw, + struct 
rte_ether_addr *mc_addrs, + uint32_t mc_addrs_num, bool add); + static int ice_dcf_dev_udp_tunnel_port_add(struct rte_eth_dev *dev, struct rte_eth_udp_tunnel *udp_tunnel); @@ -561,12 +567,22 @@ ice_dcf_dev_start(struct rte_eth_dev *dev) return ret; } - ret = ice_dcf_add_del_all_mac_addr(hw, true); + ret = ice_dcf_add_del_all_mac_addr(hw, hw->eth_dev->data->mac_addrs, + true, VIRTCHNL_ETHER_ADDR_PRIMARY); if (ret) { PMD_DRV_LOG(ERR, "Failed to add mac addr"); return ret; } + if (dcf_ad->mc_addrs_num) { + /* flush previous addresses */ + ret = dcf_add_del_mc_addr_list(hw, dcf_ad->mc_addrs,
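A sketch of the application-facing calls these ops serve; the unicast address, the multicast list, and the helper name are illustrative:

#include <rte_ethdev.h>
#include <rte_ether.h>

static int
add_filters(uint16_t port_id, struct rte_ether_addr *mc, uint32_t nb_mc)
{
    /* Locally administered unicast address, used only as an example. */
    struct rte_ether_addr uc = {
        .addr_bytes = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 } };
    int ret;

    /* Add a secondary unicast filter; the pool index is passed as 0. */
    ret = rte_eth_dev_mac_addr_add(port_id, &uc, 0);
    if (ret != 0)
        return ret;

    /* Replace the whole multicast list (bounded by DCF_NUM_MACADDR_MAX)
     * in one call.
     */
    return rte_eth_dev_set_mc_addr_list(port_id, mc, nb_mc);
}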
[PATCH v6 08/12] net/ice: support dcf VLAN filter and offload configuration
From: Alvin Zhang Below PMD ops are supported in this patch: .vlan_filter_set = dcf_dev_vlan_filter_set .vlan_offload_set = dcf_dev_vlan_offload_set Signed-off-by: Alvin Zhang Signed-off-by: Kevin Liu --- doc/guides/nics/features/ice_dcf.ini | 2 + doc/guides/rel_notes/release_22_07.rst | 1 + drivers/net/ice/ice_dcf_ethdev.c | 101 + 3 files changed, 104 insertions(+) diff --git a/doc/guides/nics/features/ice_dcf.ini b/doc/guides/nics/features/ice_dcf.ini index c9bdbcd6cc..01e7527915 100644 --- a/doc/guides/nics/features/ice_dcf.ini +++ b/doc/guides/nics/features/ice_dcf.ini @@ -21,6 +21,8 @@ MTU update = Y Promiscuous mode = Y Allmulticast mode= Y Unicast MAC filter = Y +VLAN filter = Y +VLAN offload = Y Basic stats = Y Linux= Y x86-32 = Y diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst index e29ec16720..268f3bba9a 100644 --- a/doc/guides/rel_notes/release_22_07.rst +++ b/doc/guides/rel_notes/release_22_07.rst @@ -69,6 +69,7 @@ New Features * Added add ops dev-supported-ptypes-get to dcf. * Added support dcf promisc configuration. * Added support dcf MAC configuration. + * Added support dcf VLAN filter and offload configuration. Removed Items - diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c index 0d944f9fd2..e58cdf47d2 100644 --- a/drivers/net/ice/ice_dcf_ethdev.c +++ b/drivers/net/ice/ice_dcf_ethdev.c @@ -1026,6 +1026,105 @@ dcf_dev_set_default_mac_addr(struct rte_eth_dev *dev, return 0; } +static int +dcf_add_del_vlan(struct ice_dcf_hw *hw, uint16_t vlanid, bool add) +{ + struct virtchnl_vlan_filter_list *vlan_list; + uint8_t cmd_buffer[sizeof(struct virtchnl_vlan_filter_list) + + sizeof(uint16_t)]; + struct dcf_virtchnl_cmd args; + int err; + + vlan_list = (struct virtchnl_vlan_filter_list *)cmd_buffer; + vlan_list->vsi_id = hw->vsi_res->vsi_id; + vlan_list->num_elements = 1; + vlan_list->vlan_id[0] = vlanid; + + memset(&args, 0, sizeof(args)); + args.v_op = add ? VIRTCHNL_OP_ADD_VLAN : VIRTCHNL_OP_DEL_VLAN; + args.req_msg = cmd_buffer; + args.req_msglen = sizeof(cmd_buffer); + err = ice_dcf_execute_virtchnl_cmd(hw, &args); + if (err) + PMD_DRV_LOG(ERR, "fail to execute command %s", + add ? 
"OP_ADD_VLAN" : "OP_DEL_VLAN"); + + return err; +} + +static int +dcf_enable_vlan_strip(struct ice_dcf_hw *hw) +{ + struct dcf_virtchnl_cmd args; + int ret; + + memset(&args, 0, sizeof(args)); + args.v_op = VIRTCHNL_OP_ENABLE_VLAN_STRIPPING; + ret = ice_dcf_execute_virtchnl_cmd(hw, &args); + if (ret) + PMD_DRV_LOG(ERR, + "Failed to execute command of OP_ENABLE_VLAN_STRIPPING"); + + return ret; +} + +static int +dcf_disable_vlan_strip(struct ice_dcf_hw *hw) +{ + struct dcf_virtchnl_cmd args; + int ret; + + memset(&args, 0, sizeof(args)); + args.v_op = VIRTCHNL_OP_DISABLE_VLAN_STRIPPING; + ret = ice_dcf_execute_virtchnl_cmd(hw, &args); + if (ret) + PMD_DRV_LOG(ERR, + "Failed to execute command of OP_DISABLE_VLAN_STRIPPING"); + + return ret; +} + +static int +dcf_dev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on) +{ + struct ice_dcf_adapter *adapter = dev->data->dev_private; + struct ice_dcf_hw *hw = &adapter->real_hw; + int err; + + if (!(hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)) + return -ENOTSUP; + + err = dcf_add_del_vlan(hw, vlan_id, on); + if (err) + return -EIO; + return 0; +} + +static int +dcf_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask) +{ + struct ice_dcf_adapter *adapter = dev->data->dev_private; + struct ice_dcf_hw *hw = &adapter->real_hw; + struct rte_eth_conf *dev_conf = &dev->data->dev_conf; + int err; + + if (!(hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)) + return -ENOTSUP; + + /* Vlan stripping setting */ + if (mask & RTE_ETH_VLAN_STRIP_MASK) { + /* Enable or disable VLAN stripping */ + if (dev_conf->rxmode.offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP) + err = dcf_enable_vlan_strip(hw); + else + err = dcf_disable_vlan_strip(hw); + + if (err) + return -EIO; + } + return 0; +} + static int ice_dcf_dev_flow_ops_get(struct rte_eth_dev *dev, const struct rte_flow_ops **ops) @@ -1538,6 +1637,8 @@ static const struct eth_dev_ops ice_dcf_eth_dev_ops = { .mac_addr_remove = dcf
[PATCH v6 09/12] net/ice: add extended stats
Add implementation of xstats() functions in DCF PMD. Signed-off-by: Kevin Liu --- doc/guides/nics/features/ice_dcf.ini | 1 + doc/guides/rel_notes/release_22_07.rst | 1 + drivers/net/ice/ice_dcf.h | 22 drivers/net/ice/ice_dcf_ethdev.c | 75 ++ 4 files changed, 99 insertions(+) diff --git a/doc/guides/nics/features/ice_dcf.ini b/doc/guides/nics/features/ice_dcf.ini index 01e7527915..54ea7f150c 100644 --- a/doc/guides/nics/features/ice_dcf.ini +++ b/doc/guides/nics/features/ice_dcf.ini @@ -23,6 +23,7 @@ Allmulticast mode= Y Unicast MAC filter = Y VLAN filter = Y VLAN offload = Y +Extended stats = Y Basic stats = Y Linux= Y x86-32 = Y diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst index 268f3bba9a..1f404a6ee5 100644 --- a/doc/guides/rel_notes/release_22_07.rst +++ b/doc/guides/rel_notes/release_22_07.rst @@ -70,6 +70,7 @@ New Features * Added support dcf promisc configuration. * Added support dcf MAC configuration. * Added support dcf VLAN filter and offload configuration. + * Added add extended stats. Removed Items - diff --git a/drivers/net/ice/ice_dcf.h b/drivers/net/ice/ice_dcf.h index 78df202a77..44a61404c3 100644 --- a/drivers/net/ice/ice_dcf.h +++ b/drivers/net/ice/ice_dcf.h @@ -15,6 +15,12 @@ #include "base/ice_type.h" #include "ice_logs.h" +/* ICE_DCF_DEV_PRIVATE_TO */ +#define ICE_DCF_DEV_PRIVATE_TO_ADAPTER(adapter) \ + ((struct ice_dcf_adapter *)adapter) +#define ICE_DCF_DEV_PRIVATE_TO_VF(adapter) \ + (&((struct ice_dcf_adapter *)adapter)->vf) + struct dcf_virtchnl_cmd { TAILQ_ENTRY(dcf_virtchnl_cmd) next; @@ -74,6 +80,22 @@ struct ice_dcf_tm_conf { bool committed; }; +struct ice_dcf_eth_stats { + u64 rx_bytes; /* gorc */ + u64 rx_unicast; /* uprc */ + u64 rx_multicast; /* mprc */ + u64 rx_broadcast; /* bprc */ + u64 rx_discards;/* rdpc */ + u64 rx_unknown_protocol;/* rupp */ + u64 tx_bytes; /* gotc */ + u64 tx_unicast; /* uptc */ + u64 tx_multicast; /* mptc */ + u64 tx_broadcast; /* bptc */ + u64 tx_discards;/* tdpc */ + u64 tx_errors; /* tepc */ + u64 rx_no_desc; /* repc */ + u64 rx_errors; /* repc */ +}; struct ice_dcf_hw { struct iavf_hw avf; diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c index e58cdf47d2..6503700e02 100644 --- a/drivers/net/ice/ice_dcf_ethdev.c +++ b/drivers/net/ice/ice_dcf_ethdev.c @@ -45,6 +45,30 @@ ice_dcf_dev_init(struct rte_eth_dev *eth_dev); static int ice_dcf_dev_uninit(struct rte_eth_dev *eth_dev); +struct rte_ice_dcf_xstats_name_off { + char name[RTE_ETH_XSTATS_NAME_SIZE]; + unsigned int offset; +}; + +static const struct rte_ice_dcf_xstats_name_off rte_ice_dcf_stats_strings[] = { + {"rx_bytes", offsetof(struct ice_dcf_eth_stats, rx_bytes)}, + {"rx_unicast_packets", offsetof(struct ice_dcf_eth_stats, rx_unicast)}, + {"rx_multicast_packets", offsetof(struct ice_dcf_eth_stats, rx_multicast)}, + {"rx_broadcast_packets", offsetof(struct ice_dcf_eth_stats, rx_broadcast)}, + {"rx_dropped_packets", offsetof(struct ice_dcf_eth_stats, rx_discards)}, + {"rx_unknown_protocol_packets", offsetof(struct ice_dcf_eth_stats, + rx_unknown_protocol)}, + {"tx_bytes", offsetof(struct ice_dcf_eth_stats, tx_bytes)}, + {"tx_unicast_packets", offsetof(struct ice_dcf_eth_stats, tx_unicast)}, + {"tx_multicast_packets", offsetof(struct ice_dcf_eth_stats, tx_multicast)}, + {"tx_broadcast_packets", offsetof(struct ice_dcf_eth_stats, tx_broadcast)}, + {"tx_dropped_packets", offsetof(struct ice_dcf_eth_stats, tx_discards)}, + {"tx_error_packets", offsetof(struct ice_dcf_eth_stats, tx_errors)}, +}; + 
+#define ICE_DCF_NB_XSTATS (sizeof(rte_ice_dcf_stats_strings) / \ + sizeof(rte_ice_dcf_stats_strings[0])) + static uint16_t ice_dcf_recv_pkts(__rte_unused void *rx_queue, __rte_unused struct rte_mbuf **bufs, @@ -1358,6 +1382,54 @@ ice_dcf_stats_reset(struct rte_eth_dev *dev) return 0; } +static int ice_dcf_xstats_get_names(__rte_unused struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats_names, + __rte_unused unsigned int limit) +{ + unsigned int i; + + if (xstats_names != NULL) + for (i = 0; i < ICE_DCF_NB_XSTATS; i++) { + snprintf(xstats_names[i].name, + sizeof(xstats_names[i].name), + "%s", rt
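A sketch of dumping these counters via the generic xstats API, using the usual two-pass pattern (VLAs used for brevity, helper name illustrative):

#include <stdio.h>
#include <inttypes.h>
#include <rte_ethdev.h>

static void
dump_xstats(uint16_t port_id)
{
    int i, n = rte_eth_xstats_get_names(port_id, NULL, 0);

    if (n <= 0)
        return;

    struct rte_eth_xstat_name names[n];
    struct rte_eth_xstat vals[n];

    rte_eth_xstats_get_names(port_id, names, n);
    n = rte_eth_xstats_get(port_id, vals, n);
    for (i = 0; i < n; i++)
        printf("%s: %" PRIu64 "\n",
               names[vals[i].id].name, vals[i].value);
}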
[PATCH v6 10/12] net/ice: support queue information getting
Add below ops, rxq_info_get txq_info_get Signed-off-by: Kevin Liu --- doc/guides/rel_notes/release_22_07.rst | 1 + drivers/net/ice/ice_dcf_ethdev.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst index 1f404a6ee5..0d6577cd74 100644 --- a/doc/guides/rel_notes/release_22_07.rst +++ b/doc/guides/rel_notes/release_22_07.rst @@ -71,6 +71,7 @@ New Features * Added support dcf MAC configuration. * Added support dcf VLAN filter and offload configuration. * Added add extended stats. + * Added support queue information getting. Removed Items - diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c index 6503700e02..9217392d04 100644 --- a/drivers/net/ice/ice_dcf_ethdev.c +++ b/drivers/net/ice/ice_dcf_ethdev.c @@ -1698,6 +1698,8 @@ static const struct eth_dev_ops ice_dcf_eth_dev_ops = { .tx_queue_start = ice_dcf_tx_queue_start, .rx_queue_stop= ice_dcf_rx_queue_stop, .tx_queue_stop= ice_dcf_tx_queue_stop, + .rxq_info_get = ice_rxq_info_get, + .txq_info_get = ice_txq_info_get, .link_update = ice_dcf_link_update, .stats_get= ice_dcf_stats_get, .stats_reset = ice_dcf_stats_reset, -- 2.33.1
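A sketch of what the two ops expose to applications (queue 0 and the helper name are illustrative):

#include <stdio.h>
#include <rte_ethdev.h>

static void
show_queue_info(uint16_t port_id)
{
    struct rte_eth_rxq_info rxq;
    struct rte_eth_txq_info txq;

    if (rte_eth_rx_queue_info_get(port_id, 0, &rxq) == 0)
        printf("rxq0: nb_desc=%u scattered=%u\n",
               rxq.nb_desc, rxq.scattered_rx);
    if (rte_eth_tx_queue_info_get(port_id, 0, &txq) == 0)
        printf("txq0: nb_desc=%u\n", txq.nb_desc);
}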
[PATCH v6 11/12] net/ice: implement power management
Implement support for the power management API by implementing a 'get_monitor_addr' function that will return an address of an RX ring's status bit. Signed-off-by: Kevin Liu --- doc/guides/nics/features/ice_dcf.ini | 1 + doc/guides/rel_notes/release_22_07.rst | 1 + drivers/net/ice/ice_dcf_ethdev.c | 1 + 3 files changed, 3 insertions(+) diff --git a/doc/guides/nics/features/ice_dcf.ini b/doc/guides/nics/features/ice_dcf.ini index 54ea7f150c..3b11622d4c 100644 --- a/doc/guides/nics/features/ice_dcf.ini +++ b/doc/guides/nics/features/ice_dcf.ini @@ -25,6 +25,7 @@ VLAN filter = Y VLAN offload = Y Extended stats = Y Basic stats = Y +Power mgmt address monitor = Y Linux= Y x86-32 = Y x86-64 = Y diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst index 0d6577cd74..004a6d3343 100644 --- a/doc/guides/rel_notes/release_22_07.rst +++ b/doc/guides/rel_notes/release_22_07.rst @@ -72,6 +72,7 @@ New Features * Added support dcf VLAN filter and offload configuration. * Added add extended stats. * Added support queue information getting. + * Added implement power management. Removed Items - diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c index 9217392d04..236c0395e0 100644 --- a/drivers/net/ice/ice_dcf_ethdev.c +++ b/drivers/net/ice/ice_dcf_ethdev.c @@ -1700,6 +1700,7 @@ static const struct eth_dev_ops ice_dcf_eth_dev_ops = { .tx_queue_stop= ice_dcf_tx_queue_stop, .rxq_info_get = ice_rxq_info_get, .txq_info_get = ice_txq_info_get, + .get_monitor_addr = ice_get_monitor_addr, .link_update = ice_dcf_link_update, .stats_get= ice_dcf_stats_get, .stats_reset = ice_dcf_stats_reset, -- 2.33.1
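With get_monitor_addr wired up, the PMD power management library can use monitor mode on DCF Rx queues; a sketch, assuming it is called for the lcore that polls queue 0:

#include <rte_lcore.h>
#include <rte_power_pmd_mgmt.h>

/* Put the polling lcore into UMWAIT-based sleep while Rx queue 0 of
 * the DCF port stays empty, waking on the ring's status bit.
 */
static int
enable_monitor_pm(uint16_t port_id)
{
    return rte_power_ethdev_pmgmt_queue_enable(rte_lcore_id(), port_id,
                                               0,
                                               RTE_POWER_MGMT_TYPE_MONITOR);
}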
[PATCH v6 12/12] net/ice: support DCF new VLAN capabilities
From: Alvin Zhang The new VLAN virtchnl opcodes introduce new capabilities like VLAN filtering, stripping and insertion. The DCF needs to query the VLAN capabilities based on current device configuration firstly. DCF is able to configure inner VLAN filter when port VLAN is enabled base on negotiation; and DCF is able to configure outer VLAN (0x8100) if port VLAN is disabled to be compatible with legacy mode. When port VLAN is updated by DCF, the DCF needs to reset to query the new VLAN capabilities. Signed-off-by: Alvin Zhang Signed-off-by: Kevin Liu --- doc/guides/rel_notes/release_22_07.rst | 1 + drivers/net/ice/ice_dcf.c | 27 drivers/net/ice/ice_dcf.h | 1 + drivers/net/ice/ice_dcf_ethdev.c | 171 ++--- 4 files changed, 183 insertions(+), 17 deletions(-) diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst index 004a6d3343..7c932a7c8a 100644 --- a/doc/guides/rel_notes/release_22_07.rst +++ b/doc/guides/rel_notes/release_22_07.rst @@ -73,6 +73,7 @@ New Features * Added add extended stats. * Added support queue information getting. * Added implement power management. + * Added support DCF new VLAN capabilities. Removed Items - diff --git a/drivers/net/ice/ice_dcf.c b/drivers/net/ice/ice_dcf.c index 55ae68c456..885d58c0f4 100644 --- a/drivers/net/ice/ice_dcf.c +++ b/drivers/net/ice/ice_dcf.c @@ -587,6 +587,29 @@ ice_dcf_get_supported_rxdid(struct ice_dcf_hw *hw) return 0; } +static int +dcf_get_vlan_offload_caps_v2(struct ice_dcf_hw *hw) +{ + struct virtchnl_vlan_caps vlan_v2_caps; + struct dcf_virtchnl_cmd args; + int ret; + + memset(&args, 0, sizeof(args)); + args.v_op = VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS; + args.rsp_msgbuf = (uint8_t *)&vlan_v2_caps; + args.rsp_buflen = sizeof(vlan_v2_caps); + + ret = ice_dcf_execute_virtchnl_cmd(hw, &args); + if (ret) { + PMD_DRV_LOG(ERR, + "Failed to execute command of VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS"); + return ret; + } + + rte_memcpy(&hw->vlan_v2_caps, &vlan_v2_caps, sizeof(vlan_v2_caps)); + return 0; +} + int ice_dcf_init_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw) { @@ -701,6 +724,10 @@ ice_dcf_init_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw) rte_intr_enable(pci_dev->intr_handle); ice_dcf_enable_irq0(hw); + if ((hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN_V2) && + dcf_get_vlan_offload_caps_v2(hw)) + goto err_rss; + return 0; err_rss: diff --git a/drivers/net/ice/ice_dcf.h b/drivers/net/ice/ice_dcf.h index 44a61404c3..7f42ebabe9 100644 --- a/drivers/net/ice/ice_dcf.h +++ b/drivers/net/ice/ice_dcf.h @@ -129,6 +129,7 @@ struct ice_dcf_hw { uint16_t nb_msix; uint16_t rxq_map[16]; struct virtchnl_eth_stats eth_stats_offset; + struct virtchnl_vlan_caps vlan_v2_caps; /* Link status */ bool link_up; diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c index 236c0395e0..8005eb2ab8 100644 --- a/drivers/net/ice/ice_dcf_ethdev.c +++ b/drivers/net/ice/ice_dcf_ethdev.c @@ -1050,6 +1050,46 @@ dcf_dev_set_default_mac_addr(struct rte_eth_dev *dev, return 0; } +static int +dcf_add_del_vlan_v2(struct ice_dcf_hw *hw, uint16_t vlanid, bool add) +{ + struct virtchnl_vlan_supported_caps *supported_caps = + &hw->vlan_v2_caps.filtering.filtering_support; + struct virtchnl_vlan *vlan_setting; + struct virtchnl_vlan_filter_list_v2 vlan_filter; + struct dcf_virtchnl_cmd args; + uint32_t filtering_caps; + int err; + + if (supported_caps->outer) { + filtering_caps = supported_caps->outer; + vlan_setting = &vlan_filter.filters[0].outer; + } else { + filtering_caps = 
supported_caps->inner; + vlan_setting = &vlan_filter.filters[0].inner; + } + + if (!(filtering_caps & VIRTCHNL_VLAN_ETHERTYPE_8100)) + return -ENOTSUP; + + memset(&vlan_filter, 0, sizeof(vlan_filter)); + vlan_filter.vport_id = hw->vsi_res->vsi_id; + vlan_filter.num_elements = 1; + vlan_setting->tpid = RTE_ETHER_TYPE_VLAN; + vlan_setting->tci = vlanid; + + memset(&args, 0, sizeof(args)); + args.v_op = add ? VIRTCHNL_OP_ADD_VLAN_V2 : VIRTCHNL_OP_DEL_VLAN_V2; + args.req_msg = (uint8_t *)&vlan_filter; + args.req_msglen = sizeof(vlan_filter); + err = ice_dcf_execute_virtchnl_cmd(hw, &args); + if (err) + PMD_DRV_LOG(ERR, "fail to execute command %s", + add ? "OP_ADD_VLAN_V2" : "OP_DEL_VLAN_V2"); + + return err; +} + static int dcf_add_del_vlan(struct ice_dcf_hw *hw, uint16_t vlanid, bool add) { @@ -1076,6 +1116,116 @@ dcf_
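The capability-selection pattern used throughout the patch, condensed for reference; this assumes the driver-internal types from ice_dcf.h and virtchnl.h and is a restatement of the diff, not a standalone API:

/* Prefer outer VLAN filtering when the negotiated caps expose it,
 * otherwise fall back to inner; bail out unless 0x8100 is supported.
 */
static int
check_vlan_filter_caps(struct ice_dcf_hw *hw)
{
    struct virtchnl_vlan_supported_caps *sc =
        &hw->vlan_v2_caps.filtering.filtering_support;
    uint32_t caps = sc->outer ? sc->outer : sc->inner;

    if (!(caps & VIRTCHNL_VLAN_ETHERTYPE_8100))
        return -ENOTSUP;
    return 0;
}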
RE: [PATCH v6 01/12] net/ice: enable RSS RETA ops for DCF hardware
> -Original Message- > From: Liu, KevinX > Sent: Thursday, April 28, 2022 2:13 AM > To: dev@dpdk.org > Cc: Yang, Qiming ; Zhang, Qi Z > ; Yang, SteveX ; Liu, KevinX > > Subject: [PATCH v6 01/12] net/ice: enable RSS RETA ops for DCF hardware > > From: Steve Yang > > RSS RETA should be updated and queried by application, Add related ops > ('.reta_update', '.reta_query') for DCF. > > Signed-off-by: Steve Yang > Signed-off-by: Kevin Liu > --- > doc/guides/nics/features/ice_dcf.ini | 1 + > doc/guides/rel_notes/release_22_07.rst | 3 + > drivers/net/ice/ice_dcf.c | 2 +- > drivers/net/ice/ice_dcf.h | 1 + > drivers/net/ice/ice_dcf_ethdev.c | 77 ++ > 5 files changed, 83 insertions(+), 1 deletion(-) > > diff --git a/doc/guides/nics/features/ice_dcf.ini > b/doc/guides/nics/features/ice_dcf.ini > index 54073f0b88..5221c99a9c 100644 > --- a/doc/guides/nics/features/ice_dcf.ini > +++ b/doc/guides/nics/features/ice_dcf.ini > @@ -15,6 +15,7 @@ L3 checksum offload = P > L4 checksum offload = P > Inner L3 checksum= P > Inner L4 checksum= P > +RSS reta update = Y > Basic stats = Y > Linux= Y > x86-32 = Y > diff --git a/doc/guides/rel_notes/release_22_07.rst > b/doc/guides/rel_notes/release_22_07.rst > index 90123bb807..cbdc90760c 100644 > --- a/doc/guides/rel_notes/release_22_07.rst > +++ b/doc/guides/rel_notes/release_22_07.rst > @@ -60,6 +60,9 @@ New Features >* Added Tx QoS queue rate limitation support. >* Added quanta size configuration support. > > +* **Updated Intel ice driver.** > + > + * Added enable RSS RETA ops for DCF hardware. There is no DCF hardware, better change to Added support for RSS RETA configure in DCF mode.
RE: [PATCH v6 03/12] net/ice: cleanup Tx buffers
> -Original Message- > From: Liu, KevinX > Sent: Thursday, April 28, 2022 2:13 AM > To: dev@dpdk.org > Cc: Yang, Qiming ; Zhang, Qi Z > ; Yang, SteveX ; Zhang, > RobinX ; Liu, KevinX > Subject: [PATCH v6 03/12] net/ice: cleanup Tx buffers > > From: Robin Zhang > > Add support for ops rte_eth_tx_done_cleanup in dcf > > Signed-off-by: Robin Zhang > Signed-off-by: Kevin Liu > --- > doc/guides/rel_notes/release_22_07.rst | 1 + > drivers/net/ice/ice_dcf_ethdev.c | 1 + > 2 files changed, 2 insertions(+) > > diff --git a/doc/guides/rel_notes/release_22_07.rst > b/doc/guides/rel_notes/release_22_07.rst > index cc2c243e81..bbd3d296de 100644 > --- a/doc/guides/rel_notes/release_22_07.rst > +++ b/doc/guides/rel_notes/release_22_07.rst > @@ -64,6 +64,7 @@ New Features > > * Added enable RSS RETA ops for DCF hardware. > * Added enable RSS HASH ops for DCF hardware. > + * Added cleanup Tx buffers. Please keep the pattern be consistent, Added support for Tx buffer cleanup in DCF mode. Anyway, this is not worth for a release note update, you can remove it. > > Removed Items > - > diff --git a/drivers/net/ice/ice_dcf_ethdev.c > b/drivers/net/ice/ice_dcf_ethdev.c > index ccad7fc304..d8b5961514 100644 > --- a/drivers/net/ice/ice_dcf_ethdev.c > +++ b/drivers/net/ice/ice_dcf_ethdev.c > @@ -1235,6 +1235,7 @@ static const struct eth_dev_ops > ice_dcf_eth_dev_ops = { > .reta_query = ice_dcf_dev_rss_reta_query, > .rss_hash_update = ice_dcf_dev_rss_hash_update, > .rss_hash_conf_get = ice_dcf_dev_rss_hash_conf_get, > + .tx_done_cleanup = ice_tx_done_cleanup, > }; > > static int > -- > 2.33.1
RE: [PATCH v6 05/12] net/ice: add ops dev-supported-ptypes-get to dcf
> -Original Message- > From: Liu, KevinX > Sent: Thursday, April 28, 2022 2:13 AM > To: dev@dpdk.org > Cc: Yang, Qiming ; Zhang, Qi Z > ; Yang, SteveX ; Wang, Jie1X > ; Liu, KevinX > Subject: [PATCH v6 05/12] net/ice: add ops dev-supported-ptypes-get to dcf > > From: Jie Wang > > add API "dev_supported_ptypes_get" to dcf, that dcf pmd can get ptypes > through the new API. > > Signed-off-by: Jie Wang > Signed-off-by: Kevin Liu > --- > doc/guides/rel_notes/release_22_07.rst | 1 + > drivers/net/ice/ice_dcf_ethdev.c | 80 -- > 2 files changed, 50 insertions(+), 31 deletions(-) > > diff --git a/doc/guides/rel_notes/release_22_07.rst > b/doc/guides/rel_notes/release_22_07.rst > index dc37de85f3..a39196c605 100644 > --- a/doc/guides/rel_notes/release_22_07.rst > +++ b/doc/guides/rel_notes/release_22_07.rst > @@ -66,6 +66,7 @@ New Features > * Added enable RSS HASH ops for DCF hardware. > * Added cleanup Tx buffers. > * Added add ops MTU-SET to dcf. > + * Added add ops dev-supported-ptypes-get to dcf. Misc feature is not necessary for release notes update, please remove this. > > Removed Items > - > diff --git a/drivers/net/ice/ice_dcf_ethdev.c > b/drivers/net/ice/ice_dcf_ethdev.c > index 06d752fd61..6a577a6582 100644 > --- a/drivers/net/ice/ice_dcf_ethdev.c > +++ b/drivers/net/ice/ice_dcf_ethdev.c > @@ -1218,38 +1218,56 @@ ice_dcf_dev_reset(struct rte_eth_dev *dev) > return ret; > } > > +static const uint32_t * > +ice_dcf_dev_supported_ptypes_get(struct rte_eth_dev *dev __rte_unused) > +{ > + static const uint32_t ptypes[] = { > + RTE_PTYPE_L2_ETHER, > + RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, > + RTE_PTYPE_L4_FRAG, > + RTE_PTYPE_L4_ICMP, > + RTE_PTYPE_L4_NONFRAG, > + RTE_PTYPE_L4_SCTP, > + RTE_PTYPE_L4_TCP, > + RTE_PTYPE_L4_UDP, > + RTE_PTYPE_UNKNOWN > + }; > + return ptypes; > +} > + > static const struct eth_dev_ops ice_dcf_eth_dev_ops = { > - .dev_start = ice_dcf_dev_start, > - .dev_stop= ice_dcf_dev_stop, > - .dev_close = ice_dcf_dev_close, > - .dev_reset = ice_dcf_dev_reset, > - .dev_configure = ice_dcf_dev_configure, > - .dev_infos_get = ice_dcf_dev_info_get, > - .rx_queue_setup = ice_rx_queue_setup, > - .tx_queue_setup = ice_tx_queue_setup, > - .rx_queue_release= ice_dev_rx_queue_release, > - .tx_queue_release= ice_dev_tx_queue_release, > - .rx_queue_start = ice_dcf_rx_queue_start, > - .tx_queue_start = ice_dcf_tx_queue_start, > - .rx_queue_stop = ice_dcf_rx_queue_stop, > - .tx_queue_stop = ice_dcf_tx_queue_stop, > - .link_update = ice_dcf_link_update, > - .stats_get = ice_dcf_stats_get, > - .stats_reset = ice_dcf_stats_reset, > - .promiscuous_enable = ice_dcf_dev_promiscuous_enable, > - .promiscuous_disable = ice_dcf_dev_promiscuous_disable, > - .allmulticast_enable = ice_dcf_dev_allmulticast_enable, > - .allmulticast_disable= ice_dcf_dev_allmulticast_disable, > - .flow_ops_get= ice_dcf_dev_flow_ops_get, > - .udp_tunnel_port_add = ice_dcf_dev_udp_tunnel_port_add, > - .udp_tunnel_port_del = ice_dcf_dev_udp_tunnel_port_del, > - .tm_ops_get = ice_dcf_tm_ops_get, > - .reta_update = ice_dcf_dev_rss_reta_update, > - .reta_query = ice_dcf_dev_rss_reta_query, > - .rss_hash_update = ice_dcf_dev_rss_hash_update, > - .rss_hash_conf_get = ice_dcf_dev_rss_hash_conf_get, > - .tx_done_cleanup = ice_tx_done_cleanup, > - .mtu_set = ice_dcf_dev_mtu_set, > + .dev_start= ice_dcf_dev_start, > + .dev_stop = ice_dcf_dev_stop, > + .dev_close= ice_dcf_dev_close, > + .dev_reset= ice_dcf_dev_reset, > + .dev_configure= ice_dcf_dev_configure, > + .dev_infos_get= ice_dcf_dev_info_get, > + .dev_supported_ptypes_get = 
ice_dcf_dev_supported_ptypes_get, > + .rx_queue_setup = ice_rx_queue_setup, > + .tx_queue_setup = ice_tx_queue_setup, > + .rx_queue_release = ice_dev_rx_queue_release, > + .tx_queue_release = ice_dev_tx_queue_release, > + .rx_queue_start = ice_dcf_rx_queue_start, > + .tx_queue_start = ice_dcf_tx_queue_start, > + .rx_queue_stop= ice_dcf_rx_queue_stop, > + .tx_queue_stop= ice_dcf_tx_queue_stop, > + .link_update = ice_dcf_link_update, > + .stats_get= ice_dcf_stats_get, > + .stats_reset = ice_dcf_stats_reset, > + .promiscuous_enable
RE: [PATCH v6 12/12] net/ice: support DCF new VLAN capabilities
> -Original Message- > From: Liu, KevinX > Sent: Thursday, April 28, 2022 2:13 AM > To: dev@dpdk.org > Cc: Yang, Qiming ; Zhang, Qi Z > ; Yang, SteveX ; Alvin Zhang > ; Liu, KevinX > Subject: [PATCH v6 12/12] net/ice: support DCF new VLAN capabilities > > From: Alvin Zhang > > The new VLAN virtchnl opcodes introduce new capabilities like VLAN filtering, > stripping and insertion. > > The DCF needs to query the VLAN capabilities based on current device > configuration firstly. > > DCF is able to configure inner VLAN filter when port VLAN is enabled base on > negotiation; and DCF is able to configure outer VLAN (0x8100) if port VLAN is > disabled to be compatible with legacy mode. > > When port VLAN is updated by DCF, the DCF needs to reset to query the new > VLAN capabilities. > > Signed-off-by: Alvin Zhang > Signed-off-by: Kevin Liu > --- > doc/guides/rel_notes/release_22_07.rst | 1 + > drivers/net/ice/ice_dcf.c | 27 > drivers/net/ice/ice_dcf.h | 1 + > drivers/net/ice/ice_dcf_ethdev.c | 171 ++--- > 4 files changed, 183 insertions(+), 17 deletions(-) > > diff --git a/doc/guides/rel_notes/release_22_07.rst > b/doc/guides/rel_notes/release_22_07.rst > index 004a6d3343..7c932a7c8a 100644 > --- a/doc/guides/rel_notes/release_22_07.rst > +++ b/doc/guides/rel_notes/release_22_07.rst > @@ -73,6 +73,7 @@ New Features > * Added add extended stats. > * Added support queue information getting. > * Added implement power management. > + * Added support DCF new VLAN capabilities. This feature is not exposed to user, no need release note update. > > Removed Items > - > diff --git a/drivers/net/ice/ice_dcf.c b/drivers/net/ice/ice_dcf.c index > 55ae68c456..885d58c0f4 100644 > --- a/drivers/net/ice/ice_dcf.c > +++ b/drivers/net/ice/ice_dcf.c > @@ -587,6 +587,29 @@ ice_dcf_get_supported_rxdid(struct ice_dcf_hw *hw) > return 0; > } > > +static int > +dcf_get_vlan_offload_caps_v2(struct ice_dcf_hw *hw) { > + struct virtchnl_vlan_caps vlan_v2_caps; > + struct dcf_virtchnl_cmd args; > + int ret; > + > + memset(&args, 0, sizeof(args)); > + args.v_op = VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS; > + args.rsp_msgbuf = (uint8_t *)&vlan_v2_caps; > + args.rsp_buflen = sizeof(vlan_v2_caps); > + > + ret = ice_dcf_execute_virtchnl_cmd(hw, &args); > + if (ret) { > + PMD_DRV_LOG(ERR, > + "Failed to execute command of > VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS"); > + return ret; > + } > + > + rte_memcpy(&hw->vlan_v2_caps, &vlan_v2_caps, > sizeof(vlan_v2_caps)); > + return 0; > +} > + > int > ice_dcf_init_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw) { @@ - > 701,6 +724,10 @@ ice_dcf_init_hw(struct rte_eth_dev *eth_dev, struct > ice_dcf_hw *hw) > rte_intr_enable(pci_dev->intr_handle); > ice_dcf_enable_irq0(hw); > > + if ((hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN_V2) > && > + dcf_get_vlan_offload_caps_v2(hw)) > + goto err_rss; > + > return 0; > > err_rss: > diff --git a/drivers/net/ice/ice_dcf.h b/drivers/net/ice/ice_dcf.h index > 44a61404c3..7f42ebabe9 100644 > --- a/drivers/net/ice/ice_dcf.h > +++ b/drivers/net/ice/ice_dcf.h > @@ -129,6 +129,7 @@ struct ice_dcf_hw { > uint16_t nb_msix; > uint16_t rxq_map[16]; > struct virtchnl_eth_stats eth_stats_offset; > + struct virtchnl_vlan_caps vlan_v2_caps; > > /* Link status */ > bool link_up; > diff --git a/drivers/net/ice/ice_dcf_ethdev.c > b/drivers/net/ice/ice_dcf_ethdev.c > index 236c0395e0..8005eb2ab8 100644 > --- a/drivers/net/ice/ice_dcf_ethdev.c > +++ b/drivers/net/ice/ice_dcf_ethdev.c > @@ -1050,6 +1050,46 @@ dcf_dev_set_default_mac_addr(struct > rte_eth_dev *dev, 
> return 0; > } > > +static int > +dcf_add_del_vlan_v2(struct ice_dcf_hw *hw, uint16_t vlanid, bool add) { > + struct virtchnl_vlan_supported_caps *supported_caps = > + &hw->vlan_v2_caps.filtering.filtering_support; > + struct virtchnl_vlan *vlan_setting; > + struct virtchnl_vlan_filter_list_v2 vlan_filter; > + struct dcf_virtchnl_cmd args; > + uint32_t filtering_caps; > + int err; > + > + if (supported_caps->outer) { > + filtering_caps = supported_caps->outer; > + vlan_setting = &vlan_filter.filters[0].outer; > + } else { > + filtering_caps = supported_caps->inner; > + vlan_setting = &vlan_filter.filters[0].inner; > + } > + > + if (!(filtering_caps & VIRTCHNL_VLAN_ETHERTYPE_8100)) > + return -ENOTSUP; > + > + memset(&vlan_filter, 0, sizeof(vlan_filter)); > + vlan_filter.vport_id = hw->vsi_res->vsi_id; > + vlan_filter.num_elements = 1; > + vlan_setting->tpid = RTE_ETHER_TYPE_VLAN; > + vlan_setting->tci = vlanid; > + > + memset(&args, 0, sizeof(args)); > + args.v_op = add ?
Re: [PATCH v3 2/3] examples/dma: fix Tx drop statistic is not collected
On 24/04/2022 07:07, Chengwen Feng wrote: The Tx drop statistic was designed to be collected via the rte_eth_dev_tx_buffer mechanism, but the application uses rte_eth_tx_burst to send packets, so the Tx drop statistic was never collected. This patch removes the rte_eth_dev_tx_buffer mechanism to fix the problem. Fixes: 632bcd9b5d4f ("examples/ioat: print statistics") Cc: sta...@dpdk.org Signed-off-by: Chengwen Feng Acked-by: Bruce Richardson --- examples/dma/dmafwd.c | 27 +-- 1 file changed, 5 insertions(+), 22 deletions(-) Acked-by: Kevin Laatz
Re: [PATCH v3 3/3] examples/dma: add force minimal copy size parameter
On 24/04/2022 07:07, Chengwen Feng wrote: This patch adds a forced minimal copy size parameter (-m/--force-min-copy-size), so that when copying by CPU or DMA, the real copy size is the maximum of the mbuf's data_len and this parameter. This parameter was designed to compare the performance of CPU copy and DMA copy. Users can send small packets at a high rate to drive the performance test. Signed-off-by: Chengwen Feng Acked-by: Bruce Richardson --- examples/dma/dmafwd.c | 30 +++--- 1 file changed, 27 insertions(+), 3 deletions(-) Acked-by: Kevin Laatz
[PATCH v2 1/2] event/cnxk: add additional checks in OP_RELEASE
Add additional checks while performing RTE_EVENT_OP_RELEASE to ensure that there are no pending SWTAGs and FLUSHEs in flight. Signed-off-by: Pavan Nikhilesh --- v2 Changes: - Fix compilation with RTE_LIBRTE_MEMPOOL_DEBUG enabled. drivers/event/cnxk/cn10k_eventdev.c | 4 +--- drivers/event/cnxk/cn10k_worker.c | 8 ++-- drivers/event/cnxk/cn9k_eventdev.c | 4 +--- drivers/event/cnxk/cn9k_worker.c| 16 drivers/event/cnxk/cn9k_worker.h| 3 +-- drivers/event/cnxk/cnxk_worker.h| 17 ++--- 6 files changed, 35 insertions(+), 17 deletions(-) diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index 9b4d2895ec..2fa2cd31c2 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -137,9 +137,7 @@ cn10k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base, if (fn != NULL && ev.u64 != 0) fn(arg, ev); if (ev.sched_type != SSO_TT_EMPTY) - cnxk_sso_hws_swtag_flush( - ws->base + SSOW_LF_GWS_WQE0, - ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH); + cnxk_sso_hws_swtag_flush(ws->base); do { val = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE); } while (val & BIT_ULL(56)); diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c index 975a22336a..0d99b4c5e5 100644 --- a/drivers/event/cnxk/cn10k_worker.c +++ b/drivers/event/cnxk/cn10k_worker.c @@ -18,8 +18,12 @@ cn10k_sso_hws_enq(void *port, const struct rte_event *ev) cn10k_sso_hws_forward_event(ws, ev); break; case RTE_EVENT_OP_RELEASE: - cnxk_sso_hws_swtag_flush(ws->base + SSOW_LF_GWS_WQE0, -ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH); + if (ws->swtag_req) { + cnxk_sso_hws_desched(ev->u64, ws->base); + ws->swtag_req = 0; + break; + } + cnxk_sso_hws_swtag_flush(ws->base); break; default: return 0; diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c index 4bba477dd1..41bbe3cb22 100644 --- a/drivers/event/cnxk/cn9k_eventdev.c +++ b/drivers/event/cnxk/cn9k_eventdev.c @@ -156,9 +156,7 @@ cn9k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base, if (fn != NULL && ev.u64 != 0) fn(arg, ev); if (ev.sched_type != SSO_TT_EMPTY) - cnxk_sso_hws_swtag_flush( - ws_base + SSOW_LF_GWS_TAG, - ws_base + SSOW_LF_GWS_OP_SWTAG_FLUSH); + cnxk_sso_hws_swtag_flush(ws_base); do { val = plt_read64(ws_base + SSOW_LF_GWS_PENDSTATE); } while (val & BIT_ULL(56)); diff --git a/drivers/event/cnxk/cn9k_worker.c b/drivers/event/cnxk/cn9k_worker.c index a981bc986f..41dbe6cafb 100644 --- a/drivers/event/cnxk/cn9k_worker.c +++ b/drivers/event/cnxk/cn9k_worker.c @@ -19,8 +19,12 @@ cn9k_sso_hws_enq(void *port, const struct rte_event *ev) cn9k_sso_hws_forward_event(ws, ev); break; case RTE_EVENT_OP_RELEASE: - cnxk_sso_hws_swtag_flush(ws->base + SSOW_LF_GWS_TAG, -ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH); + if (ws->swtag_req) { + cnxk_sso_hws_desched(ev->u64, ws->base); + ws->swtag_req = 0; + break; + } + cnxk_sso_hws_swtag_flush(ws->base); break; default: return 0; @@ -78,8 +82,12 @@ cn9k_sso_hws_dual_enq(void *port, const struct rte_event *ev) cn9k_sso_hws_dual_forward_event(dws, base, ev); break; case RTE_EVENT_OP_RELEASE: - cnxk_sso_hws_swtag_flush(base + SSOW_LF_GWS_TAG, -base + SSOW_LF_GWS_OP_SWTAG_FLUSH); + if (dws->swtag_req) { + cnxk_sso_hws_desched(ev->u64, base); + dws->swtag_req = 0; + break; + } + cnxk_sso_hws_swtag_flush(base); break; default: return 0; diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h index 917d1e0b40..88eb4e9cf9 100644 --- a/drivers/event/cnxk/cn9k_worker.h +++ b/drivers/event/cnxk/cn9k_worker.h @@ -841,8 
+841,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, return 1; } - cnxk_sso_hws_swtag_flush(base + SSOW_LF_GWS_TAG, -
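For context, RTE_EVENT_OP_RELEASE reaches these paths through the normal enqueue call; a minimal worker-side sketch (helper name illustrative):

#include <rte_eventdev.h>

/* Drop the scheduling context held by this port without forwarding
 * the event; with this patch the driver deschedules instead when a
 * SWTAG from a previous forward is still pending.
 */
static void
release_context(uint8_t dev_id, uint8_t port_id)
{
    struct rte_event ev = { .op = RTE_EVENT_OP_RELEASE };

    rte_event_enqueue_burst(dev_id, port_id, &ev, 1);
}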
[PATCH v2 2/2] event/cnxk: move post-processing to separate function
Move event post-processing to a separate function. Do complete event post-processing in tear-down functions to prevent incorrect memory free. Signed-off-by: Pavan Nikhilesh --- drivers/event/cnxk/cn10k_eventdev.c | 5 +- drivers/event/cnxk/cn10k_worker.h | 189 +--- drivers/event/cnxk/cn9k_eventdev.c | 9 +- drivers/event/cnxk/cn9k_worker.h| 114 ++--- 4 files changed, 137 insertions(+), 180 deletions(-) diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index 2fa2cd31c2..94829e789c 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -133,7 +133,10 @@ cn10k_sso_hws_flush_events(void *hws, uint8_t queue_id, uintptr_t base, while (aq_cnt || cq_ds_cnt || ds_cnt) { plt_write64(req, ws->base + SSOW_LF_GWS_OP_GET_WORK0); - cn10k_sso_hws_get_work_empty(ws, &ev); + cn10k_sso_hws_get_work_empty( + ws, &ev, + (NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F | + NIX_RX_MULTI_SEG_F | CPT_RX_WQE_F); if (fn != NULL && ev.u64 != 0) fn(arg, ev); if (ev.sched_type != SSO_TT_EMPTY) diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h index c96048f47d..034f508dd8 100644 --- a/drivers/event/cnxk/cn10k_worker.h +++ b/drivers/event/cnxk/cn10k_worker.h @@ -196,15 +196,87 @@ cn10k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags, } } +static __rte_always_inline void +cn10k_sso_hws_post_process(struct cn10k_sso_hws *ws, uint64_t *u64, + const uint32_t flags) +{ + uint64_t tstamp_ptr; + + u64[0] = (u64[0] & (0x3ull << 32)) << 6 | +(u64[0] & (0x3FFull << 36)) << 4 | (u64[0] & 0x); + if ((flags & CPT_RX_WQE_F) && + (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == RTE_EVENT_TYPE_CRYPTODEV)) { + u64[1] = cn10k_cpt_crypto_adapter_dequeue(u64[1]); + } else if (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == RTE_EVENT_TYPE_ETHDEV) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(u64[0]); + uint64_t mbuf; + + mbuf = u64[1] - sizeof(struct rte_mbuf); + rte_prefetch0((void *)mbuf); + if (flags & NIX_RX_OFFLOAD_SECURITY_F) { + const uint64_t mbuf_init = + 0x10001ULL | RTE_PKTMBUF_HEADROOM | + (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 
8 : 0); + struct rte_mbuf *m; + uintptr_t sa_base; + uint64_t iova = 0; + uint8_t loff = 0; + uint16_t d_off; + uint64_t cq_w1; + uint64_t cq_w5; + + m = (struct rte_mbuf *)mbuf; + d_off = (uintptr_t)(m->buf_addr) - (uintptr_t)m; + d_off += RTE_PKTMBUF_HEADROOM; + + cq_w1 = *(uint64_t *)(u64[1] + 8); + cq_w5 = *(uint64_t *)(u64[1] + 40); + + sa_base = cnxk_nix_sa_base_get(port, ws->lookup_mem); + sa_base &= ~(ROC_NIX_INL_SA_BASE_ALIGN - 1); + + mbuf = (uint64_t)nix_sec_meta_to_mbuf_sc( + cq_w1, cq_w5, sa_base, (uintptr_t)&iova, &loff, + (struct rte_mbuf *)mbuf, d_off, flags, + mbuf_init | ((uint64_t)port) << 48); + if (loff) + roc_npa_aura_op_free(m->pool->pool_id, 0, iova); + } + + u64[0] = CNXK_CLR_SUB_EVENT(u64[0]); + cn10k_wqe_to_mbuf(u64[1], mbuf, port, u64[0] & 0xF, flags, + ws->lookup_mem); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)u64[1]) + + CNXK_SSO_WQE_SG_PTR); + cn10k_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, ws->tstamp, +flags & NIX_RX_OFFLOAD_TSTAMP_F, +(uint64_t *)tstamp_ptr); + u64[1] = mbuf; + } else if (CNXK_EVENT_TYPE_FROM_TAG(u64[0]) == + RTE_EVENT_TYPE_ETHDEV_VECTOR) { + uint8_t port = CNXK_SUB_EVENT_FROM_TAG(u64[0]); + __uint128_t vwqe_hdr = *(__uint128_t *)u64[1]; + + vwqe_hdr = ((vwqe_hdr >> 64) & 0xFFF) | BIT_ULL(31) | + ((vwqe_hdr & 0x) << 48) | ((uint64_t)port << 32); + *(uint64_t *)u64[1] = (uint64_t)vwqe_hdr; + cn10k_process_vwqe(u64[1], port, flags, ws->lookup_mem, + ws->tstamp, ws->lmt_base
RE: [PATCH] doc: fix support table for ETH and VLAN flow items
>-Original Message- >From: Ferruh Yigit >Sent: Tuesday, April 26, 2022 1:48 PM >To: Asaf Penso ; Ilya Maximets ; >dev@dpdk.org; Ori Kam >Cc: Ajit Khaparde ; Rahul Lakkireddy >; Hemant Agrawal >; Haiyue Wang ; John >Daley ; Guoyang Zhou ; >Min Hu (Connor) ; Beilei Xing >; Jingjing Wu ; Qi Zhang >; Rosen Xu ; Matan Azrad >; Slava Ovsiienko ; Liron Himi >; Jiawen Wu ; Dekel Peled >; NBU-Contact-Thomas Monjalon (EXTERNAL) >; sta...@dpdk.org >Subject: Re: [PATCH] doc: fix support table for ETH and VLAN flow items > >On 4/26/2022 9:55 AM, Asaf Penso wrote: >>> -Original Message- >>> From: Ferruh Yigit >>> Sent: Wednesday, April 20, 2022 8:52 PM >>> To: Ilya Maximets ; dev@dpdk.org; Asaf Penso >>> >>> Cc: Ajit Khaparde ; Rahul Lakkireddy >>> ; Hemant Agrawal >>> ; Haiyue Wang ; >John >>> Daley ; Guoyang Zhou >; >>> Min Hu (Connor) ; Beilei Xing >>> ; Jingjing Wu ; Qi >>> Zhang ; Rosen Xu ; Matan >>> Azrad ; Slava Ovsiienko ; >>> Liron Himi ; Jiawen Wu >; >>> Ori Kam ; Dekel Peled ; >>> NBU-Contact- Thomas Monjalon (EXTERNAL) ; >>> sta...@dpdk.org; NBU-Contact-Thomas Monjalon (EXTERNAL) >>> >>> Subject: Re: [PATCH] doc: fix support table for ETH and VLAN flow >>> items >>> >>> On 3/16/2022 12:01 PM, Ilya Maximets wrote: 'has_vlan' attribute is only supported by sfc, mlx5 and cnxk. Other drivers doesn't support it. Most of them (like i40e) just ignore it silently. Some drivers (like mlx4) never had a full support of the eth item even before introduction of 'has_vlan' (mlx4 allows to match on the destination MAC only). Same for the 'has_more_vlan' flag of the vlan item. Changing the support level to 'partial' for all such drivers. This doesn't solve the issue, but at least marks the problematic drivers. >>> >>> Hi Asaf, >>> >>> This was the kind of maintanance issue I was referring to have this >>> kind of capability documentation for flow API. >>> >> Are you referring to the fact that fields like has_vlan are not part of the >table? >> If so, you are right, but IMHO having the high level items still allows the >users to understand what is supported quickly. >> We can have another level of tables per each relevant item to address this >specific issue. >> In this case, we'll have a table for ETH that elaborates the different >> fields' >support, like has_vlan. >> If you are referring to a different issue, please elaborate. >> > >'vlan' in the .ini file is already to document the flow API VLAN support, so I >am >not suggesting adding more to the table. > >My point was it is hard to make this kind documentation correct. > Yes, and I think that between having none and having everything, what we currently have is closer to everything, and is very useful for the users. >> >>> All below drivers are using 'RTE_FLOW_ITEM_TYPE_VLAN', the script >>> verifies this, but are they actually supporting VLAN filter and in which >>> case? >>> >>> We need comment from driver maintainers about the support level. >> >> @Ori Kam, please comment for mlx driver. >> >>> Some details are available in: https://bugs.dpdk.org/show_bug.cgi?id=958 Fixes: 09315fc83861 ("ethdev: add VLAN attributes to ethernet and VLAN items") Cc: sta...@dpdk.org Signed-off-by: Ilya Maximets --- I added the stable in CC, but the patch should be extended while backporting. For 21.11 the cnxk driver should be also updated, for 20.11, sfc driver should also be included. 
doc/guides/nics/features/bnxt.ini | 4 ++-- doc/guides/nics/features/cxgbe.ini | 4 ++-- doc/guides/nics/features/dpaa2.ini | 4 ++-- doc/guides/nics/features/e1000.ini | 2 +- doc/guides/nics/features/enic.ini | 4 ++-- doc/guides/nics/features/hinic.ini | 2 +- doc/guides/nics/features/hns3.ini | 4 ++-- doc/guides/nics/features/i40e.ini | 4 ++-- doc/guides/nics/features/iavf.ini | 4 ++-- doc/guides/nics/features/ice.ini| 4 ++-- doc/guides/nics/features/igc.ini| 2 +- doc/guides/nics/features/ipn3ke.ini | 4 ++-- doc/guides/nics/features/ixgbe.ini | 4 ++-- doc/guides/nics/features/mlx4.ini | 4 ++-- doc/guides/nics/features/mvpp2.ini | 4 ++-- doc/guides/nics/features/tap.ini| 4 ++-- doc/guides/nics/features/txgbe.ini | 4 ++-- 17 files changed, 31 insertions(+), 31 deletions(-) diff --git a/doc/guides/nics/features/bnxt.ini b/doc/guides/nics/features/bnxt.ini index afb5414b49..ac682c5779 100644 --- a/doc/guides/nics/features/bnxt.ini +++ b/doc/guides/nics/features/bnxt.ini @@ -57,7 +57,7 @@ Perf doc = Y [rte_flow items] any = Y -eth = Y +eth = P ipv4 = Y ipv6 = Y gre = Y >>
[Bug 996] DPDK:20.11.1: net/ena crash while fetching xstats
https://bugs.dpdk.org/show_bug.cgi?id=996

Michal Krawczyk (m...@semihalf.com) changed:

           What           |Removed      |Added
----------------------------------------------------------
         Resolution       |WONTFIX      |---
             Status       |RESOLVED     |UNCONFIRMED

--- Comment #3 from Michal Krawczyk (m...@semihalf.com) ---
Hey Amiya, sorry for the late reply, I was OOO for one week. Thank you for providing us with more details.

If you aren't calling any API that needs to use the ENA admin queue from the secondary process, the situation you're seeing shouldn't happen. I just executed a simple application on DPDK v20.11.1 in MP mode - the main process fetches the xstats, the secondary process simply performs packet forwarding. The application did not crash in my case.

From what I understand, the crash happens because:
1. The ENA admin queue is not using the shared memory.
2. The secondary process sends the request and saves it in the secondary process memory.
3. The primary process receives the interrupt and executes the completion handler.
4. The completion handler cannot find the relevant request (as it's in the secondary process memory) and the app crashes.

Please double check that:
1. The xstats aren't being fetched from the secondary process.
2. You aren't calling any of the APIs below from the secondary process, as they also use the ENA admin queue:
- rte_eth_dev_set_mtu()
- rte_eth_dev_rss_reta_update()
- rte_eth_dev_rss_reta_query()

Point 1 is much more likely, as you've described this as a regression in v20.11, and indeed the xstats were extended after the v19.11 release.

If none of the above is true, any other information that could get us closer to the core of the issue would be helpful (we can't reproduce this on our side).

Thanks, Michal

-- You are receiving this mail because: You are the assignee for the bug.
[PATCH 1/3] eventdev: add function to quiesce an event port
Add a function to quiesce any core-specific resources consumed by the event port. When the application decides to migrate the event port to another lcore or tear down the current lcore, it can call `rte_event_port_quiesce` to make sure that all the data associated with the event port is released from the lcore; this may also include any prefetched events. While releasing the event port from the lcore, this function calls the user-provided flush callback once per event. Signed-off-by: Pavan Nikhilesh --- lib/eventdev/eventdev_pmd.h | 19 +++ lib/eventdev/rte_eventdev.c | 19 +++ lib/eventdev/rte_eventdev.h | 33 + lib/eventdev/version.map| 3 +++ 4 files changed, 74 insertions(+) diff --git a/lib/eventdev/eventdev_pmd.h b/lib/eventdev/eventdev_pmd.h index ce469d47a6..cf9f2146a1 100644 --- a/lib/eventdev/eventdev_pmd.h +++ b/lib/eventdev/eventdev_pmd.h @@ -381,6 +381,23 @@ typedef int (*eventdev_port_setup_t)(struct rte_eventdev *dev, */ typedef void (*eventdev_port_release_t)(void *port); +/** + * Quiesce any core specific resources consumed by the event port + * + * @param dev + * Event device pointer. + * @param port + * Event port pointer. + * @param flush_cb + * User-provided event flush function. + * @param args + * Arguments to be passed to the user-provided event flush function. + * + */ +typedef void (*eventdev_port_quiesce_t)(struct rte_eventdev *dev, void *port, + eventdev_port_flush_t flush_cb, + void *args); + /** * Link multiple source event queues to destination event port. * @@ -1218,6 +1235,8 @@ struct eventdev_ops { /**< Set up an event port. */ eventdev_port_release_t port_release; /**< Release an event port. */ + eventdev_port_quiesce_t port_quiesce; + /**< Quiesce an event port. */ eventdev_port_link_t port_link; /**< Link event queues to an event port. */ diff --git a/lib/eventdev/rte_eventdev.c b/lib/eventdev/rte_eventdev.c index 532a253553..541fa5dc61 100644 --- a/lib/eventdev/rte_eventdev.c +++ b/lib/eventdev/rte_eventdev.c @@ -730,6 +730,25 @@ rte_event_port_setup(uint8_t dev_id, uint8_t port_id, return 0; } +void +rte_event_port_quiesce(uint8_t dev_id, uint8_t port_id, + eventdev_port_flush_t release_cb, void *args) +{ + struct rte_eventdev *dev; + + RTE_EVENTDEV_VALID_DEVID_OR_RET(dev_id); + dev = &rte_eventdevs[dev_id]; + + if (!is_valid_port(dev, port_id)) { + RTE_EDEV_LOG_ERR("Invalid port_id=%" PRIu8, port_id); + return; + } + + if (dev->dev_ops->port_quiesce) + (*dev->dev_ops->port_quiesce)(dev, dev->data->ports[port_id], + release_cb, args); +} + int rte_event_dev_attr_get(uint8_t dev_id, uint32_t attr_id, uint32_t *attr_value) diff --git a/lib/eventdev/rte_eventdev.h b/lib/eventdev/rte_eventdev.h index 42a5660169..c86d8a5576 100644 --- a/lib/eventdev/rte_eventdev.h +++ b/lib/eventdev/rte_eventdev.h @@ -830,6 +830,39 @@ int rte_event_port_setup(uint8_t dev_id, uint8_t port_id, const struct rte_event_port_conf *port_conf); +typedef void (*eventdev_port_flush_t)(uint8_t dev_id, struct rte_event event, + void *arg); +/**< Callback function prototype that can be passed during + * rte_event_port_quiesce(), invoked once per released event. + */ + +/** + * Quiesce any core specific resources consumed by the event port. + * + * Event ports are generally coupled with lcores, and a given hardware + * implementation might require the PMD to store port specific data in the + * lcore. 
+ * When the application decides to migrate the event port to another lcore + * or tear down the current lcore, it can call `rte_event_port_quiesce` + * to make sure that all the data associated with the event port is released + * from the lcore; this may also include any prefetched events. + * While releasing the event port from the lcore, this function calls the + * user-provided flush callback once per event. + * + * The event port specific config is not reset. + * + * @param dev_id + * The identifier of the device. + * @param port_id + * The index of the event port to quiesce. The value must be in the range + * [0, nb_event_ports - 1] previously supplied to rte_event_dev_configure(). + * @param release_cb + * Callback function invoked once per flushed event. + */ +__rte_experimental +void rte_event_port_quiesce(uint8_t dev_id, uint8_t port_id, + eventdev_port_flush_t release_cb, void *args); + /** * The queue depth of the port on the enqueue side */ diff --git a/lib/eventdev/version.map b/lib/eventdev/version.map index cd5dada07f..1907093539 100644 --- a/lib/eventdev/version.map +++ b/lib/eventdev/versio
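To make the intended call flow concrete, a short usage sketch from a worker's exit path, assuming the application's events carry mbufs (the helper names are illustrative; the callback signature is the one introduced above):

#include <rte_eventdev.h>
#include <rte_mbuf.h>

/* Flush callback: invoked once per event still held by the port. */
static void
worker_port_flush(uint8_t dev_id __rte_unused, struct rte_event ev,
		  void *args __rte_unused)
{
	rte_pktmbuf_free(ev.mbuf); /* return the mbuf carried by the event */
}

static void
worker_exit(uint8_t dev_id, uint8_t port_id)
{
	/* Release prefetched/held events before this lcore goes away;
	 * the port's configuration itself is left intact. */
	rte_event_port_quiesce(dev_id, port_id, worker_port_flush, NULL);
}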
[PATCH 2/3] eventdev: update examples to use port quiesce
Quiesce event ports used by the worker cores on exit to free up any outstanding resources. Signed-off-by: Pavan Nikhilesh Change-Id: Iea1f933d4f4926630d82a9883fbe3f1e75876097 --- Depends-on: Series-22677 app/test-eventdev/test_perf_common.c | 8 app/test-eventdev/test_pipeline_common.c | 12 examples/eventdev_pipeline/pipeline_common.h | 9 + examples/ipsec-secgw/ipsec_worker.c | 13 + examples/l2fwd-event/l2fwd_common.c | 13 + examples/l3fwd/l3fwd_event.c | 13 + 6 files changed, 68 insertions(+) diff --git a/app/test-eventdev/test_perf_common.c b/app/test-eventdev/test_perf_common.c index f673a9fddd..2016583979 100644 --- a/app/test-eventdev/test_perf_common.c +++ b/app/test-eventdev/test_perf_common.c @@ -985,6 +985,13 @@ perf_opt_dump(struct evt_options *opt, uint8_t nb_queues) evt_dump("prod_enq_burst_sz", "%d", opt->prod_enq_burst_sz); } +static void +perf_event_port_flush(uint8_t dev_id __rte_unused, struct rte_event ev, + void *args) +{ + rte_mempool_put(args, ev.event_ptr); +} + void perf_worker_cleanup(struct rte_mempool *const pool, uint8_t dev_id, uint8_t port_id, struct rte_event events[], uint16_t nb_enq, @@ -1000,6 +1007,7 @@ perf_worker_cleanup(struct rte_mempool *const pool, uint8_t dev_id, events[i].op = RTE_EVENT_OP_RELEASE; rte_event_enqueue_burst(dev_id, port_id, events, nb_deq); } + rte_event_port_quiesce(dev_id, port_id, perf_event_port_flush, pool); } void diff --git a/app/test-eventdev/test_pipeline_common.c b/app/test-eventdev/test_pipeline_common.c index a8dd07..82e5745071 100644 --- a/app/test-eventdev/test_pipeline_common.c +++ b/app/test-eventdev/test_pipeline_common.c @@ -518,6 +518,16 @@ pipeline_vector_array_free(struct rte_event events[], uint16_t num) } } +static void +pipeline_event_port_flush(uint8_t dev_id __rte_unused, struct rte_event ev, + void *args __rte_unused) +{ + if (ev.event_type & RTE_EVENT_TYPE_VECTOR) + pipeline_vector_array_free(&ev, 1); + else + rte_pktmbuf_free(ev.mbuf); +} + void pipeline_worker_cleanup(uint8_t dev, uint8_t port, struct rte_event ev[], uint16_t enq, uint16_t deq) @@ -542,6 +552,8 @@ pipeline_worker_cleanup(uint8_t dev, uint8_t port, struct rte_event ev[], rte_event_enqueue_burst(dev, port, ev, deq); } + + rte_event_port_quiesce(dev, port, pipeline_event_port_flush, NULL); } void diff --git a/examples/eventdev_pipeline/pipeline_common.h b/examples/eventdev_pipeline/pipeline_common.h index 9899b257b0..28b6ab85ff 100644 --- a/examples/eventdev_pipeline/pipeline_common.h +++ b/examples/eventdev_pipeline/pipeline_common.h @@ -140,6 +140,13 @@ schedule_devices(unsigned int lcore_id) } } +static void +event_port_flush(uint8_t dev_id __rte_unused, struct rte_event ev, +void *args __rte_unused) +{ + rte_mempool_put(args, ev.event_ptr); +} + static inline void worker_cleanup(uint8_t dev_id, uint8_t port_id, struct rte_event events[], uint16_t nb_enq, uint16_t nb_deq) @@ -160,6 +167,8 @@ worker_cleanup(uint8_t dev_id, uint8_t port_id, struct rte_event events[], events[i].op = RTE_EVENT_OP_RELEASE; rte_event_enqueue_burst(dev_id, port_id, events, nb_deq); } + + rte_event_port_quiesce(dev_id, port_id, event_port_flush, NULL); } void set_worker_generic_setup_data(struct setup_data *caps, bool burst); diff --git a/examples/ipsec-secgw/ipsec_worker.c b/examples/ipsec-secgw/ipsec_worker.c index 3df5acf384..7f259e4cf3 100644 --- a/examples/ipsec-secgw/ipsec_worker.c +++ b/examples/ipsec-secgw/ipsec_worker.c @@ -737,6 +737,13 @@ ipsec_ev_vector_drv_mode_process(struct eh_event_link_info *links, * selected. 
*/ +static void +ipsec_event_port_flush(uint8_t eventdev_id __rte_unused, struct rte_event ev, + void *args __rte_unused) +{ + rte_pktmbuf_free(ev.mbuf); +} + /* Workers registered */ #define IPSEC_EVENTMODE_WORKERS2 @@ -861,6 +868,9 @@ ipsec_wrkr_non_burst_int_port_drv_mode(struct eh_event_link_info *links, rte_event_enqueue_burst(links[0].eventdev_id, links[0].event_port_id, &ev, 1); } + + rte_event_port_quiesce(links[0].eventdev_id, links[0].event_port_id, + ipsec_event_port_flush, NULL); } /* @@ -974,6 +984,9 @@ ipsec_wrkr_non_burst_int_port_app_mode(struct eh_event_link_info *links, rte_event_enqueue_burst(links[0].eventdev_id, links[0].event_port_id, &ev, 1); } + + rte_event_port_quiesce(links[0].eventdev_id, links[0].event_port_id, +
[PATCH 3/3] event/cnxk: implement event port quiesce function
Implement event port quiesce function to clean up any lcore resources used. Signed-off-by: Pavan Nikhilesh Change-Id: I7dda3d54dc698645d25ebbfbabd81760940fe649 --- drivers/event/cnxk/cn10k_eventdev.c | 78 ++--- drivers/event/cnxk/cn9k_eventdev.c | 60 +- 2 files changed, 130 insertions(+), 8 deletions(-) diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index 94829e789c..d84c5d2d1e 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -167,15 +167,23 @@ cn10k_sso_hws_reset(void *arg, void *hws) uint64_t u64[2]; } gw; uint8_t pend_tt; + bool is_pend; plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL); /* Wait till getwork/swtp/waitw/desched completes. */ + is_pend = false; + /* Work in WQE0 is always consumed, unless its a SWTAG. */ + pend_state = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE); + if (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(54)) || + ws->swtag_req) + is_pend = true; + do { pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE); } while (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54))); pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0)); - if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */ + if (is_pend && pend_tt != SSO_TT_EMPTY) { /* Work was pending */ if (pend_tt == SSO_TT_ATOMIC || pend_tt == SSO_TT_ORDERED) cnxk_sso_hws_swtag_untag(base + SSOW_LF_GWS_OP_SWTAG_UNTAG); @@ -189,15 +197,10 @@ cn10k_sso_hws_reset(void *arg, void *hws) switch (dev->gw_mode) { case CN10K_GW_MODE_PREF: + case CN10K_GW_MODE_PREF_WFE: while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63)) ; break; - case CN10K_GW_MODE_PREF_WFE: - while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & - SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT) - continue; - plt_write64(0, base + SSOW_LF_GWS_OP_GWC_INVAL); - break; case CN10K_GW_MODE_NONE: default: break; @@ -533,6 +536,66 @@ cn10k_sso_port_release(void *port) rte_free(gws_cookie); } +static void +cn10k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port, + eventdev_port_flush_t flush_cb, void *args) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + struct cn10k_sso_hws *ws = port; + struct rte_event ev; + uint64_t ptag; + bool is_pend; + + is_pend = false; + /* Work in WQE0 is always consumed, unless its a SWTAG. */ + ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE); + if (ptag & (BIT_ULL(62) | BIT_ULL(54)) || ws->swtag_req) + is_pend = true; + do { + ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE); + } while (ptag & +(BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54))); + + cn10k_sso_hws_get_work_empty(ws, &ev, +(NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F | +NIX_RX_MULTI_SEG_F | CPT_RX_WQE_F); + if (is_pend && ev.u64) { + if (flush_cb) + flush_cb(event_dev->data->dev_id, ev, args); + cnxk_sso_hws_swtag_flush(ws->base); + } + + /* Check if we have work in PRF_WQE0, if so extract it. 
*/ + switch (dev->gw_mode) { + case CN10K_GW_MODE_PREF: + case CN10K_GW_MODE_PREF_WFE: + while (plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0) & + BIT_ULL(63)) + ; + break; + case CN10K_GW_MODE_NONE: + default: + break; + } + + if (CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0)) != + SSO_TT_EMPTY) { + plt_write64(BIT_ULL(16) | 1, + ws->base + SSOW_LF_GWS_OP_GET_WORK0); + cn10k_sso_hws_get_work_empty( + ws, &ev, + (NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F | + NIX_RX_MULTI_SEG_F | CPT_RX_WQE_F); + if (ev.u64) { + if (flush_cb) + flush_cb(event_dev->data->dev_id, ev, args); + cnxk_sso_hws_swtag_flush(ws->base); + } + } + ws->swtag_req = 0; + plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL); +} + static int cn10k_sso_port_link(struct rte_eventdev *event_dev, void *port, const uint8_t queues[], const uint8_t priorities[], @@
[PATCH 2/3 v2] eventdev: update examples to use port quiesce
Quiesce event ports used by the worker cores on exit to free up any outstanding resources. Signed-off-by: Pavan Nikhilesh --- Depends-on: Series-22677 app/test-eventdev/test_perf_common.c | 8 app/test-eventdev/test_pipeline_common.c | 12 examples/eventdev_pipeline/pipeline_common.h | 9 + examples/ipsec-secgw/ipsec_worker.c | 13 + examples/l2fwd-event/l2fwd_common.c | 13 + examples/l3fwd/l3fwd_event.c | 13 + 6 files changed, 68 insertions(+) diff --git a/app/test-eventdev/test_perf_common.c b/app/test-eventdev/test_perf_common.c index f673a9fddd..2016583979 100644 --- a/app/test-eventdev/test_perf_common.c +++ b/app/test-eventdev/test_perf_common.c @@ -985,6 +985,13 @@ perf_opt_dump(struct evt_options *opt, uint8_t nb_queues) evt_dump("prod_enq_burst_sz", "%d", opt->prod_enq_burst_sz); } +static void +perf_event_port_flush(uint8_t dev_id __rte_unused, struct rte_event ev, + void *args) +{ + rte_mempool_put(args, ev.event_ptr); +} + void perf_worker_cleanup(struct rte_mempool *const pool, uint8_t dev_id, uint8_t port_id, struct rte_event events[], uint16_t nb_enq, @@ -1000,6 +1007,7 @@ perf_worker_cleanup(struct rte_mempool *const pool, uint8_t dev_id, events[i].op = RTE_EVENT_OP_RELEASE; rte_event_enqueue_burst(dev_id, port_id, events, nb_deq); } + rte_event_port_quiesce(dev_id, port_id, perf_event_port_flush, pool); } void diff --git a/app/test-eventdev/test_pipeline_common.c b/app/test-eventdev/test_pipeline_common.c index a8dd07..82e5745071 100644 --- a/app/test-eventdev/test_pipeline_common.c +++ b/app/test-eventdev/test_pipeline_common.c @@ -518,6 +518,16 @@ pipeline_vector_array_free(struct rte_event events[], uint16_t num) } } +static void +pipeline_event_port_flush(uint8_t dev_id __rte_unused, struct rte_event ev, + void *args __rte_unused) +{ + if (ev.event_type & RTE_EVENT_TYPE_VECTOR) + pipeline_vector_array_free(&ev, 1); + else + rte_pktmbuf_free(ev.mbuf); +} + void pipeline_worker_cleanup(uint8_t dev, uint8_t port, struct rte_event ev[], uint16_t enq, uint16_t deq) @@ -542,6 +552,8 @@ pipeline_worker_cleanup(uint8_t dev, uint8_t port, struct rte_event ev[], rte_event_enqueue_burst(dev, port, ev, deq); } + + rte_event_port_quiesce(dev, port, pipeline_event_port_flush, NULL); } void diff --git a/examples/eventdev_pipeline/pipeline_common.h b/examples/eventdev_pipeline/pipeline_common.h index 9899b257b0..28b6ab85ff 100644 --- a/examples/eventdev_pipeline/pipeline_common.h +++ b/examples/eventdev_pipeline/pipeline_common.h @@ -140,6 +140,13 @@ schedule_devices(unsigned int lcore_id) } } +static void +event_port_flush(uint8_t dev_id __rte_unused, struct rte_event ev, +void *args __rte_unused) +{ + rte_mempool_put(args, ev.event_ptr); +} + static inline void worker_cleanup(uint8_t dev_id, uint8_t port_id, struct rte_event events[], uint16_t nb_enq, uint16_t nb_deq) @@ -160,6 +167,8 @@ worker_cleanup(uint8_t dev_id, uint8_t port_id, struct rte_event events[], events[i].op = RTE_EVENT_OP_RELEASE; rte_event_enqueue_burst(dev_id, port_id, events, nb_deq); } + + rte_event_port_quiesce(dev_id, port_id, event_port_flush, NULL); } void set_worker_generic_setup_data(struct setup_data *caps, bool burst); diff --git a/examples/ipsec-secgw/ipsec_worker.c b/examples/ipsec-secgw/ipsec_worker.c index 3df5acf384..7f259e4cf3 100644 --- a/examples/ipsec-secgw/ipsec_worker.c +++ b/examples/ipsec-secgw/ipsec_worker.c @@ -737,6 +737,13 @@ ipsec_ev_vector_drv_mode_process(struct eh_event_link_info *links, * selected. 
*/ +static void +ipsec_event_port_flush(uint8_t eventdev_id __rte_unused, struct rte_event ev, + void *args __rte_unused) +{ + rte_pktmbuf_free(ev.mbuf); +} + /* Workers registered */ #define IPSEC_EVENTMODE_WORKERS2 @@ -861,6 +868,9 @@ ipsec_wrkr_non_burst_int_port_drv_mode(struct eh_event_link_info *links, rte_event_enqueue_burst(links[0].eventdev_id, links[0].event_port_id, &ev, 1); } + + rte_event_port_quiesce(links[0].eventdev_id, links[0].event_port_id, + ipsec_event_port_flush, NULL); } /* @@ -974,6 +984,9 @@ ipsec_wrkr_non_burst_int_port_app_mode(struct eh_event_link_info *links, rte_event_enqueue_burst(links[0].eventdev_id, links[0].event_port_id, &ev, 1); } + + rte_event_port_quiesce(links[0].eventdev_id, links[0].event_port_id, + ipsec_event_port_flush, NULL); }
[PATCH 1/3 v2] eventdev: add function to quiesce an event port
Add a function to quiesce any core-specific resources consumed by the event port. When the application decides to migrate the event port to another lcore or tear down the current lcore, it can call `rte_event_port_quiesce` to make sure that all the data associated with the event port is released from the lcore; this may also include any prefetched events. While releasing the event port from the lcore, this function calls the user-provided flush callback once per event. Signed-off-by: Pavan Nikhilesh --- v2 Changes: - Remove internal Change-Id tag from commit messages. lib/eventdev/eventdev_pmd.h | 19 +++ lib/eventdev/rte_eventdev.c | 19 +++ lib/eventdev/rte_eventdev.h | 33 + lib/eventdev/version.map| 3 +++ 4 files changed, 74 insertions(+) diff --git a/lib/eventdev/eventdev_pmd.h b/lib/eventdev/eventdev_pmd.h index ce469d47a6..cf9f2146a1 100644 --- a/lib/eventdev/eventdev_pmd.h +++ b/lib/eventdev/eventdev_pmd.h @@ -381,6 +381,23 @@ typedef int (*eventdev_port_setup_t)(struct rte_eventdev *dev, */ typedef void (*eventdev_port_release_t)(void *port); +/** + * Quiesce any core specific resources consumed by the event port + * + * @param dev + * Event device pointer. + * @param port + * Event port pointer. + * @param flush_cb + * User-provided event flush function. + * @param args + * Arguments to be passed to the user-provided event flush function. + * + */ +typedef void (*eventdev_port_quiesce_t)(struct rte_eventdev *dev, void *port, + eventdev_port_flush_t flush_cb, + void *args); + /** * Link multiple source event queues to destination event port. * @@ -1218,6 +1235,8 @@ struct eventdev_ops { /**< Set up an event port. */ eventdev_port_release_t port_release; /**< Release an event port. */ + eventdev_port_quiesce_t port_quiesce; + /**< Quiesce an event port. */ eventdev_port_link_t port_link; /**< Link event queues to an event port. */ diff --git a/lib/eventdev/rte_eventdev.c b/lib/eventdev/rte_eventdev.c index 532a253553..541fa5dc61 100644 --- a/lib/eventdev/rte_eventdev.c +++ b/lib/eventdev/rte_eventdev.c @@ -730,6 +730,25 @@ rte_event_port_setup(uint8_t dev_id, uint8_t port_id, return 0; } +void +rte_event_port_quiesce(uint8_t dev_id, uint8_t port_id, + eventdev_port_flush_t release_cb, void *args) +{ + struct rte_eventdev *dev; + + RTE_EVENTDEV_VALID_DEVID_OR_RET(dev_id); + dev = &rte_eventdevs[dev_id]; + + if (!is_valid_port(dev, port_id)) { + RTE_EDEV_LOG_ERR("Invalid port_id=%" PRIu8, port_id); + return; + } + + if (dev->dev_ops->port_quiesce) + (*dev->dev_ops->port_quiesce)(dev, dev->data->ports[port_id], + release_cb, args); +} + int rte_event_dev_attr_get(uint8_t dev_id, uint32_t attr_id, uint32_t *attr_value) diff --git a/lib/eventdev/rte_eventdev.h b/lib/eventdev/rte_eventdev.h index 42a5660169..c86d8a5576 100644 --- a/lib/eventdev/rte_eventdev.h +++ b/lib/eventdev/rte_eventdev.h @@ -830,6 +830,39 @@ int rte_event_port_setup(uint8_t dev_id, uint8_t port_id, const struct rte_event_port_conf *port_conf); +typedef void (*eventdev_port_flush_t)(uint8_t dev_id, struct rte_event event, + void *arg); +/**< Callback function prototype that can be passed during + * rte_event_port_quiesce(), invoked once per released event. + */ + +/** + * Quiesce any core specific resources consumed by the event port. + * + * Event ports are generally coupled with lcores, and a given hardware + * implementation might require the PMD to store port specific data in the + * lcore. 
+ * When the application decides to migrate the event port to another lcore + * or tear down the current lcore, it can call `rte_event_port_quiesce` + * to make sure that all the data associated with the event port is released + * from the lcore; this may also include any prefetched events. + * While releasing the event port from the lcore, this function calls the + * user-provided flush callback once per event. + * + * The event port specific config is not reset. + * + * @param dev_id + * The identifier of the device. + * @param port_id + * The index of the event port to quiesce. The value must be in the range + * [0, nb_event_ports - 1] previously supplied to rte_event_dev_configure(). + * @param release_cb + * Callback function invoked once per flushed event. + */ +__rte_experimental +void rte_event_port_quiesce(uint8_t dev_id, uint8_t port_id, + eventdev_port_flush_t release_cb, void *args); + /** * The queue depth of the port on the enqueue side */ diff --git a/lib/eventdev/version.map b/lib/eventdev/version.map index cd5dada07f..190709353
[PATCH 3/3 v2] event/cnxk: implement event port quiesce function
Implement event port quiesce function to clean up any lcore resources used. Signed-off-by: Pavan Nikhilesh --- drivers/event/cnxk/cn10k_eventdev.c | 78 ++--- drivers/event/cnxk/cn9k_eventdev.c | 60 +- 2 files changed, 130 insertions(+), 8 deletions(-) diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c index 94829e789c..d84c5d2d1e 100644 --- a/drivers/event/cnxk/cn10k_eventdev.c +++ b/drivers/event/cnxk/cn10k_eventdev.c @@ -167,15 +167,23 @@ cn10k_sso_hws_reset(void *arg, void *hws) uint64_t u64[2]; } gw; uint8_t pend_tt; + bool is_pend; plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL); /* Wait till getwork/swtp/waitw/desched completes. */ + is_pend = false; + /* Work in WQE0 is always consumed, unless its a SWTAG. */ + pend_state = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE); + if (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(54)) || + ws->swtag_req) + is_pend = true; + do { pend_state = plt_read64(base + SSOW_LF_GWS_PENDSTATE); } while (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54))); pend_tt = CNXK_TT_FROM_TAG(plt_read64(base + SSOW_LF_GWS_WQE0)); - if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */ + if (is_pend && pend_tt != SSO_TT_EMPTY) { /* Work was pending */ if (pend_tt == SSO_TT_ATOMIC || pend_tt == SSO_TT_ORDERED) cnxk_sso_hws_swtag_untag(base + SSOW_LF_GWS_OP_SWTAG_UNTAG); @@ -189,15 +197,10 @@ cn10k_sso_hws_reset(void *arg, void *hws) switch (dev->gw_mode) { case CN10K_GW_MODE_PREF: + case CN10K_GW_MODE_PREF_WFE: while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & BIT_ULL(63)) ; break; - case CN10K_GW_MODE_PREF_WFE: - while (plt_read64(base + SSOW_LF_GWS_PRF_WQE0) & - SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT) - continue; - plt_write64(0, base + SSOW_LF_GWS_OP_GWC_INVAL); - break; case CN10K_GW_MODE_NONE: default: break; @@ -533,6 +536,66 @@ cn10k_sso_port_release(void *port) rte_free(gws_cookie); } +static void +cn10k_sso_port_quiesce(struct rte_eventdev *event_dev, void *port, + eventdev_port_flush_t flush_cb, void *args) +{ + struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev); + struct cn10k_sso_hws *ws = port; + struct rte_event ev; + uint64_t ptag; + bool is_pend; + + is_pend = false; + /* Work in WQE0 is always consumed, unless its a SWTAG. */ + ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE); + if (ptag & (BIT_ULL(62) | BIT_ULL(54)) || ws->swtag_req) + is_pend = true; + do { + ptag = plt_read64(ws->base + SSOW_LF_GWS_PENDSTATE); + } while (ptag & +(BIT_ULL(62) | BIT_ULL(58) | BIT_ULL(56) | BIT_ULL(54))); + + cn10k_sso_hws_get_work_empty(ws, &ev, +(NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F | +NIX_RX_MULTI_SEG_F | CPT_RX_WQE_F); + if (is_pend && ev.u64) { + if (flush_cb) + flush_cb(event_dev->data->dev_id, ev, args); + cnxk_sso_hws_swtag_flush(ws->base); + } + + /* Check if we have work in PRF_WQE0, if so extract it. 
*/ + switch (dev->gw_mode) { + case CN10K_GW_MODE_PREF: + case CN10K_GW_MODE_PREF_WFE: + while (plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0) & + BIT_ULL(63)) + ; + break; + case CN10K_GW_MODE_NONE: + default: + break; + } + + if (CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_PRF_WQE0)) != + SSO_TT_EMPTY) { + plt_write64(BIT_ULL(16) | 1, + ws->base + SSOW_LF_GWS_OP_GET_WORK0); + cn10k_sso_hws_get_work_empty( + ws, &ev, + (NIX_RX_OFFLOAD_MAX - 1) | NIX_RX_REAS_F | + NIX_RX_MULTI_SEG_F | CPT_RX_WQE_F); + if (ev.u64) { + if (flush_cb) + flush_cb(event_dev->data->dev_id, ev, args); + cnxk_sso_hws_swtag_flush(ws->base); + } + } + ws->swtag_req = 0; + plt_write64(0, ws->base + SSOW_LF_GWS_OP_GWC_INVAL); +} + static int cn10k_sso_port_link(struct rte_eventdev *event_dev, void *port, const uint8_t queues[], const uint8_t priorities[], @@ -852,6 +915,7 @@ static struct eventdev_ops cn10k_sso_de
Re: [PATCH 1/2] app/testpmd: fix stats get when display fwd stats
On 4/6/2022 2:15 PM, Min Hu (Connor) wrote: In function 'fwd_stats_display', if function 'rte_eth_stats_get' fails, 'stats' holds an indeterminate value and the displayed result will be abnormal. This patch checks the return value of 'rte_eth_stats_get' to avoid displaying abnormal stats. Fixes: 53324971a14e ("app/testpmd: display/clear forwarding stats on demand") Cc: sta...@dpdk.org Signed-off-by: Min Hu (Connor) --- app/test-pmd/config.c | 10 -- app/test-pmd/testpmd.c | 16 ++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c index cc8e7aa138..bd689f9f86 100644 --- a/app/test-pmd/config.c +++ b/app/test-pmd/config.c @@ -249,14 +249,20 @@ nic_stats_display(portid_t port_id) diff_ns; uint64_t mpps_rx, mpps_tx, mbps_rx, mbps_tx; struct rte_eth_stats stats; - static const char *nic_stats_border = ""; + int ret; if (port_id_is_invalid(port_id, ENABLED_WARN)) { print_valid_ports(); return; } - rte_eth_stats_get(port_id, &stats); + ret = rte_eth_stats_get(port_id, &stats); + if (ret != 0) { + fprintf(stderr, + "%s: Error: failed to get stats (port %u): %d", + __func__, port_id, ret); + return; + } printf("\n %s NIC statistics for port %-2d %s\n", nic_stats_border, port_id, nic_stats_border); diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index fe2ce19f99..79bb23264b 100644 --- a/app/test-pmd/testpmd.c +++ b/app/test-pmd/testpmd.c @@ -1982,6 +1982,7 @@ fwd_stats_display(void) struct rte_port *port; streamid_t sm_id; portid_t pt_id; + int ret; int i; memset(ports_stats, 0, sizeof(ports_stats)); @@ -2013,7 +2014,13 @@ fwd_stats_display(void) pt_id = fwd_ports_ids[i]; port = &ports[pt_id]; - rte_eth_stats_get(pt_id, &stats); + ret = rte_eth_stats_get(pt_id, &stats); + if (ret != 0) { + fprintf(stderr, + "%s: Error: failed to get stats (port %u): %d", + __func__, pt_id, ret); + continue; + } stats.ipackets -= port->stats.ipackets; stats.opackets -= port->stats.opackets; stats.ibytes -= port->stats.ibytes; @@ -2108,11 +2115,16 @@ fwd_stats_reset(void) { streamid_t sm_id; portid_t pt_id; + int ret; int i; for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) { pt_id = fwd_ports_ids[i]; - rte_eth_stats_get(pt_id, &ports[pt_id].stats); + ret = rte_eth_stats_get(pt_id, &ports[pt_id].stats); + if (ret != 0) + fprintf(stderr, + "%s: Error: failed to clear stats (port %u):%d", + __func__, pt_id, ret); Should we clear "ports[pt_id].stats" in this condition? } for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) { struct fwd_stream *fs = fwd_streams[sm_id]; As such LGTM Acked-by: Aman Singh
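The same defensive pattern as a standalone sketch for callers outside testpmd (the function name and message text are illustrative): a non-zero return from rte_eth_stats_get() means 'stats' is unusable and must not be printed.

#include <inttypes.h>
#include <stdio.h>
#include <rte_ethdev.h>

static void
print_port_stats(uint16_t port_id)
{
	struct rte_eth_stats stats;
	int ret;

	ret = rte_eth_stats_get(port_id, &stats);
	if (ret != 0) {
		/* 'stats' is indeterminate on failure; don't display it. */
		fprintf(stderr, "failed to get stats (port %u): %d\n",
			port_id, ret);
		return;
	}
	printf("port %u: rx=%" PRIu64 " tx=%" PRIu64 "\n",
	       port_id, stats.ipackets, stats.opackets);
}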
[PATCH v6 1/2] hash: split x86 and SW hash CRC intrinsics
Split x86 and SW hash crc intrinsics into a separate files. Signed-off-by: Pavan Nikhilesh --- v6 Changes: - Simplify rte_hash_crc_set_alg function. (Ruifeng) v5 Changes: - Move CRC functions to implementation specific files to remove ifdef clutter. (Ruifeng) lib/hash/hash_crc_sw.h | 419 lib/hash/hash_crc_x86.h | 62 ++ lib/hash/rte_hash_crc.h | 396 + 3 files changed, 483 insertions(+), 394 deletions(-) create mode 100644 lib/hash/hash_crc_sw.h create mode 100644 lib/hash/hash_crc_x86.h diff --git a/lib/hash/hash_crc_sw.h b/lib/hash/hash_crc_sw.h new file mode 100644 index 00..4790a0970b --- /dev/null +++ b/lib/hash/hash_crc_sw.h @@ -0,0 +1,419 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#ifndef _HASH_CRC_SW_H_ +#define _HASH_CRC_SW_H_ + +/* Lookup tables for software implementation of CRC32C */ +static const uint32_t crc32c_tables[8][256] = { + {0x, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, +0x26A1E7E8, 0xD4CA64EB, 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, +0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24, 0x105EC76F, 0xE235446C, +0xF165B798, 0x030E349B, 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384, +0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, 0x5D1D08BF, 0xAF768BBC, +0xBC267848, 0x4E4DFB4B, 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A, +0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35, 0xAA64D611, 0x580F5512, +0x4B5FA6E6, 0xB93425E5, 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA, +0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, 0xF779DEAE, 0x05125DAD, +0x1642AE59, 0xE4292D5A, 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A, +0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595, 0x417B1DBC, 0xB3109EBF, +0xA0406D4B, 0x522BEE48, 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957, +0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, 0x0C38D26C, 0xFE53516F, +0xED03A29B, 0x1F682198, 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927, +0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38, 0xDBFC821C, 0x2997011F, +0x3AC7F2EB, 0xC8AC71E8, 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7, +0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, 0xA65C047D, 0x5437877E, +0x4767748A, 0xB50CF789, 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859, +0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46, 0x7198540D, 0x83F3D70E, +0x90A324FA, 0x62C8A7F9, 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6, +0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, 0x3CDB9BDD, 0xCEB018DE, +0xDDE0EB2A, 0x2F8B6829, 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C, +0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93, 0x082F63B7, 0xFA44E0B4, +0xE9141340, 0x1B7F9043, 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C, +0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, 0x55326B08, 0xA759E80B, +0xB4091BFF, 0x466298FC, 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C, +0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033, 0xA24BB5A6, 0x502036A5, +0x4370C551, 0xB11B4652, 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D, +0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D, 0xEF087A76, 0x1D63F975, +0x0E330A81, 0xFC588982, 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D, +0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622, 0x38CC2A06, 0xCAA7A905, +0xD9F75AF1, 0x2B9CD9F2, 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED, +0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, 0x0417B1DB, 0xF67C32D8, +0xE52CC12C, 0x1747422F, 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF, +0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0, 0xD3D3E1AB, 0x21B862A8, +0x32E8915C, 0xC083125F, 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540, +0x590AB964, 
0xAB613A67, 0xB831C993, 0x4A5A4A90, 0x9E902E7B, 0x6CFBAD78, +0x7FAB5E8C, 0x8DC0DD8F, 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE, +0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1, 0x69E9F0D5, 0x9B8273D6, +0x88D28022, 0x7AB90321, 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E, +0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, 0x34F4F86A, 0xC69F7B69, +0xD5CF889D, 0x27A40B9E, 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, +0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351}, + {0x, 0x13A29877, 0x274530EE, 0x34E7A899, 0x4E8A61DC, 0x5D28F9AB, +0x69CF5132, 0x7A6DC945, 0x9D14C3B8, 0x8EB65BCF, 0xBA51F356, 0xA9F36B21, +0xD39EA264, 0xC03C3A13, 0xF4DB928A, 0xE7790AFD, 0x3FC5F181, 0x2C6769F6, +0x1880C16F, 0x0B225918, 0x714F905D, 0x62ED082A, 0x560AA0B3, 0x45A838C4, +0xA2D13239, 0xB173AA4E, 0x859402D7, 0x96369AA0, 0xEC5B53E5, 0xFFF9CB92, +0xCB1E630B, 0xD8BCFB7C, 0x7F8BE302, 0x6C297B75, 0x58CED3EC, 0x4B6C4B9B, +0x310182DE, 0x22A31AA9, 0x16
[PATCH v6 2/2] hash: unify crc32 selection for x86 and Arm
Merge the crc32 hash calculation public API implementation for x86 and Arm. Select the best available CRC32 algorithm when an unsupported algorithm on a given CPU architecture is requested by an application. Previously, if an application directly included `rte_crc_arm64.h` without including `rte_hash_crc.h`, it would fail to compile. Signed-off-by: Pavan Nikhilesh --- .../{rte_crc_arm64.h => hash_crc_arm64.h} | 69 ++--- lib/hash/hash_crc_x86.h | 89 +++ lib/hash/meson.build | 1 - lib/hash/rte_hash_crc.h | 145 +- 4 files changed, 136 insertions(+), 168 deletions(-) rename lib/hash/{rte_crc_arm64.h => hash_crc_arm64.h} (65%) diff --git a/lib/hash/rte_crc_arm64.h b/lib/hash/hash_crc_arm64.h similarity index 65% rename from lib/hash/rte_crc_arm64.h rename to lib/hash/hash_crc_arm64.h index b4628cfc09..172894335f 100644 --- a/lib/hash/rte_crc_arm64.h +++ b/lib/hash/hash_crc_arm64.h @@ -2,23 +2,8 @@ * Copyright(c) 2015 Cavium, Inc */ -#ifndef _RTE_CRC_ARM64_H_ -#define _RTE_CRC_ARM64_H_ - -/** - * @file - * - * RTE CRC arm64 Hash - */ - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include -#include +#ifndef _HASH_CRC_ARM64_H_ +#define _HASH_CRC_ARM64_H_ static inline uint32_t crc32c_arm64_u8(uint8_t data, uint32_t init_val) @@ -61,40 +46,8 @@ crc32c_arm64_u64(uint64_t data, uint32_t init_val) } /** - * Allow or disallow use of arm64 SIMD instrinsics for CRC32 hash - * calculation. - * - * @param alg - * An OR of following flags: - * - (CRC32_SW) Don't use arm64 crc intrinsics - * - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available - * - */ -static inline void -rte_hash_crc_set_alg(uint8_t alg) -{ - switch (alg) { - case CRC32_ARM64: - if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32)) - alg = CRC32_SW; - /* fall-through */ - case CRC32_SW: - crc32_alg = alg; - /* fall-through */ - default: - break; - } -} - -/* Setting the best available algorithm */ -RTE_INIT(rte_hash_crc_init_alg) -{ - rte_hash_crc_set_alg(CRC32_ARM64); -} - -/** - * Use single crc32 instruction to perform a hash on a 1 byte value. - * Fall back to software crc32 implementation in case arm64 crc intrinsics is + * Use single crc32 instruction to perform a hash on a byte value. + * Fall back to software crc32 implementation in case ARM CRC is * not supported * * @param data @@ -115,7 +68,7 @@ rte_hash_crc_1byte(uint8_t data, uint32_t init_val) /** * Use single crc32 instruction to perform a hash on a 2 bytes value. - * Fall back to software crc32 implementation in case arm64 crc intrinsics is + * Fall back to software crc32 implementation in case ARM CRC is * not supported * * @param data @@ -136,7 +89,7 @@ rte_hash_crc_2byte(uint16_t data, uint32_t init_val) /** * Use single crc32 instruction to perform a hash on a 4 byte value. - * Fall back to software crc32 implementation in case arm64 crc intrinsics is + * Fall back to software crc32 implementation in case ARM CRC is * not supported * * @param data @@ -157,7 +110,7 @@ rte_hash_crc_4byte(uint32_t data, uint32_t init_val) /** * Use single crc32 instruction to perform a hash on a 8 byte value. 
- * Fall back to software crc32 implementation in case arm64 crc intrinsics is + * Fall back to software crc32 implementation in case ARM CRC is * not supported * * @param data @@ -170,14 +123,10 @@ rte_hash_crc_4byte(uint32_t data, uint32_t init_val) static inline uint32_t rte_hash_crc_8byte(uint64_t data, uint32_t init_val) { - if (likely(crc32_alg == CRC32_ARM64)) + if (likely(crc32_alg & CRC32_ARM64)) return crc32c_arm64_u64(data, init_val); return crc32c_2words(data, init_val); } -#ifdef __cplusplus -} -#endif - -#endif /* _RTE_CRC_ARM64_H_ */ +#endif /* _HASH_CRC_ARM64_H_ */ diff --git a/lib/hash/hash_crc_x86.h b/lib/hash/hash_crc_x86.h index b80a742afa..19eb3584e7 100644 --- a/lib/hash/hash_crc_x86.h +++ b/lib/hash/hash_crc_x86.h @@ -59,4 +59,93 @@ crc32c_sse42_u64(uint64_t data, uint64_t init_val) return (uint32_t)init_val; } +/** + * Use single crc32 instruction to perform a hash on a byte value. + * Fall back to software crc32 implementation in case SSE4.2 is + * not supported + * + * @param data + * Data to perform hash on. + * @param init_val + * Value to initialise hash generator. + * @return + * 32bit calculated hash value. + */ +static inline uint32_t +rte_hash_crc_1byte(uint8_t data, uint32_t init_val) +{ + if (likely(crc32_alg & CRC32_SSE42)) + return crc32c_sse42_u8(data, init_val); + + return crc32c_1byte(data, init_val); +} + +/** + * Use single crc32 instruction to perform a hash on a 2 bytes value. + * Fall back to software crc32 implementation in case SSE4.2 is + * not supported + * +
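For context, a minimal sketch of how an application exercises the unified selection logic (the key value is illustrative): request a preferred algorithm and let the library fall back when the CPU lacks the instructions.

#include <stdint.h>
#include <stdio.h>
#include <rte_hash_crc.h>

int
main(void)
{
	uint64_t key = 0xdeadbeefcafef00dULL; /* illustrative input */
	uint32_t hash;

	/* Ask for a hardware algorithm; on CPUs without it the library
	 * silently selects the best supported one (ultimately the SW tables). */
	rte_hash_crc_set_alg(CRC32_SSE42_x64); /* CRC32_ARM64 on Arm */

	hash = rte_hash_crc(&key, sizeof(key), 0xffffffff);
	printf("crc32c = 0x%08x\n", hash);
	return 0;
}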
[PATCH] doc: describe ixgbe devargs fiber_sdp3_no_tx_disable
This devargs option is introduced for the IXGBE driver in order to inform the driver that it should skip checking SDP3 as an indicator of laser enable/disable for SFP modules. Signed-off-by: Jeff Daly --- doc/guides/nics/ixgbe.rst | 17 + 1 file changed, 17 insertions(+) diff --git a/doc/guides/nics/ixgbe.rst b/doc/guides/nics/ixgbe.rst index 82fa453fa28e..ad1a3da6101e 100644 --- a/doc/guides/nics/ixgbe.rst +++ b/doc/guides/nics/ixgbe.rst @@ -101,6 +101,23 @@ To guarantee the constraint, capabilities in dev_conf.rxmode.offloads will be ch fdir_conf->mode will also be checked. +Disable SDP3 TX_DISABLE for Fiber Links +^^^ + +The following ``devargs`` option can be enabled at runtime. It must +be passed as part of EAL arguments, attached to the device's PCI address. For example, + +.. code-block:: console + + dpdk-testpmd -a af:00.0,fiber_sdp3_no_tx_disable=1 -- -i + +- ``fiber_sdp3_no_tx_disable`` (default **0**) + + Not all IXGBE implementations with SFP cages use the SDP3 signal as + TX_DISABLE as a means to disable the laser on fiber SFP modules. + This option informs the driver that, in this case, SDP3 is not to be + used as a check for link up by testing for laser on/off. + VF Runtime Options ^^ -- 2.25.1
RE: OVS DPDK DMA-Dev library/Design Discussion
> -----Original Message----- > From: Ilya Maximets > Sent: Monday, April 25, 2022 10:46 PM > Subject: Re: OVS DPDK DMA-Dev library/Design Discussion > > ... > > FWIW, I think it makes sense to PoC and test options that are going to > be simply unavailable going forward if not explored now. > Especially because we don't have any good solutions anyway ("Deferral > of Work" is architecturally wrong solution for OVS). I agree that there is value in doing PoCs and we have been doing that for over a year based on different proposals, and none of them shows the potential of the Deferral of Work approach. It isn't productive to keep building PoCs indefinitely; at some point we need to make progress with merging a specific solution upstream. > > Let's have another call so that we can move towards a single solution > that the DPDK and OVS communities agree on. I'll set up a call for next > week in a similar time slot to the previous one. > > Is there any particular reason we can't use a mailing list to discuss > that topic further? The discussion can continue on the mailing list. It just seemed more efficient and interactive to discuss this in a meeting. John --
[dpdk-dev] [PATCH 00/17] bnxt PMD fixes
From: Kalesh AP This patch set contains bug fixes in the bnxt PMD. Please apply. Kalesh AP (12): net/bnxt: update HWRM structures net/bnxt: fix device capability reporting net/bnxt: fix to remove an unused macro net/bnxt: fix Rxq configure net/bnxt: fix support for tunnel stateless offloads net/bnxt: fix RSS action support net/bnxt: add check for duplicate queue ids net/bnxt: avoid unnecessary endianness conversion net/bnxt: fix setting autoneg speed net/bnxt: force PHY update on certain configurations net/bnxt: fix reporting link status when port is stopped net/bnxt: recheck FW readiness if FW is in reset process Somnath Kotur (5): net/bnxt: remove support for COUNT action net/bnxt: fix to reconfigure the VNIC's default receive ring net/bnxt: fix to handle queue stop during RSS flow create net/bnxt: fix freeing of VNIC filters net/bnxt: don't wait for link up completion in dev start drivers/net/bnxt/bnxt.h| 29 +- drivers/net/bnxt/bnxt_ethdev.c | 58 +- drivers/net/bnxt/bnxt_filter.c |2 + drivers/net/bnxt/bnxt_flow.c | 92 +- drivers/net/bnxt/bnxt_hwrm.c | 15 +- drivers/net/bnxt/bnxt_hwrm.h | 20 + drivers/net/bnxt/bnxt_reps.c |6 +- drivers/net/bnxt/bnxt_rxq.c| 75 +- drivers/net/bnxt/bnxt_rxq.h|1 + drivers/net/bnxt/bnxt_txq.c| 29 + drivers/net/bnxt/bnxt_txq.h|1 + drivers/net/bnxt/hsi_struct_def_dpdk.h | 4025 12 files changed, 3809 insertions(+), 544 deletions(-) -- 2.10.1
[PATCH 02/17] net/bnxt: fix device capability reporting
From: Kalesh AP 1. Added two functions bnxt_get_tx_port_offloads() and bnxt_get_rx_port_offloads() to report the device Tx/Rx offload capabilities to the application. 2. This avoids some duplicated code in the driver and makes the VF-rep capabilities the same as the VF's. 3. This will help in selectively reporting offload capabilities based on FW support. Fixes: 0a6d2a720078 ("net/bnxt: get device infos") Cc: sta...@dpdk.org Signed-off-by: Kalesh AP Reviewed-by: Ajit Khaparde Reviewed-by: Somnath Kotur --- drivers/net/bnxt/bnxt.h| 24 drivers/net/bnxt/bnxt_ethdev.c | 10 ++ drivers/net/bnxt/bnxt_reps.c | 6 ++ drivers/net/bnxt/bnxt_rxq.c| 25 + drivers/net/bnxt/bnxt_rxq.h| 1 + drivers/net/bnxt/bnxt_txq.c| 23 +++ drivers/net/bnxt/bnxt_txq.h| 1 + 7 files changed, 54 insertions(+), 36 deletions(-) diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h index 44724a9..5eddb4f 100644 --- a/drivers/net/bnxt/bnxt.h +++ b/drivers/net/bnxt/bnxt.h @@ -580,30 +580,6 @@ struct bnxt_rep_info { RTE_ETH_RSS_NONFRAG_IPV6_UDP | \ RTE_ETH_RSS_LEVEL_MASK) -#define BNXT_DEV_TX_OFFLOAD_SUPPORT (RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | \ -RTE_ETH_TX_OFFLOAD_TCP_CKSUM | \ -RTE_ETH_TX_OFFLOAD_UDP_CKSUM | \ -RTE_ETH_TX_OFFLOAD_TCP_TSO | \ -RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM | \ -RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO | \ -RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO | \ -RTE_ETH_TX_OFFLOAD_IPIP_TNL_TSO | \ -RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO | \ -RTE_ETH_TX_OFFLOAD_QINQ_INSERT | \ -RTE_ETH_TX_OFFLOAD_MULTI_SEGS) - -#define BNXT_DEV_RX_OFFLOAD_SUPPORT (RTE_ETH_RX_OFFLOAD_VLAN_FILTER | \ -RTE_ETH_RX_OFFLOAD_IPV4_CKSUM | \ -RTE_ETH_RX_OFFLOAD_UDP_CKSUM | \ -RTE_ETH_RX_OFFLOAD_TCP_CKSUM | \ -RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM | \ -RTE_ETH_RX_OFFLOAD_OUTER_UDP_CKSUM | \ -RTE_ETH_RX_OFFLOAD_KEEP_CRC | \ -RTE_ETH_RX_OFFLOAD_VLAN_EXTEND | \ -RTE_ETH_RX_OFFLOAD_TCP_LRO | \ -RTE_ETH_RX_OFFLOAD_SCATTER | \ -RTE_ETH_RX_OFFLOAD_RSS_HASH) - #define BNXT_HWRM_SHORT_REQ_LENsizeof(struct hwrm_short_input) struct bnxt_flow_stat_info { diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c index 261fe0b..fac3925 100644 --- a/drivers/net/bnxt/bnxt_ethdev.c +++ b/drivers/net/bnxt/bnxt_ethdev.c @@ -971,16 +971,10 @@ static int bnxt_dev_info_get_op(struct rte_eth_dev *eth_dev, dev_info->min_rx_bufsize = 1; dev_info->max_rx_pktlen = BNXT_MAX_PKT_LEN; - dev_info->rx_offload_capa = BNXT_DEV_RX_OFFLOAD_SUPPORT; - if (bp->flags & BNXT_FLAG_PTP_SUPPORTED) - dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_TIMESTAMP; - if (bp->vnic_cap_flags & BNXT_VNIC_CAP_VLAN_RX_STRIP) - dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP; + dev_info->rx_offload_capa = bnxt_get_rx_port_offloads(bp); dev_info->tx_queue_offload_capa = RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE; - dev_info->tx_offload_capa = BNXT_DEV_TX_OFFLOAD_SUPPORT | + dev_info->tx_offload_capa = bnxt_get_tx_port_offloads(bp) | dev_info->tx_queue_offload_capa; - if (bp->fw_cap & BNXT_FW_CAP_VLAN_TX_INSERT) - dev_info->tx_offload_capa |= RTE_ETH_TX_OFFLOAD_VLAN_INSERT; dev_info->flow_type_rss_offloads = BNXT_ETH_RSS_SUPPORT; dev_info->speed_capa = bnxt_get_speed_capabilities(bp); diff --git a/drivers/net/bnxt/bnxt_reps.c b/drivers/net/bnxt/bnxt_reps.c index e773932..8a5b777 100644 --- a/drivers/net/bnxt/bnxt_reps.c +++ b/drivers/net/bnxt/bnxt_reps.c @@ -567,10 +567,8 @@ int bnxt_rep_dev_info_get_op(struct rte_eth_dev *eth_dev, dev_info->min_rx_bufsize = 1; dev_info->max_rx_pktlen = BNXT_MAX_PKT_LEN; - dev_info->rx_offload_capa = BNXT_DEV_RX_OFFLOAD_SUPPORT; - if (parent_bp->flags & BNXT_FLAG_PTP_SUPPORTED) - 
dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_TIMESTAMP; - dev_info->tx_offload_capa = BNXT_DEV_TX_OFFLOAD_SUPPORT; + dev_info->rx_offload_capa = bnxt_get_rx_port_offloads(parent_bp); + dev_info->tx_offload_capa = bnxt_get_tx_port_offloads(parent_bp); dev_info->flow_type_rss_offloads = BNXT_ETH_RSS_SUPPORT;
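On the application side, these selectively reported capabilities surface through rte_eth_dev_info_get(); here is a short sketch of checking a capability before requesting the offload (the helper name is illustrative):

#include <rte_ethdev.h>

static int
enable_outer_cksum_if_supported(uint16_t port_id, struct rte_eth_conf *conf)
{
	struct rte_eth_dev_info info;
	int ret;

	ret = rte_eth_dev_info_get(port_id, &info);
	if (ret != 0)
		return ret;

	/* Request the offload only when the PMD reports it; bnxt now
	 * derives these capabilities from actual FW support. */
	if (info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM)
		conf->rxmode.offloads |= RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM;

	return 0;
}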
[PATCH 03/17] net/bnxt: fix to remove an unused macro
From: Kalesh AP BNXT_FLAG_UPDATE_HASH is redundant now, remove it. Fixes: 1ebb765090a6 ("net/bnxt: fix config RSS update") Cc: sta...@dpdk.org Signed-off-by: Kalesh AP Reviewed-by: Ajit Khaparde Reviewed-by: Somnath Kotur --- drivers/net/bnxt/bnxt.h| 1 - drivers/net/bnxt/bnxt_ethdev.c | 2 -- drivers/net/bnxt/bnxt_rxq.c| 3 --- 3 files changed, 6 deletions(-) diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h index 5eddb4f..9e5ff74 100644 --- a/drivers/net/bnxt/bnxt.h +++ b/drivers/net/bnxt/bnxt.h @@ -648,7 +648,6 @@ struct bnxt { #define BNXT_FLAG_PORT_STATS BIT(2) #define BNXT_FLAG_JUMBOBIT(3) #define BNXT_FLAG_SHORT_CMDBIT(4) -#define BNXT_FLAG_UPDATE_HASH BIT(5) #define BNXT_FLAG_PTP_SUPPORTEDBIT(6) #define BNXT_FLAG_MULTI_HOST BIT(7) #define BNXT_FLAG_EXT_RX_PORT_STATSBIT(8) diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c index fac3925..181de42 100644 --- a/drivers/net/bnxt/bnxt_ethdev.c +++ b/drivers/net/bnxt/bnxt_ethdev.c @@ -2125,8 +2125,6 @@ static int bnxt_rss_hash_update_op(struct rte_eth_dev *eth_dev, return -EINVAL; } - bp->flags |= BNXT_FLAG_UPDATE_HASH; - /* Update the default RSS VNIC(s) */ vnic = BNXT_GET_DEFAULT_VNIC(bp); vnic->hash_type = bnxt_rte_to_hwrm_hash_types(rss_conf->rss_hf); diff --git a/drivers/net/bnxt/bnxt_rxq.c b/drivers/net/bnxt/bnxt_rxq.c index 0cfd0e5..3c2283b 100644 --- a/drivers/net/bnxt/bnxt_rxq.c +++ b/drivers/net/bnxt/bnxt_rxq.c @@ -175,9 +175,6 @@ int bnxt_mq_rx_configure(struct bnxt *bp) if (dev_conf->rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) { struct rte_eth_rss_conf *rss = &bp->rss_conf; - if (bp->flags & BNXT_FLAG_UPDATE_HASH) - bp->flags &= ~BNXT_FLAG_UPDATE_HASH; - for (i = 0; i < bp->nr_vnics; i++) { uint32_t lvl = RTE_ETH_RSS_LEVEL(rss->rss_hf); -- 2.10.1
[PATCH 04/17] net/bnxt: fix Rxq configure
From: Kalesh AP We are currently not handling RX/RSS modes correctly. After launching testpmd with multiple RXQs, if the user tries to set the number of RXQs to 1, the driver does not update the "hash_type" and "hash_mode" values of the VNICs. As a result, the driver issues bnxt_vnic_rss_configure() unnecessarily and the FW command fails. Fixed bnxt_mq_rx_configure() to update the VNIC RSS fields unconditionally. Fixes: 4191bc8f79a8 ("net/bnxt: handle multi queue mode properly") Cc: sta...@dpdk.org Signed-off-by: Kalesh AP Reviewed-by: Ajit Khaparde Reviewed-by: Somnath Kotur --- drivers/net/bnxt/bnxt_rxq.c | 37 ++--- 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/drivers/net/bnxt/bnxt_rxq.c b/drivers/net/bnxt/bnxt_rxq.c index 3c2283b..8977138 100644 --- a/drivers/net/bnxt/bnxt_rxq.c +++ b/drivers/net/bnxt/bnxt_rxq.c @@ -65,6 +65,7 @@ void bnxt_free_rxq_stats(struct bnxt_rx_queue *rxq) int bnxt_mq_rx_configure(struct bnxt *bp) { struct rte_eth_conf *dev_conf = &bp->eth_dev->data->dev_conf; + struct rte_eth_rss_conf *rss = &bp->rss_conf; const struct rte_eth_vmdq_rx_conf *conf = &dev_conf->rx_adv_conf.vmdq_rx_conf; unsigned int i, j, nb_q_per_grp = 1, ring_idx = 0; @@ -172,29 +173,19 @@ int bnxt_mq_rx_configure(struct bnxt *bp) bp->rx_num_qs_per_vnic = nb_q_per_grp; - if (dev_conf->rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) { - struct rte_eth_rss_conf *rss = &bp->rss_conf; - - for (i = 0; i < bp->nr_vnics; i++) { - uint32_t lvl = RTE_ETH_RSS_LEVEL(rss->rss_hf); - - vnic = &bp->vnic_info[i]; - vnic->hash_type = - bnxt_rte_to_hwrm_hash_types(rss->rss_hf); - vnic->hash_mode = - bnxt_rte_to_hwrm_hash_level(bp, - rss->rss_hf, - lvl); - - /* -* Use the supplied key if the key length is -* acceptable and the rss_key is not NULL -*/ - if (rss->rss_key && - rss->rss_key_len <= HW_HASH_KEY_SIZE) - memcpy(vnic->rss_hash_key, - rss->rss_key, rss->rss_key_len); - } + for (i = 0; i < bp->nr_vnics; i++) { + uint32_t lvl = RTE_ETH_RSS_LEVEL(rss->rss_hf); + + vnic = &bp->vnic_info[i]; + vnic->hash_type = bnxt_rte_to_hwrm_hash_types(rss->rss_hf); + vnic->hash_mode = bnxt_rte_to_hwrm_hash_level(bp, rss->rss_hf, lvl); + + /* +* Use the supplied key if the key length is +* acceptable and the rss_key is not NULL +*/ + if (rss->rss_key && rss->rss_key_len <= HW_HASH_KEY_SIZE) + memcpy(vnic->rss_hash_key, rss->rss_key, rss->rss_key_len); } return rc; -- 2.10.1
[PATCH 05/17] net/bnxt: fix support for tunnel stateless offloads
From: Kalesh AP The HW only supports tunnel header parsing globally for supported tunnel types. When a function uses one default VNIC to receive both the tunnel and non-tunnel packets, applying the same stateless offload operation to both tunnel and non-tunnel packets can cause problems in certain scenarios. To work around these problems, the firmware advertises no tunnel header parsing capabilities to the driver using HWRM_FUNC_QCAPS. The driver must check this flag setting and accordingly not advertise tunnel packet stateless offload capabilities to the stack. If the device supports VXLAN, GRE, IPIP and GENEVE tunnel parsing, then report RX_OFFLOAD_OUTER_IPV4_CKSUM, RX_OFFLOAD_OUTER_UDP_CKSUM and TX_OFFLOAD_OUTER_IPV4_CKSUM in the Rx/Tx offload capabilities of the device. Also, advertise tunnel TSO capabilities based on FW support. Fixes: 0a6d2a720078 ("net/bnxt: get device infos") Cc: sta...@dpdk.org Signed-off-by: Kalesh AP Reviewed-by: Ajit Khaparde Reviewed-by: Somnath Kotur --- drivers/net/bnxt/bnxt.h | 1 + drivers/net/bnxt/bnxt_hwrm.c | 5 + drivers/net/bnxt/bnxt_hwrm.h | 20 drivers/net/bnxt/bnxt_rxq.c | 7 --- drivers/net/bnxt/bnxt_txq.c | 18 -- 5 files changed, 42 insertions(+), 9 deletions(-) diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h index 9e5ff74..e4e8e8e 100644 --- a/drivers/net/bnxt/bnxt.h +++ b/drivers/net/bnxt/bnxt.h @@ -871,6 +871,7 @@ struct bnxt { uint32_tmax_mcast_addr; /* maximum number of mcast filters supported */ struct rte_eth_rss_conf rss_conf; /* RSS configuration. */ + uint16_ttunnel_disable_flag; /* tunnel stateless offloads status */ }; static diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c index bff73a9..178a112 100644 --- a/drivers/net/bnxt/bnxt_hwrm.c +++ b/drivers/net/bnxt/bnxt_hwrm.c @@ -939,6 +939,11 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) bp->fw_cap |= BNXT_FW_CAP_VLAN_TX_INSERT; PMD_DRV_LOG(DEBUG, "VLAN acceleration for TX is enabled\n"); } + + bp->tunnel_disable_flag = rte_le_to_cpu_16(resp->tunnel_disable_flag); + if (bp->tunnel_disable_flag) + PMD_DRV_LOG(DEBUG, "Tunnel parsing capability is disabled, flags : %#x\n", + bp->tunnel_disable_flag); unlock: HWRM_UNLOCK(); diff --git a/drivers/net/bnxt/bnxt_hwrm.h b/drivers/net/bnxt/bnxt_hwrm.h index 63f8d8c..77f8521 100644 --- a/drivers/net/bnxt/bnxt_hwrm.h +++ b/drivers/net/bnxt/bnxt_hwrm.h @@ -121,6 +121,26 @@ struct bnxt_pf_resource_info { #define BNXT_CTX_VAL_INVAL 0x +#define BNXT_TUNNELED_OFFLOADS_CAP_VXLAN_EN(bp)\ + (!((bp)->tunnel_disable_flag & HWRM_FUNC_QCAPS_OUTPUT_TUNNEL_DISABLE_FLAG_DISABLE_VXLAN)) +#define BNXT_TUNNELED_OFFLOADS_CAP_NGE_EN(bp) \ + (!((bp)->tunnel_disable_flag & HWRM_FUNC_QCAPS_OUTPUT_TUNNEL_DISABLE_FLAG_DISABLE_NGE)) +#define BNXT_TUNNELED_OFFLOADS_CAP_GRE_EN(bp) \ + (!((bp)->tunnel_disable_flag & HWRM_FUNC_QCAPS_OUTPUT_TUNNEL_DISABLE_FLAG_DISABLE_GRE)) +#define BNXT_TUNNELED_OFFLOADS_CAP_IPINIP_EN(bp) \ + (!((bp)->tunnel_disable_flag & HWRM_FUNC_QCAPS_OUTPUT_TUNNEL_DISABLE_FLAG_DISABLE_IPINIP)) + +/* + * If the device supports VXLAN, GRE, IPIP and GENEVE tunnel parsing, then report + * RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM, RTE_ETH_RX_OFFLOAD_OUTER_UDP_CKSUM and + * RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM in the Rx/Tx offload capabilities of the device. 
+ */ +#define BNXT_TUNNELED_OFFLOADS_CAP_ALL_EN(bp) \ + (BNXT_TUNNELED_OFFLOADS_CAP_VXLAN_EN(bp) && \ +BNXT_TUNNELED_OFFLOADS_CAP_NGE_EN(bp) && \ +BNXT_TUNNELED_OFFLOADS_CAP_GRE_EN(bp) && \ +BNXT_TUNNELED_OFFLOADS_CAP_IPINIP_EN(bp)) + int bnxt_hwrm_cfa_l2_clear_rx_mask(struct bnxt *bp, struct bnxt_vnic_info *vnic); int bnxt_hwrm_cfa_l2_set_rx_mask(struct bnxt *bp, struct bnxt_vnic_info *vnic, diff --git a/drivers/net/bnxt/bnxt_rxq.c b/drivers/net/bnxt/bnxt_rxq.c index 8977138..8147404 100644 --- a/drivers/net/bnxt/bnxt_rxq.c +++ b/drivers/net/bnxt/bnxt_rxq.c @@ -34,14 +34,15 @@ uint64_t bnxt_get_rx_port_offloads(struct bnxt *bp) RTE_ETH_RX_OFFLOAD_SCATTER | RTE_ETH_RX_OFFLOAD_RSS_HASH; - rx_offload_capa |= RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM | - RTE_ETH_RX_OFFLOAD_OUTER_UDP_CKSUM; - if (bp->flags & BNXT_FLAG_PTP_SUPPORTED) rx_offload_capa |= RTE_ETH_RX_OFFLOAD_TIMESTAMP; if (bp->vnic_cap_flags & BNXT_VNIC_CAP_VLAN_RX_STRIP) rx_offload_capa |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP; + if (BNXT_TUNNELED_OFFLOADS_CAP_ALL_EN(bp)) + rx_offload_capa |= RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM | +
[PATCH 06/17] net/bnxt: remove support for COUNT action
From: Somnath Kotur The 'count' action was never really implemented in the legacy/AFM model, but there was some placeholder code. Remove it so that the user will see a failure when a flow with a 'count' action is created. Signed-off-by: Somnath Kotur Reviewed-by: Kalesh AP --- drivers/net/bnxt/bnxt_flow.c | 17 - 1 file changed, 17 deletions(-) diff --git a/drivers/net/bnxt/bnxt_flow.c b/drivers/net/bnxt/bnxt_flow.c index f7c90c4..71a8edd 100644 --- a/drivers/net/bnxt/bnxt_flow.c +++ b/drivers/net/bnxt/bnxt_flow.c @@ -1405,23 +1405,6 @@ bnxt_validate_and_parse_flow(struct rte_eth_dev *dev, bnxt_update_filter_flags_en(filter, filter1, use_ntuple); break; - case RTE_FLOW_ACTION_TYPE_COUNT: - vnic0 = &bp->vnic_info[0]; - filter1 = bnxt_get_l2_filter(bp, filter, vnic0); - if (filter1 == NULL) { - rte_flow_error_set(error, - ENOSPC, - RTE_FLOW_ERROR_TYPE_ACTION, - act, - "New filter not available"); - rc = -rte_errno; - goto ret; - } - - filter->fw_l2_filter_id = filter1->fw_l2_filter_id; - filter->flow_id = filter1->flow_id; - filter->flags = HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_FLAGS_METER; - break; case RTE_FLOW_ACTION_TYPE_VF: act_vf = (const struct rte_flow_action_vf *)act->conf; vf = act_vf->id; -- 2.10.1
[PATCH 07/17] net/bnxt: fix RSS action support
From: Kalesh AP Specifying a subset of the Rx queues created by the application in the "flow create" command is invalid. The user must either specify all the Rx queues created or no queues. Also removed an incorrect comment, as the RSS action is not supported when the user or application specifies the MARK or COUNT action. Fixes: 239695f754cb ("net/bnxt: enhance RSS action support") Cc: sta...@dpdk.org Signed-off-by: Kalesh AP Reviewed-by: Somnath Kotur --- drivers/net/bnxt/bnxt_flow.c | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/bnxt/bnxt_flow.c b/drivers/net/bnxt/bnxt_flow.c index 71a8edd..bd96bba 100644 --- a/drivers/net/bnxt/bnxt_flow.c +++ b/drivers/net/bnxt/bnxt_flow.c @@ -1074,7 +1074,6 @@ bnxt_update_filter_flags_en(struct bnxt_filter_info *filter, filter1, filter->fw_l2_filter_id, filter->l2_ref_cnt); } -/* Valid actions supported along with RSS are count and mark. */ static int bnxt_validate_rss_action(const struct rte_flow_action actions[]) { @@ -1123,6 +1122,17 @@ bnxt_vnic_rss_cfg_update(struct bnxt *bp, rss = (const struct rte_flow_action_rss *)act->conf; + /* must specify either all the Rx queues created by application or zero queues */ + if (rss->queue_num && vnic->rx_queue_cnt != rss->queue_num) { + rte_flow_error_set(error, + EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, + "Incorrect RXQ count"); + rc = -rte_errno; + goto ret; + } + /* Currently only Toeplitz hash is supported. */ if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT && rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) { -- 2.10.1
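A sketch of what the new validation expects from an application building an RSS action (the attr/pattern wiring and helper name are illustrative): the queue list, when present, must cover every configured Rx queue.

#include <rte_ethdev.h>
#include <rte_flow.h>

static struct rte_flow *
create_rss_flow(uint16_t port_id, uint16_t nb_rxq, struct rte_flow_error *err)
{
	static uint16_t queues[RTE_MAX_QUEUES_PER_PORT];
	uint16_t i;

	for (i = 0; i < nb_rxq; i++)
		queues[i] = i; /* list every configured Rx queue */

	struct rte_flow_action_rss rss = {
		.func = RTE_ETH_HASH_FUNCTION_TOEPLITZ, /* only Toeplitz is accepted */
		.types = RTE_ETH_RSS_IP,
		.queue_num = nb_rxq, /* all configured queues, or 0 for none */
		.queue = queues,
	};
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_RSS, .conf = &rss },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};

	return rte_flow_create(port_id, &attr, pattern, actions, err);
}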
[PATCH 08/17] net/bnxt: fix to reconfigure the VNIC's default receive ring
From: Somnath Kotur When an Rx queue is stopped and restarted, for cards that have ring groups, the ring group is freed and reallocated as part of that workflow. However, this new ring group is not communicated to the VNIC via the HWRM_VNIC_CFG command. Fix by issuing the HWRM_VNIC_CFG command on all adapters in this scenario. Fixes: ed0ae3502fc9 ("net/bnxt: update ring group after ring stop start") Signed-off-by: Somnath Kotur Reviewed-by: Ajit Khaparde Reviewed-by: Kalesh AP --- drivers/net/bnxt/bnxt_rxq.c | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/bnxt/bnxt_rxq.c b/drivers/net/bnxt/bnxt_rxq.c index 8147404..9b5ff4c 100644 --- a/drivers/net/bnxt/bnxt_rxq.c +++ b/drivers/net/bnxt/bnxt_rxq.c @@ -488,10 +488,11 @@ int bnxt_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id) if (rc) return rc; - if (BNXT_CHIP_P5(bp)) { - /* Reconfigure default receive ring and MRU. */ - bnxt_hwrm_vnic_cfg(bp, rxq->vnic); - } + if (BNXT_HAS_RING_GRPS(bp)) + rxq->vnic->dflt_ring_grp = bp->grp_info[rx_queue_id].fw_grp_id; + /* Reconfigure default receive ring and MRU. */ + bnxt_hwrm_vnic_cfg(bp, rxq->vnic); + PMD_DRV_LOG(INFO, "Rx queue started %d\n", rx_queue_id); if (dev_conf->rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) { -- 2.10.1
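A quick sketch of the stop/start sequence that exercises the fixed path; after the restart the PMD must now push the reallocated ring group to the VNIC on all adapters:

#include <rte_ethdev.h>

static int
restart_rx_queue(uint16_t port_id, uint16_t queue_id)
{
	int ret;

	ret = rte_eth_dev_rx_queue_stop(port_id, queue_id); /* frees the ring group */
	if (ret != 0)
		return ret;
	/* Reallocates the ring group; HWRM_VNIC_CFG is now issued here. */
	return rte_eth_dev_rx_queue_start(port_id, queue_id);
}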
[PATCH 09/17] net/bnxt: add check for duplicate queue ids
From: Kalesh AP Currently driver does not have a check for duplicate queue ids. User must either specify all Rx queues created or no queues in the flow create command. Repeating the queue index is invalid. Also, moved the check for invalid queue to the beginning of the function. Fixes: 239695f754cb ("net/bnxt: enhance RSS action support") Cc: sta...@dpdk.org Signed-off-by: Kalesh AP Reviewed-by: Ajit Khaparde Reviewed-by: Somnath Kotur --- drivers/net/bnxt/bnxt_flow.c | 49 +--- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/drivers/net/bnxt/bnxt_flow.c b/drivers/net/bnxt/bnxt_flow.c index bd96bba..fc5bacf 100644 --- a/drivers/net/bnxt/bnxt_flow.c +++ b/drivers/net/bnxt/bnxt_flow.c @@ -1115,7 +1115,7 @@ bnxt_vnic_rss_cfg_update(struct bnxt *bp, struct rte_flow_error *error) { const struct rte_flow_action_rss *rss; - unsigned int rss_idx, i; + unsigned int rss_idx, i, j; uint16_t hash_type; uint64_t types; int rc; @@ -1133,6 +1133,37 @@ bnxt_vnic_rss_cfg_update(struct bnxt *bp, goto ret; } + /* Validate Rx queues */ + for (i = 0; i < rss->queue_num; i++) { + PMD_DRV_LOG(DEBUG, "RSS action Queue %d\n", rss->queue[i]); + + if (rss->queue[i] >= bp->rx_nr_rings || + !bp->rx_queues[rss->queue[i]]) { + rte_flow_error_set(error, + EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, + "Invalid queue ID for RSS"); + rc = -rte_errno; + goto ret; + } + } + + /* Duplicate queue ids are not supported. */ + for (i = 0; i < rss->queue_num; i++) { + for (j = i + 1; j < rss->queue_num; j++) { + if (rss->queue[i] == rss->queue[j]) { + rte_flow_error_set(error, + EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, + "Duplicate queue ID for RSS"); + rc = -rte_errno; + goto ret; + } + } + } + /* Currently only Toeplitz hash is supported. */ if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT && rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) { @@ -1200,22 +1231,6 @@ bnxt_vnic_rss_cfg_update(struct bnxt *bp, if (rss->queue_num == 0) goto skip_rss_table; - /* Validate Rx queues */ - for (i = 0; i < rss->queue_num; i++) { - PMD_DRV_LOG(DEBUG, "RSS action Queue %d\n", rss->queue[i]); - - if (rss->queue[i] >= bp->rx_nr_rings || - !bp->rx_queues[rss->queue[i]]) { - rte_flow_error_set(error, - EINVAL, - RTE_FLOW_ERROR_TYPE_ACTION, - act, - "Invalid queue ID for RSS"); - rc = -rte_errno; - goto ret; - } - } - /* Prepare the indirection table */ for (rss_idx = 0; rss_idx < HW_HASH_INDEX_SIZE; rss_idx++) { struct bnxt_rx_queue *rxq; -- 2.10.1
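The patch uses a pairwise O(n^2) scan, which is fine for small queue lists. As a rough sketch only, an equivalent one-pass check with a seen-array, assuming queue ids are bounded by RTE_MAX_QUEUES_PER_PORT:

#include <stdbool.h>
#include <stdint.h>
#include <rte_ethdev.h>

static bool
rss_queues_have_duplicate(const uint16_t *queue, uint32_t queue_num)
{
	bool seen[RTE_MAX_QUEUES_PER_PORT] = { false };
	uint32_t i;

	for (i = 0; i < queue_num; i++) {
		/* Out-of-range ids are also rejected by the earlier validation. */
		if (queue[i] >= RTE_MAX_QUEUES_PER_PORT || seen[queue[i]])
			return true;
		seen[queue[i]] = true;
	}
	return false;
}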
[PATCH 10/17] net/bnxt: fix to handle queue stop during RSS flow create
From: Somnath Kotur The programming of the RSS table was not taking into account if any of the queues in the set were stopped prior to the flow creation, hence leading to a vnic RSS config cmd failure thrown by the FW. Fix by programming only the active queues in the RSS action queue set. Fixes: 239695f754cb ("net/bnxt: enhance RSS action support") Cc: sta...@dpdk.org Signed-off-by: Somnath Kotur Reviewed-by: Ajit Khaparde Reviewed-by: Kalesh AP --- drivers/net/bnxt/bnxt_flow.c | 16 +--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/net/bnxt/bnxt_flow.c b/drivers/net/bnxt/bnxt_flow.c index fc5bacf..f8e1096 100644 --- a/drivers/net/bnxt/bnxt_flow.c +++ b/drivers/net/bnxt/bnxt_flow.c @@ -1115,7 +1115,7 @@ bnxt_vnic_rss_cfg_update(struct bnxt *bp, struct rte_flow_error *error) { const struct rte_flow_action_rss *rss; - unsigned int rss_idx, i, j; + unsigned int rss_idx, i, j, fw_idx; uint16_t hash_type; uint64_t types; int rc; @@ -1232,11 +1232,21 @@ bnxt_vnic_rss_cfg_update(struct bnxt *bp, goto skip_rss_table; /* Prepare the indirection table */ - for (rss_idx = 0; rss_idx < HW_HASH_INDEX_SIZE; rss_idx++) { + for (rss_idx = 0, fw_idx = 0; rss_idx < HW_HASH_INDEX_SIZE; +rss_idx++, fw_idx++) { + uint8_t *rxq_state = bp->eth_dev->data->rx_queue_state; struct bnxt_rx_queue *rxq; uint32_t idx; - idx = rss->queue[rss_idx % rss->queue_num]; + for (i = 0; i < bp->rx_cp_nr_rings; i++) { + idx = rss->queue[fw_idx % rss->queue_num]; + if (rxq_state[idx] != RTE_ETH_QUEUE_STATE_STOPPED) + break; + fw_idx++; + } + + if (i == bp->rx_cp_nr_rings) + return 0; if (BNXT_CHIP_P5(bp)) { rxq = bp->rx_queues[idx]; -- 2.10.1
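A hedged repro sketch of the failing scenario this patch addresses: stop one member queue and then create the RSS flow; before the fix the FW rejected the resulting VNIC RSS configuration (attr/pattern/actions are assumed to describe a valid RSS flow over the port's queues):

#include <rte_ethdev.h>
#include <rte_flow.h>

static struct rte_flow *
rss_flow_with_stopped_queue(uint16_t port_id,
			    const struct rte_flow_attr *attr,
			    const struct rte_flow_item pattern[],
			    const struct rte_flow_action actions[])
{
	struct rte_flow_error err;

	rte_eth_dev_rx_queue_stop(port_id, 2); /* queue 2 is illustrative */
	/* The PMD now skips stopped queues when filling the RSS table. */
	return rte_flow_create(port_id, attr, pattern, actions, &err);
}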
[PATCH 11/17] net/bnxt: avoid unnecessary endianness conversion
From: Kalesh AP The "active_fec_signal_mode" field in the HWRM_PORT_PHY_QCFG response is a uint8_t, so no endianness conversion is needed when parsing the response. Also, the signal mode lives in the low 4 bits of "active_fec_signal_mode". Fixes: c23f9ded0391 ("net/bnxt: support 200G PAM4 link") Cc: sta...@dpdk.org Signed-off-by: Kalesh AP Reviewed-by: Ajit Khaparde Reviewed-by: Somnath Kotur --- drivers/net/bnxt/bnxt_hwrm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c index 178a112..d87f0c3 100644 --- a/drivers/net/bnxt/bnxt_hwrm.c +++ b/drivers/net/bnxt/bnxt_hwrm.c @@ -1506,7 +1506,7 @@ static int bnxt_hwrm_port_phy_qcfg(struct bnxt *bp, link_info->phy_ver[1] = resp->phy_min; link_info->phy_ver[2] = resp->phy_bld; link_info->link_signal_mode = - rte_le_to_cpu_16(resp->active_fec_signal_mode); + resp->active_fec_signal_mode & HWRM_PORT_PHY_QCFG_OUTPUT_SIGNAL_MODE_MASK; link_info->force_pam4_link_speed = rte_le_to_cpu_16(resp->force_pam4_link_speed); link_info->support_pam4_speeds = -- 2.10.1
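For illustration only, the byte-and-mask handling the patch settles on; the mask macro name here stands in for the HWRM one:

#include <stdint.h>

#define SIGNAL_MODE_MASK 0x0f /* stands in for HWRM_PORT_PHY_QCFG_OUTPUT_SIGNAL_MODE_MASK */

static inline uint8_t
active_signal_mode(uint8_t active_fec_signal_mode)
{
	/* Single byte: no rte_le_to_cpu_16(); signal mode is the low 4 bits. */
	return active_fec_signal_mode & SIGNAL_MODE_MASK;
}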
[PATCH 12/17] net/bnxt: fix setting autoneg speed
From: Kalesh AP The "active_fec_signal_mode" in HWRM_PORT_PHY_QCFG response does not return correct value till the link is up. Driver cannot rely on active_fec_signal_mode while setting autoneg speed. While setting autoneg speed, driver is currently checking only "auto_link_speed_mask". Fixed to check "auto_pam4_link_speed_mask" as well. Also, while setting auto mode and setting speed mask, driver will have to set both NRZ and PAM4 mask. Fixes: c23f9ded0391 ("net/bnxt: support 200G PAM4 link") Cc: sta...@dpdk.org Signed-off-by: Kalesh AP Reviewed-by: Ajit Khaparde Reviewed-by: Somnath Kotur --- drivers/net/bnxt/bnxt_hwrm.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c index d87f0c3..9eb8b8d 100644 --- a/drivers/net/bnxt/bnxt_hwrm.c +++ b/drivers/net/bnxt/bnxt_hwrm.c @@ -1424,17 +1424,17 @@ static int bnxt_hwrm_port_phy_cfg(struct bnxt *bp, struct bnxt_link_info *conf) } } /* AutoNeg - Advertise speeds specified. */ - if (conf->auto_link_speed_mask && + if ((conf->auto_link_speed_mask || conf->auto_pam4_link_speed_mask) && !(conf->phy_flags & HWRM_PORT_PHY_CFG_INPUT_FLAGS_FORCE)) { req.auto_mode = HWRM_PORT_PHY_CFG_INPUT_AUTO_MODE_SPEED_MASK; - if (conf->auto_pam4_link_speed_mask && - bp->link_info->link_signal_mode) { + if (conf->auto_pam4_link_speed_mask) { enables |= HWRM_PORT_PHY_CFG_IN_EN_AUTO_PAM4_LINK_SPD_MASK; req.auto_link_pam4_speed_mask = rte_cpu_to_le_16(conf->auto_pam4_link_speed_mask); - } else { + } + if (conf->auto_link_speed_mask) { enables |= HWRM_PORT_PHY_CFG_IN_EN_AUTO_LINK_SPEED_MASK; req.auto_link_speed_mask = -- 2.10.1
[PATCH 14/17] net/bnxt: fix reporting link status when port is stopped
From: Kalesh AP The driver forces the link down during port stop, but the device is not obliged to bring the link down in certain scenarios, even when forced. In that case, subsequent link queries return the link as up. Fixed to report the link status as down when the port is stopped. The driver already does this for VF/NPAR/MH functions. Fixes: c09f57b49c13 ("net/bnxt: add start/stop/link update operations") Cc: sta...@dpdk.org Signed-off-by: Kalesh AP Reviewed-by: Ajit Khaparde Reviewed-by: Somnath Kotur --- drivers/net/bnxt/bnxt_ethdev.c | 14 -- 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c index 1904db9..69f1117 100644 --- a/drivers/net/bnxt/bnxt_ethdev.c +++ b/drivers/net/bnxt/bnxt_ethdev.c @@ -1826,6 +1826,14 @@ int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete) if (bp->link_info == NULL) goto out; + /* Only a single function PF can bring the phy down. +* In certain scenarios, the device is not obliged to bring the link down even when forced. +* When the port is stopped, report link down in those cases. +*/ + if (!eth_dev->data->dev_started && + (!BNXT_SINGLE_PF(bp) || bnxt_force_link_config(bp))) + goto out; + do { /* Retrieve link info from hardware */ rc = bnxt_get_hwrm_link_config(bp, &new); @@ -1843,12 +1851,6 @@ int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete) rte_delay_ms(BNXT_LINK_WAIT_INTERVAL); } while (cnt--); - /* Only single function PF can bring phy down. -* When port is stopped, report link down for VF/MH/NPAR functions. -*/ - if (!BNXT_SINGLE_PF(bp) && !eth_dev->data->dev_started) - memset(&new, 0, sizeof(new)); - out: /* Timed out or success */ if (new.link_status != eth_dev->data->dev_link.link_status || -- 2.10.1
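From the application side, the fixed behaviour can be observed with a plain link query on a stopped port; a minimal sketch:

#include <stdio.h>
#include <rte_ethdev.h>

static int
print_link_status(uint16_t port_id)
{
	struct rte_eth_link link;
	int ret = rte_eth_link_get_nowait(port_id, &link);

	/* After this fix a stopped port reports "down" even if the PHY
	 * could not actually be forced down. */
	if (ret == 0)
		printf("port %u link %s\n", port_id,
		       link.link_status ? "up" : "down");
	return ret;
}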
[PATCH 13/17] net/bnxt: force PHY update on certain configurations
From: Kalesh AP The device is not obliged to bring the link down in certain scenarios, even when forced. When the FW does not allow any user other than the BMC to shut down the port, the bnxt_get_hwrm_link_config() call always returns link up. Always force a PHY update in that case, otherwise the user configuration for speed/autoneg would not be applied correctly. Fixes: 7bc8e9a227cc ("net/bnxt: support async link notification") Cc: sta...@dpdk.org Signed-off-by: Kalesh AP Reviewed-by: Ajit Khaparde Reviewed-by: Somnath Kotur --- drivers/net/bnxt/bnxt.h| 3 +-- drivers/net/bnxt/bnxt_ethdev.c | 22 ++ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h index e4e8e8e..e86e51e 100644 --- a/drivers/net/bnxt/bnxt.h +++ b/drivers/net/bnxt/bnxt.h @@ -72,8 +72,7 @@ #define BROADCOM_DEV_ID_58818_VF 0xd82e #define BROADCOM_DEV_957508_N2100 0x5208 -#define IS_BNXT_DEV_957508_N2100(bp) \ - ((bp)->pdev->id.subsystem_device_id == BROADCOM_DEV_957508_N2100) +#define BROADCOM_DEV_957414_N225 0x4145 #define BNXT_MAX_MTU 9574 #define BNXT_NUM_VLANS 2 diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c index 181de42..1904db9 100644 --- a/drivers/net/bnxt/bnxt_ethdev.c +++ b/drivers/net/bnxt/bnxt_ethdev.c @@ -659,6 +659,19 @@ static int bnxt_init_ctx_mem(struct bnxt *bp) return rc; } +static inline bool bnxt_force_link_config(struct bnxt *bp) +{ + uint16_t subsystem_device_id = bp->pdev->id.subsystem_device_id; + + switch (subsystem_device_id) { + case BROADCOM_DEV_957508_N2100: + case BROADCOM_DEV_957414_N225: + return true; + default: + return false; + } +} + static int bnxt_update_phy_setting(struct bnxt *bp) { struct rte_eth_link new; @@ -671,11 +684,12 @@ static int bnxt_update_phy_setting(struct bnxt *bp) } /* -* On BCM957508-N2100 adapters, FW will not allow any user other -* than BMC to shutdown the port. bnxt_get_hwrm_link_config() call -* always returns link up. Force phy update always in that case. +* The device is not obliged to bring the link down in certain +* scenarios, even when forced. When FW does not allow any user other +* than BMC to shut down the port, bnxt_get_hwrm_link_config() always +* returns link up. Force a phy update in that case. */ - if (!new.link_status || IS_BNXT_DEV_957508_N2100(bp)) { + if (!new.link_status || bnxt_force_link_config(bp)) { rc = bnxt_set_hwrm_link_config(bp, true); if (rc) { PMD_DRV_LOG(ERR, "Failed to update PHY settings\n"); -- 2.10.1
[PATCH 15/17] net/bnxt: recheck FW readiness if FW is in reset process
From: Kalesh AP If the firmware is still in the reset process and returns HWRM_ERR_CODE_HOT_RESET_PROGRESS, retry the VER_GET command. This has to be done in bnxt_handle_if_change_status(). Fixes: 0b533591238f ("net/bnxt: inform firmware about IF state changes") Cc: sta...@dpdk.org Signed-off-by: Kalesh AP Reviewed-by: Somnath Kotur Reviewed-by: Ajit Khaparde --- drivers/net/bnxt/bnxt_ethdev.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c index 69f1117..abcb534 100644 --- a/drivers/net/bnxt/bnxt_ethdev.c +++ b/drivers/net/bnxt/bnxt_ethdev.c @@ -177,6 +177,7 @@ static int bnxt_restore_vlan_filters(struct bnxt *bp); static void bnxt_dev_recover(void *arg); static void bnxt_free_error_recovery_info(struct bnxt *bp); static void bnxt_free_rep_info(struct bnxt *bp); +static int bnxt_check_fw_ready(struct bnxt *bp); int is_bnxt_in_error(struct bnxt *bp) { @@ -1350,6 +1351,11 @@ static int bnxt_handle_if_change_status(struct bnxt *bp) /* clear fatal flag so that re-init happens */ bp->flags &= ~BNXT_FLAG_FATAL_ERROR; + + rc = bnxt_check_fw_ready(bp); + if (rc) + return rc; + rc = bnxt_init_resources(bp, true); bp->flags &= ~BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE; @@ -4324,7 +4330,7 @@ static int bnxt_restore_filters(struct bnxt *bp) static int bnxt_check_fw_ready(struct bnxt *bp) { - int timeout = bp->fw_reset_max_msecs; + int timeout = bp->fw_reset_max_msecs ? : BNXT_MAX_FW_RESET_TIMEOUT; int rc = 0; do { -- 2.10.1
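A sketch of the readiness loop with the poll interval and fallback timeout made explicit; the bnxt helper name and signature are assumptions based on this patch, and the constants stand in for the driver's own macros:

#include <errno.h>
#include <rte_cycles.h>

#define FW_READY_POLL_MS	100	/* illustrative poll interval */
#define FW_RESET_TIMEOUT_MS	120000	/* stands in for BNXT_MAX_FW_RESET_TIMEOUT */

static int
wait_for_fw_ready(struct bnxt *bp)
{
	/* Fall back to a default when FW did not report a timeout. */
	int timeout = bp->fw_reset_max_msecs ?
		      bp->fw_reset_max_msecs : FW_RESET_TIMEOUT_MS;

	do {
		if (bnxt_hwrm_ver_get(bp, FW_READY_POLL_MS) == 0)
			return 0;	/* FW answered VER_GET: ready */
		rte_delay_ms(FW_READY_POLL_MS);
		timeout -= FW_READY_POLL_MS;
	} while (timeout > 0);

	return -ETIMEDOUT;
}

Note the patch itself expresses the same fallback with the GNU "?:" shorthand (a ? : b).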
[PATCH 16/17] net/bnxt: fix freeing of VNIC filters
From: Somnath Kotur In bnxt_free_all_filters(), all the filters attached to a VNIC are removed. But each of these filters holds a back-reference pointer to the VNIC, which needs to be reset to NULL at this point. Otherwise, during a normal testpmd quit, as part of dev_close_op(), bnxt_free_all_filters() is first invoked in dev_stop, followed by bnxt_free_filter_mem() from bnxt_uninit_resources(), which finds a filter with a VNIC back-reference pointer; bnxt_hwrm_clean_up_l2_filter() then also tries to remove the filter from the VNIC's filter list, which was already done as part of bnxt_free_all_filters(). Fixes: f0f6b5e6cf9 ("net/bnxt: fix reusing L2 filter") Cc: sta...@dpdk.org Signed-off-by: Somnath Kotur Reviewed-by: Kalesh AP --- drivers/net/bnxt/bnxt_filter.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/bnxt/bnxt_filter.c b/drivers/net/bnxt/bnxt_filter.c index 1d08e03..b0c3bbd 100644 --- a/drivers/net/bnxt/bnxt_filter.c +++ b/drivers/net/bnxt/bnxt_filter.c @@ -99,6 +99,8 @@ void bnxt_free_all_filters(struct bnxt *bp) bnxt_filter_info, next); STAILQ_INSERT_TAIL(&bp->free_filter_list, filter, next); + if (filter->vnic) + filter->vnic = NULL; filter = temp_filter; } STAILQ_INIT(&vnic->filter); -- 2.10.1
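A generic illustration of the bug class being fixed (not bnxt code): when a node moves to a free list, any back-reference into its old owner must be cleared, or a later teardown pass will try to unlink it a second time:

struct owner;

struct node {
	struct owner *owner;	/* back-reference into the owning list */
	struct node *next;
};

static void
move_to_free_list(struct node *n, struct node **free_list)
{
	n->owner = NULL;	/* the fix: drop the stale back-reference */
	n->next = *free_list;
	*free_list = n;
}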
[PATCH 17/17] net/bnxt: don't wait for link up completion in dev start
From: Somnath Kotur Invoking bnxt_link_update_op() with wait_for_completion set would result in the driver waiting for 10s to complete port initialization (dev_start_op()) when the port link is down. Fix this by not waiting for completion when invoking it in dev_start_op(). Signed-off-by: Somnath Kotur Reviewed-by: Kalesh AP --- drivers/net/bnxt/bnxt_ethdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c index abcb534..0f0f40b 100644 --- a/drivers/net/bnxt/bnxt_ethdev.c +++ b/drivers/net/bnxt/bnxt_ethdev.c @@ -1608,7 +1608,7 @@ int bnxt_dev_start_op(struct rte_eth_dev *eth_dev) eth_dev->data->dev_started = 1; - bnxt_link_update_op(eth_dev, 1); + bnxt_link_update_op(eth_dev, 0); if (rx_offloads & RTE_ETH_RX_OFFLOAD_VLAN_FILTER) vlan_mask |= RTE_ETH_VLAN_FILTER_MASK; -- 2.10.1
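The same trade-off exists at the ethdev API level; a small sketch of the two query flavours an application can pick between:

#include <stdbool.h>
#include <rte_ethdev.h>

static void
query_link(uint16_t port_id, bool wait)
{
	struct rte_eth_link link;

	if (wait)
		rte_eth_link_get(port_id, &link);	 /* may block while the link settles */
	else
		rte_eth_link_get_nowait(port_id, &link); /* returns the current state at once */
}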
[PATCH v5 0/7] app/test: add inline IPsec and reassembly cases
IP reassembly offload was added in the last release. The test app for unit testing IP reassembly of inline inbound IPsec flows is added in this patchset. For testing IP reassembly, base inline IPsec support is also added. The app is enhanced in v4 to handle more functional unit test cases for inline IPsec, similar to Lookaside IPsec. The functions from Lookaside mode are reused to verify functional cases. Changes in v5: - removed soft/hard expiry patches, which are deferred to the next release - skipped tests if no port is added - added release notes Changes in v4: - rebased over next-crypto - updated app to take benefit from Lookaside protocol test functions - added more functional cases - added soft and hard expiry event subtypes in ethdev for testing SA soft and hard pkt/byte expiry events - reassembly cases are squashed in a single patch Changes in v3: - incorporated latest ethdev changes for reassembly - skipped build on Windows as it needs the rte_ipsec lib, which is not compiled on Windows Changes in v2: - added IPsec burst mode case - updated as per the latest ethdev changes Akhil Goyal (6): app/test: add unit cases for inline IPsec offload test/security: add inline inbound IPsec cases test/security: add combined mode inline IPsec cases test/security: add inline IPsec reassembly cases test/security: add more inline IPsec functional cases test/security: add ESN and anti-replay cases for inline Vamsi Attunuru (1): test/security: add inline IPsec IPv6 flow label cases MAINTAINERS |2 +- app/test/meson.build |1 + app/test/test_cryptodev_security_ipsec.c | 35 +- app/test/test_cryptodev_security_ipsec.h | 10 + app/test/test_security_inline_proto.c | 2372 + app/test/test_security_inline_proto_vectors.h | 704 + doc/guides/rel_notes/release_22_07.rst|5 + 7 files changed, 3127 insertions(+), 2 deletions(-) create mode 100644 app/test/test_security_inline_proto.c create mode 100644 app/test/test_security_inline_proto_vectors.h -- 2.25.1
[PATCH v5 1/7] app/test: add unit cases for inline IPsec offload
A new test suite is added in test app to test inline IPsec protocol offload. In this patch, predefined vectors from Lookaside IPsec test are used to verify the IPsec functionality without the need of external traffic generators. The sent packet is loopbacked onto the same interface which is received and matched with the expected output. The test suite can be updated further with other functional test cases. In this patch encap only cases are added. The testsuite can be run using: RTE> inline_ipsec_autotest Signed-off-by: Akhil Goyal Signed-off-by: Nithin Dabilpuram --- MAINTAINERS | 2 +- app/test/meson.build | 1 + app/test/test_security_inline_proto.c | 882 ++ app/test/test_security_inline_proto_vectors.h | 20 + doc/guides/rel_notes/release_22_07.rst| 5 + 5 files changed, 909 insertions(+), 1 deletion(-) create mode 100644 app/test/test_security_inline_proto.c create mode 100644 app/test/test_security_inline_proto_vectors.h diff --git a/MAINTAINERS b/MAINTAINERS index 15008c03bc..89affa08ff 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -440,7 +440,7 @@ M: Akhil Goyal T: git://dpdk.org/next/dpdk-next-crypto F: lib/security/ F: doc/guides/prog_guide/rte_security.rst -F: app/test/test_security.c +F: app/test/test_security* Compression API - EXPERIMENTAL M: Fan Zhang diff --git a/app/test/meson.build b/app/test/meson.build index 5fc1dd1b7b..39952c6c4f 100644 --- a/app/test/meson.build +++ b/app/test/meson.build @@ -125,6 +125,7 @@ test_sources = files( 'test_rwlock.c', 'test_sched.c', 'test_security.c', +'test_security_inline_proto.c', 'test_service_cores.c', 'test_spinlock.c', 'test_stack.c', diff --git a/app/test/test_security_inline_proto.c b/app/test/test_security_inline_proto.c new file mode 100644 index 00..249474be91 --- /dev/null +++ b/app/test/test_security_inline_proto.c @@ -0,0 +1,882 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2022 Marvell. + */ + + +#include +#include + +#include +#include +#include + +#include "test.h" +#include "test_security_inline_proto_vectors.h" + +#ifdef RTE_EXEC_ENV_WINDOWS +static int +test_inline_ipsec(void) +{ + printf("Inline ipsec not supported on Windows, skipping test\n"); + return TEST_SKIPPED; +} + +#else + +#define NB_ETHPORTS_USED 1 +#define MEMPOOL_CACHE_SIZE 32 +#define MAX_PKT_BURST 32 +#define RTE_TEST_RX_DESC_DEFAULT 1024 +#define RTE_TEST_TX_DESC_DEFAULT 1024 +#define RTE_PORT_ALL (~(uint16_t)0x0) + +#define RX_PTHRESH 8 /**< Default values of RX prefetch threshold reg. */ +#define RX_HTHRESH 8 /**< Default values of RX host threshold reg. */ +#define RX_WTHRESH 0 /**< Default values of RX write-back threshold reg. */ + +#define TX_PTHRESH 32 /**< Default values of TX prefetch threshold reg. */ +#define TX_HTHRESH 0 /**< Default values of TX host threshold reg. */ +#define TX_WTHRESH 0 /**< Default values of TX write-back threshold reg. 
*/ + +#define MAX_TRAFFIC_BURST 2048 +#define NB_MBUF10240 + +extern struct ipsec_test_data pkt_aes_128_gcm; +extern struct ipsec_test_data pkt_aes_192_gcm; +extern struct ipsec_test_data pkt_aes_256_gcm; +extern struct ipsec_test_data pkt_aes_128_gcm_frag; +extern struct ipsec_test_data pkt_aes_128_cbc_null; +extern struct ipsec_test_data pkt_null_aes_xcbc; +extern struct ipsec_test_data pkt_aes_128_cbc_hmac_sha384; +extern struct ipsec_test_data pkt_aes_128_cbc_hmac_sha512; + +static struct rte_mempool *mbufpool; +static struct rte_mempool *sess_pool; +static struct rte_mempool *sess_priv_pool; +/* ethernet addresses of ports */ +static struct rte_ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; + +static struct rte_eth_conf port_conf = { + .rxmode = { + .mq_mode = RTE_ETH_MQ_RX_NONE, + .split_hdr_size = 0, + .offloads = RTE_ETH_RX_OFFLOAD_CHECKSUM | + RTE_ETH_RX_OFFLOAD_SECURITY, + }, + .txmode = { + .mq_mode = RTE_ETH_MQ_TX_NONE, + .offloads = RTE_ETH_TX_OFFLOAD_SECURITY | + RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE, + }, + .lpbk_mode = 1, /* enable loopback */ +}; + +static struct rte_eth_rxconf rx_conf = { + .rx_thresh = { + .pthresh = RX_PTHRESH, + .hthresh = RX_HTHRESH, + .wthresh = RX_WTHRESH, + }, + .rx_free_thresh = 32, +}; + +static struct rte_eth_txconf tx_conf = { + .tx_thresh = { + .pthresh = TX_PTHRESH, + .hthresh = TX_HTHRESH, + .wthresh = TX_WTHRESH, + }, + .tx_free_thresh = 32, /* Use PMD default values */ + .tx_rs_thresh = 32, /* Use PMD default values */ +}; + +uint16_t port_id; + +static uint64_t link_mbps; +
[PATCH v5 2/7] test/security: add inline inbound IPsec cases
Added test cases for inline Inbound protocol offload verification with known test vectors from Lookaside mode. Signed-off-by: Akhil Goyal --- app/test/test_security_inline_proto.c | 65 +++ 1 file changed, 65 insertions(+) diff --git a/app/test/test_security_inline_proto.c b/app/test/test_security_inline_proto.c index 249474be91..7dd9ba7aff 100644 --- a/app/test/test_security_inline_proto.c +++ b/app/test/test_security_inline_proto.c @@ -819,6 +819,24 @@ test_ipsec_inline_proto_known_vec(const void *test_data) false, &flags); } +static int +test_ipsec_inline_proto_known_vec_inb(const void *test_data) +{ + const struct ipsec_test_data *td = test_data; + struct ipsec_test_flags flags; + struct ipsec_test_data td_inb; + + memset(&flags, 0, sizeof(flags)); + + if (td->ipsec_xform.direction == RTE_SECURITY_IPSEC_SA_DIR_EGRESS) + test_ipsec_td_in_from_out(td, &td_inb); + else + memcpy(&td_inb, td, sizeof(td_inb)); + + return test_ipsec_inline_proto_process(&td_inb, NULL, 1, false, &flags); +} + + static struct unit_test_suite inline_ipsec_testsuite = { .suite_name = "Inline IPsec Ethernet Device Unit Test Suite", .setup = inline_ipsec_testsuite_setup, @@ -865,6 +883,53 @@ static struct unit_test_suite inline_ipsec_testsuite = { ut_setup_inline_ipsec, ut_teardown_inline_ipsec, test_ipsec_inline_proto_known_vec, &pkt_null_aes_xcbc), + TEST_CASE_NAMED_WITH_DATA( + "Inbound known vector (ESP tunnel mode IPv4 AES-GCM 128)", + ut_setup_inline_ipsec, ut_teardown_inline_ipsec, + test_ipsec_inline_proto_known_vec_inb, &pkt_aes_128_gcm), + TEST_CASE_NAMED_WITH_DATA( + "Inbound known vector (ESP tunnel mode IPv4 AES-GCM 192)", + ut_setup_inline_ipsec, ut_teardown_inline_ipsec, + test_ipsec_inline_proto_known_vec_inb, &pkt_aes_192_gcm), + TEST_CASE_NAMED_WITH_DATA( + "Inbound known vector (ESP tunnel mode IPv4 AES-GCM 256)", + ut_setup_inline_ipsec, ut_teardown_inline_ipsec, + test_ipsec_inline_proto_known_vec_inb, &pkt_aes_256_gcm), + TEST_CASE_NAMED_WITH_DATA( + "Inbound known vector (ESP tunnel mode IPv4 AES-CBC 128)", + ut_setup_inline_ipsec, ut_teardown_inline_ipsec, + test_ipsec_inline_proto_known_vec_inb, &pkt_aes_128_cbc_null), + TEST_CASE_NAMED_WITH_DATA( + "Inbound known vector (ESP tunnel mode IPv4 AES-CBC 128 HMAC-SHA256 [16B ICV])", + ut_setup_inline_ipsec, ut_teardown_inline_ipsec, + test_ipsec_inline_proto_known_vec_inb, + &pkt_aes_128_cbc_hmac_sha256), + TEST_CASE_NAMED_WITH_DATA( + "Inbound known vector (ESP tunnel mode IPv4 AES-CBC 128 HMAC-SHA384 [24B ICV])", + ut_setup_inline_ipsec, ut_teardown_inline_ipsec, + test_ipsec_inline_proto_known_vec_inb, + &pkt_aes_128_cbc_hmac_sha384), + TEST_CASE_NAMED_WITH_DATA( + "Inbound known vector (ESP tunnel mode IPv4 AES-CBC 128 HMAC-SHA512 [32B ICV])", + ut_setup_inline_ipsec, ut_teardown_inline_ipsec, + test_ipsec_inline_proto_known_vec_inb, + &pkt_aes_128_cbc_hmac_sha512), + TEST_CASE_NAMED_WITH_DATA( + "Inbound known vector (ESP tunnel mode IPv6 AES-GCM 128)", + ut_setup_inline_ipsec, ut_teardown_inline_ipsec, + test_ipsec_inline_proto_known_vec_inb, &pkt_aes_256_gcm_v6), + TEST_CASE_NAMED_WITH_DATA( + "Inbound known vector (ESP tunnel mode IPv6 AES-CBC 128 HMAC-SHA256 [16B ICV])", + ut_setup_inline_ipsec, ut_teardown_inline_ipsec, + test_ipsec_inline_proto_known_vec_inb, + &pkt_aes_128_cbc_hmac_sha256_v6), + TEST_CASE_NAMED_WITH_DATA( + "Inbound known vector (ESP tunnel mode IPv4 NULL AES-XCBC-MAC [12B ICV])", + ut_setup_inline_ipsec, ut_teardown_inline_ipsec, + test_ipsec_inline_proto_known_vec_inb, + &pkt_null_aes_xcbc), + + TEST_CASES_END() 
/**< NULL terminate unit test array */ }, -- 2.25.1
[PATCH v5 3/7] test/security: add combined mode inline IPsec cases
Added combined encap and decap test cases for various algorithm combinations Signed-off-by: Akhil Goyal --- app/test/test_security_inline_proto.c | 102 ++ 1 file changed, 102 insertions(+) diff --git a/app/test/test_security_inline_proto.c b/app/test/test_security_inline_proto.c index 7dd9ba7aff..ea36d1188c 100644 --- a/app/test/test_security_inline_proto.c +++ b/app/test/test_security_inline_proto.c @@ -660,6 +660,92 @@ test_ipsec_inline_proto_process(struct ipsec_test_data *td, return ret; } +static int +test_ipsec_inline_proto_all(const struct ipsec_test_flags *flags) +{ + struct ipsec_test_data td_outb; + struct ipsec_test_data td_inb; + unsigned int i, nb_pkts = 1, pass_cnt = 0, fail_cnt = 0; + int ret; + + if (flags->iv_gen || flags->sa_expiry_pkts_soft || + flags->sa_expiry_pkts_hard) + nb_pkts = IPSEC_TEST_PACKETS_MAX; + + for (i = 0; i < RTE_DIM(alg_list); i++) { + test_ipsec_td_prepare(alg_list[i].param1, + alg_list[i].param2, + flags, &td_outb, 1); + + if (!td_outb.aead) { + enum rte_crypto_cipher_algorithm cipher_alg; + enum rte_crypto_auth_algorithm auth_alg; + + cipher_alg = td_outb.xform.chain.cipher.cipher.algo; + auth_alg = td_outb.xform.chain.auth.auth.algo; + + if (td_outb.aes_gmac && cipher_alg != RTE_CRYPTO_CIPHER_NULL) + continue; + + /* ICV is not applicable for NULL auth */ + if (flags->icv_corrupt && + auth_alg == RTE_CRYPTO_AUTH_NULL) + continue; + + /* IV is not applicable for NULL cipher */ + if (flags->iv_gen && + cipher_alg == RTE_CRYPTO_CIPHER_NULL) + continue; + } + + if (flags->udp_encap) + td_outb.ipsec_xform.options.udp_encap = 1; + + ret = test_ipsec_inline_proto_process(&td_outb, &td_inb, nb_pkts, + false, flags); + if (ret == TEST_SKIPPED) + continue; + + if (ret == TEST_FAILED) { + printf("\n TEST FAILED"); + test_ipsec_display_alg(alg_list[i].param1, + alg_list[i].param2); + fail_cnt++; + continue; + } + + test_ipsec_td_update(&td_inb, &td_outb, 1, flags); + + ret = test_ipsec_inline_proto_process(&td_inb, NULL, nb_pkts, + false, flags); + if (ret == TEST_SKIPPED) + continue; + + if (ret == TEST_FAILED) { + printf("\n TEST FAILED"); + test_ipsec_display_alg(alg_list[i].param1, + alg_list[i].param2); + fail_cnt++; + continue; + } + + if (flags->display_alg) + test_ipsec_display_alg(alg_list[i].param1, + alg_list[i].param2); + + pass_cnt++; + } + + printf("Tests passed: %d, failed: %d", pass_cnt, fail_cnt); + if (fail_cnt > 0) + return TEST_FAILED; + if (pass_cnt > 0) + return TEST_SUCCESS; + else + return TEST_SKIPPED; +} + + static int ut_setup_inline_ipsec(void) { @@ -836,6 +922,17 @@ test_ipsec_inline_proto_known_vec_inb(const void *test_data) return test_ipsec_inline_proto_process(&td_inb, NULL, 1, false, &flags); } +static int +test_ipsec_inline_proto_display_list(const void *data __rte_unused) +{ + struct ipsec_test_flags flags; + + memset(&flags, 0, sizeof(flags)); + + flags.display_alg = true; + + return test_ipsec_inline_proto_all(&flags); +} static struct unit_test_suite inline_ipsec_testsuite = { .suite_name = "Inline IPsec Ethernet Device Unit Test Suite", @@ -929,6 +1026,11 @@ static struct unit_test_suite inline_ipsec_testsuite = { test_ipsec_inline_proto_known_vec_inb, &pkt_null_aes_xcbc), + TEST_CASE_NAMED_ST( + "Combined test alg list", + ut_setup_inline_ipsec, ut_teardown_inline_ipsec, + test_ipsec_inline_proto_display_list), + TEST_CASES_END() /**< NULL terminate unit test array */ -- 2.25.1
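A self-contained illustration of the combined-mode idea: for every algorithm, an encap step followed by a decap step must return the original payload. Here encap()/decap() are placeholders for the inline egress/ingress processing the real test performs per entry of alg_list:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

typedef int (*xform_fn)(const uint8_t *in, size_t in_len,
			uint8_t *out, size_t *out_len);

static int
roundtrip_ok(xform_fn encap, xform_fn decap,
	     const uint8_t *pt, size_t pt_len)
{
	uint8_t ct[2048], rt[2048];
	size_t ct_len = sizeof(ct), rt_len = sizeof(rt);

	if (encap(pt, pt_len, ct, &ct_len) != 0)	/* egress: plaintext -> ESP */
		return 0;
	if (decap(ct, ct_len, rt, &rt_len) != 0)	/* ingress: ESP -> plaintext */
		return 0;
	return rt_len == pt_len && memcmp(rt, pt, pt_len) == 0;
}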
[PATCH v5 4/7] test/security: add inline IPsec reassembly cases
Added unit test cases for IP reassembly of inline IPsec inbound scenarios. In these cases, known test vectors of fragments are first processed for inline outbound processing and then received back on loopback interface for inbound processing along with IP reassembly of the corresponding decrypted packets. The resultant plain text reassembled packet is compared with original unfragmented packet. In this patch, cases are added for 2/4/5 fragments for both IPv4 and IPv6 packets. A few negative test cases are also added like incomplete fragments, out of place fragments, duplicate fragments. Signed-off-by: Akhil Goyal --- app/test/test_security_inline_proto.c | 421 ++- app/test/test_security_inline_proto_vectors.h | 684 ++ 2 files changed, 1104 insertions(+), 1 deletion(-) diff --git a/app/test/test_security_inline_proto.c b/app/test/test_security_inline_proto.c index ea36d1188c..46636af072 100644 --- a/app/test/test_security_inline_proto.c +++ b/app/test/test_security_inline_proto.c @@ -41,6 +41,9 @@ test_inline_ipsec(void) #define MAX_TRAFFIC_BURST 2048 #define NB_MBUF10240 +#define ENCAP_DECAP_BURST_SZ 33 +#define APP_REASS_TIMEOUT 10 + extern struct ipsec_test_data pkt_aes_128_gcm; extern struct ipsec_test_data pkt_aes_192_gcm; extern struct ipsec_test_data pkt_aes_256_gcm; @@ -94,6 +97,8 @@ uint16_t port_id; static uint64_t link_mbps; +static int ip_reassembly_dynfield_offset = -1; + static struct rte_flow *default_flow[RTE_MAX_ETHPORTS]; /* Create Inline IPsec session */ @@ -527,6 +532,347 @@ destroy_default_flow(uint16_t portid) struct rte_mbuf **tx_pkts_burst; struct rte_mbuf **rx_pkts_burst; +static int +compare_pkt_data(struct rte_mbuf *m, uint8_t *ref, unsigned int tot_len) +{ + unsigned int len; + unsigned int nb_segs = m->nb_segs; + unsigned int matched = 0; + struct rte_mbuf *save = m; + + while (m) { + len = tot_len; + if (len > m->data_len) + len = m->data_len; + if (len != 0) { + if (memcmp(rte_pktmbuf_mtod(m, char *), + ref + matched, len)) { + printf("\nReassembly case failed: Data Mismatch"); + rte_hexdump(stdout, "Reassembled", + rte_pktmbuf_mtod(m, char *), + len); + rte_hexdump(stdout, "reference", + ref + matched, + len); + return TEST_FAILED; + } + } + tot_len -= len; + matched += len; + m = m->next; + } + + if (tot_len) { + printf("\nReassembly case failed: Data Missing %u", + tot_len); + printf("\nnb_segs %u, tot_len %u", nb_segs, tot_len); + rte_pktmbuf_dump(stderr, save, -1); + return TEST_FAILED; + } + return TEST_SUCCESS; +} + +static inline bool +is_ip_reassembly_incomplete(struct rte_mbuf *mbuf) +{ + static uint64_t ip_reassembly_dynflag; + int ip_reassembly_dynflag_offset; + + if (ip_reassembly_dynflag == 0) { + ip_reassembly_dynflag_offset = rte_mbuf_dynflag_lookup( + RTE_MBUF_DYNFLAG_IP_REASSEMBLY_INCOMPLETE_NAME, NULL); + if (ip_reassembly_dynflag_offset < 0) + return false; + ip_reassembly_dynflag = RTE_BIT64(ip_reassembly_dynflag_offset); + } + + return (mbuf->ol_flags & ip_reassembly_dynflag) != 0; +} + +static void +free_mbuf(struct rte_mbuf *mbuf) +{ + rte_eth_ip_reassembly_dynfield_t dynfield; + + if (!mbuf) + return; + + if (!is_ip_reassembly_incomplete(mbuf)) { + rte_pktmbuf_free(mbuf); + } else { + if (ip_reassembly_dynfield_offset < 0) + return; + + while (mbuf) { + dynfield = *RTE_MBUF_DYNFIELD(mbuf, + ip_reassembly_dynfield_offset, + rte_eth_ip_reassembly_dynfield_t *); + rte_pktmbuf_free(mbuf); + mbuf = dynfield.next_frag; + } + } +} + + +static int +get_and_verify_incomplete_frags(struct rte_mbuf *mbuf, + struct reassembly_vector *vector) +{ + 
rte_eth_ip_reassembly_dynfield_t *dynfield[MAX_PKT_BURST]; + int j = 0, ret; + /** +* IP reassembly offload is incomplete, and fragments are listed in +* dynfield which can be reassembled in SW. +*/ + printf("\nHW IP Reassembly is not comple
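Before running such vectors, the offload has to be switched on per port; a hedged sketch against the 22.03 ethdev IP reassembly API, simply requesting what the hardware advertises:

#include <rte_ethdev.h>

static int
enable_ip_reassembly(uint16_t port_id)
{
	struct rte_eth_ip_reassembly_params capa;
	int ret = rte_eth_ip_reassembly_capability_get(port_id, &capa);

	if (ret != 0)
		return ret;	/* offload not supported on this port */

	/* Ask for the maximum the HW offers; a real app may trim this. */
	return rte_eth_ip_reassembly_conf_set(port_id, &capa);
}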
[PATCH v5 5/7] test/security: add more inline IPsec functional cases
Added more inline IPsec functional verification cases. These cases do not have known vectors but are verified using encap + decap test for all the algo combinations. Signed-off-by: Akhil Goyal --- app/test/test_security_inline_proto.c | 517 ++ 1 file changed, 517 insertions(+) diff --git a/app/test/test_security_inline_proto.c b/app/test/test_security_inline_proto.c index 46636af072..055b753634 100644 --- a/app/test/test_security_inline_proto.c +++ b/app/test/test_security_inline_proto.c @@ -1314,6 +1314,394 @@ test_ipsec_inline_proto_display_list(const void *data __rte_unused) return test_ipsec_inline_proto_all(&flags); } +static int +test_ipsec_inline_proto_udp_encap(const void *data __rte_unused) +{ + struct ipsec_test_flags flags; + + memset(&flags, 0, sizeof(flags)); + + flags.udp_encap = true; + + return test_ipsec_inline_proto_all(&flags); +} + +static int +test_ipsec_inline_proto_udp_ports_verify(const void *data __rte_unused) +{ + struct ipsec_test_flags flags; + + memset(&flags, 0, sizeof(flags)); + + flags.udp_encap = true; + flags.udp_ports_verify = true; + + return test_ipsec_inline_proto_all(&flags); +} + +static int +test_ipsec_inline_proto_err_icv_corrupt(const void *data __rte_unused) +{ + struct ipsec_test_flags flags; + + memset(&flags, 0, sizeof(flags)); + + flags.icv_corrupt = true; + + return test_ipsec_inline_proto_all(&flags); +} + +static int +test_ipsec_inline_proto_tunnel_dst_addr_verify(const void *data __rte_unused) +{ + struct ipsec_test_flags flags; + + memset(&flags, 0, sizeof(flags)); + + flags.tunnel_hdr_verify = RTE_SECURITY_IPSEC_TUNNEL_VERIFY_DST_ADDR; + + return test_ipsec_inline_proto_all(&flags); +} + +static int +test_ipsec_inline_proto_tunnel_src_dst_addr_verify(const void *data __rte_unused) +{ + struct ipsec_test_flags flags; + + memset(&flags, 0, sizeof(flags)); + + flags.tunnel_hdr_verify = RTE_SECURITY_IPSEC_TUNNEL_VERIFY_SRC_DST_ADDR; + + return test_ipsec_inline_proto_all(&flags); +} + +static int +test_ipsec_inline_proto_inner_ip_csum(const void *data __rte_unused) +{ + struct ipsec_test_flags flags; + + memset(&flags, 0, sizeof(flags)); + + flags.ip_csum = true; + + return test_ipsec_inline_proto_all(&flags); +} + +static int +test_ipsec_inline_proto_inner_l4_csum(const void *data __rte_unused) +{ + struct ipsec_test_flags flags; + + memset(&flags, 0, sizeof(flags)); + + flags.l4_csum = true; + + return test_ipsec_inline_proto_all(&flags); +} + +static int +test_ipsec_inline_proto_tunnel_v4_in_v4(const void *data __rte_unused) +{ + struct ipsec_test_flags flags; + + memset(&flags, 0, sizeof(flags)); + + flags.ipv6 = false; + flags.tunnel_ipv6 = false; + + return test_ipsec_inline_proto_all(&flags); +} + +static int +test_ipsec_inline_proto_tunnel_v6_in_v6(const void *data __rte_unused) +{ + struct ipsec_test_flags flags; + + memset(&flags, 0, sizeof(flags)); + + flags.ipv6 = true; + flags.tunnel_ipv6 = true; + + return test_ipsec_inline_proto_all(&flags); +} + +static int +test_ipsec_inline_proto_tunnel_v4_in_v6(const void *data __rte_unused) +{ + struct ipsec_test_flags flags; + + memset(&flags, 0, sizeof(flags)); + + flags.ipv6 = false; + flags.tunnel_ipv6 = true; + + return test_ipsec_inline_proto_all(&flags); +} + +static int +test_ipsec_inline_proto_tunnel_v6_in_v4(const void *data __rte_unused) +{ + struct ipsec_test_flags flags; + + memset(&flags, 0, sizeof(flags)); + + flags.ipv6 = true; + flags.tunnel_ipv6 = false; + + return test_ipsec_inline_proto_all(&flags); +} + +static int +test_ipsec_inline_proto_transport_v4(const void 
*data __rte_unused) +{ + struct ipsec_test_flags flags; + + memset(&flags, 0, sizeof(flags)); + + flags.ipv6 = false; + flags.transport = true; + + return test_ipsec_inline_proto_all(&flags); +} + +static int +test_ipsec_inline_proto_transport_l4_csum(const void *data __rte_unused) +{ + struct ipsec_test_flags flags = { + .l4_csum = true, + .transport = true, + }; + + return test_ipsec_inline_proto_all(&flags); +} + +static int +test_ipsec_inline_proto_stats(const void *data __rte_unused) +{ + struct ipsec_test_flags flags; + + memset(&flags, 0, sizeof(flags)); + + flags.stats_success = true; + + return test_ipsec_inline_proto_all(&flags); +} + +static int +test_ipsec_inline_proto_pkt_fragment(const void *data __rte_unused) +{ + struct ipsec_test_flags flags; + + memset(&flags, 0, sizeof(flags)); + + flags.fragment = true; + + return test_ipsec_inline_proto_all(&flags); + +} + +static int +test_ipsec_inline_proto_copy_df_inner_0(const void *data __rte_unused) +{ + st
[PATCH v5 6/7] test/security: add ESN and anti-replay cases for inline
Added cases to test anti replay for inline IPsec processing with and without extended sequence number support. Signed-off-by: Akhil Goyal --- app/test/test_security_inline_proto.c | 308 ++ 1 file changed, 308 insertions(+) diff --git a/app/test/test_security_inline_proto.c b/app/test/test_security_inline_proto.c index 055b753634..009405f403 100644 --- a/app/test/test_security_inline_proto.c +++ b/app/test/test_security_inline_proto.c @@ -1091,6 +1091,136 @@ test_ipsec_inline_proto_all(const struct ipsec_test_flags *flags) return TEST_SKIPPED; } +static int +test_ipsec_inline_proto_process_with_esn(struct ipsec_test_data td[], + struct ipsec_test_data res_d[], + int nb_pkts, + bool silent, + const struct ipsec_test_flags *flags) +{ + struct rte_security_session_conf sess_conf = {0}; + struct ipsec_test_data *res_d_tmp = NULL; + struct rte_crypto_sym_xform cipher = {0}; + struct rte_crypto_sym_xform auth = {0}; + struct rte_crypto_sym_xform aead = {0}; + struct rte_mbuf *rx_pkt = NULL; + struct rte_mbuf *tx_pkt = NULL; + int nb_rx, nb_sent; + struct rte_security_session *ses; + struct rte_security_ctx *ctx; + uint32_t ol_flags; + int i, ret; + + if (td[0].aead) { + sess_conf.crypto_xform = &aead; + } else { + if (td[0].ipsec_xform.direction == + RTE_SECURITY_IPSEC_SA_DIR_EGRESS) { + sess_conf.crypto_xform = &cipher; + sess_conf.crypto_xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER; + sess_conf.crypto_xform->next = &auth; + sess_conf.crypto_xform->next->type = RTE_CRYPTO_SYM_XFORM_AUTH; + } else { + sess_conf.crypto_xform = &auth; + sess_conf.crypto_xform->type = RTE_CRYPTO_SYM_XFORM_AUTH; + sess_conf.crypto_xform->next = &cipher; + sess_conf.crypto_xform->next->type = RTE_CRYPTO_SYM_XFORM_CIPHER; + } + } + + /* Create Inline IPsec session. */ + ret = create_inline_ipsec_session(&td[0], port_id, &ses, &ctx, + &ol_flags, flags, &sess_conf); + if (ret) + return ret; + + if (td[0].ipsec_xform.direction == RTE_SECURITY_IPSEC_SA_DIR_INGRESS) + create_default_flow(port_id); + + for (i = 0; i < nb_pkts; i++) { + tx_pkt = init_packet(mbufpool, td[i].input_text.data, + td[i].input_text.len); + if (tx_pkt == NULL) { + ret = TEST_FAILED; + goto out; + } + + if (test_ipsec_pkt_update(rte_pktmbuf_mtod_offset(tx_pkt, + uint8_t *, RTE_ETHER_HDR_LEN), flags)) { + ret = TEST_FAILED; + goto out; + } + + if (td[i].ipsec_xform.direction == + RTE_SECURITY_IPSEC_SA_DIR_EGRESS) { + if (flags->antireplay) { + sess_conf.ipsec.esn.value = + td[i].ipsec_xform.esn.value; + ret = rte_security_session_update(ctx, ses, + &sess_conf); + if (ret) { + printf("Could not update ESN in session\n"); + rte_pktmbuf_free(tx_pkt); + goto out; + } + } + if (ol_flags & RTE_SECURITY_TX_OLOAD_NEED_MDATA) + rte_security_set_pkt_metadata(ctx, ses, + tx_pkt, NULL); + tx_pkt->ol_flags |= RTE_MBUF_F_TX_SEC_OFFLOAD; + } + /* Send packet to ethdev for inline IPsec processing. */ + nb_sent = rte_eth_tx_burst(port_id, 0, &tx_pkt, 1); + if (nb_sent != 1) { + printf("\nUnable to TX packets"); + rte_pktmbuf_free(tx_pkt); + ret = TEST_FAILED; + goto out; + } + + rte_pause(); + + /* Receive back packet on loopback interface. */ + do { + rte_delay_ms(1); + nb_rx = rte_eth_rx_burst(port_id, 0, &rx_pkt, 1); + } while (nb_rx == 0); + + rte_pktmbuf_adj(rx_pkt, RTE_ETHER_HDR_LEN); + + if (res_d != NULL) + res_d_tmp = &res_d[i]; + + ret = test_ipsec_post_proces