[dpdk-dev] [PATCH 1/5] crypto/dpaa2_sec: fix to check next null for auth only case

2021-07-21 Thread Hemant Agrawal
This patch fixes a missing check for a NULL next pointer in the
integrity-only (auth-only) case of the PDCP security context.

Fixes: bef594ec5cc8 ("crypto/dpaa2_sec: support PDCP offload")
Cc: sta...@dpdk.org

Signed-off-by: Hemant Agrawal 
---
 drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c | 25 +++--
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c 
b/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c
index 1ccead3641..4438486a8b 100644
--- a/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c
+++ b/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c
@@ -3102,7 +3102,7 @@ dpaa2_sec_set_pdcp_session(struct rte_cryptodev *dev,
struct rte_security_pdcp_xform *pdcp_xform = &conf->pdcp;
struct rte_crypto_sym_xform *xform = conf->crypto_xform;
struct rte_crypto_auth_xform *auth_xform = NULL;
-   struct rte_crypto_cipher_xform *cipher_xform;
+   struct rte_crypto_cipher_xform *cipher_xform = NULL;
dpaa2_sec_session *session = (dpaa2_sec_session *)sess;
struct ctxt_priv *priv;
struct dpaa2_sec_dev_private *dev_priv = dev->data->dev_private;
@@ -3134,18 +3134,18 @@ dpaa2_sec_set_pdcp_session(struct rte_cryptodev *dev,
flc = &priv->flc_desc[0].flc;
 
/* find xfrm types */
-   if (xform->type == RTE_CRYPTO_SYM_XFORM_CIPHER && xform->next == NULL) {
-   cipher_xform = &xform->cipher;
-   } else if (xform->type == RTE_CRYPTO_SYM_XFORM_CIPHER &&
-  xform->next->type == RTE_CRYPTO_SYM_XFORM_AUTH) {
-   session->ext_params.aead_ctxt.auth_cipher_text = true;
+   if (xform->type == RTE_CRYPTO_SYM_XFORM_CIPHER) {
cipher_xform = &xform->cipher;
-   auth_xform = &xform->next->auth;
-   } else if (xform->type == RTE_CRYPTO_SYM_XFORM_AUTH &&
-  xform->next->type == RTE_CRYPTO_SYM_XFORM_CIPHER) {
-   session->ext_params.aead_ctxt.auth_cipher_text = false;
-   cipher_xform = &xform->next->cipher;
+   if (xform->next != NULL) {
+   session->ext_params.aead_ctxt.auth_cipher_text = true;
+   auth_xform = &xform->next->auth;
+   }
+   } else if (xform->type == RTE_CRYPTO_SYM_XFORM_AUTH) {
auth_xform = &xform->auth;
+   if (xform->next != NULL) {
+   session->ext_params.aead_ctxt.auth_cipher_text = false;
+   cipher_xform = &xform->next->cipher;
+   }
} else {
DPAA2_SEC_ERR("Invalid crypto type");
return -EINVAL;
@@ -3184,7 +3184,8 @@ dpaa2_sec_set_pdcp_session(struct rte_cryptodev *dev,
session->pdcp.hfn_threshold = pdcp_xform->hfn_threshold;
session->pdcp.hfn_ovd = pdcp_xform->hfn_ovrd;
/* hfv ovd offset location is stored in iv.offset value*/
-   session->pdcp.hfn_ovd_offset = cipher_xform->iv.offset;
+   if (cipher_xform)
+   session->pdcp.hfn_ovd_offset = cipher_xform->iv.offset;
 
cipherdata.key = (size_t)session->cipher_key.data;
cipherdata.keylen = session->cipher_key.length;
-- 
2.17.1



[dpdk-dev] [PATCH 2/5] crypto/dpaa_sec: support DES-CBC

2021-07-21 Thread Hemant Agrawal
From: Gagandeep Singh 

Add DES-CBC support and enable the available cipher-only
test cases.

Signed-off-by: Gagandeep Singh 
---
 doc/guides/cryptodevs/features/dpaa_sec.ini |  1 +
 drivers/crypto/dpaa_sec/dpaa_sec.c  | 13 +
 drivers/crypto/dpaa_sec/dpaa_sec.h  | 20 
 3 files changed, 34 insertions(+)

diff --git a/doc/guides/cryptodevs/features/dpaa_sec.ini 
b/doc/guides/cryptodevs/features/dpaa_sec.ini
index 243f3e1d67..5d0d04d601 100644
--- a/doc/guides/cryptodevs/features/dpaa_sec.ini
+++ b/doc/guides/cryptodevs/features/dpaa_sec.ini
@@ -24,6 +24,7 @@ AES CBC (256) = Y
 AES CTR (128) = Y
 AES CTR (192) = Y
 AES CTR (256) = Y
+DES CBC   = Y
 3DES CBC  = Y
 SNOW3G UEA2   = Y
 ZUC EEA3  = Y
diff --git a/drivers/crypto/dpaa_sec/dpaa_sec.c 
b/drivers/crypto/dpaa_sec/dpaa_sec.c
index 19d4684e24..af5c7c499c 100644
--- a/drivers/crypto/dpaa_sec/dpaa_sec.c
+++ b/drivers/crypto/dpaa_sec/dpaa_sec.c
@@ -454,6 +454,7 @@ dpaa_sec_prep_cdb(dpaa_sec_session *ses)
switch (ses->cipher_alg) {
case RTE_CRYPTO_CIPHER_AES_CBC:
case RTE_CRYPTO_CIPHER_3DES_CBC:
+   case RTE_CRYPTO_CIPHER_DES_CBC:
case RTE_CRYPTO_CIPHER_AES_CTR:
case RTE_CRYPTO_CIPHER_3DES_CTR:
shared_desc_len = cnstr_shdsc_blkcipher(
@@ -2043,6 +2044,10 @@ dpaa_sec_cipher_init(struct rte_cryptodev *dev 
__rte_unused,
session->cipher_key.alg = OP_ALG_ALGSEL_AES;
session->cipher_key.algmode = OP_ALG_AAI_CBC;
break;
+   case RTE_CRYPTO_CIPHER_DES_CBC:
+   session->cipher_key.alg = OP_ALG_ALGSEL_DES;
+   session->cipher_key.algmode = OP_ALG_AAI_CBC;
+   break;
case RTE_CRYPTO_CIPHER_3DES_CBC:
session->cipher_key.alg = OP_ALG_ALGSEL_3DES;
session->cipher_key.algmode = OP_ALG_AAI_CBC;
@@ -2218,6 +2223,10 @@ dpaa_sec_chain_init(struct rte_cryptodev *dev 
__rte_unused,
session->cipher_key.alg = OP_ALG_ALGSEL_AES;
session->cipher_key.algmode = OP_ALG_AAI_CBC;
break;
+   case RTE_CRYPTO_CIPHER_DES_CBC:
+   session->cipher_key.alg = OP_ALG_ALGSEL_DES;
+   session->cipher_key.algmode = OP_ALG_AAI_CBC;
+   break;
case RTE_CRYPTO_CIPHER_3DES_CBC:
session->cipher_key.alg = OP_ALG_ALGSEL_3DES;
session->cipher_key.algmode = OP_ALG_AAI_CBC;
@@ -2667,6 +2676,10 @@ dpaa_sec_ipsec_proto_init(struct rte_crypto_cipher_xform 
*cipher_xform,
session->cipher_key.alg = OP_PCL_IPSEC_AES_CBC;
session->cipher_key.algmode = OP_ALG_AAI_CBC;
break;
+   case RTE_CRYPTO_CIPHER_DES_CBC:
+   session->cipher_key.alg = OP_PCL_IPSEC_DES;
+   session->cipher_key.algmode = OP_ALG_AAI_CBC;
+   break;
case RTE_CRYPTO_CIPHER_3DES_CBC:
session->cipher_key.alg = OP_PCL_IPSEC_3DES;
session->cipher_key.algmode = OP_ALG_AAI_CBC;
diff --git a/drivers/crypto/dpaa_sec/dpaa_sec.h 
b/drivers/crypto/dpaa_sec/dpaa_sec.h
index 368699678b..216e8c8b6f 100644
--- a/drivers/crypto/dpaa_sec/dpaa_sec.h
+++ b/drivers/crypto/dpaa_sec/dpaa_sec.h
@@ -456,6 +456,26 @@ static const struct rte_cryptodev_capabilities 
dpaa_sec_capabilities[] = {
}, }
}, }
},
+   {   /* DES CBC */
+   .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+   {.sym = {
+   .xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+   {.cipher = {
+   .algo = RTE_CRYPTO_CIPHER_DES_CBC,
+   .block_size = 8,
+   .key_size = {
+   .min = 8,
+   .max = 8,
+   .increment = 0
+   },
+   .iv_size = {
+   .min = 8,
+   .max = 8,
+   .increment = 0
+   }
+   }, }
+   }, }
+   },
{   /* 3DES CBC */
.op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
{.sym = {
-- 
2.17.1



[dpdk-dev] [PATCH 3/5] crypto/dpaa_sec: support non-HMAC auth algos

2021-07-21 Thread Hemant Agrawal
From: Gagandeep Singh 

This patch adds support for the non-HMAC MD5 and SHAx algorithms.

Signed-off-by: Gagandeep Singh 
---
 doc/guides/cryptodevs/features/dpaa_sec.ini |   8 +-
 drivers/crypto/dpaa_sec/dpaa_sec.c  |  55 +++--
 drivers/crypto/dpaa_sec/dpaa_sec.h  | 126 
 3 files changed, 180 insertions(+), 9 deletions(-)

diff --git a/doc/guides/cryptodevs/features/dpaa_sec.ini 
b/doc/guides/cryptodevs/features/dpaa_sec.ini
index 5d0d04d601..eab14da96c 100644
--- a/doc/guides/cryptodevs/features/dpaa_sec.ini
+++ b/doc/guides/cryptodevs/features/dpaa_sec.ini
@@ -33,11 +33,17 @@ ZUC EEA3  = Y
 ; Supported authentication algorithms of the 'dpaa_sec' crypto driver.
 ;
 [Auth]
+MD5  = Y
 MD5 HMAC = Y
+SHA1 = Y
 SHA1 HMAC= Y
+SHA224   = Y
 SHA224 HMAC  = Y
+SHA256   = Y
 SHA256 HMAC  = Y
+SHA384   = Y
 SHA384 HMAC  = Y
+SHA512   = Y
 SHA512 HMAC  = Y
 SNOW3G UIA2  = Y
 ZUC EIA3 = Y
@@ -53,4 +59,4 @@ AES GCM (256) = Y
 ;
 ; Supported Asymmetric algorithms of the 'dpaa_sec' crypto driver.
 ;
-[Asymmetric]
\ No newline at end of file
+[Asymmetric]
diff --git a/drivers/crypto/dpaa_sec/dpaa_sec.c 
b/drivers/crypto/dpaa_sec/dpaa_sec.c
index af5c7c499c..95b9d7414f 100644
--- a/drivers/crypto/dpaa_sec/dpaa_sec.c
+++ b/drivers/crypto/dpaa_sec/dpaa_sec.c
@@ -489,6 +489,18 @@ dpaa_sec_prep_cdb(dpaa_sec_session *ses)
alginfo_a.algtype = ses->auth_key.alg;
alginfo_a.algmode = ses->auth_key.algmode;
switch (ses->auth_alg) {
+   case RTE_CRYPTO_AUTH_MD5:
+   case RTE_CRYPTO_AUTH_SHA1:
+   case RTE_CRYPTO_AUTH_SHA224:
+   case RTE_CRYPTO_AUTH_SHA256:
+   case RTE_CRYPTO_AUTH_SHA384:
+   case RTE_CRYPTO_AUTH_SHA512:
+   shared_desc_len = cnstr_shdsc_hash(
+   cdb->sh_desc, true,
+   swap, SHR_NEVER, &alginfo_a,
+   !ses->dir,
+   ses->digest_length);
+   break;
case RTE_CRYPTO_AUTH_MD5_HMAC:
case RTE_CRYPTO_AUTH_SHA1_HMAC:
case RTE_CRYPTO_AUTH_SHA224_HMAC:
@@ -2080,43 +2092,70 @@ dpaa_sec_auth_init(struct rte_cryptodev *dev 
__rte_unused,
 {
session->ctxt = DPAA_SEC_AUTH;
session->auth_alg = xform->auth.algo;
-   session->auth_key.data = rte_zmalloc(NULL, xform->auth.key.length,
+   session->auth_key.length = xform->auth.key.length;
+   if (xform->auth.key.length) {
+   session->auth_key.data =
+   rte_zmalloc(NULL, xform->auth.key.length,
 RTE_CACHE_LINE_SIZE);
-   if (session->auth_key.data == NULL && xform->auth.key.length > 0) {
-   DPAA_SEC_ERR("No Memory for auth key");
-   return -ENOMEM;
+   if (session->auth_key.data == NULL) {
+   DPAA_SEC_ERR("No Memory for auth key");
+   return -ENOMEM;
+   }
+   memcpy(session->auth_key.data, xform->auth.key.data,
+   xform->auth.key.length);
+
}
-   session->auth_key.length = xform->auth.key.length;
session->digest_length = xform->auth.digest_length;
if (session->cipher_alg == RTE_CRYPTO_CIPHER_NULL) {
session->iv.offset = xform->auth.iv.offset;
session->iv.length = xform->auth.iv.length;
}
 
-   memcpy(session->auth_key.data, xform->auth.key.data,
-  xform->auth.key.length);
-
switch (xform->auth.algo) {
+   case RTE_CRYPTO_AUTH_SHA1:
+   session->auth_key.alg = OP_ALG_ALGSEL_SHA1;
+   session->auth_key.algmode = OP_ALG_AAI_HASH;
+   break;
case RTE_CRYPTO_AUTH_SHA1_HMAC:
session->auth_key.alg = OP_ALG_ALGSEL_SHA1;
session->auth_key.algmode = OP_ALG_AAI_HMAC;
break;
+   case RTE_CRYPTO_AUTH_MD5:
+   session->auth_key.alg = OP_ALG_ALGSEL_MD5;
+   session->auth_key.algmode = OP_ALG_AAI_HASH;
+   break;
case RTE_CRYPTO_AUTH_MD5_HMAC:
session->auth_key.alg = OP_ALG_ALGSEL_MD5;
session->auth_key.algmode = OP_ALG_AAI_HMAC;
break;
+   case RTE_CRYPTO_AUTH_SHA224:
+   session->auth_key.alg = OP_ALG_ALGSEL_SHA224;
+   session->auth_key.algmode = OP_ALG_AAI_HASH;
+   break;
case RTE_CRYPTO_AUTH_SHA224_HMAC:
session->auth_key.alg = OP_ALG_ALGSEL_SHA224;
session->auth_key.algmode = OP_ALG_AAI_HMAC;
break;
+   case RTE_CRYPTO_AUTH_SHA256:
+   session->auth_key.alg = OP_ALG_ALGSEL_SHA256;
+   session

[dpdk-dev] [PATCH 4/5] crypto/dpaa_sec: support AES-XCBC-MAC

2021-07-21 Thread Hemant Agrawal
From: Gagandeep Singh 

This patch adds support for AES-XCBC-MAC algo.

Signed-off-by: Gagandeep Singh 
---
 doc/guides/cryptodevs/features/dpaa_sec.ini |  1 +
 drivers/crypto/dpaa_sec/dpaa_sec.c  | 21 -
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/doc/guides/cryptodevs/features/dpaa_sec.ini 
b/doc/guides/cryptodevs/features/dpaa_sec.ini
index eab14da96c..d7bc319373 100644
--- a/doc/guides/cryptodevs/features/dpaa_sec.ini
+++ b/doc/guides/cryptodevs/features/dpaa_sec.ini
@@ -47,6 +47,7 @@ SHA512   = Y
 SHA512 HMAC  = Y
 SNOW3G UIA2  = Y
 ZUC EIA3 = Y
+AES XCBC MAC = Y
 
 ;
 ; Supported AEAD algorithms of the 'dpaa_sec' crypto driver.
diff --git a/drivers/crypto/dpaa_sec/dpaa_sec.c 
b/drivers/crypto/dpaa_sec/dpaa_sec.c
index 95b9d7414f..fc9c3a4c2c 100644
--- a/drivers/crypto/dpaa_sec/dpaa_sec.c
+++ b/drivers/crypto/dpaa_sec/dpaa_sec.c
@@ -527,6 +527,14 @@ dpaa_sec_prep_cdb(dpaa_sec_session *ses)
!ses->dir,
ses->digest_length);
break;
+   case RTE_CRYPTO_AUTH_AES_XCBC_MAC:
+   shared_desc_len = cnstr_shdsc_aes_mac(
+   cdb->sh_desc,
+   true, swap, SHR_NEVER,
+   &alginfo_a,
+   !ses->dir,
+   ses->digest_length);
+   break;
default:
DPAA_SEC_ERR("unsupported auth alg %u", ses->auth_alg);
}
@@ -2168,6 +2176,10 @@ dpaa_sec_auth_init(struct rte_cryptodev *dev 
__rte_unused,
session->auth_key.alg = OP_ALG_ALGSEL_ZUCA;
session->auth_key.algmode = OP_ALG_AAI_F9;
break;
+   case RTE_CRYPTO_AUTH_AES_XCBC_MAC:
+   session->auth_key.alg = OP_ALG_ALGSEL_AES;
+   session->auth_key.algmode = OP_ALG_AAI_XCBC_MAC;
+   break;
default:
DPAA_SEC_ERR("Crypto: Unsupported Auth specified %u",
  xform->auth.algo);
@@ -2249,6 +2261,10 @@ dpaa_sec_chain_init(struct rte_cryptodev *dev 
__rte_unused,
session->auth_key.alg = OP_ALG_ALGSEL_SHA512;
session->auth_key.algmode = OP_ALG_AAI_HMAC;
break;
+   case RTE_CRYPTO_AUTH_AES_XCBC_MAC:
+   session->auth_key.alg = OP_ALG_ALGSEL_AES;
+   session->auth_key.algmode = OP_ALG_AAI_XCBC_MAC;
+   break;
default:
DPAA_SEC_ERR("Crypto: Unsupported Auth specified %u",
  auth_xform->algo);
@@ -2688,8 +2704,11 @@ dpaa_sec_ipsec_proto_init(struct rte_crypto_cipher_xform 
*cipher_xform,
case RTE_CRYPTO_AUTH_NULL:
session->auth_key.alg = OP_PCL_IPSEC_HMAC_NULL;
break;
-   case RTE_CRYPTO_AUTH_SHA224_HMAC:
case RTE_CRYPTO_AUTH_AES_XCBC_MAC:
+   session->auth_key.alg = OP_PCL_IPSEC_AES_XCBC_MAC_96;
+   session->auth_key.algmode = OP_ALG_AAI_XCBC_MAC;
+   break;
+   case RTE_CRYPTO_AUTH_SHA224_HMAC:
case RTE_CRYPTO_AUTH_SNOW3G_UIA2:
case RTE_CRYPTO_AUTH_SHA1:
case RTE_CRYPTO_AUTH_SHA256:
-- 
2.17.1



[dpdk-dev] [PATCH 5/5] crypto/dpaa_sec: add support for AES CMAC integrity check

2021-07-21 Thread Hemant Agrawal
From: Gagandeep Singh 

This patch adds support for AES_CMAC integrity in non-security mode.
This patch modifies the caam flib to handle AES CMAC without
conflicting with the proto ALG operations, i.e. by creating
another ALG operation routine.

Signed-off-by: Gagandeep Singh 
---
 doc/guides/cryptodevs/features/dpaa_sec.ini |  1 +
 drivers/crypto/dpaa_sec/dpaa_sec.c  | 10 +
 drivers/crypto/dpaa_sec/dpaa_sec.h  | 43 +
 3 files changed, 54 insertions(+)

diff --git a/doc/guides/cryptodevs/features/dpaa_sec.ini 
b/doc/guides/cryptodevs/features/dpaa_sec.ini
index d7bc319373..6a8f77fb1d 100644
--- a/doc/guides/cryptodevs/features/dpaa_sec.ini
+++ b/doc/guides/cryptodevs/features/dpaa_sec.ini
@@ -48,6 +48,7 @@ SHA512 HMAC  = Y
 SNOW3G UIA2  = Y
 ZUC EIA3 = Y
 AES XCBC MAC = Y
+AES CMAC (128) = Y
 
 ;
 ; Supported AEAD algorithms of the 'dpaa_sec' crypto driver.
diff --git a/drivers/crypto/dpaa_sec/dpaa_sec.c 
b/drivers/crypto/dpaa_sec/dpaa_sec.c
index fc9c3a4c2c..c5416df726 100644
--- a/drivers/crypto/dpaa_sec/dpaa_sec.c
+++ b/drivers/crypto/dpaa_sec/dpaa_sec.c
@@ -528,6 +528,7 @@ dpaa_sec_prep_cdb(dpaa_sec_session *ses)
ses->digest_length);
break;
case RTE_CRYPTO_AUTH_AES_XCBC_MAC:
+   case RTE_CRYPTO_AUTH_AES_CMAC:
shared_desc_len = cnstr_shdsc_aes_mac(
cdb->sh_desc,
true, swap, SHR_NEVER,
@@ -2180,6 +2181,10 @@ dpaa_sec_auth_init(struct rte_cryptodev *dev 
__rte_unused,
session->auth_key.alg = OP_ALG_ALGSEL_AES;
session->auth_key.algmode = OP_ALG_AAI_XCBC_MAC;
break;
+   case RTE_CRYPTO_AUTH_AES_CMAC:
+   session->auth_key.alg = OP_ALG_ALGSEL_AES;
+   session->auth_key.algmode = OP_ALG_AAI_CMAC;
+   break;
default:
DPAA_SEC_ERR("Crypto: Unsupported Auth specified %u",
  xform->auth.algo);
@@ -2265,6 +2270,10 @@ dpaa_sec_chain_init(struct rte_cryptodev *dev 
__rte_unused,
session->auth_key.alg = OP_ALG_ALGSEL_AES;
session->auth_key.algmode = OP_ALG_AAI_XCBC_MAC;
break;
+   case RTE_CRYPTO_AUTH_AES_CMAC:
+   session->auth_key.alg = OP_ALG_ALGSEL_AES;
+   session->auth_key.algmode = OP_ALG_AAI_CMAC;
+   break;
default:
DPAA_SEC_ERR("Crypto: Unsupported Auth specified %u",
  auth_xform->algo);
@@ -2700,6 +2709,7 @@ dpaa_sec_ipsec_proto_init(struct rte_crypto_cipher_xform 
*cipher_xform,
break;
case RTE_CRYPTO_AUTH_AES_CMAC:
session->auth_key.alg = OP_PCL_IPSEC_AES_CMAC_96;
+   session->auth_key.algmode = OP_ALG_AAI_CMAC;
break;
case RTE_CRYPTO_AUTH_NULL:
session->auth_key.alg = OP_PCL_IPSEC_HMAC_NULL;
diff --git a/drivers/crypto/dpaa_sec/dpaa_sec.h 
b/drivers/crypto/dpaa_sec/dpaa_sec.h
index d500a4c246..c94d78e046 100644
--- a/drivers/crypto/dpaa_sec/dpaa_sec.h
+++ b/drivers/crypto/dpaa_sec/dpaa_sec.h
@@ -712,6 +712,49 @@ static const struct rte_cryptodev_capabilities 
dpaa_sec_capabilities[] = {
}, }
}, }
},
+   {   /* AES CMAC */
+   .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+   {.sym = {
+   .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+   {.auth = {
+   .algo = RTE_CRYPTO_AUTH_AES_CMAC,
+   .block_size = 16,
+   .key_size = {
+   .min = 1,
+   .max = 16,
+   .increment = 1
+   },
+   .digest_size = {
+   .min = 12,
+   .max = 16,
+   .increment = 4
+   },
+   .iv_size = { 0 }
+   }, }
+   }, }
+   },
+   {   /* AES XCBC HMAC */
+   .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+   {.sym = {
+   .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+   {.auth = {
+   .algo = RTE_CRYPTO_AUTH_AES_XCBC_MAC,
+   .block_size = 16,
+   .key_size = {
+   .min = 1,
+   .max = 16,
+   .increment = 1
+   },
+   .digest_size = {
+   

[dpdk-dev] [dpdk-dev v3] crypto/snow3g: add support for digest appended ops

2021-07-21 Thread Kai Ji
This patch enables out-of-place auth-cipher operations where the
digest should be encrypted along with the rest of the raw data.
It also adds support for a partially encrypted digest when using
auth-cipher operations.

Fixes: 7c87e2d7b359 ("crypto/snow3g: use IPsec library")
Cc: pablo.de.lara.gua...@intel.com

Signed-off-by: Damian Nowak 
Signed-off-by: Kai Ji 
---
v3:
- Code rebase
- Documentation update

---
 doc/guides/cryptodevs/features/snow3g.ini |   1 +
 drivers/crypto/snow3g/rte_snow3g_pmd.c| 131 +++---
 2 files changed, 119 insertions(+), 13 deletions(-)

diff --git a/doc/guides/cryptodevs/features/snow3g.ini 
b/doc/guides/cryptodevs/features/snow3g.ini
index 14ac7e4b6d..4d4c5b579b 100644
--- a/doc/guides/cryptodevs/features/snow3g.ini
+++ b/doc/guides/cryptodevs/features/snow3g.ini
@@ -8,6 +8,7 @@ Symmetric crypto   = Y
 Sym operation chaining = Y
 Symmetric sessionless  = Y
 Non-Byte aligned data  = Y
+Digest encrypted   = Y
 OOP LB  In LB  Out = Y
 
 ;
diff --git a/drivers/crypto/snow3g/rte_snow3g_pmd.c 
b/drivers/crypto/snow3g/rte_snow3g_pmd.c
index 9aab357846..a7c012be92 100644
--- a/drivers/crypto/snow3g/rte_snow3g_pmd.c
+++ b/drivers/crypto/snow3g/rte_snow3g_pmd.c
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2016-2018 Intel Corporation
+ * Copyright(c) 2016-2021 Intel Corporation
  */
 
 #include 
@@ -179,6 +179,24 @@ snow3g_get_session(struct snow3g_qp *qp, struct 
rte_crypto_op *op)
return sess;
 }
 
+/** Check if conditions are met for digest-appended operations */
+static uint8_t *
+snow3g_digest_appended_in_src(struct rte_crypto_op *op)
+{
+   unsigned int auth_size, cipher_size;
+
+   auth_size = (op->sym->auth.data.offset >> 3) +
+   (op->sym->auth.data.length >> 3);
+   cipher_size = (op->sym->cipher.data.offset >> 3) +
+   (op->sym->cipher.data.length >> 3);
+
+   if (auth_size < cipher_size)
+   return rte_pktmbuf_mtod_offset(op->sym->m_src,
+   uint8_t *, auth_size);
+
+   return NULL;
+}
+
 /** Encrypt/decrypt mbufs with same cipher key. */
 static uint8_t
 process_snow3g_cipher_op(struct snow3g_qp *qp, struct rte_crypto_op **ops,
@@ -189,20 +207,50 @@ process_snow3g_cipher_op(struct snow3g_qp *qp, struct 
rte_crypto_op **ops,
uint8_t processed_ops = 0;
const void *src[SNOW3G_MAX_BURST];
void *dst[SNOW3G_MAX_BURST];
+   uint8_t *digest_appended[SNOW3G_MAX_BURST];
const void *iv[SNOW3G_MAX_BURST];
uint32_t num_bytes[SNOW3G_MAX_BURST];
+   uint32_t cipher_off, cipher_len;
+   int unencrypted_bytes = 0;
 
for (i = 0; i < num_ops; i++) {
-   src[i] = rte_pktmbuf_mtod(ops[i]->sym->m_src, uint8_t *) +
-   (ops[i]->sym->cipher.data.offset >> 3);
-   dst[i] = ops[i]->sym->m_dst ?
-   rte_pktmbuf_mtod(ops[i]->sym->m_dst, uint8_t *) +
-   (ops[i]->sym->cipher.data.offset >> 3) :
-   rte_pktmbuf_mtod(ops[i]->sym->m_src, uint8_t *) +
-   (ops[i]->sym->cipher.data.offset >> 3);
+   cipher_off = ops[i]->sym->cipher.data.offset >> 3;
+   cipher_len = ops[i]->sym->cipher.data.length >> 3;
+   src[i] = rte_pktmbuf_mtod_offset(
+   ops[i]->sym->m_src, uint8_t *, cipher_off);
+
+   /* If out-of-place operation */
+   if (ops[i]->sym->m_dst &&
+   ops[i]->sym->m_src != ops[i]->sym->m_dst) {
+   dst[i] = rte_pktmbuf_mtod_offset(
+   ops[i]->sym->m_dst, uint8_t *, cipher_off);
+
+   /* In case of out-of-place, auth-cipher operation
+* with partial encryption of the digest, copy
+* the remaining, unencrypted part.
+*/
+   if (session->op == SNOW3G_OP_AUTH_CIPHER)
+   unencrypted_bytes =
+   (ops[i]->sym->auth.data.offset >> 3) +
+   (ops[i]->sym->auth.data.length >> 3) +
+   (SNOW3G_DIGEST_LENGTH) -
+   cipher_off - cipher_len;
+   if (unencrypted_bytes > 0)
+   rte_memcpy(
+   rte_pktmbuf_mtod_offset(
+   ops[i]->sym->m_dst, uint8_t *,
+   cipher_off + cipher_len),
+   rte_pktmbuf_mtod_offset(
+   ops[i]->sym->m_src, uint8_t *,
+   cipher_off + cipher_len),
+   unencrypted_bytes);
+ 

[dpdk-dev] [PATCH] net/virtio: report maximum MTU in device info

2021-07-21 Thread Andrew Rybchenko
From: Ivan Ilchenko 

Fix the driver to report maximum MTU obtained from config if
VIRTIO_NET_F_MTU is supported or calculated based on maximum
Rx packet length.

Fixes: ad97ceece12c ("ethdev: add min/max MTU to device info")
Cc: sta...@dpdk.org

Signed-off-by: Ivan Ilchenko 
Signed-off-by: Andrew Rybchenko 
---
 drivers/net/virtio/virtio_ethdev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/virtio/virtio_ethdev.c 
b/drivers/net/virtio/virtio_ethdev.c
index 6d6e105960..af6305e9d8 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -2502,6 +2502,7 @@ virtio_dev_info_get(struct rte_eth_dev *dev, struct 
rte_eth_dev_info *dev_info)
dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN;
dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;
+   dev_info->max_mtu = hw->max_mtu;
 
host_features = VIRTIO_OPS(hw)->get_features(hw);
dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
-- 
2.30.2



[dpdk-dev] [PATCH v1 0/1] power: check freq count before filling the freqs array

2021-07-21 Thread Richael Zhuang
v1:
adding check for freq count

Richael Zhuang (1):
  power: check freq count before filling the freqs array

 lib/power/power_cppc_cpufreq.c   | 5 +
 lib/power/power_pstate_cpufreq.c | 5 +
 2 files changed, 10 insertions(+)

-- 
2.20.1



[dpdk-dev] [PATCH v1 1/1] power: check freq count before filling the freqs array

2021-07-21 Thread Richael Zhuang
The freqs array size is RTE_MAX_LCORE_FREQS. Before filling the
array with num_freqs elements, restrict the total number to
RTE_MAX_LCORE_FREQS. This fixes a Coverity scan issue such as:
Overrunning array "pi->freqs" of 256 bytes by passing it to a
function which accesses it at byte offset 464.

Coverity issue: 371913

Signed-off-by: Richael Zhuang 
---
 lib/power/power_cppc_cpufreq.c   | 5 +
 lib/power/power_pstate_cpufreq.c | 5 +
 2 files changed, 10 insertions(+)

diff --git a/lib/power/power_cppc_cpufreq.c b/lib/power/power_cppc_cpufreq.c
index e92973ab54..db63c2cc10 100644
--- a/lib/power/power_cppc_cpufreq.c
+++ b/lib/power/power_cppc_cpufreq.c
@@ -246,6 +246,11 @@ power_get_available_freqs(struct cppc_power_info *pi)
pi->nominal_perf * UNIT_DIFF : pi->nominal_perf;
num_freqs = (nominal_perf - scaling_min_freq) / BUS_FREQ + 1 +
pi->turbo_available;
+   if (num_freqs >= RTE_MAX_LCORE_FREQS) {
+   RTE_LOG(ERR, POWER, "Too many available frequencies : %d\n",
+   num_freqs);
+   goto out;
+   }
 
/* Generate the freq bucket array. */
for (i = 0, pi->nb_freqs = 0; i < num_freqs; i++) {
diff --git a/lib/power/power_pstate_cpufreq.c b/lib/power/power_pstate_cpufreq.c
index 3b607515fd..619090c8d1 100644
--- a/lib/power/power_pstate_cpufreq.c
+++ b/lib/power/power_pstate_cpufreq.c
@@ -419,6 +419,11 @@ power_get_available_freqs(struct pstate_power_info *pi)
 */
num_freqs = (base_max_freq - sys_min_freq) / BUS_FREQ + 1 +
pi->turbo_available;
+   if (num_freqs >= RTE_MAX_LCORE_FREQS) {
+   RTE_LOG(ERR, POWER, "Too many available frequencies : %d\n",
+   num_freqs);
+   goto out;
+   }
 
/* Generate the freq bucket array.
 * If turbo is available the freq bucket[0] value is base_max +1
-- 
2.20.1



Re: [dpdk-dev] [PATCH v3] net/virtio: fix Rx scatter offload

2021-07-21 Thread Andrew Rybchenko

On 7/20/21 7:19 PM, Maxime Coquelin wrote:



On 7/20/21 9:54 AM, Andrew Rybchenko wrote:

From: Ivan Ilchenko 

Report Rx scatter offload capability depending on VIRTIO_NET_F_MRG_RXBUF.

If Rx scatter is not requested, ensure that provided Rx buffers on
each Rx queue are big enough to fit Rx packets up to configured MTU.

Fixes: ce17eddefc20 ("ethdev: introduce Rx queue offloads API")
Cc: sta...@dpdk.org

Signed-off-by: Ivan Ilchenko 
Signed-off-by: Andrew Rybchenko 
Reviewed-by: Maxime Coquelin 
---
v3:
  - fix segfault on MTU set if an Rx queue is not setup

v2:
  - do not overwrite Rx offloads when Rx scatter is added

  drivers/net/virtio/virtio.h|  2 +
  drivers/net/virtio/virtio_ethdev.c | 65 ++
  drivers/net/virtio/virtio_ethdev.h |  5 +++
  drivers/net/virtio/virtio_rxtx.c   | 10 +
  4 files changed, 82 insertions(+)



Thanks for the fix.
I see my R-by is already there, but I confirm this is good to me.


It was inherited from v1, since changes from v1 to v3 are really minor
fixes.


Re: [dpdk-dev] [EXT] Re: [PATCH v2 1/2] drivers: add octeontx crypto adapter framework

2021-07-21 Thread Thomas Monjalon
20/07/2021 14:14, David Marchand:
> On Tue, Jul 20, 2021 at 1:59 PM Akhil Goyal  wrote:
> >
> >  Hi David,
> > >
> > > > >  deps += ['common_octeontx', 'mempool_octeontx', 'bus_vdev',
> > > > 'net_octeontx']
> > > > > +deps += ['crypto_octeontx']
> > > >
> > > > This extra dependency resulted in disabling the event/octeontx driver
> > > > in FreeBSD, since crypto/octeontx only builds on Linux.
> > > > Removing hw support triggers a ABI failure for FreeBSD.
> > > >
> > > >
> > > > - This had been reported by UNH CI:
> > > > http://mails.dpdk.org/archives/test-report/2021-June/200637.html
> > > > It seems the result has been ignored but it should have at least
> > > > raised some discussion.
> > > >
> > > This was highlighted to CI ML
> > > http://patches.dpdk.org/project/dpdk/patch/0686a7c3fb3a22e37378a8545b
> > > c37bce04f4c391.1624481225.git.sthot...@marvell.com/
> > >
> > > but I think I missed to take the follow up with Brandon and applied the 
> > > patch
> > > as it did not look an issue to me as octeon drivers are not currently 
> > > built on
> > > FreeBSD.
> > > Not sure why event driver is getting built there.
> > >
> > > >
> > > > - I asked UNH to stop testing FreeBSD abi for now, waiting to get the
> > > > main branch fixed.
> > > >
> > > > I don't have the time to look at this, please can you work on it?
> > > >
> > > > Several options:
> > > > * crypto/octeontx is made so that it compiles on FreeBSD,
> > > > * the abi check is extended to have exceptions per OS,
> > > > * the FreeBSD abi reference is regenerated at UNH not to have those
> > > > drivers in it (not sure it is doable),
> > >
> > > Thanks for the suggestions, we are working on it to resolve this as soon 
> > > as
> > > possible.
> > > We may need to add exception in ABI checking so that it does not shout if 
> > > a
> > > PMD
> > > is not compiled.
> > Can we have below change? Will it work to disable compilation of
> > event/octeontx2 for FreeBSD? I believe this was done by mistake earlier
> > as all other octeontx2 drivers are compiled off on platforms other than 
> > Linux.
> >
> > diff --git a/drivers/event/octeontx2/meson.build 
> > b/drivers/event/octeontx2/meson.build
> > index 96ebb1f2e7..1ebc51f73f 100644
> > --- a/drivers/event/octeontx2/meson.build
> > +++ b/drivers/event/octeontx2/meson.build
> > @@ -2,7 +2,7 @@
> >  # Copyright(C) 2019 Marvell International Ltd.
> >  #
> >
> > -if not dpdk_conf.get('RTE_ARCH_64')
> > +if not is_linux or not dpdk_conf.get('RTE_ARCH_64')
> >  build = false
> >  reason = 'only supported on 64-bit'
> >  subdir_done()
> 
> I did not suggest this possibility.
> That's the same as for other octeon drivers, such change has been
> deferred to 21.11.
> https://patches.dpdk.org/project/dpdk/list/?series=15885
> 
> >
> > Or of this does not work, then we would need to add exception in ABI 
> > checking.
> > Any suggestions how to do this?
> 
> Sorry, no good idea from me.

We would need to revert the change breaking the ABI test.
But I don't understand why it seems to be passing in recent CI runs?




Re: [dpdk-dev] [PATCH_v4 2/3] regex/mlx5: fix leak in PCI remove function

2021-07-21 Thread Ori Kam
Hi Michael,

> -Original Message-
> From: dev  On Behalf Of Michael Baum
> Sent: Monday, July 12, 2021 10:07 AM
> 
> In the PCI removal function, PMD releases all driver resources allocated in
> the probe function.
> 
> The MR btree memory is allocated in the probe function, but it is not freed in
> remove function what caused a memory leak.
> 
> Release it.
> 
> Fixes: cda883bbb655 ("regex/mlx5: add dynamic memory registration to
> datapath")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Michael Baum 
> ---
>  drivers/regex/mlx5/mlx5_regex.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/drivers/regex/mlx5/mlx5_regex.c
> b/drivers/regex/mlx5/mlx5_regex.c index 0f12d94d7e..f64dc2824c 100644
> --- a/drivers/regex/mlx5/mlx5_regex.c
> +++ b/drivers/regex/mlx5/mlx5_regex.c
> @@ -280,6 +280,8 @@ mlx5_regex_pci_remove(struct rte_pci_device
> *pci_dev)
>   if (TAILQ_EMPTY(&mlx5_mem_event_list))
> 
>   rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB",
> NULL);
> + if (priv->mr_scache.cache.table)
> + mlx5_mr_release_cache(&priv->mr_scache);
>   if (priv->pd)
>   mlx5_glue->dealloc_pd(priv->pd);
>   if (priv->uar)
> --
> 2.25.1

Acked-by: Ori Kam 
Thanks,
Ori



Re: [dpdk-dev] [PATCH_v4 3/3] regex/mlx5: fix redundancy in PCI remove function

2021-07-21 Thread Ori Kam
Hi Michael,

> -Original Message-
> From: Michael Baum 
> Sent: Monday, July 12, 2021 10:07 AM
> In the PCI removal function, PMD releases all driver resources and cancels
> the regexdev registry.
> 
> However, regexdev registration is accidentally canceled twice.
> 
> Remove one of them.
> 
> Fixes: b34d816363b5 ("regex/mlx5: support rules import")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Michael Baum 
> ---
>  drivers/regex/mlx5/mlx5_regex.c | 2 --
>  1 file changed, 2 deletions(-)
> 
> diff --git a/drivers/regex/mlx5/mlx5_regex.c
> b/drivers/regex/mlx5/mlx5_regex.c index f64dc2824c..1c5bf930ad 100644
> --- a/drivers/regex/mlx5/mlx5_regex.c
> +++ b/drivers/regex/mlx5/mlx5_regex.c
> @@ -290,8 +290,6 @@ mlx5_regex_pci_remove(struct rte_pci_device
> *pci_dev)
>   rte_regexdev_unregister(priv->regexdev);
>   if (priv->ctx)
>   mlx5_glue->close_device(priv->ctx);
> - if (priv->regexdev)
> - rte_regexdev_unregister(priv->regexdev);
>   rte_free(priv);
>   }
>   return 0;
> --
> 2.25.1

Acked-by: Ori Kam 
Thanks,
Ori



Re: [dpdk-dev] [PATCH_v4 1/3] regex/mlx5: fix memory region unregistration

2021-07-21 Thread Ori Kam
Hi Michael,

> -Original Message-
> From: Michael Baum 
> Sent: Monday, July 12, 2021 10:07 AM
> 
> The issue can cause illegal physical address access while a huge-page A is
> released and huge-page B is allocated on the same virtual address.
> The old MR can be matched using the virtual address of huge-page B but the
> HW will access the physical address of huge-page A which is no more part of
> the DPDK process.
> 
> Register a driver callback for memory event in order to free out all the MRs 
> of
> memory that is going to be freed from the dpdk process.
> 
> Fixes: cda883bbb655 ("regex/mlx5: add dynamic memory registration to
> datapath")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Michael Baum 
> ---
>  drivers/regex/mlx5/mlx5_regex.c  | 55 
>  drivers/regex/mlx5/mlx5_regex.h  |  2 +
>  drivers/regex/mlx5/mlx5_regex_control.c  |  2 +
> drivers/regex/mlx5/mlx5_regex_fastpath.c | 50 +++--
>  4 files changed, 97 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/regex/mlx5/mlx5_regex.c
> b/drivers/regex/mlx5/mlx5_regex.c index dcb2ced88e..0f12d94d7e 100644
> --- a/drivers/regex/mlx5/mlx5_regex.c
> +++ b/drivers/regex/mlx5/mlx5_regex.c
> @@ -11,6 +11,7 @@
>  #include 
> 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -24,6 +25,10 @@
> 
>  int mlx5_regex_logtype;
> 
> +TAILQ_HEAD(regex_mem_event, mlx5_regex_priv) mlx5_mem_event_list
> =
> +
>   TAILQ_HEAD_INITIALIZER(mlx5_mem_event_list);
> +static pthread_mutex_t mem_event_list_lock =
> PTHREAD_MUTEX_INITIALIZER;
> +
>  const struct rte_regexdev_ops mlx5_regexdev_ops = {
>   .dev_info_get = mlx5_regex_info_get,
>   .dev_configure = mlx5_regex_configure, @@ -82,6 +87,40 @@
> mlx5_regex_get_name(char *name, struct rte_pci_device *pci_dev
> __rte_unused)
>   pci_dev->addr.devid, pci_dev->addr.function);  }
> 
> +/**
> + * Callback for memory event.
> + *
> + * @param event_type
> + *   Memory event type.
> + * @param addr
> + *   Address of memory.
> + * @param len
> + *   Size of memory.
> + */
> +static void
> +mlx5_regex_mr_mem_event_cb(enum rte_mem_event event_type,
> const void *addr,
> +size_t len, void *arg __rte_unused) {
> + struct mlx5_regex_priv *priv;
> +
> + /* Must be called from the primary process. */
> + MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
> + switch (event_type) {
> + case RTE_MEM_EVENT_FREE:
> + pthread_mutex_lock(&mem_event_list_lock);
> + /* Iterate all the existing mlx5 devices. */
> + TAILQ_FOREACH(priv, &mlx5_mem_event_list,
> mem_event_cb)
> + mlx5_free_mr_by_addr(&priv->mr_scache,
> +  priv->ctx->device->name,
> +  addr, len);
> + pthread_mutex_unlock(&mem_event_list_lock);
> + break;
> + case RTE_MEM_EVENT_ALLOC:
> + default:
> + break;
> + }
> +}
> +
>  static int
>  mlx5_regex_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
>struct rte_pci_device *pci_dev)
> @@ -193,6 +232,15 @@ mlx5_regex_pci_probe(struct rte_pci_driver
> *pci_drv __rte_unused,
>   rte_errno = ENOMEM;
>   goto error;
>   }
> + /* Register callback function for global shared MR cache
> management. */
> + if (TAILQ_EMPTY(&mlx5_mem_event_list))
> +
>   rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
> +
>   mlx5_regex_mr_mem_event_cb,
> + NULL);
> + /* Add device to memory callback list. */
> + pthread_mutex_lock(&mem_event_list_lock);
> + TAILQ_INSERT_TAIL(&mlx5_mem_event_list, priv, mem_event_cb);
> + pthread_mutex_unlock(&mem_event_list_lock);
>   DRV_LOG(INFO, "RegEx GGA is %s.",
>   priv->has_umr ? "supported" : "unsupported");
>   return 0;
> @@ -225,6 +273,13 @@ mlx5_regex_pci_remove(struct rte_pci_device
> *pci_dev)
>   return 0;
>   priv = dev->data->dev_private;
>   if (priv) {
> + /* Remove from memory callback device list. */
> + pthread_mutex_lock(&mem_event_list_lock);
> + TAILQ_REMOVE(&mlx5_mem_event_list, priv,
> mem_event_cb);
> + pthread_mutex_unlock(&mem_event_list_lock);
> + if (TAILQ_EMPTY(&mlx5_mem_event_list))
> +
>   rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB",
> +   NULL);
>   if (priv->pd)
>   mlx5_glue->dealloc_pd(priv->pd);
>   if (priv->uar)
> diff --git a/drivers/regex/mlx5/mlx5_regex.h
> b/drivers/regex/mlx5/mlx5_regex.h index 51a2101e53..61f59ba873 100644
> --- a/drivers/regex/mlx5/mlx5_regex.h
> +++ b/drivers/regex/mlx5/mlx5_regex.h
> @@ -70,6 +70,8 @@ struct mlx5_regex_priv {
>   uint32_t nb_engines; /* Number of RegEx engines. */
>  

Re: [dpdk-dev] [PATCH] app/testpmd: fix TX checksum calculation for tunnel

2021-07-21 Thread Ori Kam
Hi Gregory,

> -Original Message-
> From: dev  On Behalf Of Gregory Etelson
> Sent: Monday, July 19, 2021 11:33 AM
> 
> TX checksum of a tunnelled packet can be calculated for outer headers only
> or for both outer and inner parts. The calculation method is determined by
> application.
> If TX checksum calculation can be offloaded, hardware ignores existing
> checksum value and replaces it with an updated result.
> If TX checksum is calculated by a software, existing value must be zeroed
> first.
> The testpmd checksum forwarding engine always zeroed inner checksums.
> If inner checksum calculation was offloaded, that header was left with 0
> checksum value.
> Following outer software checksum calculation produced wrong value.
> The patch zeroes inner IPv4 checksum only before software calculation.
> 
> Fixes: 51f694dd40f5 ("app/testpmd: rework checksum forward engine")
> 
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Gregory Etelson 
> Reviewed-by: Dmitry Kozlyuk 
> ---
>  app/test-pmd/csumonly.c | 23 ---
>  1 file changed, 12 insertions(+), 11 deletions(-)
> 
> diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c index
> 089936587b..a658cd5389 100644
> --- a/app/test-pmd/csumonly.c
> +++ b/app/test-pmd/csumonly.c
> @@ -480,17 +480,18 @@ process_inner_cksums(void *l3_hdr, const struct
> testpmd_offload_info *info,
> 
>   if (info->ethertype == _htons(RTE_ETHER_TYPE_IPV4)) {
>   ipv4_hdr = l3_hdr;
> - ipv4_hdr->hdr_checksum = 0;
> 
>   ol_flags |= PKT_TX_IPV4;
>   if (info->l4_proto == IPPROTO_TCP && tso_segsz) {
>   ol_flags |= PKT_TX_IP_CKSUM;
>   } else {
> - if (tx_offloads & DEV_TX_OFFLOAD_IPV4_CKSUM)
> + if (tx_offloads & DEV_TX_OFFLOAD_IPV4_CKSUM) {
>   ol_flags |= PKT_TX_IP_CKSUM;
> - else
> + } else if (ipv4_hdr->hdr_checksum) {
> + ipv4_hdr->hdr_checksum = 0;
>   ipv4_hdr->hdr_checksum =
>   rte_ipv4_cksum(ipv4_hdr);
> + }
>   }
>   } else if (info->ethertype == _htons(RTE_ETHER_TYPE_IPV6))
>   ol_flags |= PKT_TX_IPV6;
> @@ -501,10 +502,10 @@ process_inner_cksums(void *l3_hdr, const struct
> testpmd_offload_info *info,
>   udp_hdr = (struct rte_udp_hdr *)((char *)l3_hdr + info-
> >l3_len);
>   /* do not recalculate udp cksum if it was 0 */
>   if (udp_hdr->dgram_cksum != 0) {
> - udp_hdr->dgram_cksum = 0;
> - if (tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM)
> + if (tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM) {
>   ol_flags |= PKT_TX_UDP_CKSUM;
> - else {
> + } else if (udp_hdr->dgram_cksum) {
> + udp_hdr->dgram_cksum = 0;
>   udp_hdr->dgram_cksum =
>   get_udptcp_checksum(l3_hdr,
> udp_hdr,
>   info->ethertype);
> @@ -514,12 +515,12 @@ process_inner_cksums(void *l3_hdr, const struct
> testpmd_offload_info *info,
>   ol_flags |= PKT_TX_UDP_SEG;
>   } else if (info->l4_proto == IPPROTO_TCP) {
>   tcp_hdr = (struct rte_tcp_hdr *)((char *)l3_hdr + info-
> >l3_len);
> - tcp_hdr->cksum = 0;
>   if (tso_segsz)
>   ol_flags |= PKT_TX_TCP_SEG;
> - else if (tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM)
> + else if (tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM) {
>   ol_flags |= PKT_TX_TCP_CKSUM;
> - else {
> + } else if (tcp_hdr->cksum) {
> + tcp_hdr->cksum = 0;
>   tcp_hdr->cksum =
>   get_udptcp_checksum(l3_hdr, tcp_hdr,
>   info->ethertype);
> @@ -529,13 +530,13 @@ process_inner_cksums(void *l3_hdr, const struct
> testpmd_offload_info *info,
>   } else if (info->l4_proto == IPPROTO_SCTP) {
>   sctp_hdr = (struct rte_sctp_hdr *)
>   ((char *)l3_hdr + info->l3_len);
> - sctp_hdr->cksum = 0;
>   /* sctp payload must be a multiple of 4 to be
>* offloaded */
>   if ((tx_offloads & DEV_TX_OFFLOAD_SCTP_CKSUM) &&
>   ((ipv4_hdr->total_length & 0x3) == 0)) {
>   ol_flags |= PKT_TX_SCTP_CKSUM;
> - } else {
> + } else if (sctp_hdr->cksum) {
> + sctp_hdr->cksum = 0;
>   /* XXX implement CRC32c, example available in
>* RFC3309 */
>   }
> --
> 2.31.1

Acked-by: Ori Kam 
Thanks,
Ori



Re: [dpdk-dev] [dpdk-announce] release candidate 21.08-rc1

2021-07-21 Thread Jiang, YuX
All,
Here is the test status update for the Intel part. As of now, dpdk21.08-rc1 testing is finished
and no critical issue has been found.
# Basic Intel(R) NIC testing
* Build or compile:
*Build: cover the build test combination with latest GCC/Clang/ICC 
version and the popular OS revision such as Ubuntu20.04, Fedora34, etc.
- All passed.
*Compile: cover the CFLAGS (O0/O1/O2/O3) with popular OS such as 
Ubuntu20.04 and Fedora34.
- All passed.
* PF(i40e, ixgbe): test scenarios including 
RTE_FLOW/TSO/Jumboframe/checksum offload/VLAN/VXLAN, etc.
- All tests are done.
- One new issue about distributor was found: a core dump occurs when
executing distributor_autotest under dpdk-test in a 32-bit Ubuntu20.04
build environment. Intel Dev. has provided a patch to fix it.
- One known bug https://bugs.dpdk.org/show_bug.cgi?id=687: Dev 
has provided patch to fix it, the patch is waiting for merge.
* VF(i40e, ixgbe): test scenarios including 
VF-RTE_FLOW/TSO/Jumboframe/checksum offload/VLAN/VXLAN, etc.
- All passed. No new issue is found.
* PF/VF(ice): test scenarios including Switch features/Package 
Management/Flow Director/Advanced Tx/Advanced RSS/ACL/DCF/Share code 
update/Flexible Descriptor, etc.
- All tests are done. More than 8 bugs about 
cvl_ipfragment_rte_flow/cvl_fdir/iavf_fdir/cvl_advanced_rss/CVL_Qos issues are 
found. Intel Dev. are working on them.
* Intel NIC single core/NIC performance: test scenarios including PF/VF 
single core performance test, RFC2544 Zero packet loss performance test, etc.
- All passed.
* Power and IPsec:
* Power: test scenarios including bi-direction/Telemetry/Empty 
Poll Lib/Priority Base Frequency, etc.
- All passed.
* IPsec: test scenarios including ipsec/ipsec-gw/ipsec library 
basic test - QAT&SW/FIB library, etc.
- All passed.
# Basic cryptodev and virtio testing
* Virtio: both function and performance test are covered. Such as 
PVP/Virtio_loopback/virtio-user loopback/virtio-net VM2VM perf testing/VMware 
ESXi 7.0u2, etc.
- All tests are done. No new issue is found except known issue.
* Cryptodev:
*Function test: test scenarios including Cryptodev API 
testing/CompressDev ISA-L/QAT/ZLIB PMD Testing/FIPS, etc.
- All tests are done. No new issue is found except 
known issue.
*Performance test: test scenarios including Throughput 
Performance /Cryptodev Latency, etc.
- All tests are done. No big perf drop.

Best regards,
Yu Jiang

> -Original Message-
> From: dev  On Behalf Of Thomas Monjalon
> Sent: Saturday, July 10, 2021 6:05 PM
> To: annou...@dpdk.org
> Subject: [dpdk-dev] [dpdk-announce] release candidate 21.08-rc1
>
> A new DPDK release candidate is ready for testing:
>   https://git.dpdk.org/dpdk/tag/?id=v21.08-rc1
>
> There are 517 new patches in this snapshot.
> This release cycle is short and should be small.
>
> Release notes:
>   https://doc.dpdk.org/guides/rel_notes/release_21_08.html
>
> Highlights of 21.08-rc1:
>   - Linux auxiliary bus
>   - Aarch32 cross-compilation
>   - Arm CPPC power management
>   - Rx multi-queue monitoring for power management
>   - XZ compressed firmware read
>   - Marvell CNXK drivers for ethernet, crypto and baseband PHY
>
> Please test and report issues on bugs.dpdk.org.
>
> DPDK 21.08-rc2 is expected in less than two weeks.
>
> Thank you everyone
>



[dpdk-dev] [PATCH v2 0/3] support new format meter

2021-07-21 Thread Rongwei Liu
Add option "policy-mtr" and "policy-g_actions" for meter with policy
Add option "meter-cir" to specify meter CIR value
Add option "packet-mode" to identify pps or bps based profile

Rongwei Liu (3):
  app/flow-perf: support meter policy API
  app/flow-perf: add new meter CIR Configuration
  app/flow-perf: add the supports for meter PPS

 app/test-flow-perf/main.c  | 133 +++--
 doc/guides/tools/flow-perf.rst |  12 +++
 2 files changed, 137 insertions(+), 8 deletions(-)

-- 
2.27.0



[dpdk-dev] [PATCH v2 1/3] app/flow-perf: support meter policy API

2021-07-21 Thread Rongwei Liu
Add option "policy-mtr" to indicate whether meter creation will include a
policy or not. Meter creation remains unchanged without it.

With "policy-mtr", policy is introduced. API create_meter_policy
is to create a policy. API create_meter_rule will use it to create
meter.

Add option "policy-g_actions" to specify meter policy green color actions.
W/o this, policy creation will fail since there is no default one.

Signed-off-by: Haifei Luo 
Signed-off-by: Jiawei Wang 
Signed-off-by: Rongwei Liu 
---
 app/test-flow-perf/main.c  | 121 ++---
 doc/guides/tools/flow-perf.rst |   6 ++
 2 files changed, 119 insertions(+), 8 deletions(-)

diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index 9be8edc31d..e0d94f943a 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -37,6 +37,7 @@
 #include 
 
 #include "config.h"
+#include "actions_gen.h"
 #include "flow_gen.h"
 
 #define MAX_BATCHES_COUNT  100
@@ -49,10 +50,12 @@ static uint8_t flow_group;
 
 static uint64_t encap_data;
 static uint64_t decap_data;
+static uint64_t g_actions;
 
 static uint64_t flow_items[MAX_ITEMS_NUM];
 static uint64_t flow_actions[MAX_ACTIONS_NUM];
 static uint64_t flow_attrs[MAX_ATTRS_NUM];
+static uint32_t g_policy_id[MAX_PORTS];
 static uint8_t items_idx, actions_idx, attrs_idx;
 
 static uint64_t ports_mask;
@@ -62,6 +65,7 @@ static bool delete_flag;
 static bool dump_socket_mem_flag;
 static bool enable_fwd;
 static bool unique_data;
+static bool policy_mtr;
 
 static struct rte_mempool *mbuf_mp;
 static uint32_t nb_lcores;
@@ -69,6 +73,7 @@ static uint32_t rules_count;
 static uint32_t rules_batch;
 static uint32_t hairpin_queues_num; /* total hairpin q number - default: 0 */
 static uint32_t nb_lcores;
+static uint64_t meter_cir;
 
 #define MAX_PKT_BURST32
 #define LCORE_MODE_PKT1
@@ -134,6 +139,8 @@ usage(char *progname)
printf("  --portmask=N: hexadecimal bitmask of ports used\n");
printf("  --unique-data: flag to set using unique data for all"
" actions that support data, such as header modify and encap 
actions\n");
+   printf("  --policy-mtr: To create meter with policy\n");
+   printf("  --policy-g_actions: To set meter policy green color 
actions\n");
 
printf("To set flow attributes:\n");
printf("  --ingress: set ingress attribute in flows\n");
@@ -573,6 +580,9 @@ args_parse(int argc, char **argv)
{ "unique-data",0, 0, 0 },
{ "portmask",   1, 0, 0 },
{ "cores",  1, 0, 0 },
+   { "policy-mtr", 0, 0, 0 },
+   { "policy-g_actions",   1, 0, 0 },
+   { "meter-profile-alg",  1, 0, 0 },
/* Attributes */
{ "ingress",0, 0, 0 },
{ "egress", 0, 0, 0 },
@@ -802,6 +812,32 @@ args_parse(int argc, char **argv)
RTE_MAX_LCORE);
}
}
+   if (strcmp(lgopts[opt_idx].name, "policy-mtr") == 0)
+   policy_mtr = true;
+   if (strcmp(lgopts[opt_idx].name,
+   "policy-g_actions") == 0) {
+   token = strtok(optarg, ",");
+   while (token != NULL) {
+   for (i = 0;
+i < RTE_DIM(flow_options); i++) {
+   if (strcmp(optarg,
+   flow_options[i].str) == 0) {
+   g_actions |=
+   flow_options[i].mask;
+   break;
+   }
+   }
+   /* Reached last item with no match */
+   if (i == (RTE_DIM(flow_options) - 1)) {
+   fprintf(stderr,
+   "Invalid g_actions "
+   "item: %s\n", token);
+   usage(argv[0]);
+   rte_exit(EXIT_SUCCESS, "Invalid 
g_actions item\n");
+   }
+   token = strtok(NULL, ",");
+   }
+   }
break;
default:
usage(argv[0]);
@@ -912,6 +948,62 @@ has_meter(void)
return 0;
 }
 
+static void
+create_meter_policy(void)
+{
+  

[dpdk-dev] [PATCH v2 2/3] app/flow-perf: add new meter CIR Configuration

2021-07-21 Thread Rongwei Liu
Add the new meter CIR configuration parameter, so that the user can set a
different value for the committed information rate (CIR) parameter.

The usage is as below:
--meter-cir=N, default count is 125.

Signed-off-by: Jiawei Wang 
Signed-off-by: Rongwei Liu 
---
 app/test-flow-perf/main.c  | 8 +++-
 doc/guides/tools/flow-perf.rst | 3 +++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index e0d94f943a..dd0aac8b06 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -141,6 +141,8 @@ usage(char *progname)
" actions that support data, such as header modify and encap 
actions\n");
printf("  --policy-mtr: To create meter with policy\n");
printf("  --policy-g_actions: To set meter policy green color 
actions\n");
+   printf("  --meter-cir=N: to set committed information rate(CIR)"
+   " parameter in meter profile, default is %d\n", METER_CIR);
 
printf("To set flow attributes:\n");
printf("  --ingress: set ingress attribute in flows\n");
@@ -582,7 +584,7 @@ args_parse(int argc, char **argv)
{ "cores",  1, 0, 0 },
{ "policy-mtr", 0, 0, 0 },
{ "policy-g_actions",   1, 0, 0 },
-   { "meter-profile-alg",  1, 0, 0 },
+   { "meter-cir",  1, 0, 0 },
/* Attributes */
{ "ingress",0, 0, 0 },
{ "egress", 0, 0, 0 },
@@ -814,6 +816,10 @@ args_parse(int argc, char **argv)
}
if (strcmp(lgopts[opt_idx].name, "policy-mtr") == 0)
policy_mtr = true;
+   if (strcmp(lgopts[opt_idx].name, "meter-cir") == 0) {
+   n = atoi(optarg);
+   meter_cir = (uint64_t) n;
+   }
if (strcmp(lgopts[opt_idx].name,
"policy-g_actions") == 0) {
token = strtok(optarg, ",");
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
index 90b6934537..113e078eb5 100644
--- a/doc/guides/tools/flow-perf.rst
+++ b/doc/guides/tools/flow-perf.rst
@@ -105,6 +105,9 @@ The command line options are:
 Such as header modify and encap actions. Default is using fixed
 data for any action that support data for all flows.
 
+*  ``--meter-cir=N``
+   Set the committed information rate(CIR) parameter, default count is 
125.
+
 Attributes:
 
 *  ``--ingress``
-- 
2.27.0



[dpdk-dev] [PATCH v2 3/3] app/flow-perf: add the supports for meter PPS

2021-07-21 Thread Rongwei Liu
The flow perf application uses srtcm_rfc2697 as the meter profile
when doing the meter testing.

This patch adds support for the new configuration parameter
'--packet-mode' to generate the meter flows with the packet mode.

Signed-off-by: Jiawei Wang 
Signed-off-by: Rongwei Liu 
---
 app/test-flow-perf/main.c  | 6 ++
 doc/guides/tools/flow-perf.rst | 3 +++
 2 files changed, 9 insertions(+)

diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index dd0aac8b06..d2003108cc 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -66,6 +66,7 @@ static bool dump_socket_mem_flag;
 static bool enable_fwd;
 static bool unique_data;
 static bool policy_mtr;
+static bool packet_mode;
 
 static struct rte_mempool *mbuf_mp;
 static uint32_t nb_lcores;
@@ -143,6 +144,7 @@ usage(char *progname)
printf("  --policy-g_actions: To set meter policy green color 
actions\n");
printf("  --meter-cir=N: to set committed information rate(CIR)"
" parameter in meter profile, default is %d\n", METER_CIR);
+   printf("  --packet-mode: To enable packet mode for meter profile\n");
 
printf("To set flow attributes:\n");
printf("  --ingress: set ingress attribute in flows\n");
@@ -585,6 +587,7 @@ args_parse(int argc, char **argv)
{ "policy-mtr", 0, 0, 0 },
{ "policy-g_actions",   1, 0, 0 },
{ "meter-cir",  1, 0, 0 },
+   { "packet-mode",0, 0, 0 },
/* Attributes */
{ "ingress",0, 0, 0 },
{ "egress", 0, 0, 0 },
@@ -820,6 +823,8 @@ args_parse(int argc, char **argv)
n = atoi(optarg);
meter_cir = (uint64_t) n;
}
+   if (strcmp(lgopts[opt_idx].name, "packet-mode") == 0)
+   packet_mode = true;
if (strcmp(lgopts[opt_idx].name,
"policy-g_actions") == 0) {
token = strtok(optarg, ",");
@@ -1165,6 +1170,7 @@ create_meter_profile(void)
mp.srtcm_rfc2697.cir = meter_cir;
mp.srtcm_rfc2697.cbs = meter_cir / 8;
mp.srtcm_rfc2697.ebs = 0;
+   mp.packet_mode = packet_mode;
ret = rte_mtr_meter_profile_add
(port_id, DEFAULT_METER_PROF_ID, &mp, &error);
if (ret != 0) {
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
index 113e078eb5..1ec0d5d408 100644
--- a/doc/guides/tools/flow-perf.rst
+++ b/doc/guides/tools/flow-perf.rst
@@ -108,6 +108,9 @@ The command line options are:
 *  ``--meter-cir=N``
Set the committed information rate(CIR) parameter, default count is 
125.
 
+*  ``--packet-mode``
+   Enable packets mode for meter profile.
+
 Attributes:
 
 *  ``--ingress``
-- 
2.27.0



Re: [dpdk-dev] [PATCH 2/2] examples/pipeline: fix incorrect array out of bounds check

2021-07-21 Thread Thomas Monjalon
12/07/2021 20:18, Cristian Dumitrescu:
> Fix the incorrect array out of bounds check within the function
> pipeline_selector_group_member_read().
> 
> Coverity issue: 371911
> Fixes: 598fe0dd0d8e3 ("examples/pipeline: support selector table")
> 
> Signed-off-by: Cristian Dumitrescu 

Patchset applied, thanks.





Re: [dpdk-dev] [PATCH 2/4] ethdev: move jumbo frame offload check to library

2021-07-21 Thread Ferruh Yigit
On 7/13/2021 2:48 PM, Andrew Rybchenko wrote:
> On 7/9/21 8:29 PM, Ferruh Yigit wrote:
>> Setting MTU bigger than RTE_ETHER_MTU requires the jumbo frame support,
>> and application should enable the jumbo frame offload support for it.
>>
>> When jumbo frame offload is not enabled by application, but MTU bigger
>> than RTE_ETHER_MTU is requested there are two options, either fail or
>> enable jumbo frame offload implicitly.
>>
>> Enabling jumbo frame offload implicitly is selected by many drivers
>> since setting a big MTU value already implies it, and this increases
>> usability.
>>
>> This patch moves this logic from drivers to the library, both to reduce
>> the duplicated code in the drivers and to make behaviour more visible.
>>
>> Signed-off-by: Ferruh Yigit 
> 
> Very good cleanup, many thanks.
> 
> Reviewed-by: Andrew Rybchenko 
> 
> [snip]
> 
>> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
>> index 3451125639f9..d649a5dd69a9 100644
>> --- a/lib/ethdev/rte_ethdev.c
>> +++ b/lib/ethdev/rte_ethdev.c
>> @@ -3625,6 +3625,7 @@ rte_eth_dev_set_mtu(uint16_t port_id, uint16_t mtu)
>>  int ret;
>>  struct rte_eth_dev_info dev_info;
>>  struct rte_eth_dev *dev;
>> +int is_jumbo_frame_capable = 0;
>>  
>>  RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
>>  dev = &rte_eth_devices[port_id];
>> @@ -3643,12 +3644,27 @@ rte_eth_dev_set_mtu(uint16_t port_id, uint16_t mtu)
>>  
>>  if (mtu < dev_info.min_mtu || mtu > dev_info.max_mtu)
>>  return -EINVAL;
>> +
>> +if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME)
>> +is_jumbo_frame_capable = 1;
>>  }
>>  
>> +if (mtu > RTE_ETHER_MTU && is_jumbo_frame_capable == 0)
>> +return -EINVAL;
>> +
>>  ret = (*dev->dev_ops->mtu_set)(dev, mtu);
>> -if (!ret)
>> +if (!ret) {
> 
> Since line it updated anyway, may I ask to use explicit
> comparison vs 0 as coding style says.
> 

ack, will fix all occurrences

>>  dev->data->mtu = mtu;
>>  
>> +/* switch to jumbo mode if needed */
>> +if (mtu > RTE_ETHER_MTU)
>> +dev->data->dev_conf.rxmode.offloads |=
>> +DEV_RX_OFFLOAD_JUMBO_FRAME;
>> +else
>> +dev->data->dev_conf.rxmode.offloads &=
>> +~DEV_RX_OFFLOAD_JUMBO_FRAME;
>> +}
>> +
>>  return eth_err(port_id, ret);
>>  }
>>  
>>
> 



Re: [dpdk-dev] [PATCH 4/4] ethdev: remove jumbo offload flag

2021-07-21 Thread Ferruh Yigit
On 7/13/2021 3:07 PM, Andrew Rybchenko wrote:
> On 7/9/21 8:29 PM, Ferruh Yigit wrote:
>> Removing 'DEV_RX_OFFLOAD_JUMBO_FRAME' offload flag.
>>
>> Instead of drivers announce this capability, application can deduct the
>> capability by checking reported 'dev_info.max_mtu' or
>> 'dev_info.max_rx_pktlen'.
>>
>> And instead of application explicitly set this flag to enable jumbo
>> frames, this can be deducted by driver by comparing requested 'mtu' to
>> 'RTE_ETHER_MTU'.
> 
> I can imagine the case when app wants to enable jumbo MTU in
> run-time, but enabling requires to know it in advance in order
> to configure HW correctly (i.e. offload is needed).
> I think it may be ignored. Driver should either reject MTU
> set in started state or do restart automatically on request.
> 

As far as I can see we have both implementations. Most PMDs return an error if
the device is started; a few try to restart to apply the configuration.

And many PMDs just record the value passed with this API and apply it in the
device start, some apply the value within API.

> However, driver maintainers should keep it in mind reviewing
> the patch.
> 

+1

>>
>> Removing this additional configuration for simplification.
>>
>> Signed-off-by: Ferruh Yigit 
> 
> ethdev part:
> 
> Acked-by: Andrew Rybchenko 
> 
> [snip]
> 
>> diff --git a/drivers/net/e1000/e1000_ethdev.h 
>> b/drivers/net/e1000/e1000_ethdev.h
>> index 3b4d9c3ee6f4..1ae78fe71f02 100644
>> --- a/drivers/net/e1000/e1000_ethdev.h
>> +++ b/drivers/net/e1000/e1000_ethdev.h
>> @@ -468,8 +468,8 @@ void eth_em_rx_queue_release(void *rxq);
>>  void em_dev_clear_queues(struct rte_eth_dev *dev);
>>  void em_dev_free_queues(struct rte_eth_dev *dev);
>>  
>> -uint64_t em_get_rx_port_offloads_capa(struct rte_eth_dev *dev);
>> -uint64_t em_get_rx_queue_offloads_capa(struct rte_eth_dev *dev);
>> +uint64_t em_get_rx_port_offloads_capa(void);
>> +uint64_t em_get_rx_queue_offloads_capa(void);
> 
> I'm not sure that it is a step in right direction.
> May be it is better to keep dev unused.
> net/e1000 maintainers should decide.
> 



[dpdk-dev] [PATCH] net/mlx5: fix ROCE LAG bond device probing

2021-07-21 Thread Viacheslav Ovsiienko
The ROCE LAG bond device requires neither E-Switch nor SR-IOV
configurations. It means the ROCE LAG bond device might be
presented as a single port Infiniband device.

The mlx5 PMD wrongly recognized standalone ROCE LAG bond device
as E-Switch configuration, this triggered the calls of E-Switch
ports related API and the latter failed (over the new OFED kernel
driver, starting since 5.4.1), causing the overall device probe
failure.

If there is a single port Infiniband bond device found the
E-Switch related flags must be cleared indicating standalone
configuration.

Also, it is no longer true that the bond device can exist
over E-Switch configurations only (as it was claimed for VF LAG
bond devices). The related checks are not relevant anymore
and are removed.

Fixes: 790164ce1d2d ("net/mlx5: check kernel support for VF LAG bonding")
Cc: sta...@dpdk.org

Signed-off-by: Viacheslav Ovsiienko 
Acked-by: Matan Azrad 
---
 drivers/net/mlx5/linux/mlx5_os.c | 24 +---
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index aa5210fa45..e568cc9c48 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -2216,19 +2216,6 @@ mlx5_os_pci_probe_pf(struct rte_pci_device *pci_dev,
goto exit;
}
}
-#ifndef HAVE_MLX5DV_DR_DEVX_PORT
-   if (bd >= 0) {
-   /*
-* This may happen if there is VF LAG kernel support and
-* application is compiled with older rdma_core library.
-*/
-   DRV_LOG(ERR,
-   "No kernel/verbs support for VF LAG bonding found.");
-   rte_errno = ENOTSUP;
-   ret = -rte_errno;
-   goto exit;
-   }
-#endif
/*
 * Now we can determine the maximal
 * amount of devices to be spawned.
@@ -2292,10 +2279,18 @@ mlx5_os_pci_probe_pf(struct rte_pci_device *pci_dev,
(list[ns].ifindex,
 &list[ns].info);
}
-#ifdef HAVE_MLX5DV_DR_DEVX_PORT
if (!ret && bd >= 0) {
switch (list[ns].info.name_type) {
case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
+   if (np == 1) {
+   /*
+* Force standalone bonding
+* device for ROCE LAG
+* configurations.
+*/
+   list[ns].info.master = 0;
+   list[ns].info.representor = 0;
+   }
if (list[ns].info.port_name == bd)
ns++;
break;
@@ -2312,7 +2307,6 @@ mlx5_os_pci_probe_pf(struct rte_pci_device *pci_dev,
}
continue;
}
-#endif
if (!ret && (list[ns].info.representor ^
 list[ns].info.master))
ns++;
-- 
2.18.1



[dpdk-dev] [PATCH] net/mlx5: fix indexed pools allocate on Windows

2021-07-21 Thread Suanming Mou
Currently, the flow indexed pools are allocated per port, but the allocation
was missing in the Windows code.

This commit fixes the issue of the Windows flow indexed pools not being
allocated.

Fixes: b4edeaf3efd5 ("net/mlx5: replace flow list with indexed pool")

Signed-off-by: Suanming Mou 
---
 drivers/net/mlx5/windows/mlx5_os.c | 47 ++
 1 file changed, 47 insertions(+)

diff --git a/drivers/net/mlx5/windows/mlx5_os.c 
b/drivers/net/mlx5/windows/mlx5_os.c
index 5da362a9d5..a31fafc90d 100644
--- a/drivers/net/mlx5/windows/mlx5_os.c
+++ b/drivers/net/mlx5/windows/mlx5_os.c
@@ -35,6 +35,44 @@ static const char *MZ_MLX5_PMD_SHARED_DATA = 
"mlx5_pmd_shared_data";
 /* Spinlock for mlx5_shared_data allocation. */
 static rte_spinlock_t mlx5_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
 
+/* rte flow indexed pool configuration. */
+static struct mlx5_indexed_pool_config icfg[] = {
+   {
+   .size = sizeof(struct rte_flow),
+   .trunk_size = 64,
+   .need_lock = 1,
+   .release_mem_en = 0,
+   .malloc = mlx5_malloc,
+   .free = mlx5_free,
+   .per_core_cache = 0,
+   .type = "ctl_flow_ipool",
+   },
+   {
+   .size = sizeof(struct rte_flow),
+   .trunk_size = 64,
+   .grow_trunk = 3,
+   .grow_shift = 2,
+   .need_lock = 1,
+   .release_mem_en = 0,
+   .malloc = mlx5_malloc,
+   .free = mlx5_free,
+   .per_core_cache = 1 << 14,
+   .type = "rte_flow_ipool",
+   },
+   {
+   .size = sizeof(struct rte_flow),
+   .trunk_size = 64,
+   .grow_trunk = 3,
+   .grow_shift = 2,
+   .need_lock = 1,
+   .release_mem_en = 0,
+   .malloc = mlx5_malloc,
+   .free = mlx5_free,
+   .per_core_cache = 0,
+   .type = "mcp_flow_ipool",
+   },
+};
+
 /**
  * Initialize shared data between primary and secondary process.
  *
@@ -317,6 +355,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
char name[RTE_ETH_NAME_MAX_LEN];
int own_domain_id = 0;
uint16_t port_id;
+   int i;
 
/* Build device name. */
strlcpy(name, dpdk_dev->name, sizeof(name));
@@ -584,6 +623,14 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
mlx5_set_min_inline(spawn, config);
/* Store device configuration on private structure. */
priv->config = *config;
+   for (i = 0; i < MLX5_FLOW_TYPE_MAXI; i++) {
+   icfg[i].release_mem_en = !!config->reclaim_mode;
+   if (config->reclaim_mode)
+   icfg[i].per_core_cache = 0;
+   priv->flows[i] = mlx5_ipool_create(&icfg[i]);
+   if (!priv->flows[i])
+   goto error;
+   }
/* Create context for virtual machine VLAN workaround. */
priv->vmwa_context = NULL;
if (config->dv_flow_en) {
-- 
2.25.1



Re: [dpdk-dev] [PATCH 4/4] ethdev: remove jumbo offload flag

2021-07-21 Thread Ferruh Yigit
On 7/13/2021 3:07 PM, Andrew Rybchenko wrote:

<...>

> 
>> diff --git a/drivers/net/e1000/e1000_ethdev.h 
>> b/drivers/net/e1000/e1000_ethdev.h
>> index 3b4d9c3ee6f4..1ae78fe71f02 100644
>> --- a/drivers/net/e1000/e1000_ethdev.h
>> +++ b/drivers/net/e1000/e1000_ethdev.h
>> @@ -468,8 +468,8 @@ void eth_em_rx_queue_release(void *rxq);
>>  void em_dev_clear_queues(struct rte_eth_dev *dev);
>>  void em_dev_free_queues(struct rte_eth_dev *dev);
>>  
>> -uint64_t em_get_rx_port_offloads_capa(struct rte_eth_dev *dev);
>> -uint64_t em_get_rx_queue_offloads_capa(struct rte_eth_dev *dev);
>> +uint64_t em_get_rx_port_offloads_capa(void);
>> +uint64_t em_get_rx_queue_offloads_capa(void);
> 
> I'm not sure that it is a step in right direction.
> May be it is better to keep dev unused.
> net/e1000 maintainers should decide.
> 

It is possible to keep 'dev' as unused, but these are driver-internal functions
and 'dev' is not used now; when it is needed, it is easy to add it back.


Re: [dpdk-dev] [PATCH] net/mlx5: fix indexed pools allocate on Windows

2021-07-21 Thread Tal Shnaiderman
> Subject: [PATCH] net/mlx5: fix indexed pools allocate on Windows
> 
> Currently, the flow indexed pools are allocated per port, the allocation was
> missing in Windows code.
> 
> This commit fixes the the Windows flow indexed pools are not allocated issue.
> 
> Fixes: b4edeaf3efd5 ("net/mlx5: replace flow list with indexed pool")
> 
> Signed-off-by: Suanming Mou 
> ---
>  drivers/net/mlx5/windows/mlx5_os.c | 47
> ++
>  1 file changed, 47 insertions(+)
> 
> diff --git a/drivers/net/mlx5/windows/mlx5_os.c
> b/drivers/net/mlx5/windows/mlx5_os.c
> index 5da362a9d5..a31fafc90d 100644
> --- a/drivers/net/mlx5/windows/mlx5_os.c
> +++ b/drivers/net/mlx5/windows/mlx5_os.c
> @@ -35,6 +35,44 @@ static const char *MZ_MLX5_PMD_SHARED_DATA =
> "mlx5_pmd_shared_data";
>  /* Spinlock for mlx5_shared_data allocation. */  static rte_spinlock_t
> mlx5_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
> 
> +/* rte flow indexed pool configuration. */ static struct
> +mlx5_indexed_pool_config icfg[] = {
> + {
> + .size = sizeof(struct rte_flow),
> + .trunk_size = 64,
> + .need_lock = 1,
> + .release_mem_en = 0,
> + .malloc = mlx5_malloc,
> + .free = mlx5_free,
> + .per_core_cache = 0,
> + .type = "ctl_flow_ipool",
> + },
> + {
> + .size = sizeof(struct rte_flow),
> + .trunk_size = 64,
> + .grow_trunk = 3,
> + .grow_shift = 2,
> + .need_lock = 1,
> + .release_mem_en = 0,
> + .malloc = mlx5_malloc,
> + .free = mlx5_free,
> + .per_core_cache = 1 << 14,
> + .type = "rte_flow_ipool",
> + },
> + {
> + .size = sizeof(struct rte_flow),
> + .trunk_size = 64,
> + .grow_trunk = 3,
> + .grow_shift = 2,
> + .need_lock = 1,
> + .release_mem_en = 0,
> + .malloc = mlx5_malloc,
> + .free = mlx5_free,
> + .per_core_cache = 0,
> + .type = "mcp_flow_ipool",
> + },
> +};
> +
>  /**
>   * Initialize shared data between primary and secondary process.
>   *
> @@ -317,6 +355,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
>   char name[RTE_ETH_NAME_MAX_LEN];
>   int own_domain_id = 0;
>   uint16_t port_id;
> + int i;
> 
>   /* Build device name. */
>   strlcpy(name, dpdk_dev->name, sizeof(name)); @@ -584,6 +623,14
> @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
>   mlx5_set_min_inline(spawn, config);
>   /* Store device configuration on private structure. */
>   priv->config = *config;
> + for (i = 0; i < MLX5_FLOW_TYPE_MAXI; i++) {
> + icfg[i].release_mem_en = !!config->reclaim_mode;
> + if (config->reclaim_mode)
> + icfg[i].per_core_cache = 0;
> + priv->flows[i] = mlx5_ipool_create(&icfg[i]);
> + if (!priv->flows[i])
> + goto error;
> + }
>   /* Create context for virtual machine VLAN workaround. */
>   priv->vmwa_context = NULL;
>   if (config->dv_flow_en) {
> --
> 2.25.1

Acked-by: Tal Shnaiderman 


Re: [dpdk-dev] [PATCH v4] build: optional NUMA and cpu counts detection

2021-07-21 Thread Juraj Linkeš
> >>> [However, we also would need to find out how BSD numbers the
> >>> domains, too, as it's possible an OS could just call them 0 and 1,
> >>> rather than
> >>> 0 and 8 if it wanted to.]
> >>>
> >>> In short, we'd need to test to be sure. Is FreeBSD on P9 a supported
> >>> config, and if so can the P9 maintainer perhaps help out with testing?
> >>
> >> Results of the v4 patch on an IBM AC922 P9 system with Linux:
> >>
> >
> > Can you get results from FreeBSD as well?
> 
> I can't help with FreeBSD here, I only have Linux on systems within IBM.
> 

This is still an open question, then. I guess we'll have to go with
`sysctl -n vm.ndomains`.

> >
> >> $ python3 get-numa-count.py
> >> 8
> >> NUMA node0 CPU(s):   0-63
> >> NUMA node8 CPU(s):   64-127
> > 
> > Is this the right number for your case, i.e. are you able to use both numa 
> > nodes
> when RTE_MAX_NUMA_NODES=8?
> 
> node8 above is the ninth NUMA node so I'd need to use
> RTE_MAX_NUMA_NODES=9 as a minimum so that any arrays using that value
> are adequately sized.
> 
> Dave

OK, so the proper value to return is the highest NUMA node number + 1.


Re: [dpdk-dev] [PATCH] net/mlx5: fix indexed pools allocate on Windows

2021-07-21 Thread Odi Assli



> -Original Message-
> From: Tal Shnaiderman 
> Sent: Wednesday, July 21, 2021 11:40 AM
> To: Suanming Mou ; Slava Ovsiienko
> ; Matan Azrad ; Odi Assli
> 
> Cc: Raslan Darawsheh ; dev@dpdk.org
> Subject: RE: [PATCH] net/mlx5: fix indexed pools allocate on Windows
> 
> > Subject: [PATCH] net/mlx5: fix indexed pools allocate on Windows
> >
> > Currently, the flow indexed pools are allocated per port, the
> > allocation was missing in Windows code.
> >
> > This commit fixes the the Windows flow indexed pools are not allocated
> issue.
> >
> > Fixes: b4edeaf3efd5 ("net/mlx5: replace flow list with indexed pool")
> >
> > Signed-off-by: Suanming Mou 
> > ---
> >  drivers/net/mlx5/windows/mlx5_os.c | 47
> > ++
> >  1 file changed, 47 insertions(+)
> >
> > diff --git a/drivers/net/mlx5/windows/mlx5_os.c
> > b/drivers/net/mlx5/windows/mlx5_os.c
> > index 5da362a9d5..a31fafc90d 100644
> > --- a/drivers/net/mlx5/windows/mlx5_os.c
> > +++ b/drivers/net/mlx5/windows/mlx5_os.c
> > @@ -35,6 +35,44 @@ static const char *MZ_MLX5_PMD_SHARED_DATA =
> > "mlx5_pmd_shared_data";
> >  /* Spinlock for mlx5_shared_data allocation. */  static
> > rte_spinlock_t mlx5_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
> >
> > +/* rte flow indexed pool configuration. */ static struct
> > +mlx5_indexed_pool_config icfg[] = {
> > +   {
> > +   .size = sizeof(struct rte_flow),
> > +   .trunk_size = 64,
> > +   .need_lock = 1,
> > +   .release_mem_en = 0,
> > +   .malloc = mlx5_malloc,
> > +   .free = mlx5_free,
> > +   .per_core_cache = 0,
> > +   .type = "ctl_flow_ipool",
> > +   },
> > +   {
> > +   .size = sizeof(struct rte_flow),
> > +   .trunk_size = 64,
> > +   .grow_trunk = 3,
> > +   .grow_shift = 2,
> > +   .need_lock = 1,
> > +   .release_mem_en = 0,
> > +   .malloc = mlx5_malloc,
> > +   .free = mlx5_free,
> > +   .per_core_cache = 1 << 14,
> > +   .type = "rte_flow_ipool",
> > +   },
> > +   {
> > +   .size = sizeof(struct rte_flow),
> > +   .trunk_size = 64,
> > +   .grow_trunk = 3,
> > +   .grow_shift = 2,
> > +   .need_lock = 1,
> > +   .release_mem_en = 0,
> > +   .malloc = mlx5_malloc,
> > +   .free = mlx5_free,
> > +   .per_core_cache = 0,
> > +   .type = "mcp_flow_ipool",
> > +   },
> > +};
> > +
> >  /**
> >   * Initialize shared data between primary and secondary process.
> >   *
> > @@ -317,6 +355,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
> > char name[RTE_ETH_NAME_MAX_LEN];
> > int own_domain_id = 0;
> > uint16_t port_id;
> > +   int i;
> >
> > /* Build device name. */
> > strlcpy(name, dpdk_dev->name, sizeof(name)); @@ -584,6 +623,14
> @@
> > mlx5_dev_spawn(struct rte_device *dpdk_dev,
> > mlx5_set_min_inline(spawn, config);
> > /* Store device configuration on private structure. */
> > priv->config = *config;
> > +   for (i = 0; i < MLX5_FLOW_TYPE_MAXI; i++) {
> > +   icfg[i].release_mem_en = !!config->reclaim_mode;
> > +   if (config->reclaim_mode)
> > +   icfg[i].per_core_cache = 0;
> > +   priv->flows[i] = mlx5_ipool_create(&icfg[i]);
> > +   if (!priv->flows[i])
> > +   goto error;
> > +   }
> > /* Create context for virtual machine VLAN workaround. */
> > priv->vmwa_context = NULL;
> > if (config->dv_flow_en) {
> > --
> > 2.25.1
> 
> Acked-by: Tal Shnaiderman 
Tested-by: Odi Assli 


Re: [dpdk-dev] [PATCH] net/mlx5: fix indexed pools allocate on Windows

2021-07-21 Thread Matan Azrad
Hi

From: Suanming Mou:
> Currently, the flow indexed pools are allocated per port, the allocation was
> missing in Windows code.
> 
> This commit fixes the the Windows flow indexed pools are not allocated

Double "the"

Instead, you can use:
Allocate indexed pool for the Windows case too.

> issue.
> 
> Fixes: b4edeaf3efd5 ("net/mlx5: replace flow list with indexed pool")
> 
> Signed-off-by: Suanming Mou 

Better title:
net/mlx5/windows: fix indexed pools allocation

Besides,
Acked-by: Matan Azrad 

> ---
>  drivers/net/mlx5/windows/mlx5_os.c | 47
> ++
>  1 file changed, 47 insertions(+)
> 
> diff --git a/drivers/net/mlx5/windows/mlx5_os.c
> b/drivers/net/mlx5/windows/mlx5_os.c
> index 5da362a9d5..a31fafc90d 100644
> --- a/drivers/net/mlx5/windows/mlx5_os.c
> +++ b/drivers/net/mlx5/windows/mlx5_os.c
> @@ -35,6 +35,44 @@ static const char *MZ_MLX5_PMD_SHARED_DATA =
> "mlx5_pmd_shared_data";
>  /* Spinlock for mlx5_shared_data allocation. */  static rte_spinlock_t
> mlx5_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
> 
> +/* rte flow indexed pool configuration. */ static struct
> +mlx5_indexed_pool_config icfg[] = {
> + {
> + .size = sizeof(struct rte_flow),
> + .trunk_size = 64,
> + .need_lock = 1,
> + .release_mem_en = 0,
> + .malloc = mlx5_malloc,
> + .free = mlx5_free,
> + .per_core_cache = 0,
> + .type = "ctl_flow_ipool",
> + },
> + {
> + .size = sizeof(struct rte_flow),
> + .trunk_size = 64,
> + .grow_trunk = 3,
> + .grow_shift = 2,
> + .need_lock = 1,
> + .release_mem_en = 0,
> + .malloc = mlx5_malloc,
> + .free = mlx5_free,
> + .per_core_cache = 1 << 14,
> + .type = "rte_flow_ipool",
> + },
> + {
> + .size = sizeof(struct rte_flow),
> + .trunk_size = 64,
> + .grow_trunk = 3,
> + .grow_shift = 2,
> + .need_lock = 1,
> + .release_mem_en = 0,
> + .malloc = mlx5_malloc,
> + .free = mlx5_free,
> + .per_core_cache = 0,
> + .type = "mcp_flow_ipool",
> + },
> +};
> +
>  /**
>   * Initialize shared data between primary and secondary process.
>   *
> @@ -317,6 +355,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
>   char name[RTE_ETH_NAME_MAX_LEN];
>   int own_domain_id = 0;
>   uint16_t port_id;
> + int i;
> 
>   /* Build device name. */
>   strlcpy(name, dpdk_dev->name, sizeof(name)); @@ -584,6 +623,14
> @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
>   mlx5_set_min_inline(spawn, config);
>   /* Store device configuration on private structure. */
>   priv->config = *config;
> + for (i = 0; i < MLX5_FLOW_TYPE_MAXI; i++) {
> + icfg[i].release_mem_en = !!config->reclaim_mode;
> + if (config->reclaim_mode)
> + icfg[i].per_core_cache = 0;
> + priv->flows[i] = mlx5_ipool_create(&icfg[i]);
> + if (!priv->flows[i])
> + goto error;
> + }
>   /* Create context for virtual machine VLAN workaround. */
>   priv->vmwa_context = NULL;
>   if (config->dv_flow_en) {
> --
> 2.25.1



Re: [dpdk-dev] [PATCH v2] crypto/mvsam: IPSec full offload support

2021-07-21 Thread Thomas Monjalon
The correct wording is "IPsec"
and it is flagged by devtools/check-git-log.sh

20/07/2021 22:49, Akhil Goyal:
> > From: Michael Shamis 
> > 
> > This patch provides the support for IPSec protocol
> > offload to the hardware.
> > Following security operations are added:
> > - session_create
> > - session_destroy
> > - capabilities_get
> > 
> > Signed-off-by: Michael Shamis 
> > Reviewed-by: Liron Himi 
> > Tested-by: Liron Himi 
> > ---
> Applied to dpdk-next-crypto

A checkpatch warning was ignored:

Warning in drivers/crypto/mvsam/rte_mrvl_pmd.c:
Declaring a variable inside for()






[dpdk-dev] [PATCH] crypto: fix heap use after free bug

2021-07-21 Thread Ciara Power
The PMD destroy function calls the release function, which frees
cryptodev->data, and then tries to free cryptodev->data->dev_private,
which causes a heap use-after-free.

A temporary pointer is set before the free of cryptodev->data,
which can then be used afterwards to free dev_private.
The free cannot be moved to before the release function is called,
as dev_private is used in the QAT close function while being released.

Fixes: 9e6edea41805 ("cryptodev: add APIs to assist PMD initialisation")
Cc: declan.dohe...@intel.com
Cc: sta...@dpdk.org

Reported-by: ZhihongX Peng 
Signed-off-by: Ciara Power 

---
The same issue is found in crypto/octeontx,
which may need to be addressed by maintainers.
Cc: Anoob Joseph 
---
 lib/cryptodev/rte_cryptodev_pmd.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/cryptodev/rte_cryptodev_pmd.c 
b/lib/cryptodev/rte_cryptodev_pmd.c
index 0912004127..900acd7ba4 100644
--- a/lib/cryptodev/rte_cryptodev_pmd.c
+++ b/lib/cryptodev/rte_cryptodev_pmd.c
@@ -140,6 +140,7 @@ int
 rte_cryptodev_pmd_destroy(struct rte_cryptodev *cryptodev)
 {
int retval;
+   void *tmp_dev_private = cryptodev->data->dev_private;
 
CDEV_LOG_INFO("Closing crypto device %s", cryptodev->device->name);
 
@@ -149,7 +150,7 @@ rte_cryptodev_pmd_destroy(struct rte_cryptodev *cryptodev)
return retval;
 
if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-   rte_free(cryptodev->data->dev_private);
+   rte_free(tmp_dev_private);
 
 
cryptodev->device = NULL;
-- 
2.25.1



[dpdk-dev] [PATCH v3 0/7] support yellow color policy in mlx5

2021-07-21 Thread Bing Zhao
When creating a meter policy, the actions for yellow color can be
specified together with green color. The mlx5 PMD now supports
setting the policy actions for yellow color.

The actions list that is supported for yellow is the same as that
for green.

---
v3:
  * patch set building fix
  * bug fixes and document update
v2:
  * bug fixes
  * add policy and profile consistency checking
  * add trTCM RFC2698 and RFC4115 support
---

Acked-by: Matan Azrad 

Bing Zhao (7):
  net/mlx5: handle yellow case in default meter policy
  net/mlx5: enable meter bucket overflow for yellow color
  net/mlx5: added support for yellow policy rules
  net/mlx5: split policies handling of colors
  net/mlx5: support yellow in meter policy validation
  net/mlx5: check consistency of meter policy and profile
  net/mlx5: add meter support for trTCM profiles

 doc/guides/nics/mlx5.rst   |  10 +-
 doc/guides/rel_notes/release_21_08.rst |   2 +
 drivers/common/mlx5/mlx5_prm.h |   5 +-
 drivers/net/mlx5/mlx5.h|  20 +-
 drivers/net/mlx5/mlx5_flow.c   |  46 +--
 drivers/net/mlx5/mlx5_flow.h   |   4 +-
 drivers/net/mlx5/mlx5_flow_aso.c   |  21 ++
 drivers/net/mlx5/mlx5_flow_dv.c| 471 +++--
 drivers/net/mlx5/mlx5_flow_meter.c | 203 +++
 9 files changed, 492 insertions(+), 290 deletions(-)

-- 
2.27.0



[dpdk-dev] [PATCH v3 1/7] net/mlx5: handle yellow case in default meter policy

2021-07-21 Thread Bing Zhao
In order to support the yellow color for the default meter policy,
the default policy action for yellow should be created together
with the green policy.

The default policy action for yellow is the same as that for
green. In the same table, the same matcher will be reused for yellow
and the destination group will be the same.

Signed-off-by: Bing Zhao 
---
 drivers/net/mlx5/mlx5.h |   6 +-
 drivers/net/mlx5/mlx5_flow_dv.c | 144 +++-
 2 files changed, 91 insertions(+), 59 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 94618e10fa..a2fe9b90c7 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -632,8 +632,8 @@ struct mlx5_dev_shared_port {
 /*ASO flow meter structures*/
 /* Modify this value if enum rte_mtr_color changes. */
 #define RTE_MTR_DROPPED RTE_COLORS
-/* Yellow is not supported. */
-#define MLX5_MTR_RTE_COLORS (RTE_COLOR_GREEN + 1)
+/* Yellow is now supported. */
+#define MLX5_MTR_RTE_COLORS (RTE_COLOR_YELLOW + 1)
 /* table_id 22 bits in mlx5_flow_tbl_key so limit policy number. */
 #define MLX5_MAX_SUB_POLICY_TBL_NUM 0x3F
 #define MLX5_INVALID_POLICY_ID UINT32_MAX
@@ -641,6 +641,8 @@ struct mlx5_dev_shared_port {
 #define MLX5_MTR_TABLE_ID_SUFFIX 1
 /* Drop table_id on MLX5_FLOW_TABLE_LEVEL_METER. */
 #define MLX5_MTR_TABLE_ID_DROP 2
+/* Priority of the meter policy matcher. */
+#define MLX5_MTR_POLICY_MATCHER_PRIO 0
 
 enum mlx5_meter_domain {
MLX5_MTR_DOMAIN_INGRESS,
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index d250486950..cfc646c5e5 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -188,7 +188,7 @@ flow_dv_attr_init(const struct rte_flow_item *item, union 
flow_dv_attr *attr,
attr->valid = 1;
 }
 
-/**
+/*
  * Convert rte_mtr_color to mlx5 color.
  *
  * @param[in] rcol
@@ -197,7 +197,7 @@ flow_dv_attr_init(const struct rte_flow_item *item, union 
flow_dv_attr *attr,
  * @return
  *   mlx5 color.
  */
-static int
+static inline int
 rte_col_2_mlx5_col(enum rte_color rcol)
 {
switch (rcol) {
@@ -15892,7 +15892,7 @@ flow_dv_destroy_mtr_drop_tbls(struct rte_eth_dev *dev)
 
 static void
 __flow_dv_destroy_domain_def_policy(struct rte_eth_dev *dev,
- enum mlx5_meter_domain domain)
+   enum mlx5_meter_domain domain)
 {
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_flow_meter_def_policy *def_policy =
@@ -15943,21 +15943,20 @@ __flow_dv_create_policy_flow(struct rte_eth_dev *dev,
if (match_src_port && (priv->representor || priv->master)) {
if (flow_dv_translate_item_port_id(dev, matcher.buf,
   value.buf, item, attr)) {
-   DRV_LOG(ERR,
-   "Failed to create meter policy flow with port.");
+   DRV_LOG(ERR, "Failed to create meter policy%d flow's"
+   " value with port.", color);
return -1;
}
}
flow_dv_match_meta_reg(matcher.buf, value.buf,
-   (enum modify_reg)color_reg_c_idx,
-   rte_col_2_mlx5_col(color),
-   UINT32_MAX);
+  (enum modify_reg)color_reg_c_idx,
+  rte_col_2_mlx5_col(color), UINT32_MAX);
misc_mask = flow_dv_matcher_enable(value.buf);
__flow_dv_adjust_buf_size(&value.size, misc_mask);
-   ret = mlx5_flow_os_create_flow(matcher_object,
-   (void *)&value, actions_n, actions, rule);
+   ret = mlx5_flow_os_create_flow(matcher_object, (void *)&value,
+  actions_n, actions, rule);
if (ret) {
-   DRV_LOG(ERR, "Failed to create meter policy flow.");
+   DRV_LOG(ERR, "Failed to create meter policy%d flow.", color);
return -1;
}
return 0;
@@ -15991,13 +15990,13 @@ __flow_dv_create_policy_matcher(struct rte_eth_dev 
*dev,
};
struct mlx5_flow_tbl_data_entry *tbl_data;
struct mlx5_priv *priv = dev->data->dev_private;
-   uint32_t color_mask = (UINT32_C(1) << MLX5_MTR_COLOR_BITS) - 1;
+   const uint32_t color_mask = (UINT32_C(1) << MLX5_MTR_COLOR_BITS) - 1;
 
if (match_src_port && (priv->representor || priv->master)) {
if (flow_dv_translate_item_port_id(dev, matcher.mask.buf,
   value.buf, item, attr)) {
-   DRV_LOG(ERR,
-   "Failed to register meter drop matcher with port.");
+   DRV_LOG(ERR, "Failed to register meter policy%d matcher"
+   " with port.", priority);
return -1;
}
}
@@ -1600

[dpdk-dev] [PATCH v3 4/7] net/mlx5: split policies handling of colors

2021-07-21 Thread Bing Zhao
If the fate action is either RSS or Queue of a meter policy, the
action will only be created in the flow splitting stage. With queue
as the fate action, only one sub-policy is needed, whereas RSS will
have more than one sub-policy if there is an expansion.

Since the RSS parameters are the same for both green and yellow
colors except the queues, the expansion result will be unique.
Even if only one color has the RSS action, the checking and possible
expansion will still be done. For each sub-policy, the action rules
need to be created separately on its own policy table.

Signed-off-by: Bing Zhao 
---
 drivers/net/mlx5/mlx5_flow.c| 40 ++--
 drivers/net/mlx5/mlx5_flow_dv.c | 67 +
 2 files changed, 55 insertions(+), 52 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 347e8c1a09..d90c8cd314 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -4687,7 +4687,7 @@ get_meter_sub_policy(struct rte_eth_dev *dev,
struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS] = {0};
uint32_t i;
 
-   /**
+   /*
 * This is a tmp dev_flow,
 * no need to register any matcher for it in translate.
 */
@@ -4695,18 +4695,19 @@ get_meter_sub_policy(struct rte_eth_dev *dev,
for (i = 0; i < MLX5_MTR_RTE_COLORS; i++) {
struct mlx5_flow dev_flow = {0};
struct mlx5_flow_handle dev_handle = { {0} };
+   uint8_t fate = final_policy->act_cnt[i].fate_action;
 
-   if (final_policy->is_rss) {
+   if (fate == MLX5_FLOW_FATE_SHARED_RSS) {
const void *rss_act =
final_policy->act_cnt[i].rss->conf;
struct rte_flow_action rss_actions[2] = {
[0] = {
.type = RTE_FLOW_ACTION_TYPE_RSS,
-   .conf = rss_act
+   .conf = rss_act,
},
[1] = {
.type = RTE_FLOW_ACTION_TYPE_END,
-   .conf = NULL
+   .conf = NULL,
}
};
 
@@ -4731,9 +4732,10 @@ get_meter_sub_policy(struct rte_eth_dev *dev,
rss_desc_v[i].hash_fields ?
rss_desc_v[i].queue_num : 1;
rss_desc_v[i].tunnel =
-   !!(dev_flow.handle->layers &
-   MLX5_FLOW_LAYER_TUNNEL);
-   } else {
+   !!(dev_flow.handle->layers &
+  MLX5_FLOW_LAYER_TUNNEL);
+   rss_desc[i] = &rss_desc_v[i];
+   } else if (fate == MLX5_FLOW_FATE_QUEUE) {
/* This is queue action. */
rss_desc_v[i] = wks->rss_desc;
rss_desc_v[i].key_len = 0;
@@ -4741,24 +4743,24 @@ get_meter_sub_policy(struct rte_eth_dev *dev,
rss_desc_v[i].queue =
&final_policy->act_cnt[i].queue;
rss_desc_v[i].queue_num = 1;
+   rss_desc[i] = &rss_desc_v[i];
+   } else {
+   rss_desc[i] = NULL;
}
-   rss_desc[i] = &rss_desc_v[i];
}
sub_policy = flow_drv_meter_sub_policy_rss_prepare(dev,
flow, policy, rss_desc);
} else {
enum mlx5_meter_domain mtr_domain =
attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER :
-   attr->egress ? MLX5_MTR_DOMAIN_EGRESS :
-   MLX5_MTR_DOMAIN_INGRESS;
+   (attr->egress ? MLX5_MTR_DOMAIN_EGRESS :
+   MLX5_MTR_DOMAIN_INGRESS);
sub_policy = policy->sub_policys[mtr_domain][0];
}
-   if (!sub_policy) {
+   if (!sub_policy)
rte_flow_error_set(error, EINVAL,
-   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
-   "Failed to get meter sub-policy.");
-   goto exit;
-   }
+  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+  "Failed to ge

[dpdk-dev] [PATCH v3 2/7] net/mlx5: enable meter bucket overflow for yellow color

2021-07-21 Thread Bing Zhao
To support the meter policy for yellow action, the prerequisite is
that the hardware needs to support the EBS, as defined in the
RFC2697.
  https://datatracker.ietf.org/doc/html/rfc2697
Then some of the packets can be marked as yellow if the tokens in the
C bucket are not enough but the tokens in the E bucket are. The color
can be used for further steering of the packets.

In the current implementation EBS and overflow were ignored when
creating a meter profile. With this commit, if EBS is set by the
application, the generation of yellow color will be enabled in the
hardware for flow rules steering of packets.

Signed-off-by: Bing Zhao 
---
 drivers/net/mlx5/mlx5_flow_aso.c   |  4 
 drivers/net/mlx5/mlx5_flow_meter.c | 10 +-
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 64631ffc29..23e22e560a 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -747,6 +747,10 @@ mlx5_aso_mtr_sq_enqueue_single(struct mlx5_aso_sq *sq,
wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm =
RTE_BE32((1 << ASO_DSEG_VALID_OFFSET) |
(MLX5_FLOW_COLOR_GREEN << ASO_DSEG_SC_OFFSET));
+   /* Only needed for RFC2697. */
+   if (fm->profile->srtcm_prm.ebs_eir)
+   wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |=
+   RTE_BE32(1 << ASO_DSEG_BO_OFFSET);
sq->head++;
sq->pi += 2;/* Each WQE contains 2 WQEBB's. */
rte_io_wmb();
diff --git a/drivers/net/mlx5/mlx5_flow_meter.c 
b/drivers/net/mlx5/mlx5_flow_meter.c
index 78eb2a60f9..73eba0dabd 100644
--- a/drivers/net/mlx5/mlx5_flow_meter.c
+++ b/drivers/net/mlx5/mlx5_flow_meter.c
@@ -319,9 +319,9 @@ mlx5_flow_meter_param_fill(struct mlx5_flow_meter_profile 
*fmp,
cbs_man = man;
cbs_exp = exp;
srtcm->cbs_cir = rte_cpu_to_be_32(cbs_exp << ASO_DSEG_CBS_EXP_OFFSET |
-   cbs_man << ASO_DSEG_CBS_MAN_OFFSET |
-   cir_exp << ASO_DSEG_CIR_EXP_OFFSET |
-   cir_man);
+ cbs_man << ASO_DSEG_CBS_MAN_OFFSET |
+ cir_exp << ASO_DSEG_CIR_EXP_OFFSET |
+ cir_man);
mlx5_flow_meter_xbs_man_exp_calc(ebs, &man, &exp);
/* Check if ebs mantissa is too large. */
if (exp > ASO_DSEG_EXP_MASK)
@@ -332,7 +332,7 @@ mlx5_flow_meter_param_fill(struct mlx5_flow_meter_profile 
*fmp,
ebs_man = man;
ebs_exp = exp;
srtcm->ebs_eir = rte_cpu_to_be_32(ebs_exp << ASO_DSEG_EBS_EXP_OFFSET |
-   ebs_man << ASO_DSEG_EBS_MAN_OFFSET);
+ ebs_man << ASO_DSEG_EBS_MAN_OFFSET);
return 0;
 }
 
@@ -421,7 +421,7 @@ mlx5_flow_meter_profile_add(struct rte_eth_dev *dev,
return ret;
/* Meter profile memory allocation. */
fmp = mlx5_malloc(MLX5_MEM_ZERO, sizeof(struct mlx5_flow_meter_profile),
-RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+ RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
if (fmp == NULL)
return -rte_mtr_error_set(error, ENOMEM,
  RTE_MTR_ERROR_TYPE_UNSPECIFIED,
-- 
2.27.0



[dpdk-dev] [PATCH v3 3/7] net/mlx5: added support for yellow policy rules

2021-07-21 Thread Bing Zhao
When creating a meter policy, either or both of the action rules for
green and yellow colors may be provided. After validation, the actions
are usually created before the meter is used by a flow rule.

If there is an action specified for the yellow color, the action rules
should be created together with those for the green color at the same
time. The action for the green / yellow color can be empty, in which
case the default behavior is the jump action of the rule, the same as
that of the default policy.

If the fate action of either color is queue / RSS, all the
action rules will be created in the flow splitting stage instead of
the policy adding stage.

Signed-off-by: Bing Zhao 
---
 drivers/net/mlx5/mlx5_flow_dv.c| 46 ++-
 drivers/net/mlx5/mlx5_flow_meter.c | 50 +++---
 2 files changed, 56 insertions(+), 40 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index cfc646c5e5..2400565232 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -15214,7 +15214,7 @@ __flow_dv_create_domain_policy_acts(struct rte_eth_dev 
*dev,
struct mlx5_priv *priv = dev->data->dev_private;
struct rte_flow_error flow_err;
const struct rte_flow_action *act;
-   uint64_t action_flags = 0;
+   uint64_t action_flags;
struct mlx5_flow_handle dh;
struct mlx5_flow dev_flow;
struct mlx5_flow_dv_port_id_action_resource port_id_action;
@@ -15234,21 +15234,20 @@ __flow_dv_create_domain_policy_acts(struct 
rte_eth_dev *dev,
memset(&dh, 0, sizeof(struct mlx5_flow_handle));
memset(&dev_flow, 0, sizeof(struct mlx5_flow));
memset(&port_id_action, 0,
-   sizeof(struct mlx5_flow_dv_port_id_action_resource));
+  sizeof(struct mlx5_flow_dv_port_id_action_resource));
memset(mhdr_res, 0, sizeof(*mhdr_res));
mhdr_res->ft_type = transfer ? MLX5DV_FLOW_TABLE_TYPE_FDB :
-   egress ?
-   MLX5DV_FLOW_TABLE_TYPE_NIC_TX :
-   MLX5DV_FLOW_TABLE_TYPE_NIC_RX;
+  (egress ? MLX5DV_FLOW_TABLE_TYPE_NIC_TX :
+   MLX5DV_FLOW_TABLE_TYPE_NIC_RX);
dev_flow.handle = &dh;
dev_flow.dv.port_id_action = &port_id_action;
dev_flow.external = true;
for (i = 0; i < RTE_COLORS; i++) {
if (i < MLX5_MTR_RTE_COLORS)
act_cnt = &mtr_policy->act_cnt[i];
+   action_flags = 0;
for (act = actions[i];
-   act && act->type != RTE_FLOW_ACTION_TYPE_END;
-   act++) {
+act && act->type != RTE_FLOW_ACTION_TYPE_END; act++) {
switch (act->type) {
case RTE_FLOW_ACTION_TYPE_MARK:
{
@@ -15456,7 +15455,7 @@ __flow_dv_create_domain_policy_acts(struct rte_eth_dev 
*dev,
(1 << MLX5_SCALE_FLOW_GROUP_BIT),
};
struct mlx5_flow_meter_sub_policy *sub_policy =
-   mtr_policy->sub_policys[domain][0];
+   mtr_policy->sub_policys[domain][0];
 
if (i >= MLX5_MTR_RTE_COLORS)
return -rte_mtr_error_set(error,
@@ -15500,6 +15499,10 @@ __flow_dv_create_domain_policy_acts(struct rte_eth_dev 
*dev,
action_flags |= MLX5_FLOW_ACTION_JUMP;
break;
}
+   /*
+* No need to check meter hierarchy for Y or R colors
+* here since it is done in the validation stage.
+*/
case RTE_FLOW_ACTION_TYPE_METER:
{
const struct rte_flow_action_meter *mtr;
@@ -15615,6 +15618,7 @@ flow_dv_create_mtr_policy_acts(struct rte_eth_dev *dev,
ret = __flow_dv_create_domain_policy_acts(dev,
mtr_policy, actions,
(enum mlx5_meter_domain)i, error);
+   /* Cleaning resource is done in the caller level. */
if (ret)
return ret;
}
@@ -16156,16 +16160,15 @@ __flow_dv_create_policy_acts_rules(struct rte_eth_dev 
*dev,
 
for (i = 0; i < RTE_COLORS; i++) {
acts[i].actions_n = 0;
-   if (i == RTE_COLOR_YELLOW)
-   continue;
if (i == RTE_COLOR_RED) {
/* Only support drop on red. */
acts[i].dv_actions[0] =
-   mtr_policy->dr_drop_action

[dpdk-dev] [PATCH v3 6/7] net/mlx5: check consistency of meter policy and profile

2021-07-21 Thread Bing Zhao
In the previous implementation, only green color policy was
supported in mlx5 PMD. Since yellow color policy is supported now,
the consistency of meter policy and profile should be checked.
  1. If the profile supports yellow but the policy doesn't, an error
 should be returned when creating the meter. Or else, there is
 no explicit steering action for the packets marked with yellow.
  2. If the policy supports yellow but the profile doesn't, it will
 be considered a valid case. Even though no packet will be
 handled with the yellow steering action, it is just as if
 only the green policy were present.

Usually the green color is supported by default, but when it is
disabled intentionally by setting the CBS to a small value like
zero in the profile, a similar check on the green policy and
profile should also be done.

Signed-off-by: Bing Zhao 
---
 drivers/net/mlx5/mlx5.h|  6 ++
 drivers/net/mlx5/mlx5_flow_dv.c|  4 
 drivers/net/mlx5/mlx5_flow_meter.c | 20 ++--
 3 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index ea16109972..3a8587b7cf 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -746,6 +746,10 @@ struct mlx5_flow_meter_policy {
/* Is queue action in policy table. */
uint32_t is_hierarchy:1;
/* Is meter action in policy table. */
+   uint32_t skip_y:1;
+   /* If yellow color policy is skipped. */
+   uint32_t skip_g:1;
+   /* If green color policy is skipped. */
rte_spinlock_t sl;
uint32_t ref_cnt;
/* Use count. */
@@ -866,6 +870,8 @@ struct mlx5_flow_meter_profile {
/**< srtcm_rfc2697 struct. */
};
uint32_t ref_cnt; /**< Use count. */
+   uint32_t g_support:1; /**< If G color will be generated. */
+   uint32_t y_support:1; /**< If Y color will be generated. */
 };
 
 /* 2 meters in each ASO cache line */
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 97e297d5c2..7ea04ba6e5 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -15245,6 +15245,10 @@ __flow_dv_create_domain_policy_acts(struct rte_eth_dev 
*dev,
for (i = 0; i < RTE_COLORS; i++) {
if (i < MLX5_MTR_RTE_COLORS)
act_cnt = &mtr_policy->act_cnt[i];
+   /* Skip the color policy actions creation. */
+   if ((i == RTE_COLOR_YELLOW && mtr_policy->skip_y) ||
+   (i == RTE_COLOR_GREEN && mtr_policy->skip_g))
+   continue;
action_flags = 0;
for (act = actions[i];
 act && act->type != RTE_FLOW_ACTION_TYPE_END; act++) {
diff --git a/drivers/net/mlx5/mlx5_flow_meter.c 
b/drivers/net/mlx5/mlx5_flow_meter.c
index 32ad4ea133..4f57b7e04e 100644
--- a/drivers/net/mlx5/mlx5_flow_meter.c
+++ b/drivers/net/mlx5/mlx5_flow_meter.c
@@ -333,6 +333,10 @@ mlx5_flow_meter_param_fill(struct mlx5_flow_meter_profile 
*fmp,
ebs_exp = exp;
srtcm->ebs_eir = rte_cpu_to_be_32(ebs_exp << ASO_DSEG_EBS_EXP_OFFSET |
  ebs_man << ASO_DSEG_EBS_MAN_OFFSET);
+   if (srtcm->cbs_cir)
+   fmp->g_support = 1;
+   if (srtcm->ebs_eir)
+   fmp->y_support = 1;
return 0;
 }
 
@@ -750,6 +754,10 @@ mlx5_flow_meter_policy_add(struct rte_eth_dev *dev,
return -rte_mtr_error_set(error, ENOMEM,
RTE_MTR_ERROR_TYPE_METER_POLICY, NULL,
"Memory alloc failed for meter policy.");
+   if (policy_mode == MLX5_MTR_POLICY_MODE_OG)
+   mtr_policy->skip_y = 1;
+   else if (policy_mode == MLX5_MTR_POLICY_MODE_OY)
+   mtr_policy->skip_g = 1;
policy_size = sizeof(struct mlx5_flow_meter_policy);
for (i = 0; i < MLX5_MTR_DOMAIN_MAX; i++) {
if (!(domain_bitmap & (1 << i)))
@@ -1132,13 +1140,13 @@ mlx5_flow_meter_create(struct rte_eth_dev *dev, 
uint32_t meter_id,
if (!priv->config.dv_esw_en)
domain_bitmap &= ~MLX5_MTR_DOMAIN_TRANSFER_BIT;
} else {
-   mtr_policy = mlx5_flow_meter_policy_find(dev,
-   params->meter_policy_id, &policy_idx);
if (!priv->sh->meter_aso_en)
return -rte_mtr_error_set(error, ENOTSUP,
RTE_MTR_ERROR_TYPE_UNSPECIFIED, NULL,
"Part of the policies cannot be "
"supported without ASO ");
+   mtr_policy = mlx5_flow_meter_policy_find(dev,
+   params->meter_policy_id, &policy_idx);
if (!mtr_policy)
return -rte_mtr_error_set(error, ENOENT,
RTE_MTR_ERROR_TYPE_METER

[dpdk-dev] [PATCH v3 7/7] net/mlx5: add meter support for trTCM profiles

2021-07-21 Thread Bing Zhao
Support for RFC2698 and RFC4115 is added to the mlx5 PMD. Only
ASO metering supports these two profiles.

Signed-off-by: Bing Zhao 
---
 doc/guides/nics/mlx5.rst   |   1 +
 doc/guides/rel_notes/release_21_08.rst |   1 +
 drivers/common/mlx5/mlx5_prm.h |   5 +-
 drivers/net/mlx5/mlx5_flow_aso.c   |  23 -
 drivers/net/mlx5/mlx5_flow_meter.c | 112 -
 5 files changed, 98 insertions(+), 44 deletions(-)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 1f5b6fb954..ae267d315f 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -433,6 +433,7 @@ Limitations
  - RED: must be DROP.
   - Policy actions of RSS for green and yellow should have the same 
configuration except queues.
   - meter profile packet mode is supported.
+  - meter profiles of RFC2697, RFC2698 and RFC4115 are supported.
 
 - Integrity:
 
diff --git a/doc/guides/rel_notes/release_21_08.rst 
b/doc/guides/rel_notes/release_21_08.rst
index 03d4fd059a..e159615deb 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -92,6 +92,7 @@ New Features
   * Added support for matching on VXLAN header last 8-bits reserved field.
   * Optimized multi-thread flow rule insertion rate.
   * Added support for metering policy actions of yellow color.
+  * Added support for metering trTCM RFC2698 and RFC4115.
 
 * **Added Wangxun ngbe PMD.**
 
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 7950070976..88705be9d6 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -3031,11 +3031,12 @@ struct mlx5_aso_mtr_dseg {
 #define ASO_DSEG_VALID_OFFSET 31
 #define ASO_DSEG_BO_OFFSET 30
 #define ASO_DSEG_SC_OFFSET 28
+#define ASO_DSEG_BBOG_OFFSET 27
 #define ASO_DSEG_MTR_MODE 24
 #define ASO_DSEG_CBS_EXP_OFFSET 24
 #define ASO_DSEG_CBS_MAN_OFFSET 16
-#define ASO_DSEG_CIR_EXP_MASK 0x1F
-#define ASO_DSEG_CIR_EXP_OFFSET 8
+#define ASO_DSEG_XIR_EXP_MASK 0x1F
+#define ASO_DSEG_XIR_EXP_OFFSET 8
 #define ASO_DSEG_EBS_EXP_OFFSET 24
 #define ASO_DSEG_EBS_MAN_OFFSET 16
 #define ASO_DSEG_EXP_MASK 0x1F
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 23e22e560a..e11327a11b 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -747,10 +747,27 @@ mlx5_aso_mtr_sq_enqueue_single(struct mlx5_aso_sq *sq,
wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm =
RTE_BE32((1 << ASO_DSEG_VALID_OFFSET) |
(MLX5_FLOW_COLOR_GREEN << ASO_DSEG_SC_OFFSET));
-   /* Only needed for RFC2697. */
-   if (fm->profile->srtcm_prm.ebs_eir)
+   switch (fmp->profile.alg) {
+   case RTE_MTR_SRTCM_RFC2697:
+   /* Only needed for RFC2697. */
+   if (fm->profile->srtcm_prm.ebs_eir)
+   wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |=
+   RTE_BE32(1 << ASO_DSEG_BO_OFFSET);
+   break;
+   case RTE_MTR_TRTCM_RFC2698:
wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |=
-   RTE_BE32(1 << ASO_DSEG_BO_OFFSET);
+   RTE_BE32(1 << ASO_DSEG_BBOG_OFFSET);
+   break;
+   case RTE_MTR_TRTCM_RFC4115:
+   default:
+   break;
+   }
+   /*
+* Note:
+* Due to software performance reason, the token fields will not be
+* set when posting the WQE to ASO SQ. It will be filled by the HW
+* automatically.
+*/
sq->head++;
sq->pi += 2;/* Each WQE contains 2 WQEBB's. */
rte_io_wmb();
diff --git a/drivers/net/mlx5/mlx5_flow_meter.c 
b/drivers/net/mlx5/mlx5_flow_meter.c
index 4f57b7e04e..2d91a6fcf0 100644
--- a/drivers/net/mlx5/mlx5_flow_meter.c
+++ b/drivers/net/mlx5/mlx5_flow_meter.c
@@ -55,7 +55,7 @@ mlx5_flow_meter_action_create(struct mlx5_priv *priv,
MLX5_SET(flow_meter_parameters, fmp, cbs_exponent, val);
val = (cbs_cir >> ASO_DSEG_CBS_MAN_OFFSET) & ASO_DSEG_MAN_MASK;
MLX5_SET(flow_meter_parameters, fmp, cbs_mantissa, val);
-   val = (cbs_cir >> ASO_DSEG_CIR_EXP_OFFSET) & ASO_DSEG_EXP_MASK;
+   val = (cbs_cir >> ASO_DSEG_XIR_EXP_OFFSET) & ASO_DSEG_EXP_MASK;
MLX5_SET(flow_meter_parameters, fmp, cir_exponent, val);
val = (cbs_cir & ASO_DSEG_MAN_MASK);
MLX5_SET(flow_meter_parameters, fmp, cir_mantissa, val);
@@ -194,18 +194,18 @@ mlx5_flow_meter_profile_validate(struct rte_eth_dev *dev,
  NULL, "Metering algorithm not supported.");
 }
 
-/**
- * Calculate mantissa and exponent for cir.
+/*
+ * Calculate mantissa and exponent for cir / eir.
  *
- * @param[in] cir
+ * @param[in] xir
  *   Value to be calculated.
  * @param[out] man
  *   Pointer to the mantissa.
  * @param[out] exp
  *   Pointer to the exp.
  */
-static void
-mlx5_flow

[dpdk-dev] [PATCH v3 5/7] net/mlx5: support yellow in meter policy validation

2021-07-21 Thread Bing Zhao
In the previous implementation, the policy for yellow color was not
supported. The action validation for yellow was skipped.

Since the yellow color policy needs to be supported, the validation
should also be done for the yellow color. Meanwhile, because the
color policies of one meter are used for the same flow(s), the
domains supported by both colors should be the same. If both
colors have RSS as the termination action, all RSS parameters
other than the queues should be the same.

Signed-off-by: Bing Zhao 
---
 doc/guides/nics/mlx5.rst   |   9 +-
 doc/guides/rel_notes/release_21_08.rst |   1 +
 drivers/net/mlx5/mlx5.h|   8 +-
 drivers/net/mlx5/mlx5_flow.c   |   6 +-
 drivers/net/mlx5/mlx5_flow.h   |   4 +-
 drivers/net/mlx5/mlx5_flow_dv.c| 210 -
 drivers/net/mlx5/mlx5_flow_meter.c |  15 +-
 7 files changed, 160 insertions(+), 93 deletions(-)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index f5b727c1ee..1f5b6fb954 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -420,19 +420,18 @@ Limitations
 - Meter:
 
   - All the meter colors with drop action will be counted only by the global 
drop statistics.
-  - Green color is not supported with drop action.
-  - Yellow detection is not supported.
+  - Yellow detection is only supported with ASO metering.
   - Red color must be with drop action.
   - Meter statistics are supported only for drop case.
-  - Meter yellow color detection is not supported.
   - A meter action created with pre-defined policy must be the last action in 
the flow except single case where the policy actions are:
  - green: NULL or END.
  - yellow: NULL or END.
  - RED: DROP / END.
   - The only supported meter policy actions:
- - green: QUEUE, RSS, PORT_ID, JUMP, MARK and SET_TAG.
- - yellow: must be empty.
+ - green: QUEUE, RSS, PORT_ID, JUMP, DROP, MARK and SET_TAG.
+ - yellow: QUEUE, RSS, PORT_ID, JUMP, DROP, MARK and SET_TAG.
  - RED: must be DROP.
+  - Policy actions of RSS for green and yellow should have the same 
configuration except queues.
   - meter profile packet mode is supported.
 
 - Integrity:
diff --git a/doc/guides/rel_notes/release_21_08.rst 
b/doc/guides/rel_notes/release_21_08.rst
index 1b38b1aa51..03d4fd059a 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -91,6 +91,7 @@ New Features
   * Added matching on IPv4 Internet Header Length (IHL).
   * Added support for matching on VXLAN header last 8-bits reserved field.
   * Optimized multi-thread flow rule insertion rate.
+  * Added support for metering policy actions of yellow color.
 
 * **Added Wangxun ngbe PMD.**
 
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index a2fe9b90c7..ea16109972 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -629,7 +629,7 @@ struct mlx5_dev_shared_port {
  */
 #define MLX5_DV_MAX_NUMBER_OF_ACTIONS 8
 
-/*ASO flow meter structures*/
+/* ASO flow meter structures */
 /* Modify this value if enum rte_mtr_color changes. */
 #define RTE_MTR_DROPPED RTE_COLORS
 /* Yellow is now supported. */
@@ -643,6 +643,12 @@ struct mlx5_dev_shared_port {
 #define MLX5_MTR_TABLE_ID_DROP 2
 /* Priority of the meter policy matcher. */
 #define MLX5_MTR_POLICY_MATCHER_PRIO 0
+/* Default policy. */
+#define MLX5_MTR_POLICY_MODE_DEF 1
+/* Only green color valid. */
+#define MLX5_MTR_POLICY_MODE_OG 2
+/* Only yellow color valid. */
+#define MLX5_MTR_POLICY_MODE_OY 3
 
 enum mlx5_meter_domain {
MLX5_MTR_DOMAIN_INGRESS,
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index d90c8cd314..549b3058c2 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -7199,14 +7199,14 @@ mlx5_flow_validate_mtr_acts(struct rte_eth_dev *dev,
struct rte_flow_attr *attr,
bool *is_rss,
uint8_t *domain_bitmap,
-   bool *is_def_policy,
+   uint8_t *policy_mode,
struct rte_mtr_error *error)
 {
const struct mlx5_flow_driver_ops *fops;
 
fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
-   return fops->validate_mtr_acts(dev, actions, attr,
-   is_rss, domain_bitmap, is_def_policy, error);
+   return fops->validate_mtr_acts(dev, actions, attr, is_rss,
+  domain_bitmap, policy_mode, error);
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 8f0521aa72..3724293d26 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -1209,7 +1209,7 @@ typedef int (*mlx5_flow_validate_mtr_acts_t)
 struct rte_flow_attr *attr,
 bool *is_rss,
 uint8_t *domain_bitmap,
-b

Re: [dpdk-dev] [EXT] Re: [PATCH v2] crypto/mvsam: IPSec full offload support

2021-07-21 Thread Akhil Goyal
> The correct wording is "IPsec"
> and it is flagged by devtools/check-git-log.sh
> 

Ahh my bad! Missed to run that on last patch.

> 20/07/2021 22:49, Akhil Goyal:
> > > From: Michael Shamis 
> > >
> > > This patch provides the support for IPSec protocol
> > > offload to the hardware.
> > > Following security operations are added:
> > > - session_create
> > > - session_destroy
> > > - capabilities_get
> > >
> > > Signed-off-by: Michael Shamis 
> > > Reviewed-by: Liron Himi 
> > > Tested-by: Liron Himi 
> > > ---
> > Applied to dpdk-next-crypto
> 
> A checkpatch warning was ignored:
> 
> Warning in drivers/crypto/mvsam/rte_mrvl_pmd.c:
> Declaring a variable inside for()
> 
Will be fixing in next few minutes.
Thanks for highlighting



[dpdk-dev] [PATCH v5] build: optional NUMA and cpu counts detection

2021-07-21 Thread Juraj Linkeš
Add an option to automatically discover the host's numa and cpu counts
and use those values for a non cross-build.
Give users the option to override the per-arch default values or values
from cross files by specifying them on the command line with -Dmax_lcores
and -Dmax_numa_nodes.

Signed-off-by: Juraj Linkeš 
Reviewed-by: Honnappa Nagarahalli 
Reviewed-by: David Christensen 
Acked-by: Bruce Richardson 
---
 MAINTAINERS  |  2 ++
 buildtools/get-cpu-count.py  |  7 ++
 buildtools/get-numa-count.py | 24 ++
 buildtools/meson.build   |  2 ++
 config/meson.build   | 47 ++--
 config/x86/meson.build   |  2 ++
 meson_options.txt|  8 +++---
 7 files changed, 86 insertions(+), 6 deletions(-)
 create mode 100644 buildtools/get-cpu-count.py
 create mode 100644 buildtools/get-numa-count.py

diff --git a/MAINTAINERS b/MAINTAINERS
index af2a91d7c4..f0e82598aa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -102,6 +102,8 @@ F: meson_options.txt
 F: config/
 F: buildtools/chkincs/
 F: buildtools/call-sphinx-build.py
+F: buildtools/get-cpu-count.py
+F: buildtools/get-numa-count.py
 F: buildtools/list-dir-globs.py
 F: buildtools/pkg-config/
 F: buildtools/symlink-drivers-solibs.sh
diff --git a/buildtools/get-cpu-count.py b/buildtools/get-cpu-count.py
new file mode 100644
index 00..317b32088f
--- /dev/null
+++ b/buildtools/get-cpu-count.py
@@ -0,0 +1,7 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2021 PANTHEON.tech s.r.o.
+
+import os
+
+print(os.cpu_count())
diff --git a/buildtools/get-numa-count.py b/buildtools/get-numa-count.py
new file mode 100644
index 00..1b7787787f
--- /dev/null
+++ b/buildtools/get-numa-count.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2021 PANTHEON.tech s.r.o.
+
+import ctypes
+import glob
+import os
+import subprocess
+
+if os.name == 'posix':
+if os.path.isdir('/sys/devices/system/node'):
+numa_nodes = glob.glob('/sys/devices/system/node/node*')
+numa_nodes.sort()
+print(int(os.path.basename(numa_nodes[-1])[4:]) + 1)
+else:
+subprocess.run(['sysctl', '-n', 'vm.ndomains'], check=False)
+
+elif os.name == 'nt':
+libkernel32 = ctypes.windll.kernel32
+
+numa_count = ctypes.c_ulong()
+
+libkernel32.GetNumaHighestNodeNumber(ctypes.pointer(numa_count))
+print(numa_count.value + 1)
diff --git a/buildtools/meson.build b/buildtools/meson.build
index bd460e3e00..f776316da1 100644
--- a/buildtools/meson.build
+++ b/buildtools/meson.build
@@ -16,6 +16,8 @@ echo = py3 + ['-c', 'import sys; print(*sys.argv[1:])']
 list_dir_globs = py3 + files('list-dir-globs.py')
 map_to_win_cmd = py3 + files('map_to_win.py')
 sphinx_wrapper = py3 + files('call-sphinx-build.py')
+get_cpu_count_cmd = py3 + files('get-cpu-count.py')
+get_numa_count_cmd = py3 + files('get-numa-count.py')
 
 # select library and object file format
 pmdinfo = py3 + files('gen-pmdinfo-cfile.py') + [meson.current_build_dir()]
diff --git a/config/meson.build b/config/meson.build
index e80421003b..364788c32d 100644
--- a/config/meson.build
+++ b/config/meson.build
@@ -286,8 +286,6 @@ foreach arg: warning_flags
 endforeach
 
 # set other values pulled from the build options
-dpdk_conf.set('RTE_MAX_LCORE', get_option('max_lcores'))
-dpdk_conf.set('RTE_MAX_NUMA_NODES', get_option('max_numa_nodes'))
 dpdk_conf.set('RTE_MAX_ETHPORTS', get_option('max_ethports'))
 dpdk_conf.set('RTE_LIBEAL_USE_HPET', get_option('use_hpet'))
 dpdk_conf.set('RTE_ENABLE_TRACE_FP', get_option('enable_trace_fp'))
@@ -321,6 +319,51 @@ if meson.is_cross_build()
 endif
 endif
 
+max_lcores = get_option('max_lcores')
+if max_lcores == 'detect'
+   # Discovery makes sense only for non-cross builds
+   if meson.is_cross_build()
+   error('Discovery of max_lcores is not supported for 
cross-compilation.')
+   endif
+   # Overwrite the default value with discovered values
+   max_lcores = run_command(get_cpu_count_cmd).stdout().to_int()
+   min_lcores = 2
+   # DPDK must be build for at least 2 cores
+   if max_lcores < min_lcores
+   message('Found less than @0@ cores, building for @0@ 
cores'.format(min_lcores))
+   max_lcores = min_lcores
+   else
+   message('Found @0@ cores'.format(max_lcores))
+   endif
+   dpdk_conf.set('RTE_MAX_LCORE', max_lcores)
+elif max_lcores != 'default'
+   # Overwrite the default value from arch_subdir with user input
+   dpdk_conf.set('RTE_MAX_LCORE', max_lcores.to_int())
+endif
+
+max_numa_nodes = get_option('max_numa_nodes')
+if max_numa_nodes == 'detect'
+   # Discovery makes sense only for non-cross builds
+   if meson.is_cross_build()
+   error('Discovery of max_numa_nodes not supported for 
cross-compilation.')
+   endif
+   # Overwrite the default value with dis

Re: [dpdk-dev] [EXT] Re: [PATCH v2] crypto/mvsam: IPSec full offload support

2021-07-21 Thread Akhil Goyal
> 
> > The correct wording is "IPsec"
> > and it is flagged by devtools/check-git-log.sh
> >
> 
> Ahh my bad! Missed to run that on last patch.
> 
> > 20/07/2021 22:49, Akhil Goyal:
> > > > From: Michael Shamis 
> > > >
> > > > This patch provides the support for IPSec protocol
> > > > offload to the hardware.
> > > > Following security operations are added:
> > > > - session_create
> > > > - session_destroy
> > > > - capabilities_get
> > > >
> > > > Signed-off-by: Michael Shamis 
> > > > Reviewed-by: Liron Himi 
> > > > Tested-by: Liron Himi 
> > > > ---
> > > Applied to dpdk-next-crypto
> >
> > A checkpatch warning was ignored:
> >
> > Warning in drivers/crypto/mvsam/rte_mrvl_pmd.c:
> > Declaring a variable inside for()
> >
> Will be fixing in next few minutes.
> Thanks for highlighting

It is fixed now. 
Documentation update was also missing. It is also added.

@Dana Vardi, @Michael Shamis: please ensure to update documentation
Along with patch.



Re: [dpdk-dev] [EXT] Re: [PATCH v2] crypto/mvsam: IPSec full offload support

2021-07-21 Thread Thomas Monjalon
21/07/2021 15:14, Akhil Goyal:
> > > A checkpatch warning was ignored:
> > >
> > > Warning in drivers/crypto/mvsam/rte_mrvl_pmd.c:
> > > Declaring a variable inside for()
> > >
> > Will be fixing in next few minutes.
> > Thanks for highlighting
> 
> It is fixed now. 
> Documentation update was also missing. It is also added.

Thanks

> @Dana Vardi, @Michael Shamis: please ensure to update documentation
> Along with patch.

You should implement checks for crypto doc in devtools/check-doc-vs-code.sh





Re: [dpdk-dev] [PATCH v6 1/5] vhost: fix async vhost ops return type

2021-07-21 Thread Maxime Coquelin



On 7/19/21 10:10 AM, Cheng Jiang wrote:
> The async vhost callback ops should return negative value when there
> are something wrong in the callback, so the return type should be
> changed into int32_t. The issue in vhost example is also fixed.
> 
> Fixes: cd6760da1076 ("vhost: introduce async enqueue for split ring")
> Fixes: 819a71685826 ("vhost: fix async callback return type")
> Fixes: 6b3c81db8bb7 ("vhost: simplify async copy completion")
> Fixes: abec60e7115d ("examples/vhost: support vhost async data path")
> Fixes: 873e8dad6f49 ("vhost: support packed ring in async datapath")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Cheng Jiang 
> ---
>  examples/vhost/ioat.c   |  4 +--
>  examples/vhost/ioat.h   |  4 +--
>  lib/vhost/rte_vhost_async.h |  8 ++---
>  lib/vhost/virtio_net.c  | 61 -
>  4 files changed, 61 insertions(+), 16 deletions(-)
> 

Reviewed-by: Maxime Coquelin 

Thanks,
Maxime



Re: [dpdk-dev] [PATCH v6 2/5] vhost: add unsafe API to clear packets in async vhost

2021-07-21 Thread Maxime Coquelin



On 7/19/21 10:10 AM, Cheng Jiang wrote:
> Applications need to stop DMA transfers and finish all the inflight
> packets when in VM memory hot-plug case and async vhost is used. This
> patch is to provide an unsafe API to clear inflight packets which
> are submitted to DMA engine in vhost async data path.
> 
> Signed-off-by: Cheng Jiang 
> ---
>  lib/vhost/rte_vhost_async.h | 22 +
>  lib/vhost/version.map   |  3 ++
>  lib/vhost/virtio_net.c  | 93 +++--
>  3 files changed, 94 insertions(+), 24 deletions(-)
> 

Reviewed-by: Maxime Coquelin 

Thanks,
Maxime



[dpdk-dev] [PATCH v1 1/1] power: fix multi-queue scale mode for pmd mgmt

2021-07-21 Thread Anatoly Burakov
Currently in scale mode, multi-queue initialization will attempt to
initialize and de-initialize the per-lcore power library structures
multiple times. Fix it to only do this when we are either enabling
the first queue or disabling the last queue.

Fixes: 5dff9a72b0ef ("power: support callbacks for multiple Rx queues")

Signed-off-by: Anatoly Burakov 
---
 lib/power/rte_power_pmd_mgmt.c | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/lib/power/rte_power_pmd_mgmt.c b/lib/power/rte_power_pmd_mgmt.c
index 2586204b93..0ce40f0875 100644
--- a/lib/power/rte_power_pmd_mgmt.c
+++ b/lib/power/rte_power_pmd_mgmt.c
@@ -534,11 +534,15 @@ rte_power_ethdev_pmgmt_queue_enable(unsigned int 
lcore_id, uint16_t port_id,
clb = get_monitor_callback();
break;
case RTE_POWER_MGMT_TYPE_SCALE:
-   /* check if we can add a new queue */
-   ret = check_scale(lcore_id);
-   if (ret < 0)
-   goto end;
clb = clb_scale_freq;
+
+   /* we only have to check this when enabling first queue */
+   if (lcore_cfg->pwr_mgmt_state != PMD_MGMT_DISABLED)
+   break;
+   /* check if we can add a new queue */
+   ret = check_scale(lcore_id);
+   if (ret < 0)
+   goto end;
break;
case RTE_POWER_MGMT_TYPE_PAUSE:
/* figure out various time-to-tsc conversions */
@@ -633,9 +637,12 @@ rte_power_ethdev_pmgmt_queue_disable(unsigned int lcore_id,
rte_eth_remove_rx_callback(port_id, queue_id, queue_cfg->cb);
break;
case RTE_POWER_MGMT_TYPE_SCALE:
-   rte_power_freq_max(lcore_id);
rte_eth_remove_rx_callback(port_id, queue_id, queue_cfg->cb);
-   rte_power_exit(lcore_id);
+   /* disable power library on this lcore if this was last queue */
+   if (lcore_cfg->pwr_mgmt_state == PMD_MGMT_DISABLED) {
+   rte_power_freq_max(lcore_id);
+   rte_power_exit(lcore_id);
+   }
break;
}
/*
-- 
2.25.1



Re: [dpdk-dev] [PATCH v6 3/5] vhost: handle memory hotplug for async vhost

2021-07-21 Thread Maxime Coquelin



On 7/19/21 10:10 AM, Cheng Jiang wrote:
> From: Jiayu Hu 
> 
> When the guest memory is hotplugged, the vhost application which
> enables DMA acceleration must stop DMA transfers before the vhost
> re-maps the guest memory.
> 
> This patch is to notify the vhost application of stopping DMA
> transfers.
> 
> Signed-off-by: Jiayu Hu 
> ---
>  lib/vhost/vhost_user.c | 16 
>  1 file changed, 16 insertions(+)
> 

Reviewed-by: Maxime Coquelin 

Thanks,
Maxime



Re: [dpdk-dev] [PATCH v6 4/5] examples/vhost: handle memory hotplug for async vhost

2021-07-21 Thread Maxime Coquelin



On 7/19/21 10:10 AM, Cheng Jiang wrote:
> When the guest memory is hotplugged, the vhost application which
> enables DMA acceleration must stop DMA transfers before the vhost
> re-maps the guest memory.
> 
> To accomplish that, we need to do these changes in the vhost sample:
> 1. add inflight packets count.
> 2. add vring_state_changed() callback.
> 3. add inflight packets clear process in destroy_device() and
> vring_state_changed().
> 
> Signed-off-by: Cheng Jiang 
> ---
>  examples/vhost/main.c | 55 +--
>  examples/vhost/main.h |  1 +
>  2 files changed, 54 insertions(+), 2 deletions(-)
> 

Reviewed-by: Maxime Coquelin 

Thanks,
Maxime



Re: [dpdk-dev] [PATCH v6 5/5] doc: update doc for inflight packets clear API in vhost lib

2021-07-21 Thread Maxime Coquelin



On 7/19/21 10:10 AM, Cheng Jiang wrote:
> Update the program guide and release notes for inflight packets clear
> API in vhost lib.
> 
> Signed-off-by: Cheng Jiang 
> ---
>  doc/guides/prog_guide/vhost_lib.rst| 5 +
>  doc/guides/rel_notes/release_21_08.rst | 5 +
>  2 files changed, 10 insertions(+)
> 
> diff --git a/doc/guides/prog_guide/vhost_lib.rst 
> b/doc/guides/prog_guide/vhost_lib.rst
> index d18fb98910..3cdfdc0725 100644
> --- a/doc/guides/prog_guide/vhost_lib.rst
> +++ b/doc/guides/prog_guide/vhost_lib.rst
> @@ -281,6 +281,11 @@ The following is an overview of some key Vhost API 
> functions:
>Poll enqueue completion status from async data path. Completed packets
>are returned to applications through ``pkts``.
>  
> +* ``rte_vhost_clear_queue_thread_unsafe(vid, queue_id, **pkts, count)``
> +
> +  Clear inflight packets which are submitted to DMA engine in vhost async 
> data
> +  path. Completed packets are returned to applications through ``pkts``.
> +
>  Vhost-user Implementations
>  --
>  
> diff --git a/doc/guides/rel_notes/release_21_08.rst 
> b/doc/guides/rel_notes/release_21_08.rst
> index 6a902ef9ac..482d16ba13 100644
> --- a/doc/guides/rel_notes/release_21_08.rst
> +++ b/doc/guides/rel_notes/release_21_08.rst
> @@ -117,6 +117,11 @@ New Features
>The experimental PMD power management API now supports managing
>multiple Ethernet Rx queues per lcore.
>  
> +* **Added inflight packets clear API in vhost library.**
> +
> +  Added an API which can clear the inflight packets submitted to DMA
> +  engine in vhost async data path.
> +
>  
>  Removed Items
>  -
> 

Reviewed-by: Maxime Coquelin 

Thanks,
Maxime



Re: [dpdk-dev] [PATCH v1 1/1] power: fix multi-queue scale mode for pmd mgmt

2021-07-21 Thread David Hunt

Hi Anatoly,

On 21/7/2021 3:26 PM, Anatoly Burakov wrote:

Currently in scale mode, multi-queue initialization will attempt to
initialize and de-initialize the per-lcore power library structures
multiple times. Fix it to only do this whenever we either enabling
first queue or disabling last queue.

Fixes: 5dff9a72b0ef ("power: support callbacks for multiple Rx queues")

Signed-off-by: Anatoly Burakov 
---
  lib/power/rte_power_pmd_mgmt.c | 19 +--
  1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/lib/power/rte_power_pmd_mgmt.c b/lib/power/rte_power_pmd_mgmt.c
index 2586204b93..0ce40f0875 100644
--- a/lib/power/rte_power_pmd_mgmt.c
+++ b/lib/power/rte_power_pmd_mgmt.c
@@ -534,11 +534,15 @@ rte_power_ethdev_pmgmt_queue_enable(unsigned int 
lcore_id, uint16_t port_id,
clb = get_monitor_callback();
break;
case RTE_POWER_MGMT_TYPE_SCALE:
-   /* check if we can add a new queue */
-   ret = check_scale(lcore_id);
-   if (ret < 0)
-   goto end;
clb = clb_scale_freq;
+
+   /* we only have to check this when enabling first queue */
+   if (lcore_cfg->pwr_mgmt_state != PMD_MGMT_DISABLED)
+   break;
+   /* check if we can add a new queue */
+   ret = check_scale(lcore_id);
+   if (ret < 0)
+   goto end;
break;
case RTE_POWER_MGMT_TYPE_PAUSE:
/* figure out various time-to-tsc conversions */
@@ -633,9 +637,12 @@ rte_power_ethdev_pmgmt_queue_disable(unsigned int lcore_id,
rte_eth_remove_rx_callback(port_id, queue_id, queue_cfg->cb);
break;
case RTE_POWER_MGMT_TYPE_SCALE:
-   rte_power_freq_max(lcore_id);
rte_eth_remove_rx_callback(port_id, queue_id, queue_cfg->cb);
-   rte_power_exit(lcore_id);
+   /* disable power library on this lcore if this was last queue */
+   if (lcore_cfg->pwr_mgmt_state == PMD_MGMT_DISABLED) {
+   rte_power_freq_max(lcore_id);
+   rte_power_exit(lcore_id);
+   }
break;
}
/*



Fix looks good. Prior to this patch, adding a second queue to a core 
was failing; now, with this patch, it succeeds.


Tested-by: David Hunt 





Re: [dpdk-dev] [EXT] Re: [PATCH v2 1/2] drivers: add octeontx crypto adapter framework

2021-07-21 Thread Brandon Lo
On Wed, Jul 21, 2021 at 5:44 AM Thomas Monjalon  wrote:
>
> 20/07/2021 14:14, David Marchand:
> > On Tue, Jul 20, 2021 at 1:59 PM Akhil Goyal  wrote:
> > >
> > >  Hi David,
> > > >
> > > > > >  deps += ['common_octeontx', 'mempool_octeontx', 'bus_vdev',
> > > > > 'net_octeontx']
> > > > > > +deps += ['crypto_octeontx']
> > > > >
> > > > > This extra dependency resulted in disabling the event/octeontx driver
> > > > > in FreeBSD, since crypto/octeontx only builds on Linux.
> > > > > Removing hw support triggers a ABI failure for FreeBSD.
> > > > >
> > > > >
> > > > > - This had been reported by UNH CI:
> > > > > http://mails.dpdk.org/archives/test-report/2021-June/200637.html
> > > > > It seems the result has been ignored but it should have at least
> > > > > raised some discussion.
> > > > >
> > > > This was highlighted to CI ML
> > > > http://patches.dpdk.org/project/dpdk/patch/0686a7c3fb3a22e37378a8545b
> > > > c37bce04f4c391.1624481225.git.sthot...@marvell.com/
> > > >
> > > > but I think I missed to take the follow up with Brandon and applied the 
> > > > patch
> > > > as it did not look an issue to me as octeon drivers are not currently 
> > > > built on
> > > > FreeBSD.
> > > > Not sure why event driver is getting built there.
> > > >
> > > > >
> > > > > - I asked UNH to stop testing FreeBSD abi for now, waiting to get the
> > > > > main branch fixed.
> > > > >
> > > > > I don't have the time to look at this, please can you work on it?
> > > > >
> > > > > Several options:
> > > > > * crypto/octeontx is made so that it compiles on FreeBSD,
> > > > > * the abi check is extended to have exceptions per OS,
> > > > > * the FreeBSD abi reference is regenerated at UNH not to have those
> > > > > drivers in it (not sure it is doable),
> > > >
> > > > Thanks for the suggestions, we are working on it to resolve this as 
> > > > soon as
> > > > possible.
> > > > We may need to add exception in ABI checking so that it does not shout 
> > > > if a
> > > > PMD
> > > > is not compiled.
> > > Can we have below change? Will it work to disable compilation of
> > > event/octeontx2 for FreeBSD? I believe this was done by mistake earlier
> > > as all other octeontx2 drivers are compiled off on platforms other than 
> > > Linux.
> > >
> > > diff --git a/drivers/event/octeontx2/meson.build 
> > > b/drivers/event/octeontx2/meson.build
> > > index 96ebb1f2e7..1ebc51f73f 100644
> > > --- a/drivers/event/octeontx2/meson.build
> > > +++ b/drivers/event/octeontx2/meson.build
> > > @@ -2,7 +2,7 @@
> > >  # Copyright(C) 2019 Marvell International Ltd.
> > >  #
> > >
> > > -if not dpdk_conf.get('RTE_ARCH_64')
> > > +if not is_linux or not dpdk_conf.get('RTE_ARCH_64')
> > >  build = false
> > >  reason = 'only supported on 64-bit'
> > >  subdir_done()
> >
> > I did not suggest this possibility.
> > That's the same as for other octeon drivers, such change has been
> > deferred to 21.11.
> > https://patches.dpdk.org/project/dpdk/list/?series=15885
> >
> > >
> > > Or if this does not work, then we would need to add an exception in ABI 
> > > checking.
> > > Any suggestions how to do this?
> >
> > Sorry, no good idea from me.
>
> We would need to revert the change breaking the ABI test.
> But I don't understand why it seems passing in recent CI runs?

Hi Thomas,

For the UNH lab, FreeBSD 13 ABI tests have been disabled due to a request
made during the community CI meeting on July 15th.

The recent CI ABI runs will show up as passes, but the older runs with
FreeBSD 13 included will keep their recorded failures.

Thanks,
Brandon


--
Brandon Lo
UNH InterOperability Laboratory
21 Madbury Rd, Suite 100, Durham, NH 03824
b...@iol.unh.edu
www.iol.unh.edu


[dpdk-dev] imissed drop with mellanox connectx5

2021-07-21 Thread Yaron Illouz
Hi

We try to read from 100G NIC Mellanox ConnectX-5  without drop at nic.
All thread are with core pinning and cpu isolation.
We use dpdk 19.11
I tried to apply all configuration that are in 
https://fast.dpdk.org/doc/perf/DPDK_19_08_Mellanox_NIC_performance_report.pdf

We have a strange behavior: 1 thread can receive 20 Gbps/12 Mpps and 
free mbufs without drops, but when trying to pass these mbufs to another thread 
that only frees them there are drops, even when trying to work with more threads.

When running 1 thread that only reads from the port (no multi queue) and frees mbufs 
in the same thread, there are no drops with traffic up to 21 Gbps  12.4 Mpps.
When running 6 threads that only read from the port (with multi queue) and free mbufs 
in the same threads, there are no drops with traffic up to 21 Gbps  12.4 Mpps.

When running 1 to 6 threads that only read from the port and pass mbufs to another 6 
threads that only read from the ring and free the mbufs, there are drops at the NIC (imissed 
counter) with traffic over 10 Gbps  5.2 Mpps. (Here the receive threads were 
pinned to CPUs 1-6 and the additional threads to CPUs 7-12, each thread on a single CPU.)
Each receive thread sends to one thread that frees the buffers.

Configurations:

We use rings of size 32768 between the threads. Rings are initialized with 
SP/SC; writes are done in bulks of 512 with rte_ring_enqueue_burst.
Port is initialized with rte_eth_rx_queue_setup nb_rx_desc=8192
rte_eth_rxconf - rx_conf.rx_thresh.pthresh = DPDK_NIC_RX_PTHRESH; //ring 
prefetch threshold
rx_conf.rx_thresh.hthresh = 
DPDK_NIC_RX_HTHRESH; //ring host threshold
rx_conf.rx_thresh.wthresh = 
DPDK_NIC_RX_WTHRESH; //ring writeback threshold
rx_conf.rx_free_thresh = 
DPDK_NIC_RX_FREE_THRESH;
rss ->  ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP;


We tried to work with and without hyperthreading.



Network devices using kernel driver
===
:37:00.0 'MT27800 Family [ConnectX-5] 1017' if=ens2f0 drv=mlx5_core 
unused=igb_uio
:37:00.1 'MT27800 Family [ConnectX-5] 1017' if=ens2f1 drv=mlx5_core 
unused=igb_uio



ethtool -i ens2f0
driver: mlx5_core
version: 5.3-1.0.0
firmware-version: 16.30.1004 (HPE09)
expansion-rom-version:
bus-info: :37:00.0
supports-statistics: yes
supports-test: yes
supports-eeprom-access: no
supports-register-dump: no
supports-priv-flags: yes



uname -a
Linux localhost.localdomain 3.10.0-1160.el7.x86_64 #1 SMP Mon Oct 19 16:18:59 
UTC 2020 x86_64 x86_64 x86_64 GNU/Linux



lscpu | grep -e Socket -e Core -e Thread
Thread(s) per core:1
Core(s) per socket:24
Socket(s): 2


cat /sys/devices/system/node/node0/cpulist
0-23

From /proc/cpuinfo

processor   : 0
vendor_id   : GenuineIntel
cpu family  : 6
model   : 85
model name  : Intel(R) Xeon(R) Gold 5220R CPU @ 2.20GHz
stepping: 7
microcode   : 0x5003003
cpu MHz : 2200.000



python /home/cpu_layout.py
==
Core and Socket Information (as reported by '/sys/devices/system/cpu')
==

cores =  [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 16, 17, 18, 19, 20, 21, 
25, 26, 27, 28, 29, 24]
sockets =  [0, 1]

Socket 0Socket 1

Core 0  [0] [24]
Core 1  [1] [25]
Core 2  [2] [26]
Core 3  [3] [27]
Core 4  [4] [28]
Core 5  [5] [29]
Core 6  [6] [30]
Core 8  [7]
Core 9  [8] [31]
Core 10 [9] [32]
Core 11 [10][33]
Core 12 [11][34]
Core 13 [12][35]
Core 16 [13][36]
Core 17 [14][37]
Core 18 [15][38]
Core 19 [16][39]
Core 20 [17][40]
Core 21 [18][41]
Core 25 [19][43]
Core 26 [20][44]
Core 27 [21][45]
Core 28 [22][46]
Core 29 [23][47]
Core 24 [42]


Re: [dpdk-dev] [PATCH v2] eal/windows: enforce alarm APIs parameter check

2021-07-21 Thread Dmitry Kozlyuk
2021-07-07 13:25 (UTC-0700), Jie Zhou:
> eal/windows alarm APIs rte_eal_alarm_set and rte_eal_alarm_cancel
> did not check parameters to fail fast for invalid parameters, which
> was caught by DPDK UT alarm_autotest.
> 
> Enforce eal/windows alarm APIs parameter check to fail fast for
> invalid parameters.
> 
> Fixes: f4cbdbc7fbd2 ("eal/windows: implement alarm API")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Jie Zhou 
> 
> ---
> V2 changes:
> - Remove API parameter check on arbitrary 'us' range
> - Do explicit NULL cb_fn check
> 
> ---
>  lib/eal/windows/eal_alarm.c | 12 
>  1 file changed, 12 insertions(+)
> 
> diff --git a/lib/eal/windows/eal_alarm.c b/lib/eal/windows/eal_alarm.c
> index f5bf88715a..e5dc54efb8 100644
> --- a/lib/eal/windows/eal_alarm.c
> +++ b/lib/eal/windows/eal_alarm.c
> @@ -91,6 +91,12 @@ rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback 
> cb_fn, void *cb_arg)
>   LARGE_INTEGER deadline;
>   int ret;
>  
> + if (cb_fn == NULL) {
> + RTE_LOG(ERR, EAL, "NULL callback\n");
> + ret = -EINVAL;
> + goto exit;
> + }
> +
>   /* Calculate deadline ASAP, unit of measure = 100ns. */
>   GetSystemTimePreciseAsFileTime(&ft);
>   deadline.LowPart = ft.dwLowDateTime;
> @@ -180,6 +186,12 @@ rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void 
> *cb_arg)
>   bool executing;
>  
>   removed = 0;
> +
> + if (cb_fn == NULL) {
> + RTE_LOG(ERR, EAL, "NULL callback\n");
> + return -EINVAL;
> + }
> +
>   do {
>   executing = false;
>  

Acked-by: Dmitry Kozlyuk 


Re: [dpdk-dev] [PATCH 1/4] ethdev: fix max Rx packet length

2021-07-21 Thread Ferruh Yigit
On 7/19/2021 4:35 AM, Huisong Li wrote:
> Hi, Ferruh
> 

Hi Huisong,

Thanks for the review.

> 在 2021/7/10 1:29, Ferruh Yigit 写道:
>> There is a confusion on setting max Rx packet length, this patch aims to
>> clarify it.
>>
>> 'rte_eth_dev_configure()' API accepts max Rx packet size via
>> 'uint32_t max_rx_pkt_len' field of the config struct 'struct
>> rte_eth_conf'.
>>
>> Also 'rte_eth_dev_set_mtu()' API can be used to set the MTU, and result
>> stored into '(struct rte_eth_dev)->data->mtu'.
>>
>> These two APIs are related but they work in a disconnected way, they
>> store the set values in different variables which makes hard to figure
>> out which one to use, also two different related method is confusing for
>> the users.
>>
>> Other issues causing confusion are:
>> * maximum transmission unit (MTU) is payload of the Ethernet frame. And
>>    'max_rx_pkt_len' is the size of the Ethernet frame. Difference is
>>    Ethernet frame overhead, but this may be different from device to
>>    device based on what device supports, like VLAN and QinQ.
>> * 'max_rx_pkt_len' is only valid when application requested jumbo frame,
>>    which adds additional confusion and some APIs and PMDs already
>>    discards this documented behavior.
>> * For the jumbo frame enabled case, 'max_rx_pkt_len' is an mandatory
>>    field, this adds configuration complexity for application.
>>
>> As a solution, both APIs get MTU as a parameter, and both save the result
>> in the same variable '(struct rte_eth_dev)->data->mtu'. For this
>> 'max_rx_pkt_len' updated as 'mtu', and it is always valid independent
>> from jumbo frame.
>>
>> For 'rte_eth_dev_configure()', 'dev->data->dev_conf.rxmode.mtu' is user
>> request and it should be used only within configure function and result
>> should be stored to '(struct rte_eth_dev)->data->mtu'. After that point
>> both application and PMD uses MTU from this variable.
>>
>> When application doesn't provide an MTU during 'rte_eth_dev_configure()'
>> default 'RTE_ETHER_MTU' value is used.
>>
>> As additional clarification, MTU is used to configure the device for
>> physical Rx/Tx limitation. Other related issue is size of the buffer to
>> store Rx packets, many PMDs use mbuf data buffer size as Rx buffer size.
>> And compares MTU against Rx buffer size to decide enabling scattered Rx
>> or not, if PMD supports it. If scattered Rx is not supported by device,
>> MTU bigger than Rx buffer size should fail.
>>
>> Signed-off-by: Ferruh Yigit 

<...>

>> diff --git a/drivers/net/hns3/hns3_ethdev.c b/drivers/net/hns3/hns3_ethdev.c
>> index e51512560e15..8bccdeddb2f7 100644
>> --- a/drivers/net/hns3/hns3_ethdev.c
>> +++ b/drivers/net/hns3/hns3_ethdev.c
>> @@ -2379,20 +2379,11 @@ hns3_refresh_mtu(struct rte_eth_dev *dev, struct
>> rte_eth_conf *conf)
>>   {
>>   struct hns3_adapter *hns = dev->data->dev_private;
>>   struct hns3_hw *hw = &hns->hw;
>> -    uint32_t max_rx_pkt_len;
>> -    uint16_t mtu;
>> -    int ret;
>> -
>> -    if (!(conf->rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME))
>> -    return 0;
>> +    uint32_t max_rx_pktlen;
>>   -    /*
>> - * If jumbo frames are enabled, MTU needs to be refreshed
>> - * according to the maximum RX packet length.
>> - */
>> -    max_rx_pkt_len = conf->rxmode.max_rx_pkt_len;
>> -    if (max_rx_pkt_len > HNS3_MAX_FRAME_LEN ||
>> -    max_rx_pkt_len <= HNS3_DEFAULT_FRAME_LEN) {
>> +    max_rx_pktlen = conf->rxmode.mtu + HNS3_ETH_OVERHEAD;
>> +    if (max_rx_pktlen > HNS3_MAX_FRAME_LEN ||
>> +    max_rx_pktlen <= HNS3_DEFAULT_FRAME_LEN) {
>>   hns3_err(hw, "maximum Rx packet length must be greater than %u "
>>    "and no more than %u when jumbo frame enabled.",
>>    (uint16_t)HNS3_DEFAULT_FRAME_LEN,
> 
> The preceding check for the maximum frame length was based on the scenario 
> where
> jumbo frames are enabled.
> 
> Since there is no offload of jumbo frames in this patchset, the maximum frame
> length does not need to be checked and only ensure conf->rxmode.mtu is valid.
> 
> These should be guaranteed by dev_configure() in the framework .
> 

Got it, agree that 'HNS3_DEFAULT_FRAME_LEN' check is now wrong, and as you said
these checks are becoming redundant, so I will remove them.

In that case 'hns3_refresh_mtu()' becomes just a wrapper to 'hns3_dev_mtu_set()',
so I will remove the function too.

<...>

>> diff --git a/drivers/net/hns3/hns3_ethdev_vf.c
>> b/drivers/net/hns3/hns3_ethdev_vf.c
>> index e582503f529b..ca839fa55fa0 100644
>> --- a/drivers/net/hns3/hns3_ethdev_vf.c
>> +++ b/drivers/net/hns3/hns3_ethdev_vf.c
>> @@ -784,8 +784,7 @@ hns3vf_dev_configure(struct rte_eth_dev *dev)
>>   uint16_t nb_rx_q = dev->data->nb_rx_queues;
>>   uint16_t nb_tx_q = dev->data->nb_tx_queues;
>>   struct rte_eth_rss_conf rss_conf;
>> -    uint32_t max_rx_pkt_len;
>> -    uint16_t mtu;
>> +    uint32_t max_rx_pktlen;
>>   bool gro_en;
>>   int ret;
>>   @@ -825,29 +824,21 @@ hns3vf_dev_configure(struct 

Re: [dpdk-dev] [dpdk-announce] release candidate 21.08-rc1

2021-07-21 Thread Kalesh Anakkur Purayil
All,

The following is a list of tests executed with 21.08-rc1:

- Basic functionality:
  Send and receive multiple types of traffic.
- testpmd xstats counter test.
- RSS tests.
- VLAN filtering tests.
- Rx Checksum tests
- TSO tests.
- MTU and Jumbo frame tests
- Changing/checking link status through testpmd.
- Unicast/multicast MAC filtering tests
- VXLAN/Geneve Rx CSO, TSO, RSS tests

One issue was observed and we have pushed a fix for that too. We don't see
any critical issues.

Regards,
Kalesh

On Sat, Jul 10, 2021 at 3:35 PM Thomas Monjalon  wrote:

> A new DPDK release candidate is ready for testing:
> https://git.dpdk.org/dpdk/tag/?id=v21.08-rc1
>
> There are 517 new patches in this snapshot.
> This release cycle is short and should be small.
>
> Release notes:
> https://doc.dpdk.org/guides/rel_notes/release_21_08.html
>
> Highlights of 21.08-rc1:
> - Linux auxiliary bus
> - Aarch32 cross-compilation
> - Arm CPPC power management
> - Rx multi-queue monitoring for power management
> - XZ compressed firmware read
> - Marvell CNXK drivers for ethernet, crypto and baseband PHY
>
> Please test and report issues on bugs.dpdk.org.
>
> DPDK 21.08-rc2 is expected in less than two weeks.
>
> Thank you everyone
>
>
>

-- 
Regards,
Kalesh A P


Re: [dpdk-dev] [PATCH] bus/vmbus: Fix crash when handling packets in secondary process

2021-07-21 Thread Stephen Hemminger
Looks good, minor comment. You don't have to check for NULL before calling 
rte_free().
rte_free(NULL) is a NOP like free(NULL).

Sorry for top posting; but if you send to my Microsoft account you are stuck 
with what
Outlook can do...

-Original Message-
From: jerb  
Sent: Tuesday, July 20, 2021 7:59 AM
To: Stephen Hemminger ; Long Li 
Cc: dev@dpdk.org; jerb 
Subject: [PATCH] bus/vmbus: Fix crash when handling packets in secondary process

Have secondary processes construct their own copy of primary channel with
own mappings.

Remove vmbus_channel primary ptr from struct mapped_vmbus_resource as it's
not used.

Populate virtual memory address "addr" in struct rte_mem_resource for
secondary processes as netvsc will attempt to reference it thus causing
a crash. It was initialized for primary processes but not for secondary.

Signed-off-by: jerb 
---
 drivers/bus/vmbus/private.h  |  1 -
 drivers/bus/vmbus/vmbus_channel.c|  4 +---
 drivers/bus/vmbus/vmbus_common_uio.c | 15 ++-
 3 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/bus/vmbus/private.h b/drivers/bus/vmbus/private.h
index 528d60a42f..746212bd5f 100644
--- a/drivers/bus/vmbus/private.h
+++ b/drivers/bus/vmbus/private.h
@@ -42,7 +42,6 @@ struct mapped_vmbus_resource {
 
rte_uuid_t id;
int nb_maps;
-   struct vmbus_channel *primary;
struct vmbus_map maps[VMBUS_MAX_RESOURCE];
char path[PATH_MAX];
 };
diff --git a/drivers/bus/vmbus/vmbus_channel.c 
b/drivers/bus/vmbus/vmbus_channel.c
index f67f1c438a..119b9b367e 100644
--- a/drivers/bus/vmbus/vmbus_channel.c
+++ b/drivers/bus/vmbus/vmbus_channel.c
@@ -351,10 +351,8 @@ int rte_vmbus_chan_open(struct rte_vmbus_device *device,
 
err = vmbus_chan_create(device, device->relid, 0,
device->monitor_id, new_chan);
-   if (!err) {
+   if (!err)
device->primary = *new_chan;
-   uio_res->primary = *new_chan;
-   }
 
return err;
 }
diff --git a/drivers/bus/vmbus/vmbus_common_uio.c 
b/drivers/bus/vmbus/vmbus_common_uio.c
index 8582e32c1d..7b9a8ef434 100644
--- a/drivers/bus/vmbus/vmbus_common_uio.c
+++ b/drivers/bus/vmbus/vmbus_common_uio.c
@@ -69,8 +69,10 @@ vmbus_uio_map_secondary(struct rte_vmbus_device *dev)
 fd, offset,
 uio_res->maps[i].size, 0);
 
-   if (mapaddr == uio_res->maps[i].addr)
+   if (mapaddr == uio_res->maps[i].addr) {
+   dev->resource[i].addr = mapaddr;
continue;   /* successful map */
+   }
 
if (mapaddr == MAP_FAILED)
VMBUS_LOG(ERR,
@@ -88,9 +90,9 @@ vmbus_uio_map_secondary(struct rte_vmbus_device *dev)
/* fd is not needed in secondary process, close it */
close(fd);
 
-   dev->primary = uio_res->primary;
-   if (!dev->primary) {
-   VMBUS_LOG(ERR, "missing primary channel");
+   if (vmbus_chan_create(dev, dev->relid, 0,
+   dev->monitor_id, &dev->primary)) {
+   VMBUS_LOG(ERR, "cannot create primary channel");
return -1;
}
 
@@ -211,8 +213,11 @@ vmbus_uio_unmap_resource(struct rte_vmbus_device *dev)
return;
 
/* secondary processes - just free maps */
-   if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+   if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+   if (dev->primary != NULL)
+   rte_free(dev->primary);
return vmbus_uio_unmap(uio_res);
+   }
 
TAILQ_REMOVE(uio_res_list, uio_res, next);
 
-- 
2.17.1



[dpdk-dev] [PATCH 0/4] doc: update RTE flow rule and bonding related info

2021-07-21 Thread Martin Havlik
Based on discussion in thread [1], I propose changes in RTE Flow docs to
reflect the current state and findings from the discussion.

[1] https://mails.dpdk.org/archives/dev/2021-July/213360.html

Cc: Jan Viktorin 
Cc: Ori Kam 
Cc: Matan Azrad 
Cc: Thomas Monjalon 
Cc: Andrew Rybchenko 
Cc: "Min Hu (Connor)" 

Martin Havlik (4):
  doc: clarify RTE flow behaviour on port stop/start
  doc: specify RTE flow create behaviour
  doc: update bonding mode 8023ad info
  doc: note that testpmd on mlx5 has dedicated queues problem

 doc/guides/nics/mlx5.rst | 3 ++-
 doc/guides/prog_guide/link_bonding_poll_mode_drv_lib.rst | 8 
 doc/guides/prog_guide/rte_flow.rst   | 4 
 doc/guides/testpmd_app_ug/testpmd_funcs.rst  | 3 +++
 4 files changed, 17 insertions(+), 1 deletion(-)

-- 
1.8.3.1



[dpdk-dev] [PATCH 1/4] doc: clarify RTE flow behaviour on port stop/start

2021-07-21 Thread Martin Havlik
It is now clearly stated that RTE flow rules can be
created only after the port is started.

Signed-off-by: Martin Havlik 
---
 doc/guides/nics/mlx5.rst | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index f5b727c1ee..119d537adf 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -1790,21 +1790,25 @@ Notes for rte_flow
 --
 
 Flows are not cached in the driver.
 When stopping a device port, all the flows created on this port from the
 application will be flushed automatically in the background.
 After stopping the device port, all flows on this port become invalid and
 not represented in the system.
 All references to these flows held by the application should be discarded
 directly but neither destroyed nor flushed.
 
-The application should re-create the flows as required after the port restart.
+The application should re-create the flows as required after the port is
+started again.
+
+Creating flows before port start is not permitted. All flows the application
+wants to create have to be created after the port is started.
 
 Notes for testpmd
 -
 
 Compared to librte_net_mlx4 that implements a single RSS configuration per
 port, librte_net_mlx5 supports per-protocol RSS configuration.
 
 Since ``testpmd`` defaults to IP RSS mode and there is currently no
 command-line parameter to enable additional protocols (UDP and TCP as well
 as IP), the following commands must be entered from its CLI to get the same
-- 
2.27.0



[dpdk-dev] [PATCH 2/4] doc: specify RTE flow create behaviour

2021-07-21 Thread Martin Havlik
The ability to create RTE flow rules, depending on
port status, can and does differ between PMDs.
Now the doc reflects that.

Signed-off-by: Martin Havlik 
---
 doc/guides/prog_guide/rte_flow.rst | 4 
 1 file changed, 4 insertions(+)

diff --git a/doc/guides/prog_guide/rte_flow.rst 
b/doc/guides/prog_guide/rte_flow.rst
index 2b42d5ec8c..2988e3328a 100644
--- a/doc/guides/prog_guide/rte_flow.rst
+++ b/doc/guides/prog_guide/rte_flow.rst
@@ -3097,6 +3097,10 @@ actually created and a handle returned.
const struct rte_flow_action *actions[],
struct rte_flow_error *error);
 
+The ability to create a flow rule may depend on the status (started/stopped)
+of the port for which the rule is being created. This behaviour is
+PMD specific. Seek relevant PMD documentation for details.
+
 Arguments:
 
 - ``port_id``: port identifier of Ethernet device.
-- 
2.27.0



[dpdk-dev] [PATCH 3/4] doc: update bonding mode 8023ad info

2021-07-21 Thread Martin Havlik
Included info on dedicated queues and added related
note about issue on mlx5.

Signed-off-by: Martin Havlik 
---
 doc/guides/prog_guide/link_bonding_poll_mode_drv_lib.rst | 8 
 1 file changed, 8 insertions(+)

diff --git a/doc/guides/prog_guide/link_bonding_poll_mode_drv_lib.rst 
b/doc/guides/prog_guide/link_bonding_poll_mode_drv_lib.rst
index 30c56cd375..19c65f314c 100644
--- a/doc/guides/prog_guide/link_bonding_poll_mode_drv_lib.rst
+++ b/doc/guides/prog_guide/link_bonding_poll_mode_drv_lib.rst
@@ -116,10 +116,18 @@ Currently the Link Bonding PMD library supports following 
modes of operation:
 #. Calls to ``rte_eth_tx_burst`` must have a buffer size of at least 2xN,
where N is the number of slaves. This is a space required for LACP
frames. Additionally LACP packets are included in the statistics, but
they are not returned to the application.
 
+This mode also supports enabling dedicated rx and tx queues for handling
+LACP frames separately from fast application path, resulting in
+a potential performance improvement.
+
+.. note::
+Currently mlx5 doesn't work with enabled dedicated queues due to
+an issue with RTE flow rule creation prior to port start.
+
 *   **Transmit Load Balancing (Mode 5):**
 
 .. figure:: img/bond-mode-5.*
 
Transmit Load Balancing (Mode 5)
-- 
2.27.0



[dpdk-dev] [PATCH 4/4] doc: note that testpmd on mlx5 has dedicated queues problem

2021-07-21 Thread Martin Havlik
In bonding mode 4 (8023ad), dedicated queues are not working
on mlx5 NICs.

Signed-off-by: Martin Havlik 
---
 doc/guides/testpmd_app_ug/testpmd_funcs.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst 
b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
index 2c43719ad3..8a6edc2bad 100644
--- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst
+++ b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
@@ -2603,6 +2603,9 @@ when in mode 4 (link-aggregation-802.3ad)::
 
testpmd> set bonding lacp dedicated_queues (port_id) (enable|disable)
 
+.. note::
+   Dedicated queues `do not currently work
+   `__ on mlx5 NICs.
 
 set bonding agg_mode
 
-- 
2.27.0



Re: [dpdk-dev] [PATCH 1/4] ethdev: fix max Rx packet length

2021-07-21 Thread Ferruh Yigit
On 7/13/2021 1:47 PM, Andrew Rybchenko wrote:
> On 7/9/21 8:29 PM, Ferruh Yigit wrote:
>> There is a confusion on setting max Rx packet length, this patch aims to
>> clarify it.
>>
>> 'rte_eth_dev_configure()' API accepts max Rx packet size via
>> 'uint32_t max_rx_pkt_len' field of the config struct 'struct
>> rte_eth_conf'.
>>
>> Also 'rte_eth_dev_set_mtu()' API can be used to set the MTU, and result
>> stored into '(struct rte_eth_dev)->data->mtu'.
>>
>> These two APIs are related but they work in a disconnected way, they
>> store the set values in different variables which makes hard to figure
>> out which one to use, also two different related method is confusing for
>> the users.
>>
>> Other issues causing confusion are:
>> * maximum transmission unit (MTU) is payload of the Ethernet frame. And
>>   'max_rx_pkt_len' is the size of the Ethernet frame. Difference is
>>   Ethernet frame overhead, but this may be different from device to
>>   device based on what device supports, like VLAN and QinQ.
>> * 'max_rx_pkt_len' is only valid when application requested jumbo frame,
>>   which adds additional confusion and some APIs and PMDs already
>>   discards this documented behavior.
>> * For the jumbo frame enabled case, 'max_rx_pkt_len' is an mandatory
>>   field, this adds configuration complexity for application.
>>
>> As a solution, both APIs get MTU as a parameter, and both save the result
>> in the same variable '(struct rte_eth_dev)->data->mtu'. For this
>> 'max_rx_pkt_len' updated as 'mtu', and it is always valid independent
>> from jumbo frame.
>>
>> For 'rte_eth_dev_configure()', 'dev->data->dev_conf.rxmode.mtu' is user
>> request and it should be used only within configure function and result
>> should be stored to '(struct rte_eth_dev)->data->mtu'. After that point
>> both application and PMD uses MTU from this variable.
>>
>> When application doesn't provide an MTU during 'rte_eth_dev_configure()'
>> default 'RTE_ETHER_MTU' value is used.
>>
>> As additional clarification, MTU is used to configure the device for
>> physical Rx/Tx limitation. Other related issue is size of the buffer to
>> store Rx packets, many PMDs use mbuf data buffer size as Rx buffer size.
>> And compares MTU against Rx buffer size to decide enabling scattered Rx
>> or not, if PMD supports it. If scattered Rx is not supported by device,
>> MTU bigger than Rx buffer size should fail.
>>
> 
> Do I understand correctly that target is 21.11?
> 

Yes, it is for 21.11, I should clarify it.

> Really huge work. Many thanks.
> 
> See my notes below.
> 
>> Signed-off-by: Ferruh Yigit 
> 
> [snip]
> 
>> diff --git a/app/test-eventdev/test_pipeline_common.c 
>> b/app/test-eventdev/test_pipeline_common.c
>> index 6ee530d4cdc9..5fcea74b4d43 100644
>> --- a/app/test-eventdev/test_pipeline_common.c
>> +++ b/app/test-eventdev/test_pipeline_common.c
>> @@ -197,8 +197,9 @@ pipeline_ethdev_setup(struct evt_test *test, struct 
>> evt_options *opt)
>>  return -EINVAL;
>>  }
>>  
>> -port_conf.rxmode.max_rx_pkt_len = opt->max_pkt_sz;
>> -if (opt->max_pkt_sz > RTE_ETHER_MAX_LEN)
>> +port_conf.rxmode.mtu = opt->max_pkt_sz - RTE_ETHER_HDR_LEN -
>> +RTE_ETHER_CRC_LEN;
> 
> Subtraction requires an overflow check. May max_pkt_size be 0 or just
> smaller than RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN?
> 

There is an "opt->max_pkt_sz < RTE_ETHER_MIN_LEN" check above this, which ensures
it won't overflow.

>> +if (port_conf.rxmode.mtu > RTE_ETHER_MTU)
>>  port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
>>  
>>  t->internal_port = 1;
>> diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
>> index 8468018cf35d..8bdc042f6e8e 100644
>> --- a/app/test-pmd/cmdline.c
>> +++ b/app/test-pmd/cmdline.c
>> @@ -1892,43 +1892,36 @@ cmd_config_max_pkt_len_parsed(void *parsed_result,
>>  __rte_unused void *data)
>>  {
>>  struct cmd_config_max_pkt_len_result *res = parsed_result;
>> -uint32_t max_rx_pkt_len_backup = 0;
>> -portid_t pid;
>> +portid_t port_id;
>>  int ret;
>>  
>> +if (strcmp(res->name, "max-pkt-len")) {
>> +printf("Unknown parameter\n");
>> +return;
>> +}
>> +
>>  if (!all_ports_stopped()) {
>>  printf("Please stop all ports first\n");
>>  return;
>>  }
>>  
>> -RTE_ETH_FOREACH_DEV(pid) {
>> -struct rte_port *port = &ports[pid];
>> -
>> -if (!strcmp(res->name, "max-pkt-len")) {
>> -if (res->value < RTE_ETHER_MIN_LEN) {
>> -printf("max-pkt-len can not be less than %d\n",
>> -RTE_ETHER_MIN_LEN);
>> -return;
>> -}
>> -if (res->value == port->dev_conf.rxmode.max_rx_pkt_len)
>> -return;
>> -
>> -ret = eth_dev_info_get_print_err(pid, &port->dev_info);
>> -

[dpdk-dev] [PATCH] net/mlx5: fix indirect action modify rollback

2021-07-21 Thread Dmitry Kozlyuk
mlx5_ind_table_obj_modify() first references queues from the new list,
then applies the new list to HW. In case of apply failure the function
dereferenced queues from the old list, while it should be the new list.

Fixes: fa7ad49e96b5 ("net/mlx5: fix shared RSS action update")
Cc: andr...@nvidia.com
Cc: sta...@dpdk.org

Signed-off-by: Dmitry Kozlyuk 
Acked-by: Matan Azrad 
---
 drivers/net/mlx5/mlx5_rxq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 23685d7654..06545ebf68 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -2086,7 +2086,7 @@ mlx5_ind_table_obj_modify(struct rte_eth_dev *dev,
 error:
err = rte_errno;
for (j = 0; j < i; j++)
-   mlx5_rxq_release(dev, ind_tbl->queues[j]);
+   mlx5_rxq_release(dev, queues[j]);
rte_errno = err;
DRV_LOG(DEBUG, "Port %u cannot setup indirection table.",
dev->data->port_id);
-- 
2.25.1



Re: [dpdk-dev] [PATCH 2/4] doc: specify RTE flow create behaviour

2021-07-21 Thread Stephen Hemminger
On Wed, 21 Jul 2021 17:58:14 +0200
Martin Havlik  wrote:

> The ability to create RTE flow rules, depending on
> port status, can and does differ between PMDs.
> Now the doc reflects that.
> 
> Signed-off-by: Martin Havlik 
> ---
>  doc/guides/prog_guide/rte_flow.rst | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/doc/guides/prog_guide/rte_flow.rst 
> b/doc/guides/prog_guide/rte_flow.rst
> index 2b42d5ec8c..2988e3328a 100644
> --- a/doc/guides/prog_guide/rte_flow.rst
> +++ b/doc/guides/prog_guide/rte_flow.rst
> @@ -3097,6 +3097,10 @@ actually created and a handle returned.
> const struct rte_flow_action *actions[],
> struct rte_flow_error *error);
>  
> +The ability to create a flow rule may depend on the status (started/stopped)
> +of the port for which the rule is being created. This behaviour is
> +PMD specific. Seek relevant PMD documentation for details.

Any PMD specific behavior in DPDK is an anathema to application developers
and should be considered a design flaw!


[dpdk-dev] [PATCH v4 00/16] net/mlx5: support Sub-Function

2021-07-21 Thread Xueming Li
Sub-Function [1] is a portion of the PCI device; an SF netdev has its own
dedicated queues (txq, rxq). An SF shares PCI-level resources with other
SFs and/or with its parent PCI function. The auxiliary bus [2] is the
foundation of SF support.

This patch set introduces Sub-Function support for mlx5 PMD driver
including class net, regex, vdpa and compress.

Version history:
  RFC:
initial version
  V1:
rebased on latest upstream code
  V2:
init sh->numa_node earlier
other bug fixes
  V3:
split first patch and fix compilation issue
  V4:
convert crypto pmd using new common driver

[1] SubFunction in kernel:
https://lore.kernel.org/netdev/20201112192424.2742-1-pa...@nvidia.com/

[2] Auxiliary bus:
http://patchwork.dpdk.org/project/dpdk/patch/20210510134732.2174-1-xuemi...@nvidia.com/


Thomas Monjalon (5):
  common/mlx5: move description of PCI sysfs functions
  common/mlx5: get PCI device address from any bus
  vdpa/mlx5: define driver name as macro
  vdpa/mlx5: remove PCI specifics
  vdpa/mlx5: support SubFunction

Xueming Li (11):
  common/mlx5: rename eth device class name
  common/mlx5: add common device driver
  common/mlx5: support auxiliary bus
  net/mlx5: remove PCI dependency
  net/mlx5: migrate to bus-agnostic common driver
  net/mlx5: support SubFunction
  net/mlx5: check max Verbs port number
  regex/mlx5: migrate to common driver
  compress/mlx5: migrate to common driver
  crypto/mlx5: migrate to common driver
  common/mlx5: clean up legacy PCI bus driver

 doc/guides/nics/mlx5.rst  |  54 +-
 doc/guides/rel_notes/release_21_08.rst|   5 +
 doc/guides/vdpadevs/mlx5.rst  |  10 +
 drivers/common/mlx5/linux/meson.build |   3 +
 .../common/mlx5/linux/mlx5_common_auxiliary.c | 192 +++
 drivers/common/mlx5/linux/mlx5_common_os.c|  29 +-
 drivers/common/mlx5/linux/mlx5_common_os.h|   6 +-
 drivers/common/mlx5/linux/mlx5_common_verbs.c |  24 +-
 drivers/common/mlx5/meson.build   |   2 +-
 drivers/common/mlx5/mlx5_common.c | 390 -
 drivers/common/mlx5/mlx5_common.h | 179 +-
 drivers/common/mlx5/mlx5_common_pci.c | 540 --
 drivers/common/mlx5/mlx5_common_pci.h |  78 ---
 drivers/common/mlx5/mlx5_common_private.h |  50 ++
 drivers/common/mlx5/mlx5_common_utils.h   |   2 +
 drivers/common/mlx5/version.map   |  12 +-
 drivers/compress/mlx5/mlx5_compress.c |  71 +--
 drivers/crypto/mlx5/mlx5_crypto.c |  61 +-
 drivers/crypto/mlx5/mlx5_crypto.h |   1 -
 drivers/net/mlx5/linux/mlx5_ethdev_os.c   |  14 +-
 drivers/net/mlx5/linux/mlx5_os.c  | 203 +--
 drivers/net/mlx5/linux/mlx5_os.h  |   5 +-
 drivers/net/mlx5/mlx5.c   | 116 ++--
 drivers/net/mlx5/mlx5.h   |  14 +-
 drivers/net/mlx5/mlx5_ethdev.c|   2 +-
 drivers/net/mlx5/mlx5_mac.c   |   2 +-
 drivers/net/mlx5/mlx5_mr.c|  48 +-
 drivers/net/mlx5/mlx5_rxmode.c|   8 +-
 drivers/net/mlx5/mlx5_rxtx.h  |   9 +-
 drivers/net/mlx5/mlx5_trigger.c   |  14 +-
 drivers/net/mlx5/mlx5_txq.c   |   3 +-
 drivers/net/mlx5/windows/mlx5_os.c|  15 +-
 drivers/regex/mlx5/mlx5_regex.c   |  49 +-
 drivers/regex/mlx5/mlx5_regex.h   |   1 -
 drivers/vdpa/mlx5/mlx5_vdpa.c | 128 ++---
 drivers/vdpa/mlx5/mlx5_vdpa.h |   1 -
 36 files changed, 1380 insertions(+), 961 deletions(-)
 create mode 100644 drivers/common/mlx5/linux/mlx5_common_auxiliary.c
 delete mode 100644 drivers/common/mlx5/mlx5_common_pci.h
 create mode 100644 drivers/common/mlx5/mlx5_common_private.h

-- 
2.25.1



[dpdk-dev] [PATCH v4 05/16] common/mlx5: get PCI device address from any bus

2021-07-21 Thread Xueming Li
From: Thomas Monjalon 

A function is exported to allow retrieving the PCI address
of the parent PCI device of a Sub-Function from the auxiliary bus sysfs.
The function mlx5_dev_to_pci_str() accepts both PCI and auxiliary
devices. In the case of a PCI device, it simply uses the device name.

The function mlx5_dev_to_pci_addr(), which is based on a sysfs path
and does not use any device object, is renamed to mlx5_get_pci_addr()
for clarity.

Signed-off-by: Thomas Monjalon 
Acked-by: Viacheslav Ovsiienko 
---
 .../common/mlx5/linux/mlx5_common_auxiliary.c | 19 +++
 drivers/common/mlx5/linux/mlx5_common_os.c|  5 ++--
 drivers/common/mlx5/mlx5_common.c | 23 +++
 drivers/common/mlx5/mlx5_common.h | 16 -
 drivers/common/mlx5/mlx5_common_private.h |  2 ++
 drivers/common/mlx5/version.map   |  3 ++-
 drivers/net/mlx5/linux/mlx5_os.c  |  6 ++---
 7 files changed, 66 insertions(+), 8 deletions(-)

diff --git a/drivers/common/mlx5/linux/mlx5_common_auxiliary.c 
b/drivers/common/mlx5/linux/mlx5_common_auxiliary.c
index 4ca27cd281..6584aeb18e 100644
--- a/drivers/common/mlx5/linux/mlx5_common_auxiliary.c
+++ b/drivers/common/mlx5/linux/mlx5_common_auxiliary.c
@@ -4,6 +4,8 @@
 
 #include 
 #include 
+#include 
+
 #include 
 #include 
 #include 
@@ -66,6 +68,23 @@ mlx5_auxiliary_get_pci_path(const struct 
rte_auxiliary_device *dev,
return 0;
 }
 
+int
+mlx5_auxiliary_get_pci_str(const struct rte_auxiliary_device *dev,
+  char *addr, size_t size)
+{
+   char sysfs_pci[PATH_MAX];
+   char *base;
+
+   if (mlx5_auxiliary_get_pci_path(dev, sysfs_pci, sizeof(sysfs_pci)) != 0)
+   return -ENODEV;
+   base = basename(sysfs_pci);
+   if (base == NULL)
+   return -errno;
+   if (rte_strscpy(addr, base, size) < 0)
+   return -rte_errno;
+   return 0;
+}
+
 static int
 mlx5_auxiliary_get_numa(const struct rte_auxiliary_device *dev)
 {
diff --git a/drivers/common/mlx5/linux/mlx5_common_os.c 
b/drivers/common/mlx5/linux/mlx5_common_os.c
index 337e9df8cb..9e0c823c97 100644
--- a/drivers/common/mlx5/linux/mlx5_common_os.c
+++ b/drivers/common/mlx5/linux/mlx5_common_os.c
@@ -24,8 +24,7 @@ const struct mlx5_glue *mlx5_glue;
 #endif
 
 int
-mlx5_dev_to_pci_addr(const char *dev_path,
-struct rte_pci_addr *pci_addr)
+mlx5_get_pci_addr(const char *dev_path, struct rte_pci_addr *pci_addr)
 {
FILE *file;
char line[32];
@@ -417,7 +416,7 @@ mlx5_os_get_ibv_device(const struct rte_pci_addr *addr)
struct rte_pci_addr paddr;
 
DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[n]->name);
-   if (mlx5_dev_to_pci_addr(ibv_list[n]->ibdev_path, &paddr) != 0)
+   if (mlx5_get_pci_addr(ibv_list[n]->ibdev_path, &paddr) != 0)
continue;
if (rte_pci_addr_cmp(addr, &paddr) != 0)
continue;
diff --git a/drivers/common/mlx5/mlx5_common.c 
b/drivers/common/mlx5/mlx5_common.c
index 94f858a9bd..8fe36f7077 100644
--- a/drivers/common/mlx5/mlx5_common.c
+++ b/drivers/common/mlx5/mlx5_common.c
@@ -197,6 +197,29 @@ to_mlx5_device(const struct rte_device *rte_dev)
return NULL;
 }
 
+int
+mlx5_dev_to_pci_str(const struct rte_device *dev, char *addr, size_t size)
+{
+   struct rte_pci_addr pci_addr = { 0 };
+   int ret;
+
+   if (mlx5_dev_is_pci(dev)) {
+   /* Input might be <BDF>, format PCI address to <DBDF>. */
+   ret = rte_pci_addr_parse(dev->name, &pci_addr);
+   if (ret != 0)
+   return -ENODEV;
+   rte_pci_device_name(&pci_addr, addr, size);
+   return 0;
+   }
+#ifdef RTE_EXEC_ENV_LINUX
+   return mlx5_auxiliary_get_pci_str(RTE_DEV_TO_AUXILIARY_CONST(dev),
+   addr, size);
+#else
+   rte_errno = ENODEV;
+   return -rte_errno;
+#endif
+}
+
 static void
 dev_release(struct mlx5_common_device *dev)
 {
diff --git a/drivers/common/mlx5/mlx5_common.h 
b/drivers/common/mlx5/mlx5_common.h
index 3cb961f495..66c83047a1 100644
--- a/drivers/common/mlx5/mlx5_common.h
+++ b/drivers/common/mlx5/mlx5_common.h
@@ -210,6 +210,20 @@ check_cqe(volatile struct mlx5_cqe *cqe, const uint16_t 
cqes_n,
return MLX5_CQE_STATUS_SW_OWN;
 }
 
+/*
+ * Get PCI address <DBDF> string from EAL device.
+ *
+ * @param[out] addr
+ * The output address buffer string
+ * @param[in] size
+ * The output buffer size
+ * @return
+ *   - 0 on success.
+ *   - Negative value and rte_errno is set otherwise.
+ */
+__rte_internal
+int mlx5_dev_to_pci_str(const struct rte_device *dev, char *addr, size_t size);
+
 /*
  * Get PCI address from sysfs of a PCI-related device.
  *
@@ -224,7 +238,7 @@ check_cqe(volatile struct mlx5_cqe *cqe, const uint16_t 
cqes_n,
  *   - Negative value and rte_errno is set otherwise.
  */
 __rte_internal

[dpdk-dev] [PATCH v4 04/16] common/mlx5: support auxiliary bus

2021-07-21 Thread Xueming Li
This patch adds an auxiliary bus driver that delegates to the
registered internal mlx5 common device drivers, i.e. eth, vdpa, etc.

The current main goal is to support Sub-Functions on the auxiliary bus.

As a limitation of the current driver, the NUMA node of the device is
detected from the parent PCI device found through the device's sysfs
symbolic link. This will be removed once a NUMA node file is available
in sysfs.

Signed-off-by: Xueming Li 
Acked-by: Viacheslav Ovsiienko 
---
 drivers/common/mlx5/linux/meson.build |   3 +
 .../common/mlx5/linux/mlx5_common_auxiliary.c | 173 ++
 drivers/common/mlx5/linux/mlx5_common_verbs.c |   5 +-
 drivers/common/mlx5/meson.build   |   2 +-
 drivers/common/mlx5/mlx5_common.c |   3 +
 drivers/common/mlx5/mlx5_common.h |   6 +
 drivers/common/mlx5/mlx5_common_private.h |   6 +
 drivers/common/mlx5/version.map   |   2 +
 8 files changed, 198 insertions(+), 2 deletions(-)
 create mode 100644 drivers/common/mlx5/linux/mlx5_common_auxiliary.c

diff --git a/drivers/common/mlx5/linux/meson.build 
b/drivers/common/mlx5/linux/meson.build
index 686df26909..6e2743 100644
--- a/drivers/common/mlx5/linux/meson.build
+++ b/drivers/common/mlx5/linux/meson.build
@@ -48,10 +48,13 @@ endif
 sources += files('mlx5_nl.c')
 sources += files('mlx5_common_os.c')
 sources += files('mlx5_common_verbs.c')
+sources += files('mlx5_common_auxiliary.c')
 if not dlopen_ibverbs
 sources += files('mlx5_glue.c')
 endif
 
+deps += ['bus_auxiliary']
+
 # To maintain the compatibility with the make build system
 # mlx5_autoconf.h file is still generated.
 # input array for meson member search:
diff --git a/drivers/common/mlx5/linux/mlx5_common_auxiliary.c 
b/drivers/common/mlx5/linux/mlx5_common_auxiliary.c
new file mode 100644
index 00..4ca27cd281
--- /dev/null
+++ b/drivers/common/mlx5/linux/mlx5_common_auxiliary.c
@@ -0,0 +1,173 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2020 Mellanox Technologies Ltd
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "eal_filesystem.h"
+
+#include "mlx5_common_utils.h"
+#include "mlx5_common_private.h"
+
+#define AUXILIARY_SYSFS_PATH "/sys/bus/auxiliary/devices"
+#define MLX5_AUXILIARY_PREFIX "mlx5_core.sf."
+
+int
+mlx5_auxiliary_get_child_name(const char *dev, const char *node,
+ char *child, size_t size)
+{
+   DIR *dir;
+   struct dirent *dent;
+   MKSTR(path, "%s/%s%s", AUXILIARY_SYSFS_PATH, dev, node);
+
+   dir = opendir(path);
+   if (dir == NULL) {
+   rte_errno = errno;
+   return -rte_errno;
+   }
+   /* Get the first file name. */
+   while ((dent = readdir(dir)) != NULL) {
+   if (dent->d_name[0] != '.')
+   break;
+   }
+   closedir(dir);
+   if (dent == NULL) {
+   rte_errno = ENOENT;
+   return -rte_errno;
+   }
+   if (rte_strscpy(child, dent->d_name, size) < 0)
+   return -rte_errno;
+   return 0;
+}
+
+static int
+mlx5_auxiliary_get_pci_path(const struct rte_auxiliary_device *dev,
+   char *sysfs_pci, size_t size)
+{
+   char sysfs_real[PATH_MAX] = { 0 };
+   MKSTR(sysfs_aux, "%s/%s", AUXILIARY_SYSFS_PATH, dev->name);
+   char *dir;
+
+   if (realpath(sysfs_aux, sysfs_real) == NULL) {
+   rte_errno = errno;
+   return -rte_errno;
+   }
+   dir = dirname(sysfs_real);
+   if (dir == NULL) {
+   rte_errno = errno;
+   return -rte_errno;
+   }
+   if (rte_strscpy(sysfs_pci, dir, size) < 0)
+   return -rte_errno;
+   return 0;
+}
+
+static int
+mlx5_auxiliary_get_numa(const struct rte_auxiliary_device *dev)
+{
+   unsigned long numa;
+   char numa_path[PATH_MAX];
+
+   if (mlx5_auxiliary_get_pci_path(dev, numa_path, sizeof(numa_path)) != 0)
+   return SOCKET_ID_ANY;
+   if (strcat(numa_path, "/numa_node") == NULL) {
+   rte_errno = ENAMETOOLONG;
+   return SOCKET_ID_ANY;
+   }
+   if (eal_parse_sysfs_value(numa_path, &numa) != 0) {
+   rte_errno = EINVAL;
+   return SOCKET_ID_ANY;
+   }
+   return (int)numa;
+}
+
+struct ibv_device *
+mlx5_get_aux_ibv_device(const struct rte_auxiliary_device *dev)
+{
+   int n;
+   char ib_name[64] = { 0 };
+   struct ibv_device **ibv_list = mlx5_glue->get_device_list(&n);
+   struct ibv_device *ibv_match = NULL;
+
+   if (!ibv_list) {
+   rte_errno = ENOSYS;
+   return NULL;
+   }
+   if (mlx5_auxiliary_get_child_name(dev->name, "/infiniband",
+ ib_name, sizeof(ib_name)) != 0)
+   goto out;
+   while (n-- > 0) {
+   if (strcmp(ibv_list[n]->name, ib_name) != 0)
+   continue;
+   ibv_match = ibv_list[n];
+   break;
+ 

[dpdk-dev] [PATCH v4 01/16] common/mlx5: rename eth device class name

2021-07-21 Thread Xueming Li
To align with the EAL class driver naming, rename the internal class name
from "net" to "eth". The name "net" is still accepted for backward
compatibility.

Signed-off-by: Xueming Li 
Acked-by: Viacheslav Ovsiienko 
---
 drivers/common/mlx5/mlx5_common.h |  2 +-
 drivers/common/mlx5/mlx5_common_pci.c | 22 --
 drivers/net/mlx5/mlx5.c   |  2 +-
 3 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/drivers/common/mlx5/mlx5_common.h 
b/drivers/common/mlx5/mlx5_common.h
index 962179a5a5..05008983ea 100644
--- a/drivers/common/mlx5/mlx5_common.h
+++ b/drivers/common/mlx5/mlx5_common.h
@@ -212,7 +212,7 @@ int mlx5_get_ifname_sysfs(const char *ibdev_path, char 
*ifname);
 
 enum mlx5_class {
MLX5_CLASS_INVALID,
-   MLX5_CLASS_NET = RTE_BIT64(0),
+   MLX5_CLASS_ETH = RTE_BIT64(0),
MLX5_CLASS_VDPA = RTE_BIT64(1),
MLX5_CLASS_REGEX = RTE_BIT64(2),
MLX5_CLASS_COMPRESS = RTE_BIT64(3),
diff --git a/drivers/common/mlx5/mlx5_common_pci.c 
b/drivers/common/mlx5/mlx5_common_pci.c
index 5547e62d6b..591054468d 100644
--- a/drivers/common/mlx5/mlx5_common_pci.c
+++ b/drivers/common/mlx5/mlx5_common_pci.c
@@ -28,34 +28,36 @@ static const struct {
unsigned int driver_class;
 } mlx5_classes[] = {
{ .name = "vdpa", .driver_class = MLX5_CLASS_VDPA },
-   { .name = "net", .driver_class = MLX5_CLASS_NET },
+   { .name = "eth", .driver_class = MLX5_CLASS_ETH },
+   /* Keep name "net" for backward compatibility. */
+   { .name = "net", .driver_class = MLX5_CLASS_ETH },
{ .name = "regex", .driver_class = MLX5_CLASS_REGEX },
{ .name = "compress", .driver_class = MLX5_CLASS_COMPRESS },
{ .name = "crypto", .driver_class = MLX5_CLASS_CRYPTO },
 };
 
 static const unsigned int mlx5_class_combinations[] = {
-   MLX5_CLASS_NET,
+   MLX5_CLASS_ETH,
MLX5_CLASS_VDPA,
MLX5_CLASS_REGEX,
MLX5_CLASS_COMPRESS,
MLX5_CLASS_CRYPTO,
-   MLX5_CLASS_NET | MLX5_CLASS_REGEX,
+   MLX5_CLASS_ETH | MLX5_CLASS_REGEX,
MLX5_CLASS_VDPA | MLX5_CLASS_REGEX,
-   MLX5_CLASS_NET | MLX5_CLASS_COMPRESS,
+   MLX5_CLASS_ETH | MLX5_CLASS_COMPRESS,
MLX5_CLASS_VDPA | MLX5_CLASS_COMPRESS,
MLX5_CLASS_REGEX | MLX5_CLASS_COMPRESS,
-   MLX5_CLASS_NET | MLX5_CLASS_CRYPTO,
+   MLX5_CLASS_ETH | MLX5_CLASS_CRYPTO,
+   MLX5_CLASS_ETH | MLX5_CLASS_REGEX | MLX5_CLASS_COMPRESS,
MLX5_CLASS_VDPA | MLX5_CLASS_CRYPTO,
MLX5_CLASS_REGEX | MLX5_CLASS_CRYPTO,
MLX5_CLASS_COMPRESS | MLX5_CLASS_CRYPTO,
-   MLX5_CLASS_NET | MLX5_CLASS_REGEX | MLX5_CLASS_COMPRESS,
MLX5_CLASS_VDPA | MLX5_CLASS_REGEX | MLX5_CLASS_COMPRESS,
-   MLX5_CLASS_NET | MLX5_CLASS_REGEX | MLX5_CLASS_CRYPTO,
+   MLX5_CLASS_ETH | MLX5_CLASS_REGEX | MLX5_CLASS_CRYPTO,
MLX5_CLASS_VDPA | MLX5_CLASS_REGEX | MLX5_CLASS_CRYPTO,
-   MLX5_CLASS_NET | MLX5_CLASS_COMPRESS | MLX5_CLASS_CRYPTO,
+   MLX5_CLASS_ETH | MLX5_CLASS_COMPRESS | MLX5_CLASS_CRYPTO,
MLX5_CLASS_VDPA | MLX5_CLASS_COMPRESS | MLX5_CLASS_CRYPTO,
-   MLX5_CLASS_NET | MLX5_CLASS_REGEX | MLX5_CLASS_COMPRESS |
+   MLX5_CLASS_ETH | MLX5_CLASS_REGEX | MLX5_CLASS_COMPRESS |
MLX5_CLASS_CRYPTO,
MLX5_CLASS_VDPA | MLX5_CLASS_REGEX | MLX5_CLASS_COMPRESS |
MLX5_CLASS_CRYPTO,
@@ -317,7 +319,7 @@ mlx5_common_pci_probe(struct rte_pci_driver *pci_drv 
__rte_unused,
}
} else {
/* Default to net class. */
-   user_classes = MLX5_CLASS_NET;
+   user_classes = MLX5_CLASS_ETH;
}
dev = pci_to_mlx5_device(pci_dev);
if (!dev) {
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 021a34dd4d..d9c90d5ef9 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -2446,7 +2446,7 @@ static const struct rte_pci_id mlx5_pci_id_map[] = {
 };
 
 static struct mlx5_pci_driver mlx5_driver = {
-   .driver_class = MLX5_CLASS_NET,
+   .driver_class = MLX5_CLASS_ETH,
.pci_driver = {
.driver = {
.name = MLX5_PCI_DRIVER_NAME,
-- 
2.25.1



[dpdk-dev] [PATCH v4 02/16] common/mlx5: add common device driver

2021-07-21 Thread Xueming Li
To support the auxiliary bus, introduce a common device driver and callbacks,
which are supposed to replace the mlx5 common PCI bus driver.

Mlx5 class drivers, i.e. eth, vDPA, regex and compress, normally consume a
single Verbs device context to probe a device. The Verbs device is resolved
from the PCI address if the device is a PCI bus device, or from the auxiliary
sysfs if the device is an auxiliary bus device. Currently only the PCI bus is
supported.

The common device driver is a middle layer between the mlx5 class drivers and
the bus; it resolves and abstracts bus info into a Verbs device for the class
drivers. Both the PCI bus driver and the auxiliary bus driver can utilize the
common driver layer to forward bus operations to the mlx5 class drivers.

The legacy mlx5 common PCI bus driver is still used by the mlx5 eth, vDPA,
regex and compress PMDs; it will be removed once all PMDs migrate to the new
common driver.

Signed-off-by: Xueming Li 
Acked-by: Viacheslav Ovsiienko 
---
 doc/guides/rel_notes/release_21_08.rst|   5 +
 drivers/common/mlx5/linux/mlx5_common_os.c|   2 +-
 drivers/common/mlx5/linux/mlx5_common_os.h|   7 +-
 drivers/common/mlx5/linux/mlx5_common_verbs.c |  21 +-
 drivers/common/mlx5/mlx5_common.c | 363 +-
 drivers/common/mlx5/mlx5_common.h | 128 ++
 drivers/common/mlx5/mlx5_common_pci.c | 133 ++-
 drivers/common/mlx5/mlx5_common_private.h |  41 ++
 drivers/common/mlx5/mlx5_common_utils.h   |   2 +
 drivers/common/mlx5/version.map   |   4 +
 10 files changed, 688 insertions(+), 18 deletions(-)
 create mode 100644 drivers/common/mlx5/mlx5_common_private.h

diff --git a/doc/guides/rel_notes/release_21_08.rst 
b/doc/guides/rel_notes/release_21_08.rst
index 409f9980e9..27d591137e 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -148,6 +148,11 @@ New Features
   The experimental PMD power management API now supports managing
   multiple Ethernet Rx queues per lcore.
 
+* **Added Sub-Function support for mlx5 PMDs**
+
+  Added Sub-Function support based on auxiliary bus for mlx5 PMDs:
+  net, vDPA, compress and regex.
+
 
 Removed Items
 -
diff --git a/drivers/common/mlx5/linux/mlx5_common_os.c 
b/drivers/common/mlx5/linux/mlx5_common_os.c
index ea0b71e425..78a9723075 100644
--- a/drivers/common/mlx5/linux/mlx5_common_os.c
+++ b/drivers/common/mlx5/linux/mlx5_common_os.c
@@ -425,7 +425,7 @@ mlx5_glue_constructor(void)
 }
 
 struct ibv_device *
-mlx5_os_get_ibv_device(struct rte_pci_addr *addr)
+mlx5_os_get_ibv_device(const struct rte_pci_addr *addr)
 {
int n;
struct ibv_device **ibv_list = mlx5_glue->get_device_list(&n);
diff --git a/drivers/common/mlx5/linux/mlx5_common_os.h 
b/drivers/common/mlx5/linux/mlx5_common_os.h
index 72d6bf828b..86d0cb09b0 100644
--- a/drivers/common/mlx5/linux/mlx5_common_os.h
+++ b/drivers/common/mlx5/linux/mlx5_common_os.h
@@ -291,6 +291,11 @@ mlx5_os_free(void *addr)
 
 __rte_internal
 struct ibv_device *
-mlx5_os_get_ibv_device(struct rte_pci_addr *addr);
+mlx5_os_get_ibv_device(const struct rte_pci_addr *addr);
+
+__rte_internal
+struct ibv_device *
+mlx5_os_get_ibv_dev(const struct rte_device *dev);
+
 
 #endif /* RTE_PMD_MLX5_COMMON_OS_H_ */
diff --git a/drivers/common/mlx5/linux/mlx5_common_verbs.c 
b/drivers/common/mlx5/linux/mlx5_common_verbs.c
index aa560f05f2..6a6ab7a7a2 100644
--- a/drivers/common/mlx5/linux/mlx5_common_verbs.c
+++ b/drivers/common/mlx5/linux/mlx5_common_verbs.c
@@ -10,11 +10,31 @@
 #include 
 #include 
 
+#include 
+#include 
+
+#include "mlx5_common_utils.h"
+#include "mlx5_common_log.h"
+#include "mlx5_common_private.h"
 #include "mlx5_autoconf.h"
 #include 
 #include 
 #include 
 
+struct ibv_device *
+mlx5_os_get_ibv_dev(const struct rte_device *dev)
+{
+   struct ibv_device *ibv = NULL;
+
+   if (mlx5_dev_is_pci(dev))
+   ibv = mlx5_os_get_ibv_device(&RTE_DEV_TO_PCI_CONST(dev)->addr);
+   if (ibv == NULL) {
+   rte_errno = ENODEV;
+   DRV_LOG(ERR, "Verbs device not found: %s", dev->name);
+   }
+   return ibv;
+}
+
 /**
  * Register mr. Given protection domain pointer, pointer to addr and length
  * register the memory region.
@@ -68,4 +88,3 @@ mlx5_common_verbs_dereg_mr(struct mlx5_pmd_mr *pmd_mr)
memset(pmd_mr, 0, sizeof(*pmd_mr));
}
 }
-
diff --git a/drivers/common/mlx5/mlx5_common.c 
b/drivers/common/mlx5/mlx5_common.c
index 25e9f09108..97d3e3e60e 100644
--- a/drivers/common/mlx5/mlx5_common.c
+++ b/drivers/common/mlx5/mlx5_common.c
@@ -8,11 +8,14 @@
 
 #include 
 #include 
+#include 
+#include 
 
 #include "mlx5_common.h"
 #include "mlx5_common_os.h"
 #include "mlx5_common_log.h"
 #include "mlx5_common_pci.h"
+#include "mlx5_common_private.h"
 
 uint8_t haswell_broadwell_cpu;
 
@@ -41,6 +44,362 @@ static inline void mlx5_cpu_id(unsigned int level,
 
 RTE_LOG_REGISTER_DEFAULT(mlx5_common_logtype, NOTICE)
 
+/* Head of list of drivers. */
+static TAILQ_HEAD(mlx5_drivers, m

[dpdk-dev] [PATCH v4 08/16] net/mlx5: support SubFunction

2021-07-21 Thread Xueming Li
This patch introduces SF support. Similar to a VF, an SF on the auxiliary bus
is a portion of the hardware PF; there are no representor or bonding
parameters for an SF.

Devargs to support SF:
-a auxiliary:mlx5_core.sf.8,dv_flow_en=1

New global syntax to support SF:
-a bus=auxiliary,name=mlx5_core.sf.8/class=eth/driver=mlx5,dv_flow_en=1

Signed-off-by: Xueming Li 
Acked-by: Viacheslav Ovsiienko 
---
 doc/guides/nics/mlx5.rst|  54 +
 drivers/net/mlx5/linux/mlx5_ethdev_os.c |  12 +-
 drivers/net/mlx5/linux/mlx5_os.c| 145 +---
 drivers/net/mlx5/linux/mlx5_os.h|   2 +
 drivers/net/mlx5/mlx5.c |  23 +++-
 drivers/net/mlx5/mlx5.h |   2 +
 drivers/net/mlx5/mlx5_mac.c |   2 +-
 drivers/net/mlx5/mlx5_rxmode.c  |   8 +-
 drivers/net/mlx5/mlx5_trigger.c |   2 +-
 drivers/net/mlx5/windows/mlx5_os.c  |  12 +-
 10 files changed, 188 insertions(+), 74 deletions(-)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index f5b727c1ee..a2ecddef13 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -112,6 +112,11 @@ Features
 - Flow integrity offload API.
 - Connection tracking.
 - Sub-Function representors.
+- Sub-Function.
+
+Limitations
+---
+
 
 Limitations
 ---
@@ -1478,40 +1483,51 @@ the DPDK application.
 
 echo switchdev > /sys/class/net//compat/devlink/mode
 
-Sub-Function representor
-
+Sub-Function support
+
 
 Sub-Function is a portion of the PCI device, a SF netdev has its own
-dedicated queues(txq, rxq). A SF netdev supports E-Switch representation
-offload similar to existing PF and VF representors. A SF shares PCI
-level resources with other SFs and/or with its parent PCI function.
+dedicated queues(txq, rxq). A SF shares PCI level resources with other SFs
+and/or with its parent PCI function.
+
+0. Requirement::
+
+OFED version >= 5.4-0.3.3.0
 
 1. Configure SF feature::
 
-mlxconfig -d  set PF_BAR2_SIZE=<0/1/2/3> PF_BAR2_ENABLE=1
+# Run mlxconfig on both PFs on host and ECPFs on BlueField.
+mlxconfig -d  set PER_PF_NUM_SF=1 PF_TOTAL_SF=252 
PF_SF_BAR_SIZE=12
 
-Value of PF_BAR2_SIZE:
+2. Enable switchdev mode::
 
-0: 8 SFs
-1: 16 SFs
-2: 32 SFs
-3: 64 SFs
+mlxdevm dev eswitch set pci/ mode switchdev
 
-2. Reset the FW::
+3. Add SF port::
 
-mlxfwreset -d  reset
+mlxdevm port add pci/ flavour pcisf pfnum 0 sfnum 
 
-3. Enable switchdev mode::
+Get SFID from output: pci//
 
-echo switchdev > /sys/class/net//compat/devlink/mode
+4. Modify MAC address::
+
+mlxdevm port function set pci// hw_addr 
+
+5. Activate SF port::
+
+mlxdevm port function set pci// state active
+
+6. Devargs to probe SF device::
 
-4. Create SF::
+auxiliary:mlx5_core.sf.,dv_flow_en=1
 
-mlnx-sf -d  -a create
+Sub-Function representor support
+
 
-5. Probe SF representor::
+A SF netdev supports E-Switch representation offload similar to existing PF
+and VF representors. Use  to probe SF representor.
 
-testpmd> port attach ,representor=sf0,dv_flow_en=1
+testpmd> port attach ,representor=sf,dv_flow_en=1
 
 Performance tuning
 --
diff --git a/drivers/net/mlx5/linux/mlx5_ethdev_os.c 
b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
index b05b9fc950..f34133e2c6 100644
--- a/drivers/net/mlx5/linux/mlx5_ethdev_os.c
+++ b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
@@ -128,6 +128,17 @@ struct ethtool_link_settings {
 #define ETHTOOL_LINK_MODE_20baseCR4_Full_BIT 2 /* 66 - 64 */
 #endif
 
+/* Get interface index from SubFunction device name. */
+int
+mlx5_auxiliary_get_ifindex(const char *sf_name)
+{
+   char if_name[IF_NAMESIZE] = { 0 };
+
+   if (mlx5_auxiliary_get_child_name(sf_name, "/net",
+ if_name, sizeof(if_name)) != 0)
+   return -rte_errno;
+   return if_nametoindex(if_name);
+}
 
 /**
  * Get interface name from private structure.
@@ -1619,4 +1630,3 @@ mlx5_get_mac(struct rte_eth_dev *dev, uint8_t 
(*mac)[RTE_ETHER_ADDR_LEN])
memcpy(mac, request.ifr_hwaddr.sa_data, RTE_ETHER_ADDR_LEN);
return 0;
 }
-
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 1b7ee419d1..f041f9054f 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -2061,6 +2062,27 @@ mlx5_device_bond_pci_match(const struct ibv_device 
*ibv_dev,
return pf;
 }
 
+static void
+mlx5_os_config_default(struct mlx5_dev_config *config)
+{
+   memset(config, 0, sizeof(*config));
+   config->mps = MLX5_ARG_UNSET;
+   config->dbnc = MLX5_ARG_UNSET;
+   config->rx_vec_en = 1;
+   config->txq_inline_max = MLX5

[dpdk-dev] [PATCH v4 03/16] common/mlx5: move description of PCI sysfs functions

2021-07-21 Thread Xueming Li
From: Thomas Monjalon 

The Linux-specific functions mlx5_get_pci_addr() and
mlx5_get_ifname_sysfs() are better described in the .h file.

The requirement for using mlx5_get_pci_addr() is explicit:
the node /device must exist in the provided sysfs path.

Signed-off-by: Thomas Monjalon 
Acked-by: Viacheslav Ovsiienko 
---
 drivers/common/mlx5/linux/mlx5_common_os.c | 22 --
 drivers/common/mlx5/mlx5_common.h  | 26 ++
 2 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/drivers/common/mlx5/linux/mlx5_common_os.c 
b/drivers/common/mlx5/linux/mlx5_common_os.c
index 78a9723075..337e9df8cb 100644
--- a/drivers/common/mlx5/linux/mlx5_common_os.c
+++ b/drivers/common/mlx5/linux/mlx5_common_os.c
@@ -23,17 +23,6 @@
 const struct mlx5_glue *mlx5_glue;
 #endif
 
-/**
- * Get PCI information by sysfs device path.
- *
- * @param dev_path
- *   Pointer to device sysfs folder name.
- * @param[out] pci_addr
- *   PCI bus address output buffer.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
 int
 mlx5_dev_to_pci_addr(const char *dev_path,
 struct rte_pci_addr *pci_addr)
@@ -159,17 +148,6 @@ mlx5_translate_port_name(const char *port_name_in,
port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN;
 }
 
-/**
- * Get kernel interface name from IB device path.
- *
- * @param[in] ibdev_path
- *   Pointer to IB device path.
- * @param[out] ifname
- *   Interface name output buffer.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
 int
 mlx5_get_ifname_sysfs(const char *ibdev_path, char *ifname)
 {
diff --git a/drivers/common/mlx5/mlx5_common.h 
b/drivers/common/mlx5/mlx5_common.h
index 6ba1e70223..1b811f2509 100644
--- a/drivers/common/mlx5/mlx5_common.h
+++ b/drivers/common/mlx5/mlx5_common.h
@@ -204,8 +204,34 @@ check_cqe(volatile struct mlx5_cqe *cqe, const uint16_t 
cqes_n,
return MLX5_CQE_STATUS_SW_OWN;
 }
 
+/*
+ * Get PCI address from sysfs of a PCI-related device.
+ *
+ * @param[in] dev_path
+ *   The sysfs path should not point to the direct plain PCI device.
+ *   Instead, the node "/device/" is used to access the real device.
+ * @param[out] pci_addr
+ *   Parsed PCI address.
+ *
+ * @return
+ *   - 0 on success.
+ *   - Negative value and rte_errno is set otherwise.
+ */
 __rte_internal
 int mlx5_dev_to_pci_addr(const char *dev_path, struct rte_pci_addr *pci_addr);
+
+/*
+ * Get kernel network interface name from sysfs IB device path.
+ *
+ * @param[in] ibdev_path
+ *   The sysfs path to IB device.
+ * @param[out] ifname
+ *   Interface name output of size IF_NAMESIZE.
+ *
+ * @return
+ *   - 0 on success.
+ *   - Negative value and rte_errno is set otherwise.
+ */
 __rte_internal
 int mlx5_get_ifname_sysfs(const char *ibdev_path, char *ifname);
 
-- 
2.25.1



[dpdk-dev] [PATCH v4 09/16] net/mlx5: check max Verbs port number

2021-07-21 Thread Xueming Li
By design, the Verbs API doesn't support device port numbers larger than 255.
Add a check and fail probing with a proper error log.

Signed-off-by: Xueming Li 
Acked-by: Viacheslav Ovsiienko 
---
 drivers/net/mlx5/linux/mlx5_os.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index f041f9054f..db44169b84 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1294,6 +1294,12 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
config->dv_flow_en = 0;
}
 #endif
+   if (spawn->max_port > UINT8_MAX) {
+   /* Verbs can't support ports larger than 255 by design. */
+   DRV_LOG(ERR, "can't support IB ports > UINT8_MAX");
+   err = EINVAL;
+   goto error;
+   }
config->ind_table_max_size =
sh->device_attr.max_rwq_indirection_table_size;
/*
-- 
2.25.1



[dpdk-dev] [PATCH v4 07/16] net/mlx5: migrate to bus-agnostic common driver

2021-07-21 Thread Xueming Li
To support Sub-Functions based on the auxiliary bus, the common driver
provides a new bus-agnostic driver interface.

This patch migrates net driver to new common driver.

Signed-off-by: Xueming Li 
Acked-by: Viacheslav Ovsiienko 
---
 drivers/net/mlx5/linux/mlx5_os.c | 46 --
 drivers/net/mlx5/linux/mlx5_os.h |  3 --
 drivers/net/mlx5/mlx5.c  | 48 +++-
 drivers/net/mlx5/mlx5.h  |  3 +-
 drivers/net/mlx5/mlx5_mr.c   | 38 -
 drivers/net/mlx5/mlx5_rxtx.h |  9 +++---
 6 files changed, 78 insertions(+), 69 deletions(-)

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 4ab30fd244..1b7ee419d1 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -2115,14 +2115,6 @@ mlx5_os_pci_probe_pf(struct rte_pci_device *pci_dev,
struct mlx5_bond_info bond_info;
int ret = -1;
 
-   if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-   mlx5_pmd_socket_init();
-   ret = mlx5_init_once();
-   if (ret) {
-   DRV_LOG(ERR, "unable to init PMD global data: %s",
-   strerror(rte_errno));
-   return -rte_errno;
-   }
errno = 0;
ibv_list = mlx5_glue->get_device_list(&ret);
if (!ibv_list) {
@@ -2569,21 +2561,18 @@ mlx5_os_pci_probe_pf(struct rte_pci_device *pci_dev,
 }
 
 /**
- * DPDK callback to register a PCI device.
+ * Callback to register a PCI device.
  *
  * This function spawns Ethernet devices out of a given PCI device.
  *
- * @param[in] pci_drv
- *   PCI driver structure (mlx5_driver).
  * @param[in] pci_dev
  *   PCI device information.
  *
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-int
-mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
- struct rte_pci_device *pci_dev)
+static int
+mlx5_os_pci_probe(struct rte_pci_device *pci_dev)
 {
struct rte_eth_devargs eth_da = { .type = RTE_ETH_REPRESENTOR_NONE };
int ret = 0;
@@ -2622,6 +2611,35 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv 
__rte_unused,
return ret;
 }
 
+/**
+ * Net class driver callback to probe a device.
+ *
+ * This function probes PCI bus device(s).
+ *
+ * @param[in] dev
+ *   Pointer to the generic device.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_os_net_probe(struct rte_device *dev)
+{
+   int ret;
+
+   if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+   mlx5_pmd_socket_init();
+   ret = mlx5_init_once();
+   if (ret) {
+   DRV_LOG(ERR, "unable to init PMD global data: %s",
+   strerror(rte_errno));
+   return -rte_errno;
+   }
+   if (mlx5_dev_is_pci(dev))
+   return mlx5_os_pci_probe(RTE_DEV_TO_PCI(dev));
+   return 0;
+}
+
 static int
 mlx5_config_doorbell_mapping_env(const struct mlx5_dev_config *config)
 {
diff --git a/drivers/net/mlx5/linux/mlx5_os.h b/drivers/net/mlx5/linux/mlx5_os.h
index 4ae7d0ef47..af7cbeb418 100644
--- a/drivers/net/mlx5/linux/mlx5_os.h
+++ b/drivers/net/mlx5/linux/mlx5_os.h
@@ -19,7 +19,4 @@ enum {
 
 #define MLX5_NAMESIZE IF_NAMESIZE
 
-#define PCI_DRV_FLAGS  (RTE_PCI_DRV_INTR_LSC | \
-   RTE_PCI_DRV_INTR_RMV | \
-   RTE_PCI_DRV_PROBE_AGAIN)
 #endif /* RTE_PMD_MLX5_OS_H_ */
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 8e64bf955b..96e8d189ba 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -12,7 +12,6 @@
 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -28,7 +27,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 
 #include "mlx5_defs.h"
@@ -43,6 +41,8 @@
 #include "mlx5_flow_os.h"
 #include "rte_pmd_mlx5.h"
 
+#define MLX5_ETH_DRIVER_NAME mlx5_eth
+
 /* Device parameter to enable RX completion queue compression. */
 #define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en"
 
@@ -2345,23 +2345,23 @@ mlx5_eth_find_next(uint16_t port_id, struct rte_device 
*odev)
 }
 
 /**
- * DPDK callback to remove a PCI device.
+ * Callback to remove a device.
  *
- * This function removes all Ethernet devices belong to a given PCI device.
+ * This function removes all Ethernet devices belonging to a given device.
  *
- * @param[in] pci_dev
- *   Pointer to the PCI device.
+ * @param[in] dev
+ *   Pointer to the generic device.
  *
  * @return
  *   0 on success, the function cannot fail.
  */
 static int
-mlx5_pci_remove(struct rte_pci_device *pci_dev)
+mlx5_net_remove(struct rte_device *dev)
 {
uint16_t port_id;
int ret = 0;
 
-   RTE_ETH_FOREACH_DEV_OF(port_id, &pci_dev->device) {
+   RTE_ETH_FOREACH_DEV_OF(port_id, dev) {
/*
 * mlx5_dev_close() is not registered to secondary process,
 * call the close function explicitly for secondary process.
@@ -2452,19 +2452,17 @@ static co

[dpdk-dev] [PATCH v4 06/16] net/mlx5: remove PCI dependency

2021-07-21 Thread Xueming Li
To support more bus types, remove PCI dependency where possible.

Signed-off-by: Xueming Li 
Acked-by: Viacheslav Ovsiienko 
---
 drivers/net/mlx5/linux/mlx5_ethdev_os.c |  2 +-
 drivers/net/mlx5/linux/mlx5_os.c|  4 +--
 drivers/net/mlx5/mlx5.c | 45 ++---
 drivers/net/mlx5/mlx5.h |  9 ++---
 drivers/net/mlx5/mlx5_ethdev.c  |  2 +-
 drivers/net/mlx5/mlx5_mr.c  | 14 
 drivers/net/mlx5/mlx5_trigger.c | 12 +++
 drivers/net/mlx5/mlx5_txq.c |  3 +-
 drivers/net/mlx5/windows/mlx5_os.c  |  3 +-
 9 files changed, 51 insertions(+), 43 deletions(-)

diff --git a/drivers/net/mlx5/linux/mlx5_ethdev_os.c 
b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
index ddc1371aa9..b05b9fc950 100644
--- a/drivers/net/mlx5/linux/mlx5_ethdev_os.c
+++ b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
@@ -346,7 +346,7 @@ mlx5_find_master_dev(struct rte_eth_dev *dev)
priv = dev->data->dev_private;
domain_id = priv->domain_id;
MLX5_ASSERT(priv->representor);
-   MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) {
+   MLX5_ETH_FOREACH_DEV(port_id, dev->device) {
struct mlx5_priv *opriv =
rte_eth_devices[port_id].data->dev_private;
if (opriv &&
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 05b1761f6b..4ab30fd244 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1252,7 +1252,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 * Look for sibling devices in order to reuse their switch domain
 * if any, otherwise allocate one.
 */
-   MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) {
+   MLX5_ETH_FOREACH_DEV(port_id, NULL) {
const struct mlx5_priv *opriv =
rte_eth_devices[port_id].data->dev_private;
 
@@ -2511,6 +2511,7 @@ mlx5_os_pci_probe_pf(struct rte_pci_device *pci_dev,
dev_config.decap_en = 1;
dev_config.log_hp_size = MLX5_ARG_UNSET;
dev_config.allow_duplicate_pattern = 1;
+   list[i].numa_node = pci_dev->device.numa_node;
list[i].eth_dev = mlx5_dev_spawn(&pci_dev->device,
 &list[i],
 &dev_config,
@@ -2708,7 +2709,6 @@ mlx5_os_open_device(const struct mlx5_dev_spawn_data 
*spawn,
int dbmap_env;
int err = 0;
 
-   sh->numa_node = spawn->pci_dev->device.numa_node;
pthread_mutex_init(&sh->txpp.mutex, NULL);
/*
 * Configure environment variable "MLX5_BF_SHUT_UP"
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index d9c90d5ef9..8e64bf955b 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1120,6 +1120,7 @@ mlx5_alloc_shared_dev_ctx(const struct 
mlx5_dev_spawn_data *spawn,
rte_errno  = ENOMEM;
goto exit;
}
+   sh->numa_node = spawn->numa_node;
if (spawn->bond_info)
sh->bond = *spawn->bond_info;
err = mlx5_os_open_device(spawn, config, sh);
@@ -1197,7 +1198,7 @@ mlx5_alloc_shared_dev_ctx(const struct 
mlx5_dev_spawn_data *spawn,
 */
err = mlx5_mr_btree_init(&sh->share_cache.cache,
 MLX5_MR_BTREE_CACHE_N * 2,
-spawn->pci_dev->device.numa_node);
+sh->numa_node);
if (err) {
err = rte_errno;
goto error;
@@ -1635,7 +1636,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)
unsigned int c = 0;
uint16_t port_id;
 
-   MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) {
+   MLX5_ETH_FOREACH_DEV(port_id, dev->device) {
struct mlx5_priv *opriv =
rte_eth_devices[port_id].data->dev_private;
 
@@ -2077,18 +2078,20 @@ mlx5_set_min_inline(struct mlx5_dev_spawn_data *spawn,
 {
if (config->txq_inline_min != MLX5_ARG_UNSET) {
/* Application defines size of inlined data explicitly. */
-   switch (spawn->pci_dev->id.device_id) {
-   case PCI_DEVICE_ID_MELLANOX_CONNECTX4:
-   case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:
-   if (config->txq_inline_min <
-  (int)MLX5_INLINE_HSIZE_L2) {
-   DRV_LOG(DEBUG,
-   "txq_inline_mix aligned to minimal"
-   " ConnectX-4 required value %d",
-   (int)MLX5_INLINE_HSIZE_L2);
-   config->txq_inline_min = MLX5_INLINE_HSIZE_L2;
+   if (spawn->pci_dev != NULL) {
+   switch (spawn->pci_dev->id.device_id) {
+   case PCI_D

[dpdk-dev] [PATCH v4 11/16] vdpa/mlx5: define driver name as macro

2021-07-21 Thread Xueming Li
From: Thomas Monjalon 

Uses macro for pmd driver name.

Signed-off-by: Thomas Monjalon 
Acked-by: Viacheslav Ovsiienko 
---
 drivers/vdpa/mlx5/mlx5_vdpa.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c
index 8b5bfd8c3d..5ab7c525c2 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
@@ -24,6 +24,7 @@
 #include "mlx5_vdpa_utils.h"
 #include "mlx5_vdpa.h"
 
+#define MLX5_VDPA_DRIVER_NAME vdpa_mlx5
 
 #define MLX5_VDPA_DEFAULT_FEATURES ((1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | 
\
(1ULL << VIRTIO_F_ANY_LAYOUT) | \
@@ -834,7 +835,7 @@ static struct mlx5_pci_driver mlx5_vdpa_driver = {
.driver_class = MLX5_CLASS_VDPA,
.pci_driver = {
.driver = {
-   .name = "mlx5_vdpa",
+   .name = RTE_STR(MLX5_VDPA_DRIVER_NAME),
},
.id_table = mlx5_vdpa_pci_id_map,
.probe = mlx5_vdpa_pci_probe,
@@ -855,6 +856,6 @@ RTE_INIT(rte_mlx5_vdpa_init)
mlx5_pci_driver_register(&mlx5_vdpa_driver);
 }
 
-RTE_PMD_EXPORT_NAME(net_mlx5_vdpa, __COUNTER__);
-RTE_PMD_REGISTER_PCI_TABLE(net_mlx5_vdpa, mlx5_vdpa_pci_id_map);
-RTE_PMD_REGISTER_KMOD_DEP(net_mlx5_vdpa, "* ib_uverbs & mlx5_core & mlx5_ib");
+RTE_PMD_EXPORT_NAME(MLX5_VDPA_DRIVER_NAME, __COUNTER__);
+RTE_PMD_REGISTER_PCI_TABLE(MLX5_VDPA_DRIVER_NAME, mlx5_vdpa_pci_id_map);
+RTE_PMD_REGISTER_KMOD_DEP(MLX5_VDPA_DRIVER_NAME, "* ib_uverbs & mlx5_core & 
mlx5_ib");
-- 
2.25.1



[dpdk-dev] [PATCH v4 14/16] compress/mlx5: migrate to common driver

2021-07-21 Thread Xueming Li
To support auxiliary bus, upgrades driver to use mlx5 common driver
structure.

Signed-off-by: Xueming Li 
Acked-by: Viacheslav Ovsiienko 
---
 drivers/compress/mlx5/mlx5_compress.c | 71 ++-
 1 file changed, 15 insertions(+), 56 deletions(-)

diff --git a/drivers/compress/mlx5/mlx5_compress.c 
b/drivers/compress/mlx5/mlx5_compress.c
index f5f51c0ebe..9775c81789 100644
--- a/drivers/compress/mlx5/mlx5_compress.c
+++ b/drivers/compress/mlx5/mlx5_compress.c
@@ -5,7 +5,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
 #include 
 #include 
@@ -13,7 +13,6 @@
 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -37,7 +36,6 @@ struct mlx5_compress_xform {
 struct mlx5_compress_priv {
TAILQ_ENTRY(mlx5_compress_priv) next;
struct ibv_context *ctx; /* Device context. */
-   struct rte_pci_device *pci_dev;
struct rte_compressdev *cdev;
void *uar;
uint32_t pdn; /* Protection Domain number. */
@@ -780,23 +778,8 @@ mlx5_compress_mr_mem_event_cb(enum rte_mem_event 
event_type, const void *addr,
}
 }
 
-/**
- * DPDK callback to register a PCI device.
- *
- * This function spawns compress device out of a given PCI device.
- *
- * @param[in] pci_drv
- *   PCI driver structure (mlx5_compress_driver).
- * @param[in] pci_dev
- *   PCI device information.
- *
- * @return
- *   0 on success, 1 to skip this driver, a negative errno value otherwise
- *   and rte_errno is set.
- */
 static int
-mlx5_compress_pci_probe(struct rte_pci_driver *pci_drv,
-   struct rte_pci_device *pci_dev)
+mlx5_compress_dev_probe(struct rte_device *dev)
 {
struct ibv_device *ibv;
struct rte_compressdev *cdev;
@@ -805,24 +788,17 @@ mlx5_compress_pci_probe(struct rte_pci_driver *pci_drv,
struct mlx5_hca_attr att = { 0 };
struct rte_compressdev_pmd_init_params init_params = {
.name = "",
-   .socket_id = pci_dev->device.numa_node,
+   .socket_id = dev->numa_node,
};
 
-   RTE_SET_USED(pci_drv);
if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
DRV_LOG(ERR, "Non-primary process type is not supported.");
rte_errno = ENOTSUP;
return -rte_errno;
}
-   ibv = mlx5_os_get_ibv_device(&pci_dev->addr);
-   if (ibv == NULL) {
-   DRV_LOG(ERR, "No matching IB device for PCI slot "
-   PCI_PRI_FMT ".", pci_dev->addr.domain,
-   pci_dev->addr.bus, pci_dev->addr.devid,
-   pci_dev->addr.function);
+   ibv = mlx5_os_get_ibv_dev(dev);
+   if (ibv == NULL)
return -rte_errno;
-   }
-   DRV_LOG(INFO, "PCI information matches for device \"%s\".", ibv->name);
ctx = mlx5_glue->dv_open_device(ibv);
if (ctx == NULL) {
DRV_LOG(ERR, "Failed to open IB device \"%s\".", ibv->name);
@@ -838,7 +814,7 @@ mlx5_compress_pci_probe(struct rte_pci_driver *pci_drv,
rte_errno = ENOTSUP;
return -ENOTSUP;
}
-   cdev = rte_compressdev_pmd_create(ibv->name, &pci_dev->device,
+   cdev = rte_compressdev_pmd_create(ibv->name, dev,
  sizeof(*priv), &init_params);
if (cdev == NULL) {
DRV_LOG(ERR, "Failed to create device \"%s\".", ibv->name);
@@ -853,7 +829,6 @@ mlx5_compress_pci_probe(struct rte_pci_driver *pci_drv,
cdev->feature_flags = RTE_COMPDEV_FF_HW_ACCELERATED;
priv = cdev->data->dev_private;
priv->ctx = ctx;
-   priv->pci_dev = pci_dev;
priv->cdev = cdev;
priv->min_block_size = att.compress_min_block_size;
priv->sq_ts_format = att.sq_ts_format;
@@ -884,25 +859,14 @@ mlx5_compress_pci_probe(struct rte_pci_driver *pci_drv,
return 0;
 }
 
-/**
- * DPDK callback to remove a PCI device.
- *
- * This function removes all compress devices belong to a given PCI device.
- *
- * @param[in] pci_dev
- *   Pointer to the PCI device.
- *
- * @return
- *   0 on success, the function cannot fail.
- */
 static int
-mlx5_compress_pci_remove(struct rte_pci_device *pdev)
+mlx5_compress_dev_remove(struct rte_device *dev)
 {
struct mlx5_compress_priv *priv = NULL;
 
pthread_mutex_lock(&priv_list_lock);
TAILQ_FOREACH(priv, &mlx5_compress_priv_list, next)
-   if (rte_pci_addr_cmp(&priv->pci_dev->addr, &pdev->addr) != 0)
+   if (priv->cdev->device == dev)
break;
if (priv)
TAILQ_REMOVE(&mlx5_compress_priv_list, priv, next);
@@ -929,24 +893,19 @@ static const struct rte_pci_id mlx5_compress_pci_id_map[] 
= {
}
 };
 
-static struct mlx5_pci_driver mlx5_compress_driver = {
-   .driver_class = MLX5_CLASS_COMPRESS,
-   .pci_driver = {
-   .driver = {
-   .name = RTE_STR(MLX5_COMPRESS_DRIVER_NAME

[dpdk-dev] [PATCH v4 13/16] vdpa/mlx5: support SubFunction

2021-07-21 Thread Xueming Li
From: Thomas Monjalon 

Supports SubFunction on auxiliary bus. SF probe devargs:
  auxiliary:mlx5_core.sf.<num>,class=vdpa

Signed-off-by: Thomas Monjalon 
Acked-by: Viacheslav Ovsiienko 
---
 doc/guides/vdpadevs/mlx5.rst  | 10 ++
 drivers/vdpa/mlx5/mlx5_vdpa.c |  8 ++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/doc/guides/vdpadevs/mlx5.rst b/doc/guides/vdpadevs/mlx5.rst
index 9b2f9f12c7..e81dbd0004 100644
--- a/doc/guides/vdpadevs/mlx5.rst
+++ b/doc/guides/vdpadevs/mlx5.rst
@@ -162,6 +162,16 @@ Driver options
 
   - 0, HW default.
 
+Devargs example
+^^^^^^^^^^^^^^^
+
+- PCI devargs:
+
+  -a 0000:03:00.2,class=vdpa
+
+- Auxiliary devargs:
+
+  -a auxiliary:mlx5_core.sf.2,class=vdpa
 
 Error handling
 ^^^^^^^^^^^^^^
diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c
index 9c9a552ba0..6d17d7a6f3 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
@@ -553,9 +553,13 @@ mlx5_vdpa_sys_roce_disable(const char *addr)
 static int
 mlx5_vdpa_roce_disable(struct rte_device *dev)
 {
+   char pci_addr[PCI_PRI_STR_SIZE] = { 0 };
+
+   if (mlx5_dev_to_pci_str(dev, pci_addr, sizeof(pci_addr)) < 0)
+   return -rte_errno;
/* Firstly try to disable ROCE by Netlink and fallback to sysfs. */
-   if (mlx5_vdpa_nl_roce_disable(dev->name) != 0 &&
-   mlx5_vdpa_sys_roce_disable(dev->name) != 0)
+   if (mlx5_vdpa_nl_roce_disable(pci_addr) != 0 &&
+   mlx5_vdpa_sys_roce_disable(pci_addr) != 0)
return -rte_errno;
return 0;
 }
-- 
2.25.1



[dpdk-dev] [PATCH v4 12/16] vdpa/mlx5: remove PCI specifics

2021-07-21 Thread Xueming Li
From: Thomas Monjalon 

Removes PCI specific driver, replaces with common class driver.

Signed-off-by: Thomas Monjalon 
Acked-by: Viacheslav Ovsiienko 
---
 drivers/vdpa/mlx5/mlx5_vdpa.c | 119 ++
 drivers/vdpa/mlx5/mlx5_vdpa.h |   1 -
 2 files changed, 34 insertions(+), 86 deletions(-)

diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c
index 5ab7c525c2..9c9a552ba0 100644
--- a/drivers/vdpa/mlx5/mlx5_vdpa.c
+++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
@@ -11,12 +11,11 @@
 #include 
 #include 
 #include 
-#include 
 #include 
+#include 
 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -552,34 +551,13 @@ mlx5_vdpa_sys_roce_disable(const char *addr)
 }
 
 static int
-mlx5_vdpa_roce_disable(struct rte_pci_addr *addr, struct ibv_device **ibv)
+mlx5_vdpa_roce_disable(struct rte_device *dev)
 {
-   char addr_name[64] = {0};
-
-   rte_pci_device_name(addr, addr_name, sizeof(addr_name));
/* Firstly try to disable ROCE by Netlink and fallback to sysfs. */
-   if (mlx5_vdpa_nl_roce_disable(addr_name) == 0 ||
-   mlx5_vdpa_sys_roce_disable(addr_name) == 0) {
-   /*
-* Succeed to disable ROCE, wait for the IB device to appear
-* again after reload.
-*/
-   int r;
-   struct ibv_device *ibv_new;
-
-   for (r = MLX5_VDPA_MAX_RETRIES; r; r--) {
-   ibv_new = mlx5_os_get_ibv_device(addr);
-   if (ibv_new) {
-   *ibv = ibv_new;
-   return 0;
-   }
-   usleep(MLX5_VDPA_USEC);
-   }
-   DRV_LOG(ERR, "Cannot much device %s after ROCE disable, "
-   "retries exceed %d", addr_name, MLX5_VDPA_MAX_RETRIES);
-   rte_errno = EAGAIN;
-   }
-   return -rte_errno;
+   if (mlx5_vdpa_nl_roce_disable(dev->name) != 0 &&
+   mlx5_vdpa_sys_roce_disable(dev->name) != 0)
+   return -rte_errno;
+   return 0;
 }
 
 static int
@@ -647,44 +625,33 @@ mlx5_vdpa_config_get(struct rte_devargs *devargs, struct 
mlx5_vdpa_priv *priv)
DRV_LOG(DEBUG, "no traffic max is %u.", priv->no_traffic_max);
 }
 
-/**
- * DPDK callback to register a mlx5 PCI device.
- *
- * This function spawns vdpa device out of a given PCI device.
- *
- * @param[in] pci_drv
- *   PCI driver structure (mlx5_vpda_driver).
- * @param[in] pci_dev
- *   PCI device information.
- *
- * @return
- *   0 on success, 1 to skip this driver, a negative errno value otherwise
- *   and rte_errno is set.
- */
 static int
-mlx5_vdpa_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
-   struct rte_pci_device *pci_dev __rte_unused)
+mlx5_vdpa_dev_probe(struct rte_device *dev)
 {
struct ibv_device *ibv;
struct mlx5_vdpa_priv *priv = NULL;
struct ibv_context *ctx = NULL;
struct mlx5_hca_attr attr;
+   int retry;
int ret;
 
-   ibv = mlx5_os_get_ibv_device(&pci_dev->addr);
-   if (!ibv) {
-   DRV_LOG(ERR, "No matching IB device for PCI slot "
-   PCI_PRI_FMT ".", pci_dev->addr.domain,
-   pci_dev->addr.bus, pci_dev->addr.devid,
-   pci_dev->addr.function);
+   if (mlx5_vdpa_roce_disable(dev) != 0) {
+   DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".",
+   dev->name);
return -rte_errno;
-   } else {
-   DRV_LOG(INFO, "PCI information matches for device \"%s\".",
-   ibv->name);
}
-   if (mlx5_vdpa_roce_disable(&pci_dev->addr, &ibv) != 0) {
-   DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".",
-   ibv->name);
+   /* Wait for the IB device to appear again after reload. */
+   for (retry = MLX5_VDPA_MAX_RETRIES; retry > 0; --retry) {
+   ibv = mlx5_os_get_ibv_dev(dev);
+   if (ibv != NULL)
+   break;
+   usleep(MLX5_VDPA_USEC);
+   }
+   if (ibv == NULL) {
+   DRV_LOG(ERR, "Cannot get IB device after disabling RoCE for "
+   "\"%s\", retries exceed %d.",
+   dev->name, MLX5_VDPA_MAX_RETRIES);
+   rte_errno = EAGAIN;
return -rte_errno;
}
ctx = mlx5_glue->dv_open_device(ibv);
@@ -722,20 +689,18 @@ mlx5_vdpa_pci_probe(struct rte_pci_driver *pci_drv 
__rte_unused,
if (attr.num_lag_ports == 0)
priv->num_lag_ports = 1;
priv->ctx = ctx;
-   priv->pci_dev = pci_dev;
priv->var = mlx5_glue->dv_alloc_var(ctx, 0);
if (!priv->var) {
DRV_LOG(ERR, "Failed to allocate VAR %u.", errno);
goto error;
}
-   priv->vdev = rte_vdpa_register_device(&

[dpdk-dev] [PATCH v4 15/16] crypto/mlx5: migrate to common driver

2021-07-21 Thread Xueming Li
To support auxiliary bus, upgrades driver to use mlx5 common driver
structure.

Signed-off-by: Xueming Li 
---
 drivers/crypto/mlx5/mlx5_crypto.c | 61 ---
 drivers/crypto/mlx5/mlx5_crypto.h |  1 -
 2 files changed, 16 insertions(+), 46 deletions(-)

diff --git a/drivers/crypto/mlx5/mlx5_crypto.c 
b/drivers/crypto/mlx5/mlx5_crypto.c
index fc05bb7d46..ea734f4d5c 100644
--- a/drivers/crypto/mlx5/mlx5_crypto.c
+++ b/drivers/crypto/mlx5/mlx5_crypto.c
@@ -6,12 +6,11 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
 
 #include 
 #include 
-#include 
 #include 
 #include 
 
@@ -977,23 +976,8 @@ mlx5_crypto_mr_mem_event_cb(enum rte_mem_event event_type, 
const void *addr,
}
 }
 
-/**
- * DPDK callback to register a PCI device.
- *
- * This function spawns crypto device out of a given PCI device.
- *
- * @param[in] pci_drv
- *   PCI driver structure (mlx5_crypto_driver).
- * @param[in] pci_dev
- *   PCI device information.
- *
- * @return
- *   0 on success, 1 to skip this driver, a negative errno value otherwise
- *   and rte_errno is set.
- */
 static int
-mlx5_crypto_pci_probe(struct rte_pci_driver *pci_drv,
-   struct rte_pci_device *pci_dev)
+mlx5_crypto_dev_probe(struct rte_device *dev)
 {
struct ibv_device *ibv;
struct rte_cryptodev *crypto_dev;
@@ -1005,28 +989,21 @@ mlx5_crypto_pci_probe(struct rte_pci_driver *pci_drv,
struct rte_cryptodev_pmd_init_params init_params = {
.name = "",
.private_data_size = sizeof(struct mlx5_crypto_priv),
-   .socket_id = pci_dev->device.numa_node,
+   .socket_id = dev->numa_node,
.max_nb_queue_pairs =
RTE_CRYPTODEV_PMD_DEFAULT_MAX_NB_QUEUE_PAIRS,
};
uint16_t rdmw_wqe_size;
int ret;
 
-   RTE_SET_USED(pci_drv);
if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
DRV_LOG(ERR, "Non-primary process type is not supported.");
rte_errno = ENOTSUP;
return -rte_errno;
}
-   ibv = mlx5_os_get_ibv_device(&pci_dev->addr);
-   if (ibv == NULL) {
-   DRV_LOG(ERR, "No matching IB device for PCI slot "
-   PCI_PRI_FMT ".", pci_dev->addr.domain,
-   pci_dev->addr.bus, pci_dev->addr.devid,
-   pci_dev->addr.function);
+   ibv = mlx5_os_get_ibv_dev(dev);
+   if (ibv == NULL)
return -rte_errno;
-   }
-   DRV_LOG(INFO, "PCI information matches for device \"%s\".", ibv->name);
ctx = mlx5_glue->dv_open_device(ibv);
if (ctx == NULL) {
DRV_LOG(ERR, "Failed to open IB device \"%s\".", ibv->name);
@@ -1041,7 +1018,7 @@ mlx5_crypto_pci_probe(struct rte_pci_driver *pci_drv,
rte_errno = ENOTSUP;
return -ENOTSUP;
}
-   ret = mlx5_crypto_parse_devargs(pci_dev->device.devargs, &devarg_prms);
+   ret = mlx5_crypto_parse_devargs(dev->devargs, &devarg_prms);
if (ret) {
DRV_LOG(ERR, "Failed to parse devargs.");
return -rte_errno;
@@ -1052,7 +1029,7 @@ mlx5_crypto_pci_probe(struct rte_pci_driver *pci_drv,
DRV_LOG(ERR, "Failed to configure login.");
return -rte_errno;
}
-   crypto_dev = rte_cryptodev_pmd_create(ibv->name, &pci_dev->device,
+   crypto_dev = rte_cryptodev_pmd_create(ibv->name, dev,
&init_params);
if (crypto_dev == NULL) {
DRV_LOG(ERR, "Failed to create device \"%s\".", ibv->name);
@@ -1069,7 +1046,6 @@ mlx5_crypto_pci_probe(struct rte_pci_driver *pci_drv,
priv = crypto_dev->data->dev_private;
priv->ctx = ctx;
priv->login_obj = login;
-   priv->pci_dev = pci_dev;
priv->crypto_dev = crypto_dev;
if (mlx5_crypto_hw_global_prepare(priv) != 0) {
rte_cryptodev_pmd_destroy(priv->crypto_dev);
@@ -1112,13 +1088,13 @@ mlx5_crypto_pci_probe(struct rte_pci_driver *pci_drv,
 }
 
 static int
-mlx5_crypto_pci_remove(struct rte_pci_device *pdev)
+mlx5_crypto_dev_remove(struct rte_device *dev)
 {
struct mlx5_crypto_priv *priv = NULL;
 
pthread_mutex_lock(&priv_list_lock);
TAILQ_FOREACH(priv, &mlx5_crypto_priv_list, next)
-   if (rte_pci_addr_cmp(&priv->pci_dev->addr, &pdev->addr) != 0)
+   if (priv->crypto_dev->device == dev)
break;
if (priv)
TAILQ_REMOVE(&mlx5_crypto_priv_list, priv, next);
@@ -1146,24 +1122,19 @@ static const struct rte_pci_id mlx5_crypto_pci_id_map[] 
= {
}
 };
 
-static struct mlx5_pci_driver mlx5_crypto_driver = {
-   .driver_class = MLX5_CLASS_CRYPTO,
-   .pci_driver = {
-   .driver = {
-   .name = RTE_STR(MLX5_CRYPTO_DRIVER_NAME),
-   },
- 

[dpdk-dev] [PATCH v4 10/16] regex/mlx5: migrate to common driver

2021-07-21 Thread Xueming Li
To support auxiliary bus, upgrades driver to use mlx5 common driver
structure.

Signed-off-by: Xueming Li 
Acked-by: Viacheslav Ovsiienko 
---
 drivers/regex/mlx5/mlx5_regex.c | 49 -
 drivers/regex/mlx5/mlx5_regex.h |  1 -
 2 files changed, 18 insertions(+), 32 deletions(-)

diff --git a/drivers/regex/mlx5/mlx5_regex.c b/drivers/regex/mlx5/mlx5_regex.c
index dcb2ced88e..9d93eaa934 100644
--- a/drivers/regex/mlx5/mlx5_regex.c
+++ b/drivers/regex/mlx5/mlx5_regex.c
@@ -9,8 +9,8 @@
 #include 
 #include 
 #include 
+#include 
 
-#include 
 #include 
 #include 
 #include 
@@ -76,15 +76,13 @@ mlx5_regex_engines_status(struct ibv_context *ctx, int 
num_engines)
 }
 
 static void
-mlx5_regex_get_name(char *name, struct rte_pci_device *pci_dev __rte_unused)
+mlx5_regex_get_name(char *name, struct rte_device *dev)
 {
-   sprintf(name, "mlx5_regex_%02x:%02x.%02x", pci_dev->addr.bus,
-   pci_dev->addr.devid, pci_dev->addr.function);
+   sprintf(name, "mlx5_regex_%s", dev->name);
 }
 
 static int
-mlx5_regex_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
-struct rte_pci_device *pci_dev)
+mlx5_regex_dev_probe(struct rte_device *rte_dev)
 {
struct ibv_device *ibv;
struct mlx5_regex_priv *priv = NULL;
@@ -94,16 +92,10 @@ mlx5_regex_pci_probe(struct rte_pci_driver *pci_drv 
__rte_unused,
int ret;
uint32_t val;
 
-   ibv = mlx5_os_get_ibv_device(&pci_dev->addr);
-   if (!ibv) {
-   DRV_LOG(ERR, "No matching IB device for PCI slot "
-   PCI_PRI_FMT ".", pci_dev->addr.domain,
-   pci_dev->addr.bus, pci_dev->addr.devid,
-   pci_dev->addr.function);
+   ibv = mlx5_os_get_ibv_dev(rte_dev);
+   if (ibv == NULL)
return -rte_errno;
-   }
-   DRV_LOG(INFO, "PCI information matches for device \"%s\".",
-   ibv->name);
+   DRV_LOG(INFO, "Probe device \"%s\".", ibv->name);
ctx = mlx5_glue->dv_open_device(ibv);
if (!ctx) {
DRV_LOG(ERR, "Failed to open IB device \"%s\".", ibv->name);
@@ -146,7 +138,7 @@ mlx5_regex_pci_probe(struct rte_pci_driver *pci_drv 
__rte_unused,
priv->is_bf2 = 1;
/* Default RXP programming mode to Shared. */
priv->prog_mode = MLX5_RXP_SHARED_PROG_MODE;
-   mlx5_regex_get_name(name, pci_dev);
+   mlx5_regex_get_name(name, rte_dev);
priv->regexdev = rte_regexdev_register(name);
if (priv->regexdev == NULL) {
DRV_LOG(ERR, "Failed to register RegEx device.");
@@ -180,7 +172,7 @@ mlx5_regex_pci_probe(struct rte_pci_driver *pci_drv 
__rte_unused,
priv->regexdev->enqueue = mlx5_regexdev_enqueue_gga;
 #endif
priv->regexdev->dequeue = mlx5_regexdev_dequeue;
-   priv->regexdev->device = (struct rte_device *)pci_dev;
+   priv->regexdev->device = rte_dev;
priv->regexdev->data->dev_private = priv;
priv->regexdev->state = RTE_REGEXDEV_READY;
priv->mr_scache.reg_mr_cb = mlx5_common_verbs_reg_mr;
@@ -213,13 +205,13 @@ mlx5_regex_pci_probe(struct rte_pci_driver *pci_drv 
__rte_unused,
 }
 
 static int
-mlx5_regex_pci_remove(struct rte_pci_device *pci_dev)
+mlx5_regex_dev_remove(struct rte_device *rte_dev)
 {
char name[RTE_REGEXDEV_NAME_MAX_LEN];
struct rte_regexdev *dev;
struct mlx5_regex_priv *priv = NULL;
 
-   mlx5_regex_get_name(name, pci_dev);
+   mlx5_regex_get_name(name, rte_dev);
dev = rte_regexdev_get_device_by_name(name);
if (!dev)
return 0;
@@ -254,24 +246,19 @@ static const struct rte_pci_id mlx5_regex_pci_id_map[] = {
}
 };
 
-static struct mlx5_pci_driver mlx5_regex_driver = {
-   .driver_class = MLX5_CLASS_REGEX,
-   .pci_driver = {
-   .driver = {
-   .name = RTE_STR(MLX5_REGEX_DRIVER_NAME),
-   },
-   .id_table = mlx5_regex_pci_id_map,
-   .probe = mlx5_regex_pci_probe,
-   .remove = mlx5_regex_pci_remove,
-   .drv_flags = 0,
-   },
+static struct mlx5_class_driver mlx5_regex_driver = {
+   .drv_class = MLX5_CLASS_REGEX,
+   .name = RTE_STR(MLX5_REGEX_DRIVER_NAME),
+   .id_table = mlx5_regex_pci_id_map,
+   .probe = mlx5_regex_dev_probe,
+   .remove = mlx5_regex_dev_remove,
 };
 
 RTE_INIT(rte_mlx5_regex_init)
 {
mlx5_common_init();
if (mlx5_glue)
-   mlx5_pci_driver_register(&mlx5_regex_driver);
+   mlx5_class_driver_register(&mlx5_regex_driver);
 }
 
 RTE_LOG_REGISTER_DEFAULT(mlx5_regex_logtype, NOTICE)
diff --git a/drivers/regex/mlx5/mlx5_regex.h b/drivers/regex/mlx5/mlx5_regex.h
index 51a2101e53..45200bf937 100644
--- a/drivers/regex/mlx5/mlx5_regex.h
+++ b/drivers/regex/mlx5/mlx5_regex.h
@@ -59,7 +59,6 @@ struct mlx5_regex_db {
 struct mlx5_regex_priv {
TAILQ_ENTRY(mlx5_regex_pri

[dpdk-dev] [PATCH v4 16/16] common/mlx5: clean up legacy PCI bus driver

2021-07-21 Thread Xueming Li
Clean up legacy PCI bus driver since all mlx5 PMDs moved to new common
PCI bus driver.

Signed-off-by: Xueming Li 
Acked-by: Viacheslav Ovsiienko 
---
 drivers/common/mlx5/linux/mlx5_common_os.h |   1 -
 drivers/common/mlx5/mlx5_common.c  |   1 -
 drivers/common/mlx5/mlx5_common.h  |   1 +
 drivers/common/mlx5/mlx5_common_pci.c  | 449 +
 drivers/common/mlx5/mlx5_common_pci.h  |  78 
 drivers/common/mlx5/mlx5_common_private.h  |   1 +
 drivers/common/mlx5/version.map|   3 -
 7 files changed, 4 insertions(+), 530 deletions(-)
 delete mode 100644 drivers/common/mlx5/mlx5_common_pci.h

diff --git a/drivers/common/mlx5/linux/mlx5_common_os.h 
b/drivers/common/mlx5/linux/mlx5_common_os.h
index 86d0cb09b0..2b03bf811e 100644
--- a/drivers/common/mlx5/linux/mlx5_common_os.h
+++ b/drivers/common/mlx5/linux/mlx5_common_os.h
@@ -289,7 +289,6 @@ mlx5_os_free(void *addr)
free(addr);
 }
 
-__rte_internal
 struct ibv_device *
 mlx5_os_get_ibv_device(const struct rte_pci_addr *addr);
 
diff --git a/drivers/common/mlx5/mlx5_common.c 
b/drivers/common/mlx5/mlx5_common.c
index 8fe36f7077..459cf4bcc4 100644
--- a/drivers/common/mlx5/mlx5_common.c
+++ b/drivers/common/mlx5/mlx5_common.c
@@ -14,7 +14,6 @@
 #include "mlx5_common.h"
 #include "mlx5_common_os.h"
 #include "mlx5_common_log.h"
-#include "mlx5_common_pci.h"
 #include "mlx5_common_private.h"
 
 uint8_t haswell_broadwell_cpu;
diff --git a/drivers/common/mlx5/mlx5_common.h 
b/drivers/common/mlx5/mlx5_common.h
index 66c83047a1..25737bfffb 100644
--- a/drivers/common/mlx5/mlx5_common.h
+++ b/drivers/common/mlx5/mlx5_common.h
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/drivers/common/mlx5/mlx5_common_pci.c 
b/drivers/common/mlx5/mlx5_common_pci.c
index a7db6e2c11..8b38091d87 100644
--- a/drivers/common/mlx5/mlx5_common_pci.c
+++ b/drivers/common/mlx5/mlx5_common_pci.c
@@ -8,447 +8,17 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include "mlx5_common_log.h"
-#include "mlx5_common_pci.h"
 #include "mlx5_common_private.h"
 
 static struct rte_pci_driver mlx5_common_pci_driver;
 
-/** Legacy PCI bus driver, to be removed /
-
-struct mlx5_pci_device {
-   struct rte_pci_device *pci_dev;
-   TAILQ_ENTRY(mlx5_pci_device) next;
-   uint32_t classes_loaded;
-};
-
-/* Head of list of drivers. */
-static TAILQ_HEAD(mlx5_pci_bus_drv_head, mlx5_pci_driver) drv_list =
-   TAILQ_HEAD_INITIALIZER(drv_list);
-
-/* Head of mlx5 pci devices. */
-static TAILQ_HEAD(mlx5_pci_devices_head, mlx5_pci_device) devices_list =
-   TAILQ_HEAD_INITIALIZER(devices_list);
-
-static const struct {
-   const char *name;
-   unsigned int driver_class;
-} mlx5_classes[] = {
-   { .name = "vdpa", .driver_class = MLX5_CLASS_VDPA },
-   { .name = "eth", .driver_class = MLX5_CLASS_ETH },
-   /* Keep name "net" for backward compatibility. */
-   { .name = "net", .driver_class = MLX5_CLASS_ETH },
-   { .name = "regex", .driver_class = MLX5_CLASS_REGEX },
-   { .name = "compress", .driver_class = MLX5_CLASS_COMPRESS },
-   { .name = "crypto", .driver_class = MLX5_CLASS_CRYPTO },
-};
-
-static const unsigned int mlx5_class_combinations[] = {
-   MLX5_CLASS_ETH,
-   MLX5_CLASS_VDPA,
-   MLX5_CLASS_REGEX,
-   MLX5_CLASS_COMPRESS,
-   MLX5_CLASS_CRYPTO,
-   MLX5_CLASS_ETH | MLX5_CLASS_REGEX,
-   MLX5_CLASS_VDPA | MLX5_CLASS_REGEX,
-   MLX5_CLASS_ETH | MLX5_CLASS_COMPRESS,
-   MLX5_CLASS_VDPA | MLX5_CLASS_COMPRESS,
-   MLX5_CLASS_REGEX | MLX5_CLASS_COMPRESS,
-   MLX5_CLASS_ETH | MLX5_CLASS_CRYPTO,
-   MLX5_CLASS_ETH | MLX5_CLASS_REGEX | MLX5_CLASS_COMPRESS,
-   MLX5_CLASS_VDPA | MLX5_CLASS_CRYPTO,
-   MLX5_CLASS_REGEX | MLX5_CLASS_CRYPTO,
-   MLX5_CLASS_COMPRESS | MLX5_CLASS_CRYPTO,
-   MLX5_CLASS_VDPA | MLX5_CLASS_REGEX | MLX5_CLASS_COMPRESS,
-   MLX5_CLASS_ETH | MLX5_CLASS_REGEX | MLX5_CLASS_CRYPTO,
-   MLX5_CLASS_VDPA | MLX5_CLASS_REGEX | MLX5_CLASS_CRYPTO,
-   MLX5_CLASS_ETH | MLX5_CLASS_COMPRESS | MLX5_CLASS_CRYPTO,
-   MLX5_CLASS_VDPA | MLX5_CLASS_COMPRESS | MLX5_CLASS_CRYPTO,
-   MLX5_CLASS_ETH | MLX5_CLASS_REGEX | MLX5_CLASS_COMPRESS |
-   MLX5_CLASS_CRYPTO,
-   MLX5_CLASS_VDPA | MLX5_CLASS_REGEX | MLX5_CLASS_COMPRESS |
-   MLX5_CLASS_CRYPTO,
-   /* New class combination should be added here. */
-};
-
-static int
-class_name_to_value(const char *class_name)
-{
-   unsigned int i;
-
-   for (i = 0; i < RTE_DIM(mlx5_classes); i++) {
-   if (strcmp(class_name, mlx5_classes[i].name) == 0)
-   return mlx5_classes[i].driver_class;
-   }
-   return -EINVAL;
-}
-
-static struct mlx5_pci_driver *
-driver_get(uint32_t class)
-{
-   struct mlx5_pci_driver *driver;
-
-   TAILQ_FOREACH(driver, &drv_list, next) 

[dpdk-dev] [PATCH v3] doc: announce API changes for Windows compatibility

2021-07-21 Thread Dmitry Kozlyuk
Windows system headers define `s_addr`, `min`, and `max` macros which
break structure definitions containing fields with one of these names.
Undefining those macros would break consumer code that relies on them.

Example 1:

#include 
#include 
struct in_addr addr;
/* addr.s_addr = 0; ERROR: s_addr undefined by DPDK */

Example 2:

#include 
#include 
struct rte_ether_hdr eh;
/* eh.s_addr.addr_bytes[0] = 0; ERROR: `s_addr` is a macro */

Commit 6c068dbd9fea ("net: work around s_addr macro on Windows")
modified definition of `struct rte_ether_hdr` to avoid the issue.
However, the workaround assumes `#define s_addr S_addr.S_un`
in Windows headers, which is not a part of official API.
It also complicates the definition of `struct rte_ether_hdr`.

For `min` and `max`, no workaround seems available. If cryptodev or
compressdev is going to be enabled on Windows before 21.11, the only
option seems to use a new name on Windows (using #ifdef).

It is proposed to rename the conflicting fields on DPDK side,
because Win32 API has wider use and is slower and harder to change.
Exact new names are left for further discussion.

Signed-off-by: Dmitry Kozlyuk 
Acked-by: Khoa To 
---
v3: fix typos (Ferruh), remove naming speculation,
replace workaround snippet with commit reference.

 doc/guides/rel_notes/deprecation.rst | 9 +
 1 file changed, 9 insertions(+)

diff --git a/doc/guides/rel_notes/deprecation.rst 
b/doc/guides/rel_notes/deprecation.rst
index 9584d6bfd7..cc6e8db92c 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -147,3 +147,12 @@ Deprecation Notices
 * cmdline: ``cmdline`` structure will be made opaque to hide platform-specific
   content. On Linux and FreeBSD, supported prior to DPDK 20.11,
   original structure will be kept until DPDK 21.11.
+
+* net: ``s_addr`` and ``d_addr`` fields of ``rte_ether_hdr`` structure
+  will be renamed in DPDK 21.11 to avoid conflict with Windows Sockets headers.
+
+* compressdev: ``min`` and ``max`` fields of ``rte_param_log2_range`` structure
+  will be renamed in DPDK 21.11 to avoid conflict with Windows Sockets headers.
+
+* cryptodev: ``min`` and ``max`` fields of ``rte_crypto_param_range`` structure
+  will be renamed in DPDK 21.11 to avoid conflict with Windows Sockets headers.
-- 
2.29.3



[dpdk-dev] [PATCH v4] doc: announce API changes for Windows compatibility

2021-07-21 Thread Dmitry Kozlyuk
Windows headers define `s_addr`, `min`, and `max` as macros.
If DPDK headers are included after Windows ones, DPDK structure
definitions containing fields with these names get broken (example 1),
as well as any usage of such fields (example 2). If DPDK headers
undefined these macros, it could break consumer code (example 3).
It is proposed to rename structure fields in DPDK, because Win32 headers
are used more widely than DPDK, as a general-purpose platform compared
to domain-specific kit, and are harder to fix because of that.
Exact new names are left for further discussion.

Example 1:

/* in DPDK public header included after windows.h */
struct rte_type {
int min;/* ERROR: `min` is a macro */
};

Example 2:

#include 
#include 
struct rte_ether_hdr eh;
eh.s_addr.addr_bytes[0] = 0; /* ERROR: `s_addr` is a macro */

Example 3:

#include 
#include 
struct in_addr addr;
addr.s_addr = 0;  /* ERROR: there is no `s_addr` field,
 and `s_addr` macro is undefined by DPDK. */

Commit 6c068dbd9fea ("net: work around s_addr macro on Windows")
modified definition of `struct rte_ether_hdr` to avoid the issue.
However, the workaround assumes `#define s_addr S_addr.S_un`
in Windows headers, which is not a part of official API.
It also complicates the definition of `struct rte_ether_hdr`.

Signed-off-by: Dmitry Kozlyuk 
Acked-by: Khoa To 
---
v4: improve wording (Akhil).
v3: fix typos (Ferruh), remove naming speculation,
replace workaround snippet with commit reference.

 doc/guides/rel_notes/deprecation.rst | 9 +
 1 file changed, 9 insertions(+)

diff --git a/doc/guides/rel_notes/deprecation.rst 
b/doc/guides/rel_notes/deprecation.rst
index 9584d6bfd7..cc6e8db92c 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -147,3 +147,12 @@ Deprecation Notices
 * cmdline: ``cmdline`` structure will be made opaque to hide platform-specific
   content. On Linux and FreeBSD, supported prior to DPDK 20.11,
   original structure will be kept until DPDK 21.11.
+
+* net: ``s_addr`` and ``d_addr`` fields of ``rte_ether_hdr`` structure
+  will be renamed in DPDK 21.11 to avoid conflict with Windows Sockets headers.
+
+* compressdev: ``min`` and ``max`` fields of ``rte_param_log2_range`` structure
+  will be renamed in DPDK 21.11 to avoid conflict with Windows Sockets headers.
+
+* cryptodev: ``min`` and ``max`` fields of ``rte_crypto_param_range`` structure
+  will be renamed in DPDK 21.11 to avoid conflict with Windows Sockets headers.
-- 
2.29.3



[dpdk-dev] [PATCH] build: enable to build on power10 or newer for ppc

2021-07-21 Thread Thinh Tran
An older version of the compiler may fail to generate code for new Power
CPUs when the "-mcpu=native" argument is used.
This patch tests whether the compiler supports the current Power CPU type
and, if so, proceeds with the "-mcpu=native" argument; otherwise it tries
an older CPU type. The fallback is limited to two older CPU type levels.

Signed-off-by: Thinh Tran 
---
 config/ppc/check_cpu_platform.sh |  2 ++
 config/ppc/meson.build   | 40 +---
 2 files changed, 34 insertions(+), 8 deletions(-)
 create mode 100644 config/ppc/check_cpu_platform.sh

diff --git a/config/ppc/check_cpu_platform.sh b/config/ppc/check_cpu_platform.sh
new file mode 100644
index 00..cdea24561b
--- /dev/null
+++ b/config/ppc/check_cpu_platform.sh
@@ -0,0 +1,2 @@
+#! /bin/sh
+LD_SHOW_AUXV=1 /bin/true | awk '/AT_PLATFORM/ {print $2}'|sed  's/\power//'
diff --git a/config/ppc/meson.build b/config/ppc/meson.build
index adf49e1f42..05aa860cfd 100644
--- a/config/ppc/meson.build
+++ b/config/ppc/meson.build
@@ -7,16 +7,40 @@ endif
 dpdk_conf.set('RTE_ARCH', 'ppc_64')
 dpdk_conf.set('RTE_ARCH_PPC_64', 1)
 
-# RHEL 7.x uses gcc 4.8.X which doesn't generate code for Power 9 CPUs,
-# though it will detect a Power 9 CPU when the "-mcpu=native" argument
-# is used, resulting in a build failure.
-power9_supported = cc.has_argument('-mcpu=power9')
-if not power9_supported
-cpu_instruction_set = 'power8'
-machine_args = ['-mcpu=power8', '-mtune=power8']
-dpdk_conf.set('RTE_MACHINE','power8')
+# Checking compiler for supporting Power CPU platform
+# For a newer Power(N) system that the current gcc may not support yet,
+# fall back and try N-1 and N-2
+check_cpu = find_program(join_paths(meson.current_source_dir(),
+ 'check_cpu_platform.sh'))
+
+target_cpu = run_command(check_cpu.path()).stdout().strip()
+
+cpu_int = target_cpu.to_int()
+cpu_flag = '-mcpu=power@0@'
+tune_flag = '-mtune=power@0@'
+machine_type = 'power@0@'
+debug = 'configure the compiler to build DPDK for POWER@0@ platform'
+
+if cc.has_argument(cpu_flag.format(cpu_int))
+
+  # target system cpu is supported by the compiler, use '-mcpu=native'
+  message(debug.format(target_cpu+'_native'))
+  machine_args = ['-mcpu=native']
+  dpdk_conf.set('RTE_MACHINE',machine_type.format(cpu_int))
+elif cc.has_argument(cpu_flag.format(cpu_int-1))
+  message(debug.format(cpu_int-1))
+  machine_args = [cpu_flag.format(cpu_int-1),tune_flag.format(cpu_int-1)]
+  dpdk_conf.set('RTE_MACHINE',machine_type.format(cpu_int-1))
+elif cc.has_argument(cpu_flag.format(cpu_int-2))
+  message(debug.format(cpu_int-2))
+  machine_args = [cpu_flag.format(cpu_int-2),tune_flag.format(cpu_int-2)]
+  dpdk_conf.set('RTE_MACHINE',machine_type.format(cpu_int-2))
+else
+  error('The compiler does not support POWER@0@ platform' .format(cpu_int))
 endif
 
+
+
 # Certain POWER9 systems can scale as high as 1536 LCORES, but setting such a
 # high value can waste memory, cause timeouts in time limited autotests, and is
 # unlikely to be used in many production situations.  Similarly, keeping the
-- 
2.17.1



Re: [dpdk-dev] [dpdk-announce] release candidate 21.08-rc1

2021-07-21 Thread Thinh Tran

Hi-

IBM - DPDK on Power Systems

* Basic PF on Mellanox: No new issues or regressions were seen.
* Performance: not tested.

Systems tested:
 - IBM Power9 PowerNV 9006-22P
OS: RHEL 8.3
GCC:  version 8.3.1 20191121 (Red Hat 8.3.1-5)
NICs:
- Mellanox Technologies MT28800 Family [ConnectX-5 Ex]
- firmware version: 16.29.1017
- MLNX_OFED_LINUX-5.2-1.0.4.1 (OFED-5.2-1.0.4)

 - LPARs on IBM Power10 CHRP IBM,9105-42B
OS: RHEL 8.4
GCC: gcc version 8.4.1 20200928 (Red Hat 8.4.1-1)
NICs:
- Mellanox Technologies MT28800 Family [ConnectX-5 Ex]
- firmware version: 16.30.1004
- MLNX_OFED_LINUX-5.3-1.0.0.2

Regards,
Thinh Tran


On 7/10/2021 5:05 AM, Thomas Monjalon wrote:

A new DPDK release candidate is ready for testing:
https://git.dpdk.org/dpdk/tag/?id=v21.08-rc1

There are 517 new patches in this snapshot.
This release cycle is short and should be small.

Release notes:
https://doc.dpdk.org/guides/rel_notes/release_21_08.html

Highlights of 21.08-rc1:
- Linux auxiliary bus
- Aarch32 cross-compilation
- Arm CPPC power management
- Rx multi-queue monitoring for power management
- XZ compressed firmware read
- Marvell CNXK drivers for ethernet, crypto and baseband PHY

Please test and report issues on bugs.dpdk.org.

DPDK 21.08-rc2 is expected in less than two weeks.

Thank you everyone




Re: [dpdk-dev] [PATCH v4 00/16] net/mlx5: support Sub-Function

2021-07-21 Thread Thomas Monjalon
21/07/2021 16:37, Xueming Li:
> Sub-Function [1] is a portion of the PCI device, a SF netdev has its own
> dedicated queues(txq, rxq). A SF shares PCI level resources with other
> SFs and/or with its parent PCI function. Auxiliary bus is the
> fundamental of SF.
> 
> This patch set introduces Sub-Function support for mlx5 PMD driver
> including class net, regex, vdpa and compress.

Applied, thanks.

Fixup note: a transient per-patch compilation issue was fixed,
and new common symbols are made exported for Windows.




Re: [dpdk-dev] [PATCH v1] net/ice: fix IPv6 fragment RSS L3 dst/src not work

2021-07-21 Thread Zhang, Qi Z



> -Original Message-
> From: Xu, Ting 
> Sent: Sunday, July 18, 2021 10:50 PM
> To: dev@dpdk.org
> Cc: Zhang, Qi Z ; Wu, Jingjing ;
> Xing, Beilei ; Xu, Ting ;
> sta...@dpdk.org
> Subject: [PATCH v1] net/ice: fix IPv6 fragment RSS L3 dst/src not work
> 
> Since the header type of IPv6 fragment is wrong, the L3 dst/src RSS hash 
> fields
> cannot work properly. This patch changed the header type from any to outer.
> 
> Fixes: f1ea76eb6394 ("net/ice: support RSS hash for IP fragment")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Ting Xu 
Acked-by: Qi Zhang 

Applied to dpdk-next-net-intel.

Thanks
Qi


[dpdk-dev] [PATCH] doc: update matching list for ice

2021-07-21 Thread Qi Zhang
Add recommended matching list for ice PMD in DPDK 21.05.

Signed-off-by: Qi Zhang 
---
 doc/guides/nics/ice.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/guides/nics/ice.rst b/doc/guides/nics/ice.rst
index 22a19b8bba..5bc472faa9 100644
--- a/doc/guides/nics/ice.rst
+++ b/doc/guides/nics/ice.rst
@@ -55,6 +55,8 @@ The detailed information can refer to chapter Tested 
Platforms/Tested NICs in re

+---+---+-+---+--+---+
|21.02  | 1.4.11|  1.3.24 |  1.3.28   |1.3.4 |  
  2.4|

+---+---+-+---+--+---+
+   |21.05  | 1.6.5 |  1.3.26 |  1.3.30   |1.3.6 |  
  3.0|
+   
+---+---+-+---+--+---+
 
 Pre-Installation Configuration
 --
-- 
2.26.2



Re: [dpdk-dev] [DISCUSSION] code snippet documentation

2021-07-21 Thread Asaf Penso
+ dev@dpdk

Regards,
Asaf Penso


From: users  on behalf of Asaf Penso 
Sent: Thursday, 15 July 2021, 10:02
To: us...@dpdk.org
Subject: [dpdk-users] [DISCUSSION] code snippet documentation

Hello DPDK community,

I would like to bring up a discussion about a way to have code snippets as an 
example for proper usage.
The DPDK tree is filled with great pieces of code that are well documented and 
maintained in high quality.
I feel we are a bit behind when we talk about usage examples.

One way, whenever we implement a new feature, is to extend one of the test-* 
under the "app" folder.
This, however, provides means to test but doesn't provide a good usage example.

Another way is to check the content of the "example" folder and whenever we 
have a BIG new feature it seems like a good place.
This, however, doesn't provide a good option when we talk about small features.
If, for example, we extend rte_flow with an extra action then providing a 
full-blown example application is somewhat an entry barrier.

A third option could be to document it in one of the .rst files we have.
Obviously, this requires high maintenance and no option to assure it still 
compiles.

I'd like to propose another approach that will address the main two issues: 
remove the entry barrier and assure compilation.
In this approach, inside the "examples" folder we'll create another folder for 
"snippets".
Inside "snippets" we'll have several files per category, for example, 
rte_flow_snippets.c
Each .c file will include a main function that calls the different use cases we 
want to give as an example.
The purpose is not to generate traffic nor read rx/tx packets from the DPDK 
ports.
The purpose is to have a good example that compiles properly.

Taking the rte_flow_snippets.c as an example its main function would look like 
this:

int
main(int argc, char **argv)
{
  rte_flow_snippet_match_5tuple_and_drop();
  rte_flow_snippet_match_geneve_ope_and_rss();
  ...
  return 0;
}

Regards,
Asaf Penso



[dpdk-dev] [PATCH v7 0/5] vhost: handle memory hotplug for async vhost

2021-07-21 Thread Cheng Jiang
When the guest memory is hotplugged, the vhost application which
enables DMA acceleration must stop DMA transfers before the vhost
re-maps the guest memory.

This patch set provides an unsafe API to drain inflight packets which
are submitted to the DMA engine in the vhost async data path, and
notifies the vhost application to stop DMA transfers. It also enables
this in the vhost example.

v7:
 * rebased on the latest codes
 * improved commit log
v6:
 * removed unnecessary args for the new API
 * improved variable names and function names
 * added enable notification in set_mem_table
 * fixed vhost example queue clear process
v5:
 * added fixes in 'vhost: fix async vhost ops return type'
 * improved git log, variable names and logs
v4:
 * rebased on the latest codes
v3:
 * added a patch to fix async ops return type
 * fixed async ops fail handler
 * updated the doc
v2:
 * changed the patch structure

Cheng Jiang (4):
  vhost: fix async vhost ops return type
  vhost: add unsafe API to clear packets in async vhost
  examples/vhost: handle memory hotplug for async vhost
  doc: update doc for queue clear API in vhost lib

Jiayu Hu (1):
  vhost: handle memory hotplug for async vhost

 doc/guides/prog_guide/vhost_lib.rst|   5 +
 doc/guides/rel_notes/release_21_08.rst |   5 +
 examples/vhost/ioat.c  |   4 +-
 examples/vhost/ioat.h  |   4 +-
 examples/vhost/main.c  |  55 -
 examples/vhost/main.h  |   1 +
 lib/vhost/rte_vhost_async.h|  30 -
 lib/vhost/version.map  |   1 +
 lib/vhost/vhost_user.c |  16 +++
 lib/vhost/virtio_net.c | 152 -
 10 files changed, 232 insertions(+), 41 deletions(-)

--
2.29.2



[dpdk-dev] [PATCH v7 1/5] vhost: fix async vhost ops return type

2021-07-21 Thread Cheng Jiang
The async vhost callback ops should return a negative value when
something goes wrong in the callback, so the return type should be
changed to int32_t. The issue in the vhost example is also fixed.

Fixes: cd6760da1076 ("vhost: introduce async enqueue for split ring")
Fixes: 819a71685826 ("vhost: fix async callback return type")
Fixes: 6b3c81db8bb7 ("vhost: simplify async copy completion")
Fixes: abec60e7115d ("examples/vhost: support vhost async data path")
Fixes: 873e8dad6f49 ("vhost: support packed ring in async datapath")
Cc: sta...@dpdk.org

Signed-off-by: Cheng Jiang 
Reviewed-by: Maxime Coquelin 
---
 examples/vhost/ioat.c   |  4 +--
 examples/vhost/ioat.h   |  4 +--
 lib/vhost/rte_vhost_async.h |  8 ++---
 lib/vhost/virtio_net.c  | 61 -
 4 files changed, 61 insertions(+), 16 deletions(-)

diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
index 2a2c2d7202..457f8171f0 100644
--- a/examples/vhost/ioat.c
+++ b/examples/vhost/ioat.c
@@ -122,7 +122,7 @@ open_ioat(const char *value)
return ret;
 }
 
-uint32_t
+int32_t
 ioat_transfer_data_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_desc *descs,
struct rte_vhost_async_status *opaque_data, uint16_t count)
@@ -168,7 +168,7 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
return i_desc;
 }
 
-uint32_t
+int32_t
 ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_status *opaque_data,
uint16_t max_packets)
diff --git a/examples/vhost/ioat.h b/examples/vhost/ioat.h
index 1aa28ed6a3..b57b5645b0 100644
--- a/examples/vhost/ioat.h
+++ b/examples/vhost/ioat.h
@@ -27,12 +27,12 @@ struct dma_for_vhost {
 #ifdef RTE_RAW_IOAT
 int open_ioat(const char *value);
 
-uint32_t
+int32_t
 ioat_transfer_data_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_desc *descs,
struct rte_vhost_async_status *opaque_data, uint16_t count);
 
-uint32_t
+int32_t
 ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_status *opaque_data,
uint16_t max_packets);
diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
index 69ec66bba5..02d012ae23 100644
--- a/lib/vhost/rte_vhost_async.h
+++ b/lib/vhost/rte_vhost_async.h
@@ -59,9 +59,9 @@ struct rte_vhost_async_channel_ops {
 * @param count
 *  number of elements in the "descs" array
 * @return
-*  number of descs processed
+*  number of descs processed, negative value means error
 */
-   uint32_t (*transfer_data)(int vid, uint16_t queue_id,
+   int32_t (*transfer_data)(int vid, uint16_t queue_id,
struct rte_vhost_async_desc *descs,
struct rte_vhost_async_status *opaque_data,
uint16_t count);
@@ -76,9 +76,9 @@ struct rte_vhost_async_channel_ops {
 * @param max_packets
 *  max number of packets could be completed
 * @return
-*  number of async descs completed
+*  number of async descs completed, negative value means error
 */
-   uint32_t (*check_completed_copies)(int vid, uint16_t queue_id,
+   int32_t (*check_completed_copies)(int vid, uint16_t queue_id,
struct rte_vhost_async_status *opaque_data,
uint16_t max_packets);
 };
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 6e5d82c1a8..3ab5229f76 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -1644,6 +1644,7 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
struct async_inflight_info *pkts_info = vq->async_pkts_info;
uint32_t n_pkts = 0, pkt_err = 0;
uint32_t num_async_pkts = 0, num_done_pkts = 0;
+   int32_t n_xfer;
struct {
uint16_t pkt_idx;
uint16_t last_avail_idx;
@@ -1724,8 +1725,17 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
if (unlikely(pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD ||
((VHOST_MAX_ASYNC_VEC >> 1) - segs_await <
BUF_VECTOR_MAX))) {
-   n_pkts = vq->async_ops.transfer_data(dev->vid,
+   n_xfer = vq->async_ops.transfer_data(dev->vid,
queue_id, tdes, 0, pkt_burst_idx);
+   if (n_xfer >= 0) {
+   n_pkts = n_xfer;
+   } else {
+   VHOST_LOG_DATA(ERR,
+   "(%d) %s: failed to transfer data for 
queue id %d.\n",
+   dev->vid, __func__, queue_id);
+   n_pkts = 0;
+   }
+
iovec_idx = 0;
it_idx = 0;
 
@@ -1748,8 +1758,15 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,

[dpdk-dev] [PATCH v7 2/5] vhost: add unsafe API to clear packets in async vhost

2021-07-21 Thread Cheng Jiang
Applications need to stop DMA transfers and finish all the inflight
packets when VM memory is hot-plugged and async vhost is used. This
patch provides an unsafe API to clear inflight packets which are
submitted to the DMA engine in the vhost async data path.

Signed-off-by: Cheng Jiang 
Reviewed-by: Maxime Coquelin 
---
 lib/vhost/rte_vhost_async.h | 22 +
 lib/vhost/version.map   |  1 +
 lib/vhost/virtio_net.c  | 93 +++--
 3 files changed, 92 insertions(+), 24 deletions(-)

diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
index 02d012ae23..b25ff446f7 100644
--- a/lib/vhost/rte_vhost_async.h
+++ b/lib/vhost/rte_vhost_async.h
@@ -246,4 +246,26 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
 __rte_experimental
 int rte_vhost_async_get_inflight(int vid, uint16_t queue_id);
 
+/**
+ * This function checks async completion status and clears packets for
+ * a specific vhost device queue. Packets which are inflight will be
+ * returned in an array.
+ *
+ * @note This function does not perform any locking
+ *
+ * @param vid
+ *  ID of vhost device to clear data
+ * @param queue_id
+ *  Queue id to clear data
+ * @param pkts
+ *  Blank array to get return packet pointer
+ * @param count
+ *  Size of the packet array
+ * @return
+ *  Number of packets returned
+ */
+__rte_experimental
+uint16_t rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id,
+   struct rte_mbuf **pkts, uint16_t count);
+
 #endif /* _RTE_VHOST_ASYNC_H_ */
diff --git a/lib/vhost/version.map b/lib/vhost/version.map
index e0c89646e8..e2504ba657 100644
--- a/lib/vhost/version.map
+++ b/lib/vhost/version.map
@@ -84,4 +84,5 @@ EXPERIMENTAL {
rte_vhost_async_get_inflight;
rte_vhost_async_channel_register_thread_unsafe;
rte_vhost_async_channel_unregister_thread_unsafe;
+   rte_vhost_clear_queue_thread_unsafe;
 };
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 3ab5229f76..8549afbbe1 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -2214,10 +2214,10 @@ write_back_completed_descs_packed(struct 
vhost_virtqueue *vq,
} while (nr_left > 0);
 }
 
-uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
+static __rte_always_inline uint16_t
+vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id,
struct rte_mbuf **pkts, uint16_t count)
 {
-   struct virtio_net *dev = get_device(vid);
struct vhost_virtqueue *vq;
uint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0, n_buffers = 0;
uint16_t start_idx, pkts_idx, vq_size;
@@ -2225,26 +2225,8 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
uint16_t from, i;
int32_t n_cpl;
 
-   if (!dev)
-   return 0;
-
-   VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
-   if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
-   VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
-   dev->vid, __func__, queue_id);
-   return 0;
-   }
-
vq = dev->virtqueue[queue_id];
 
-   if (unlikely(!vq->async_registered)) {
-   VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id 
%d.\n",
-   dev->vid, __func__, queue_id);
-   return 0;
-   }
-
-   rte_spinlock_lock(&vq->access_lock);
-
pkts_idx = vq->async_pkts_idx % vq->size;
pkts_info = vq->async_pkts_info;
vq_size = vq->size;
@@ -2252,7 +2234,7 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
vq_size, vq->async_pkts_inflight_n);
 
if (count > vq->async_last_pkts_n) {
-   n_cpl = vq->async_ops.check_completed_copies(vid,
+   n_cpl = vq->async_ops.check_completed_copies(dev->vid,
queue_id, 0, count - vq->async_last_pkts_n);
if (n_cpl >= 0) {
n_pkts_cpl = n_cpl;
@@ -2268,7 +2250,7 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
n_pkts_put = RTE_MIN(count, n_pkts_cpl);
if (unlikely(n_pkts_put == 0)) {
vq->async_last_pkts_n = n_pkts_cpl;
-   goto done;
+   return 0;
}
 
if (vq_is_packed(dev)) {
@@ -2310,10 +2292,73 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
}
}
 
-done:
+   return n_pkts_put;
+}
+
+uint16_t
+rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
+   struct rte_mbuf **pkts, uint16_t count)
+{
+   struct virtio_net *dev = get_device(vid);
+   struct vhost_virtqueue *vq;
+   uint16_t n_pkts_cpl = 0;
+
+   if (!dev)
+   return 0;
+
+   VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
+   if (unlikely(!is_valid_virt_queue_idx(queue_id,

[dpdk-dev] [PATCH v7 3/5] vhost: handle memory hotplug for async vhost

2021-07-21 Thread Cheng Jiang
From: Jiayu Hu 

When the guest memory is hotplugged, the vhost application which
enables DMA acceleration must stop DMA transfers before the vhost
re-maps the guest memory.

This patch notifies the vhost application to stop DMA
transfers.

Signed-off-by: Jiayu Hu 
Reviewed-by: Maxime Coquelin 
---
 lib/vhost/vhost_user.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 31300e194f..433f412fa8 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -1248,6 +1248,7 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct 
VhostUserMsg *msg,
int numa_node = SOCKET_ID_ANY;
uint64_t mmap_offset;
uint32_t i;
+   bool async_notify = false;
 
if (validate_msg_fds(msg, memory->nregions) != 0)
return RTE_VHOST_MSG_RESULT_ERR;
@@ -1275,6 +1276,16 @@ vhost_user_set_mem_table(struct virtio_net **pdev, 
struct VhostUserMsg *msg,
vdpa_dev->ops->dev_close(dev->vid);
dev->flags &= ~VIRTIO_DEV_VDPA_CONFIGURED;
}
+
+   /* notify the vhost application to stop DMA transfers */
+   if (dev->async_copy && dev->notify_ops->vring_state_changed) {
+   for (i = 0; i < dev->nr_vring; i++) {
+   dev->notify_ops->vring_state_changed(dev->vid,
+   i, 0);
+   }
+   async_notify = true;
+   }
+
free_mem_region(dev);
rte_free(dev->mem);
dev->mem = NULL;
@@ -1371,6 +1382,11 @@ vhost_user_set_mem_table(struct virtio_net **pdev, 
struct VhostUserMsg *msg,
 
dump_guest_pages(dev);
 
+   if (async_notify) {
+   for (i = 0; i < dev->nr_vring; i++)
+   dev->notify_ops->vring_state_changed(dev->vid, i, 1);
+   }
+
return RTE_VHOST_MSG_RESULT_OK;
 
 free_mem_table:
-- 
2.29.2



[dpdk-dev] [PATCH v7 4/5] examples/vhost: handle memory hotplug for async vhost

2021-07-21 Thread Cheng Jiang
When the guest memory is hotplugged, the vhost application which
enables DMA acceleration must stop DMA transfers before the vhost
re-maps the guest memory.

To accomplish that, we need to do these changes in the vhost sample:
1. add inflight packets count.
2. add vring_state_changed() callback.
3. add inflight packets clear process in destroy_device() and
vring_state_changed().

Signed-off-by: Cheng Jiang 
Reviewed-by: Maxime Coquelin 
---
 examples/vhost/main.c | 55 +--
 examples/vhost/main.h |  1 +
 2 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 9cd855a696..bc3d71c898 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -851,8 +851,11 @@ complete_async_pkts(struct vhost_dev *vdev)
 
complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
-   if (complete_count)
+   if (complete_count) {
free_pkts(p_cpl, complete_count);
+   __atomic_sub_fetch(&vdev->pkts_inflight, complete_count, 
__ATOMIC_SEQ_CST);
+   }
+
 }
 
 static __rte_always_inline void
@@ -895,6 +898,7 @@ drain_vhost(struct vhost_dev *vdev)
complete_async_pkts(vdev);
ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
+   __atomic_add_fetch(&vdev->pkts_inflight, ret - cpu_cpl_nr, 
__ATOMIC_SEQ_CST);
 
if (cpu_cpl_nr)
free_pkts(m_cpu_cpl, cpu_cpl_nr);
@@ -1226,6 +1230,9 @@ drain_eth_rx(struct vhost_dev *vdev)
enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
VIRTIO_RXQ, pkts, rx_count,
m_cpu_cpl, &cpu_cpl_nr);
+   __atomic_add_fetch(&vdev->pkts_inflight, enqueue_count - 
cpu_cpl_nr,
+   __ATOMIC_SEQ_CST);
+
if (cpu_cpl_nr)
free_pkts(m_cpu_cpl, cpu_cpl_nr);
 
@@ -1397,8 +1404,19 @@ destroy_device(int vid)
"(%d) device has been removed from data core\n",
vdev->vid);
 
-   if (async_vhost_driver)
+   if (async_vhost_driver) {
+   uint16_t n_pkt = 0;
+   struct rte_mbuf *m_cpl[vdev->pkts_inflight];
+
+   while (vdev->pkts_inflight) {
+   n_pkt = rte_vhost_clear_queue_thread_unsafe(vid, 
VIRTIO_RXQ,
+   m_cpl, vdev->pkts_inflight);
+   free_pkts(m_cpl, n_pkt);
+   __atomic_sub_fetch(&vdev->pkts_inflight, n_pkt, 
__ATOMIC_SEQ_CST);
+   }
+
rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
+   }
 
rte_free(vdev);
 }
@@ -1487,6 +1505,38 @@ new_device(int vid)
return 0;
 }
 
+static int
+vring_state_changed(int vid, uint16_t queue_id, int enable)
+{
+   struct vhost_dev *vdev = NULL;
+
+   TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
+   if (vdev->vid == vid)
+   break;
+   }
+   if (!vdev)
+   return -1;
+
+   if (queue_id != VIRTIO_RXQ)
+   return 0;
+
+   if (async_vhost_driver) {
+   if (!enable) {
+   uint16_t n_pkt = 0;
+   struct rte_mbuf *m_cpl[vdev->pkts_inflight];
+
+   while (vdev->pkts_inflight) {
+   n_pkt = 
rte_vhost_clear_queue_thread_unsafe(vid, queue_id,
+   m_cpl, 
vdev->pkts_inflight);
+   free_pkts(m_cpl, n_pkt);
+   __atomic_sub_fetch(&vdev->pkts_inflight, n_pkt, 
__ATOMIC_SEQ_CST);
+   }
+   }
+   }
+
+   return 0;
+}
+
 /*
  * These callback allow devices to be added to the data core when configuration
  * has been fully complete.
@@ -1495,6 +1545,7 @@ static const struct vhost_device_ops 
virtio_net_device_ops =
 {
.new_device =  new_device,
.destroy_device = destroy_device,
+   .vring_state_changed = vring_state_changed,
 };
 
 /*
diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index 0ccdce4b4a..e7b1ac60a6 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -51,6 +51,7 @@ struct vhost_dev {
uint64_t features;
size_t hdr_len;
uint16_t nr_vrings;
+   uint16_t pkts_inflight;
struct rte_vhost_memory *mem;
struct device_statistics stats;
TAILQ_ENTRY(vhost_dev) global_vdev_entry;
-- 
2.29.2



[dpdk-dev] [PATCH v7 5/5] doc: update doc for queue clear API in vhost lib

2021-07-21 Thread Cheng Jiang
Update the program guide and release notes for virtqueue inflight
packets clear API in vhost lib.

Signed-off-by: Cheng Jiang 
Reviewed-by: Maxime Coquelin 
---
 doc/guides/prog_guide/vhost_lib.rst| 5 +
 doc/guides/rel_notes/release_21_08.rst | 5 +
 2 files changed, 10 insertions(+)

diff --git a/doc/guides/prog_guide/vhost_lib.rst 
b/doc/guides/prog_guide/vhost_lib.rst
index 70ce4974df..8874033165 100644
--- a/doc/guides/prog_guide/vhost_lib.rst
+++ b/doc/guides/prog_guide/vhost_lib.rst
@@ -305,6 +305,11 @@ The following is an overview of some key Vhost API 
functions:
   This function returns the amount of in-flight packets for the vhost
   queue using async acceleration.
 
+* ``rte_vhost_clear_queue_thread_unsafe(vid, queue_id, **pkts, count)``
+
+  Clear inflight packets which are submitted to DMA engine in vhost async data
+  path. Completed packets are returned to applications through ``pkts``.
+
 Vhost-user Implementations
 --
 
diff --git a/doc/guides/rel_notes/release_21_08.rst 
b/doc/guides/rel_notes/release_21_08.rst
index 543e93ff1d..d9c4cc5df0 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -155,6 +155,11 @@ New Features
   The experimental PMD power management API now supports managing
   multiple Ethernet Rx queues per lcore.
 
+* **Added inflight packets clear API in vhost library.**
+
+  Added an API which can clear the inflight packets submitted to DMA
+  engine in vhost async data path.
+
 
 Removed Items
 -------------
-- 
2.29.2



Re: [dpdk-dev] [PATCH v7 0/5] vhost: handle memory hotplug for async vhost

2021-07-21 Thread Xia, Chenbo
> -----Original Message-----
> From: Jiang, Cheng1 
> Sent: Thursday, July 22, 2021 12:09 PM
> To: maxime.coque...@redhat.com; Xia, Chenbo 
> Cc: dev@dpdk.org; Hu, Jiayu ; Yang, YvonneX
> ; Jiang, Cheng1 
> Subject: [PATCH v7 0/5] vhost: handle memory hotplug for async vhost
> 
> When the guest memory is hotplugged, the vhost application which
> enables DMA acceleration must stop DMA transfers before the vhost
> re-maps the guest memory.
> 
> This patch set provides an unsafe API to drain in-flight packets that
> have been submitted to the DMA engine in the vhost async data path, and
> notifies the vhost application to stop DMA transfers. It also enables
> this handling in the vhost example.
> 
> v7:
>  * rebased on the latest codes
>  * improved commit log
> v6:
>  * removed unnecessary args for the new API
>  * improved variable names and function names
>  * added enable notification in set_mem_table
>  * fixed vhost example queue clear process
> v5:
>  * added fixes in 'vhost: fix async vhost ops return type'
>  * improved git log, variable names and logs
> v4:
>  * rebased on the latest codes
> v3:
>  * added a patch to fix async ops return type
>  * fixed async ops fail handler
>  * updated the doc
> v2:
>  * changed the patch structure
> 
> Cheng Jiang (4):
>   vhost: fix async vhost ops return type
>   vhost: add unsafe API to clear packets in async vhost
>   examples/vhost: handle memory hotplug for async vhost
>   doc: update doc for queue clear API in vhost lib
> 
> Jiayu Hu (1):
>   vhost: handle memory hotplug for async vhost
> 
>  doc/guides/prog_guide/vhost_lib.rst|   5 +
>  doc/guides/rel_notes/release_21_08.rst |   5 +
>  examples/vhost/ioat.c  |   4 +-
>  examples/vhost/ioat.h  |   4 +-
>  examples/vhost/main.c  |  55 -
>  examples/vhost/main.h  |   1 +
>  lib/vhost/rte_vhost_async.h|  30 -
>  lib/vhost/version.map  |   1 +
>  lib/vhost/vhost_user.c |  16 +++
>  lib/vhost/virtio_net.c | 152 -
>  10 files changed, 232 insertions(+), 41 deletions(-)
> 
> --
> 2.29.2

Series applied to next-virtio/main. Thanks


Re: [dpdk-dev] [PATCH 1/4] ethdev: fix max Rx packet length

2021-07-21 Thread Ajit Khaparde
> > [snip]
> >
> >> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> >> index faf3bd901d75..9f288f98329c 100644
> >> --- a/lib/ethdev/rte_ethdev.h
> >> +++ b/lib/ethdev/rte_ethdev.h
> >> @@ -410,7 +410,7 @@ enum rte_eth_tx_mq_mode {
> >>  struct rte_eth_rxmode {
> >>  /** The multi-queue packet distribution mode to be used, e.g. RSS.
> */
> >>  enum rte_eth_rx_mq_mode mq_mode;
> >> -uint32_t max_rx_pkt_len;  /**< Only used if JUMBO_FRAME enabled. */
> >> +uint32_t mtu;  /**< Requested MTU. */
> >
> > Maximum Transmission Unit looks a bit confusing in the Rx mode
> > structure.
> >
>
> True, but I think it is already used for Rx as a concept, so I believe
> the intention will be clear enough. Do you think it would be clearer if
> we picked a DPDK-specific variable name?
>
Maybe use MRU - Max Receive Unit.


>
> >>  /** Maximum allowed size of LRO aggregated packet. */
> >>  uint32_t max_lro_pkt_size;
> >>  uint16_t split_hdr_size;  /**< hdr buf size (header_split
> enabled).*/
> >
> > [snip]
> >
>
>