Re: [RFC PATCH v2 3/9] vhost: annotate virtqueue access lock

2022-04-07 Thread David Marchand
On Thu, Apr 7, 2022 at 3:40 AM Hu, Jiayu  wrote:
> > diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index
> > a9edc271aa..158460b7d7 100644
> > --- a/lib/vhost/vhost.h
> > +++ b/lib/vhost/vhost.h
> > @@ -834,6 +834,7 @@ vhost_need_event(uint16_t event_idx, uint16_t
> > new_idx, uint16_t old)
> >
> >  static __rte_always_inline void
> >  vhost_vring_call_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
> > + RTE_EXC_LOCK_REQUIRES(vq->access_lock)
>
> vhost_vring_call_split() is called in rte_vhost_vring_call() too, but it
> doesn't acquire vq->access_lock before calling vhost_vring_call_split().

I have some issues with sending patches from other people (Mimecast
seems to think I am trying to impersonate them and strips the content
of the mail?).

You'll notice the series in patchwork starts at patch 2.
https://patchwork.dpdk.org/project/dpdk/list/?series=22292&state=*

My intent was to have Maxime's fix (already in next-virtio:
https://git.dpdk.org/next/dpdk-next-virtio/commit/?id=53d8fffcf8e3c89c9785f8ce50db892f2cdfd7c7)
as part of this series.


[snip]

> > @@ -1955,11 +1957,11 @@ write_back_completed_descs_packed(struct
> > vhost_virtqueue *vq,  }
> >
> >  static __rte_always_inline uint16_t
> > -vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id,
> > +vhost_poll_enqueue_completed(struct virtio_net *dev, struct
> > +vhost_virtqueue *vq,
> >   struct rte_mbuf **pkts, uint16_t count, int16_t dma_id,
> >   uint16_t vchan_id)
> > + RTE_EXC_LOCK_REQUIRES(vq->access_lock)
>
> rte_vhost_clear_queue_thread_unsafe() doesn't acquire vq->access_lock.
> Will it cause a compiler warning?

Mm, probably a rebase/split error on my side when doing RFC v2.
On the other hand, I don't think we can enable the check at this point
of the series in any case (there would be other warnings, at least for
rwlocks).
I'll double check before sending the next revision, thanks for pointing it out.
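
For reference, a minimal sketch (not the DPDK code) of how clang's
-Wthread-safety analysis reacts to such an annotation; the attribute
spellings follow the RTE_ANNOTATED_LOCK/RTE_EXC_LOCK_REQUIRES macros
from this series, and all names below are illustrative only:

struct __attribute__((lockable)) dummy_lock { int taken; };

void take(struct dummy_lock *l) __attribute__((exclusive_lock_function(l)));
void release(struct dummy_lock *l) __attribute__((unlock_function(l)));

/* annotated like vhost_vring_call_split() in the hunk quoted above */
void vring_call(struct dummy_lock *l) __attribute__((exclusive_locks_required(l)));

void caller_ok(struct dummy_lock *l)
{
	take(l);
	vring_call(l);	/* fine: the analysis sees the lock held */
	release(l);
}

void caller_warns(struct dummy_lock *l)
{
	vring_call(l);	/* warning: calling 'vring_call' requires holding 'l' */
}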


-- 
David Marchand



RE: [PATCH v2] net/ice: refact parser API usage

2022-04-07 Thread Zhang, Qi Z



> -Original Message-
> From: Xu, Ting 
> Sent: Thursday, April 7, 2022 2:45 PM
> To: Zhang, Qi Z ; Guo, Junfeng
> 
> Cc: dev@dpdk.org; Yang, Qiming ;
> sta...@dpdk.org
> Subject: RE: [PATCH v2] net/ice: refact parser API usage
> 
> > -Original Message-
> > From: Zhang, Qi Z 
> > Sent: Thursday, April 7, 2022 8:40 PM
> > To: Guo, Junfeng ; Xu, Ting 
> > Cc: dev@dpdk.org; Yang, Qiming ; Zhang, Qi Z
> > ; sta...@dpdk.org
> > Subject: [PATCH v2] net/ice: refact parser API usage
> >
> > It is not necessary to create/destroy a parser instance for every raw
> > packet rule. A global parser instance will be created in ice_flow_init
> > and destroyed in ice_flow_uninit.
> >
> > Also, ice_dev_udp_tunnel_port_add has been hooked to perform the
> > corresponding parser configuration. This also fixes the issue that the
> > RSS engine can't support VXLAN inner through the raw packet filter.
> >
> > Fixes: 1b9c68120a1c ("net/ice: enable protocol agnostic flow
> > offloading in
> > RSS")
> > Cc: sta...@dpdk.org
> >
> > Signed-off-by: Qi Zhang 
> > ---
> >  drivers/net/ice/ice_ethdev.c   | 10 ++
> >  drivers/net/ice/ice_ethdev.h   |  1 +
> >  drivers/net/ice/ice_fdir_filter.c  | 15 +--
> > drivers/net/ice/ice_generic_flow.c |  8 
> >  4 files changed, 24 insertions(+), 10 deletions(-)
> >
> > diff --git a/drivers/net/ice/ice_ethdev.c
> > b/drivers/net/ice/ice_ethdev.c index 73e550f5fb..8bb34b874b 100644
> > --- a/drivers/net/ice/ice_ethdev.c
> > +++ b/drivers/net/ice/ice_ethdev.c
> > @@ -5621,6 +5621,8 @@ ice_dev_udp_tunnel_port_add(struct
> rte_eth_dev
> > *dev,  {
> > int ret = 0;
> > struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data-
> > >dev_private);
> > +   struct ice_adapter *ad =
> > +   ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
> >
> > if (udp_tunnel == NULL)
> > return -EINVAL;
> > @@ -5628,6 +5630,9 @@ ice_dev_udp_tunnel_port_add(struct
> rte_eth_dev
> > *dev,
> > switch (udp_tunnel->prot_type) {
> > case RTE_ETH_TUNNEL_TYPE_VXLAN:
> > ret = ice_create_tunnel(hw, TNL_VXLAN, udp_tunnel-
> > >udp_port);
> > +   if (!ret && ad->psr != NULL)
> > +   ice_parser_vxlan_tunnel_set(ad->psr,
> > +   udp_tunnel->udp_port, true);
> > break;
> > default:
> > PMD_DRV_LOG(ERR, "Invalid tunnel type"); @@ -5645,6
> > +5650,8 @@ ice_dev_udp_tunnel_port_del(struct rte_eth_dev *dev,  {
> > int ret = 0;
> > struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data-
> > >dev_private);
> > +   struct ice_adapter *ad =
> > +   ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
> >
> > if (udp_tunnel == NULL)
> > return -EINVAL;
> > @@ -5652,6 +5659,9 @@ ice_dev_udp_tunnel_port_del(struct
> rte_eth_dev
> > *dev,
> > switch (udp_tunnel->prot_type) {
> > case RTE_ETH_TUNNEL_TYPE_VXLAN:
> > ret = ice_destroy_tunnel(hw, udp_tunnel->udp_port, 0);
> > +   if (!ret && ad->psr != NULL)
> > +   ice_parser_vxlan_tunnel_set(ad->psr,
> > +   udp_tunnel->udp_port, false);
> > break;
> > default:
> > PMD_DRV_LOG(ERR, "Invalid tunnel type"); diff --git
> > a/drivers/net/ice/ice_ethdev.h b/drivers/net/ice/ice_ethdev.h index
> > 3ab310628f..3d8427225f 100644
> > --- a/drivers/net/ice/ice_ethdev.h
> > +++ b/drivers/net/ice/ice_ethdev.h
> > @@ -561,6 +561,7 @@ struct ice_adapter {
> > struct ice_rss_prof_info rss_prof_info[ICE_MAX_PTGS];
> > /* True if DCF state of the associated PF is on */
> > bool dcf_state_on;
> > +   struct ice_parser *psr;
> >  #ifdef RTE_ARCH_X86
> > bool rx_use_avx2;
> > bool rx_use_avx512;
> > diff --git a/drivers/net/ice/ice_fdir_filter.c
> > b/drivers/net/ice/ice_fdir_filter.c
> > index 7954c6d8ea..894e593dc7 100644
> > --- a/drivers/net/ice/ice_fdir_filter.c
> > +++ b/drivers/net/ice/ice_fdir_filter.c
> > @@ -1826,7 +1826,6 @@ ice_fdir_parse_pattern(__rte_unused struct
> > ice_adapter *ad,
> > struct ice_fdir_v4 *p_v4 = NULL;
> > struct ice_fdir_v6 *p_v6 = NULL;
> > struct ice_parser_result rslt;
> > -   struct ice_parser *psr;
> > uint8_t item_num = 0;
> >
> > for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END;
> > item++) { @@ -1861,6 +1860,10 @@ ice_fdir_parse_pattern(__rte_unused
> > struct ice_adapter *ad,
> >
> > switch (item_type) {
> > case RTE_FLOW_ITEM_TYPE_RAW: {
> > +
> > +   if (ad->psr == NULL)
> > +   return -rte_errno;
> > +
> > raw_spec = item->spec;
> > raw_mask = item->mask;
> >
> > @@ -1872,7 +1875,6 @@ ice_fdir_parse_pattern(__rte_unused struct
> > ice_adapter *ad,
> > (uint8_t *)(uintptr_t)raw_spec->pattern;
> > unsigned char *tmp_mask =
> > (uint8_t *)(uintptr_t)raw_mask

[PATCH v3 1/2] cryptodev: move dh type from xform to dh op

2022-04-07 Thread Arek Kusztal
The operation type (PUBLIC_KEY_GENERATION, SHARED_SECRET) should
be free to choose for any operation. One xform/session should
be enough to perform both DH operations; if this is an xform
member, a session needs to be created twice for the same group,
and a similar problem would be observed in the sessionless case.
Additionally, it will help extend DH to support elliptic curves.

Signed-off-by: Arek Kusztal 
---
v3:
- changed op_type comments in dh

 lib/cryptodev/rte_crypto_asym.h | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/lib/cryptodev/rte_crypto_asym.h b/lib/cryptodev/rte_crypto_asym.h
index cd24d4b07b..40c1d90604 100644
--- a/lib/cryptodev/rte_crypto_asym.h
+++ b/lib/cryptodev/rte_crypto_asym.h
@@ -256,8 +256,6 @@ struct rte_crypto_modinv_xform {
  *
  */
 struct rte_crypto_dh_xform {
-   enum rte_crypto_asym_op_type type;
-   /**< Setup xform for key generate or shared secret compute */
rte_crypto_uint p;
/**< Prime modulus data */
rte_crypto_uint g;
@@ -391,27 +389,29 @@ struct rte_crypto_rsa_op_param {
  * @note:
  */
 struct rte_crypto_dh_op_param {
+   enum rte_crypto_asym_op_type op_type;
+   /**< Diffie-Hellman operation phase */
rte_crypto_uint pub_key;
/**<
-* Output generated public key when xform type is
+* Output generated public key when op_type is
 * DH PUB_KEY_GENERATION.
-* Input peer public key when xform type is DH
+* Input peer public key when op_type is DH
 * SHARED_SECRET_COMPUTATION
 *
 */
 
rte_crypto_uint priv_key;
/**<
-* Output generated private key if xform type is
+* Output generated private key if op_type is
 * DH PRIVATE_KEY_GENERATION
-* Input when xform type is DH SHARED_SECRET_COMPUTATION.
+* Input when op_type is DH SHARED_SECRET_COMPUTATION.
 *
 */
 
rte_crypto_uint shared_secret;
/**<
 * Output with calculated shared secret
-* when dh xform set up with op type = SHARED_SECRET_COMPUTATION.
+* when dh op_type = SHARED_SECRET_COMPUTATION.
 *
 */
 };
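
A minimal usage sketch (not part of the patch) of what this buys: with
op_type now on the op, a single DH session created from one p/g xform
can drive both phases ("op" and "sess" are assumed to exist already):

struct rte_crypto_asym_op *asym_op = op->asym;

rte_crypto_op_attach_asym_session(op, sess);

/* phase 1: generate our public key */
asym_op->dh.op_type = RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE;
/* ... enqueue/dequeue ... */

/* phase 2: same session, now compute the shared secret */
asym_op->dh.op_type = RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE;
/* ... enqueue/dequeue ... */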
-- 
2.13.6



[PATCH v3 2/2] test/crypto: move dh type from xform to dh op

2022-04-07 Thread Arek Kusztal
This commit reflects the API changes in Diffie-Hellman:
the asym_op, not the xform, is now responsible for setting
the crypto operation type.

Signed-off-by: Arek Kusztal 
---
v3:
- changed op_type comments in dh

 app/test/test_cryptodev_asym.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/app/test/test_cryptodev_asym.c b/app/test/test_cryptodev_asym.c
index 573af2a537..a5e385f4bd 100644
--- a/app/test/test_cryptodev_asym.c
+++ b/app/test/test_cryptodev_asym.c
@@ -1064,8 +1064,8 @@ test_dh_gen_shared_sec(struct rte_crypto_asym_xform *xfrm)
asym_op = op->asym;
 
/* Setup a xform and op to generate private key only */
-   xform.dh.type = RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE;
xform.next = NULL;
+   asym_op->dh.op_type = RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE;
asym_op->dh.priv_key.data = dh_test_params.priv_key.data;
asym_op->dh.priv_key.length = dh_test_params.priv_key.length;
asym_op->dh.pub_key.data = (uint8_t *)peer;
@@ -1146,7 +1146,7 @@ test_dh_gen_priv_key(struct rte_crypto_asym_xform *xfrm)
asym_op = op->asym;
 
/* Setup a xform and op to generate private key only */
-   xform.dh.type = RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE;
+   asym_op->dh.op_type = RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE;
xform.next = NULL;
asym_op->dh.priv_key.data = output;
asym_op->dh.priv_key.length = sizeof(output);
@@ -1229,7 +1229,7 @@ test_dh_gen_pub_key(struct rte_crypto_asym_xform *xfrm)
 * using test private key
 *
 */
-   xform.dh.type = RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE;
+   asym_op->dh.op_type = RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE;
xform.next = NULL;
 
asym_op->dh.pub_key.data = output;
@@ -1319,9 +1319,10 @@ test_dh_gen_kp(struct rte_crypto_asym_xform *xfrm)
/* Setup a xform chain to generate
 * private key first followed by
 * public key
-*/xform.dh.type = RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE;
+*/
+   asym_op->dh.op_type = RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE;
pub_key_xform.xform_type = RTE_CRYPTO_ASYM_XFORM_DH;
-   pub_key_xform.dh.type = RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE;
+   asym_op->dh.op_type = RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE;
xform.next = &pub_key_xform;
 
asym_op->dh.pub_key.data = out_pub_key;
-- 
2.13.6



Re: [RFC PATCH v1 00/15] merge DTS core files to DPDK

2022-04-07 Thread Thomas Monjalon
07/04/2022 07:04, Jerin Jacob:
> On Wed, Apr 6, 2022 at 8:26 PM Juraj Linkeš  
> wrote:
> >
> > These are the basic libraries that other libraries depend on. There's
> > also the basic framework functionality related to test execution.
> >
> > Juraj Linkeš (15):
> >   dts: merge DTS dep/tclclient.tgz to DPDK
> >   dts: merge DTS dep/tgen.tgz to DPDK
> >   dts: merge DTS dts to DPDK
> >   dts: merge DTS framework/__init__.py to DPDK
> >   dts: merge DTS framework/asan_test.py to DPDK
> >   dts: merge DTS framework/checkCase.py to DPDK
> >   dts: merge DTS framework/dts.py to DPDK
> >   dts: merge DTS framework/exception.py to DPDK
> >   dts: merge DTS framework/logger.py to DPDK
> >   dts: merge DTS framework/packet.py to DPDK
> >   dts: merge DTS framework/project_dpdk.py to DPDK
> >   dts: merge DTS framework/serializer.py to DPDK
> >   dts: merge DTS framework/utils.py to DPDK
> >   dts: merge DTS main.py to DPDK
> >   dts: merge DTS version.py to DPDK
> 
> merge->import
> 
> >
> >  dts/dep/tclclient.tgz |  Bin 0 -> 199327 bytes
> >  dts/dep/tgen.tgz  |  Bin 0 -> 134392 bytes
> 
> Some top level comments:
> - I think, we should not check in binary files.

+1

> - git commit comment should be much more than "dts: merge DTS ... to DPDK";
> the commit log should have details on the check-in.

+1

> -Add the documentation from the first patch and update the
> documentation per patch
> based on the content.

+1

More comments:

- Please don't send so many patches, it looks like spam.
- Please let's start small with the very minimal code
to run a dummy test.
- Split by file does not make sense

The process is going to be very long.
The techboard said in the past that we must have a very careful review
of an import piece by piece. So please be patient.
Thank you




Re: [PATCH] kni: fix device address set

2022-04-07 Thread Thomas Monjalon
07/04/2022 02:44, Min Hu (Connor):
> Hi, Stephen,
>   I think this is a good option, but the macro definition is like:
> +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
> +#define ether_addr_copy(dst, src) memcpy(dst, src, ETH_ALEN)
> +#endif
> 
> @Ferry, why is it limited to "LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)"?

I guess that's because it is defined in "new kernels" so we need
a definition in DPDK for old kernels.





[PATCH] cryptodev: add elliptic curve diffie hellman

2022-04-07 Thread Arek Kusztal
This commit adds an Elliptic Curve Diffie-Hellman option to cryptodev.
This could be achieved with EC point multiplication, but:
1) Phase 1 of DH uses the EC generator, while point multiplication
expects the generator to be set manually.
2) It will unify the usage of DH.
3) It can easily be extended to support X25519 and X448.

Signed-off-by: Arek Kusztal 
---
Depends-on: series-22398 ("cryptodev: move dh type from xform to dh op")

 lib/cryptodev/rte_crypto_asym.h | 41 +
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/lib/cryptodev/rte_crypto_asym.h b/lib/cryptodev/rte_crypto_asym.h
index 40c1d90604..e65222b802 100644
--- a/lib/cryptodev/rte_crypto_asym.h
+++ b/lib/cryptodev/rte_crypto_asym.h
@@ -91,6 +91,8 @@ enum rte_crypto_asym_xform_type {
/**< Elliptic Curve Digital Signature Algorithm
 * Perform Signature Generation and Verification.
 */
+   RTE_CRYPTO_ASYM_XFORM_ECDH,
+   /**< Elliptic Curve Diffie Hellman */
RTE_CRYPTO_ASYM_XFORM_ECPM,
/**< Elliptic Curve Point Multiplication */
RTE_CRYPTO_ASYM_XFORM_TYPE_LIST_END
@@ -385,34 +387,41 @@ struct rte_crypto_rsa_op_param {
 };
 
 /**
- * Diffie-Hellman Operations params.
+ * Diffie-Hellman/Elliptic Curve Diffie-Hellman operation.
  * @note:
  */
 struct rte_crypto_dh_op_param {
enum rte_crypto_asym_op_type op_type;
/**< Diffie-Hellman operation phase */
-   rte_crypto_uint pub_key;
+
+   rte_crypto_param priv_key;
/**<
-* Output generated public key when op_type is
-* DH PUB_KEY_GENERATION.
-* Input peer public key when op_type is DH
-* SHARED_SECRET_COMPUTATION
-*
+* Diffie-Hellman private part
+* For DH and ECDH it is a big-endian integer.
+* Input for both phases of Diffie-Hellman
 */
 
-   rte_crypto_uint priv_key;
+   union {
+   rte_crypto_uint pub_key;
+   struct rte_crypto_ec_point pub_point;
+   };
/**<
-* Output generated private key if op_type is
-* DH PRIVATE_KEY_GENERATION
-* Input when op_type is DH SHARED_SECRET_COMPUTATION.
-*
+* Diffie-Hellman public part
+* For DH it is a big-endian unsigned integer.
+* For ECDH it is a point on the curve.
+* Output for RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE
+* Input for RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE
 */
 
-   rte_crypto_uint shared_secret;
+   union {
+   rte_crypto_uint shared_secret;
+   struct rte_crypto_ec_point shared_point;
+   };
/**<
-* Output with calculated shared secret
-* when dh op_type = SHARED_SECRET_COMPUTATION.
-*
+* Diffie-Hellman shared secret
+* For DH it is a big-endian unsigned integer.
+* For ECDH it is a point on the curve.
+* Output for RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE
 */
 };
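
A hedged sketch (not part of the patch) of how an ECDH shared-secret
computation would fill the unions above, assuming the usual x/y members
of struct rte_crypto_ec_point; all buffer names are placeholders:

asym_op->dh.op_type = RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE;
asym_op->dh.priv_key.data = priv;          /* big-endian scalar */
asym_op->dh.priv_key.length = priv_len;
asym_op->dh.pub_point.x.data = peer_x;     /* peer public point */
asym_op->dh.pub_point.x.length = coord_len;
asym_op->dh.pub_point.y.data = peer_y;
asym_op->dh.pub_point.y.length = coord_len;
asym_op->dh.shared_point.x.data = out_x;   /* output point */
asym_op->dh.shared_point.x.length = coord_len;
asym_op->dh.shared_point.y.data = out_y;
asym_op->dh.shared_point.y.length = coord_len;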
 
-- 
2.13.6



[PATCH v3] net/ice: refact parser API usage

2022-04-07 Thread Qi Zhang
It is not necessary to create/destroy a parser instance for every raw
packet rule. A global parser instance will be created in ice_flow_init
and destroyed in ice_flow_uninit.

Also, ice_dev_udp_tunnel_port_add has been hooked to perform the
corresponding parser configuration. This also fixes the issue that the
RSS engine can't support VXLAN inner through the raw packet filter.

Fixes: 1b9c68120a1c ("net/ice: enable protocol agnostic flow offloading in RSS")
Cc: sta...@dpdk.org

Signed-off-by: Qi Zhang 
---

v3:
- add missing refact in ice_hash.c

v2:
- typo fix.

 drivers/net/ice/ice_ethdev.c   | 10 ++
 drivers/net/ice/ice_ethdev.h   |  1 +
 drivers/net/ice/ice_fdir_filter.c  | 14 --
 drivers/net/ice/ice_generic_flow.c |  8 
 drivers/net/ice/ice_hash.c |  9 -
 5 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
index 73e550f5fb..8bb34b874b 100644
--- a/drivers/net/ice/ice_ethdev.c
+++ b/drivers/net/ice/ice_ethdev.c
@@ -5621,6 +5621,8 @@ ice_dev_udp_tunnel_port_add(struct rte_eth_dev *dev,
 {
int ret = 0;
struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+   struct ice_adapter *ad =
+   ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 
if (udp_tunnel == NULL)
return -EINVAL;
@@ -5628,6 +5630,9 @@ ice_dev_udp_tunnel_port_add(struct rte_eth_dev *dev,
switch (udp_tunnel->prot_type) {
case RTE_ETH_TUNNEL_TYPE_VXLAN:
ret = ice_create_tunnel(hw, TNL_VXLAN, udp_tunnel->udp_port);
+   if (!ret && ad->psr != NULL)
+   ice_parser_vxlan_tunnel_set(ad->psr,
+   udp_tunnel->udp_port, true);
break;
default:
PMD_DRV_LOG(ERR, "Invalid tunnel type");
@@ -5645,6 +5650,8 @@ ice_dev_udp_tunnel_port_del(struct rte_eth_dev *dev,
 {
int ret = 0;
struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+   struct ice_adapter *ad =
+   ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 
if (udp_tunnel == NULL)
return -EINVAL;
@@ -5652,6 +5659,9 @@ ice_dev_udp_tunnel_port_del(struct rte_eth_dev *dev,
switch (udp_tunnel->prot_type) {
case RTE_ETH_TUNNEL_TYPE_VXLAN:
ret = ice_destroy_tunnel(hw, udp_tunnel->udp_port, 0);
+   if (!ret && ad->psr != NULL)
+   ice_parser_vxlan_tunnel_set(ad->psr,
+   udp_tunnel->udp_port, false);
break;
default:
PMD_DRV_LOG(ERR, "Invalid tunnel type");
diff --git a/drivers/net/ice/ice_ethdev.h b/drivers/net/ice/ice_ethdev.h
index 3ab310628f..3d8427225f 100644
--- a/drivers/net/ice/ice_ethdev.h
+++ b/drivers/net/ice/ice_ethdev.h
@@ -561,6 +561,7 @@ struct ice_adapter {
struct ice_rss_prof_info rss_prof_info[ICE_MAX_PTGS];
/* True if DCF state of the associated PF is on */
bool dcf_state_on;
+   struct ice_parser *psr;
 #ifdef RTE_ARCH_X86
bool rx_use_avx2;
bool rx_use_avx512;
diff --git a/drivers/net/ice/ice_fdir_filter.c 
b/drivers/net/ice/ice_fdir_filter.c
index 7954c6d8ea..0982478feb 100644
--- a/drivers/net/ice/ice_fdir_filter.c
+++ b/drivers/net/ice/ice_fdir_filter.c
@@ -1826,7 +1826,6 @@ ice_fdir_parse_pattern(__rte_unused struct ice_adapter 
*ad,
struct ice_fdir_v4 *p_v4 = NULL;
struct ice_fdir_v6 *p_v6 = NULL;
struct ice_parser_result rslt;
-   struct ice_parser *psr;
uint8_t item_num = 0;
 
for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
@@ -1861,6 +1860,9 @@ ice_fdir_parse_pattern(__rte_unused struct ice_adapter 
*ad,
 
switch (item_type) {
case RTE_FLOW_ITEM_TYPE_RAW: {
+   if (ad->psr == NULL)
+   return -rte_errno;
+
raw_spec = item->spec;
raw_mask = item->mask;
 
@@ -1872,7 +1874,6 @@ ice_fdir_parse_pattern(__rte_unused struct ice_adapter 
*ad,
(uint8_t *)(uintptr_t)raw_spec->pattern;
unsigned char *tmp_mask =
(uint8_t *)(uintptr_t)raw_mask->pattern;
-   uint16_t udp_port = 0;
uint16_t tmp_val = 0;
uint8_t pkt_len = 0;
uint8_t tmp = 0;
@@ -1921,15 +1922,8 @@ ice_fdir_parse_pattern(__rte_unused struct ice_adapter 
*ad,
 
pkt_len /= 2;
 
-   if (ice_parser_create(&ad->hw, &psr))
-   return -rte_errno;
-   if (ice_get_open_tunnel_port(&ad->hw, TNL_VXLAN,
-&udp_port))
-   ice_parser_vxlan_tunnel_se
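
The ice_generic_flow.c and ice_hash.c hunks are not quoted above (the
mail is truncated here). A hedged sketch of the global-parser lifecycle
described in the commit message; ice_parser_destroy() is assumed as the
counterpart of the ice_parser_create() call visible in the removed fdir
code above:

/* in ice_flow_init(): create the parser once per adapter */
if (ice_parser_create(&ad->hw, &ad->psr))
	ad->psr = NULL;	/* raw packet rules are then rejected, see above */

/* in ice_flow_uninit(): release it */
if (ad->psr != NULL) {
	ice_parser_destroy(ad->psr);
	ad->psr = NULL;
}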

[PATCH] test/crypto: add key exchange dh tests

2022-04-07 Thread Arek Kusztal
This patch adds Diffie-Hellman key exchange tests.
Alice's and Bob's private keys are generated in the tests;
the public keys are then generated, followed by the shared secrets.
Alice's and Bob's shared secrets are then compared to obtain the
result of the test.

This test should be easy to extend to use ECDH.
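
In short, the check the test performs (a sketch of the DH math only,
not of the test code):

/*
 * pub_A    = g^priv_A mod p     (PUBLIC_KEY_GENERATE for Alice)
 * pub_B    = g^priv_B mod p     (PUBLIC_KEY_GENERATE for Bob)
 * secret_A = pub_B^priv_A mod p (SHARED_SECRET_COMPUTE for Alice)
 * secret_B = pub_A^priv_B mod p (SHARED_SECRET_COMPUTE for Bob)
 *
 * The test passes when secret_A == secret_B.
 */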

Signed-off-by: Arek Kusztal 
---
 app/test/test_cryptodev_asym.c| 266 ++
 app/test/test_cryptodev_dh_test_vectors.h | 305 ++
 2 files changed, 571 insertions(+)

diff --git a/app/test/test_cryptodev_asym.c b/app/test/test_cryptodev_asym.c
index 573af2a537..1665709469 100644
--- a/app/test/test_cryptodev_asym.c
+++ b/app/test/test_cryptodev_asym.c
@@ -5,6 +5,9 @@
 
 #ifndef RTE_EXEC_ENV_WINDOWS
 
+#include 
+#include 
+
 #include 
 #include 
 #include 
@@ -35,6 +38,29 @@
 #define ASYM_TEST_MSG_LEN 256
 #define TEST_VECTOR_SIZE 256
 
+#define TEST_CRYPTO_ASYM_NULL_RETURN(p, str) \
+   do {\
+   if (p == NULL) {\
+   RTE_LOG(ERR, USER1, "line %u FAILED: %s", \
+   __LINE__, str); \
+   status = (ret == -ENOTSUP) ? \
+   TEST_SKIPPED : TEST_FAILED; \
+   goto error_exit;\
+   }   \
+   } while (0)
+
+#define TEST_CRYPTO_ASYM_NEG_RETURN(p, str) \
+   do {\
+   if (p) {\
+   RTE_LOG(ERR, USER1, \
+   "line %u FAILED: %s",   \
+   __LINE__, str); \
+   status = (ret == -ENOTSUP) ? \
+   TEST_SKIPPED : TEST_FAILED; \
+   goto error_exit; \
+   } \
+   } while (0)
+
 static int gbl_driver_id;
 struct crypto_testsuite_params_asym {
struct rte_mempool *op_mpool;
@@ -66,6 +92,39 @@ static uint32_t test_index;
 
 static struct crypto_testsuite_params_asym testsuite_params = { NULL };
 
+static void
+test_crypto_rand(int len, uint8_t *buffer)
+{
+   int i;
+
+   for (i = 0; i < len; ++i)
+   buffer[i] = (uint8_t)(rand() % ((uint8_t)-1)) | 1;
+}
+
+static int
+process_crypto_request(uint8_t dev_id, struct rte_crypto_op **op,
+   struct rte_crypto_op **result_op)
+{
+   /* Process crypto operation */
+   if (rte_cryptodev_enqueue_burst(dev_id, 0, op, 1) != 1) {
+   RTE_LOG(ERR, USER1,
+   "line %u FAILED: %s",
+   __LINE__, "Error sending packet for operation");
+   return -1;
+   }
+
+   while (rte_cryptodev_dequeue_burst(dev_id, 0, result_op, 1) == 0)
+   rte_pause();
+
+   if (*result_op == NULL) {
+   RTE_LOG(ERR, USER1,
+   "line %u FAILED: %s",
+   __LINE__, "Failed to process asym crypto op");
+   return -1;
+   }
+   return 0;
+}
+
 static int
 queue_ops_rsa_sign_verify(void *sess)
 {
@@ -809,6 +868,7 @@ testsuite_setup(void)
 
memset(ts_params, 0, sizeof(*ts_params));
 
+   srand(time(NULL));
test_vector.size = 0;
load_test_vectors();
 
@@ -2136,6 +2196,196 @@ test_ecpm_all_curve(void)
return overall_status;
 }
 
+static int
+test_dh_set_session(uint8_t dev_id, void **sess,
+   struct rte_crypto_op *op, struct rte_crypto_asym_xform *xform,
+   const struct test_dh_group *group,
+   enum rte_crypto_asym_op_type type)
+{
+   int ret = 0;
+
+   xform->xform_type = RTE_CRYPTO_ASYM_XFORM_DH;
+   xform->dh.g.data = group->g.data;
+   xform->dh.g.length = group->g.bytesize;
+   xform->dh.p.data = group->p.data;
+   xform->dh.p.length = group->p.bytesize;
+   xform->dh.type = type;
+   ret = rte_cryptodev_asym_session_create(dev_id, xform,
+   testsuite_params.session_mpool, sess);
+   if (ret)
+   return -1;
+   rte_crypto_op_attach_asym_session(op, *sess);
+
+   return 0;
+}
+
+static int
+test_dh_pub_compute(const char *str, uint8_t dev_id, struct rte_crypto_op **op,
+   int priv_size, uint8_t *private,
+   int result_size, uint8_t *result)
+{
+   struct rte_crypto_op *result_op;
+   struct rte_crypto_asym_op *asym_op = (*op)->asym;
+
+   asym_op->dh.priv_key.data = private;
+   asym_op->dh.priv_key.length = priv_size;
+   asym_op->dh.pub_key.data = result;
+   asym_op->dh.pub_key.length = result_size;
+
+   if (process_crypto_request(dev_id, op, &result_op))
+   return -1;
+
+   result_size = asym_op->dh.pub_key.length;
+   debug_hexdump(stdout, str,
+   asym_op->dh.pub_key.data,
+   result_size);
+   return result_size;
+}
+
+static int
+test_dh_shared_compute(const char *str,
+   uint8_t dev_i

[PATCH] examples/kni: add interrupt mode to receive packets

2022-04-07 Thread Tianli Lai
The kni application has two main-loop threads whose CPU utilization
reaches 100 percent: a writing thread and a reading thread. I think
using interrupt mode in the reading thread would reduce that thread's
CPU utilization.

Signed-off-by: Tianli Lai 
---
 examples/kni/main.c | 107 +++-
 1 file changed, 105 insertions(+), 2 deletions(-)

diff --git a/examples/kni/main.c b/examples/kni/main.c
index e99ef5c38a..4e2d2df348 100644
--- a/examples/kni/main.c
+++ b/examples/kni/main.c
@@ -98,6 +98,8 @@ static struct rte_eth_conf port_conf = {
},
 };
 
+/* Per-port spinlocks to guard Rx interrupt enable/disable */
+static rte_spinlock_t locks[RTE_MAX_ETHPORTS];
 /* Mempool for mbufs */
 static struct rte_mempool * pktmbuf_pool = NULL;
 
@@ -107,6 +109,8 @@ static uint32_t ports_mask = 0;
 static int promiscuous_on = 0;
 /* Monitor link status continually. off by default. */
 static int monitor_links;
+/* rx set in interrupt mode off by default. */
+static int intr_rx_en;
 
 /* Structure type for recording kni interface specific stats */
 struct kni_interface_stats {
@@ -277,6 +281,87 @@ kni_egress(struct kni_port_params *p)
}
 }
 
+/**
+ * Force the polling thread to sleep until a one-shot Rx interrupt triggers.
+ * @param num
+ *  Maximum number of Rx interrupt events to wait for.
+ * @param lcore
+ *  Lcore id of the polling thread.
+ * @return
+ *  0 on success
+ */
+static int
+sleep_until_rx_interrupt(int num, int lcore)
+{
+   /*
+* we want to track when we are woken up by traffic so that we can go
+* back to sleep again without log spamming. Avoid cache line sharing
+* to prevent threads stepping on each others' toes.
+*/
+   static struct {
+   bool wakeup;
+   } __rte_cache_aligned status[RTE_MAX_LCORE];
+   struct rte_epoll_event event[num];
+   int n, i;
+   uint16_t port_id;
+   uint8_t queue_id;
+   void *data;
+
+   if (status[lcore].wakeup) {
+   RTE_LOG(INFO, APP,
+   "lcore %u sleeps until interrupt triggers\n",
+   rte_lcore_id());
+   }
+
+   n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, event, num, 10);
+   for (i = 0; i < n; i++) {
+   data = event[i].epdata.data;
+   port_id = ((uintptr_t)data) >> CHAR_BIT;
+   queue_id = ((uintptr_t)data) &
+   RTE_LEN2MASK(CHAR_BIT, uint8_t);
+   RTE_LOG(INFO, APP,
+   "lcore %u is waked up from rx interrupt on"
+   " port %d queue %d\n",
+   rte_lcore_id(), port_id, queue_id);
+   }
+   status[lcore].wakeup = n != 0;
+
+   return 0;
+}
+
+static void
+turn_on_off_intr(uint16_t port_id, uint16_t queue_id, bool on)
+{
+   rte_spinlock_lock(&(locks[port_id]));
+   if (on)
+   rte_eth_dev_rx_intr_enable(port_id, queue_id);
+   else
+   rte_eth_dev_rx_intr_disable(port_id, queue_id);
+   rte_spinlock_unlock(&(locks[port_id]));
+}
+
+static int event_register(void)
+{
+   uint8_t queueid;
+   uint16_t portid;
+   uint32_t data;
+   int ret;
+
+   portid = 0;
+   queueid = 0;
+   data = portid << CHAR_BIT | queueid;
+
+   ret = rte_eth_dev_rx_intr_ctl_q(portid, queueid,
+   RTE_EPOLL_PER_THREAD,
+   RTE_INTR_EVENT_ADD,
+   (void *)((uintptr_t)data));
+   if (ret)
+   return ret;
+
+
+   return 0;
+}
+
 static int
 main_loop(__rte_unused void *arg)
 {
@@ -291,12 +376,19 @@ main_loop(__rte_unused void *arg)
LCORE_MAX
};
enum lcore_rxtx flag = LCORE_NONE;
+   int intr_en = 0;
 
RTE_ETH_FOREACH_DEV(i) {
if (!kni_port_params_array[i])
continue;
+   /* initialize spinlock for each port */
+   rte_spinlock_init(&(locks[i]));
if (kni_port_params_array[i]->lcore_rx == (uint8_t)lcore_id) {
flag = LCORE_RX;
+   if (intr_rx_en && !event_register())
+   intr_en = 1;
+   else
+   RTE_LOG(INFO, APP, "RX interrupt won't enable.\n");
break;
} else if (kni_port_params_array[i]->lcore_tx ==
(uint8_t)lcore_id) {
@@ -317,6 +409,11 @@ main_loop(__rte_unused void *arg)
if (f_pause)
continue;
kni_ingress(kni_port_params_array[i]);
+   if (unlikely(intr_en)) {
+   turn_on_off_intr(i, 0, 1);
+   sleep_until_rx_interrupt(1, lcore_id);
+   turn_on_off_intr(i, 0, 0);
+   }
}
} else

Re: [RFC PATCH v2 2/9] eal: annotate spinlock and rwlock

2022-04-07 Thread David Marchand
On Mon, Apr 4, 2022 at 8:21 AM Stephen Hemminger
 wrote:
>
> On Wed, 30 Mar 2022 15:49:49 +0200
> David Marchand  wrote:
>
> > +#ifdef RTE_ANNOTATE_LOCKS
> > +
> > +#define RTE_ANNOTATED_LOCK \
> > + __attribute__((lockable))
> > +
> > +#define RTE_GUARDED_BY(...) \
> > + __attribute__((guarded_by(__VA_ARGS__)))
> > +#define RTE_GUARDED_VAR \
> > + __attribute__((guarded_var))
>
> Could we use attributes that match the existing syntax and lower case.
> That is what was done for hot/cold and format attributes.

Yes, I reconsidered and I'll do that.


-- 
David Marchand



Re: [PATCH v2] net/dpaa2: fix dpdmux default interface

2022-04-07 Thread Thomas Monjalon
31/03/2022 12:19, Hemant Agrawal:
> Acked-by:  Hemant Agrawal 
> 
> On 3/30/2022 3:31 AM, Tianli Lai wrote:
> > If dpdmux objects are created by the restool tool with
> > the argument "--default-if=", this
> > function would change it to 1.
> >
> > Fixes: 1def64c2d79e ("net/dpaa2: add dpdmux initialization and 
> > configuration")
> > Cc: nipun.gu...@nxp.com
> >
> > Signed-off-by: Tianli Lai 

Applied in next-net, thanks.





[RFC] add support for async packed ring dequeue

2022-04-07 Thread Cheng Jiang
This RFC patch implements packed ring dequeue data path for asynchronous
vhost. It's based on the RFC patch:
http://patchwork.dpdk.org/project/dpdk/cover/20220310065407.17145-1-xuan.d...@intel.com/

Signed-off-by: Cheng Jiang 
---
 lib/vhost/virtio_net.c | 217 -
 1 file changed, 191 insertions(+), 26 deletions(-)

diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 3816caca79..4e6ea935c9 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -3312,7 +3312,7 @@ async_desc_to_mbuf(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
 }
 
 static __rte_always_inline uint16_t
-async_poll_dequeue_completed_split(struct virtio_net *dev, uint16_t queue_id,
+async_poll_dequeue_completed(struct virtio_net *dev, uint16_t queue_id,
struct rte_mbuf **pkts, uint16_t count, uint16_t dma_id,
uint16_t vchan_id, bool legacy_ol_flags)
 {
@@ -3330,7 +3330,7 @@ async_poll_dequeue_completed_split(struct virtio_net 
*dev, uint16_t queue_id,
from = start_idx;
while (vq->async->pkts_cmpl_flag[from] && count--) {
vq->async->pkts_cmpl_flag[from] = false;
-   from = (from + 1) & (vq->size - 1);
+   from = (from + 1) % vq->size;
nr_cpl_pkts++;
}
 
@@ -3338,7 +3338,7 @@ async_poll_dequeue_completed_split(struct virtio_net 
*dev, uint16_t queue_id,
return 0;
 
for (i = 0; i < nr_cpl_pkts; i++) {
-   from = (start_idx + i) & (vq->size - 1);
+   from = (start_idx + i) % vq->size;
pkts[i] = pkts_info[from].mbuf;
 
if (virtio_net_with_host_offload(dev))
@@ -3347,10 +3347,14 @@ async_poll_dequeue_completed_split(struct virtio_net 
*dev, uint16_t queue_id,
}
 
/* write back completed descs to used ring and update used idx */
-   write_back_completed_descs_split(vq, nr_cpl_pkts);
-   __atomic_add_fetch(&vq->used->idx, nr_cpl_pkts, __ATOMIC_RELEASE);
-   vhost_vring_call_split(dev, vq);
-
+   if (vq_is_packed(dev)) {
+   write_back_completed_descs_packed(vq, nr_cpl_pkts);
+   vhost_vring_call_packed(dev, vq);
+   } else {
+   write_back_completed_descs_split(vq, nr_cpl_pkts);
+   __atomic_add_fetch(&vq->used->idx, nr_cpl_pkts, __ATOMIC_RELEASE);
+   vhost_vring_call_split(dev, vq);
+   }
vq->async->pkts_inflight_n -= nr_cpl_pkts;
 
return nr_cpl_pkts;
@@ -3486,8 +3490,8 @@ virtio_dev_tx_async_split(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
 
 out:
/* DMA device may serve other queues, unconditionally check completed. 
*/
-   nr_done_pkts = async_poll_dequeue_completed_split(dev, queue_id, pkts, pkts_size,
- dma_id, vchan_id, legacy_ol_flags);
+   nr_done_pkts = async_poll_dequeue_completed(dev, queue_id, pkts, pkts_size,
+   dma_id, vchan_id, legacy_ol_flags);
 
return nr_done_pkts;
 }
@@ -3514,6 +3518,170 @@ virtio_dev_tx_async_split_compliant(struct virtio_net 
*dev,
pkts, count, dma_id, vchan_id, false);
 }
 
+static __rte_always_inline void
+vhost_async_shadow_dequeue_single_packed(struct vhost_virtqueue *vq, uint16_t 
buf_id)
+{
+   struct vhost_async *async = vq->async;
+   uint16_t idx = async->buffer_idx_packed;
+
+   async->buffers_packed[idx].id = buf_id;
+   async->buffers_packed[idx].len = 0;
+   async->buffers_packed[idx].count = 1;
+
+   async->buffer_idx_packed++;
+   if (async->buffer_idx_packed >= vq->size)
+   async->buffer_idx_packed -= vq->size;
+
+}
+
+static __rte_always_inline int
+virtio_dev_tx_async_single_packed(struct virtio_net *dev,
+   struct vhost_virtqueue *vq,
+   struct rte_mempool *mbuf_pool,
+   struct rte_mbuf *pkts,
+   struct virtio_net_hdr *nethdr)
+{
+   int err;
+   uint16_t buf_id, desc_count = 0;
+   uint16_t nr_vec = 0;
+   uint32_t buf_len;
+   struct buf_vector buf_vec[BUF_VECTOR_MAX];
+   static bool allocerr_warned;
+
+   if (unlikely(fill_vec_buf_packed(dev, vq, vq->last_avail_idx, 
&desc_count,
+buf_vec, &nr_vec, &buf_id, &buf_len,
+VHOST_ACCESS_RO) < 0))
+   return -1;
+
+   if (unlikely(virtio_dev_pktmbuf_prep(dev, pkts, buf_len))) {
+   if (!allocerr_warned) {
+   VHOST_LOG_DATA(ERR, "Failed mbuf alloc of size %d from %s on %s.\n",
+   buf_len, mbuf_pool->name, dev->ifname);
+   allocerr_warned = true;
+   }
+   return -1;
+   }
+
+   err = async_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts, mbuf_pool, 

DPDK 19.11.12 released

2022-04-07 Thread christian . ehrhardt
Hi all,
thanks to all the developers, testers and all of the community
we have completed another stable release for DPDK 19.11.

Here is a new stable release:
https://fast.dpdk.org/rel/dpdk-19.11.12.tar.xz

The git tree is at:
https://dpdk.org/browse/dpdk-stable/?h=19.11

Christian Ehrhardt 

---
 MAINTAINERS|   2 +
 VERSION|   2 +-
 app/pdump/main.c   |  16 +-
 app/test-compress-perf/main.c  |   3 +-
 app/test-pmd/cmdline.c |  16 +-
 app/test-pmd/cmdline_flow.c|   4 +-
 app/test-pmd/testpmd.c |   7 +
 app/test/meson.build   |   2 +-
 app/test/test_cryptodev_asym.c |   2 +-
 app/test/test_cryptodev_rsa_test_vectors.h |   2 +-
 app/test/test_efd.c|   2 +-
 app/test/test_mbuf.c   |   4 -
 app/test/test_memory.c |   2 +-
 buildtools/pmdinfogen/pmdinfogen.c |   3 +-
 config/arm/meson.build |   2 +-
 config/meson.build |   9 +-
 config/ppc_64/meson.build  |   4 +-
 config/x86/meson.build |   2 +-
 devtools/check-forbidden-tokens.awk|   3 +
 doc/api/generate_examples.sh   |   4 +
 doc/api/meson.build|  13 +-
 doc/guides/nics/kni.rst|   2 +-
 doc/guides/nics/mlx4.rst   |   4 +-
 doc/guides/nics/mlx5.rst   |  17 +-
 doc/guides/prog_guide/rte_flow.rst |   6 +-
 doc/guides/rel_notes/release_19_11.rst | 286 +
 doc/guides/sample_app_ug/fips_validation.rst   |   3 +-
 drivers/bus/dpaa/include/fsl_fman.h|   4 -
 drivers/bus/dpaa/rte_dpaa_bus.h|   4 +
 drivers/bus/ifpga/ifpga_bus.c  |   6 +-
 .../qat/qat_adf/adf_transport_access_macros.h  |   2 +-
 drivers/compress/octeontx/otx_zip_pmd.c|   6 +-
 drivers/crypto/qat/qat_asym.c  |   4 +-
 drivers/crypto/virtio/virtio_rxtx.c|   3 +
 drivers/meson.build|   2 +-
 drivers/net/af_xdp/rte_eth_af_xdp.c|  19 +-
 drivers/net/bnxt/bnxt.h|  12 +-
 drivers/net/bnxt/bnxt_cpr.c|  18 ++
 drivers/net/bnxt/bnxt_ethdev.c |  84 --
 drivers/net/bnxt/bnxt_hwrm.c   |  29 ++-
 drivers/net/bnxt/bnxt_hwrm.h   |   1 +
 drivers/net/bnxt/bnxt_ring.c   |   1 +
 drivers/net/bnxt/bnxt_rxq.c|   5 +-
 drivers/net/bnxt/bnxt_stats.c  |  63 +++--
 drivers/net/bnxt/bnxt_vnic.c   |  85 +++---
 drivers/net/bnxt/bnxt_vnic.h   |   7 +-
 drivers/net/bonding/eth_bond_private.h |   2 +-
 drivers/net/bonding/rte_eth_bond_api.c |   2 +-
 drivers/net/bonding/rte_eth_bond_pmd.c |  81 +-
 drivers/net/cxgbe/base/adapter.h   |   2 -
 drivers/net/cxgbe/base/common.h|   4 -
 drivers/net/cxgbe/base/t4_hw.c |  83 +++---
 drivers/net/cxgbe/base/t4vf_hw.c   |  28 +-
 drivers/net/cxgbe/cxgbe_ethdev.c   |   3 +-
 drivers/net/dpaa2/dpaa2_sparser.h  |   4 -
 drivers/net/e1000/base/meson.build |   2 +-
 drivers/net/ena/ena_ethdev.c   |  23 +-
 drivers/net/ena/ena_ethdev.h   |   5 -
 drivers/net/fm10k/base/meson.build |   2 +-
 drivers/net/hinic/base/meson.build |   2 +-
 drivers/net/hns3/hns3_cmd.h|   1 -
 drivers/net/hns3/hns3_ethdev.c |  82 +++---
 drivers/net/hns3/hns3_ethdev.h |   1 -
 drivers/net/hns3/hns3_ethdev_vf.c  |  21 +-
 drivers/net/hns3/hns3_flow.c   |  15 +-
 drivers/net/hns3/hns3_mp.c |  45 ++--
 drivers/net/hns3/hns3_mp.h |   9 +-
 drivers/net/hns3/hns3_rss.c|  56 ++--
 drivers/net/i40e/base/meson.build  |   2 +-
 drivers/net/iavf/base/meson.build  |   2 +-
 drivers/net/iavf/iavf_rxtx.c   |  26 +-
 drivers/net/ice/Makefile   |   4 +
 drivers/net/ice/base/meson.build   |   2 +-
 drivers/net/ice/ice_ethdev.c   |   2 +-
 drivers/net/ifc/base/ifcvf.c   |  14 +-
 drivers/net/ixgbe/base/meson.build |   2 +-
 drivers/net/ixgbe/

Re: [PATCH 08/20] net/cnxk: free 'node' memory when node add fail

2022-04-07 Thread Nithin Kumar Dabilpuram

Acked-by: Nithin Dabilpuram 

On 2/22/22 11:48 PM, Weiguo Li wrote:

When node_add fails and the function returns, the memory of 'node'
is leaked.

Fixes: 4435371b8fb1c0 ("net/cnxk: add TM shaper and node operations")

Signed-off-by: Weiguo Li 
---
  drivers/net/cnxk/cnxk_tm.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/drivers/net/cnxk/cnxk_tm.c b/drivers/net/cnxk/cnxk_tm.c
index 9015a452f8..81afafd5b7 100644
--- a/drivers/net/cnxk/cnxk_tm.c
+++ b/drivers/net/cnxk/cnxk_tm.c
@@ -389,6 +389,7 @@ cnxk_nix_tm_node_add(struct rte_eth_dev *eth_dev, uint32_t 
node_id,
if (rc < 0) {
error->type = roc_nix_tm_err_to_rte_err(rc);
error->message = roc_error_msg_get(rc);
+   rte_free(node);
return rc;
}
error->type = RTE_TM_ERROR_TYPE_NONE;


RE: [PATCH v4] mempool: fix mempool cache flushing algorithm

2022-04-07 Thread Morten Brørup
> From: Morten Brørup [mailto:m...@smartsharesystems.com]
> Sent: Wednesday, 2 February 2022 11.34
> 
> This patch fixes the rte_mempool_do_generic_put() caching algorithm,
> which was fundamentally wrong, causing multiple performance issues when
> flushing.
> 

[...]

Olivier,

Will you please consider this patch [1] and the other one [2].

The primary bug here is this: When a mempool cache becomes full (i.e. exceeds 
the "flush threshold"), and is flushed to the backing ring, it is still full 
afterwards; but it should be empty afterwards. It is not flushed entirely, only 
the elements exceeding "size" are flushed.
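
A worked example with the default flush-threshold multiplier of 1.5 (an
assumption about the configuration; a minimal sketch, not DPDK code):

/* cache->size = 256, cache->flushthresh = 384, cache->len = 380, n = 32 */

/* current code: add first, then flush only the excess over size:  */
/*   len becomes 412, 412 - 256 = 156 objects go to the ring,      */
/*   len is set back to 256 -> the cache is still "full"           */

/* patched code: flush the whole cache before adding:              */
/*   380 objects go to the ring, then len = n = 32                 */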

E.g. pipelined applications having ingress threads and egress threads running 
on different lcores are affected by this bug.

I don't think the real performance impact is very big, but these algorithm 
level bugs really annoy me.

I'm still wondering how the patch introducing the mempool cache flush threshold 
could pass internal code review with so many bugs.

[1] 
https://patchwork.dpdk.org/project/dpdk/patch/20220202103354.79832-1...@smartsharesystems.com/
[2] 
https://patchwork.dpdk.org/project/dpdk/patch/20220202081426.77975-1...@smartsharesystems.com/

-Morten

> Signed-off-by: Morten Brørup 
> ---
>  lib/mempool/rte_mempool.h | 34 ++
>  1 file changed, 22 insertions(+), 12 deletions(-)
> 
> diff --git a/lib/mempool/rte_mempool.h b/lib/mempool/rte_mempool.h
> index 1e7a3c1527..e7e09e48fc 100644
> --- a/lib/mempool/rte_mempool.h
> +++ b/lib/mempool/rte_mempool.h
> @@ -1344,31 +1344,41 @@ rte_mempool_do_generic_put(struct rte_mempool
> *mp, void * const *obj_table,
>   if (unlikely(cache == NULL || n > RTE_MEMPOOL_CACHE_MAX_SIZE))
>   goto ring_enqueue;
> 
> - cache_objs = &cache->objs[cache->len];
> + /* If the request itself is too big for the cache */
> + if (unlikely(n > cache->flushthresh))
> + goto ring_enqueue;
> 
>   /*
>* The cache follows the following algorithm
> -  *   1. Add the objects to the cache
> -  *   2. Anything greater than the cache min value (if it crosses
> the
> -  *   cache flush threshold) is flushed to the ring.

In the code, "the cache min value" is actually "the cache size". This indicates 
an intention to do something more. Perhaps the patch introducing the "flush 
threshold" was committed while still incomplete, and just never got completed?

> +  *   1. If the objects cannot be added to the cache without
> +  *   crossing the flush threshold, flush the cache to the ring.
> +  *   2. Add the objects to the cache.
>*/
> 
> - /* Add elements back into the cache */
> - rte_memcpy(&cache_objs[0], obj_table, sizeof(void *) * n);
> + if (cache->len + n <= cache->flushthresh) {
> + cache_objs = &cache->objs[cache->len];
> 
> - cache->len += n;
> + cache->len += n;
> + } else {
> + cache_objs = &cache->objs[0];
> 
> - if (cache->len >= cache->flushthresh) {
> - rte_mempool_ops_enqueue_bulk(mp, &cache->objs[cache->size],
> - cache->len - cache->size);
> - cache->len = cache->size;
> +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> + if (rte_mempool_ops_enqueue_bulk(mp, cache_objs, cache-
> >len) < 0)
> + rte_panic("cannot put objects in mempool\n");
> +#else
> + rte_mempool_ops_enqueue_bulk(mp, cache_objs, cache->len);
> +#endif
> + cache->len = n;
>   }
> 
> + /* Add the objects to the cache. */
> + rte_memcpy(cache_objs, obj_table, sizeof(void *) * n);
> +
>   return;
> 
>  ring_enqueue:
> 
> - /* push remaining objects in ring */
> + /* Put the objects into the ring */
>  #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>   if (rte_mempool_ops_enqueue_bulk(mp, obj_table, n) < 0)
>   rte_panic("cannot put objects in mempool\n");
> --
> 2.17.1



Re: [PATCH v4] mempool: fix mempool cache flushing algorithm

2022-04-07 Thread Bruce Richardson
On Thu, Apr 07, 2022 at 11:04:53AM +0200, Morten Brørup wrote:
> > From: Morten Brørup [mailto:m...@smartsharesystems.com]
> > Sent: Wednesday, 2 February 2022 11.34
> > 
> > This patch fixes the rte_mempool_do_generic_put() caching algorithm,
> > which was fundamentally wrong, causing multiple performance issues when
> > flushing.
> > 
> 
> [...]
> 
> Olivier,
> 
> Will you please consider this patch [1] and the other one [2].
> 
> The primary bug here is this: When a mempool cache becomes full (i.e. exceeds 
> the "flush threshold"), and is flushed to the backing ring, it is still full 
> afterwards; but it should be empty afterwards. It is not flushed entirely, 
> only the elements exceeding "size" are flushed.
> 

I don't believe it should be flushed entirely, there should always be some
elements left so that even after flush we can still allocate an additional
burst. We want to avoid the situation where a flush of all elements is
immediately followed by a refill of new elements. However, we can flush to
maybe size/2, and improve things. In short, this not emptying is by design
rather than a bug, though we can look to tweak the behaviour.

> E.g. pipelined applications having ingress threads and egress threads running 
> on different lcores are affected by this bug.
> 
If we are looking at improvements for pipelined applications, I think a
bigger win would be to change the default mempool from ring-based to
stack-based. For apps using a run-to-completion model, they should run out
of cache and should therefore be largely unaffected by such a change.
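
For context, a small sketch (not part of this patch) of how an
application can already opt in to the stack-based handler for a given
mempool, between rte_mempool_create_empty() and populate; NB_MBUF,
ELT_SIZE and CACHE_SIZE are placeholder values:

struct rte_mempool *mp;

mp = rte_mempool_create_empty("pipeline_pool", NB_MBUF, ELT_SIZE,
			      CACHE_SIZE, 0, rte_socket_id(), 0);
if (mp == NULL)
	rte_exit(EXIT_FAILURE, "cannot create mempool\n");

/* "stack" ops are provided by drivers/mempool/stack */
if (rte_mempool_set_ops_byname(mp, "stack", NULL) != 0)
	rte_exit(EXIT_FAILURE, "cannot set stack ops\n");

if (rte_mempool_populate_default(mp) < 0)
	rte_exit(EXIT_FAILURE, "cannot populate mempool\n");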

> I don't think the real performance impact is very big, but these algorithm 
> level bugs really annoy me.
> 
> I'm still wondering how the patch introducing the mempool cache flush 
> threshold could pass internal code review with so many bugs.
> 
> [1] 
> https://patchwork.dpdk.org/project/dpdk/patch/20220202103354.79832-1...@smartsharesystems.com/
> [2] 
> https://patchwork.dpdk.org/project/dpdk/patch/20220202081426.77975-1...@smartsharesystems.com/
> 
> -Morten
> 
> > Signed-off-by: Morten Brørup 
> > ---
> >  lib/mempool/rte_mempool.h | 34 ++
> >  1 file changed, 22 insertions(+), 12 deletions(-)
> > 
> > diff --git a/lib/mempool/rte_mempool.h b/lib/mempool/rte_mempool.h
> > index 1e7a3c1527..e7e09e48fc 100644
> > --- a/lib/mempool/rte_mempool.h
> > +++ b/lib/mempool/rte_mempool.h
> > @@ -1344,31 +1344,41 @@ rte_mempool_do_generic_put(struct rte_mempool
> > *mp, void * const *obj_table,
> > if (unlikely(cache == NULL || n > RTE_MEMPOOL_CACHE_MAX_SIZE))
> > goto ring_enqueue;
> > 
> > -   cache_objs = &cache->objs[cache->len];
> > +   /* If the request itself is too big for the cache */
> > +   if (unlikely(n > cache->flushthresh))
> > +   goto ring_enqueue;
> > 
> > /*
> >  * The cache follows the following algorithm
> > -*   1. Add the objects to the cache
> > -*   2. Anything greater than the cache min value (if it crosses
> > the
> > -*   cache flush threshold) is flushed to the ring.
> 
> In the code, "the cache min value" is actually "the cache size". This 
> indicates an intention to do something more. Perhaps the patch introducing 
> the "flush threshold" was committed while still incomplete, and just never 
> got completed?
> 
> > +*   1. If the objects cannot be added to the cache without
> > +*   crossing the flush threshold, flush the cache to the ring.
> > +*   2. Add the objects to the cache.
> >  */
> > 
> > -   /* Add elements back into the cache */
> > -   rte_memcpy(&cache_objs[0], obj_table, sizeof(void *) * n);
> > +   if (cache->len + n <= cache->flushthresh) {
> > +   cache_objs = &cache->objs[cache->len];
> > 
> > -   cache->len += n;
> > +   cache->len += n;
> > +   } else {
> > +   cache_objs = &cache->objs[0];
> > 
> > -   if (cache->len >= cache->flushthresh) {
> > -   rte_mempool_ops_enqueue_bulk(mp, &cache->objs[cache->size],
> > -   cache->len - cache->size);
> > -   cache->len = cache->size;
> > +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> > +   if (rte_mempool_ops_enqueue_bulk(mp, cache_objs, cache-
> > >len) < 0)
> > +   rte_panic("cannot put objects in mempool\n");
> > +#else
> > +   rte_mempool_ops_enqueue_bulk(mp, cache_objs, cache->len);
> > +#endif
> > +   cache->len = n;
> > }
> > 
> > +   /* Add the objects to the cache. */
> > +   rte_memcpy(cache_objs, obj_table, sizeof(void *) * n);
> > +
> > return;
> > 
> >  ring_enqueue:
> > 
> > -   /* push remaining objects in ring */
> > +   /* Put the objects into the ring */
> >  #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> > if (rte_mempool_ops_enqueue_bulk(mp, obj_table, n) < 0)
> > rte_panic("cannot put objects in mempool\n");
> > --
> > 2.17.1
> 


RE: [PATCH v4] mempool: fix mempool cache flushing algorithm

2022-04-07 Thread Morten Brørup
> From: Bruce Richardson [mailto:bruce.richard...@intel.com]
> Sent: Thursday, 7 April 2022 11.14
> 
> On Thu, Apr 07, 2022 at 11:04:53AM +0200, Morten Brørup wrote:
> > > From: Morten Brørup [mailto:m...@smartsharesystems.com]
> > > Sent: Wednesday, 2 February 2022 11.34
> > >
> > > This patch fixes the rte_mempool_do_generic_put() caching
> algorithm,
> > > which was fundamentally wrong, causing multiple performance issues
> when
> > > flushing.
> > >
> >
> > [...]
> >
> > Olivier,
> >
> > Will you please consider this patch [1] and the other one [2].
> >
> > The primary bug here is this: When a mempool cache becomes full (i.e.
> exceeds the "flush threshold"), and is flushed to the backing ring, it
> is still full afterwards; but it should be empty afterwards. It is not
> flushed entirely, only the elements exceeding "size" are flushed.
> >
> 
> I don't believe it should be flushed entirely, there should always be
> some
> elements left so that even after flush we can still allocate an
> additional
> burst. We want to avoid the situation where a flush of all elements is
> immediately followed by a refill of new elements. However, we can flush
> to
> maybe size/2, and improve things. In short, this not emptying is by
> design
> rather than a bug, though we can look to tweak the behaviour.
> 

I initially agreed with you about flushing to size/2.

However, I did think further about it when I wrote the patch, and came to this 
conclusion: If an application thread repeatedly puts objects into the mempool, 
and does it so often that the cache overflows (i.e. reaches the flush 
threshold) and needs to be flushed, it is far more likely that the application 
thread will continue doing that, rather than start getting objects from the 
mempool. This speaks for flushing the cache entirely.

Both solutions are better than flushing to size, so if there is a preference 
for keeping some objects in the cache after flushing, I can update the patch 
accordingly.

> > E.g. pipelined applications having ingress threads and egress threads
> running on different lcores are affected by this bug.
> >
> If we are looking at improvements for pipelined applications, I think a
> bigger win would be to change the default mempool from ring-based to
> stack-based. For apps using a run-to-completion model, they should run
> out
> of cache and should therefore be largely unaffected by such a change.
> 
> > I don't think the real performance impact is very big, but these
> algorithm level bugs really annoy me.
> >
> > I'm still wondering how the patch introducing the mempool cache flush
> threshold could pass internal code review with so many bugs.
> >
> > [1]
> https://patchwork.dpdk.org/project/dpdk/patch/20220202103354.79832-1-
> m...@smartsharesystems.com/
> > [2]
> https://patchwork.dpdk.org/project/dpdk/patch/20220202081426.77975-1-
> m...@smartsharesystems.com/
> >
> > -Morten
> >
> > > Signed-off-by: Morten Brørup 
> > > ---
> > >  lib/mempool/rte_mempool.h | 34 ++
> > >  1 file changed, 22 insertions(+), 12 deletions(-)
> > >
> > > diff --git a/lib/mempool/rte_mempool.h b/lib/mempool/rte_mempool.h
> > > index 1e7a3c1527..e7e09e48fc 100644
> > > --- a/lib/mempool/rte_mempool.h
> > > +++ b/lib/mempool/rte_mempool.h
> > > @@ -1344,31 +1344,41 @@ rte_mempool_do_generic_put(struct
> rte_mempool
> > > *mp, void * const *obj_table,
> > >   if (unlikely(cache == NULL || n > RTE_MEMPOOL_CACHE_MAX_SIZE))
> > >   goto ring_enqueue;
> > >
> > > - cache_objs = &cache->objs[cache->len];
> > > + /* If the request itself is too big for the cache */
> > > + if (unlikely(n > cache->flushthresh))
> > > + goto ring_enqueue;
> > >
> > >   /*
> > >* The cache follows the following algorithm
> > > -  *   1. Add the objects to the cache
> > > -  *   2. Anything greater than the cache min value (if it crosses
> > > the
> > > -  *   cache flush threshold) is flushed to the ring.
> >
> > In the code, "the cache min value" is actually "the cache size". This
> indicates an intention to do something more. Perhaps the patch
> introducing the "flush threshold" was committed while still incomplete,
> and just never got completed?
> >
> > > +  *   1. If the objects cannot be added to the cache without
> > > +  *   crossing the flush threshold, flush the cache to the ring.
> > > +  *   2. Add the objects to the cache.
> > >*/
> > >
> > > - /* Add elements back into the cache */
> > > - rte_memcpy(&cache_objs[0], obj_table, sizeof(void *) * n);
> > > + if (cache->len + n <= cache->flushthresh) {
> > > + cache_objs = &cache->objs[cache->len];
> > >
> > > - cache->len += n;
> > > + cache->len += n;
> > > + } else {
> > > + cache_objs = &cache->objs[0];
> > >
> > > - if (cache->len >= cache->flushthresh) {
> > > - rte_mempool_ops_enqueue_bulk(mp, &cache->objs[cache->size],
> > > - cache->len - cache->size);
> > > - cache->len = cache->size;
> > > 

[PATCH] net/bonding: fix rss key configuration when the key length is 52

2022-04-07 Thread Ke Zhang
When creating a bonding device, if the slave device's RSS key length
is 52, the bonding device's key length will be the same as the slave's.
In bond_ethdev_configure(), the default_rss_key length is 40, which
does not match, so a new key should be calculated for the bonding
device when the default key cannot be used.

Signed-off-by: Ke Zhang 
---
 drivers/net/bonding/rte_eth_bond_pmd.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c 
b/drivers/net/bonding/rte_eth_bond_pmd.c
index b305b6a35b..4214b33f40 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -3617,13 +3617,13 @@ bond_ethdev_configure(struct rte_eth_dev *dev)
   internals->rss_key_len);
} else {
if (internals->rss_key_len > sizeof(default_rss_key)) {
-   RTE_BOND_LOG(ERR,
-  "There is no suitable default hash key");
-   return -EINVAL;
+   /* If the rss_key_len is 52, it should 
calculate the hash key */
+   for (i = 0; i < internals->rss_key_len; i++)
+   internals->rss_key[i] = 
(uint8_t)rte_rand();
+   } else {
+   memcpy(internals->rss_key, default_rss_key,
+   internals->rss_key_len);
}
-
-   memcpy(internals->rss_key, default_rss_key,
-  internals->rss_key_len);
}
 
for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
-- 
2.25.1



[PATCH] crypto/qat: enable asymmetric crypto on gen4 device

2022-04-07 Thread Arek Kusztal
This commit enables asymmetric crypto in generation four
devices (4xxx).

Signed-off-by: Arek Kusztal 
---
 doc/guides/cryptodevs/qat.rst|  1 +
 drivers/crypto/qat/dev/qat_crypto_pmd_gen4.c | 12 
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/doc/guides/cryptodevs/qat.rst b/doc/guides/cryptodevs/qat.rst
index 785e041324..18ad1646a4 100644
--- a/doc/guides/cryptodevs/qat.rst
+++ b/doc/guides/cryptodevs/qat.rst
@@ -169,6 +169,7 @@ poll mode crypto driver support for the following hardware 
accelerator devices:
 * ``Intel QuickAssist Technology C3xxx``
 * ``Intel QuickAssist Technology D15xx``
 * ``Intel QuickAssist Technology C4xxx``
+* ``Intel QuickAssist Technology 4xxx``
 
 The QAT ASYM PMD has support for:
 
diff --git a/drivers/crypto/qat/dev/qat_crypto_pmd_gen4.c 
b/drivers/crypto/qat/dev/qat_crypto_pmd_gen4.c
index 3d8b2e377c..a9457d9278 100644
--- a/drivers/crypto/qat/dev/qat_crypto_pmd_gen4.c
+++ b/drivers/crypto/qat/dev/qat_crypto_pmd_gen4.c
@@ -375,8 +375,12 @@ RTE_INIT(qat_sym_crypto_gen4_init)
 
 RTE_INIT(qat_asym_crypto_gen4_init)
 {
-   qat_asym_gen_dev_ops[QAT_GEN4].cryptodev_ops = NULL;
-   qat_asym_gen_dev_ops[QAT_GEN4].get_capabilities = NULL;
-   qat_asym_gen_dev_ops[QAT_GEN4].get_feature_flags = NULL;
-   qat_asym_gen_dev_ops[QAT_GEN4].set_session = NULL;
+   qat_asym_gen_dev_ops[QAT_GEN4].cryptodev_ops =
+   &qat_asym_crypto_ops_gen1;
+   qat_asym_gen_dev_ops[QAT_GEN4].get_capabilities =
+   qat_asym_crypto_cap_get_gen1;
+   qat_asym_gen_dev_ops[QAT_GEN4].get_feature_flags =
+   qat_asym_crypto_feature_flags_get_gen1;
+   qat_asym_gen_dev_ops[QAT_GEN4].set_session =
+   qat_asym_crypto_set_session_gen1;
 }
-- 
2.30.2



[PATCH 0/3] add partial SGL support to AESNI_MB

2022-04-07 Thread Ciara Power
This patchset adds SGL support for GCM and CHACHA20-POLY1305 algorithms,
using the IPSec-MB JOB API.

Supported SGL types:
 - INPLACE SGL
 - OOP SGL IN, LB OUT
 - OOP SGL IN, SGL OUT

The SGL Feature Flags for AESNI_MB PMD are not added,
as it does not yet support SGL for all other algorithms.
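
For reference, a hedged application-side sketch (not part of this
patchset) of what "OOP SGL IN, LB OUT" means for a symmetric crypto op:
a chained, multi-segment source mbuf with a separate linear destination
("mbuf_pool" and "op" are assumed to exist already):

struct rte_mbuf *seg1 = rte_pktmbuf_alloc(mbuf_pool);
struct rte_mbuf *seg2 = rte_pktmbuf_alloc(mbuf_pool);
struct rte_mbuf *dst = rte_pktmbuf_alloc(mbuf_pool);

/* build the scatter-gather source: nb_segs becomes 2 */
rte_pktmbuf_chain(seg1, seg2);

op->sym->m_src = seg1;	/* SGL input */
op->sym->m_dst = dst;	/* linear (LB) output */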

Ciara Power (3):
  crypto/ipsec_mb: add GCM sgl support to aesni_mb
  crypto/ipsec_mb: add chachapoly SGL support to aesni_mb
  crypto/ipsec_mb: check SGL support for algorithm

 drivers/crypto/ipsec_mb/pmd_aesni_mb.c  | 160 +++-
 drivers/crypto/ipsec_mb/pmd_aesni_mb_priv.h |   5 +
 2 files changed, 160 insertions(+), 5 deletions(-)

-- 
2.25.1



[PATCH 1/3] crypto/ipsec_mb: add GCM sgl support to aesni_mb

2022-04-07 Thread Ciara Power
Add SGL support for GCM algorithm through JOB API.

This change supports IN-PLACE SGL, OOP SGL IN and LB OUT,
and OOP SGL IN and SGL OUT.

Feature flags are not added, as the PMD does not yet support SGL for
all other algorithms.

Signed-off-by: Ciara Power 
---
 drivers/crypto/ipsec_mb/pmd_aesni_mb.c  | 144 +++-
 drivers/crypto/ipsec_mb/pmd_aesni_mb_priv.h |   2 +
 2 files changed, 142 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/ipsec_mb/pmd_aesni_mb.c 
b/drivers/crypto/ipsec_mb/pmd_aesni_mb.c
index afa0b6e3a4..09a0cc5ace 100644
--- a/drivers/crypto/ipsec_mb/pmd_aesni_mb.c
+++ b/drivers/crypto/ipsec_mb/pmd_aesni_mb.c
@@ -4,6 +4,11 @@
 
 #include "pmd_aesni_mb_priv.h"
 
+struct aesni_mb_op_buf_data {
+   struct rte_mbuf *m;
+   uint32_t offset;
+};
+
 /**
  * Calculate the authentication pre-computes
  *
@@ -1092,6 +1097,69 @@ set_cpu_mb_job_params(IMB_JOB *job, struct 
aesni_mb_session *session,
job->user_data = udata;
 }
 
+static int
+handle_aead_sgl_job(IMB_JOB *job, IMB_MGR *mb_mgr,
+   uint32_t *total_len,
+   struct aesni_mb_op_buf_data *src_data,
+   struct aesni_mb_op_buf_data *dst_data)
+{
+   uint32_t data_len, part_len;
+
+   if (*total_len == 0) {
+   job->sgl_state = IMB_SGL_COMPLETE;
+   return 0;
+   }
+
+   if (src_data->m == NULL) {
+   IPSEC_MB_LOG(ERR, "Invalid source buffer");
+   return -EINVAL;
+   }
+
+   job->sgl_state = IMB_SGL_UPDATE;
+
+   data_len = src_data->m->data_len - src_data->offset;
+
+   job->src = rte_pktmbuf_mtod_offset(src_data->m, uint8_t *,
+   src_data->offset);
+
+   if (dst_data->m != NULL) {
+   if (dst_data->m->data_len - dst_data->offset == 0) {
+   dst_data->m = dst_data->m->next;
+   if (dst_data->m == NULL) {
+   IPSEC_MB_LOG(ERR, "Invalid destination buffer");
+   return -EINVAL;
+   }
+   dst_data->offset = 0;
+   }
+   part_len = RTE_MIN(data_len, (dst_data->m->data_len -
+   dst_data->offset));
+   job->dst = rte_pktmbuf_mtod_offset(dst_data->m,
+   uint8_t *, dst_data->offset);
+   dst_data->offset += part_len;
+   } else {
+   part_len = RTE_MIN(data_len, *total_len);
+   job->dst = rte_pktmbuf_mtod_offset(src_data->m, uint8_t *,
+   src_data->offset);
+   }
+
+   job->msg_len_to_cipher_in_bytes = part_len;
+   job->msg_len_to_hash_in_bytes = part_len;
+
+   job = IMB_SUBMIT_JOB(mb_mgr);
+
+   *total_len -= part_len;
+
+   if (part_len != data_len) {
+   src_data->offset += part_len;
+   } else {
+   src_data->m = src_data->m->next;
+   src_data->offset = 0;
+   }
+
+   return 0;
+}
+
+
 /**
  * Process a crypto operation and complete a IMB_JOB job structure for
  * submission to the multi buffer library for processing.
@@ -1107,16 +1175,23 @@ set_cpu_mb_job_params(IMB_JOB *job, struct 
aesni_mb_session *session,
  */
 static inline int
 set_mb_job_params(IMB_JOB *job, struct ipsec_mb_qp *qp,
-   struct rte_crypto_op *op, uint8_t *digest_idx)
+   struct rte_crypto_op *op, uint8_t *digest_idx,
+   IMB_MGR *mb_mgr)
 {
struct rte_mbuf *m_src = op->sym->m_src, *m_dst;
struct aesni_mb_qp_data *qp_data = ipsec_mb_get_qp_private_data(qp);
+   struct aesni_mb_op_buf_data src_sgl = {0};
+   struct aesni_mb_op_buf_data dst_sgl = {0};
struct aesni_mb_session *session;
uint32_t m_offset, oop;
uint32_t auth_off_in_bytes;
uint32_t ciph_off_in_bytes;
uint32_t auth_len_in_bytes;
uint32_t ciph_len_in_bytes;
+   uint32_t total_len;
+   IMB_JOB base_job;
+   uint8_t sgl = 0;
+   int ret;
 
session = ipsec_mb_get_session_private(qp, op);
if (session == NULL) {
@@ -1124,6 +1199,9 @@ set_mb_job_params(IMB_JOB *job, struct ipsec_mb_qp *qp,
return -1;
}
 
+   if (op->sym->m_src->nb_segs > 1)
+   sgl = 1;
+
/* Set crypto operation */
job->chain_order = session->chain_order;
 
@@ -1175,6 +1253,11 @@ set_mb_job_params(IMB_JOB *job, struct ipsec_mb_qp *qp,
if (session->cipher.mode == IMB_CIPHER_GCM) {
job->u.GCM.aad = op->sym->aead.aad.data;
job->u.GCM.aad_len_in_bytes = session->aead.aad_len;
+   if (sgl) {
+   job->u.GCM.ctx = &session->aead.gcm_sgl_ctx;
+   job->cipher_mode = IMB_CIPHER_GCM_SGL;
+   job->hash_alg = IMB_AUTH_GCM_SGL;
+   }
  

[PATCH 2/3] crypto/ipsec_mb: add chachapoly SGL support to aesni_mb

2022-04-07 Thread Ciara Power
Add SGL support for chacha20_poly1305 algorithm through JOB API.

Supports IN-PLACE SGL, OOP SGL IN and LB OUT,
and OOP SGL IN and SGL OUT.

Feature flags not added, as the PMD does not support SGL for all
other algorithms.

Signed-off-by: Ciara Power 
---
 drivers/crypto/ipsec_mb/pmd_aesni_mb.c  | 9 -
 drivers/crypto/ipsec_mb/pmd_aesni_mb_priv.h | 5 -
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/ipsec_mb/pmd_aesni_mb.c 
b/drivers/crypto/ipsec_mb/pmd_aesni_mb.c
index 09a0cc5ace..606c8a0caf 100644
--- a/drivers/crypto/ipsec_mb/pmd_aesni_mb.c
+++ b/drivers/crypto/ipsec_mb/pmd_aesni_mb.c
@@ -1289,6 +1289,12 @@ set_mb_job_params(IMB_JOB *job, struct ipsec_mb_qp *qp,
job->u.CHACHA20_POLY1305.aad = op->sym->aead.aad.data;
job->u.CHACHA20_POLY1305.aad_len_in_bytes =
session->aead.aad_len;
+   if (sgl) {
+   job->u.CHACHA20_POLY1305.ctx =
+   &session->aead.chacha_sgl_ctx;
+   job->cipher_mode = IMB_CIPHER_CHACHA20_POLY1305_SGL;
+   job->hash_alg = IMB_AUTH_CHACHA20_POLY1305_SGL;
+   }
job->enc_keys = session->cipher.expanded_aes_keys.encode;
job->dec_keys = session->cipher.expanded_aes_keys.encode;
break;
@@ -1394,6 +1400,7 @@ set_mb_job_params(IMB_JOB *job, struct ipsec_mb_qp *qp,
break;
 
case IMB_AUTH_GCM_SGL:
+   case IMB_AUTH_CHACHA20_POLY1305_SGL:
job->hash_start_src_offset_in_bytes = 0;
job->msg_len_to_hash_in_bytes = 0;
job->iv = rte_crypto_op_ctod_offset(op, uint8_t *,
@@ -1405,7 +1412,6 @@ set_mb_job_params(IMB_JOB *job, struct ipsec_mb_qp *qp,
op->sym->aead.data.offset;
job->msg_len_to_hash_in_bytes =
op->sym->aead.data.length;
-
job->iv = rte_crypto_op_ctod_offset(op, uint8_t *,
session->iv.offset);
break;
@@ -1491,6 +1497,7 @@ set_mb_job_params(IMB_JOB *job, struct ipsec_mb_qp *qp,
job->msg_len_to_cipher_in_bytes = op->sym->aead.data.length;
break;
case IMB_CIPHER_GCM_SGL:
+   case IMB_CIPHER_CHACHA20_POLY1305_SGL:
job->msg_len_to_cipher_in_bytes = 0;
job->cipher_start_src_offset_in_bytes = 0;
break;
diff --git a/drivers/crypto/ipsec_mb/pmd_aesni_mb_priv.h 
b/drivers/crypto/ipsec_mb/pmd_aesni_mb_priv.h
index 1d1e9dde00..f7fce7c39f 100644
--- a/drivers/crypto/ipsec_mb/pmd_aesni_mb_priv.h
+++ b/drivers/crypto/ipsec_mb/pmd_aesni_mb_priv.h
@@ -947,7 +947,10 @@ struct aesni_mb_session {
/* * AAD data length */
uint16_t aad_len;
 
-   struct gcm_context_data gcm_sgl_ctx;
+   union {
+   struct gcm_context_data gcm_sgl_ctx;
+   struct chacha20_poly1305_context_data chacha_sgl_ctx;
+   };
} aead;
 } __rte_cache_aligned;
 
-- 
2.25.1



[PATCH 3/3] crypto/ipsec_mb: check SGL support for algorithm

2022-04-07 Thread Ciara Power
This patch adds a check when dequeueing and processing ops: SGL support
only exists for the AES-GCM and CHACHA20_POLY1305 algorithms.
If an SGL op for an unsupported algorithm is being processed,
a NULL job is submitted instead.

Signed-off-by: Ciara Power 
---
 drivers/crypto/ipsec_mb/pmd_aesni_mb.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/crypto/ipsec_mb/pmd_aesni_mb.c 
b/drivers/crypto/ipsec_mb/pmd_aesni_mb.c
index 606c8a0caf..9b21c14f58 100644
--- a/drivers/crypto/ipsec_mb/pmd_aesni_mb.c
+++ b/drivers/crypto/ipsec_mb/pmd_aesni_mb.c
@@ -1202,6 +1202,13 @@ set_mb_job_params(IMB_JOB *job, struct ipsec_mb_qp *qp,
if (op->sym->m_src->nb_segs > 1)
sgl = 1;
 
+   if (sgl && (session->cipher.mode != IMB_CIPHER_GCM
+   && session->cipher.mode != 
IMB_CIPHER_CHACHA20_POLY1305)) {
+   op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS;
+   IPSEC_MB_LOG(ERR, "Device only supports SGL for AES-GCM or 
CHACHA20_POLY1305 algorithms.");
+   return -1;
+   }
+
/* Set crypto operation */
job->chain_order = session->chain_order;
 
-- 
2.25.1



Re: [PATCH v4] mempool: fix mempool cache flushing algorithm

2022-04-07 Thread Bruce Richardson
On Thu, Apr 07, 2022 at 11:26:53AM +0200, Morten Brørup wrote:
> > From: Bruce Richardson [mailto:bruce.richard...@intel.com]
> > Sent: Thursday, 7 April 2022 11.14
> > 
> > On Thu, Apr 07, 2022 at 11:04:53AM +0200, Morten Brørup wrote:
> > > > From: Morten Brørup [mailto:m...@smartsharesystems.com]
> > > > Sent: Wednesday, 2 February 2022 11.34
> > > >
> > > > This patch fixes the rte_mempool_do_generic_put() caching
> > algorithm,
> > > > which was fundamentally wrong, causing multiple performance issues
> > when
> > > > flushing.
> > > >
> > >
> > > [...]
> > >
> > > Olivier,
> > >
> > > Will you please consider this patch [1] and the other one [2].
> > >
> > > The primary bug here is this: When a mempool cache becomes full (i.e.
> > exceeds the "flush threshold"), and is flushed to the backing ring, it
> > is still full afterwards; but it should be empty afterwards. It is not
> > flushed entirely, only the elements exceeding "size" are flushed.
> > >
> > 
> > I don't believe it should be flushed entirely, there should always be
> > some
> > elements left so that even after flush we can still allocate an
> > additional
> > burst. We want to avoid the situation where a flush of all elements is
> > immediately followed by a refill of new elements. However, we can flush
> > to
> > maybe size/2, and improve things. In short, this not emptying is by
> > design
> > rather than a bug, though we can look to tweak the behaviour.
> > 
> 
> I initially agreed with you about flushing to size/2.
> 
> However, I did think further about it when I wrote the patch, and came to 
> this conclusion: If an application thread repeatedly puts objects into the 
> mempool, and does it so often that the cache overflows (i.e. reaches the 
> flush threshold) and needs to be flushed, it is far more likely that the 
> application thread will continue doing that, rather than start getting 
> objects from the mempool. This speaks for flushing the cache entirely.
> 
> Both solutions are better than flushing to size, so if there is a preference 
> for keeping some objects in the cache after flushing, I can update the patch 
> accordingly.
> 

Would it be worth looking at adding per-core hinting to the mempool?
Indicate for a core that it allocates-only, i.e. RX thread, frees-only,
i.e. TX-thread, or does both alloc and free (the default)? That hint could
be used only on flush or refill to specify whether to flush all or partial,
and similarly to refill to max possible or just to size.

/Bruce


Re: [PATCH v4] mempool: fix mempool cache flushing algorithm

2022-04-07 Thread Bruce Richardson
On Thu, Apr 07, 2022 at 11:32:12AM +0100, Bruce Richardson wrote:
> On Thu, Apr 07, 2022 at 11:26:53AM +0200, Morten Brørup wrote:
> > > From: Bruce Richardson [mailto:bruce.richard...@intel.com]
> > > Sent: Thursday, 7 April 2022 11.14
> > > 
> > > On Thu, Apr 07, 2022 at 11:04:53AM +0200, Morten Brørup wrote:
> > > > > From: Morten Brørup [mailto:m...@smartsharesystems.com]
> > > > > Sent: Wednesday, 2 February 2022 11.34
> > > > >
> > > > > This patch fixes the rte_mempool_do_generic_put() caching
> > > algorithm,
> > > > > which was fundamentally wrong, causing multiple performance issues
> > > when
> > > > > flushing.
> > > > >
> > > >
> > > > [...]
> > > >
> > > > Olivier,
> > > >
> > > > Will you please consider this patch [1] and the other one [2].
> > > >
> > > > The primary bug here is this: When a mempool cache becomes full (i.e.
> > > exceeds the "flush threshold"), and is flushed to the backing ring, it
> > > is still full afterwards; but it should be empty afterwards. It is not
> > > flushed entirely, only the elements exceeding "size" are flushed.
> > > >
> > > 
> > > I don't believe it should be flushed entirely, there should always be
> > > some
> > > elements left so that even after flush we can still allocate an
> > > additional
> > > burst. We want to avoid the situation where a flush of all elements is
> > > immediately followed by a refill of new elements. However, we can flush
> > > to
> > > maybe size/2, and improve things. In short, this not emptying is by
> > > design
> > > rather than a bug, though we can look to tweak the behaviour.
> > > 
> > 
> > I initially agreed with you about flushing to size/2.
> > 
> > However, I did think further about it when I wrote the patch, and came to 
> > this conclusion: If an application thread repeatedly puts objects into the 
> > mempool, and does it so often that the cache overflows (i.e. reaches the 
> > flush threshold) and needs to be flushed, it is far more likely that the 
> > application thread will continue doing that, rather than start getting 
> > objects from the mempool. This speaks for flushing the cache entirely.
> > 
> > Both solutions are better than flushing to size, so if there is a 
> > preference for keeping some objects in the cache after flushing, I can 
> > update the patch accordingly.
> > 
> 
> Would it be worth looking at adding per-core hinting to the mempool?
> Indicate for a core that it allocates-only, i.e. RX thread, frees-only,
> i.e. TX-thread, or does both alloc and free (the default)? That hint could
> be used only on flush or refill to specify whether to flush all or partial,
> and similarly to refill to max possible or just to size.
> 
Actually, taking the idea further, we could always track per-core whether a
core has ever done a flush/refill and use that as the hint instead. It
could even be done in a branch-free manner if we want. For example:

on flush:
keep_entries = (size >> 1) & (never_refills - 1);

which will set the entries to keep to be 0 if we have never had to refill, or
half of size, if the thread has previously done refills.
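
To make that concrete, a branch-free sketch could look like this
(never_refills is a hypothetical 0/1 per-cache hint cleared on the first
refill, not something that exists in rte_mempool today):

    /* On flush: keep nothing if the thread never refilled, else size/2. */
    uint32_t keep_entries = (cache->size >> 1) & (never_refills - 1);

    /* On refill (get path): clear the hint once and for all. */
    never_refills = 0;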

/Bruce


Re: [v4 1/3] ethdev: introduce protocol type based header split

2022-04-07 Thread Andrew Rybchenko

On 4/2/22 13:41, wenxuanx...@intel.com wrote:

From: Xuan Ding 

Header split consists of splitting a received packet into two separate
regions based on the packet content. The split happens after the
packet header and before the packet payload. Splitting is usually between
the packet header that can be posted to a dedicated buffer and the packet
payload that can be posted to a different buffer.

Currently, Rx buffer split supports length and offset based packet split.
Although header split is a subset of buffer split, configuring buffer
split based on length is not suitable for NICs that do split based on
header protocol types. Because tunneling makes the conversion from length
to protocol type impossible.

This patch extends the current buffer split to support protocol type and
offset based header split. A new proto field is introduced in the
rte_eth_rxseg_split structure reserved field to specify header protocol
type. With Rx offload flag RTE_ETH_RX_OFFLOAD_HEADER_SPLIT enabled and
protocol type configured, PMD will split the ingress packets into two
separate regions. Currently, both inner and outer L2/L3/L4 level header
split can be supported.


RTE_ETH_RX_OFFLOAD_HEADER_SPLIT offload was introduced some
time ago to substitute bit-field header_split in struct
rte_eth_rxmode. It allows to enable header split offload with
the header size controlled using split_hdr_size in the same
structure.

Right now I see no single PMD which actually supports
RTE_ETH_RX_OFFLOAD_HEADER_SPLIT with above definition.
Many examples and test apps initialize the field to 0
explicitly. The most of drivers simply ignore split_hdr_size
since the offload is not advertised, but some double-check
that its value is 0.

I think that it means that the field should be removed on
the next LTS, and I'd say, together with the
RTE_ETH_RX_OFFLOAD_HEADER_SPLIT offload bit.

We should not redefine the offload meaning. 

For example, let's suppose we configured the Rx queue with the
following segments:
 seg0 - pool0, off0=2B
 seg1 - pool1, off1=128B


Corresponding feature is named Rx buffer split.
Does it mean that protocol type based header split
requires Rx buffer split feature to be supported?



With header split type configured with RTE_ETH_RX_HEADER_SPLIT_UDP,
the packet consists of MAC_IP_UDP_PAYLOAD will be split like following:
 seg0 - udp header @ RTE_PKTMBUF_HEADROOM + 2 in mbuf from pool0
 seg1 - payload @ 128 in mbuf from pool1


Is it always outermost UDP? Does it require both UDP over IPv4
and UDP over IPv6 to be supported? What will happen if only one
is supported? How application can find out which protocol stack
are supported?



The memory attributes for the split parts may differ either - for example
the mempool0 and mempool1 belong to dpdk memory and external memory,
respectively.
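
For illustration only, the configuration above could be expressed roughly as
below; the proto member and RTE_ETH_RX_HEADER_SPLIT_UDP come from this patch
(not the released API), while pool0/pool1/port_id/nb_rxd/socket_id/dev_info
are placeholders:

    union rte_eth_rxseg rx_useg[2] = {0};
    struct rte_eth_rxconf rxconf = dev_info.default_rxconf;

    rx_useg[0].split.mp = pool0;
    rx_useg[0].split.offset = 2;
    rx_useg[0].split.proto = RTE_ETH_RX_HEADER_SPLIT_UDP;

    rx_useg[1].split.mp = pool1;
    rx_useg[1].split.offset = 128;

    rxconf.rx_seg = rx_useg;
    rxconf.rx_nseg = 2;
    rxconf.offloads |= RTE_ETH_RX_OFFLOAD_HEADER_SPLIT;

    int ret = rte_eth_rx_queue_setup(port_id, 0, nb_rxd, socket_id,
                                     &rxconf, NULL);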

Signed-off-by: Xuan Ding 
Signed-off-by: Yuan Wang 
Signed-off-by: Wenxuan Wu 
Reviewed-by: Qi Zhang 
---
  lib/ethdev/rte_ethdev.c | 34 ++---
  lib/ethdev/rte_ethdev.h | 48 +++--
  2 files changed, 72 insertions(+), 10 deletions(-)

diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 29a3d80466..29adcdc2f0 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -1661,6 +1661,7 @@ rte_eth_rx_queue_check_split(const struct 
rte_eth_rxseg_split *rx_seg,
struct rte_mempool *mpl = rx_seg[seg_idx].mp;
uint32_t length = rx_seg[seg_idx].length;
uint32_t offset = rx_seg[seg_idx].offset;
+   uint16_t proto = rx_seg[seg_idx].proto;
  
  		if (mpl == NULL) {

RTE_ETHDEV_LOG(ERR, "null mempool pointer\n");
@@ -1694,13 +1695,29 @@ rte_eth_rx_queue_check_split(const struct 
rte_eth_rxseg_split *rx_seg,
}
offset += seg_idx != 0 ? 0 : RTE_PKTMBUF_HEADROOM;
*mbp_buf_size = rte_pktmbuf_data_room_size(mpl);
-   length = length != 0 ? length : *mbp_buf_size;
-   if (*mbp_buf_size < length + offset) {
-   RTE_ETHDEV_LOG(ERR,
-  "%s mbuf_data_room_size %u < %u (segment 
length=%u + segment offset=%u)\n",
-  mpl->name, *mbp_buf_size,
-  length + offset, length, offset);
-   return -EINVAL;
+   if (proto == RTE_ETH_RX_HEADER_SPLIT_NONE) {
+   /* Check buffer split. */
+   length = length != 0 ? length : *mbp_buf_size;
+   if (*mbp_buf_size < length + offset) {
+   RTE_ETHDEV_LOG(ERR,
+   "%s mbuf_data_room_size %u < %u (segment 
length=%u + segment offset=%u)\n",
+   mpl->name, *mbp_buf_size,
+   length + offset, length, offset);
+   

Re: [PATCH v3 1/1] ethdev: mtr: support input color selection

2022-04-07 Thread Jerin Jacob

On Tue, Mar 1, 2022 at 11:18 PM Dumitrescu, Cristian
 wrote:
>
> HI Jerin,

Hi Cristian,

>
> Thanks for your patch! I think we are making great progress, here are a few 
> more comments:
>
> 
>
> > +/**
> > + * Input color method
> > + */
> > +enum rte_mtr_input_color_method {
>
> We should clean up the names of these methods a bit: we should not mix header 
> names (VLAN, IP) with header field names (DSCP, PCP), in the sense that to me 
> METHOD_VLAN_DSCP should be replaced with either:
> * METHOD_OUTER_VLAN_IP :shorter name, as only the headers are mentioned (my 
> preference, but I am OK with both)

OK, we will keep VLAN and IP. By default, OUTER is implicit in other
DPDK API specs, i.e. if not mentioned, it is outer. Hence I removed the
outer. I can make the outer explicit if you prefer it that way. See the
last comment.

> * METHOD_OUTER_VLAN_PCP_IP_DSCP: longer name, as both the headers and the 
> header fields are mentioned
>
> Please put a blank line in between these methods to better readability.
>
> I see some issues in the list of methods below, I am trying to do my best to 
> catch them all:

Thanks. Sorry for the delay in reply.


>
> > + /**
> > +  * The input color is always green.
> > +  * The default_input_color is ignored for this method.
> > +  * @see struct rte_mtr_params::default_input_color
> > +  */
> > + RTE_MTR_INPUT_COLOR_METHOD_COLOR_BLIND  = RTE_BIT64(0),
>
> OK.
>
> > + /**
> > +  * If the input packet has at least one VLAN label, its input color is
> > +  * detected by the outermost VLAN DEI(1bit), PCP(3 bits)
> > +  * indexing into the struct rte_mtr_params::vlan_table.
> > +  * Otherwise, the default_input_color is applied.
> > +  * @see struct rte_mtr_params::default_input_color
> > +  * @see struct rte_mtr_params::vlan_table
> > +  */
> > + RTE_MTR_INPUT_COLOR_METHOD_VLAN = RTE_BIT64(1),
>
> OK.
> Does your HW use PCP+DEI , or just PCP?

PCP + DEI

>
> > + /**
> > +  * If the input packet is IPv4 or IPv6, its input color is detected by
> > +  * the outermost DSCP field indexing into the
> > +  * struct rte_mtr_params::dscp_table.
> > +  * Otherwise, the default_input_color is applied.
> > +  * @see struct rte_mtr_params::default_input_color
> > +  * @see struct rte_mtr_params::dscp_table
> > +  */
> > + RTE_MTR_INPUT_COLOR_METHOD_DSCP = RTE_BIT64(2),
>
> OK.
> Please change name to METHOD_IP.
> Description: Change the "outermost DSCP" to "the DSCP field of the outermost 
> IP header".

OK

> I would move this up on the second position (to follow immediately after the 
> color blind method).

Please check the summary below.

>
> > + /**
> > +  * If the input packet has at least one VLAN label, its input color is
> > +  * detected by the outermost VLAN DEI(1bit), PCP(3 bits)
> > +  * indexing into the struct rte_mtr_params::vlan_table.
> > +  * If the input packet is IPv4 or IPv6, its input color is detected by
> > +  * the outermost DSCP field indexing into the
> > +  * struct rte_mtr_params::dscp_table.
> > +  * Otherwise, the default_input_color is applied.
> > +  * @see struct rte_mtr_params::default_input_color
> > +  * @see struct rte_mtr_params::vlan_table
> > +  * @see struct rte_mtr_params::dscp_table
> > +  */
> > + RTE_MTR_INPUT_COLOR_METHOD_VLAN_DSCP = RTE_BIT64(3),
>
> OK.
> Please change name to METHOD_VLAN_IP.

OK

> This should follow immediately after the METHOD_VLAN.

OK

> Description: please use "Otherwise" before "if the input packet is IP"; 
> please replace "outermost DSCP" as above.

OK

> Is your HW using DEI + PCP or just PCP?

OK

>
> > + /**
> > +  * If the input packet has at least one VLAN label, its input color is
> > +  * detected by the innermost VLAN DEI(1bit), PCP(3 bits)
> > +  * indexing into the struct rte_mtr_params::vlan_table.
> > +  * Otherwise, the default_input_color is applied.
> > +  * @see struct rte_mtr_params::default_input_color
> > +  * @see struct rte_mtr_params::vlan_table
> > +  */
> > + RTE_MTR_INPUT_COLOR_METHOD_INNER_VLAN = RTE_BIT64(4),
>
> OK.
> Is your HW using DEI + PCP or just PCP?

DEI + PCP

>
> > + /**
> > +  * If the input packet is IPv4 or IPv6, its input color is detected by
> > +  * the innermost DSCP field indexing into the
> > +  * struct rte_mtr_params::dscp_table.
> > +  * Otherwise, the default_input_color is applied.
> > +  * @see struct rte_mtr_params::default_input_color
> > +  * @see struct rte_mtr_params::dscp_table
> > +  */
> > + RTE_MTR_INPUT_COLOR_METHOD_INNER_DSCP = RTE_BIT64(5),
>
> This is very confusing to me, I don't get what this one is about: The "inner" 
> word in the name suggests that inner VLAN is attempted first, then IP DSCP 
> (if no VLAN is present), but the desc

Re: [PATCH] eal/windows: add missing C++ include guards

2022-04-07 Thread Tyler Retzlaff
On Tue, Apr 05, 2022 at 03:48:58PM +0200, David Marchand wrote:
> Add missing 'extern "C"' to file.
> 
> Fixes: 1db72630da0c ("eal/windows: do not expose private facilities")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: David Marchand 

Acked-by: Tyler Retzlaff 
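
For reference, the guard pattern being added is the usual one (sketch only,
not the exact hunk):

    #ifdef __cplusplus
    extern "C" {
    #endif

    /* ... header declarations ... */

    #ifdef __cplusplus
    }
    #endif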



RE: [RFC 1/2] ethdev: port flags for pre-configuration flow hints

2022-04-07 Thread Ori Kam
Hi Jack,

> -Original Message-
> From: Jack Min 
> Sent: Thursday, April 7, 2022 8:31 AM
> Subject: [RFC 1/2] ethdev: port flags for pre-configuration flow hints
> 
> The data-path focused flow rule management can manage flow rules in a more
> optimized way than the traditional one by using hints provided by the
> application in the initialization phase.
> 
> In addition to the current hints we have in port attr, more hints could
> be provided by the application about its behaviour.
> 
> One example is what the application does with the same flow:
> A. create/destroy flow on the same queue but query flow on a different queue
>    or in a queue-less way (i.e., counter query)
> B. all flow operations happen exactly on the same queue, so the PMD
>    could work in a more optimized way than in A because resources could be
>    isolated and accessed per queue, without locks for example.
> 
> This patch adds a flag for the above situation and could be extended to cover
> more situations.
> 
> Signed-off-by: Xiaoyu Min 
> ---
>  lib/ethdev/rte_flow.h | 16 
>  1 file changed, 16 insertions(+)
> 
> diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h
> index d8827dd184..578dd837f5 100644
> --- a/lib/ethdev/rte_flow.h
> +++ b/lib/ethdev/rte_flow.h
> @@ -4875,6 +4875,17 @@ rte_flow_flex_item_release(uint16_t port_id,
>  const struct rte_flow_item_flex_handle *handle,
>  struct rte_flow_error *error);
> 
> +/**
> + * The flags of rte flow port
> + */
> +enum rte_flow_port_flag {
> + /**
> +  * All flow operations for one specified flow will _strictly_ happen
> +  * on the same queue (create/destroy/query/update).
> +  */
> + RTE_FLOW_PORT_FLAG_STRICT_QUEUE = RTE_BIT32(0),
> +};
> +
>  /**
>   * @warning
>   * @b EXPERIMENTAL: this API may change without prior notice.
> @@ -4972,6 +4983,11 @@ struct rte_flow_port_attr {
>* @see RTE_FLOW_ACTION_TYPE_METER
>*/
>   uint32_t nb_meters;
> + /**
> +  * Port flags.
> +  * @see enum rte_flow_port_flag
> +  */
> + enum rte_flow_port_flag flags;

Why the use of enum and not flags?
I guess there will be more flags in future, and those flags will not be related 
to the strict queue.

>  };
> 
>  /**
> --
> 2.35.1



RE: [PATCH v4] mempool: fix mempool cache flushing algorithm

2022-04-07 Thread Morten Brørup
> From: Bruce Richardson [mailto:bruce.richard...@intel.com]
> Sent: Thursday, 7 April 2022 12.44
> 
> On Thu, Apr 07, 2022 at 11:32:12AM +0100, Bruce Richardson wrote:
> > On Thu, Apr 07, 2022 at 11:26:53AM +0200, Morten Brørup wrote:
> > > > From: Bruce Richardson [mailto:bruce.richard...@intel.com]
> > > > Sent: Thursday, 7 April 2022 11.14
> > > >
> > > > On Thu, Apr 07, 2022 at 11:04:53AM +0200, Morten Brørup wrote:
> > > > > > From: Morten Brørup [mailto:m...@smartsharesystems.com]
> > > > > > Sent: Wednesday, 2 February 2022 11.34
> > > > > >
> > > > > > This patch fixes the rte_mempool_do_generic_put() caching
> > > > algorithm,
> > > > > > which was fundamentally wrong, causing multiple performance
> issues
> > > > when
> > > > > > flushing.
> > > > > >
> > > > >
> > > > > [...]
> > > > >
> > > > > Olivier,
> > > > >
> > > > > Will you please consider this patch [1] and the other one [2].
> > > > >
> > > > > The primary bug here is this: When a mempool cache becomes full
> (i.e.
> > > > exceeds the "flush threshold"), and is flushed to the backing
> ring, it
> > > > is still full afterwards; but it should be empty afterwards. It
> is not
> > > > flushed entirely, only the elements exceeding "size" are flushed.
> > > > >
> > > >
> > > > I don't believe it should be flushed entirely, there should
> always be
> > > > some
> > > > elements left so that even after flush we can still allocate an
> > > > additional
> > > > burst. We want to avoid the situation where a flush of all
> elements is
> > > > immediately followed by a refill of new elements. However, we can
> flush
> > > > to
> > > > maybe size/2, and improve things. In short, this not emptying is
> by
> > > > design
> > > > rather than a bug, though we can look to tweak the behaviour.
> > > >
> > >
> > > I initially agreed with you about flushing to size/2.
> > >
> > > However, I did think further about it when I wrote the patch, and
> came to this conclusion: If an application thread repeatedly puts
> objects into the mempool, and does it so often that the cache overflows
> (i.e. reaches the flush threshold) and needs to be flushed, it is far
> more likely that the application thread will continue doing that,
> rather than start getting objects from the mempool. This speaks for
> flushing the cache entirely.
> > >
> > > Both solutions are better than flushing to size, so if there is a
> preference for keeping some objects in the cache after flushing, I can
> update the patch accordingly.

I forgot to mention some details here...

The cache is a stack, so leaving objects in it after flushing can be done in 
one of two ways:

1. Flush the top objects, and leave the bottom objects, which are extremely 
cold.
2. Flush the bottom objects, and move the objects from the top to the bottom, 
which is a costly operation.

Theoretically, there is a third option: Make the stack a circular buffer, so 
its "bottom pointer" can be moved around, instead of copying objects from the 
top to the bottom after flushing. However, this will add complexity when 
copying arrays to/from the stack in both the normal cases, i.e. to/from the 
application. And it introduces requirements to the cache size. So I quickly 
discarded the idea when it first came to me.

The provided patch flushes the entire cache, and then stores the newly added 
objects (the ones causing the flush) in the cache. So it is not completely 
empty after flushing. It contains some (but not many) objects, and they are hot.
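
As a simplified sketch of that behaviour (error handling and the cache-size
corner cases left out), the put path described above is essentially:

    if (cache->len + n > cache->flushthresh) {
            /* Flush the whole cache to the backing ring/ops. */
            rte_mempool_ops_enqueue_bulk(mp, cache->objs, cache->len);
            cache->len = 0;
    }
    /* Store the newly freed objects on top of the (now shallow) stack. */
    memcpy(&cache->objs[cache->len], obj_table, sizeof(void *) * n);
    cache->len += n;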

> > >
> >
> > Would it be worth looking at adding per-core hinting to the mempool?
> > Indicate for a core that it allocates-only, i.e. RX thread, frees-
> only,
> > i.e. TX-thread, or does both alloc and free (the default)? That hint
> could
> > be used only on flush or refill to specify whether to flush all or
> partial,
> > and similarly to refill to max possible or just to size.
> >
> Actually, taking the idea further, we could always track per-core
> whether a
> core has ever done a flush/refill and use that as the hint instead. It
> could even be done in a branch-free manner if we want. For example:
> 
> on flush:
>   keep_entries = (size >> 1) & (never_refills - 1);
> 
> which will set the entries to keep to be 0 if we have never had to
> refill, or
> half of size, if the thread has previously done refills.
> 

Your suggestion is a good idea for a performance improvement.

We would also need "mostly" variants in addition to the "only" variants. Or the 
automatic detection will cause problems if triggered by some rare event.

And applications using the "service cores" concept will just fall back to the 
default alloc-free balanced variant.


Perhaps we should fix the current bugs (my term, not consensus) first, and then 
look at further performance improvements. It's already uphill getting Acks for 
my fixes as they are.


Another performance improvement could be hard coding the mempool cache size to 
RTE_MEMPOOL_CACHE_MAX_SIZE, so the copying between the cache and the b

[PATCH] examples/l2fwd-crypto: fix stats refresh rate

2022-04-07 Thread Raja Zidane
TIMER_MILLISECOND is defined as the number of CPU cycles per millisecond.
The current definition is only correct for cores with a frequency of 2 GHz;
for cores with a different frequency it causes a different period between
refreshes (i.e. the definition corresponds to about 14 ms on ARM cores).
The devarg that stated the period between stats prints was not used;
instead, it always defaulted to 10 seconds (on a 2 GHz core).

Use the DPDK API to get the CPU frequency to define TIMER_MILLISECOND.
Use the refresh period devarg instead of always defaulting to 10 s.

Fixes: 387259bd6c67 ("examples/l2fwd-crypto: add sample application")
Cc: sta...@dpdk.org

Signed-off-by: Raja Zidane 
Acked-by: Matan Azrad 
---
 examples/l2fwd-crypto/main.c | 16 +++-
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/examples/l2fwd-crypto/main.c b/examples/l2fwd-crypto/main.c
index bbdb263143..b1e2613ccf 100644
--- a/examples/l2fwd-crypto/main.c
+++ b/examples/l2fwd-crypto/main.c
@@ -252,11 +252,9 @@ struct l2fwd_port_statistics 
port_statistics[RTE_MAX_ETHPORTS];
 struct l2fwd_crypto_statistics crypto_statistics[RTE_CRYPTO_MAX_DEVS];
 
 /* A tsc-based timer responsible for triggering statistics printout */
-#define TIMER_MILLISECOND 200ULL /* around 1ms at 2 Ghz */
+#define TIMER_MILLISECOND (rte_get_tsc_hz() / 1000)
 #define MAX_TIMER_PERIOD 86400UL /* 1 day max */
-
-/* default period is 10 seconds */
-static int64_t timer_period = 10 * TIMER_MILLISECOND * 1000;
+#define DEFAULT_TIMER_PERIOD 10UL
 
 /* Print out statistics on packets dropped */
 static void
@@ -894,18 +892,17 @@ l2fwd_main_loop(struct l2fwd_crypto_options *options)
}
 
/* if timer is enabled */
-   if (timer_period > 0) {
+   if (options->refresh_period > 0) {
 
/* advance the timer */
timer_tsc += diff_tsc;
 
/* if timer has reached its timeout */
if (unlikely(timer_tsc >=
-   (uint64_t)timer_period)) {
+   options->refresh_period)) {
 
/* do this only on main core */
-   if (lcore_id == rte_get_main_lcore()
-   && options->refresh_period) {
+   if (lcore_id == rte_get_main_lcore()) {
print_stats();
timer_tsc = 0;
}
@@ -1481,7 +1478,8 @@ l2fwd_crypto_default_options(struct l2fwd_crypto_options 
*options)
 {
options->portmask = 0x;
options->nb_ports_per_lcore = 1;
-   options->refresh_period = 1;
+   options->refresh_period = DEFAULT_TIMER_PERIOD *
+   TIMER_MILLISECOND * 1000;
options->single_lcore = 0;
options->sessionless = 0;
 
-- 
2.21.0



Re: [PATCH v2 0/2] rte_dump_stack: improvements

2022-04-07 Thread David Marchand
On Sat, Feb 12, 2022 at 7:44 PM Stephen Hemminger
 wrote:
>
> This is update to earlier RFC. Add some more comments and changes
> to have common code for Linux and FreeBSD
>
> Stephen Hemminger (2):
>   eal_debug: do not use malloc in rte_dump_stack
>   eal: common rte_dump_stack for both Linux and FreeBSD
>
>  lib/eal/freebsd/eal_debug.c | 43 
>  lib/eal/freebsd/meson.build |  1 -
>  lib/eal/linux/eal_debug.c   | 43 
>  lib/eal/linux/meson.build   |  1 -
>  lib/eal/unix/eal_debug.c| 65 +
>  lib/eal/unix/meson.build|  5 +--
>  6 files changed, 68 insertions(+), 90 deletions(-)
>  delete mode 100644 lib/eal/freebsd/eal_debug.c
>  delete mode 100644 lib/eal/linux/eal_debug.c
>  create mode 100644 lib/eal/unix/eal_debug.c

It is strange to change only the Linux implementation in a first patch, then
merge the implementations in a second step, effectively changing the FreeBSD
implementation in what is presented in the commitlog as a factorisation
cleanup.
Please invert the patches.

Besides, the series does not compile on current main.
It's probably a result of the header inclusion cleanup we had in
v22.03, but I prefer you check.


Thanks.

-- 
David Marchand



Re: [RFC 1/2] ethdev: port flags for pre-configuration flow hints

2022-04-07 Thread Jack Min

On 4/7/22 19:27, Ori Kam wrote:

Hi Jack,

Hey Ori,



-Original Message-
From: Jack Min
Sent: Thursday, April 7, 2022 8:31 AM
Subject: [RFC 1/2] ethdev: port flags for pre-configuration flow hints

The data-path focused flow rule management can manage flow rules in a more
optimized way than the traditional one by using hints provided by the
application in the initialization phase.

In addition to the current hints we have in port attr, more hints could
be provided by the application about its behaviour.

One example is what the application does with the same flow:
A. create/destroy flow on the same queue but query flow on a different queue
or in a queue-less way (i.e., counter query)
B. all flow operations happen exactly on the same queue, so the PMD
could work in a more optimized way than in A because resources could be
isolated and accessed per queue, without locks for example.

This patch adds a flag for the above situation and could be extended to cover
more situations.

Signed-off-by: Xiaoyu Min
---
  lib/ethdev/rte_flow.h | 16 
  1 file changed, 16 insertions(+)

diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h
index d8827dd184..578dd837f5 100644
--- a/lib/ethdev/rte_flow.h
+++ b/lib/ethdev/rte_flow.h
@@ -4875,6 +4875,17 @@ rte_flow_flex_item_release(uint16_t port_id,
   const struct rte_flow_item_flex_handle *handle,
   struct rte_flow_error *error);

+/**
+ * The flags of rte flow port
+ */
+enum rte_flow_port_flag {
+   /**
+* All flow operations for one specified flow will _strictly_ happen
+* on the same queue (create/destroy/query/update).
+*/
+   RTE_FLOW_PORT_FLAG_STRICT_QUEUE = RTE_BIT32(0),
+};
+
  /**
   * @warning
   * @b EXPERIMENTAL: this API may change without prior notice.
@@ -4972,6 +4983,11 @@ struct rte_flow_port_attr {
 * @see RTE_FLOW_ACTION_TYPE_METER
 */
uint32_t nb_meters;
+   /**
+* Port flags.
+* @see enum rte_flow_port_flag
+*/
+   enum rte_flow_port_flag flags;

Why the use of enum and not flags?
I guess there will be more flags in future, and those flags will not be related 
to the strict queue.


Yes, you are right. We will have more flags, and they will not relate to 
strict queue.


I will change it to "flags".

Thank you.
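
For what it's worth, a minimal sketch of the revised member (hypothetical v2,
not the posted RFC):

    /**
     * Port flags (RTE_FLOW_PORT_FLAG_*).
     */
    uint32_t flags;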


  };

  /**
--
2.35.1

Re: [PATCH] eal/windows: set Windows main lcore affinitization

2022-04-07 Thread David Marchand
Hello Tyler,

On Wed, Mar 30, 2022 at 11:00 AM Tyler Retzlaff
 wrote:
>
> add missing code to affinitize main_lcore from lcore configuration.

Nit: Add*

>
> Signed-off-by: Tyler Retzlaff 
> ---
>  lib/eal/windows/eal.c | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/lib/eal/windows/eal.c b/lib/eal/windows/eal.c
> index ca3c41a..9c61780 100644
> --- a/lib/eal/windows/eal.c
> +++ b/lib/eal/windows/eal.c
> @@ -401,6 +401,12 @@ enum rte_proc_type_t
> return -1;
> }
>
> +   if (pthread_setaffinity_np(pthread_self(), sizeof(rte_cpuset_t),
> +   &lcore_config[config->main_lcore].cpuset) != 0) {
> +   rte_eal_init_alert("Cannot set affinity");
> +   rte_errno = EINVAL;
> +   return -1;
> +   }
> __rte_thread_init(config->main_lcore,
> &lcore_config[config->main_lcore].cpuset);
>

- It looks like the affinity is dumped for workers (see below); I
would dump the affinity for the main lcore like the other OSes do:

ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset));

RTE_LOG(DEBUG, EAL, "Main lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
config->main_lcore, thread_id, cpuset,
ret == 0 ? "" : "...");


- Which makes me notice that windows/eal_thread.c probably dumps
random stuff in logs because it is missing a call to
eal_thread_dump_current_affinity() to format affinity as a string.

lib/eal/windows/eal_thread.c:   char cpuset[RTE_CPU_AFFINITY_STR_LEN];
lib/eal/windows/eal_thread.c:   __rte_thread_init(lcore_id,
&lcore_config[lcore_id].cpuset);
lib/eal/windows/eal_thread.c:   RTE_LOG(DEBUG, EAL, "lcore %u is ready
(tid=%zx;cpuset=[%s])\n",
lib/eal/windows/eal_thread.c:   lcore_id, (uintptr_t)thread_id, cpuset);


-- 
David Marchand



RE: [PATCH v3 1/1] ethdev: mtr: support input color selection

2022-04-07 Thread Dumitrescu, Cristian
> 
> To summarize we have 4 attributes, Please find below the truth table
> 1) Outer VLAN
> 2) Outer IP
> 3) Inner VLAN
> 4) Inner IP
> 
> 
> Inner IP - Inner VLAN - Outer IP - Outer VLAN
> 0 0 0 0 - Not valid case
> 0 0 0 1 - RTE_MTR_INPUT_COLOR_METHOD_OUTER_VLAN
> 0 0 1 0 - RTE_MTR_INPUT_COLOR_METHOD_OUTER_IP
> 0 0 1 1 - RTE_MTR_INPUT_COLOR_METHOD_OUTER_VLAN_OUTER_IP - If found outer
>           VLAN then vlan else outer IP
> 0 1 0 0 - RTE_MTR_INPUT_COLOR_METHOD_INNER_VLAN
> 0 1 0 1 - RTE_MTR_INPUT_COLOR_METHOD_INNER_VLAN_OUTER_VLAN - If found inner
>           VLAN else outer VLAN
> 0 1 1 0 - RTE_MTR_INPUT_COLOR_METHOD_INNER_VLAN_OUTER_IP
> 0 1 1 1 - RTE_MTR_INPUT_COLOR_METHOD_INNER_VLAN_OUTER_IP_OUTER_VLAN - If found
>           inner vlan then inner vlan else outer IP else outer VLAN
> 1 0 0 0 - RTE_MTR_INPUT_COLOR_METHOD_INNER_IP
> 1 0 0 1 - RTE_MTR_INPUT_COLOR_METHOD_INNER_IP_OUTER_VLAN
> 1 0 1 0 - RTE_MTR_INPUT_COLOR_METHOD_INNER_IP_OUTER_IP
> 1 0 1 1 - RTE_MTR_INPUT_COLOR_METHOD_INNER_IP_OUTER_IP_OUTER_VLAN
> 1 1 0 0 - RTE_MTR_INPUT_COLOR_METHOD_INNER_IP_INNER_VLAN
> 1 1 0 1 - RTE_MTR_INPUT_COLOR_METHOD_INNER_IP_INNER_VLAN_OUTER_VLAN
> 1 1 1 0 - RTE_MTR_INPUT_COLOR_METHOD_INNER_IP_INNER_VLAN_OUTER_IP
> 1 1 1 1 - RTE_MTR_INPUT_COLOR_METHOD_INNER_IP_INNER_VLAN_OUTER_IP_OUTER_VLAN
> 
> Is the above enumeration fine? If not, please suggest alternatives.
> 
> In terms of names,
> a) we could omit the explicit OUTER to reduce the length, as suggested, or
> b) change to an IIP, OIP, IVLAN, OVLAN kind of scheme to shorten the names.
> 
> Let me know the names and enumeration you prefer; I will change them
> accordingly in the next version.
> 

Hi Jerin,

The above table looks confusing to me, I suggest we have a meeting next week to 
go over it and then report back to the list?

Regards,
Cristian


Re: [v4 1/3] ethdev: introduce protocol type based header split

2022-04-07 Thread Jerin Jacob
On Sat, Apr 2, 2022 at 4:33 PM  wrote:
>
> From: Xuan Ding 
>
> Header split consists of splitting a received packet into two separate
> regions based on the packet content. The split happens after the
> packet header and before the packet payload. Splitting is usually between
> the packet header that can be posted to a dedicated buffer and the packet
> payload that can be posted to a different buffer.
>
> Currently, Rx buffer split supports length and offset based packet split.
> Although header split is a subset of buffer split, configuring buffer
> split based on length is not suitable for NICs that do split based on
> header protocol types. Because tunneling makes the conversion from length
> to protocol type impossible.
>
> This patch extends the current buffer split to support protocol type and
> offset based header split. A new proto field is introduced in the
> rte_eth_rxseg_split structure reserved field to specify header protocol
> type. With Rx offload flag RTE_ETH_RX_OFFLOAD_HEADER_SPLIT enabled and
> protocol type configured, PMD will split the ingress packets into two
> separate regions. Currently, both inner and outer L2/L3/L4 level header
> split can be supported.
>
> For example, let's suppose we configured the Rx queue with the
> following segments:
> seg0 - pool0, off0=2B
> seg1 - pool1, off1=128B
>
> With header split type configured with RTE_ETH_RX_HEADER_SPLIT_UDP,
> the packet consists of MAC_IP_UDP_PAYLOAD will be split like following:
> seg0 - udp header @ RTE_PKTMBUF_HEADROOM + 2 in mbuf from pool0

If we set rte_eth_rxseg_split::proto = RTE_ETH_RX_HEADER_SPLIT_UDP and
rte_eth_rxseg_split.offset = 2,
what will be the content for seg0?
Will it be:
- offset: starts at the UDP header
- size of segment: MAX(size of UDP header + 2, 128) (as seg1 starts from 128)?
Right? If not, please describe.

Also, I don't think we need a duplicate
rte_eth_rx_header_split_protocol_type; instead we can
reuse the existing RTE_PTYPE_* flags.


> seg1 - payload @ 128 in mbuf from pool1
>
> The memory attributes for the split parts may differ either - for example
> the mempool0 and mempool1 belong to dpdk memory and external memory,
> respectively.


[RFC PATCH] cryptodev: add diffie hellman verify, change ec enum

2022-04-07 Thread Arek Kusztal
This commit:
1) adds Diffie-Hellman verify operation.
2) splits asym_op_type with dh op_type
3) removes next pointer from asym_xform
4) changes enumeration of elliptic curves
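
As an illustration, requesting the new verify operation with the proposed
dh op type could look as below (sketch against this RFC; the prime/generator
buffers and lengths are placeholders):

    struct rte_crypto_dh_xform xform = {
            .type = RTE_CRYPTO_DH_OP_PUBLIC_KEY_VERIFY,
            .p = { .data = prime, .length = prime_len },
            .g = { .data = generator, .length = generator_len },
    };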

Signed-off-by: Arek Kusztal 
---
 lib/cryptodev/rte_crypto_asym.h | 36 +---
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/lib/cryptodev/rte_crypto_asym.h b/lib/cryptodev/rte_crypto_asym.h
index cd24d4b07b..6fbc7b7708 100644
--- a/lib/cryptodev/rte_crypto_asym.h
+++ b/lib/cryptodev/rte_crypto_asym.h
@@ -43,11 +43,11 @@ rte_crypto_asym_op_strings[];
  */
 enum rte_crypto_ec_group {
RTE_CRYPTO_EC_GROUP_UNKNOWN  = 0,
-   RTE_CRYPTO_EC_GROUP_SECP192R1 = 19,
-   RTE_CRYPTO_EC_GROUP_SECP224R1 = 21,
-   RTE_CRYPTO_EC_GROUP_SECP256R1 = 23,
-   RTE_CRYPTO_EC_GROUP_SECP384R1 = 24,
-   RTE_CRYPTO_EC_GROUP_SECP521R1 = 25,
+   RTE_CRYPTO_EC_GROUP_SECP192R1 = 1,
+   RTE_CRYPTO_EC_GROUP_SECP224R1 = 2,
+   RTE_CRYPTO_EC_GROUP_SECP256R1 = 3,
+   RTE_CRYPTO_EC_GROUP_SECP384R1 = 4,
+   RTE_CRYPTO_EC_GROUP_SECP521R1 = 5,
 };
 
 /**
@@ -109,13 +109,19 @@ enum rte_crypto_asym_op_type {
/**< Signature Generation operation */
RTE_CRYPTO_ASYM_OP_VERIFY,
/**< Signature Verification operation */
-   RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE,
+   RTE_CRYPTO_ASYM_OP_LIST_END
+};
+
+enum rte_crypto_dh_op_type {
+   RTE_CRYPTO_DH_OP_INVALID,
+   RTE_CRYPTO_DH_OP_PRIVATE_KEY_GENERATE,
/**< DH Private Key generation operation */
-   RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE,
+   RTE_CRYPTO_DH_OP_PUBLIC_KEY_GENERATE,
/**< DH Public Key generation operation */
-   RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE,
+   RTE_CRYPTO_DH_OP_SHARED_SECRET_COMPUTE,
/**< DH Shared Secret compute operation */
-   RTE_CRYPTO_ASYM_OP_LIST_END
+   RTE_CRYPTO_DH_OP_PUBLIC_KEY_VERIFY,
+   /**< DH verify correctness of public key */
 };
 
 /**
@@ -256,7 +262,7 @@ struct rte_crypto_modinv_xform {
  *
  */
 struct rte_crypto_dh_xform {
-   enum rte_crypto_asym_op_type type;
+   enum rte_crypto_dh_op_type type;
/**< Setup xform for key generate or shared secret compute */
rte_crypto_uint p;
/**< Prime modulus data */
@@ -278,13 +284,7 @@ struct rte_crypto_dsa_xform {
rte_crypto_uint g;
/**< Generator of the subgroup */
rte_crypto_uint x;
-   /**< x: Private key of the signer in octet-string network
-* byte order format.
-* Used when app has pre-defined private key.
-* Valid only when xform chain is DSA ONLY.
-* if xform chain is DH private key generate + DSA, then DSA sign
-* compute will use internally generated key.
-*/
+   /**< x: Private key */
 };
 
 /**
@@ -504,8 +504,6 @@ struct rte_crypto_ecpm_op_param {
  * Structure describing asym xforms.
  */
 struct rte_crypto_asym_xform {
-   struct rte_crypto_asym_xform *next;
-   /**< Pointer to next xform to set up xform chain.*/
enum rte_crypto_asym_xform_type xform_type;
/**< Asymmetric crypto transform */
 
-- 
2.13.6



[PATCH] cryptodev: add support for 25519 and 448 curves

2022-04-07 Thread Arek Kusztal
This commit adds support for following elliptic curves:
1) Curve25519
2) Curve448

Signed-off-by: Arek Kusztal 
---
 lib/cryptodev/rte_crypto_asym.h | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/lib/cryptodev/rte_crypto_asym.h b/lib/cryptodev/rte_crypto_asym.h
index cd24d4b07b..775b2f6277 100644
--- a/lib/cryptodev/rte_crypto_asym.h
+++ b/lib/cryptodev/rte_crypto_asym.h
@@ -48,6 +48,8 @@ enum rte_crypto_ec_group {
RTE_CRYPTO_EC_GROUP_SECP256R1 = 23,
RTE_CRYPTO_EC_GROUP_SECP384R1 = 24,
RTE_CRYPTO_EC_GROUP_SECP521R1 = 25,
+   RTE_CRYPTO_EC_GROUP_CURVE25519 = 29,
+   RTE_CRYPTO_EC_GROUP_CURVE448 = 30,
 };
 
 /**
@@ -180,9 +182,17 @@ typedef rte_crypto_param rte_crypto_uint;
  */
 struct rte_crypto_ec_point {
rte_crypto_param x;
-   /**< X coordinate */
+   /**<
+* X coordinate
+* For curve25519 and curve448 - little-endian integer
+* otherwise, big-endian integer
+*/
rte_crypto_param y;
-   /**< Y coordinate */
+   /**<
+* Y coordinate
+* For curve25519 and curve448 - little-endian integer
+* otherwise, big-endian integer
+*/
 };
 
 /**
-- 
2.13.6



RE: OVS DPDK DMA-Dev library/Design Discussion

2022-04-07 Thread Van Haaren, Harry
Hi OVS & DPDK, Maintainers & Community,

Top posting overview of discussion as replies to thread become slower:
perhaps it is a good time to review and plan for next steps?

From my perspective, those most vocal in the thread seem to be in favour of
the clean rx/tx split ("defer work"), with the tradeoff that the application
must be aware of handling the async DMA completions. If there are any concerns
opposing upstreaming of this method, please indicate this promptly, and we can
continue technical discussions here now.

In absence of continued technical discussion here, I suggest Sunil and Ian 
collaborate on getting
the OVS Defer-work approach, and DPDK VHost Async patchsets available on GitHub 
for easier
consumption and future development (as suggested in slides presented on last 
call).

Regards, -Harry

No inline-replies below; message just for context.

> -Original Message-
> From: Van Haaren, Harry
> Sent: Wednesday, March 30, 2022 10:02 AM
> To: Morten Brørup ; Richardson, Bruce
> 
> Cc: Maxime Coquelin ; Pai G, Sunil
> ; Stokes, Ian ; Hu, Jiayu
> ; Ferriter, Cian ; Ilya Maximets
> ; ovs-...@openvswitch.org; dev@dpdk.org; Mcnamara,
> John ; O'Driscoll, Tim ;
> Finn, Emma 
> Subject: RE: OVS DPDK DMA-Dev library/Design Discussion
> 
> > -Original Message-
> > From: Morten Brørup 
> > Sent: Tuesday, March 29, 2022 8:59 PM
> > To: Van Haaren, Harry ; Richardson, Bruce
> > 
> > Cc: Maxime Coquelin ; Pai G, Sunil
> > ; Stokes, Ian ; Hu, Jiayu
> > ; Ferriter, Cian ; Ilya 
> > Maximets
> > ; ovs-...@openvswitch.org; dev@dpdk.org; Mcnamara,
> John
> > ; O'Driscoll, Tim ; Finn,
> > Emma 
> > Subject: RE: OVS DPDK DMA-Dev library/Design Discussion
> >
> > > From: Van Haaren, Harry [mailto:harry.van.haa...@intel.com]
> > > Sent: Tuesday, 29 March 2022 19.46
> > >
> > > > From: Morten Brørup 
> > > > Sent: Tuesday, March 29, 2022 6:14 PM
> > > >
> > > > > From: Bruce Richardson [mailto:bruce.richard...@intel.com]
> > > > > Sent: Tuesday, 29 March 2022 19.03
> > > > >
> > > > > On Tue, Mar 29, 2022 at 06:45:19PM +0200, Morten Brørup wrote:
> > > > > > > From: Maxime Coquelin [mailto:maxime.coque...@redhat.com]
> > > > > > > Sent: Tuesday, 29 March 2022 18.24
> > > > > > >
> > > > > > > Hi Morten,
> > > > > > >
> > > > > > > On 3/29/22 16:44, Morten Brørup wrote:
> > > > > > > >> From: Van Haaren, Harry [mailto:harry.van.haa...@intel.com]
> > > > > > > >> Sent: Tuesday, 29 March 2022 15.02
> > > > > > > >>
> > > > > > > >>> From: Morten Brørup 
> > > > > > > >>> Sent: Tuesday, March 29, 2022 1:51 PM
> > > > > > > >>>
> > > > > > > >>> Having thought more about it, I think that a completely
> > > > > different
> > > > > > > architectural approach is required:
> > > > > > > >>>
> > > > > > > >>> Many of the DPDK Ethernet PMDs implement a variety of RX
> > > and TX
> > > > > > > packet burst functions, each optimized for different CPU vector
> > > > > > > instruction sets. The availability of a DMA engine should be
> > > > > treated
> > > > > > > the same way. So I suggest that PMDs copying packet contents,
> > > e.g.
> > > > > > > memif, pcap, vmxnet3, should implement DMA optimized RX and TX
> > > > > packet
> > > > > > > burst functions.
> > > > > > > >>>
> > > > > > > >>> Similarly for the DPDK vhost library.
> > > > > > > >>>
> > > > > > > >>> In such an architecture, it would be the application's job
> > > to
> > > > > > > allocate DMA channels and assign them to the specific PMDs that
> > > > > should
> > > > > > > use them. But the actual use of the DMA channels would move
> > > down
> > > > > below
> > > > > > > the application and into the DPDK PMDs and libraries.
> > > > > > > >>>
> > > > > > > >>>
> > > > > > > >>> Med venlig hilsen / Kind regards,
> > > > > > > >>> -Morten Brørup
> > > > > > > >>
> > > > > > > >> Hi Morten,
> > > > > > > >>
> > > > > > > >> That's *exactly* how this architecture is designed &
> > > > > implemented.
> > > > > > > >> 1. The DMA configuration and initialization is up to the
> > > > > application
> > > > > > > (OVS).
> > > > > > > >> 2. The VHost library is passed the DMA-dev ID, and its
> > > new
> > > > > async
> > > > > > > rx/tx APIs, and uses the DMA device to accelerate the copy.
> > > > > > > >>
> > > > > > > >> Looking forward to talking on the call that just started.
> > > > > Regards, -
> > > > > > > Harry
> > > > > > > >>
> > > > > > > >
> > > > > > > > OK, thanks - as I said on the call, I haven't looked at the
> > > > > patches.
> > > > > > > >
> > > > > > > > Then, I suppose that the TX completions can be handled in the
> > > TX
> > > > > > > function, and the RX completions can be handled in the RX
> > > function,
> > > > > > > just like the Ethdev PMDs handle packet descriptors:
> > > > > > > >
> > > > > > > > TX_Burst(tx_packet_array):
> > > > > > > > 1.  Clean up descriptors processed by the NIC chip. -->
> > > Process
> > > > > TX
> > > > > > > DMA channel completions. (Effectively, the 2nd pipeline stage.)
> > > > >

RE: [RFC PATCH] cryptodev: add diffie hellman verify, change ec enum

2022-04-07 Thread Kusztal, ArkadiuszX
Hi,

Some explanations below.

> -Original Message-
> From: Kusztal, ArkadiuszX 
> Sent: Thursday, April 7, 2022 3:43 PM
> To: dev@dpdk.org
> Cc: gak...@marvell.com; Zhang, Roy Fan ; Kusztal,
> ArkadiuszX 
> Subject: [RFC PATCH] cryptodev: add diffie hellman verify, change ec enum
> 
> This commit:
> 1) adds Diffie-Hellman verify operation.
> 2) splits asym_op_type with dh op_type
> 3) removes next pointer from asym_xform
> 4) changes enumeration of elliptic curves
> 
> Signed-off-by: Arek Kusztal 
> ---
>  lib/cryptodev/rte_crypto_asym.h | 36 +---
>  1 file changed, 17 insertions(+), 19 deletions(-)
> 
> diff --git a/lib/cryptodev/rte_crypto_asym.h b/lib/cryptodev/rte_crypto_asym.h
> index cd24d4b07b..6fbc7b7708 100644
> --- a/lib/cryptodev/rte_crypto_asym.h
> +++ b/lib/cryptodev/rte_crypto_asym.h
> @@ -43,11 +43,11 @@ rte_crypto_asym_op_strings[];
>   */
>  enum rte_crypto_ec_group {
>   RTE_CRYPTO_EC_GROUP_UNKNOWN  = 0,
> - RTE_CRYPTO_EC_GROUP_SECP192R1 = 19,
> - RTE_CRYPTO_EC_GROUP_SECP224R1 = 21,
> - RTE_CRYPTO_EC_GROUP_SECP256R1 = 23,
> - RTE_CRYPTO_EC_GROUP_SECP384R1 = 24,
> - RTE_CRYPTO_EC_GROUP_SECP521R1 = 25,
> + RTE_CRYPTO_EC_GROUP_SECP192R1 = 1,
> + RTE_CRYPTO_EC_GROUP_SECP224R1 = 2,
> + RTE_CRYPTO_EC_GROUP_SECP256R1 = 3,
> + RTE_CRYPTO_EC_GROUP_SECP384R1 = 4,
> + RTE_CRYPTO_EC_GROUP_SECP521R1 = 5,
>  };

[Arek] - this one we could change for the following reasons:
- this is a TLS-specific registry; these values do not need to correspond with
other protocols like IKEv2
- we cannot set deprecated values < 19
- the TLS registry is to some extent incorrectly named too. It contains mod exp
groups as well, and we do not even support those. But if we did, it would
probably not be "crypto_ec_group"
 

> 
>  /**
> @@ -109,13 +109,19 @@ enum rte_crypto_asym_op_type {
>   /**< Signature Generation operation */
>   RTE_CRYPTO_ASYM_OP_VERIFY,
>   /**< Signature Verification operation */
> - RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE,
> + RTE_CRYPTO_ASYM_OP_LIST_END
> +};
> +
> +enum rte_crypto_dh_op_type {
> + RTE_CRYPTO_DH_OP_INVALID,
> + RTE_CRYPTO_DH_OP_PRIVATE_KEY_GENERATE,
>   /**< DH Private Key generation operation */
> - RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE,
> + RTE_CRYPTO_DH_OP_PUBLIC_KEY_GENERATE,
>   /**< DH Public Key generation operation */
> - RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE,
> + RTE_CRYPTO_DH_OP_SHARED_SECRET_COMPUTE,
>   /**< DH Shared Secret compute operation */
> - RTE_CRYPTO_ASYM_OP_LIST_END
> + RTE_CRYPTO_DH_OP_PUBLIC_KEY_VERIFY,
> + /**< DH verify correctness of public key */
>  };
[Arek] - DH verify needs to be added anyway, but the RFC here is because I have
split asym_op_type into dh_op_type and asym_op_type. This leaves asym_op_type
like this:
{
ENCRYPT,
DECRYPT,
AUTHENTICATE = SIGN,
VERIFY = VERIFY_SIGNATURE,
}
I know it is too late to create one generic op_type for symmetric and
asymmetric, but it is still better to group crypto and key exchange operations.
> 
>  /**
> @@ -256,7 +262,7 @@ struct rte_crypto_modinv_xform {
>   *
>   */
>  struct rte_crypto_dh_xform {
> - enum rte_crypto_asym_op_type type;
> + enum rte_crypto_dh_op_type type;
>   /**< Setup xform for key generate or shared secret compute */
>   rte_crypto_uint p;
>   /**< Prime modulus data */
> @@ -278,13 +284,7 @@ struct rte_crypto_dsa_xform {
>   rte_crypto_uint g;
>   /**< Generator of the subgroup */
>   rte_crypto_uint x;
> - /**< x: Private key of the signer in octet-string network
> -  * byte order format.
> -  * Used when app has pre-defined private key.
> -  * Valid only when xform chain is DSA ONLY.
> -  * if xform chain is DH private key generate + DSA, then DSA sign
> -  * compute will use internally generated key.
> -  */
> + /**< x: Private key */
[Arek] - unless someone can show how it works we should drop it.
>  };
> 
>  /**
> @@ -504,8 +504,6 @@ struct rte_crypto_ecpm_op_param {
>   * Structure describing asym xforms.
>   */
>  struct rte_crypto_asym_xform {
> - struct rte_crypto_asym_xform *next;
> - /**< Pointer to next xform to set up xform chain.*/
[Arek] - same as above, unless there is any application for it we should drop 
it.
>   enum rte_crypto_asym_xform_type xform_type;
>   /**< Asymmetric crypto transform */
> 
> --
> 2.13.6



Re: [PATCH v5] ip_frag: add IPv4 options fragment and test data

2022-04-07 Thread Aaron Conole
Hi,

"Ananyev, Konstantin"  writes:

> Hi Huichao,
>
>  
>
> In general yes, it is the developer's responsibility to address any issues with
> his/her patches.

+1

> In that particular case, looking at the logs, it seems to be some
> misconfiguration
>
> on the test machine, not related in any way to your changes.
>
> BTW, there are few similar failures with other patches at about the same date:
>
> https://lab.dpdk.org/results/dashboard/patchsets/21562/
>
> https://lab.dpdk.org/results/dashboard/patchsets/21546/
>
> Which again, makes me think that  it is just a tesc-config related failure.
>
> What is the best way to deal with it?

Agreed.  I've CC'd UNH lab, but in this case I think these are the BRCM
managed systems.

> Probably the easiest and safest thing is to resubmit the patch to force
>
> another run of the test harness.
>
> Aaron, is there any better way to deal with it?

At the moment, no.  We do have an effort for resubmits to be requested -
but that hasn't been completed yet.

> Thanks
>
> Konstantin
>
>  
>
>  
>
> From: Huichao Cai  
> Sent: Wednesday, April 6, 2022 2:22 AM
> To: Ananyev, Konstantin 
> Cc: dev@dpdk.org
> Subject: Re:RE: [PATCH v5] ip_frag: add IPv4 options fragment and test data
>
>  
>
> Hi Konstantin,
>
>  
>
> This patch has a test case failure: ci/iol-broadcom-Functional.
>
> Failed Tests:
>
>- mtu_update
>
>- scatter
>
> The same goes for many other patches. Do I need to deal with it, and how
> should I deal with it?
>
>  
>
> Huichao,Cai



Re: OVS DPDK DMA-Dev library/Design Discussion

2022-04-07 Thread Maxime Coquelin

Hi Harry,

On 4/7/22 16:04, Van Haaren, Harry wrote:

Hi OVS & DPDK, Maintainers & Community,

Top posting overview of discussion as replies to thread become slower:
perhaps it is a good time to review and plan for next steps?

 From my perspective, those most vocal in the thread seem to be in favour of
the clean rx/tx split ("defer work"), with the tradeoff that the application
must be aware of handling the async DMA completions. If there are any concerns
opposing upstreaming of this method, please indicate this promptly, and we can
continue technical discussions here now.


Wasn't there some discussion about handling the Virtio completions with
the DMA engine? With that, we wouldn't need the deferral of work.

Thanks,
Maxime


In absence of continued technical discussion here, I suggest Sunil and Ian 
collaborate on getting
the OVS Defer-work approach, and DPDK VHost Async patchsets available on GitHub 
for easier
consumption and future development (as suggested in slides presented on last 
call).

Regards, -Harry

No inline-replies below; message just for context.


-Original Message-
From: Van Haaren, Harry
Sent: Wednesday, March 30, 2022 10:02 AM
To: Morten Brørup ; Richardson, Bruce

Cc: Maxime Coquelin ; Pai G, Sunil
; Stokes, Ian ; Hu, Jiayu
; Ferriter, Cian ; Ilya Maximets
; ovs-...@openvswitch.org; dev@dpdk.org; Mcnamara,
John ; O'Driscoll, Tim ;
Finn, Emma 
Subject: RE: OVS DPDK DMA-Dev library/Design Discussion


-Original Message-
From: Morten Brørup 
Sent: Tuesday, March 29, 2022 8:59 PM
To: Van Haaren, Harry ; Richardson, Bruce

Cc: Maxime Coquelin ; Pai G, Sunil
; Stokes, Ian ; Hu, Jiayu
; Ferriter, Cian ; Ilya Maximets
; ovs-...@openvswitch.org; dev@dpdk.org; Mcnamara,

John

; O'Driscoll, Tim ; Finn,
Emma 
Subject: RE: OVS DPDK DMA-Dev library/Design Discussion


From: Van Haaren, Harry [mailto:harry.van.haa...@intel.com]
Sent: Tuesday, 29 March 2022 19.46


From: Morten Brørup 
Sent: Tuesday, March 29, 2022 6:14 PM


From: Bruce Richardson [mailto:bruce.richard...@intel.com]
Sent: Tuesday, 29 March 2022 19.03

On Tue, Mar 29, 2022 at 06:45:19PM +0200, Morten Brørup wrote:

From: Maxime Coquelin [mailto:maxime.coque...@redhat.com]
Sent: Tuesday, 29 March 2022 18.24

Hi Morten,

On 3/29/22 16:44, Morten Brørup wrote:

From: Van Haaren, Harry [mailto:harry.van.haa...@intel.com]
Sent: Tuesday, 29 March 2022 15.02


From: Morten Brørup 
Sent: Tuesday, March 29, 2022 1:51 PM

Having thought more about it, I think that a completely

different

architectural approach is required:


Many of the DPDK Ethernet PMDs implement a variety of RX

and TX

packet burst functions, each optimized for different CPU vector
instruction sets. The availability of a DMA engine should be

treated

the same way. So I suggest that PMDs copying packet contents,

e.g.

memif, pcap, vmxnet3, should implement DMA optimized RX and TX

packet

burst functions.


Similarly for the DPDK vhost library.

In such an architecture, it would be the application's job

to

allocate DMA channels and assign them to the specific PMDs that

should

use them. But the actual use of the DMA channels would move

down

below

the application and into the DPDK PMDs and libraries.



Med venlig hilsen / Kind regards,
-Morten Brørup


Hi Morten,

That's *exactly* how this architecture is designed &

implemented.

1.  The DMA configuration and initialization is up to the

application

(OVS).

2.  The VHost library is passed the DMA-dev ID, and its

new

async

rx/tx APIs, and uses the DMA device to accelerate the copy.


Looking forward to talking on the call that just started.

Regards, -

Harry




OK, thanks - as I said on the call, I haven't looked at the

patches.


Then, I suppose that the TX completions can be handled in the

TX

function, and the RX completions can be handled in the RX

function,

just like the Ethdev PMDs handle packet descriptors:


TX_Burst(tx_packet_array):
1.  Clean up descriptors processed by the NIC chip. -->

Process

TX

DMA channel completions. (Effectively, the 2nd pipeline stage.)

2.  Pass on the tx_packet_array to the NIC chip

descriptors. --

Pass

on the tx_packet_array to the TX DMA channel. (Effectively, the

1st

pipeline stage.)

The problem is Tx function might not be called again, so

enqueued

packets in 2. may never be completed from a Virtio point of

view.

IOW,

the packets will be copied to the Virtio descriptors buffers,

but

the

descriptors will not be made available to the Virtio driver.


In that case, the application needs to call TX_Burst()

periodically

with an empty array, for completion purposes.


This is what the "defer work" does at the OVS thread-level, but instead
of
"brute-forcing" and *always* making the call, the defer work concept
tracks
*when* there is outstanding work (DMA copies) to be completed
("deferred work")
and calls the generic completion function at that point.

So "defer work" is generic infrastructure at the OVS threa

ethdev: mtr: input color - Discussion

2022-04-07 Thread Jerin Jacob Kollanukkaran
[Calendar invite]
Organizer: Jerin Jacob Kollanukkaran <jer...@marvell.com>
Required attendees: Dumitrescu, Cristian <cristian.dumitre...@intel.com>,
 Sunil Kumar Kori <sk...@marvell.com>, dev@dpdk.org
Subject: ethdev: mtr: input color - Discussion
When: 11 April 2022, 18:30-19:00 India Standard Time

Agenda:
To discuss the patch
https://patches.dpdk.org/project/dpdk/patch/20220301085824.1041009-1-sk...@marvell.com/

Let me know if the time needs to change to include any interested participants.

Join Zoom Meeting:
https://marvell.zoom.us/j/9901077677?pwd=T2lTTGMwYlc1YTQzMnR4eGRWQXR6QT09
Password: 339888

Or Telephone (dial a number based on your current location):
US: +1 301 715 8592, +1 312 626 6799, +1 346 248 7799, +1 646 558 8656,
+1 669 900 6833, +1 253 215 8782, or toll free 888 788 0099 / 833 548 0276 /
833 548 0282 / 877 853 5247
Meeting ID: 990 107 7677
Password: 358309
International numbers available: https://marvell.zoom.us/u/adpcCpMHYt

Or a Video Conference Room:
From Touchpad: Tap Join Zoom button. When prompted, enter 990 107 7677
Password: 358309

For China locations, from Touchpad: Dial * then 990 107 7677
Password: 358309


[PATCH] crypto/qat: add ecdh key exchange algorithm

2022-04-07 Thread Arek Kusztal
This commit adds Elliptic Curve Diffie-Hellman
algorithm to Intel QuickAssist Technology PMD.

Signed-off-by: Arek Kusztal 
---
Depends-on: patch-109409 ("cryptodev: add elliptic curve diffie hellman")

 drivers/crypto/qat/qat_asym.c | 95 +++
 1 file changed, 95 insertions(+)

diff --git a/drivers/crypto/qat/qat_asym.c b/drivers/crypto/qat/qat_asym.c
index c2a985b355..5dccd26201 100644
--- a/drivers/crypto/qat/qat_asym.c
+++ b/drivers/crypto/qat/qat_asym.c
@@ -831,6 +831,63 @@ dh_mod_set_input(struct rte_crypto_asym_op *asym_op,
 }
 
 static int
+ecdh_set_input(struct rte_crypto_asym_op *asym_op,
+   struct icp_qat_fw_pke_request *qat_req,
+   struct qat_asym_op_cookie *cookie,
+   struct rte_crypto_asym_xform *xform)
+{
+   struct qat_asym_function qat_function;
+   uint32_t qat_func_alignsize, func_id;
+   int curve_id;
+
+   curve_id = pick_curve(xform);
+   if (curve_id < 0) {
+   QAT_LOG(DEBUG, "Incorrect elliptic curve");
+   return -EINVAL;
+   }
+
+   qat_function = get_ecpm_function(xform);
+   func_id = qat_function.func_id;
+   if (func_id == 0) {
+   QAT_LOG(ERR, "Cannot obtain functionality id");
+   return -EINVAL;
+   }
+   qat_func_alignsize = RTE_ALIGN_CEIL(qat_function.bytesize, 8);
+
+   if (asym_op->dh.op_type == RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE) {
+   SET_PKE_LN(asym_op->dh.priv_key, qat_func_alignsize, 0);
+   SET_PKE_LN_EC(curve[curve_id], x, 1);
+   SET_PKE_LN_EC(curve[curve_id], y, 2);
+   } else {
+   SET_PKE_LN(asym_op->dh.priv_key, qat_func_alignsize, 0);
+   SET_PKE_LN(asym_op->dh.pub_point.x, qat_func_alignsize, 1);
+   SET_PKE_LN(asym_op->dh.pub_point.y, qat_func_alignsize, 2);
+   }
+   SET_PKE_LN_EC(curve[curve_id], a, 3);
+   SET_PKE_LN_EC(curve[curve_id], b, 4);
+   SET_PKE_LN_EC(curve[curve_id], p, 5);
+   SET_PKE_LN_EC(curve[curve_id], h, 6);
+
+   cookie->alg_bytesize = curve[curve_id].bytesize;
+   cookie->qat_func_alignsize = qat_func_alignsize;
+   qat_req->pke_hdr.cd_pars.func_id = func_id;
+   qat_req->input_param_count =
+   QAT_ASYM_ECPM_IN_PARAMS;
+   qat_req->output_param_count =
+   QAT_ASYM_ECPM_OUT_PARAMS;
+
+   HEXDUMP("k", cookie->input_array[0], qat_func_alignsize);
+   HEXDUMP("xG", cookie->input_array[1], qat_func_alignsize);
+   HEXDUMP("yG", cookie->input_array[2], qat_func_alignsize);
+   HEXDUMP("a", cookie->input_array[3], qat_func_alignsize);
+   HEXDUMP("b", cookie->input_array[4], qat_func_alignsize);
+   HEXDUMP("q", cookie->input_array[5], qat_func_alignsize);
+   HEXDUMP("h", cookie->input_array[6], qat_func_alignsize);
+
+   return 0;
+}
+
+static int
 dh_set_input(struct rte_crypto_asym_op *asym_op,
struct icp_qat_fw_pke_request *qat_req,
struct qat_asym_op_cookie *cookie,
@@ -839,6 +896,8 @@ dh_set_input(struct rte_crypto_asym_op *asym_op,
switch (xform->xform_type) {
case RTE_CRYPTO_ASYM_XFORM_DH:
return dh_mod_set_input(asym_op, qat_req, cookie, xform);
+   case RTE_CRYPTO_ASYM_XFORM_ECDH:
+   return ecdh_set_input(asym_op, qat_req, cookie, xform);
default:
QAT_LOG(ERR,
"Invalid/unsupported asymmetric crypto xform type");
@@ -866,6 +925,38 @@ dh_collect(struct rte_crypto_asym_op *asym_op,
return RTE_CRYPTO_OP_STATUS_SUCCESS;
 }
 
+static uint8_t
+ecdh_collect(struct rte_crypto_asym_op *asym_op,
+   struct qat_asym_op_cookie *cookie,
+   struct rte_crypto_asym_xform *xform)
+{
+   uint8_t *x, *y;
+   uint32_t alg_bytesize = cookie->alg_bytesize;
+   uint32_t qat_func_alignsize = cookie->qat_func_alignsize;
+   uint32_t ltrim = qat_func_alignsize - alg_bytesize;
+
+   if (asym_op->dh.op_type == RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE) {
+   asym_op->dh.pub_point.x.length = alg_bytesize;
+   asym_op->dh.pub_point.y.length = alg_bytesize;
+   x = asym_op->dh.pub_point.x.data;
+   y = asym_op->dh.pub_point.y.data;
+   } else {
+   asym_op->dh.shared_point.x.length = alg_bytesize;
+   asym_op->dh.shared_point.y.length = alg_bytesize;
+   x = asym_op->dh.shared_point.x.data;
+   y = asym_op->dh.shared_point.y.data;
+   }
+
+   rte_memcpy(x, &cookie->output_array[0][ltrim], alg_bytesize);
+   rte_memcpy(y, &cookie->output_array[1][ltrim], alg_bytesize);
+
+   HEXDUMP("X", cookie->output_array[0],
+   qat_func_alignsize);
+   HEXDUMP("Y", cookie->output_array[1],
+   qat_func_alignsize);
+   return RTE_CRYPTO_OP_STATUS_SUCCESS;
+}
+
 static int
 asym_set_input(struc
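
For context, a rough sketch of how an application might drive this new path;
the op/xform fields below are those used in the diff (they come from the
dependent cryptodev ECDH patch), while the surrounding op allocation, curve
selection and buffers are assumed:

#include <rte_crypto.h>
#include <rte_crypto_asym.h>

/* Illustrative only: prepare an ECDH public-key-generate operation as
 * consumed by ecdh_set_input()/ecdh_collect() above.  Curve selection
 * and buffer management are assumed to be handled by the caller. */
static void
prepare_ecdh_pub_key_gen(struct rte_crypto_op *op,
		struct rte_crypto_asym_xform *xform,
		uint8_t *priv_key, size_t key_len)
{
	struct rte_crypto_asym_op *asym_op = op->asym;

	xform->xform_type = RTE_CRYPTO_ASYM_XFORM_ECDH;

	asym_op->dh.op_type = RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE;
	asym_op->dh.priv_key.data = priv_key;	/* private scalar k */
	asym_op->dh.priv_key.length = key_len;
	/* On completion, ecdh_collect() fills asym_op->dh.pub_point.x/.y.
	 * For shared-secret computation, set dh.pub_point as the input
	 * instead and read dh.shared_point from the completed op. */
}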

Re: OVS DPDK DMA-Dev library/Design Discussion

2022-04-07 Thread Ilya Maximets
On 4/7/22 16:25, Maxime Coquelin wrote:
> Hi Harry,
> 
> On 4/7/22 16:04, Van Haaren, Harry wrote:
>> Hi OVS & DPDK, Maintainers & Community,
>>
>> Top posting overview of discussion as replies to thread become slower:
>> perhaps it is a good time to review and plan for next steps?
>>
>>  From my perspective, it those most vocal in the thread seem to be in favour 
>> of the clean
>> rx/tx split ("defer work"), with the tradeoff that the application must be 
>> aware of handling
>> the async DMA completions. If there are any concerns opposing upstreaming of 
>> this method,
>> please indicate this promptly, and we can continue technical discussions 
>> here now.
> 
> Wasn't there some discussions about handling the Virtio completions with
> the DMA engine? With that, we wouldn't need the deferral of work.

+1

With the virtio completions handled by DMA itself, the vhost port
turns almost into a real HW NIC.  With that we will not need any
extra manipulations from the OVS side, i.e. no need to defer any
work while maintaining clear split between rx and tx operations.

I'd vote for that.

> 
> Thanks,
> Maxime
> 
>> In absence of continued technical discussion here, I suggest Sunil and Ian 
>> collaborate on getting
>> the OVS Defer-work approach, and DPDK VHost Async patchsets available on 
>> GitHub for easier
>> consumption and future development (as suggested in slides presented on last 
>> call).
>>
>> Regards, -Harry
>>
>> No inline-replies below; message just for context.
>>
>>> -Original Message-
>>> From: Van Haaren, Harry
>>> Sent: Wednesday, March 30, 2022 10:02 AM
>>> To: Morten Brørup ; Richardson, Bruce
>>> 
>>> Cc: Maxime Coquelin ; Pai G, Sunil
>>> ; Stokes, Ian ; Hu, Jiayu
>>> ; Ferriter, Cian ; Ilya 
>>> Maximets
>>> ; ovs-...@openvswitch.org; dev@dpdk.org; Mcnamara,
>>> John ; O'Driscoll, Tim ;
>>> Finn, Emma 
>>> Subject: RE: OVS DPDK DMA-Dev library/Design Discussion
>>>
 -Original Message-
 From: Morten Brørup 
 Sent: Tuesday, March 29, 2022 8:59 PM
 To: Van Haaren, Harry ; Richardson, Bruce
 
 Cc: Maxime Coquelin ; Pai G, Sunil
 ; Stokes, Ian ; Hu, Jiayu
 ; Ferriter, Cian ; Ilya 
 Maximets
 ; ovs-...@openvswitch.org; dev@dpdk.org; Mcnamara,
>>> John
 ; O'Driscoll, Tim ; Finn,
 Emma 
 Subject: RE: OVS DPDK DMA-Dev library/Design Discussion

> From: Van Haaren, Harry [mailto:harry.van.haa...@intel.com]
> Sent: Tuesday, 29 March 2022 19.46
>
>> From: Morten Brørup 
>> Sent: Tuesday, March 29, 2022 6:14 PM
>>
>>> From: Bruce Richardson [mailto:bruce.richard...@intel.com]
>>> Sent: Tuesday, 29 March 2022 19.03
>>>
>>> On Tue, Mar 29, 2022 at 06:45:19PM +0200, Morten Brørup wrote:
> From: Maxime Coquelin [mailto:maxime.coque...@redhat.com]
> Sent: Tuesday, 29 March 2022 18.24
>
> Hi Morten,
>
> On 3/29/22 16:44, Morten Brørup wrote:
>>> From: Van Haaren, Harry [mailto:harry.van.haa...@intel.com]
>>> Sent: Tuesday, 29 March 2022 15.02
>>>
 From: Morten Brørup 
 Sent: Tuesday, March 29, 2022 1:51 PM

 Having thought more about it, I think that a completely
>>> different
> architectural approach is required:

 Many of the DPDK Ethernet PMDs implement a variety of RX
> and TX
> packet burst functions, each optimized for different CPU vector
> instruction sets. The availability of a DMA engine should be
>>> treated
> the same way. So I suggest that PMDs copying packet contents,
> e.g.
> memif, pcap, vmxnet3, should implement DMA optimized RX and TX
>>> packet
> burst functions.

 Similarly for the DPDK vhost library.

 In such an architecture, it would be the application's job
> to
> allocate DMA channels and assign them to the specific PMDs that
>>> should
> use them. But the actual use of the DMA channels would move
> down
>>> below
> the application and into the DPDK PMDs and libraries.


 Med venlig hilsen / Kind regards,
 -Morten Brørup
>>>
>>> Hi Morten,
>>>
>>> That's *exactly* how this architecture is designed &
>>> implemented.
>>> 1.    The DMA configuration and initialization is up to the
>>> application
> (OVS).
>>> 2.    The VHost library is passed the DMA-dev ID, and its
> new
>>> async
> rx/tx APIs, and uses the DMA device to accelerate the copy.
>>>
>>> Looking forward to talking on the call that just started.
>>> Regards, -
> Harry
>>>
>>
>> OK, thanks - as I said on the call, I haven't looked at the
>>> patches.
>>
>> Then, I suppose that the TX completions can be handled in the

Re: [PATCH v3 1/1] ethdev: mtr: support input color selection

2022-04-07 Thread Jerin Jacob
On Thu, Apr 7, 2022 at 6:55 PM Dumitrescu, Cristian
 wrote:
>
> >
> > To summarize, we have 4 attributes. Please find the truth table below:
> > 1) Outer VLAN
> > 2) Outer IP
> > 3) Inner VLAN
> > 4) Inner IP
> >
> >
> > Inner IP / Inner VLAN / Outer IP / Outer VLAN
> > 0 0 0 0 - Not a valid case
> > 0 0 0 1 - RTE_MTR_INPUT_COLOR_METHOD_OUTER_VLAN
> > 0 0 1 0 - RTE_MTR_INPUT_COLOR_METHOD_OUTER_IP
> > 0 0 1 1 - RTE_MTR_INPUT_COLOR_METHOD_OUTER_VLAN_OUTER_IP
> >           (if an outer VLAN is found, use it, else outer IP)
> > 0 1 0 0 - RTE_MTR_INPUT_COLOR_METHOD_INNER_VLAN
> > 0 1 0 1 - RTE_MTR_INPUT_COLOR_METHOD_INNER_VLAN_OUTER_VLAN
> >           (if an inner VLAN is found, use it, else outer VLAN)
> > 0 1 1 0 - RTE_MTR_INPUT_COLOR_METHOD_INNER_VLAN_OUTER_IP
> > 0 1 1 1 - RTE_MTR_INPUT_COLOR_METHOD_INNER_VLAN_OUTER_IP_OUTER_VLAN
> >           (if an inner VLAN is found, use it, else outer IP, else outer VLAN)
> > 1 0 0 0 - RTE_MTR_INPUT_COLOR_METHOD_INNER_IP
> > 1 0 0 1 - RTE_MTR_INPUT_COLOR_METHOD_INNER_IP_OUTER_VLAN
> > 1 0 1 0 - RTE_MTR_INPUT_COLOR_METHOD_INNER_IP_OUTER_IP
> > 1 0 1 1 - RTE_MTR_INPUT_COLOR_METHOD_INNER_IP_OUTER_IP_OUTER_VLAN
> > 1 1 0 0 - RTE_MTR_INPUT_COLOR_METHOD_INNER_IP_INNER_VLAN
> > 1 1 0 1 - RTE_MTR_INPUT_COLOR_METHOD_INNER_IP_INNER_VLAN_OUTER_VLAN
> > 1 1 1 0 - RTE_MTR_INPUT_COLOR_METHOD_INNER_IP_INNER_VLAN_OUTER_IP
> > 1 1 1 1 - RTE_MTR_INPUT_COLOR_METHOD_INNER_IP_INNER_VLAN_OUTER_IP_OUTER_VLAN
> > Is the above enumeration fine? If not, please suggest an alternative.
> >
> > In terms of naming, we could either:
> > a) omit the explicit OUTER to reduce the length, as suggested, or
> > b) switch to an IIP, OIP, IVLAN, OVLAN style scheme to shorten the names.
> >
> > Let me know which names and enumeration you prefer, and I will change
> > them accordingly in the next version.
> >
>
> Hi Jerin,
>
> The above table looks confusing to me, I suggest we have a meeting next week 
> to go over it and then report back to the list?

I have scheduled a meeting on 11th April at 18:30 IST.


Agenda:
To discuss the patch
https://patches.dpdk.org/project/dpdk/patch/20220301085824.1041009-1-sk...@marvell.com/


Let me know if the time needs to change to include any interested participants.


Hi there,

Jerin Jacob is inviting you to a scheduled Zoom meeting.

Topic: Jerin Jacob Kollanukkaran's Personal Meeting Room


Join Zoom Meeting:
https://marvell.zoom.us/j/9901077677?pwd=T2lTTGMwYlc1YTQzMnR4eGRWQXR6QT09
Password: 339888


Or Telephone:
Dial(for higher quality, dial a number based on your current location):
US: +1 301 715 8592  or +1 312 626 6799  or +1 346 248 7799
or +1 646 558 8656  or +1 669 900 6833  or +1 253 215 8782  or 888 788
0099 (Toll Free) or 833 548 0276 (Toll Free) or 833 548 0282 (Toll
Free) or 877 853 5247 (Toll Free)
Meeting ID: 990 107 7677
Password: 358309
International numbers available: https://marvell.zoom.us/u/adpcCpMHYt

Or a Video Conference Room:
From Touchpad: Tap Join Zoom button. When prompted, enter 990 107 7677
Password: 358309

For China locations, from Touchpad: Dial* then 990 107 7677
Password: 358309

>
> Regards,
> Cristian


RE: OVS DPDK DMA-Dev library/Design Discussion

2022-04-07 Thread Van Haaren, Harry
> -Original Message-
> From: Ilya Maximets 
> Sent: Thursday, April 7, 2022 3:40 PM
> To: Maxime Coquelin ; Van Haaren, Harry
> ; Morten Brørup ;
> Richardson, Bruce 
> Cc: i.maxim...@ovn.org; Pai G, Sunil ; Stokes, Ian
> ; Hu, Jiayu ; Ferriter, Cian
> ; ovs-...@openvswitch.org; dev@dpdk.org; Mcnamara,
> John ; O'Driscoll, Tim ;
> Finn, Emma 
> Subject: Re: OVS DPDK DMA-Dev library/Design Discussion
> 
> On 4/7/22 16:25, Maxime Coquelin wrote:
> > Hi Harry,
> >
> > On 4/7/22 16:04, Van Haaren, Harry wrote:
> >> Hi OVS & DPDK, Maintainers & Community,
> >>
> >> Top posting overview of discussion as replies to thread become slower:
> >> perhaps it is a good time to review and plan for next steps?
> >>
> >>  From my perspective, it those most vocal in the thread seem to be in 
> >> favour
> of the clean
> >> rx/tx split ("defer work"), with the tradeoff that the application must be
> aware of handling
> >> the async DMA completions. If there are any concerns opposing upstreaming
> of this method,
> >> please indicate this promptly, and we can continue technical discussions 
> >> here
> now.
> >
> > Wasn't there some discussions about handling the Virtio completions with
> > the DMA engine? With that, we wouldn't need the deferral of work.
> 
> +1

Yes there was; the DMA/virtq completions thread is here for reference:
https://mail.openvswitch.org/pipermail/ovs-dev/2022-March/392908.html

I do not believe that there is a viable path to actually implementing it, and 
particularly
not in the more complex cases; e.g. virtio with guest-interrupt enabled.

The thread above mentions additional threads and various other options; none of 
which
I believe to be a clean or workable solution. I'd like input from other folks 
more familiar
with the exact implementations of VHost/vrings, as well as those with DMA 
engine expertise.


> With the virtio completions handled by DMA itself, the vhost port
> turns almost into a real HW NIC.  With that we will not need any
> extra manipulations from the OVS side, i.e. no need to defer any
> work while maintaining clear split between rx and tx operations.
> 
> I'd vote for that.
> 
> >
> > Thanks,
> > Maxime

Thanks for the prompt responses, and let's understand if there is a viable,
workable way to totally hide DMA completions from the application.

Regards,  -Harry


> >> In absence of continued technical discussion here, I suggest Sunil and Ian
> collaborate on getting
> >> the OVS Defer-work approach, and DPDK VHost Async patchsets available on
> GitHub for easier
> >> consumption and future development (as suggested in slides presented on
> last call).
> >>
> >> Regards, -Harry
> >>
> >> No inline-replies below; message just for context.
> >>
> >>> -Original Message-
> >>> From: Van Haaren, Harry
> >>> Sent: Wednesday, March 30, 2022 10:02 AM
> >>> To: Morten Brørup ; Richardson, Bruce
> >>> 
> >>> Cc: Maxime Coquelin ; Pai G, Sunil
> >>> ; Stokes, Ian ; Hu, Jiayu
> >>> ; Ferriter, Cian ; Ilya
> Maximets
> >>> ; ovs-...@openvswitch.org; dev@dpdk.org;
> Mcnamara,
> >>> John ; O'Driscoll, Tim
> ;
> >>> Finn, Emma 
> >>> Subject: RE: OVS DPDK DMA-Dev library/Design Discussion
> >>>
>  -Original Message-
>  From: Morten Brørup 
>  Sent: Tuesday, March 29, 2022 8:59 PM
>  To: Van Haaren, Harry ; Richardson, Bruce
>  
>  Cc: Maxime Coquelin ; Pai G, Sunil
>  ; Stokes, Ian ; Hu, Jiayu
>  ; Ferriter, Cian ; Ilya
> Maximets
>  ; ovs-...@openvswitch.org; dev@dpdk.org;
> Mcnamara,
> >>> John
>  ; O'Driscoll, Tim ;
> Finn,
>  Emma 
>  Subject: RE: OVS DPDK DMA-Dev library/Design Discussion
> 
> > From: Van Haaren, Harry [mailto:harry.van.haa...@intel.com]
> > Sent: Tuesday, 29 March 2022 19.46
> >
> >> From: Morten Brørup 
> >> Sent: Tuesday, March 29, 2022 6:14 PM
> >>
> >>> From: Bruce Richardson [mailto:bruce.richard...@intel.com]
> >>> Sent: Tuesday, 29 March 2022 19.03
> >>>
> >>> On Tue, Mar 29, 2022 at 06:45:19PM +0200, Morten Brørup wrote:
> > From: Maxime Coquelin [mailto:maxime.coque...@redhat.com]
> > Sent: Tuesday, 29 March 2022 18.24
> >
> > Hi Morten,
> >
> > On 3/29/22 16:44, Morten Brørup wrote:
> >>> From: Van Haaren, Harry [mailto:harry.van.haa...@intel.com]
> >>> Sent: Tuesday, 29 March 2022 15.02
> >>>
>  From: Morten Brørup 
>  Sent: Tuesday, March 29, 2022 1:51 PM
> 
>  Having thought more about it, I think that a completely
> >>> different
> > architectural approach is required:
> 
>  Many of the DPDK Ethernet PMDs implement a variety of RX
> > and TX
> > packet burst functions, each optimized for different CPU vector
> > instruction sets. The availability of a DMA engine should be
> >>> treated
> > the same way. So I suggest that PMDs copying packet contents,
> > e.g

[dpdk][PATCH 1/2] sched: enable/disable TC OV at runtime

2022-04-07 Thread Marcin Danilewicz
From: Megha Ajmera 

Added a new API to enable or disable TC oversubscription for the best
effort traffic class at the subport level.

By default, TC OV is disabled for a subport.

Signed-off-by: Megha Ajmera 
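
For context, a minimal usage sketch of the new API (a sketch only, assuming the
port and subport 0 have already been configured with
rte_sched_port_config()/rte_sched_subport_config()):

	/* Enable best-effort TC oversubscription on subport 0; it stays
	 * disabled by default. */
	int ret = rte_sched_subport_tc_ov_config(port, 0, true);

	if (ret != 0)
		RTE_LOG(ERR, SCHED,
			"Failed to enable TC oversubscription on subport 0: %d\n", ret);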

diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index ec74bee939..1d05089d00 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -155,6 +155,7 @@ struct rte_sched_subport {
uint64_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
/* TC oversubscription */
+   uint8_t is_tc_ov_enabled;
uint64_t tc_ov_wm;
uint64_t tc_ov_wm_min;
uint64_t tc_ov_wm_max;
@@ -1165,6 +1166,45 @@ rte_sched_cman_config(struct rte_sched_port *port,
 }
 #endif
 
+int
+rte_sched_subport_tc_ov_config(struct rte_sched_port *port,
+   uint32_t subport_id,
+   bool tc_ov_enable)
+{
+   struct rte_sched_subport *s;
+   struct rte_sched_subport_profile *profile;
+
+   if (port == NULL) {
+   RTE_LOG(ERR, SCHED,
+   "%s: Incorrect value for parameter port\n", __func__);
+   return -EINVAL;
+   }
+
+   if (subport_id >= port->n_subports_per_port) {
+   RTE_LOG(ERR, SCHED,
+   "%s: Incorrect value for parameter subport id\n", 
__func__);
+   return  -EINVAL;
+   }
+
+   s = port->subports[subport_id];
+   s->is_tc_ov_enabled = tc_ov_enable;
+
+   if (s->is_tc_ov_enabled) {
+   /* TC oversubscription */
+   s->tc_ov_wm_min = port->mtu;
+   s->tc_ov_period_id = 0;
+   s->tc_ov = 0;
+   s->tc_ov_n = 0;
+   s->tc_ov_rate = 0;
+
+   profile = port->subport_profiles + s->profile;
+   s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile->tc_period,
+   s->pipe_tc_be_rate_max);
+   s->tc_ov_wm = s->tc_ov_wm_max;
+   }
+   return 0;
+}
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
uint32_t subport_id,
@@ -1317,12 +1357,8 @@ rte_sched_subport_config(struct rte_sched_port *port,
for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++)
s->grinder_base_bmp_pos[i] = RTE_SCHED_PIPE_INVALID;
 
-   /* TC oversubscription */
-   s->tc_ov_wm_min = port->mtu;
-   s->tc_ov_period_id = 0;
-   s->tc_ov = 0;
-   s->tc_ov_n = 0;
-   s->tc_ov_rate = 0;
+   /* TC over-subscription is disabled by default */
+   s->is_tc_ov_enabled = 0;
}
 
{
@@ -1342,9 +1378,6 @@ rte_sched_subport_config(struct rte_sched_port *port,
else
profile->tc_credits_per_period[i] = 0;
 
-   s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile->tc_period,
-   s->pipe_tc_be_rate_max);
-   s->tc_ov_wm = s->tc_ov_wm_max;
s->profile = subport_profile_id;
 
}
@@ -1417,17 +1450,20 @@ rte_sched_pipe_config(struct rte_sched_port *port,
double pipe_tc_be_rate =
(double) 
params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
/ (double) params->tc_period;
-   uint32_t tc_be_ov = s->tc_ov;
 
-   /* Unplug pipe from its subport */
-   s->tc_ov_n -= params->tc_ov_weight;
-   s->tc_ov_rate -= pipe_tc_be_rate;
-   s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
+   if (s->is_tc_ov_enabled) {
+   uint32_t tc_be_ov = s->tc_ov;
 
-   if (s->tc_ov != tc_be_ov) {
-   RTE_LOG(DEBUG, SCHED,
-   "Subport %u Best-effort TC oversubscription is 
OFF (%.4lf >= %.4lf)\n",
-   subport_id, subport_tc_be_rate, s->tc_ov_rate);
+   /* Unplug pipe from its subport */
+   s->tc_ov_n -= params->tc_ov_weight;
+   s->tc_ov_rate -= pipe_tc_be_rate;
+   s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
+
+   if (s->tc_ov != tc_be_ov) {
+   RTE_LOG(DEBUG, SCHED,
+   "Subport %u Best-effort TC 
oversubscription is OFF (%.4lf >= %.4lf)\n",
+   subport_id, subport_tc_be_rate, 
s->tc_ov_rate);
+   }
}
 
/* Reset the pipe */
@@ -1460,19 +1496,22 @@ rte_sched_pipe_config(struct rte_sched_port *port,
double pipe_tc_be_rate =
(double) 
params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
/ (double) params->tc_period;
-   uint32_t tc_be_ov = s->tc_ov;
 
-   s->tc_ov_n += params->tc_ov_weight;
-   s->tc_ov_r

[dpdk][PATCH 2/2] sched: fix to manage TC OV at runtime

2022-04-07 Thread Marcin Danilewicz
Added changes after review and increased throughput.

Signed-off-by: Marcin Danilewicz 

diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index 1d05089d00..6e7d81df46 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -155,7 +155,6 @@ struct rte_sched_subport {
uint64_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
/* TC oversubscription */
-   uint8_t is_tc_ov_enabled;
uint64_t tc_ov_wm;
uint64_t tc_ov_wm_min;
uint64_t tc_ov_wm_max;
@@ -214,6 +213,9 @@ struct rte_sched_subport {
uint8_t *bmp_array;
struct rte_mbuf **queue_array;
uint8_t memory[0] __rte_cache_aligned;
+
+   /* TC oversubscription activation */
+   int is_tc_ov_enabled;
 } __rte_cache_aligned;
 
 struct rte_sched_port {
@@ -1187,7 +1189,7 @@ rte_sched_subport_tc_ov_config(struct rte_sched_port 
*port,
}
 
s = port->subports[subport_id];
-   s->is_tc_ov_enabled = tc_ov_enable;
+   s->is_tc_ov_enabled = tc_ov_enable ? 1 : 0;
 
if (s->is_tc_ov_enabled) {
/* TC oversubscription */
@@ -1294,6 +1296,9 @@ rte_sched_subport_config(struct rte_sched_port *port,
s->n_pipe_profiles = params->n_pipe_profiles;
s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
+   /* TC over-subscription is disabled by default */
+   s->is_tc_ov_enabled = 0;
+
 #ifdef RTE_SCHED_CMAN
if (params->cman_params != NULL) {
s->cman_enabled = true;
@@ -1356,9 +1361,6 @@ rte_sched_subport_config(struct rte_sched_port *port,
 
for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++)
s->grinder_base_bmp_pos[i] = RTE_SCHED_PIPE_INVALID;
-
-   /* TC over-subscription is disabled by default */
-   s->is_tc_ov_enabled = 0;
}
 
{
@@ -2514,12 +2516,15 @@ grinder_schedule(struct rte_sched_port *port,
uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
uint32_t be_tc_active;
 
-   if (unlikely(subport->is_tc_ov_enabled)) {
+   switch (subport->is_tc_ov_enabled) {
+   case 1:
if (!grinder_credits_check_with_tc_ov(port, subport, pos))
return 0;
-   } else {
+   break;
+   case 0:
if (!grinder_credits_check(port, subport, pos))
return 0;
+   break;
}
 
/* Advance port time */
-- 
2.25.1




Re: OVS DPDK DMA-Dev library/Design Discussion

2022-04-07 Thread Ilya Maximets
On 4/7/22 16:42, Van Haaren, Harry wrote:
>> -Original Message-
>> From: Ilya Maximets 
>> Sent: Thursday, April 7, 2022 3:40 PM
>> To: Maxime Coquelin ; Van Haaren, Harry
>> ; Morten Brørup ;
>> Richardson, Bruce 
>> Cc: i.maxim...@ovn.org; Pai G, Sunil ; Stokes, Ian
>> ; Hu, Jiayu ; Ferriter, Cian
>> ; ovs-...@openvswitch.org; dev@dpdk.org; Mcnamara,
>> John ; O'Driscoll, Tim ;
>> Finn, Emma 
>> Subject: Re: OVS DPDK DMA-Dev library/Design Discussion
>>
>> On 4/7/22 16:25, Maxime Coquelin wrote:
>>> Hi Harry,
>>>
>>> On 4/7/22 16:04, Van Haaren, Harry wrote:
 Hi OVS & DPDK, Maintainers & Community,

 Top posting overview of discussion as replies to thread become slower:
 perhaps it is a good time to review and plan for next steps?

  From my perspective, it those most vocal in the thread seem to be in 
 favour
>> of the clean
 rx/tx split ("defer work"), with the tradeoff that the application must be
>> aware of handling
 the async DMA completions. If there are any concerns opposing upstreaming
>> of this method,
 please indicate this promptly, and we can continue technical discussions 
 here
>> now.
>>>
>>> Wasn't there some discussions about handling the Virtio completions with
>>> the DMA engine? With that, we wouldn't need the deferral of work.
>>
>> +1
> 
> Yes there was, the DMA/virtq completions thread here for reference;
> https://mail.openvswitch.org/pipermail/ovs-dev/2022-March/392908.html
> 
> I do not believe that there is a viable path to actually implementing it, and 
> particularly
> not in the more complex cases; e.g. virtio with guest-interrupt enabled.
> 
> The thread above mentions additional threads and various other options; none 
> of which
> I believe to be a clean or workable solution. I'd like input from other folks 
> more familiar
> with the exact implementations of VHost/vrings, as well as those with DMA 
> engine expertise.

I tend to trust Maxime as a vhost maintainer in such questions. :)

In my own opinion, though, the implementation is possible and the concerns don't
sound deal-breaking, as solutions for them might work well enough.  So I think
the viability should be tested out before the solution is disregarded, especially
because the decision will shape the API of the vhost library.

> 
> 
>> With the virtio completions handled by DMA itself, the vhost port
>> turns almost into a real HW NIC.  With that we will not need any
>> extra manipulations from the OVS side, i.e. no need to defer any
>> work while maintaining clear split between rx and tx operations.
>>
>> I'd vote for that.
>>
>>>
>>> Thanks,
>>> Maxime
> 
> Thanks for the prompt responses, and lets understand if there is a viable 
> workable way
> to totally hide DMA-completions from the application.
> 
> Regards,  -Harry
> 
> 
 In absence of continued technical discussion here, I suggest Sunil and Ian
>> collaborate on getting
 the OVS Defer-work approach, and DPDK VHost Async patchsets available on
>> GitHub for easier
 consumption and future development (as suggested in slides presented on
>> last call).

 Regards, -Harry

 No inline-replies below; message just for context.

> -Original Message-
> From: Van Haaren, Harry
> Sent: Wednesday, March 30, 2022 10:02 AM
> To: Morten Brørup ; Richardson, Bruce
> 
> Cc: Maxime Coquelin ; Pai G, Sunil
> ; Stokes, Ian ; Hu, Jiayu
> ; Ferriter, Cian ; Ilya
>> Maximets
> ; ovs-...@openvswitch.org; dev@dpdk.org;
>> Mcnamara,
> John ; O'Driscoll, Tim
>> ;
> Finn, Emma 
> Subject: RE: OVS DPDK DMA-Dev library/Design Discussion
>
>> -Original Message-
>> From: Morten Brørup 
>> Sent: Tuesday, March 29, 2022 8:59 PM
>> To: Van Haaren, Harry ; Richardson, Bruce
>> 
>> Cc: Maxime Coquelin ; Pai G, Sunil
>> ; Stokes, Ian ; Hu, Jiayu
>> ; Ferriter, Cian ; Ilya
>> Maximets
>> ; ovs-...@openvswitch.org; dev@dpdk.org;
>> Mcnamara,
> John
>> ; O'Driscoll, Tim ;
>> Finn,
>> Emma 
>> Subject: RE: OVS DPDK DMA-Dev library/Design Discussion
>>
>>> From: Van Haaren, Harry [mailto:harry.van.haa...@intel.com]
>>> Sent: Tuesday, 29 March 2022 19.46
>>>
 From: Morten Brørup 
 Sent: Tuesday, March 29, 2022 6:14 PM

> From: Bruce Richardson [mailto:bruce.richard...@intel.com]
> Sent: Tuesday, 29 March 2022 19.03
>
> On Tue, Mar 29, 2022 at 06:45:19PM +0200, Morten Brørup wrote:
>>> From: Maxime Coquelin [mailto:maxime.coque...@redhat.com]
>>> Sent: Tuesday, 29 March 2022 18.24
>>>
>>> Hi Morten,
>>>
>>> On 3/29/22 16:44, Morten Brørup wrote:
> From: Van Haaren, Harry [mailto:harry.van.haa...@intel.com]
> Sent: Tuesday, 29 March 2022 15.02
>
>> From: Morten Brørup 
>> Sent: Tuesday, March 29, 2022 1:51 PM
>>
>

Re: [RFC 1/2] ethdev: port flags for pre-configuration flow hints

2022-04-07 Thread Stephen Hemminger
On Thu, 7 Apr 2022 13:30:46 +0800
Xiaoyu Min  wrote:

>   * @b EXPERIMENTAL: this API may change without prior notice.
> @@ -4972,6 +4983,11 @@ struct rte_flow_port_attr {
>* @see RTE_FLOW_ACTION_TYPE_METER
>*/
>   uint32_t nb_meters;
> + /**
> +  * Port flags.
> +  * @see enum rte_flow_port_flag
> +  */
> + enum rte_flow_port_flag flags;

This would have to wait until 22.11 because it is an ABI breakage.
Also, how would this work with existing users of the API?


Re: [PATCH v7] eal: fix rte_memcpy strict aliasing/alignment bugs

2022-04-07 Thread David Marchand
On Thu, Mar 10, 2022 at 3:55 PM Ananyev, Konstantin
 wrote:
> > Calls to rte_memcpy for 1 < n < 16 could result in unaligned
> > loads/stores, which is undefined behaviour according to the C
> > standard, and strict aliasing violations.
> >
> > The code was changed to use a packed structure that allows aliasing
> > (using the __may_alias__ attribute) to perform the load/store
> > operations. This results in code that has the same performance as the
> > original code and that is also C standards-compliant.
> >
> > Fixes: d35cc1fe6a7a ("eal/x86: revert select optimized memcpy at run-time")
> > Cc: sta...@dpdk.org
> >
> > Signed-off-by: Luc Pelletier 
> Acked-by: Konstantin Ananyev 
> Tested-by: Konstantin Ananyev 

Thanks, applied.


> As a side note, we probably need to check other similar places in DPDK code.

What would be the best way to detect those problematic places?

I tried UBsan, and it did report some of the issues fixed with this patch.
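
For reference, the aliasing-safe access pattern the commit message describes
boils down to something like the following illustrative helper (not the actual
rte_memcpy code), which is also the kind of construct such an audit would look
for:

#include <stdint.h>

/* A packed, may_alias struct lets the compiler emit an unaligned,
 * aliasing-safe load/store instead of relying on undefined behaviour. */
struct u64_una {
	uint64_t v;
} __attribute__((__packed__, __may_alias__));

static inline void
copy8_unaligned(void *dst, const void *src)
{
	((struct u64_una *)dst)->v = ((const struct u64_una *)src)->v;
}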


-- 
David Marchand



[PATCH] crypto/qat: use intel-ipsec-mb for partial hash

2022-04-07 Thread Fan Zhang
OpenSSL 3.0 deprecates the low-level API that QAT requires to
perform the partial hash operation when creating a session. This
patch transfers that dependency from OpenSSL to intel-ipsec-mb.

Signed-off-by: Fan Zhang 
---
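For reference, a standalone sketch of the intel-ipsec-mb one-block hash pattern
this patch switches to (header name and MB manager calls taken from the diff;
error handling trimmed, digest size hard-coded for SHA-1):

#include <stdint.h>
#include <intel-ipsec-mb.h>

/* Compute a SHA-1 partial hash (digest state after one 64-byte block),
 * mirroring what partial_hash_compute() now does through IPSec_MB. */
static int
partial_sha1_block(const uint8_t *block, uint8_t digest[20])
{
	IMB_MGR *m = alloc_mb_mgr(0);

	if (m == NULL)
		return -1;
	init_mb_mgr_auto(m, NULL);
	IMB_SHA1_ONE_BLOCK(m, block, digest);
	free_mb_mgr(m);
	return 0;
}
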
 drivers/common/qat/meson.build   |  10 +++
 drivers/crypto/qat/qat_sym_session.c | 101 +--
 2 files changed, 28 insertions(+), 83 deletions(-)

diff --git a/drivers/common/qat/meson.build b/drivers/common/qat/meson.build
index b7027f3164..d35fc69d96 100644
--- a/drivers/common/qat/meson.build
+++ b/drivers/common/qat/meson.build
@@ -35,6 +35,16 @@ if qat_crypto and not libcrypto.found()
 'missing dependency, libcrypto')
 endif
 
+
+IMB_required_ver = '1.0.0'
+libipsecmb = cc.find_library('IPSec_MB', required: false)
+if not libipsecmb.found()
+build = false
+reason = 'missing dependency, "libIPSec_MB"'
+else
+ext_deps += libipsecmb
+endif
+
 # The driver should not build if both compression and crypto are disabled
 #FIXME common code depends on compression files so check only compress!
 if not qat_compress # and not qat_crypto
diff --git a/drivers/crypto/qat/qat_sym_session.c 
b/drivers/crypto/qat/qat_sym_session.c
index 9d6a19c0be..05a11db750 100644
--- a/drivers/crypto/qat/qat_sym_session.c
+++ b/drivers/crypto/qat/qat_sym_session.c
@@ -6,6 +6,7 @@
 #include/* Needed to calculate pre-compute values */
 #include/* Needed to calculate pre-compute values */
 #include/* Needed for bpi runt block processing */
+#include 
 
 #include 
 #include 
@@ -1057,139 +1058,73 @@ static int qat_hash_get_block_size(enum 
icp_qat_hw_auth_algo qat_hash_alg)
return -EFAULT;
 }
 
-static int partial_hash_sha1(uint8_t *data_in, uint8_t *data_out)
-{
-   SHA_CTX ctx;
-
-   if (!SHA1_Init(&ctx))
-   return -EFAULT;
-   SHA1_Transform(&ctx, data_in);
-   rte_memcpy(data_out, &ctx, SHA_DIGEST_LENGTH);
-   return 0;
-}
-
-static int partial_hash_sha224(uint8_t *data_in, uint8_t *data_out)
-{
-   SHA256_CTX ctx;
-
-   if (!SHA224_Init(&ctx))
-   return -EFAULT;
-   SHA256_Transform(&ctx, data_in);
-   rte_memcpy(data_out, &ctx, SHA256_DIGEST_LENGTH);
-   return 0;
-}
-
-static int partial_hash_sha256(uint8_t *data_in, uint8_t *data_out)
-{
-   SHA256_CTX ctx;
-
-   if (!SHA256_Init(&ctx))
-   return -EFAULT;
-   SHA256_Transform(&ctx, data_in);
-   rte_memcpy(data_out, &ctx, SHA256_DIGEST_LENGTH);
-   return 0;
-}
-
-static int partial_hash_sha384(uint8_t *data_in, uint8_t *data_out)
-{
-   SHA512_CTX ctx;
-
-   if (!SHA384_Init(&ctx))
-   return -EFAULT;
-   SHA512_Transform(&ctx, data_in);
-   rte_memcpy(data_out, &ctx, SHA512_DIGEST_LENGTH);
-   return 0;
-}
-
-static int partial_hash_sha512(uint8_t *data_in, uint8_t *data_out)
-{
-   SHA512_CTX ctx;
-
-   if (!SHA512_Init(&ctx))
-   return -EFAULT;
-   SHA512_Transform(&ctx, data_in);
-   rte_memcpy(data_out, &ctx, SHA512_DIGEST_LENGTH);
-   return 0;
-}
-
-static int partial_hash_md5(uint8_t *data_in, uint8_t *data_out)
-{
-   MD5_CTX ctx;
-
-   if (!MD5_Init(&ctx))
-   return -EFAULT;
-   MD5_Transform(&ctx, data_in);
-   rte_memcpy(data_out, &ctx, MD5_DIGEST_LENGTH);
-
-   return 0;
-}
-
 static int
 partial_hash_compute(enum icp_qat_hw_auth_algo hash_alg,
uint8_t *data_in, uint8_t *data_out)
 {
+   IMB_MGR *m;
+   uint32_t *hash_state_out_be32;
+   uint64_t *hash_state_out_be64;
int digest_size;
uint8_t digest[qat_hash_get_digest_size(
ICP_QAT_HW_AUTH_ALGO_DELIMITER)];
-   uint32_t *hash_state_out_be32;
-   uint64_t *hash_state_out_be64;
int i;
 
+   hash_state_out_be32 = (uint32_t *)data_out;
+   hash_state_out_be64 = (uint64_t *)data_out;
+
/* Initialize to avoid gcc warning */
memset(digest, 0, sizeof(digest));
 
digest_size = qat_hash_get_digest_size(hash_alg);
if (digest_size <= 0)
return -EFAULT;
+   m = alloc_mb_mgr(0);
+   if (m == NULL)
+   return -ENOMEM;
 
-   hash_state_out_be32 = (uint32_t *)data_out;
-   hash_state_out_be64 = (uint64_t *)data_out;
+   init_mb_mgr_auto(m, NULL);
 
switch (hash_alg) {
case ICP_QAT_HW_AUTH_ALGO_SHA1:
-   if (partial_hash_sha1(data_in, digest))
-   return -EFAULT;
+   IMB_SHA1_ONE_BLOCK(m, data_in, digest);
for (i = 0; i < digest_size >> 2; i++, hash_state_out_be32++)
*hash_state_out_be32 =
rte_bswap32(*(((uint32_t *)digest)+i));
break;
case ICP_QAT_HW_AUTH_ALGO_SHA224:
-   if (partial_hash_sha224(data_in, digest))
-   return -EFAULT;
+   IMB_SHA224_ONE_BLOCK(m, data_in, dige

[PATCH v1 0/5] vhost: support async dequeue data path

2022-04-07 Thread xuan . ding
From: Xuan Ding 

The presence of the asynchronous path allows applications to offload
memory copies to a DMA engine, so as to save CPU cycles and improve
copy performance. This patch set implements the vhost async dequeue
data path for the split ring.

This patch set is a new design and implementation of [2]. Since dmadev
was introduced in DPDK 21.11, to simplify application logic, this patch
set integrates dmadev into vhost. With dmadev integrated, vhost supports M:N
mapping between vrings and DMA virtual channels. Specifically, one vring
can use multiple different DMA channels and one DMA channel can be
shared by multiple vrings at the same time.

A new asynchronous dequeue function is introduced:
1) rte_vhost_async_try_dequeue_burst(int vid, uint16_t queue_id,
struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
uint16_t count, int *nr_inflight,
uint16_t dma_id, uint16_t vchan_id)

Receives packets from the guest and offloads the copies to a DMA
virtual channel.
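
A minimal polling sketch of the new API (vid, queue_id, mbuf_pool, dma_id,
vchan_id and MAX_PKT_BURST are assumed to be set up by the application):

	/* Illustrative only: pull up to MAX_PKT_BURST packets from the guest,
	 * offloading the copies to DMA channel (dma_id, vchan_id). */
	int nr_inflight = 0;
	struct rte_mbuf *pkts[MAX_PKT_BURST];
	uint16_t nr_rx;

	nr_rx = rte_vhost_async_try_dequeue_burst(vid, queue_id, mbuf_pool,
			pkts, MAX_PKT_BURST, &nr_inflight, dma_id, vchan_id);
	/* Forward the nr_rx completed packets; copies still in flight are
	 * returned by later calls once the DMA engine finishes them. */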

[1] https://mails.dpdk.org/archives/dev/2022-February/234555.html
[2] https://mails.dpdk.org/archives/dev/2021-September/218591.html

RFC v3 -> v1:
* add sync and async path descriptor to mbuf refactoring
* add API description in docs

RFC v2 -> RFC v3:
* rebase to latest DPDK version

RFC v1 -> RFC v2:
* fix one bug in example
* rename vchan to vchan_id
* check if dma_id and vchan_id valid
* rework all the logs to new standard

Xuan Ding (5):
  vhost: prepare sync for descriptor to mbuf refactoring
  vhost: prepare async for descriptor to mbuf refactoring
  vhost: merge sync and async descriptor to mbuf filling
  vhost: support async dequeue for split ring
  examples/vhost: support async dequeue data path

 doc/guides/prog_guide/vhost_lib.rst|   7 +
 doc/guides/rel_notes/release_22_07.rst |   4 +
 doc/guides/sample_app_ug/vhost.rst |   9 +-
 examples/vhost/main.c  | 292 +++-
 examples/vhost/main.h  |  35 +-
 examples/vhost/virtio_net.c|  16 +-
 lib/vhost/rte_vhost_async.h|  33 ++
 lib/vhost/version.map  |   3 +
 lib/vhost/vhost.h  |   1 +
 lib/vhost/virtio_net.c | 459 ++---
 10 files changed, 711 insertions(+), 148 deletions(-)

-- 
2.17.1



[PATCH v1 1/5] vhost: prepare sync for descriptor to mbuf refactoring

2022-04-07 Thread xuan . ding
From: Xuan Ding 

This patch extracts the descriptor-to-buffer filling from
copy_desc_to_mbuf() into a dedicated function. Besides, the enqueue
and dequeue paths are refactored to use the same function,
sync_fill_seg(), for preparing batch elements, which simplifies
the code without performance degradation.

Signed-off-by: Xuan Ding 
---
 lib/vhost/virtio_net.c | 66 +++---
 1 file changed, 29 insertions(+), 37 deletions(-)

diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 5f432b0d77..a2d04a1f60 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -1030,9 +1030,9 @@ async_mbuf_to_desc_seg(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
 }
 
 static __rte_always_inline void
-sync_mbuf_to_desc_seg(struct virtio_net *dev, struct vhost_virtqueue *vq,
+sync_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq,
struct rte_mbuf *m, uint32_t mbuf_offset,
-   uint64_t buf_addr, uint64_t buf_iova, uint32_t cpy_len)
+   uint64_t buf_addr, uint64_t buf_iova, uint32_t cpy_len, bool 
to_desc)
 {
struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
 
@@ -1043,10 +1043,17 @@ sync_mbuf_to_desc_seg(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
vhost_log_cache_write_iova(dev, vq, buf_iova, cpy_len);
PRINT_PACKET(dev, (uintptr_t)(buf_addr), cpy_len, 0);
} else {
-   batch_copy[vq->batch_copy_nb_elems].dst =
-   (void *)((uintptr_t)(buf_addr));
-   batch_copy[vq->batch_copy_nb_elems].src =
-   rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
+   if (to_desc) {
+   batch_copy[vq->batch_copy_nb_elems].dst =
+   (void *)((uintptr_t)(buf_addr));
+   batch_copy[vq->batch_copy_nb_elems].src =
+   rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
+   } else {
+   batch_copy[vq->batch_copy_nb_elems].dst =
+   rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
+   batch_copy[vq->batch_copy_nb_elems].src =
+   (void *)((uintptr_t)(buf_addr));
+   }
batch_copy[vq->batch_copy_nb_elems].log_addr = buf_iova;
batch_copy[vq->batch_copy_nb_elems].len = cpy_len;
vq->batch_copy_nb_elems++;
@@ -1158,9 +1165,9 @@ mbuf_to_desc(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
buf_iova + buf_offset, cpy_len) 
< 0)
goto error;
} else {
-   sync_mbuf_to_desc_seg(dev, vq, m, mbuf_offset,
+   sync_fill_seg(dev, vq, m, mbuf_offset,
buf_addr + buf_offset,
-   buf_iova + buf_offset, cpy_len);
+   buf_iova + buf_offset, cpy_len, true);
}
 
mbuf_avail  -= cpy_len;
@@ -2474,7 +2481,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
  bool legacy_ol_flags)
 {
uint32_t buf_avail, buf_offset;
-   uint64_t buf_addr, buf_len;
+   uint64_t buf_addr, buf_iova, buf_len;
uint32_t mbuf_avail, mbuf_offset;
uint32_t cpy_len;
struct rte_mbuf *cur = m, *prev = m;
@@ -2482,16 +2489,13 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
struct virtio_net_hdr *hdr = NULL;
/* A counter to avoid desc dead loop chain */
uint16_t vec_idx = 0;
-   struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
-   int error = 0;
 
buf_addr = buf_vec[vec_idx].buf_addr;
+   buf_iova = buf_vec[vec_idx].buf_iova;
buf_len = buf_vec[vec_idx].buf_len;
 
-   if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) {
-   error = -1;
-   goto out;
-   }
+   if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1))
+   return -1;
 
if (virtio_net_with_host_offload(dev)) {
if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) {
@@ -2515,11 +2519,12 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
buf_offset = dev->vhost_hlen - buf_len;
vec_idx++;
buf_addr = buf_vec[vec_idx].buf_addr;
+   buf_iova = buf_vec[vec_idx].buf_iova;
buf_len = buf_vec[vec_idx].buf_len;
buf_avail  = buf_len - buf_offset;
} else if (buf_len == dev->vhost_hlen) {
if (unlikely(++vec_idx >= nr_vec))
-   goto out;
+   goto error;
buf_addr = buf_vec[vec_idx].buf_addr;
buf_len = buf_vec[vec_idx].buf_len;
 
@@ -2539,22 +254

[PATCH v1 2/5] vhost: prepare async for descriptor to mbuf refactoring

2022-04-07 Thread xuan . ding
From: Xuan Ding 

This patch refactors the vhost async enqueue and dequeue paths to use
the same function, async_fill_seg(), for preparing batch elements,
which simplifies the code without performance degradation.

Signed-off-by: Xuan Ding 
---
 lib/vhost/virtio_net.c | 23 +++
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index a2d04a1f60..709ff483a3 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -997,13 +997,14 @@ async_iter_reset(struct vhost_async *async)
 }
 
 static __rte_always_inline int
-async_mbuf_to_desc_seg(struct virtio_net *dev, struct vhost_virtqueue *vq,
+async_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq,
struct rte_mbuf *m, uint32_t mbuf_offset,
-   uint64_t buf_iova, uint32_t cpy_len)
+   uint64_t buf_iova, uint32_t cpy_len, bool to_desc)
 {
struct vhost_async *async = vq->async;
uint64_t mapped_len;
uint32_t buf_offset = 0;
+   void *src, *dst;
void *host_iova;
 
while (cpy_len) {
@@ -1015,10 +1016,16 @@ async_mbuf_to_desc_seg(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
return -1;
}
 
-   if (unlikely(async_iter_add_iovec(dev, async,
-   (void 
*)(uintptr_t)rte_pktmbuf_iova_offset(m,
-   mbuf_offset),
-   host_iova, (size_t)mapped_len)))
+   if (to_desc) {
+   src = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, 
mbuf_offset);
+   dst = host_iova;
+   } else {
+   src = host_iova;
+   dst = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, 
mbuf_offset);
+   }
+
+   if (unlikely(async_iter_add_iovec(dev, async, src, dst,
+(size_t)mapped_len)))
return -1;
 
cpy_len -= (uint32_t)mapped_len;
@@ -1161,8 +1168,8 @@ mbuf_to_desc(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
cpy_len = RTE_MIN(buf_avail, mbuf_avail);
 
if (is_async) {
-   if (async_mbuf_to_desc_seg(dev, vq, m, mbuf_offset,
-   buf_iova + buf_offset, cpy_len) 
< 0)
+   if (async_fill_seg(dev, vq, m, mbuf_offset,
+   buf_iova + buf_offset, cpy_len, 
true) < 0)
goto error;
} else {
sync_fill_seg(dev, vq, m, mbuf_offset,
-- 
2.17.1



[PATCH v1 3/5] vhost: merge sync and async descriptor to mbuf filling

2022-04-07 Thread xuan . ding
From: Xuan Ding 

This patch refactors copy_desc_to_mbuf(), used by the sync
path, to support both sync and async descriptor-to-mbuf filling.

Signed-off-by: Xuan Ding 
---
 lib/vhost/vhost.h  |  1 +
 lib/vhost/virtio_net.c | 47 --
 2 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
index a9edc271aa..9209558465 100644
--- a/lib/vhost/vhost.h
+++ b/lib/vhost/vhost.h
@@ -177,6 +177,7 @@ extern struct async_dma_info 
dma_copy_track[RTE_DMADEV_DEFAULT_MAX];
  * inflight async packet information
  */
 struct async_inflight_info {
+   struct virtio_net_hdr nethdr;
struct rte_mbuf *mbuf;
uint16_t descs; /* num of descs inflight */
uint16_t nr_buffers; /* num of buffers inflight for packed ring */
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 709ff483a3..382e953c2d 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -2482,10 +2482,10 @@ copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr,
 }
 
 static __rte_always_inline int
-copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
+desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
  struct buf_vector *buf_vec, uint16_t nr_vec,
  struct rte_mbuf *m, struct rte_mempool *mbuf_pool,
- bool legacy_ol_flags)
+ bool legacy_ol_flags, uint16_t slot_idx, bool is_async)
 {
uint32_t buf_avail, buf_offset;
uint64_t buf_addr, buf_iova, buf_len;
@@ -2496,6 +2496,8 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
struct virtio_net_hdr *hdr = NULL;
/* A counter to avoid desc dead loop chain */
uint16_t vec_idx = 0;
+   struct vhost_async *async = vq->async;
+   struct async_inflight_info *pkts_info;
 
buf_addr = buf_vec[vec_idx].buf_addr;
buf_iova = buf_vec[vec_idx].buf_iova;
@@ -2548,12 +2550,25 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
 
mbuf_offset = 0;
mbuf_avail  = m->buf_len - RTE_PKTMBUF_HEADROOM;
+
+   if (is_async) {
+   pkts_info = async->pkts_info;
+   if (async_iter_initialize(dev, async))
+   return -1;
+   }
+
while (1) {
cpy_len = RTE_MIN(buf_avail, mbuf_avail);
 
-   sync_fill_seg(dev, vq, m, mbuf_offset,
- buf_addr + buf_offset,
- buf_iova + buf_offset, cpy_len, true);
+   if (is_async) {
+   if (async_fill_seg(dev, vq, m, mbuf_offset,
+   buf_iova + buf_offset, cpy_len, 
false) < 0)
+   goto error;
+   } else {
+   sync_fill_seg(dev, vq, m, mbuf_offset,
+   buf_addr + buf_offset,
+   buf_iova + buf_offset, cpy_len, true);
+   }
 
mbuf_avail  -= cpy_len;
mbuf_offset += cpy_len;
@@ -2602,11 +2617,20 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
prev->data_len = mbuf_offset;
m->pkt_len+= mbuf_offset;
 
-   if (hdr)
-   vhost_dequeue_offload(dev, hdr, m, legacy_ol_flags);
+   if (hdr) {
+   if (is_async) {
+   async_iter_finalize(async);
+   pkts_info[slot_idx].nethdr = *hdr;
+   } else {
+   vhost_dequeue_offload(dev, hdr, m, legacy_ol_flags);
+   }
+   }
 
return 0;
 error:
+   if (is_async)
+   async_iter_cancel(async);
+
return -1;
 }
 
@@ -2738,8 +2762,8 @@ virtio_dev_tx_split(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
break;
}
 
-   err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i],
-   mbuf_pool, legacy_ol_flags);
+   err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i],
+   mbuf_pool, legacy_ol_flags, 0, false);
if (unlikely(err)) {
if (!allocerr_warned) {
VHOST_LOG_DATA(ERR, "(%s) failed to copy desc 
to mbuf.\n",
@@ -2750,6 +2774,7 @@ virtio_dev_tx_split(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
i++;
break;
}
+
}
 
if (dropped)
@@ -2931,8 +2956,8 @@ vhost_dequeue_single_packed(struct virtio_net *dev,
return -1;
}
 
-   err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts,
-   mbuf_pool, legacy_ol_flags);
+   err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts,
+   mbuf_pool, legacy_ol_flags, 0, false);
if (unlikely(e

[PATCH v1 4/5] vhost: support async dequeue for split ring

2022-04-07 Thread xuan . ding
From: Xuan Ding 

This patch implements asynchronous dequeue data path for vhost split
ring, a new API rte_vhost_async_try_dequeue_burst() is introduced.

Signed-off-by: Xuan Ding 
Signed-off-by: Yuan Wang 
---
 doc/guides/prog_guide/vhost_lib.rst|   7 +
 doc/guides/rel_notes/release_22_07.rst |   4 +
 lib/vhost/rte_vhost_async.h|  33 +++
 lib/vhost/version.map  |   3 +
 lib/vhost/virtio_net.c | 335 +
 5 files changed, 382 insertions(+)

diff --git a/doc/guides/prog_guide/vhost_lib.rst 
b/doc/guides/prog_guide/vhost_lib.rst
index 886f8f5e72..40cf315170 100644
--- a/doc/guides/prog_guide/vhost_lib.rst
+++ b/doc/guides/prog_guide/vhost_lib.rst
@@ -276,6 +276,13 @@ The following is an overview of some key Vhost API 
functions:
   Clear inflight packets which are submitted to DMA engine in vhost async data
   path. Completed packets are returned to applications through ``pkts``.
 
+* ``rte_vhost_async_try_dequeue_burst(int vid, uint16_t queue_id,
+  struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,
+   int *nr_inflight, uint16_t dma_id, uint16_t vchan_id)``
+
+  Receives (dequeues) ``count`` packets from guest to host in async data path,
+  and stores them in ``pkts``.
+
 Vhost-user Implementations
 --
 
diff --git a/doc/guides/rel_notes/release_22_07.rst 
b/doc/guides/rel_notes/release_22_07.rst
index 42a5f2d990..422a6673cb 100644
--- a/doc/guides/rel_notes/release_22_07.rst
+++ b/doc/guides/rel_notes/release_22_07.rst
@@ -55,6 +55,10 @@ New Features
  Also, make sure to start the actual text at the margin.
  ===
 
+* **Added vhost async dequeue API to receive pkts from guest.**
+
+  Added vhost async dequeue API which can leverage DMA devices to accelerate
+  receiving pkts from guest.
 
 Removed Items
 -
diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
index f1293c6a9d..23fe1a7316 100644
--- a/lib/vhost/rte_vhost_async.h
+++ b/lib/vhost/rte_vhost_async.h
@@ -187,6 +187,39 @@ uint16_t rte_vhost_clear_queue_thread_unsafe(int vid, 
uint16_t queue_id,
 __rte_experimental
 int rte_vhost_async_dma_configure(int16_t dma_id, uint16_t vchan_id);
 
+/**
+ * This function tries to receive packets from the guest, offloading the
+ * copies to the async channel. Packets whose copies have completed are
+ * returned in "pkts". Packets whose copies have been submitted to the
+ * async channel but have not yet completed are called "in-flight packets".
+ * This function will not return in-flight packets until their copies are
+ * completed by the async channel.
+ *
+ * @param vid
+ *  ID of vhost device to dequeue data
+ * @param queue_id
+ *  ID of virtqueue to dequeue data
+ * @param mbuf_pool
+ *  Mbuf_pool where host mbuf is allocated
+ * @param pkts
+ *  Blank array to keep successfully dequeued packets
+ * @param count
+ *  Size of the packet array
+ * @param nr_inflight
+ *  The amount of in-flight packets. If error occurred, its value is set to -1.
+ * @param dma_id
+ *  The identifier of DMA device
+ * @param vchan_id
+ *  The identifier of virtual DMA channel
+ * @return
+ *  Number of successfully dequeued packets
+ */
+__rte_experimental
+uint16_t
+rte_vhost_async_try_dequeue_burst(int vid, uint16_t queue_id,
+   struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,
+   int *nr_inflight, uint16_t dma_id, uint16_t vchan_id);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/vhost/version.map b/lib/vhost/version.map
index 0a66c5840c..514e3ff6a6 100644
--- a/lib/vhost/version.map
+++ b/lib/vhost/version.map
@@ -87,6 +87,9 @@ EXPERIMENTAL {
 
# added in 22.03
rte_vhost_async_dma_configure;
+
+   # added in 22.07
+   rte_vhost_async_try_dequeue_burst;
 };
 
 INTERNAL {
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 382e953c2d..3085905d17 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -3165,3 +3165,338 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 
return count;
 }
+
+static __rte_always_inline uint16_t
+async_poll_dequeue_completed_split(struct virtio_net *dev, uint16_t queue_id,
+   struct rte_mbuf **pkts, uint16_t count, uint16_t dma_id,
+   uint16_t vchan_id, bool legacy_ol_flags)
+{
+   uint16_t start_idx, from, i;
+   uint16_t nr_cpl_pkts = 0;
+   struct async_inflight_info *pkts_info;
+   struct vhost_virtqueue *vq = dev->virtqueue[queue_id];
+
+   pkts_info = vq->async->pkts_info;
+
+   vhost_async_dma_check_completed(dev, dma_id, vchan_id, 
VHOST_DMA_MAX_COPY_COMPLETE);
+
+   start_idx = async_get_first_inflight_pkt_idx(vq);
+
+   from = start_idx;
+   while (vq->async->pkts_cmpl_flag[from] && count--) {
+   vq->async->pkts_cmpl_flag[from] = false;
+   from = (from + 1) & (vq->size - 1);
+   nr_c

[PATCH v1 5/5] examples/vhost: support async dequeue data path

2022-04-07 Thread xuan . ding
From: Xuan Ding 

This patch adds a use case for the async dequeue API. The vswitch can
leverage DMA devices to accelerate the vhost async dequeue path.

Signed-off-by: Wenwu Ma 
Signed-off-by: Yuan Wang 
Signed-off-by: Xuan Ding 
---
 doc/guides/sample_app_ug/vhost.rst |   9 +-
 examples/vhost/main.c  | 292 -
 examples/vhost/main.h  |  35 +++-
 examples/vhost/virtio_net.c|  16 +-
 4 files changed, 254 insertions(+), 98 deletions(-)
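
To make the new --dmas syntax concrete, a sketch of what open_dma() is
expected to record for a hypothetical "--dmas [txd0@00:04.0,rxd0@00:04.1]";
the field names follow the diff below, while the dev_id_* values are
placeholders for the dmadev ids probed for those addresses:

/* After parsing "--dmas [txd0@00:04.0,rxd0@00:04.1]":
 *   txd0 -> enqueue DMA for the device created from the first socket file
 *   rxd0 -> dequeue DMA for that same device
 */
(dma_info + 0)->dmas[VIRTIO_RXQ].dev_id = dev_id_04_0;  /* enqueue path */
(dma_info + 0)->dmas[VIRTIO_TXQ].dev_id = dev_id_04_1;  /* dequeue path */
(dma_info + 0)->async_flag = ASYNC_ENQUEUE_VHOST | ASYNC_DEQUEUE_VHOST;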

diff --git a/doc/guides/sample_app_ug/vhost.rst 
b/doc/guides/sample_app_ug/vhost.rst
index a6ce4bc8ac..09db965e70 100644
--- a/doc/guides/sample_app_ug/vhost.rst
+++ b/doc/guides/sample_app_ug/vhost.rst
@@ -169,9 +169,12 @@ demonstrates how to use the async vhost APIs. It's used in 
combination with dmas
 **--dmas**
 This parameter is used to specify the assigned DMA device of a vhost device.
 Async vhost-user net driver will be used if --dmas is set. For example
---dmas [txd0@00:04.0,txd1@00:04.1] means use DMA channel 00:04.0 for vhost
-device 0 enqueue operation and use DMA channel 00:04.1 for vhost device 1
-enqueue operation.
+--dmas [txd0@00:04.0,txd1@00:04.1,rxd0@00:04.2,rxd1@00:04.3] means use
+DMA channel 00:04.0/00:04.2 for vhost device 0 enqueue/dequeue operation
+and use DMA channel 00:04.1/00:04.3 for vhost device 1 enqueue/dequeue
+operation. The index of the device corresponds to the socket file in order,
+which means vhost device 0 is created through the first socket file, vhost
+device 1 is created through the second socket file, and so on.
 
 Common Issues
 -
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index d94fabb060..d26e40ab73 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -63,6 +63,9 @@
 
 #define DMA_RING_SIZE 4096
 
+#define ASYNC_ENQUEUE_VHOST 1
+#define ASYNC_DEQUEUE_VHOST 2
+
 /* number of mbufs in all pools - if specified on command-line. */
 static int total_num_mbufs = NUM_MBUFS_DEFAULT;
 
@@ -116,6 +119,8 @@ static uint32_t burst_rx_retry_num = BURST_RX_RETRIES;
 static char *socket_files;
 static int nb_sockets;
 
+static struct vhost_queue_ops vdev_queue_ops[RTE_MAX_VHOST_DEVICE];
+
 /* empty VMDq configuration structure. Filled in programmatically */
 static struct rte_eth_conf vmdq_conf_default = {
.rxmode = {
@@ -205,6 +210,18 @@ struct vhost_bufftable *vhost_txbuff[RTE_MAX_LCORE * 
RTE_MAX_VHOST_DEVICE];
 #define MBUF_TABLE_DRAIN_TSC   ((rte_get_tsc_hz() + US_PER_S - 1) \
 / US_PER_S * BURST_TX_DRAIN_US)
 
+static int vid2socketid[RTE_MAX_VHOST_DEVICE];
+
+static uint32_t get_async_flag_by_socketid(int socketid)
+{
+   return dma_bind[socketid].async_flag;
+}
+
+static void init_vid2socketid_array(int vid, int socketid)
+{
+   vid2socketid[vid] = socketid;
+}
+
 static inline bool
 is_dma_configured(int16_t dev_id)
 {
@@ -224,7 +241,7 @@ open_dma(const char *value)
char *addrs = input;
char *ptrs[2];
char *start, *end, *substr;
-   int64_t vid;
+   int64_t socketid, vring_id;
 
struct rte_dma_info info;
struct rte_dma_conf dev_config = { .nb_vchans = 1 };
@@ -262,7 +279,9 @@ open_dma(const char *value)
 
while (i < args_nr) {
char *arg_temp = dma_arg[i];
+   char *txd, *rxd;
uint8_t sub_nr;
+   int async_flag;
 
sub_nr = rte_strsplit(arg_temp, strlen(arg_temp), ptrs, 2, '@');
if (sub_nr != 2) {
@@ -270,14 +289,23 @@ open_dma(const char *value)
goto out;
}
 
-   start = strstr(ptrs[0], "txd");
-   if (start == NULL) {
+   txd = strstr(ptrs[0], "txd");
+   rxd = strstr(ptrs[0], "rxd");
+   if (txd) {
+   start = txd;
+   vring_id = VIRTIO_RXQ;
+   async_flag = ASYNC_ENQUEUE_VHOST;
+   } else if (rxd) {
+   start = rxd;
+   vring_id = VIRTIO_TXQ;
+   async_flag = ASYNC_DEQUEUE_VHOST;
+   } else {
ret = -1;
goto out;
}
 
start += 3;
-   vid = strtol(start, &end, 0);
+   socketid = strtol(start, &end, 0);
if (end == start) {
ret = -1;
goto out;
@@ -338,7 +366,8 @@ open_dma(const char *value)
dmas_id[dma_count++] = dev_id;
 
 done:
-   (dma_info + vid)->dmas[VIRTIO_RXQ].dev_id = dev_id;
+   (dma_info + socketid)->dmas[vring_id].dev_id = dev_id;
+   (dma_info + socketid)->async_flag |= async_flag;
i++;
}
 out:
@@ -990,13 +1019,13 @@ complete_async_pkts(struct vhost_dev *vdev)
 {
struct rte_mbuf *p_cpl[MAX_PKT_BURST];
uint16_t complete_count;
-   int16_t dma_id = dma_

Re: [PATCH v7] eal: fix rte_memcpy strict aliasing/alignment bugs

2022-04-07 Thread David Marchand
On Thu, Apr 7, 2022 at 5:24 PM David Marchand  wrote:
>
> On Thu, Mar 10, 2022 at 3:55 PM Ananyev, Konstantin
>  wrote:
> > > Calls to rte_memcpy for 1 < n < 16 could result in unaligned
> > > loads/stores, which is undefined behaviour according to the C
> > > standard, and strict aliasing violations.
> > >
> > > The code was changed to use a packed structure that allows aliasing
> > > (using the __may_alias__ attribute) to perform the load/store
> > > operations. This results in code that has the same performance as the
> > > original code and that is also C standards-compliant.
> > >
> > > Fixes: d35cc1fe6a7a ("eal/x86: revert select optimized memcpy at 
> > > run-time")

Actually, looking again at the history, it fixes:
Fixes: f5472703c0bd ("eal: optimize aligned memcpy on x86")

I'll change before pushing.


-- 
David Marchand



Re: [PATCH v7] eal: fix rte_memcpy strict aliasing/alignment bugs

2022-04-07 Thread David Marchand
On Thu, Apr 7, 2022 at 5:32 PM David Marchand  wrote:
>
> On Thu, Apr 7, 2022 at 5:24 PM David Marchand  
> wrote:
> >
> > On Thu, Mar 10, 2022 at 3:55 PM Ananyev, Konstantin
> >  wrote:
> > > > Calls to rte_memcpy for 1 < n < 16 could result in unaligned
> > > > loads/stores, which is undefined behaviour according to the C
> > > > standard, and strict aliasing violations.
> > > >
> > > > The code was changed to use a packed structure that allows aliasing
> > > > (using the __may_alias__ attribute) to perform the load/store
> > > > operations. This results in code that has the same performance as the
> > > > original code and that is also C standards-compliant.
> > > >
> > > > Fixes: d35cc1fe6a7a ("eal/x86: revert select optimized memcpy at 
> > > > run-time")
>
> Actually, looking again at the history, it fixes:
> Fixes: f5472703c0bd ("eal: optimize aligned memcpy on x86")

Nop, that's probably even older, could you double check?
I'll hold on pushing this fix.


-- 
David Marchand



Re: OVS DPDK DMA-Dev library/Design Discussion

2022-04-07 Thread Maxime Coquelin




On 4/7/22 17:01, Ilya Maximets wrote:

On 4/7/22 16:42, Van Haaren, Harry wrote:

-Original Message-
From: Ilya Maximets 
Sent: Thursday, April 7, 2022 3:40 PM
To: Maxime Coquelin ; Van Haaren, Harry
; Morten Brørup ;
Richardson, Bruce 
Cc: i.maxim...@ovn.org; Pai G, Sunil ; Stokes, Ian
; Hu, Jiayu ; Ferriter, Cian
; ovs-...@openvswitch.org; dev@dpdk.org; Mcnamara,
John ; O'Driscoll, Tim ;
Finn, Emma 
Subject: Re: OVS DPDK DMA-Dev library/Design Discussion

On 4/7/22 16:25, Maxime Coquelin wrote:

Hi Harry,

On 4/7/22 16:04, Van Haaren, Harry wrote:

Hi OVS & DPDK, Maintainers & Community,

Top posting overview of discussion as replies to thread become slower:
perhaps it is a good time to review and plan for next steps?

  From my perspective, those most vocal in the thread seem to be in favour
of the clean rx/tx split ("defer work"), with the tradeoff that the application
must be aware of handling the async DMA completions. If there are any concerns
opposing upstreaming of this method, please indicate this promptly, and we can
continue technical discussions here now.


Wasn't there some discussions about handling the Virtio completions with
the DMA engine? With that, we wouldn't need the deferral of work.


+1


Yes there was, the DMA/virtq completions thread here for reference;
https://mail.openvswitch.org/pipermail/ovs-dev/2022-March/392908.html

I do not believe that there is a viable path to actually implementing it, and 
particularly
not in the more complex cases; e.g. virtio with guest-interrupt enabled.

The thread above mentions additional threads and various other options; none of 
which
I believe to be a clean or workable solution. I'd like input from other folks 
more familiar
with the exact implementations of VHost/vrings, as well as those with DMA 
engine expertise.


I tend to trust Maxime as a vhost maintainer in such questions. :)

In my own opinion though, the implementation is possible and concerns doesn't
sound deal-breaking as solutions for them might work well enough.  So I think
the viability should be tested out before solution is disregarded.  Especially
because the decision will form the API of the vhost library.


I agree, we need a PoC adding interrupt support to dmadev API using
eventfd, and adding a thread in Vhost library that polls for DMA
interrupts and calls vhost_vring_call if needed.





With the virtio completions handled by DMA itself, the vhost port
turns almost into a real HW NIC.  With that we will not need any
extra manipulations from the OVS side, i.e. no need to defer any
work while maintaining clear split between rx and tx operations.

I'd vote for that.



Thanks,
Maxime


Thanks for the prompt responses, and lets understand if there is a viable 
workable way
to totally hide DMA-completions from the application.

Regards,  -Harry



In absence of continued technical discussion here, I suggest Sunil and Ian
collaborate on getting the OVS Defer-work approach, and DPDK VHost Async
patchsets available on GitHub for easier consumption and future development
(as suggested in slides presented on last call).


Regards, -Harry

No inline-replies below; message just for context.


-Original Message-
From: Van Haaren, Harry
Sent: Wednesday, March 30, 2022 10:02 AM
To: Morten Brørup ; Richardson, Bruce

Cc: Maxime Coquelin ; Pai G, Sunil
; Stokes, Ian ; Hu, Jiayu
; Ferriter, Cian ; Ilya

Maximets

; ovs-...@openvswitch.org; dev@dpdk.org;

Mcnamara,

John ; O'Driscoll, Tim

;

Finn, Emma 
Subject: RE: OVS DPDK DMA-Dev library/Design Discussion


-Original Message-
From: Morten Brørup 
Sent: Tuesday, March 29, 2022 8:59 PM
To: Van Haaren, Harry ; Richardson, Bruce

Cc: Maxime Coquelin ; Pai G, Sunil
; Stokes, Ian ; Hu, Jiayu
; Ferriter, Cian ; Ilya

Maximets

; ovs-...@openvswitch.org; dev@dpdk.org;

Mcnamara,

John

; O'Driscoll, Tim ;

Finn,

Emma 
Subject: RE: OVS DPDK DMA-Dev library/Design Discussion


From: Van Haaren, Harry [mailto:harry.van.haa...@intel.com]
Sent: Tuesday, 29 March 2022 19.46


From: Morten Brørup 
Sent: Tuesday, March 29, 2022 6:14 PM


From: Bruce Richardson [mailto:bruce.richard...@intel.com]
Sent: Tuesday, 29 March 2022 19.03

On Tue, Mar 29, 2022 at 06:45:19PM +0200, Morten Brørup wrote:

From: Maxime Coquelin [mailto:maxime.coque...@redhat.com]
Sent: Tuesday, 29 March 2022 18.24

Hi Morten,

On 3/29/22 16:44, Morten Brørup wrote:

From: Van Haaren, Harry [mailto:harry.van.haa...@intel.com]
Sent: Tuesday, 29 March 2022 15.02


From: Morten Brørup 
Sent: Tuesday, March 29, 2022 1:51 PM

Having thought more about it, I think that a completely different
architectural approach is required:

Many of the DPDK Ethernet PMDs implement a variety of RX and TX
packet burst functions, each optimized for different CPU vector
instruction sets. The availability of a DMA engine should be treated
the same way. So I suggest that PMDs copying packet contents, e.g.
memif, pcap, vmxnet3, should implement DMA optim

Re: OVS DPDK DMA-Dev library/Design Discussion

2022-04-07 Thread Bruce Richardson
On Thu, Apr 07, 2022 at 05:46:32PM +0200, Maxime Coquelin wrote:
> 
> 
> On 4/7/22 17:01, Ilya Maximets wrote:
> > On 4/7/22 16:42, Van Haaren, Harry wrote:
> > > > -Original Message-
> > > > From: Ilya Maximets 
> > > > Sent: Thursday, April 7, 2022 3:40 PM
> > > > To: Maxime Coquelin ; Van Haaren, Harry
> > > > ; Morten Brørup 
> > > > ;
> > > > Richardson, Bruce 
> > > > Cc: i.maxim...@ovn.org; Pai G, Sunil ; Stokes, 
> > > > Ian
> > > > ; Hu, Jiayu ; Ferriter, Cian
> > > > ; ovs-...@openvswitch.org; dev@dpdk.org; 
> > > > Mcnamara,
> > > > John ; O'Driscoll, Tim 
> > > > ;
> > > > Finn, Emma 
> > > > Subject: Re: OVS DPDK DMA-Dev library/Design Discussion
> > > > 
> > > > On 4/7/22 16:25, Maxime Coquelin wrote:
> > > > > Hi Harry,
> > > > > 
> > > > > On 4/7/22 16:04, Van Haaren, Harry wrote:
> > > > > > Hi OVS & DPDK, Maintainers & Community,
> > > > > > 
> > > > > > Top posting overview of discussion as replies to thread become 
> > > > > > slower:
> > > > > > perhaps it is a good time to review and plan for next steps?
> > > > > > 
> > > > > >   From my perspective, it those most vocal in the thread seem to be 
> > > > > > in favour
> > > > of the clean
> > > > > > rx/tx split ("defer work"), with the tradeoff that the application 
> > > > > > must be
> > > > aware of handling
> > > > > > the async DMA completions. If there are any concerns opposing 
> > > > > > upstreaming
> > > > of this method,
> > > > > > please indicate this promptly, and we can continue technical 
> > > > > > discussions here
> > > > now.
> > > > > 
> > > > > Wasn't there some discussions about handling the Virtio completions 
> > > > > with
> > > > > the DMA engine? With that, we wouldn't need the deferral of work.
> > > > 
> > > > +1
> > > 
> > > Yes there was, the DMA/virtq completions thread here for reference;
> > > https://mail.openvswitch.org/pipermail/ovs-dev/2022-March/392908.html
> > > 
> > > I do not believe that there is a viable path to actually implementing it, 
> > > and particularly
> > > not in the more complex cases; e.g. virtio with guest-interrupt enabled.
> > > 
> > > The thread above mentions additional threads and various other options; 
> > > none of which
> > > I believe to be a clean or workable solution. I'd like input from other 
> > > folks more familiar
> > > with the exact implementations of VHost/vrings, as well as those with DMA 
> > > engine expertise.
> > 
> > I tend to trust Maxime as a vhost maintainer in such questions. :)
> > 
> > In my own opinion though, the implementation is possible and concerns 
> > doesn't
> > sound deal-breaking as solutions for them might work well enough.  So I 
> > think
> > the viability should be tested out before solution is disregarded.  
> > Especially
> > because the decision will form the API of the vhost library.
> 
> I agree, we need a PoC adding interrupt support to dmadev API using
> eventfd, and adding a thread in Vhost library that polls for DMA
> interrupts and calls vhost_vring_call if needed.
>
Hi Maxime,

couple of questions, perhaps you can clarify. Firstly, why would an eventfd
be needed for the interrupts, can they not just use the regular interrupt
handling like other devices in DPDK, e.g. read on /dev/node.

In terms of the new thread - what is this thread going to handle? Is it
going to take interrupts from all dma operations and handle the
completion/cleanup of all jobs for all queues once the DMA engine is
finished? Or is it just going to periodically be woken up to check for the
edge case if there are any virtio queues sleeping with interrupts enabled
where there are completed - but unsignalled to the VM - packets?

Regards,
/Bruce


[dpdk-dev v1] crypto/openssl: openssl 3.0 support on asym crypto routine

2022-04-07 Thread Kai Ji
This patch updates the asymmetric RSA and DH routines in the crypto
openssl PMD to adopt the OpenSSL 3.0 EVP library.

Signed-off-by: Kai Ji 
---
 drivers/crypto/openssl/compat.h  |  43 ++-
 drivers/crypto/openssl/openssl_pmd_private.h |  11 +
 drivers/crypto/openssl/rte_openssl_pmd.c | 288 ++-
 drivers/crypto/openssl/rte_openssl_pmd_ops.c | 151 +-
 4 files changed, 489 insertions(+), 4 deletions(-)
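
For context, a rough sketch of the OpenSSL 3.0 EVP flow this patch moves the
DH shared-secret path towards (not the PMD code; error handling omitted;
"params" is assumed to have been built with OSSL_PARAM_BLD as in the diff,
"peer_pkey" to hold the peer public key, and "secret" to be a caller buffer):

#include <openssl/evp.h>
#include <openssl/core_names.h>

/* Import the locally built key material into an EVP_PKEY. */
EVP_PKEY_CTX *fctx = EVP_PKEY_CTX_new_id(EVP_PKEY_DH, NULL);
EVP_PKEY *pkey = NULL;
EVP_PKEY_fromdata_init(fctx);
EVP_PKEY_fromdata(fctx, &pkey, EVP_PKEY_KEYPAIR, params);

/* Derive the shared secret against the peer key. */
EVP_PKEY_CTX *dctx = EVP_PKEY_CTX_new(pkey, NULL);
size_t secret_len = 0;
EVP_PKEY_derive_init(dctx);
EVP_PKEY_derive_set_peer(dctx, peer_pkey);
EVP_PKEY_derive(dctx, NULL, &secret_len);   /* query required length */
EVP_PKEY_derive(dctx, secret, &secret_len); /* write the shared secret */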

diff --git a/drivers/crypto/openssl/compat.h b/drivers/crypto/openssl/compat.h
index eecb7d3698..80b1493a5f 100644
--- a/drivers/crypto/openssl/compat.h
+++ b/drivers/crypto/openssl/compat.h
@@ -104,8 +104,49 @@ get_dsa_priv_key(DSA *dsa, BIGNUM **priv_key)
*priv_key = dsa->priv_key;
 }
 
-#else
+#elif (OPENSSL_VERSION_NUMBER >= 0x3000L)
+static __rte_always_inline int
+set_dsa_params(DSA *dsa, BIGNUM *p, BIGNUM *q, BIGNUM *g)
+{
+   return !(DSA_set0_pqg(dsa, p, q, g));
+}
+
+static __rte_always_inline void
+set_dsa_priv_key(DSA *dsa, BIGNUM *priv_key)
+{
+   DSA_set0_key(dsa, NULL, priv_key);
+}
+
+static __rte_always_inline void
+set_dsa_sign(DSA_SIG *sign, BIGNUM *r, BIGNUM *s)
+{
+   DSA_SIG_set0(sign, r, s);
+}
+
+static __rte_always_inline void
+get_dsa_sign(DSA_SIG *sign, const BIGNUM **r, const BIGNUM **s)
+{
+   DSA_SIG_get0(sign, r, s);
+}
 
+static __rte_always_inline int
+set_dsa_keys(DSA *dsa, BIGNUM *pub, BIGNUM *priv)
+{
+   return !(DSA_set0_key(dsa, pub, priv));
+}
+
+static __rte_always_inline void
+set_dsa_pub_key(DSA *dsa, BIGNUM *pub_key)
+{
+   DSA_set0_key(dsa, pub_key, NULL);
+}
+
+static __rte_always_inline void
+get_dsa_priv_key(DSA *dsa, const BIGNUM **priv_key)
+{
+   DSA_get0_key(dsa, NULL, priv_key);
+}
+#else
 static __rte_always_inline int
 set_rsa_params(RSA *rsa, BIGNUM *p, BIGNUM *q)
 {
diff --git a/drivers/crypto/openssl/openssl_pmd_private.h 
b/drivers/crypto/openssl/openssl_pmd_private.h
index 86dc169aaf..aef12c3e21 100644
--- a/drivers/crypto/openssl/openssl_pmd_private.h
+++ b/drivers/crypto/openssl/openssl_pmd_private.h
@@ -12,6 +12,11 @@
 #include 
 #include 
 
+#if (OPENSSL_VERSION_NUMBER >= 0x3000L)
+#include 
+#include 
+#endif
+
 #define CRYPTODEV_NAME_OPENSSL_PMD crypto_openssl
 /**< Open SSL Crypto PMD device name */
 
@@ -157,6 +162,9 @@ struct openssl_asym_session {
union {
struct rsa {
RSA *rsa;
+#if (OPENSSL_VERSION_NUMBER >= 0x3000L)
+   EVP_PKEY_CTX * ctx;
+#endif
} r;
struct exp {
BIGNUM *exp;
@@ -170,6 +178,9 @@ struct openssl_asym_session {
struct dh {
DH *dh_key;
uint32_t key_op;
+#if (OPENSSL_VERSION_NUMBER >= 0x3000L)
+   OSSL_PARAM_BLD * param_bld;
+#endif
} dh;
struct {
DSA *dsa;
diff --git a/drivers/crypto/openssl/rte_openssl_pmd.c 
b/drivers/crypto/openssl/rte_openssl_pmd.c
index 5840ab472e..e423114c08 100644
--- a/drivers/crypto/openssl/rte_openssl_pmd.c
+++ b/drivers/crypto/openssl/rte_openssl_pmd.c
@@ -43,6 +43,7 @@ static void HMAC_CTX_free(HMAC_CTX *ctx)
 
 #include 
 #include 
+#include 
 
 #define MAX_OSSL_ALGO_NAME_SIZE16
 
@@ -1845,6 +1846,134 @@ process_openssl_dsa_verify_op(struct rte_crypto_op *cop,
return 0;
 }
 
+#if (OPENSSL_VERSION_NUMBER >= 0x3000L)
+/* process dh operation */
+static int
+process_openssl_dh_op_evp(struct rte_crypto_op *cop,
+   struct openssl_asym_session *sess)
+{
+   struct rte_crypto_dh_op_param *op = &cop->asym->dh;
+   OSSL_PARAM_BLD *param_bld = sess->u.dh.param_bld;
+   OSSL_PARAM *params = NULL;
+   EVP_PKEY *dhpkey = NULL;
+   BIGNUM *priv_key = NULL;
+   BIGNUM *pub_key = NULL;
+
+   EVP_PKEY_CTX *dh_ctx = EVP_PKEY_CTX_new_id(EVP_PKEY_DH, NULL);
+   if (dh_ctx == NULL || param_bld == NULL) {
+   cop->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
+   return -1;
+   }
+
+   params = OSSL_PARAM_BLD_to_param(param_bld);
+   if (params == NULL) {
+   EVP_PKEY_CTX_free(dh_ctx);
+   cop->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
+   return -1;
+   }
+
+   if (sess->u.dh.key_op & (1 << 
RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE)) {
+   pub_key = BN_bin2bn(op->pub_key.data, op->pub_key.length,
+   pub_key);
+   if (pub_key == NULL)
+   goto err;
+
+   if (!OSSL_PARAM_BLD_push_BN(param_bld, OSSL_PKEY_PARAM_PUB_KEY,
+   pub_key)) {
+   OPENSSL_LOG(ERR, "Failed to set public key\n");
+   BN_free(pub_key);
+   goto err;
+   }
+   }
+
+   if ((sess->u.dh.key_op & (1 << RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE)) 
&&
+

[PATCH] cryptodev: add dh verify option

2022-04-07 Thread Arek Kusztal
For some elliptic curves, the public point in a DH exchange
needs to be checked to verify that it lies on the curve.
Modular exponentiation needs certain checks as well, though
these are mathematically much simpler.
This commit adds a verify option to asym_op operations.

Signed-off-by: Arek Kusztal 
---
Depends-on: patch-109409 ("cryptodev: add elliptic curve diffie hellman")

 lib/cryptodev/rte_crypto_asym.h | 2 ++
 lib/cryptodev/rte_cryptodev.c   | 1 +
 2 files changed, 3 insertions(+)
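
A minimal sketch of how an application could request the new operation on an
ECDH op (illustrative only; it assumes the ECDH xform/session from the
dependent series is already attached, and op_pool, dev_id, qp_id, qx/qy and
their lengths are placeholders supplied by the caller):

struct rte_crypto_op *op;

op = rte_crypto_op_alloc(op_pool, RTE_CRYPTO_OP_TYPE_ASYMMETRIC);
op->asym->dh.op_type = RTE_CRYPTO_ASYM_OP_KEY_VERIFY;
op->asym->dh.pub_point.x.data = qx;       /* public point to be verified */
op->asym->dh.pub_point.x.length = qx_len;
op->asym->dh.pub_point.y.data = qy;
op->asym->dh.pub_point.y.length = qy_len;

rte_cryptodev_enqueue_burst(dev_id, qp_id, &op, 1);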

diff --git a/lib/cryptodev/rte_crypto_asym.h b/lib/cryptodev/rte_crypto_asym.h
index e65222b802..5a581c77b2 100644
--- a/lib/cryptodev/rte_crypto_asym.h
+++ b/lib/cryptodev/rte_crypto_asym.h
@@ -117,6 +117,8 @@ enum rte_crypto_asym_op_type {
/**< DH Public Key generation operation */
RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE,
/**< DH Shared Secret compute operation */
+   RTE_CRYPTO_ASYM_OP_KEY_VERIFY,
+   /**< DH Public Key Verification */
RTE_CRYPTO_ASYM_OP_LIST_END
 };
 
diff --git a/lib/cryptodev/rte_cryptodev.c b/lib/cryptodev/rte_cryptodev.c
index 3500a2d470..082780a5f0 100644
--- a/lib/cryptodev/rte_cryptodev.c
+++ b/lib/cryptodev/rte_cryptodev.c
@@ -181,6 +181,7 @@ const char *rte_crypto_asym_op_strings[] = {
[RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE]   = "priv_key_generate",
[RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE] = "pub_key_generate",
[RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE] = "sharedsecret_compute",
+   [RTE_CRYPTO_ASYM_OP_KEY_VERIFY] = "dh_pubkey_verify",
 };
 
 /**
-- 
2.13.6



[PATCH] crypto/qat: add ec point verify function

2022-04-07 Thread Arek Kusztal
This commit adds elliptic curve point verification
to Intel QuickAssist Technology PMD.

Signed-off-by: Arek Kusztal 
---
Depends-on: patch-109436 ("cryptodev: add dh verify option")

 drivers/common/qat/qat_adf/qat_pke.h | 24 ++
 drivers/crypto/qat/qat_asym.c| 64 +++-
 2 files changed, 87 insertions(+), 1 deletion(-)

diff --git a/drivers/common/qat/qat_adf/qat_pke.h 
b/drivers/common/qat/qat_adf/qat_pke.h
index c727e4e1af..5c6569adf5 100644
--- a/drivers/common/qat/qat_adf/qat_pke.h
+++ b/drivers/common/qat/qat_adf/qat_pke.h
@@ -326,4 +326,28 @@ get_ecpm_function(struct rte_crypto_asym_xform *xform)
return qat_function;
 }
 
+static struct qat_asym_function
+get_ec_verify_function(struct rte_crypto_asym_xform *xform)
+{
+   struct qat_asym_function qat_function;
+
+   switch (xform->ec.curve_id) {
+   case RTE_CRYPTO_EC_GROUP_SECP256R1:
+   qat_function.func_id = MATHS_POINT_VERIFY_GFP_L256;
+   qat_function.bytesize = 32;
+   break;
+   case RTE_CRYPTO_EC_GROUP_SECP384R1:
+   qat_function.func_id = MATHS_POINT_VERIFY_GFP_L512;
+   qat_function.bytesize = 64;
+   break;
+   case RTE_CRYPTO_EC_GROUP_SECP521R1:
+   qat_function.func_id = MATHS_POINT_VERIFY_GFP_521;
+   qat_function.bytesize = 66;
+   break;
+   default:
+   qat_function.func_id = 0;
+   }
+   return qat_function;
+}
+
 #endif
diff --git a/drivers/crypto/qat/qat_asym.c b/drivers/crypto/qat/qat_asym.c
index 5dccd26201..8f27219583 100644
--- a/drivers/crypto/qat/qat_asym.c
+++ b/drivers/crypto/qat/qat_asym.c
@@ -831,7 +831,7 @@ dh_mod_set_input(struct rte_crypto_asym_op *asym_op,
 }
 
 static int
-ecdh_set_input(struct rte_crypto_asym_op *asym_op,
+ecdh_set_input_phase(struct rte_crypto_asym_op *asym_op,
struct icp_qat_fw_pke_request *qat_req,
struct qat_asym_op_cookie *cookie,
struct rte_crypto_asym_xform *xform)
@@ -888,6 +888,65 @@ ecdh_set_input(struct rte_crypto_asym_op *asym_op,
 }
 
 static int
+ecdh_set_input_verify(struct rte_crypto_asym_op *asym_op,
+   struct icp_qat_fw_pke_request *qat_req,
+   struct qat_asym_op_cookie *cookie,
+   struct rte_crypto_asym_xform *xform)
+{
+   struct qat_asym_function qat_function;
+   uint32_t qat_func_alignsize, func_id;
+   int curve_id;
+
+   curve_id = pick_curve(xform);
+   if (curve_id < 0) {
+   QAT_LOG(DEBUG, "Incorrect elliptic curve");
+   return -EINVAL;
+   }
+
+   qat_function = get_ec_verify_function(xform);
+   func_id = qat_function.func_id;
+   if (func_id == 0) {
+   QAT_LOG(ERR, "Cannot obtain functionality id");
+   return -EINVAL;
+   }
+   qat_func_alignsize = RTE_ALIGN_CEIL(qat_function.bytesize, 8);
+
+   SET_PKE_LN(asym_op->dh.pub_point.x, qat_func_alignsize, 0);
+   SET_PKE_LN(asym_op->dh.pub_point.y, qat_func_alignsize, 1);
+   SET_PKE_LN_EC(curve[curve_id], p, 2);
+   SET_PKE_LN_EC(curve[curve_id], a, 3);
+   SET_PKE_LN_EC(curve[curve_id], b, 4);
+
+   cookie->alg_bytesize = curve[curve_id].bytesize;
+   cookie->qat_func_alignsize = qat_func_alignsize;
+   qat_req->pke_hdr.cd_pars.func_id = func_id;
+   qat_req->input_param_count =
+   5;
+   qat_req->output_param_count =
+   0;
+
+   HEXDUMP("x", cookie->input_array[0], qat_func_alignsize);
+   HEXDUMP("y", cookie->input_array[1], qat_func_alignsize);
+   HEXDUMP("p", cookie->input_array[2], qat_func_alignsize);
+   HEXDUMP("a", cookie->input_array[3], qat_func_alignsize);
+   HEXDUMP("b", cookie->input_array[4], qat_func_alignsize);
+
+   return 0;
+}
+
+static int
+ecdh_set_input(struct rte_crypto_asym_op *asym_op,
+   struct icp_qat_fw_pke_request *qat_req,
+   struct qat_asym_op_cookie *cookie,
+   struct rte_crypto_asym_xform *xform)
+{
+   if (asym_op->dh.op_type == RTE_CRYPTO_ASYM_OP_KEY_VERIFY)
+   return ecdh_set_input_verify(asym_op, qat_req, cookie, xform);
+   else
+   return ecdh_set_input_phase(asym_op, qat_req, cookie, xform);
+}
+
+static int
 dh_set_input(struct rte_crypto_asym_op *asym_op,
struct icp_qat_fw_pke_request *qat_req,
struct qat_asym_op_cookie *cookie,
@@ -935,6 +994,9 @@ ecdh_collect(struct rte_crypto_asym_op *asym_op,
uint32_t qat_func_alignsize = cookie->qat_func_alignsize;
uint32_t ltrim = qat_func_alignsize - alg_bytesize;
 
+   if (asym_op->dh.op_type == RTE_CRYPTO_ASYM_OP_KEY_VERIFY)
+   return RTE_CRYPTO_OP_STATUS_SUCCESS;
+
if (asym_op->dh.op_type == RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE) {
asym_op->dh.pub_point.x.length = alg_bytesize;

Re: i40e QinQ Offload w/ NVM 8.40 not working

2022-04-07 Thread Ben Magistro
Hello,

We were able to narrow it down to an NVM change between 8.30 (v26.2) and
8.40 (v26.4).  The release notes for v26.2 indicate the wrong NVM version.
We are trying to reach out to Intel as well to report the issue and get
additional assistance.  If anyone can assist with that, it would be
appreciated.

Cheers,

Ben

On Mon, Apr 4, 2022 at 11:36 AM Ben Magistro  wrote:

> Hello,
>
> Wanted to follow up with some additional testing results.  I believe this
> is a bug at the NVM firmware level but still want someone else to confirm.
> We can easily retest or change parameters of testpmd to provide additional
> information if desired.  In parallel to this we will be trying to reach out
> to Intel and Dell (Intel branded card with firmware provided by Dell) to
> report this bug for additional follow up.
>
> Device configuration:
> traffic gen (trex) --> sw1 (basic vlan -- vl 200) --> sw2 (qinq push -- vl
> 300) -- dut (testpmd)
>
> OS: CentOS 7.9
> DPDK 21.11 (different than initial report, used to move to a current
> version and try to rule out other issues, but same issue)
> testpmd cmd: sudo /tmp/dpdk-testpmd -c 0x -- -i --enable-hw-vlan
> --enable-hw-vlan-strip --enable-hw-vlan-extend --enable-hw-qinq-strip
> NVM version(s): 8.15 (working) and 8.40 (non-working)
>
> Offload configuration (these were the same under both 8.15 and 8.40 so
> only providing one copy)
> testpmd> show port 0 rx_offload configuration
> Rx Offloading Configuration of port 0 :
>   Port : VLAN_STRIP QINQ_STRIP VLAN_FILTER VLAN_EXTEND
>   Queue[ 0] : VLAN_STRIP QINQ_STRIP VLAN_FILTER VLAN_EXTEND
>
> testpmd> show port 1 rx_offload configuration
> Rx Offloading Configuration of port 1 :
>   Port : VLAN_STRIP QINQ_STRIP VLAN_FILTER VLAN_EXTEND
>   Queue[ 0] : VLAN_STRIP QINQ_STRIP VLAN_FILTER VLAN_EXTEND
>
> testpmd> show port 2 rx_offload configuration
> Rx Offloading Configuration of port 2 :
>   Port : VLAN_STRIP QINQ_STRIP VLAN_FILTER VLAN_EXTEND
>   Queue[ 0] : VLAN_STRIP QINQ_STRIP VLAN_FILTER VLAN_EXTEND
>
> testpmd> show port 3 rx_offload configuration
> Rx Offloading Configuration of port 3 :
>   Port : VLAN_STRIP QINQ_STRIP VLAN_FILTER VLAN_EXTEND
>   Queue[ 0] : VLAN_STRIP QINQ_STRIP VLAN_FILTER VLAN_EXTEND
>
> When running testpmd with the above cmdline parameters and then setting
> "set fwd rxonly" we observe the following results with the different
> firmwares.
> 8.15 (working)
>   src=F8:F2:1E:31:96:D0 - dst=F8:F2:1E:31:96:D1 - type=0x0800 -
> length=74 - nb_segs=1 - QinQ VLAN tci=0xc8, VLAN tci outer=0x12c - hw
> ptype: L2_ETHER L3_IPV4_EXT_UNKNOWN L4_TCP  - sw ptype: L2_ETHER L3_IPV4
> L4_TCP  - l2_len=14 - l3_len=20 - l4_len=40 - Receive queue=0x0
> ol_flags: RTE_MBUF_F_RX_VLAN RTE_MBUF_F_RX_L4_CKSUM_GOOD
> RTE_MBUF_F_RX_IP_CKSUM_GOOD RTE_MBUF_F_RX_VLAN_STRIPPED
> RTE_MBUF_F_RX_QINQ_STRIPPED RTE_MBUF_F_RX_QINQ
> RTE_MBUF_F_RX_OUTER_L4_CKSUM_UNKNOWN
>
> 8.40 (non working)
>  src=F8:F2:1E:31:96:D0 - dst=F8:F2:1E:31:96:D1 - type=0x8100 -
> length=78 - nb_segs=1 - VLAN tci=0xc8 - hw ptype: L2_ETHER
> L3_IPV4_EXT_UNKNOWN L4_TCP  - sw ptype: L2_ETHER_VLAN L3_IPV4 L4_TCP  -
> l2_len=18 - l3_len=20 - l4_len=40 - Receive queue=0x0
> ol_flags: RTE_MBUF_F_RX_VLAN RTE_MBUF_F_RX_L4_CKSUM_GOOD
> RTE_MBUF_F_RX_IP_CKSUM_GOOD RTE_MBUF_F_RX_VLAN_STRIPPED
> RTE_MBUF_F_RX_OUTER_L4_CKSUM_UNKNOWN
>
>
> Thanks,
>
> Ben
>
> On Fri, Apr 1, 2022 at 11:13 AM Ben Magistro  wrote:
>
>> Hello,
>>
>> We recently needed to apply a firmware upgrade for some XXV710s to
>> resolve a FEC issue (I'd have to find the details in email) but applied
>> this same firmware to other nics (XL710s) to maintain a
>> consistent baseline.  In testing we have seen the NVM 8.40 resolve the FEC
>> issue but it introduces an issue with QinQ offloading + stripping.  When
>> running NVM 8.15 (previous version), we could send QinQ traffic, and the
>> nic would properly strip and store the values into vlan_tci and
>> vlan_tci_outer as expected.  When running NVM 8.40 (FEC fix version)
>> sending QinQ traffic is only stripping the inner tag.  The code we are
>> using has not changed.
>>
>> I added some additional lines to drivers/net/i40e/i40e_rxtx.c to help
>> troubleshoot this, specifically one to log the vlans and one to log
>> ext_status.  In comparing the two, ext_status is 0 under 8.40 while it is 1
>> under 8.15.  This does correspond with not running the second layer
>> processing code in the i40e_rxtx.c (line ~87).  We will continue to
>> investigate but would like to get this out there sooner and ask for
>> assistance in confirming this behavior.
>>
>> This is a Dell based card so the firmware package used to
>> update/downgrade the card is coming from Dell and not Intel directly.  It
>> is our assumption that the firmware in general should be pretty consistent
>> between the two.
>>
>> Traffic is being generated by trex with the vlan nesting being pushed by
>> some Juniper switches.  Both vlan tags are 0x8100.
>>
>> OS: CentOS 7.9
>> DPDK: 2

[PATCH v1 0/4] [RFC] Testpmd RPC API

2022-04-07 Thread ohilyard
From: Owen Hilyard 

Currently, DTS uses Testpmd for most of its testing. This has been 
successful in reducing the need to create more test apps, but it has a few 
drawbacks. First, if some part of DPDK is not exposed via Testpmd or one of the 
example applications, for the purposes of DTS it is not testable. This is a 
situation I’d like to avoid. However, adding new functionality to Testpmd is 
labor-intensive. Testpmd currently uses a hand-written LL(1) parser 
(https://en.wikipedia.org/wiki/LL_parser) to parse command line options. This 
makes adding new functionality difficult since the parser is stored as a series 
of several thousand line long lookup tables. To look at it another way, 64% of 
the 52238 lines in Testpmd are related to command line input in some way. The 
command line interface of testpmd also presents several challenges for the 
underlying implementation, since it requires that everything a user might want 
to reference is identified via something that is reasonable to ask a user to 
type. As of right now, this is handled via either strings or integers. This can 
be handled by creating a global registry for objects, but it is still extra 
work that I think can be avoided. In addition, this leads to more places where 
things can go wrong. 

This is what DTS running a single command in testpmd looks like right now:
https://drive.google.com/file/d/1hvTcjfVdh8-I3CUNoq6bx82EuNQSK6qW/view?usp=sharing

This approach has a number of disadvantages. First, it requires assembling 
all commands as strings inside of the test suite and sending them through a 
full round trip of SSH. This means that any non-trivial command, such as 
creating an RTE flow, will involve a lot of string templating. This normally 
wouldn’t be a big issue, except that some of the test suites are designed to 
send hundreds of commands over the course of a test, paying the cost of an SSH 
round trip for each. Once Testpmd has the commands, it will then call the 
appropriate functions inside of DPDK, and then print out all of the state to 
standard out. All of this is sent back to DTS, where the author of the test 
case then needs to handle all possible outputs of Trex, often by declaring the 
presence of a single word or short phrase in the output as meaning success or 
failure. 
In my opinion, this is something that is perfectly fine for humans to interact 
with, but it causes a lot of issues with automation due to its inherent 
inflexibility and the less-than-ideal methods of information transfer. This is 
why I am proposing the creation of an automation-oriented pmd, with a focus on 
exposing as much of DPDK as possible.

https://drive.google.com/file/d/1wj4-RnFPVERCzM8b68VJswAOEI9cg-X8/view?usp=sharing
 

That diagram is a high-level overview of the design, which explicitly 
excludes implementation details. However, it already has some benefits. First, 
making DPDK do something is a normal method call, instead of needing to format 
things into a string. This provides a much better interface for people working 
in both DTS and DPDK. Second, the ability to return structured data means that 
there won’t be parsers on both sides of communication anymore. Structured data 
also allows much more verbosity, since it is no longer an interface designed 
for humans. If a test case author needs to return the bytes of every received 
packet back to DTS for comparison with the expected value, they can. If you 
need to return a pointer for DTS to use later, that becomes reasonable. Simply 
moving to shuffling structured data around and using RPC already provides a lot 
of benefits. 
The next obvious question would be what to use for the implementation. 
The initial attempt was made using Python on both sides and the standard 
library xmlrpc module. The RPC aspect of this approach worked very well, with 
the ability to send arbitrary python objects back and forth between DTS and 
app. However, having Python interacting with DPDK has a few issues. First, DPDK 
is generally very multi-threaded and the most common implementation of Python, 
CPython, does not have concurrency. It has something known as the global 
interpreter lock, which is a global mutex. This makes it very difficult to 
interact with blocking, multi-threaded code. The other issue is that I was not 
able to find a binding generator that I feel would be sufficient for DPDK. Many 
generators assumed sizeof(int) == 4 or had other portability issues such as 
assuming GCC or Clang as a C compiler. Others focused on some subset of C, 
meaning they would throw errors on alignment annotations. 
Given this, I decided to look for cross-language RPC libraries. Although 
libraries exist for performing xmlrpc in C, they generally appeared quite 
difficult to use and required a lot of manual work. The next best option was 
gRPC. gRPC allows using a simple language, protobuf, with a language extension 
for rpc. It provides code generation to make it easy to use multiple langua

[PATCH v1 4/4] app/test-pmd-api: Implementation files for the API

2022-04-07 Thread ohilyard
From: Owen Hilyard 

As of right now, this is a fairly direct port. As such, most of the main
file from test-acl is present in api_impl.cc. If this proof of concept
is going to expand into a usable application, the acl test helper can be
moved to another file to help keep the service definition file clean.
The header file must remain a C header file in order to be able to be
included in the main file. At this point, the main file is just a stub
that starts the RPC server, but I have left it so that any extensions
can be written in C and the C++ parts of this app can be easily
encapsulated.

Signed-off-by: Owen Hilyard 
---
 app/test-pmd-api/api_impl.cc | 1160 ++
 app/test-pmd-api/api_impl.h  |   10 +
 app/test-pmd-api/main.c  |   11 +
 3 files changed, 1181 insertions(+)
 create mode 100644 app/test-pmd-api/api_impl.cc
 create mode 100644 app/test-pmd-api/api_impl.h
 create mode 100644 app/test-pmd-api/main.c

diff --git a/app/test-pmd-api/api_impl.cc b/app/test-pmd-api/api_impl.cc
new file mode 100644
index 00..6972172598
--- /dev/null
+++ b/app/test-pmd-api/api_impl.cc
@@ -0,0 +1,1160 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ * Copyright(c) 2022 University of New Hampshire
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+
+#include "api.pb.h"
+#include "api.grpc.pb.h"
+
+/*
+C++ includes
+*/
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define PRINT_USAGE_START "%s [EAL options] --\n"
+
+#define RTE_LOGTYPE_TESTACL RTE_LOGTYPE_USER1
+
+#define APP_NAME "TESTACL"
+
+#define GET_CB_FIELD(in, fd, base, lim, dlm)   
\
+do {   \
+unsigned long val; \
+char *end_fld; \
+errno = 0; \
+val = strtoul((in), &end_fld, (base)); \
+if (errno != 0 || end_fld[0] != (dlm) || val > (lim))  \
+return -EINVAL;\
+(fd) = (typeof(fd))val;\
+(in) = end_fld + 1;\
+} while (0)
+
+#define OPT_RULE_FILE "rulesf"
+#define OPT_TRACE_FILE "tracef"
+#define OPT_RULE_NUM "rulenum"
+#define OPT_TRACE_NUM "tracenum"
+#define OPT_TRACE_STEP "tracestep"
+#define OPT_SEARCH_ALG "alg"
+#define OPT_BLD_CATEGORIES "bldcat"
+#define OPT_RUN_CATEGORIES "runcat"
+#define OPT_MAX_SIZE "maxsize"
+#define OPT_ITER_NUM "iter"
+#define OPT_VERBOSE "verbose"
+#define OPT_IPV6 "ipv6"
+
+#define TRACE_DEFAULT_NUM 0x1
+#define TRACE_STEP_MAX 0x1000
+#define TRACE_STEP_DEF 0x100
+
+#define RULE_NUM 0x1
+
+#define COMMENT_LEAD_CHAR '#'
+
+enum {
+   DUMP_NONE, DUMP_SEARCH, DUMP_PKT, DUMP_MAX
+};
+
+struct acl_alg {
+   const char *name;
+   enum rte_acl_classify_alg alg;
+};
+
+static const struct acl_alg acl_alg[] = {
+   {
+   .name = "scalar",
+   .alg = RTE_ACL_CLASSIFY_SCALAR,
+   },
+   {
+   .name = "sse",
+   .alg = RTE_ACL_CLASSIFY_SSE,
+   },
+   {
+   .name = "avx2",
+   .alg = RTE_ACL_CLASSIFY_AVX2,
+   },
+   {
+   .name = "neon",
+   .alg = RTE_ACL_CLASSIFY_NEON,
+   },
+   {
+   .name = "altivec",
+   .alg = RTE_ACL_CLASSIFY_ALTIVEC,
+   },
+   {
+   .name = "avx512x16",
+   .alg = RTE_ACL_CLASSIFY_AVX512X16,
+   },
+   {
+   .name = "avx512x32",
+   .alg = RTE_ACL_CLASSIFY_AVX512X32,
+   },
+};
+
+static struct {
+   const char *prgname;
+   const char *rule_file;
+   const char *trace_file;
+   size_t max_size;
+   uint32_t bld_categories;
+   uint32_t run_categories;
+   uint32_t nb_rules;
+   uint32_t nb_traces;
+   uint32_t trace_step;
+   uint32_t trace_sz;
+   uint32_t iter_num;
+   uint32_t verbose;
+   uint32_t ipv6;
+   struct acl_alg alg;
+   uint32_t used_traces;
+   void *traces;
+   struct rte_acl_ctx *acx;
+} config = {
+   .prgname = NULL,
+   .rule_file = NULL,
+   .trace_file = NULL,
+   .max_size = 0,
+   .bld_categories = 3,
+   .run_categories = 1,
+   .nb_rules = RULE_NUM,
+   .nb

[PATCH v1 1/4] app/test-pmd-api: Add C++ Compiler

2022-04-07 Thread ohilyard
From: Owen Hilyard 

Adds a C++ compiler to the project, which is currently enabled by
default for ease of testing. Meson currently lacks a way to try to get a
compiler, and failing to find a compiler for a language always causes a
hard error, so this is the only workable approach.

Signed-off-by: Owen Hilyard 
---
 meson.build   | 3 +++
 meson_options.txt | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/meson.build b/meson.build
index 937f6110c0..01d47100f2 100644
--- a/meson.build
+++ b/meson.build
@@ -31,6 +31,9 @@ endif
 
 # set up some global vars for compiler, platform, configuration, etc.
 cc = meson.get_compiler('c')
+if get_option('use_cpp')
+cxx = meson.get_compiler('cpp')
+endif
 dpdk_source_root = meson.current_source_dir()
 dpdk_build_root = meson.current_build_dir()
 dpdk_conf = configuration_data()
diff --git a/meson_options.txt b/meson_options.txt
index 7c220ad68d..9461d194a1 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -48,3 +48,5 @@ option('tests', type: 'boolean', value: true, description:
'build unit tests')
 option('use_hpet', type: 'boolean', value: false, description:
'use HPET timer in EAL')
+option('use_cpp', type: 'boolean', value: true, description: 
+   'enable components requiring a C++ compiler.')
\ No newline at end of file
-- 
2.30.2



[PATCH v1 2/4] app/test-pmd-api: Add POC with gRPC deps

2022-04-07 Thread ohilyard
From: Owen Hilyard 

The new app is disabled if the dependencies are not present, in order to
avoid breaking the build on any system that does not have gRPC
installed. The meson file for the app is heavily derived from
that of testpmd.

Signed-off-by: Owen Hilyard 
---
 app/meson.build  | 17 +++
 app/test-pmd-api/meson.build | 96 
 2 files changed, 113 insertions(+)
 create mode 100644 app/test-pmd-api/meson.build

diff --git a/app/meson.build b/app/meson.build
index 93d8c15032..3dfd5c003e 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -20,6 +20,23 @@ apps = [
 'test-sad',
 ]
 
+if get_option('use_cpp')
+protoc = find_program('protoc', required : false)
+protobuf_dep = dependency('protobuf', required : false)
+grpc_cpp_plugin = find_program('grpc_cpp_plugin', required: false)
+grpc_python_plugin = find_program('grpc_python_plugin', required: false)
+grpc_dep = dependency('grpc', required: false)
+grpcpp_dep = dependency('grpc++', required: false)
+
+if protoc.found() and protobuf_dep.found() and grpc_cpp_plugin.found() and 
grpc_python_plugin.found() and grpc_dep.found() and grpcpp_dep.found()
+apps += [
+'test-pmd-api'
+]
+endif
+
+endif
+
+
 default_cflags = machine_args + ['-DALLOW_EXPERIMENTAL_API']
 default_ldflags = []
 if get_option('default_library') == 'static' and not is_windows
diff --git a/app/test-pmd-api/meson.build b/app/test-pmd-api/meson.build
new file mode 100644
index 00..7438098e9d
--- /dev/null
+++ b/app/test-pmd-api/meson.build
@@ -0,0 +1,96 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2017 Intel Corporation
+
+# override default name to drop the hyphen
+name = 'testpmd-api'
+cflags += [
+'-Wno-deprecated-declarations'
+]
+sources += files(
+'main.c',
+'api_impl.cc'
+)
+
+ldflags += [
+'-ldl',
+'-lgrpc++_reflection',
+]
+
+ext_deps += [protobuf_dep, grpc_dep, grpcpp_dep, dependency('threads')]
+
+if dpdk_conf.has('RTE_HAS_JANSSON')
+ext_deps += jansson_dep
+endif
+
+deps += ['ethdev', 'cmdline', 'bus_pci']
+if dpdk_conf.has('RTE_CRYPTO_SCHEDULER')
+deps += 'crypto_scheduler'
+endif
+if dpdk_conf.has('RTE_LIB_BITRATESTATS')
+deps += 'bitratestats'
+endif
+if dpdk_conf.has('RTE_LIB_BPF')
+deps += 'bpf'
+endif
+if dpdk_conf.has('RTE_LIB_GRO')
+deps += 'gro'
+endif
+if dpdk_conf.has('RTE_LIB_GSO')
+deps += 'gso'
+endif
+if dpdk_conf.has('RTE_LIB_LATENCYSTATS')
+deps += 'latencystats'
+endif
+if dpdk_conf.has('RTE_LIB_METRICS')
+deps += 'metrics'
+endif
+if dpdk_conf.has('RTE_LIB_PDUMP')
+deps += 'pdump'
+endif
+if dpdk_conf.has('RTE_NET_BOND')
+deps += 'net_bond'
+endif
+if dpdk_conf.has('RTE_NET_BNXT')
+deps += 'net_bnxt'
+endif
+if dpdk_conf.has('RTE_NET_I40E')
+deps += 'net_i40e'
+endif
+if dpdk_conf.has('RTE_NET_IXGBE')
+deps += 'net_ixgbe'
+endif
+if dpdk_conf.has('RTE_NET_DPAA')
+deps += ['bus_dpaa', 'mempool_dpaa', 'net_dpaa']
+endif
+
+if meson.version().version_compare('>=0.55')
+grpc_cpp_plugin_path = grpc_cpp_plugin.full_path()
+grpc_python_plugin_path = grpc_python_plugin.full_path()
+else
+grpc_cpp_plugin_path = grpc_cpp_plugin.path()
+grpc_python_plugin_path = grpc_python_plugin.path()
+endif
+
+
+cpp_generator = generator(protoc, 
+output: ['@BASENAME@.pb.cc', '@BASENAME@.pb.h', 
'@BASENAME@.grpc.pb.cc', '@BASENAME@.grpc.pb.h'],
+arguments : [
+'--proto_path=@CURRENT_SOURCE_DIR@',
+
'--plugin=protoc-gen-grpc=@0@'.format(grpc_cpp_plugin_path), 
+'--cpp_out=@BUILD_DIR@',
+'--grpc_out=@BUILD_DIR@',
+'@INPUT@'
+])
+
+python_generator = generator(protoc, 
+output: ['@BASENAME@_pb2.py', '@BASENAME@_pb2_grpc.py'],
+arguments : [
+'--proto_path=@CURRENT_SOURCE_DIR@',
+
'--plugin=protoc-gen-grpc=@0@'.format(grpc_python_plugin_path), 
+'--python_out=@BUILD_DIR@',
+'--grpc_out=@BUILD_DIR@',
+'@INPUT@'
+])
+
+sources += cpp_generator.process('api.proto')
+sources += python_generator.process('api.proto')
\ No newline at end of file
-- 
2.30.2



[PATCH v1 3/4] app/test-pmd-api: Add protobuf file

2022-04-07 Thread ohilyard
From: Owen Hilyard 

This file contains the gRPC definitions for the api as it currently
stands.

Signed-off-by: Owen Hilyard 
---
 app/test-pmd-api/api.proto | 12 
 1 file changed, 12 insertions(+)
 create mode 100644 app/test-pmd-api/api.proto

diff --git a/app/test-pmd-api/api.proto b/app/test-pmd-api/api.proto
new file mode 100644
index 00..ba52e379e9
--- /dev/null
+++ b/app/test-pmd-api/api.proto
@@ -0,0 +1,12 @@
+syntax = "proto3";
+import "google/protobuf/empty.proto";
+
+message AclSetupArgs {
+repeated string args = 1;
+}
+
+service TestpmdAPI {
+rpc acl_setup (AclSetupArgs) returns (google.protobuf.Empty);
+rpc acl_search (google.protobuf.Empty) returns (google.protobuf.Empty);
+rpc acl_cleanup_config (google.protobuf.Empty) returns 
(google.protobuf.Empty);
+}
\ No newline at end of file
-- 
2.30.2



Re: [PATCH v2 0/2] rte_dump_stack: improvements

2022-04-07 Thread Stephen Hemminger
On Thu, 7 Apr 2022 14:45:07 +0200
David Marchand  wrote:

> On Sat, Feb 12, 2022 at 7:44 PM Stephen Hemminger
>  wrote:
> >
> > This is update to earlier RFC. Add some more comments and changes
> > to have common code for Linux and FreeBSD
> >
> > Stephen Hemminger (2):
> >   eal_debug: do not use malloc in rte_dump_stack
> >   eal: common rte_dump_stack for both Linux and FreeBSD
> >
> >  lib/eal/freebsd/eal_debug.c | 43 
> >  lib/eal/freebsd/meson.build |  1 -
> >  lib/eal/linux/eal_debug.c   | 43 
> >  lib/eal/linux/meson.build   |  1 -
> >  lib/eal/unix/eal_debug.c| 65 +
> >  lib/eal/unix/meson.build|  5 +--
> >  6 files changed, 68 insertions(+), 90 deletions(-)
> >  delete mode 100644 lib/eal/freebsd/eal_debug.c
> >  delete mode 100644 lib/eal/linux/eal_debug.c
> >  create mode 100644 lib/eal/unix/eal_debug.c  
> 
> Strange to change only the Linux implementation as a first patch, then
> merge implementations in a second time effectively changing FreeBSD
> implementation in what is presented in commitlog as a factorisation
> cleanup.
> Please invert the patches.
> 
> Besides, the series does not compile on current main.
> It's probably a result of the header inclusion cleanup we had in
> v22.03, but I prefer you check.
> 
> 
> Thanks.


As I looked at it more, there was more there.
Turns out that printf and therefore rte_log() is not signal safe.
There is a version of backtrace_symbols_fd that just uses writev() on glibc
for Linux, so that is a better alternative, but the output format changes.

But the BSD version of backtrace_symbols_fd uses printf and is therefore not 
signal safe.
Not sure if that matters.
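
For reference, a minimal sketch of the signal-safe variant discussed above
(glibc-specific; backtrace_symbols_fd() writes straight to the fd instead of
allocating like backtrace_symbols() or a printf-based path would):

#include <execinfo.h>
#include <unistd.h>

static void
dump_stack_signal_safe(void)
{
	void *frames[64];
	int n = backtrace(frames, 64);

	/* No malloc/printf here, so this is usable inside a signal handler
	 * on glibc; the BSD libexecinfo implementation may differ. */
	backtrace_symbols_fd(frames, n, STDERR_FILENO);
}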


[PATCH] examples/kni: add interrupt mode to receive packets

2022-04-07 Thread Tianli Lai
The kni application has two main-loop threads whose CPU utilization
reaches 100 percent: a writing thread and a reading thread. I think
using interrupt mode in the reading thread would reduce that thread's
CPU utilization.

Signed-off-by: Tianli Lai 
---
 examples/kni/main.c | 135 +---
 1 file changed, 128 insertions(+), 7 deletions(-)
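
Sketch of the main-loop shape this change enables (illustrative only, not the
patch code; "stop" is a placeholder exit flag and "p" the kni_port_params):

/* Poll as usual; only arm the RX interrupt and sleep after many
 * consecutive empty polls so latency stays low under load. */
uint32_t zero_rx = 0;

while (!stop) {
	int ret = kni_ingress(p);   /* 1 == nothing received, per this patch */

	if (ret != 1) {
		zero_rx = 0;
		continue;
	}
	if (intr_rx_en && ++zero_rx > MIN_ZERO_POLL_COUNT) {
		turn_on_off_intr(p->port_id, 0, true);
		sleep_until_rx_interrupt(1, rte_lcore_id());
		turn_on_off_intr(p->port_id, 0, false);
		zero_rx = 0;
	}
}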

diff --git a/examples/kni/main.c b/examples/kni/main.c
index e99ef5c38a..eb9a7ce863 100644
--- a/examples/kni/main.c
+++ b/examples/kni/main.c
@@ -75,6 +75,7 @@
 #define KNI_SECOND_PER_DAY  86400
 
 #define KNI_MAX_KTHREAD 32
+#define MIN_ZERO_POLL_COUNT100
 /*
  * Structure of port parameters
  */
@@ -98,6 +99,8 @@ static struct rte_eth_conf port_conf = {
},
 };
 
+/* ethernet addresses of ports */
+static rte_spinlock_t locks[RTE_MAX_ETHPORTS];
 /* Mempool for mbufs */
 static struct rte_mempool * pktmbuf_pool = NULL;
 
@@ -107,6 +110,8 @@ static uint32_t ports_mask = 0;
 static int promiscuous_on = 0;
 /* Monitor link status continually. off by default. */
 static int monitor_links;
+/* rx set in interrupt mode off by default. */
+static int intr_rx_en;
 
 /* Structure type for recording kni interface specific stats */
 struct kni_interface_stats {
@@ -206,7 +211,7 @@ kni_burst_free_mbufs(struct rte_mbuf **pkts, unsigned num)
 /**
  * Interface to burst rx and enqueue mbufs into rx_q
  */
-static void
+static int
 kni_ingress(struct kni_port_params *p)
 {
uint8_t i;
@@ -214,9 +219,9 @@ kni_ingress(struct kni_port_params *p)
unsigned nb_rx, num;
uint32_t nb_kni;
struct rte_mbuf *pkts_burst[PKT_BURST_SZ];
-
+   int ret = 0;
if (p == NULL)
-   return;
+   return -1;
 
nb_kni = p->nb_kni;
port_id = p->port_id;
@@ -225,8 +230,10 @@ kni_ingress(struct kni_port_params *p)
nb_rx = rte_eth_rx_burst(port_id, 0, pkts_burst, PKT_BURST_SZ);
if (unlikely(nb_rx > PKT_BURST_SZ)) {
RTE_LOG(ERR, APP, "Error receiving from eth\n");
-   return;
+   return -1;
}
+   if (nb_rx == 0)
+   ret = 1;
/* Burst tx to kni */
num = rte_kni_tx_burst(p->kni[i], pkts_burst, nb_rx);
if (num)
@@ -239,6 +246,7 @@ kni_ingress(struct kni_port_params *p)
kni_stats[port_id].rx_dropped += nb_rx - num;
}
}
+   return ret;
 }
 
 /**
@@ -277,12 +285,95 @@ kni_egress(struct kni_port_params *p)
}
 }
 
+/**
+ * force polling thread sleep until one-shot rx interrupt triggers
+ * @param port_id
+ *  Port id.
+ * @param queue_id
+ *  Rx queue id.
+ * @return
+ *  0 on success
+ */
+static int
+sleep_until_rx_interrupt(int num, int lcore)
+{
+   /*
+* we want to track when we are woken up by traffic so that we can go
+* back to sleep again without log spamming. Avoid cache line sharing
+* to prevent threads stepping on each others' toes.
+*/
+   static struct {
+   bool wakeup;
+   } __rte_cache_aligned status[RTE_MAX_LCORE];
+   struct rte_epoll_event event[num];
+   int n, i;
+   uint16_t port_id;
+   uint8_t queue_id;
+   void *data;
+
+   if (status[lcore].wakeup) {
+   RTE_LOG(INFO, APP,
+   "lcore %u sleeps until interrupt triggers\n",
+   rte_lcore_id());
+   }
+
+   n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, event, num, 10);
+   for (i = 0; i < n; i++) {
+   data = event[i].epdata.data;
+   port_id = ((uintptr_t)data) >> CHAR_BIT;
+   queue_id = ((uintptr_t)data) &
+   RTE_LEN2MASK(CHAR_BIT, uint8_t);
+   RTE_LOG(INFO, APP,
+   "lcore %u is waked up from rx interrupt on"
+   " port %d queue %d\n",
+   rte_lcore_id(), port_id, queue_id);
+   }
+   status[lcore].wakeup = n != 0;
+
+   return 0;
+}
+
+static void
+turn_on_off_intr(uint16_t port_id, uint16_t queue_id, bool on)
+{
+   rte_spinlock_lock(&(locks[port_id]));
+   if (on)
+   rte_eth_dev_rx_intr_enable(port_id, queue_id);
+   else
+   rte_eth_dev_rx_intr_disable(port_id, queue_id);
+   rte_spinlock_unlock(&(locks[port_id]));
+}
+
+static int event_register(void)
+{
+   uint8_t queueid;
+   uint16_t portid;
+   uint32_t data;
+   int ret;
+
+   portid = 0;
+   queueid = 0;
+   data = portid << CHAR_BIT | queueid;
+
+   ret = rte_eth_dev_rx_intr_ctl_q(portid, queueid,
+   RTE_EPOLL_PER_THREAD,
+   RTE_INTR_EVENT_ADD,
+   (void *)((uintptr_t)data));
+   if (ret)
+   return

[PATCH v3] examples/kni: add interrupt mode to receive packets

2022-04-07 Thread Tianli Lai
The kni application has two main-loop threads whose CPU utilization
reaches 100 percent: a writing thread and a reading thread. I think
using interrupt mode in the reading thread would reduce that thread's
CPU utilization.

Signed-off-by: Tianli Lai 
---
 examples/kni/main.c | 134 +---
 1 file changed, 127 insertions(+), 7 deletions(-)

diff --git a/examples/kni/main.c b/examples/kni/main.c
index e99ef5c38a..72f2d0c6f4 100644
--- a/examples/kni/main.c
+++ b/examples/kni/main.c
@@ -75,6 +75,7 @@
 #define KNI_SECOND_PER_DAY  86400
 
 #define KNI_MAX_KTHREAD 32
+#define MIN_ZERO_POLL_COUNT100
 /*
  * Structure of port parameters
  */
@@ -98,6 +99,8 @@ static struct rte_eth_conf port_conf = {
},
 };
 
+/* ethernet addresses of ports */
+static rte_spinlock_t locks[RTE_MAX_ETHPORTS];
 /* Mempool for mbufs */
 static struct rte_mempool * pktmbuf_pool = NULL;
 
@@ -107,6 +110,8 @@ static uint32_t ports_mask = 0;
 static int promiscuous_on = 0;
 /* Monitor link status continually. off by default. */
 static int monitor_links;
+/* rx set in interrupt mode off by default. */
+static int intr_rx_en;
 
 /* Structure type for recording kni interface specific stats */
 struct kni_interface_stats {
@@ -206,7 +211,7 @@ kni_burst_free_mbufs(struct rte_mbuf **pkts, unsigned num)
 /**
  * Interface to burst rx and enqueue mbufs into rx_q
  */
-static void
+static int
 kni_ingress(struct kni_port_params *p)
 {
uint8_t i;
@@ -214,9 +219,9 @@ kni_ingress(struct kni_port_params *p)
unsigned nb_rx, num;
uint32_t nb_kni;
struct rte_mbuf *pkts_burst[PKT_BURST_SZ];
-
+   int ret = 0;
if (p == NULL)
-   return;
+   return -1;
 
nb_kni = p->nb_kni;
port_id = p->port_id;
@@ -225,8 +230,10 @@ kni_ingress(struct kni_port_params *p)
nb_rx = rte_eth_rx_burst(port_id, 0, pkts_burst, PKT_BURST_SZ);
if (unlikely(nb_rx > PKT_BURST_SZ)) {
RTE_LOG(ERR, APP, "Error receiving from eth\n");
-   return;
+   return -1;
}
+   if (nb_rx == 0)
+   ret = 1;
/* Burst tx to kni */
num = rte_kni_tx_burst(p->kni[i], pkts_burst, nb_rx);
if (num)
@@ -239,6 +246,7 @@ kni_ingress(struct kni_port_params *p)
kni_stats[port_id].rx_dropped += nb_rx - num;
}
}
+   return ret;
 }
 
 /**
@@ -277,12 +285,95 @@ kni_egress(struct kni_port_params *p)
}
 }
 
+/**
+ * force polling thread sleep until one-shot rx interrupt triggers
+ * @param port_id
+ *  Port id.
+ * @param queue_id
+ *  Rx queue id.
+ * @return
+ *  0 on success
+ */
+static int
+sleep_until_rx_interrupt(int num, int lcore)
+{
+   /*
+* we want to track when we are woken up by traffic so that we can go
+* back to sleep again without log spamming. Avoid cache line sharing
+* to prevent threads stepping on each others' toes.
+*/
+   static struct {
+   bool wakeup;
+   } __rte_cache_aligned status[RTE_MAX_LCORE];
+   struct rte_epoll_event event[num];
+   int n, i;
+   uint16_t port_id;
+   uint8_t queue_id;
+   void *data;
+
+   if (status[lcore].wakeup) {
+   RTE_LOG(INFO, APP,
+   "lcore %u sleeps until interrupt triggers\n",
+   rte_lcore_id());
+   }
+
+   n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, event, num, 10);
+   for (i = 0; i < n; i++) {
+   data = event[i].epdata.data;
+   port_id = ((uintptr_t)data) >> CHAR_BIT;
+   queue_id = ((uintptr_t)data) &
+   RTE_LEN2MASK(CHAR_BIT, uint8_t);
+   RTE_LOG(INFO, APP,
+   "lcore %u is waked up from rx interrupt on"
+   " port %d queue %d\n",
+   rte_lcore_id(), port_id, queue_id);
+   }
+   status[lcore].wakeup = n != 0;
+
+   return 0;
+}
+
+static void
+turn_on_off_intr(uint16_t port_id, uint16_t queue_id, bool on)
+{
+   rte_spinlock_lock(&(locks[port_id]));
+   if (on)
+   rte_eth_dev_rx_intr_enable(port_id, queue_id);
+   else
+   rte_eth_dev_rx_intr_disable(port_id, queue_id);
+   rte_spinlock_unlock(&(locks[port_id]));
+}
+
+static int event_register(void)
+{
+   uint8_t queueid;
+   uint16_t portid;
+   uint32_t data;
+   int ret;
+
+   portid = 0;
+   queueid = 0;
+   data = portid << CHAR_BIT | queueid;
+
+   ret = rte_eth_dev_rx_intr_ctl_q(portid, queueid,
+   RTE_EPOLL_PER_THREAD,
+   RTE_INTR_EVENT_ADD,
+   (void *)((uintptr_t)data));
+   if (ret)
+   return

[PATCH v2 0/3] Enable queue rate limit and quanta size configuration

2022-04-07 Thread Wenjun Wu
This patch set adds queue rate limit and quanta size configuration.
Quanta size can be changed via the driver devarg quanta_size=xxx. Quanta
size should be set to a value between 256 and 4096 and be a multiple
of 64.
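
For reference, the devarg would typically be passed on the EAL command
line, e.g. (illustrative PCI address and value only):

  dpdk-testpmd -a 0000:18:01.0,quanta_size=1024 -- -i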

v2: rework virtchnl

Wenjun Wu (3):
  common/iavf: support queue rate limit and quanta size configuration
  net/iavf: support queue rate limit configuration
  net/iavf: support quanta size configuration

 drivers/common/iavf/virtchnl.h |  50 +
 drivers/net/iavf/iavf.h|  16 +++
 drivers/net/iavf/iavf_ethdev.c |  40 +++
 drivers/net/iavf/iavf_tm.c | 190 +++--
 drivers/net/iavf/iavf_vchnl.c  |  51 +
 5 files changed, 339 insertions(+), 8 deletions(-)

-- 
2.25.1



[PATCH v2 1/3] common/iavf: support queue rate limit and quanta size configuration

2022-04-07 Thread Wenjun Wu
This patch adds new virtchnl opcodes and structures for rate limit
and quanta size configuration, which include:
1. VIRTCHNL_OP_CONFIG_QUEUE_BW, to configure max bandwidth for each
VF per queue.
2. VIRTCHNL_OP_CONFIG_QUANTA, to configure quanta size per queue.

Signed-off-by: Ting Xu 
Signed-off-by: Wenjun Wu 
---
 drivers/common/iavf/virtchnl.h | 50 ++
 1 file changed, 50 insertions(+)

diff --git a/drivers/common/iavf/virtchnl.h b/drivers/common/iavf/virtchnl.h
index 3e44eca7d8..249ae6ed23 100644
--- a/drivers/common/iavf/virtchnl.h
+++ b/drivers/common/iavf/virtchnl.h
@@ -164,6 +164,8 @@ enum virtchnl_ops {
VIRTCHNL_OP_ENABLE_QUEUES_V2 = 107,
VIRTCHNL_OP_DISABLE_QUEUES_V2 = 108,
VIRTCHNL_OP_MAP_QUEUE_VECTOR = 111,
+   VIRTCHNL_OP_CONFIG_QUEUE_BW = 112,
+   VIRTCHNL_OP_CONFIG_QUANTA = 113,
VIRTCHNL_OP_MAX,
 };
 
@@ -1872,6 +1874,23 @@ struct virtchnl_queue_tc_mapping {
 
 VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_queue_tc_mapping);
 
+/* VIRTCHNL_OP_CONFIG_QUEUE_BW */
+struct virtchnl_queue_bw {
+   u16 queue_id;
+   u8 tc;
+   u8 pad;
+   struct virtchnl_shaper_bw shaper;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_queue_bw);
+
+struct virtchnl_queues_bw_cfg {
+   u16 vsi_id;
+   u16 num_queues;
+   struct virtchnl_queue_bw cfg[1];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_queues_bw_cfg);
 
 /* TX and RX queue types are valid in legacy as well as split queue models.
  * With Split Queue model, 2 additional types are introduced - TX_COMPLETION
@@ -1978,6 +1997,12 @@ struct virtchnl_queue_vector_maps {
 
 VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_queue_vector_maps);
 
+struct virtchnl_quanta_cfg {
+   u16 quanta_size;
+   struct virtchnl_queue_chunk queue_select;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_quanta_cfg);
 
 /* Since VF messages are limited by u16 size, precalculate the maximum possible
  * values of nested elements in virtchnl structures that virtual channel can
@@ -2244,6 +2269,31 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info 
*ver, u32 v_opcode,
 sizeof(q_tc->tc[0]);
}
break;
+   case VIRTCHNL_OP_CONFIG_QUEUE_BW:
+   valid_len = sizeof(struct virtchnl_queues_bw_cfg);
+   if (msglen >= valid_len) {
+   struct virtchnl_queues_bw_cfg *q_bw =
+   (struct virtchnl_queues_bw_cfg *)msg;
+   if (q_bw->num_queues == 0) {
+   err_msg_format = true;
+   break;
+   }
+   valid_len += (q_bw->num_queues - 1) *
+sizeof(q_bw->cfg[0]);
+   }
+   break;
+   case VIRTCHNL_OP_CONFIG_QUANTA:
+   valid_len = sizeof(struct virtchnl_quanta_cfg);
+   if (msglen >= valid_len) {
+   struct virtchnl_quanta_cfg *q_quanta =
+   (struct virtchnl_quanta_cfg *)msg;
+   if (q_quanta->quanta_size == 0 ||
+   q_quanta->queue_select.num_queues == 0) {
+   err_msg_format = true;
+   break;
+   }
+   }
+   break;
case VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS:
break;
case VIRTCHNL_OP_ADD_VLAN_V2:
-- 
2.25.1



[PATCH v2 2/3] net/iavf: support queue rate limit configuration

2022-04-07 Thread Wenjun Wu
This patch adds queue rate limit configuration support.
Only max bandwidth is supported.
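
For context, capping a queue through the generic rte_tm API would look
roughly like the minimal sketch below (not part of the patch; node ids,
the rate and the hierarchy layout are illustrative and must match what
the driver exposes):

#include <rte_tm.h>

static int
limit_queue_bw(uint16_t port_id, uint32_t queue_node_id,
	       uint32_t parent_node_id)
{
	struct rte_tm_error err;
	/* Peak rate in bytes/s (~100 Mbit/s); only max BW is honoured. */
	struct rte_tm_shaper_params sp = {
		.peak = { .rate = 100 * 1000 * 1000 / 8, .size = 4096 },
	};
	struct rte_tm_node_params np = { .shaper_profile_id = 1 };

	if (rte_tm_shaper_profile_add(port_id, 1, &sp, &err) != 0)
		return -1;
	if (rte_tm_node_add(port_id, queue_node_id, parent_node_id,
			    0 /* prio */, 1 /* weight */,
			    RTE_TM_NODE_LEVEL_ID_ANY, &np, &err) != 0)
		return -1;
	return rte_tm_hierarchy_commit(port_id, 1 /* clear on fail */, &err);
}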

Signed-off-by: Ting Xu 
Signed-off-by: Wenjun Wu 
---
 drivers/net/iavf/iavf.h   |  13 +++
 drivers/net/iavf/iavf_tm.c| 190 --
 drivers/net/iavf/iavf_vchnl.c |  23 
 3 files changed, 218 insertions(+), 8 deletions(-)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index a01d18e61b..96515a3ee9 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -170,11 +170,21 @@ struct iavf_tm_node {
uint32_t weight;
uint32_t reference_count;
struct iavf_tm_node *parent;
+   struct iavf_tm_shaper_profile *shaper_profile;
struct rte_tm_node_params params;
 };
 
 TAILQ_HEAD(iavf_tm_node_list, iavf_tm_node);
 
+struct iavf_tm_shaper_profile {
+   TAILQ_ENTRY(iavf_tm_shaper_profile) node;
+   uint32_t shaper_profile_id;
+   uint32_t reference_count;
+   struct rte_tm_shaper_params profile;
+};
+
+TAILQ_HEAD(iavf_shaper_profile_list, iavf_tm_shaper_profile);
+
 /* node type of Traffic Manager */
 enum iavf_tm_node_type {
IAVF_TM_NODE_TYPE_PORT,
@@ -188,6 +198,7 @@ struct iavf_tm_conf {
struct iavf_tm_node *root; /* root node - vf vsi */
struct iavf_tm_node_list tc_list; /* node list for all the TCs */
struct iavf_tm_node_list queue_list; /* node list for all the queues */
+   struct iavf_shaper_profile_list shaper_profile_list;
uint32_t nb_tc_node;
uint32_t nb_queue_node;
bool committed;
@@ -451,6 +462,8 @@ int iavf_add_del_mc_addr_list(struct iavf_adapter *adapter,
 int iavf_request_queues(struct rte_eth_dev *dev, uint16_t num);
 int iavf_get_max_rss_queue_region(struct iavf_adapter *adapter);
 int iavf_get_qos_cap(struct iavf_adapter *adapter);
+int iavf_set_q_bw(struct rte_eth_dev *dev,
+ struct virtchnl_queues_bw_cfg *q_bw, uint16_t size);
 int iavf_set_q_tc_map(struct rte_eth_dev *dev,
struct virtchnl_queue_tc_mapping *q_tc_mapping,
uint16_t size);
diff --git a/drivers/net/iavf/iavf_tm.c b/drivers/net/iavf/iavf_tm.c
index 8d92062c7f..32bb3be45e 100644
--- a/drivers/net/iavf/iavf_tm.c
+++ b/drivers/net/iavf/iavf_tm.c
@@ -8,6 +8,13 @@
 static int iavf_hierarchy_commit(struct rte_eth_dev *dev,
 __rte_unused int clear_on_fail,
 __rte_unused struct rte_tm_error *error);
+static int iavf_shaper_profile_add(struct rte_eth_dev *dev,
+  uint32_t shaper_profile_id,
+  struct rte_tm_shaper_params *profile,
+  struct rte_tm_error *error);
+static int iavf_shaper_profile_del(struct rte_eth_dev *dev,
+  uint32_t shaper_profile_id,
+  struct rte_tm_error *error);
 static int iavf_tm_node_add(struct rte_eth_dev *dev, uint32_t node_id,
  uint32_t parent_node_id, uint32_t priority,
  uint32_t weight, uint32_t level_id,
@@ -30,6 +37,8 @@ static int iavf_node_type_get(struct rte_eth_dev *dev, 
uint32_t node_id,
   int *is_leaf, struct rte_tm_error *error);
 
 const struct rte_tm_ops iavf_tm_ops = {
+   .shaper_profile_add = iavf_shaper_profile_add,
+   .shaper_profile_delete = iavf_shaper_profile_del,
.node_add = iavf_tm_node_add,
.node_delete = iavf_tm_node_delete,
.capabilities_get = iavf_tm_capabilities_get,
@@ -44,6 +53,9 @@ iavf_tm_conf_init(struct rte_eth_dev *dev)
 {
struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 
+   /* initialize shaper profile list */
+   TAILQ_INIT(&vf->tm_conf.shaper_profile_list);
+
/* initialize node configuration */
vf->tm_conf.root = NULL;
TAILQ_INIT(&vf->tm_conf.tc_list);
@@ -57,6 +69,7 @@ void
 iavf_tm_conf_uninit(struct rte_eth_dev *dev)
 {
struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+   struct iavf_tm_shaper_profile *shaper_profile;
struct iavf_tm_node *tm_node;
 
/* clear node configuration */
@@ -74,6 +87,14 @@ iavf_tm_conf_uninit(struct rte_eth_dev *dev)
rte_free(vf->tm_conf.root);
vf->tm_conf.root = NULL;
}
+
+   /* Remove all shaper profiles */
+   while ((shaper_profile =
+  TAILQ_FIRST(&vf->tm_conf.shaper_profile_list))) {
+   TAILQ_REMOVE(&vf->tm_conf.shaper_profile_list,
+shaper_profile, node);
+   rte_free(shaper_profile);
+   }
 }
 
 static inline struct iavf_tm_node *
@@ -132,13 +153,6 @@ iavf_node_param_check(struct iavf_info *vf, uint32_t 
node_id,
return -EINVAL;
}
 
-   /* not support shaper profile */
-   if (params->shaper_profile_id) {
-   error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_SH

[PATCH v2 3/3] net/iavf: support quanta size configuration

2022-04-07 Thread Wenjun Wu
This patch adds quanta size configuration support.
Quanta size should be between 256 and 4096, and be a multiple of 64.

Signed-off-by: Wenjun Wu 
---
 drivers/net/iavf/iavf.h|  3 +++
 drivers/net/iavf/iavf_ethdev.c | 40 ++
 drivers/net/iavf/iavf_vchnl.c  | 28 
 3 files changed, 71 insertions(+)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 96515a3ee9..c0a4a47b04 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -292,6 +292,7 @@ enum iavf_proto_xtr_type {
 struct iavf_devargs {
uint8_t proto_xtr_dflt;
uint8_t proto_xtr[IAVF_MAX_QUEUE_NUM];
+   uint16_t quanta_size;
 };
 
 struct iavf_security_ctx;
@@ -467,6 +468,8 @@ int iavf_set_q_bw(struct rte_eth_dev *dev,
 int iavf_set_q_tc_map(struct rte_eth_dev *dev,
struct virtchnl_queue_tc_mapping *q_tc_mapping,
uint16_t size);
+int iavf_set_vf_quanta_size(struct iavf_adapter *adapter, u16 start_queue_id,
+   u16 num_queues);
 void iavf_tm_conf_init(struct rte_eth_dev *dev);
 void iavf_tm_conf_uninit(struct rte_eth_dev *dev);
 int iavf_ipsec_crypto_request(struct iavf_adapter *adapter,
diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c
index d6190ac24a..255459f162 100644
--- a/drivers/net/iavf/iavf_ethdev.c
+++ b/drivers/net/iavf/iavf_ethdev.c
@@ -34,9 +34,11 @@
 
 /* devargs */
 #define IAVF_PROTO_XTR_ARG "proto_xtr"
+#define IAVF_QUANTA_SIZE_ARG   "quanta_size"
 
 static const char * const iavf_valid_args[] = {
IAVF_PROTO_XTR_ARG,
+   IAVF_QUANTA_SIZE_ARG,
NULL
 };
 
@@ -950,6 +952,11 @@ iavf_dev_start(struct rte_eth_dev *dev)
return -1;
}
 
+   if (iavf_set_vf_quanta_size(adapter, index, num_queue_pairs) != 0) {
+   PMD_DRV_LOG(ERR, "configure quanta size failed");
+   goto err_queue;
+   }
+
/* If needed, send configure queues msg multiple times to make the
 * adminq buffer length smaller than the 4K limitation.
 */
@@ -2092,6 +2099,25 @@ iavf_handle_proto_xtr_arg(__rte_unused const char *key, 
const char *value,
return 0;
 }
 
+static int
+parse_u16(__rte_unused const char *key, const char *value, void *args)
+{
+   u16 *num = (u16 *)args;
+   u16 tmp;
+
+   errno = 0;
+   tmp = strtoull(value, NULL, 10);
+   if (errno || !tmp) {
+   PMD_DRV_LOG(WARNING, "%s: \"%s\" is not a valid u16",
+   key, value);
+   return -1;
+   }
+
+   *num = tmp;
+
+   return 0;
+}
+
 static int iavf_parse_devargs(struct rte_eth_dev *dev)
 {
struct iavf_adapter *ad =
@@ -2118,6 +2144,20 @@ static int iavf_parse_devargs(struct rte_eth_dev *dev)
if (ret)
goto bail;
 
+   ret = rte_kvargs_process(kvlist, IAVF_QUANTA_SIZE_ARG,
+&parse_u16, &ad->devargs.quanta_size);
+   if (ret)
+   goto bail;
+
+   if (ad->devargs.quanta_size == 0)
+   ad->devargs.quanta_size = 1024;
+
+   if (ad->devargs.quanta_size < 256 || ad->devargs.quanta_size > 4096 ||
+   ad->devargs.quanta_size & 0x40) {
+   PMD_INIT_LOG(ERR, "invalid quanta size\n");
+   return -EINVAL;
+   }
+
 bail:
rte_kvargs_free(kvlist);
return ret;
diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c
index 537369f736..ee26e45acf 100644
--- a/drivers/net/iavf/iavf_vchnl.c
+++ b/drivers/net/iavf/iavf_vchnl.c
@@ -1828,3 +1828,31 @@ iavf_ipsec_crypto_request(struct iavf_adapter *adapter,
 
return 0;
 }
+
+int
+iavf_set_vf_quanta_size(struct iavf_adapter *adapter, u16 start_queue_id, u16 
num_queues)
+{
+   struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
+   struct iavf_cmd_info args;
+   struct virtchnl_quanta_cfg q_quanta;
+   int err;
+
+   q_quanta.quanta_size = adapter->devargs.quanta_size;
+   q_quanta.queue_select.type = VIRTCHNL_QUEUE_TYPE_TX;
+   q_quanta.queue_select.start_queue_id = start_queue_id;
+   q_quanta.queue_select.num_queues = num_queues;
+
+   args.ops = VIRTCHNL_OP_CONFIG_QUANTA;
+   args.in_args = (uint8_t *)&q_quanta;
+   args.in_args_size = sizeof(q_quanta);
+   args.out_buffer = vf->aq_resp;
+   args.out_size = IAVF_AQ_BUF_SZ;
+
+   err = iavf_execute_vf_cmd(adapter, &args, 0);
+   if (err) {
+   PMD_DRV_LOG(ERR, "Failed to execute command of VIRTCHNL_OP_CONFIG_QUANTA");
+   return err;
+   }
+
+   return 0;
+}
-- 
2.25.1



[PATCH v1 0/3] net/iavf: support Rx timestamp on flex descriptor

2022-04-07 Thread Simei Su
[PATCH v1 1/3] add related ops and structure for Rx timestamp in virtual 
channel.
[PATCH v1 2/3] add support for Rx timestamp on flex descriptor in driver.
[PATCH v1 3/3] improve performance with Rx timestamp enabled.

Simei Su (2):
  common/iavf: support Rx timestamp in virtual channel
  net/iavf: enable Rx timestamp on Flex Descriptor

Wenjun Wu (1):
  net/iavf: improve performance of Rx timestamp offload

 drivers/common/iavf/virtchnl.h  | 62 ++--
 drivers/net/iavf/iavf.h |  6 +++
 drivers/net/iavf/iavf_ethdev.c  | 34 ++
 drivers/net/iavf/iavf_rxtx.c| 72 
 drivers/net/iavf/iavf_rxtx.h| 21 +
 drivers/net/iavf/iavf_rxtx_vec_common.h |  3 ++
 drivers/net/iavf/iavf_vchnl.c   | 83 -
 7 files changed, 267 insertions(+), 14 deletions(-)

-- 
2.9.5



[PATCH v1 1/3] common/iavf: support Rx timestamp in virtual channel

2022-04-07 Thread Simei Su
Add new ops and structures to allow the VF to support Rx timestamp
on the flex descriptor.

"VIRTCHNL_OP_1588_PTP_GET_CAPS" is sent by the VF to request PTP
capabilities and is responded to by the PF with the capabilities
enabled for that VF.

"VIRTCHNL_OP_1588_PTP_GET_TIME" is sent by the VF to request the
current time of the PHC. The PF will respond by reading the
device time and reporting it back to the VF.

Signed-off-by: Simei Su 
---
 drivers/common/iavf/virtchnl.h | 62 --
 1 file changed, 59 insertions(+), 3 deletions(-)

diff --git a/drivers/common/iavf/virtchnl.h b/drivers/common/iavf/virtchnl.h
index 3e44eca..d3a99e9 100644
--- a/drivers/common/iavf/virtchnl.h
+++ b/drivers/common/iavf/virtchnl.h
@@ -159,6 +159,8 @@ enum virtchnl_ops {
VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2 = 57,
VIRTCHNL_OP_ENABLE_VLAN_FILTERING_V2 = 58,
VIRTCHNL_OP_DISABLE_VLAN_FILTERING_V2 = 59,
+   VIRTCHNL_OP_1588_PTP_GET_CAPS = 60,
+   VIRTCHNL_OP_1588_PTP_GET_TIME = 61,
VIRTCHNL_OP_GET_QOS_CAPS = 66,
VIRTCHNL_OP_CONFIG_QUEUE_TC_MAP = 67,
VIRTCHNL_OP_ENABLE_QUEUES_V2 = 107,
@@ -274,6 +276,10 @@ static inline const char *virtchnl_op_str(enum 
virtchnl_ops v_opcode)
return "VIRTCHNL_OP_ENABLE_VLAN_FILTERING_V2";
case VIRTCHNL_OP_DISABLE_VLAN_FILTERING_V2:
return "VIRTCHNL_OP_DISABLE_VLAN_FILTERING_V2";
+   case VIRTCHNL_OP_1588_PTP_GET_CAPS:
+   return "VIRTCHNL_OP_1588_PTP_GET_CAPS";
+   case VIRTCHNL_OP_1588_PTP_GET_TIME:
+   return "VIRTCHNL_OP_1588_PTP_GET_TIME";
case VIRTCHNL_OP_MAX:
return "VIRTCHNL_OP_MAX";
default:
@@ -409,8 +415,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
 #define VIRTCHNL_VF_OFFLOAD_FDIR_PF	BIT(28)
 #define VIRTCHNL_VF_OFFLOAD_QOS	BIT(29)
 #define VIRTCHNL_VF_CAP_DCF	BIT(30)
-   /* BIT(31) is reserved */
-
+#define VIRTCHNL_VF_CAP_PTP	BIT(31)
 #define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \
   VIRTCHNL_VF_OFFLOAD_VLAN | \
   VIRTCHNL_VF_OFFLOAD_RSS_PF)
@@ -496,6 +501,18 @@ enum virtchnl_rx_desc_id_bitmasks {
/* 22 through 63 are reserved */
 };
 
+/* virtchnl_rxq_info_flags
+ *
+ * Definition of bits in the flags field of the virtchnl_rxq_info structure.
+ */
+enum virtchnl_rxq_info_flags {
+   /* If the VIRTCHNL_PTP_RX_TSTAMP bit of the flag field is set, this is
+* a request to enable Rx timestamp. Other flag bits are currently
+* reserved and they may be extended in the future.
+*/
+   VIRTCHNL_PTP_RX_TSTAMP = BIT(0),
+};
+
 /* VIRTCHNL_OP_CONFIG_RX_QUEUE
  * VF sends this message to set up parameters for one RX queue.
  * External data buffer contains one instance of virtchnl_rxq_info.
@@ -524,7 +541,8 @@ struct virtchnl_rxq_info {
 * with VIRTCHNL_RXDID_1_32B_BASE.
 */
u8 rxdid;
-   u8 pad1[2];
+   u8 flags; /* see virtchnl_rxq_info_flags */
+   u8 pad1;
u64 dma_ring_addr;
 
/* see enum virtchnl_rx_hsplit; deprecated with AVF 1.0 */
@@ -1978,6 +1996,38 @@ struct virtchnl_queue_vector_maps {
 
 VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_queue_vector_maps);
 
+#define VIRTCHNL_1588_PTP_CAP_RX_TSTAMP	BIT(1)
+#define VIRTCHNL_1588_PTP_CAP_READ_PHC  BIT(2)
+
+struct virtchnl_phc_regs {
+   u32 clock_hi;
+   u32 clock_lo;
+   u8 pcie_region;
+   u8 rsvd[15];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_phc_regs);
+
+struct virtchnl_ptp_caps {
+   struct virtchnl_phc_regs phc_regs;
+   u32 caps;
+   s32 max_adj;
+   u8 tx_tstamp_idx;
+   u8 n_ext_ts;
+   u8 n_per_out;
+   u8 n_pins;
+   u8 tx_tstamp_format;
+   u8 rsvd[11];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(48, virtchnl_ptp_caps);
+
+struct virtchnl_phc_time {
+   uint64_t time;
+   uint8_t rsvd[8];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_phc_time);
 
 /* Since VF messages are limited by u16 size, precalculate the maximum possible
  * values of nested elements in virtchnl structures that virtual channel can
@@ -2271,6 +2321,12 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info 
*ver, u32 v_opcode,
case VIRTCHNL_OP_DISABLE_VLAN_FILTERING_V2:
valid_len = sizeof(struct virtchnl_vlan_setting);
break;
+   case VIRTCHNL_OP_1588_PTP_GET_CAPS:
+   valid_len = sizeof(struct virtchnl_ptp_caps);
+   break;
+   case VIRTCHNL_OP_1588_PTP_GET_TIME:
+   valid_len = sizeof(struct virtchnl_phc_time);
+   break;
case VIRTCHNL_OP_ENABLE_QUEUES_V2:
case VIRTCHNL_OP_DISABLE_QUEUES_V2:
valid_len = sizeof(struct virtchnl_del_ena_dis_queues);
-- 
2.9.5



[PATCH v1 2/3] net/iavf: enable Rx timestamp on Flex Descriptor

2022-04-07 Thread Simei Su
Dump the Rx timestamp value into the dynamic mbuf field via the flex
descriptor. This feature is turned on by the dev config
"enable-rx-timestamp". Currently, it is only supported on the scalar path.
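
For reference, an application would enable the offload before dev_start()
and read the value through the dynamic mbuf field; a rough sketch (not
part of the patch, error handling trimmed, offload capability assumed):

#include <rte_ethdev.h>
#include <rte_mbuf_dyn.h>

static int ts_dynfield_offset = -1;
static uint64_t ts_dynflag;

/* Request the offload and register the shared dynamic field/flag. */
static int
enable_rx_timestamp(struct rte_eth_conf *conf)
{
	conf->rxmode.offloads |= RTE_ETH_RX_OFFLOAD_TIMESTAMP;
	return rte_mbuf_dyn_rx_timestamp_register(&ts_dynfield_offset,
						  &ts_dynflag);
}

/* Read the Rx timestamp (nanoseconds) from a received mbuf, if present. */
static uint64_t
read_rx_timestamp(const struct rte_mbuf *m)
{
	if ((m->ol_flags & ts_dynflag) == 0)
		return 0;
	return *RTE_MBUF_DYNFIELD(m, ts_dynfield_offset,
				  rte_mbuf_timestamp_t *);
}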

Signed-off-by: Simei Su 
---
 drivers/net/iavf/iavf.h |  5 ++
 drivers/net/iavf/iavf_ethdev.c  | 26 +++
 drivers/net/iavf/iavf_rxtx.c| 58 +++
 drivers/net/iavf/iavf_rxtx.h| 22 +
 drivers/net/iavf/iavf_rxtx_vec_common.h |  3 ++
 drivers/net/iavf/iavf_vchnl.c   | 83 -
 6 files changed, 186 insertions(+), 11 deletions(-)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index a01d18e..2838b5e 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -257,6 +257,8 @@ struct iavf_info {
struct iavf_tm_conf tm_conf;
 
struct rte_eth_dev *eth_dev;
+
+   uint32_t ptp_caps;
 };
 
 #define IAVF_MAX_PKT_TYPE 1024
@@ -300,6 +302,7 @@ struct iavf_adapter {
bool stopped;
uint16_t fdir_ref_cnt;
struct iavf_devargs devargs;
+   uint64_t phc_time;
 };
 
 /* IAVF_DEV_PRIVATE_TO */
@@ -460,4 +463,6 @@ int iavf_ipsec_crypto_request(struct iavf_adapter *adapter,
uint8_t *msg, size_t msg_len,
uint8_t *resp_msg, size_t resp_msg_len);
 extern const struct rte_tm_ops iavf_tm_ops;
+int iavf_get_ptp_cap(struct iavf_adapter *adapter);
+int iavf_get_phc_time(struct iavf_adapter *adapter);
 #endif /* _IAVF_ETHDEV_H_ */
diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c
index d6190ac..704c174 100644
--- a/drivers/net/iavf/iavf_ethdev.c
+++ b/drivers/net/iavf/iavf_ethdev.c
@@ -35,6 +35,9 @@
 /* devargs */
 #define IAVF_PROTO_XTR_ARG "proto_xtr"
 
+uint64_t iavf_timestamp_dynflag;
+int iavf_timestamp_dynfield_offset = -1;
+
 static const char * const iavf_valid_args[] = {
IAVF_PROTO_XTR_ARG,
NULL
@@ -685,6 +688,7 @@ iavf_init_rxq(struct rte_eth_dev *dev, struct iavf_rx_queue 
*rxq)
struct rte_eth_dev_data *dev_data = dev->data;
uint16_t buf_size, max_pkt_len;
uint32_t frame_size = dev->data->mtu + IAVF_ETH_OVERHEAD;
+   enum iavf_status err;
 
buf_size = rte_pktmbuf_data_room_size(rxq->mp) - RTE_PKTMBUF_HEADROOM;
 
@@ -703,6 +707,18 @@ iavf_init_rxq(struct rte_eth_dev *dev, struct 
iavf_rx_queue *rxq)
return -EINVAL;
}
 
+   if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+   /* Register mbuf field and flag for Rx timestamp */
+   err = rte_mbuf_dyn_rx_timestamp_register(
+   &iavf_timestamp_dynfield_offset,
+   &iavf_timestamp_dynflag);
+   if (err) {
+   PMD_DRV_LOG(ERR,
+   "Cannot register mbuf field/flag for 
timestamp");
+   return -EINVAL;
+   }
+   }
+
rxq->max_pkt_len = max_pkt_len;
if ((dev_data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_SCATTER) ||
rxq->max_pkt_len > buf_size) {
@@ -945,6 +961,13 @@ iavf_dev_start(struct rte_eth_dev *dev)
return -1;
}
 
+   if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_CAP_PTP) {
+   if (iavf_get_ptp_cap(adapter)) {
+   PMD_INIT_LOG(ERR, "Failed to get ptp capability");
+   return -1;
+   }
+   }
+
if (iavf_init_queues(dev) != 0) {
PMD_DRV_LOG(ERR, "failed to do Queue init");
return -1;
@@ -1087,6 +1110,9 @@ iavf_dev_info_get(struct rte_eth_dev *dev, struct 
rte_eth_dev_info *dev_info)
if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_CRC)
dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_KEEP_CRC;
 
+   if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_CAP_PTP)
+   dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_TIMESTAMP;
+
if (iavf_ipsec_crypto_supported(adapter)) {
dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_SECURITY;
dev_info->tx_offload_capa |= RTE_ETH_TX_OFFLOAD_SECURITY;
diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 764218a..ab5b3de 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -1429,6 +1429,11 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
rx_id = rxq->rx_tail;
rx_ring = rxq->rx_ring;
ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+   struct iavf_adapter *ad = rxq->vsi->adapter;
+   uint64_t ts_ns;
+
+   if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+   rxq->hw_register_set = 1;
 
while (nb_rx < nb_pkts) {
rxdp = (volatile union iavf_rx_flex_desc *)&rx_ring[rx_id];
@@ -1491,6 +1496,21 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
&rxq->stats.ipsec_crypto);
rxd_to_pkt_fields_ops[rxq

[PATCH v1 3/3] net/iavf: improve performance of Rx timestamp offload

2022-04-07 Thread Simei Su
From: Wenjun Wu 

In this patch, we use CPU ticks instead of the HW register
to determine whether the low 32-bit timestamp has rolled
over. This avoids reading the register value frequently
and improves receive performance.
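
Conceptually, the extension of the 32-bit descriptor timestamp against the
cached 64-bit PHC value works roughly as below (illustrative sketch only,
not the exact driver helper):

/* Extend a 32-bit HW timestamp using a cached 64-bit PHC time (ns). */
static uint64_t
extend_ts32(uint64_t cached_phc_ns, uint32_t ts32)
{
	uint32_t phc_lo = (uint32_t)cached_phc_ns;
	uint64_t ts = (cached_phc_ns & 0xFFFFFFFF00000000ULL) | ts32;

	/* A large forward/backward delta means the low word wrapped
	 * between the cache refresh and this descriptor.
	 */
	if (ts32 < phc_lo && (phc_lo - ts32) > (1U << 31))
		ts += (1ULL << 32);
	else if (ts32 > phc_lo && (ts32 - phc_lo) > (1U << 31))
		ts -= (1ULL << 32);
	return ts;
}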

Signed-off-by: Wenjun Wu 
---
 drivers/net/iavf/iavf.h|  1 +
 drivers/net/iavf/iavf_ethdev.c |  8 +++
 drivers/net/iavf/iavf_rxtx.c   | 50 +++---
 drivers/net/iavf/iavf_rxtx.h   |  1 -
 4 files changed, 41 insertions(+), 19 deletions(-)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 2838b5e..ad5c0d4 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -303,6 +303,7 @@ struct iavf_adapter {
uint16_t fdir_ref_cnt;
struct iavf_devargs devargs;
uint64_t phc_time;
+   uint64_t hw_time_update;
 };
 
 /* IAVF_DEV_PRIVATE_TO */
diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c
index 704c174..ffdc368 100644
--- a/drivers/net/iavf/iavf_ethdev.c
+++ b/drivers/net/iavf/iavf_ethdev.c
@@ -1014,6 +1014,14 @@ iavf_dev_start(struct rte_eth_dev *dev)
goto err_mac;
}
 
+   if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+   if (iavf_get_phc_time(adapter)) {
+   PMD_DRV_LOG(ERR, "get physical time failed");
+   goto err_mac;
+   }
+   adapter->hw_time_update = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
+   }
+
return 0;
 
 err_mac:
diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index ab5b3de..02ee279 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -1432,8 +1432,14 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
struct iavf_adapter *ad = rxq->vsi->adapter;
uint64_t ts_ns;
 
-   if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
-   rxq->hw_register_set = 1;
+   if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+   uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
+   if (sw_cur_time - ad->hw_time_update > 4) {
+   if (iavf_get_phc_time(ad))
+   PMD_DRV_LOG(ERR, "get physical time failed");
+   ad->hw_time_update = sw_cur_time;
+   }
+   }
 
while (nb_rx < nb_pkts) {
rxdp = (volatile union iavf_rx_flex_desc *)&rx_ring[rx_id];
@@ -1498,13 +1504,12 @@ iavf_recv_pkts_flex_rxd(void *rx_queue,
pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
 
if (iavf_timestamp_dynflag > 0) {
-   if (rxq->hw_register_set)
-   iavf_get_phc_time(ad);
-
-   rxq->hw_register_set = 0;
ts_ns = iavf_tstamp_convert_32b_64b(ad->phc_time,
rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high));
 
+   ad->phc_time = ts_ns;
+   ad->hw_time_update = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
+
*RTE_MBUF_DYNFIELD(rxm,
iavf_timestamp_dynfield_offset,
rte_mbuf_timestamp_t *) = ts_ns;
@@ -1546,8 +1551,14 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct 
rte_mbuf **rx_pkts,
volatile union iavf_rx_flex_desc *rxdp;
const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
 
-   if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
-   rxq->hw_register_set = 1;
+   if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+   uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
+   if (sw_cur_time - ad->hw_time_update > 4) {
+   if (iavf_get_phc_time(ad))
+   PMD_DRV_LOG(ERR, "get physical time failed");
+   ad->hw_time_update = sw_cur_time;
+   }
+   }
 
while (nb_rx < nb_pkts) {
rxdp = (volatile union iavf_rx_flex_desc *)&rx_ring[rx_id];
@@ -1662,13 +1673,12 @@ iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, 
struct rte_mbuf **rx_pkts,
pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0);
 
if (iavf_timestamp_dynflag > 0) {
-   if (rxq->hw_register_set)
-   iavf_get_phc_time(ad);
-
-   rxq->hw_register_set = 0;
ts_ns = iavf_tstamp_convert_32b_64b(ad->phc_time,
rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high));
 
+   ad->phc_time = ts_ns;
+   ad->hw_time_update = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
+
*RTE_MBUF_DYNFIELD(first_seg,
iavf_timestamp_dynfield_offset,
 

Re: [RFC 1/2] ethdev: port flags for pre-configuration flow hints

2022-04-07 Thread Jack Min

On 4/7/22 23:04, Stephen Hemminger wrote:

On Thu, 7 Apr 2022 13:30:46 +0800
Xiaoyu Min  wrote:


   * @b EXPERIMENTAL: this API may change without prior notice.
@@ -4972,6 +4983,11 @@ struct rte_flow_port_attr {
 * @see RTE_FLOW_ACTION_TYPE_METER
 */
uint32_t nb_meters;
+   /**
+* Port flags.
+* @see enum rte_flow_port_flag
+*/
+   enum rte_flow_port_flag flags;

This would have to wait until 22.11 because it is ABI breakage.
Also, how would this work with old users of API?


I'm not familiar with the DPDK API/ABI policy,

But as I understand it, this one is marked as _experimental_, and so are 
all related APIs.


Experimental APIs are not considered part of the ABI, and we can change 
them anytime, no?


[Bug 992] [dpdk 21.11.1-rc1] drivers/net/cnxk/cnxk_ethdev_mtr meson build failed with cflag param optimization=1 on Ubuntu20.04 with GCC10.3

2022-04-07 Thread bugzilla
https://bugs.dpdk.org/show_bug.cgi?id=992

Bug ID: 992
   Summary: [dpdk 21.11.1-rc1] drivers/net/cnxk/cnxk_ethdev_mtr
meson build failed with cflag param optimization=1 on
Ubuntu20.04 with GCC10.3
   Product: DPDK
   Version: 19.11
  Hardware: All
OS: All
Status: UNCONFIRMED
  Severity: normal
  Priority: Normal
 Component: core
  Assignee: dev@dpdk.org
  Reporter: daxuex@intel.com
  Target Milestone: ---

[DPDK version]
dpdk 21.11.1-rc1

[Bad commit]:
commit 713a4bc48c52058172520805373103c79ef17a3a (HEAD)
Author: Bruce Richardson 
Date:   Wed Mar 16 13:45:43 2022 +

doc: replace characters for (R) symbol in Linux guide

[ upstream commit c265d58619e7fc0f72441faafec56a2a8633a15b ]

Some IDEs, such as eclipse, complained on save about the use of special
characters in the (R) symbol in linux GSG doc. We can replace those with
the equivalent "|reg|" text, and including isonum.txt.

Signed-off-by: Bruce Richardson 


[OS version]:
gcc version 10.3.0(Ubuntu 10.3.0-1ubuntu1~20.04)
UB20.04/5.8.0-48-generic

[Test Setup]:
CC=gcc meson --optimization=1 --werror -Denable_kmods=True  -Dlibdir=lib 
--default-library=static x86_64-native-linuxapp-gcc 
ninja -C x86_64-native-linuxapp-gcc -j 10

[Ubuntu20.04 log as below]
Found ninja-1.9.0.git.kitware.dyndep-1.jobserver-1 at /usr/local/bin/ninja
ninja: Entering directory `x86_64-native-linuxapp-gcc'
[1128/3000] Compiling C object
drivers/libtmp_rte_net_cnxk.a.p/net_cnxk_cnxk_ethdev_mtr.c.o
FAILED: drivers/libtmp_rte_net_cnxk.a.p/net_cnxk_cnxk_ethdev_mtr.c.o
gcc -Idrivers/libtmp_rte_net_cnxk.a.p -Idrivers -I../drivers -Idrivers/net/cnxk
-I../drivers/net/cnxk -Ilib/ethdev -I../lib/ethdev -I. -I.. -Iconfig
-I../config -Ilib/eal/include -I../lib/eal/include -Ilib/eal/linux/include
-I../lib/eal/linux/include -Ilib/eal/x86/include -I../lib/eal/x86/include
-Ilib/eal/common -I../lib/eal/common -Ilib/eal -I../lib/eal -Ilib/kvargs
-I../lib/kvargs -Ilib/metrics -I../lib/metrics -Ilib/telemetry
-I../lib/telemetry -Ilib/net -I../lib/net -Ilib/mbuf -I../lib/mbuf
-Ilib/mempool -I../lib/mempool -Ilib/ring -I../lib/ring -Ilib/meter
-I../lib/meter -Idrivers/bus/pci -I../drivers/bus/pci
-I../drivers/bus/pci/linux -Ilib/pci -I../lib/pci -Idrivers/bus/vdev
-I../drivers/bus/vdev -Ilib/cryptodev -I../lib/cryptodev -Ilib/rcu -I../lib/rcu
-Ilib/eventdev -I../lib/eventdev -Ilib/hash -I../lib/hash -Ilib/timer
-I../lib/timer -Ilib/security -I../lib/security -Idrivers/common/cnxk
-I../drivers/common/cnxk -Idrivers/mempool/cnxk -I../drivers/mempool/cnxk
-fdiagnostics-color=always -pipe -D_FILE_OFFSET_BITS=64 -Wall -Winvalid-pch
-Werror -O1 -include rte_config.h -Wextra -Wcast-qual -Wdeprecated -Wformat
-Wformat-nonliteral -Wformat-security -Wmissing-declarations
-Wmissing-prototypes -Wnested-externs -Wold-style-definition -Wpointer-arith
-Wsign-compare -Wstrict-prototypes -Wundef -Wwrite-strings
-Wno-address-of-packed-member -Wno-packed-not-aligned
-Wno-missing-field-initializers -Wno-zero-length-bounds -D_GNU_SOURCE -fPIC
-march=native -DALLOW_EXPERIMENTAL_API -DALLOW_INTERNAL_API
-Wno-format-truncation -flax-vector-conversions -Wno-strict-aliasing
-DRTE_LOG_DEFAULT_LOGTYPE=pmd.net.cnxk -MD -MQ
drivers/libtmp_rte_net_cnxk.a.p/net_cnxk_cnxk_ethdev_mtr.c.o -MF
drivers/libtmp_rte_net_cnxk.a.p/net_cnxk_cnxk_ethdev_mtr.c.o.d -o
drivers/libtmp_rte_net_cnxk.a.p/net_cnxk_cnxk_ethdev_mtr.c.o -c
../drivers/net/cnxk/cnxk_ethdev_mtr.c
In file included from ../drivers/net/cnxk/cnxk_ethdev.h:16,
 from ../drivers/net/cnxk/cnxk_ethdev_mtr.c:5:
../drivers/net/cnxk/cnxk_ethdev_mtr.c: In function
‘cnxk_nix_mtr_policy_validate’:
../lib/ethdev/rte_mtr_driver.h:188:10: error: ‘str’ may be used uninitialized
in this function [-Werror=maybe-uninitialized]
  188 |   *error = (struct rte_mtr_error){
  |   ~~~^
  189 |.type = type,
  |~
  190 |.cause = cause,
  |~~~
  191 |.message = message,
  |~~~
  192 |   };
  |   ~
../drivers/net/cnxk/cnxk_ethdev_mtr.c:283:14: note: ‘str’ was declared here
  283 |  const char *str;
  |  ^~~
In file included from ../drivers/net/cnxk/cnxk_ethdev.h:16,
 from ../drivers/net/cnxk/cnxk_ethdev_mtr.c:5:
../lib/ethdev/rte_mtr_driver.h:188:10: error: ‘str’ may be used uninitialized
in this function [-Werror=maybe-uninitialized]
  188 |   *error = (struct rte_mtr_error){
  |   ~~~^
  189 |.type = type,
  |~
  190 |.cause = cause,
  |~~~
  191 |.message = message,
  |~~~
  192 |   };
  |   ~
../drivers/net/cnxk/cnxk_ethdev_mtr.c:283:14: note: ‘str’ was declared here
  283 |  const char *str;
  |  ^~~
cc1: all warnings being treated as errors

[RFC] ethdev: datapath-focused meter actions

2022-04-07 Thread Alexander Kozyrev
The introduction of asynchronous flow rules operations allowed users
to create/destroy flow rules as part of the datapath without blocking
on Flow API and slowing the packet processing down.

That applies to every possible action that has no preparation steps.
Unfortunately, one notable exception is the meter action.
There is a separate API to prepare a meter profile and a meter policy
before any meter object can be used as a flow rule action.

The application logic is the following:
1. rte_mtr_meter_profile_add() is called to create the meter profile
first to define how to classify incoming packets and to assign an
appropriate color to them.
2. rte_mtr_meter_policy_add() is invoked to define the fate of a packet,
based on its color (practically creating flow rules, matching colors).
3. rte_mtr_create() is then needed to search (with locks) for previously
created profile and policy in order to create the meter object.
4. rte_flow_create() can now finally be used to specify the created
meter as an action.

This approach doesn't fit into the asynchronous rule creation model
and can be improved with the following proposal:
1. Creating a policy may be replaced with the creation of a group with
up to 3 different rules for every color using asynchronous Flow API.
That requires the introduction of a new pattern item - meter color.
Then creating a flow rule with the meter means a simple jump to a group:
rte_flow_async_create(group=1, pattern=color, actions=...);
rte_flow_async_create(group=0, pattern=5-tuple,
  actions=meter,jump group 1);
This makes it possible to classify packets and act upon their color
classification. The Meter action assigns a color to a packet, and an
appropriate action is selected based on the meter color in group 1.
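
In terms of the existing item/action structures, a group 1 rule for green
packets could be built like this (sketch only, using the meter color item
proposed below; the queue index and the surrounding async create call are
illustrative):

#include <rte_flow.h>
#include <rte_meter.h>

/* Group 1 rule: match GREEN packets and steer them to queue 0. */
static const struct rte_flow_item_meter_color green_spec = {
	.color = RTE_COLOR_GREEN,
};
static const struct rte_flow_item pattern[] = {
	{ .type = RTE_FLOW_ITEM_TYPE_METER_COLOR, .spec = &green_spec },
	{ .type = RTE_FLOW_ITEM_TYPE_END },
};
static const struct rte_flow_action_queue queue_conf = { .index = 0 };
static const struct rte_flow_action actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue_conf },
	{ .type = RTE_FLOW_ACTION_TYPE_END },
};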

2. Preparing a meter object should be part of flow rule creation
and use the same flow queue to benefit from asynchronous operations:
rte_flow_async_create(group=0, pattern=5-tuple,
  actions=meter id 1 profile rfc2697, jump group 1);
Creating the meter object takes time, and flow creation must wait
until it is ready before inserting the rule. Using the same queue
ensures that. There is no need to create a meter object outside of the
Flow API, but this approach won't affect the old Meter API in any way.

3. Another point of optimization is to prepare all the resources needed
in advance in rte_flow_configure(). All the policy rules can be created
during the initialization stage easily and put into several groups.
These groups can be used by many meter objects by simple jump action to
an appropriate group. Meter objects can be preallocated as well and
configured with required profile parameters later at the flow rule
creation stage. The number of pre-allocated profiles/policies is
specified in the Flow engine resources settings.

These optimizations alongside already existing pattern/actions templates
can improve the insertion rate significantly and allow meter usage as
part of the datapath. The introduction of the new API is intended to be
used with the asynchronous Flow API. Deprecation of the old Meter API
is not planned at this point.

Signed-off-by: Alexander Kozyrev 
---
 lib/ethdev/rte_flow.h | 71 ++-
 1 file changed, 70 insertions(+), 1 deletion(-)

diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h
index d8827dd184..aec36a9f0a 100644
--- a/lib/ethdev/rte_flow.h
+++ b/lib/ethdev/rte_flow.h
@@ -33,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -671,6 +672,13 @@ enum rte_flow_item_type {
 * See struct rte_flow_item_gre_opt.
 */
RTE_FLOW_ITEM_TYPE_GRE_OPTION,
+
+   /**
+* Matches Meter Color.
+*
+* See struct rte_flow_item_meter_color.
+*/
+   RTE_FLOW_ITEM_TYPE_METER_COLOR,
 };
 
 /**
@@ -1990,6 +1998,26 @@ static const struct rte_flow_item_ppp 
rte_flow_item_ppp_mask = {
 };
 #endif
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this structure may change without prior notice
+ *
+ * RTE_FLOW_ITEM_TYPE_METER_COLOR
+ *
+ * Matches a meter color set in the packet meta-data
+ * (i.e. struct rte_mbuf::sched::color).
+ */
+struct rte_flow_item_meter_color {
+   enum rte_color color; /**< Packet color. */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_METER_COLOR. */
+#ifndef __cplusplus
+static const struct rte_flow_item_meter_color rte_flow_item_meter_color_mask = 
{
+   .color = 0x3,
+};
+#endif
+
 /**
  * Matching pattern item definition.
  *
@@ -2376,6 +2404,14 @@ enum rte_flow_action_type {
 */
RTE_FLOW_ACTION_TYPE_METER,
 
+   /**
+* Extended Traffic metering and policing (MTR).
+*
+* See struct rte_flow_action_meter_ext.
+* See file rte_mtr.h for MTR object configuration.
+*/
+   RTE_FLOW_ACTION_TYPE_METER_EXT,
+
/**
 * Redirects packets to security engine of current device for security
 

[PATCH v3 0/4] Enable queue rate limit and quanta size configuration

2022-04-07 Thread Wenjun Wu
This patch set adds queue rate limit and quanta size configuration.
Quanta size can be changed via the driver devarg quanta_size=xxx. Quanta
size should be set to a value between 256 and 4096 and be a multiple
of 64.

v2: rework virtchnl
v3: add release note

Wenjun Wu (4):
  common/iavf: support queue rate limit and quanta size configuration
  net/iavf: support queue rate limit configuration
  net/iavf: support quanta size configuration
  doc: add release notes for 22.07

 doc/guides/rel_notes/release_22_07.rst |   4 +
 drivers/common/iavf/virtchnl.h |  50 +++
 drivers/net/iavf/iavf.h|  16 +++
 drivers/net/iavf/iavf_ethdev.c |  40 ++
 drivers/net/iavf/iavf_tm.c | 190 +++--
 drivers/net/iavf/iavf_vchnl.c  |  51 +++
 6 files changed, 343 insertions(+), 8 deletions(-)

-- 
2.25.1



[PATCH v3 1/4] common/iavf: support queue rate limit and quanta size configuration

2022-04-07 Thread Wenjun Wu
This patch adds new virtchnl opcodes and structures for rate limit
and quanta size configuration, which include:
1. VIRTCHNL_OP_CONFIG_QUEUE_BW, to configure max bandwidth for each
VF per queue.
2. VIRTCHNL_OP_CONFIG_QUANTA, to configure quanta size per queue.

Signed-off-by: Ting Xu 
Signed-off-by: Wenjun Wu 
---
 drivers/common/iavf/virtchnl.h | 50 ++
 1 file changed, 50 insertions(+)

diff --git a/drivers/common/iavf/virtchnl.h b/drivers/common/iavf/virtchnl.h
index 3e44eca7d8..249ae6ed23 100644
--- a/drivers/common/iavf/virtchnl.h
+++ b/drivers/common/iavf/virtchnl.h
@@ -164,6 +164,8 @@ enum virtchnl_ops {
VIRTCHNL_OP_ENABLE_QUEUES_V2 = 107,
VIRTCHNL_OP_DISABLE_QUEUES_V2 = 108,
VIRTCHNL_OP_MAP_QUEUE_VECTOR = 111,
+   VIRTCHNL_OP_CONFIG_QUEUE_BW = 112,
+   VIRTCHNL_OP_CONFIG_QUANTA = 113,
VIRTCHNL_OP_MAX,
 };
 
@@ -1872,6 +1874,23 @@ struct virtchnl_queue_tc_mapping {
 
 VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_queue_tc_mapping);
 
+/* VIRTCHNL_OP_CONFIG_QUEUE_BW */
+struct virtchnl_queue_bw {
+   u16 queue_id;
+   u8 tc;
+   u8 pad;
+   struct virtchnl_shaper_bw shaper;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_queue_bw);
+
+struct virtchnl_queues_bw_cfg {
+   u16 vsi_id;
+   u16 num_queues;
+   struct virtchnl_queue_bw cfg[1];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_queues_bw_cfg);
 
 /* TX and RX queue types are valid in legacy as well as split queue models.
  * With Split Queue model, 2 additional types are introduced - TX_COMPLETION
@@ -1978,6 +1997,12 @@ struct virtchnl_queue_vector_maps {
 
 VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_queue_vector_maps);
 
+struct virtchnl_quanta_cfg {
+   u16 quanta_size;
+   struct virtchnl_queue_chunk queue_select;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_quanta_cfg);
 
 /* Since VF messages are limited by u16 size, precalculate the maximum possible
  * values of nested elements in virtchnl structures that virtual channel can
@@ -2244,6 +2269,31 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info 
*ver, u32 v_opcode,
 sizeof(q_tc->tc[0]);
}
break;
+   case VIRTCHNL_OP_CONFIG_QUEUE_BW:
+   valid_len = sizeof(struct virtchnl_queues_bw_cfg);
+   if (msglen >= valid_len) {
+   struct virtchnl_queues_bw_cfg *q_bw =
+   (struct virtchnl_queues_bw_cfg *)msg;
+   if (q_bw->num_queues == 0) {
+   err_msg_format = true;
+   break;
+   }
+   valid_len += (q_bw->num_queues - 1) *
+sizeof(q_bw->cfg[0]);
+   }
+   break;
+   case VIRTCHNL_OP_CONFIG_QUANTA:
+   valid_len = sizeof(struct virtchnl_quanta_cfg);
+   if (msglen >= valid_len) {
+   struct virtchnl_quanta_cfg *q_quanta =
+   (struct virtchnl_quanta_cfg *)msg;
+   if (q_quanta->quanta_size == 0 ||
+   q_quanta->queue_select.num_queues == 0) {
+   err_msg_format = true;
+   break;
+   }
+   }
+   break;
case VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS:
break;
case VIRTCHNL_OP_ADD_VLAN_V2:
-- 
2.25.1



[PATCH v3 2/4] net/iavf: support queue rate limit configuration

2022-04-07 Thread Wenjun Wu
This patch adds queue rate limit configuration support.
Only max bandwidth is supported.

Signed-off-by: Ting Xu 
Signed-off-by: Wenjun Wu 
---
 drivers/net/iavf/iavf.h   |  13 +++
 drivers/net/iavf/iavf_tm.c| 190 --
 drivers/net/iavf/iavf_vchnl.c |  23 
 3 files changed, 218 insertions(+), 8 deletions(-)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index a01d18e61b..96515a3ee9 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -170,11 +170,21 @@ struct iavf_tm_node {
uint32_t weight;
uint32_t reference_count;
struct iavf_tm_node *parent;
+   struct iavf_tm_shaper_profile *shaper_profile;
struct rte_tm_node_params params;
 };
 
 TAILQ_HEAD(iavf_tm_node_list, iavf_tm_node);
 
+struct iavf_tm_shaper_profile {
+   TAILQ_ENTRY(iavf_tm_shaper_profile) node;
+   uint32_t shaper_profile_id;
+   uint32_t reference_count;
+   struct rte_tm_shaper_params profile;
+};
+
+TAILQ_HEAD(iavf_shaper_profile_list, iavf_tm_shaper_profile);
+
 /* node type of Traffic Manager */
 enum iavf_tm_node_type {
IAVF_TM_NODE_TYPE_PORT,
@@ -188,6 +198,7 @@ struct iavf_tm_conf {
struct iavf_tm_node *root; /* root node - vf vsi */
struct iavf_tm_node_list tc_list; /* node list for all the TCs */
struct iavf_tm_node_list queue_list; /* node list for all the queues */
+   struct iavf_shaper_profile_list shaper_profile_list;
uint32_t nb_tc_node;
uint32_t nb_queue_node;
bool committed;
@@ -451,6 +462,8 @@ int iavf_add_del_mc_addr_list(struct iavf_adapter *adapter,
 int iavf_request_queues(struct rte_eth_dev *dev, uint16_t num);
 int iavf_get_max_rss_queue_region(struct iavf_adapter *adapter);
 int iavf_get_qos_cap(struct iavf_adapter *adapter);
+int iavf_set_q_bw(struct rte_eth_dev *dev,
+ struct virtchnl_queues_bw_cfg *q_bw, uint16_t size);
 int iavf_set_q_tc_map(struct rte_eth_dev *dev,
struct virtchnl_queue_tc_mapping *q_tc_mapping,
uint16_t size);
diff --git a/drivers/net/iavf/iavf_tm.c b/drivers/net/iavf/iavf_tm.c
index 8d92062c7f..32bb3be45e 100644
--- a/drivers/net/iavf/iavf_tm.c
+++ b/drivers/net/iavf/iavf_tm.c
@@ -8,6 +8,13 @@
 static int iavf_hierarchy_commit(struct rte_eth_dev *dev,
 __rte_unused int clear_on_fail,
 __rte_unused struct rte_tm_error *error);
+static int iavf_shaper_profile_add(struct rte_eth_dev *dev,
+  uint32_t shaper_profile_id,
+  struct rte_tm_shaper_params *profile,
+  struct rte_tm_error *error);
+static int iavf_shaper_profile_del(struct rte_eth_dev *dev,
+  uint32_t shaper_profile_id,
+  struct rte_tm_error *error);
 static int iavf_tm_node_add(struct rte_eth_dev *dev, uint32_t node_id,
  uint32_t parent_node_id, uint32_t priority,
  uint32_t weight, uint32_t level_id,
@@ -30,6 +37,8 @@ static int iavf_node_type_get(struct rte_eth_dev *dev, 
uint32_t node_id,
   int *is_leaf, struct rte_tm_error *error);
 
 const struct rte_tm_ops iavf_tm_ops = {
+   .shaper_profile_add = iavf_shaper_profile_add,
+   .shaper_profile_delete = iavf_shaper_profile_del,
.node_add = iavf_tm_node_add,
.node_delete = iavf_tm_node_delete,
.capabilities_get = iavf_tm_capabilities_get,
@@ -44,6 +53,9 @@ iavf_tm_conf_init(struct rte_eth_dev *dev)
 {
struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 
+   /* initialize shaper profile list */
+   TAILQ_INIT(&vf->tm_conf.shaper_profile_list);
+
/* initialize node configuration */
vf->tm_conf.root = NULL;
TAILQ_INIT(&vf->tm_conf.tc_list);
@@ -57,6 +69,7 @@ void
 iavf_tm_conf_uninit(struct rte_eth_dev *dev)
 {
struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+   struct iavf_tm_shaper_profile *shaper_profile;
struct iavf_tm_node *tm_node;
 
/* clear node configuration */
@@ -74,6 +87,14 @@ iavf_tm_conf_uninit(struct rte_eth_dev *dev)
rte_free(vf->tm_conf.root);
vf->tm_conf.root = NULL;
}
+
+   /* Remove all shaper profiles */
+   while ((shaper_profile =
+  TAILQ_FIRST(&vf->tm_conf.shaper_profile_list))) {
+   TAILQ_REMOVE(&vf->tm_conf.shaper_profile_list,
+shaper_profile, node);
+   rte_free(shaper_profile);
+   }
 }
 
 static inline struct iavf_tm_node *
@@ -132,13 +153,6 @@ iavf_node_param_check(struct iavf_info *vf, uint32_t 
node_id,
return -EINVAL;
}
 
-   /* not support shaper profile */
-   if (params->shaper_profile_id) {
-   error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_SH

[PATCH v3 3/4] net/iavf: support quanta size configuration

2022-04-07 Thread Wenjun Wu
This patch adds quanta size configuration support.
Quanta size should be between 256 and 4096, and be a multiple of 64.

Signed-off-by: Wenjun Wu 
---
 drivers/net/iavf/iavf.h|  3 +++
 drivers/net/iavf/iavf_ethdev.c | 40 ++
 drivers/net/iavf/iavf_vchnl.c  | 28 
 3 files changed, 71 insertions(+)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 96515a3ee9..c0a4a47b04 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -292,6 +292,7 @@ enum iavf_proto_xtr_type {
 struct iavf_devargs {
uint8_t proto_xtr_dflt;
uint8_t proto_xtr[IAVF_MAX_QUEUE_NUM];
+   uint16_t quanta_size;
 };
 
 struct iavf_security_ctx;
@@ -467,6 +468,8 @@ int iavf_set_q_bw(struct rte_eth_dev *dev,
 int iavf_set_q_tc_map(struct rte_eth_dev *dev,
struct virtchnl_queue_tc_mapping *q_tc_mapping,
uint16_t size);
+int iavf_set_vf_quanta_size(struct iavf_adapter *adapter, u16 start_queue_id,
+   u16 num_queues);
 void iavf_tm_conf_init(struct rte_eth_dev *dev);
 void iavf_tm_conf_uninit(struct rte_eth_dev *dev);
 int iavf_ipsec_crypto_request(struct iavf_adapter *adapter,
diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c
index d6190ac24a..255459f162 100644
--- a/drivers/net/iavf/iavf_ethdev.c
+++ b/drivers/net/iavf/iavf_ethdev.c
@@ -34,9 +34,11 @@
 
 /* devargs */
 #define IAVF_PROTO_XTR_ARG "proto_xtr"
+#define IAVF_QUANTA_SIZE_ARG   "quanta_size"
 
 static const char * const iavf_valid_args[] = {
IAVF_PROTO_XTR_ARG,
+   IAVF_QUANTA_SIZE_ARG,
NULL
 };
 
@@ -950,6 +952,11 @@ iavf_dev_start(struct rte_eth_dev *dev)
return -1;
}
 
+   if (iavf_set_vf_quanta_size(adapter, index, num_queue_pairs) != 0) {
+   PMD_DRV_LOG(ERR, "configure quanta size failed");
+   goto err_queue;
+   }
+
/* If needed, send configure queues msg multiple times to make the
 * adminq buffer length smaller than the 4K limitation.
 */
@@ -2092,6 +2099,25 @@ iavf_handle_proto_xtr_arg(__rte_unused const char *key, 
const char *value,
return 0;
 }
 
+static int
+parse_u16(__rte_unused const char *key, const char *value, void *args)
+{
+   u16 *num = (u16 *)args;
+   u16 tmp;
+
+   errno = 0;
+   tmp = strtoull(value, NULL, 10);
+   if (errno || !tmp) {
+   PMD_DRV_LOG(WARNING, "%s: \"%s\" is not a valid u16",
+   key, value);
+   return -1;
+   }
+
+   *num = tmp;
+
+   return 0;
+}
+
 static int iavf_parse_devargs(struct rte_eth_dev *dev)
 {
struct iavf_adapter *ad =
@@ -2118,6 +2144,20 @@ static int iavf_parse_devargs(struct rte_eth_dev *dev)
if (ret)
goto bail;
 
+   ret = rte_kvargs_process(kvlist, IAVF_QUANTA_SIZE_ARG,
+&parse_u16, &ad->devargs.quanta_size);
+   if (ret)
+   goto bail;
+
+   if (ad->devargs.quanta_size == 0)
+   ad->devargs.quanta_size = 1024;
+
+   if (ad->devargs.quanta_size < 256 || ad->devargs.quanta_size > 4096 ||
+   ad->devargs.quanta_size & 0x40) {
+   PMD_INIT_LOG(ERR, "invalid quanta size\n");
+   return -EINVAL;
+   }
+
 bail:
rte_kvargs_free(kvlist);
return ret;
diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c
index 537369f736..ee26e45acf 100644
--- a/drivers/net/iavf/iavf_vchnl.c
+++ b/drivers/net/iavf/iavf_vchnl.c
@@ -1828,3 +1828,31 @@ iavf_ipsec_crypto_request(struct iavf_adapter *adapter,
 
return 0;
 }
+
+int
+iavf_set_vf_quanta_size(struct iavf_adapter *adapter, u16 start_queue_id, u16 
num_queues)
+{
+   struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
+   struct iavf_cmd_info args;
+   struct virtchnl_quanta_cfg q_quanta;
+   int err;
+
+   q_quanta.quanta_size = adapter->devargs.quanta_size;
+   q_quanta.queue_select.type = VIRTCHNL_QUEUE_TYPE_TX;
+   q_quanta.queue_select.start_queue_id = start_queue_id;
+   q_quanta.queue_select.num_queues = num_queues;
+
+   args.ops = VIRTCHNL_OP_CONFIG_QUANTA;
+   args.in_args = (uint8_t *)&q_quanta;
+   args.in_args_size = sizeof(q_quanta);
+   args.out_buffer = vf->aq_resp;
+   args.out_size = IAVF_AQ_BUF_SZ;
+
+   err = iavf_execute_vf_cmd(adapter, &args, 0);
+   if (err) {
+   PMD_DRV_LOG(ERR, "Failed to execute command of VIRTCHNL_OP_CONFIG_QUANTA");
+   return err;
+   }
+
+   return 0;
+}
-- 
2.25.1



[PATCH v3 4/4] doc: add release notes for 22.07

2022-04-07 Thread Wenjun Wu
Add support for queue rate limit and quanta size configuration

Signed-off-by: Wenjun Wu 
---
 doc/guides/rel_notes/release_22_07.rst | 4 
 1 file changed, 4 insertions(+)

diff --git a/doc/guides/rel_notes/release_22_07.rst 
b/doc/guides/rel_notes/release_22_07.rst
index 42a5f2d990..f1b4057d70 100644
--- a/doc/guides/rel_notes/release_22_07.rst
+++ b/doc/guides/rel_notes/release_22_07.rst
@@ -55,6 +55,10 @@ New Features
  Also, make sure to start the actual text at the margin.
  ===
 
+* **Updated Intel iavf driver.**
+
+  * Added Tx QoS queue rate limitation support.
+  * Added quanta size configuration support.
 
 Removed Items
 -
-- 
2.25.1



[PATCH v6 01/10] net/ice/base: fix dead lock issue when getting node from ID type

2022-04-07 Thread Wenjun Wu
The function ice_sched_get_node_by_id_type needs to be called
with the scheduler lock held. However, the function
ice_sched_get_node also requests the scheduler lock,
which causes a deadlock.

This patch replaces function ice_sched_get_node with
function ice_sched_find_node_by_teid to solve this problem.

Signed-off-by: Wenjun Wu 
---
 drivers/net/ice/base/ice_sched.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ice/base/ice_sched.c b/drivers/net/ice/base/ice_sched.c
index 2620892c9e..e697c579be 100644
--- a/drivers/net/ice/base/ice_sched.c
+++ b/drivers/net/ice/base/ice_sched.c
@@ -4774,12 +4774,12 @@ ice_sched_get_node_by_id_type(struct ice_port_info *pi, 
u32 id,
 
case ICE_AGG_TYPE_Q:
/* The current implementation allows single queue to modify */
-   node = ice_sched_get_node(pi, id);
+   node = ice_sched_find_node_by_teid(pi->root, id);
break;
 
case ICE_AGG_TYPE_QG:
/* The current implementation allows single qg to modify */
-   child_node = ice_sched_get_node(pi, id);
+   child_node = ice_sched_find_node_by_teid(pi->root, id);
if (!child_node)
break;
node = child_node->parent;
-- 
2.25.1



[PATCH v6 00/10] Enable ETS-based TX QoS on PF

2022-04-07 Thread Wenjun Wu
This patch set enables ETS-based TX QoS on the PF. Bandwidth and priority
can be configured at both the queue and queue group level, while weight
can be configured only at the queue level.
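
For reference, the intended usage through the generic rte_tm API is roughly
as follows (sketch only; node ids, priorities and weights are illustrative
and the hierarchy must match what the PF driver exposes):

#include <rte_tm.h>

/* One queue group under TC0, two queues with different priority/weight. */
static int
setup_ets_hierarchy(uint16_t port_id, uint32_t tc0_node_id)
{
	struct rte_tm_error err;
	struct rte_tm_node_params np = {
		.shaper_profile_id = RTE_TM_SHAPER_PROFILE_ID_NONE,
	};

	/* Queue group node (id 100) attached to TC0. */
	if (rte_tm_node_add(port_id, 100, tc0_node_id, 0, 1,
			    RTE_TM_NODE_LEVEL_ID_ANY, &np, &err))
		return -1;
	/* Queue 0 and queue 1 under the group, differing priority/weight. */
	if (rte_tm_node_add(port_id, 0, 100, 0, 1,
			    RTE_TM_NODE_LEVEL_ID_ANY, &np, &err))
		return -1;
	if (rte_tm_node_add(port_id, 1, 100, 7, 4,
			    RTE_TM_NODE_LEVEL_ID_ANY, &np, &err))
		return -1;
	return rte_tm_hierarchy_commit(port_id, 1 /* clear on fail */, &err);
}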

v2: fix code style issue.
v3: fix uninitialization issue.
v4: fix logical issue.
v5: fix CI testing issue. Add explicit cast.
v6: add release note.

Ting Xu (1):
  net/ice: support queue bandwidth limit

Wenjun Wu (9):
  net/ice/base: fix dead lock issue when getting node from ID type
  net/ice/base: support priority configuration of the exact node
  net/ice/base: support queue BW allocation configuration
  net/ice: support queue group bandwidth limit
  net/ice: support queue priority configuration
  net/ice: support queue weight configuration
  net/ice: support queue group priority configuration
  net/ice: add warning log for unsupported configuration
  doc: add release notes for 22.07

 doc/guides/rel_notes/release_22_07.rst |   4 +
 drivers/net/ice/base/ice_sched.c   |  89 ++-
 drivers/net/ice/base/ice_sched.h   |   6 +
 drivers/net/ice/ice_ethdev.c   |  19 +
 drivers/net/ice/ice_ethdev.h   |  55 ++
 drivers/net/ice/ice_tm.c   | 844 +
 drivers/net/ice/meson.build|   1 +
 7 files changed, 1016 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ice/ice_tm.c

-- 
2.25.1



[PATCH v6 02/10] net/ice/base: support priority configuration of the exact node

2022-04-07 Thread Wenjun Wu
This patch adds priority configuration support for the exact
node in the scheduler tree.
The new function acquires the scheduler lock itself, so the
caller does not need any additional lock handling.

Signed-off-by: Wenjun Wu 
---
 drivers/net/ice/base/ice_sched.c | 21 +
 drivers/net/ice/base/ice_sched.h |  3 +++
 2 files changed, 24 insertions(+)

diff --git a/drivers/net/ice/base/ice_sched.c b/drivers/net/ice/base/ice_sched.c
index e697c579be..c0f90b762b 100644
--- a/drivers/net/ice/base/ice_sched.c
+++ b/drivers/net/ice/base/ice_sched.c
@@ -3613,6 +3613,27 @@ ice_cfg_vsi_q_priority(struct ice_port_info *pi, u16 
num_qs, u32 *q_ids,
return status;
 }
 
+/**
+ * ice_cfg_node_priority - config priority of node
+ * @pi: port information structure
+ * @node: sched node to configure
+ * @priority: sibling priority
+ *
+ * This function configures node element's sibling priority only.
+ */
+enum ice_status
+ice_cfg_node_priority(struct ice_port_info *pi, struct ice_sched_node *node,
+ u8 priority)
+{
+   enum ice_status status = ICE_ERR_PARAM;
+
+   ice_acquire_lock(&pi->sched_lock);
+   status = ice_sched_cfg_sibl_node_prio(pi, node, priority);
+   ice_release_lock(&pi->sched_lock);
+
+   return status;
+}
+
 /**
  * ice_cfg_agg_vsi_priority_per_tc - config aggregator's VSI priority per TC
  * @pi: port information structure
diff --git a/drivers/net/ice/base/ice_sched.h b/drivers/net/ice/base/ice_sched.h
index 1441b5f191..e1dc6e18a4 100644
--- a/drivers/net/ice/base/ice_sched.h
+++ b/drivers/net/ice/base/ice_sched.h
@@ -172,6 +172,9 @@ enum ice_status
 ice_cfg_vsi_q_priority(struct ice_port_info *pi, u16 num_qs, u32 *q_ids,
   u8 *q_prio);
 enum ice_status
+ice_cfg_node_priority(struct ice_port_info *pi,
+ struct ice_sched_node *node, u8 priority);
+enum ice_status
 ice_cfg_vsi_bw_alloc(struct ice_port_info *pi, u16 vsi_handle, u8 ena_tcmap,
 enum ice_rl_type rl_type, u8 *bw_alloc);
 enum ice_status
-- 
2.25.1



[PATCH v6 03/10] net/ice/base: support queue BW allocation configuration

2022-04-07 Thread Wenjun Wu
This patch adds BW allocation support for queue scheduling nodes
to support WFQ at the queue level.

Signed-off-by: Wenjun Wu 
---
 drivers/net/ice/base/ice_sched.c | 64 
 drivers/net/ice/base/ice_sched.h |  3 ++
 2 files changed, 67 insertions(+)

diff --git a/drivers/net/ice/base/ice_sched.c b/drivers/net/ice/base/ice_sched.c
index c0f90b762b..4b7fdb2f13 100644
--- a/drivers/net/ice/base/ice_sched.c
+++ b/drivers/net/ice/base/ice_sched.c
@@ -3613,6 +3613,70 @@ ice_cfg_vsi_q_priority(struct ice_port_info *pi, u16 
num_qs, u32 *q_ids,
return status;
 }
 
+/**
+ * ice_sched_save_q_bw_alloc - save queue node's BW allocation information
+ * @q_ctx: queue context structure
+ * @rl_type: rate limit type min, max, or shared
+ * @bw_alloc: BW weight/allocation
+ *
+ * Save BW information of queue type node for post replay use.
+ */
+static enum ice_status
+ice_sched_save_q_bw_alloc(struct ice_q_ctx *q_ctx, enum ice_rl_type rl_type,
+ u32 bw_alloc)
+{
+   switch (rl_type) {
+   case ICE_MIN_BW:
+   ice_set_clear_cir_bw_alloc(&q_ctx->bw_t_info, bw_alloc);
+   break;
+   case ICE_MAX_BW:
+   ice_set_clear_eir_bw_alloc(&q_ctx->bw_t_info, bw_alloc);
+   break;
+   default:
+   return ICE_ERR_PARAM;
+   }
+   return ICE_SUCCESS;
+}
+
+/**
+ * ice_cfg_q_bw_alloc - configure queue BW weight/alloc params
+ * @pi: port information structure
+ * @vsi_handle: sw VSI handle
+ * @tc: traffic class
+ * @q_handle: software queue handle
+ * @rl_type: min, max, or shared
+ * @bw_alloc: BW weight/allocation
+ *
+ * This function configures BW allocation of queue scheduling node.
+ */
+enum ice_status
+ice_cfg_q_bw_alloc(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+  u16 q_handle, enum ice_rl_type rl_type, u32 bw_alloc)
+{
+   enum ice_status status = ICE_ERR_PARAM;
+   struct ice_sched_node *node;
+   struct ice_q_ctx *q_ctx;
+
+   ice_acquire_lock(&pi->sched_lock);
+   q_ctx = ice_get_lan_q_ctx(pi->hw, vsi_handle, tc, q_handle);
+   if (!q_ctx)
+   goto exit_q_bw_alloc;
+
+   node = ice_sched_find_node_by_teid(pi->root, q_ctx->q_teid);
+   if (!node) {
+   ice_debug(pi->hw, ICE_DBG_SCHED, "Wrong q_teid\n");
+   goto exit_q_bw_alloc;
+   }
+
+   status = ice_sched_cfg_node_bw_alloc(pi->hw, node, rl_type, bw_alloc);
+   if (!status)
+   status = ice_sched_save_q_bw_alloc(q_ctx, rl_type, bw_alloc);
+
+exit_q_bw_alloc:
+   ice_release_lock(&pi->sched_lock);
+   return status;
+}
+
 /**
  * ice_cfg_node_priority - config priority of node
  * @pi: port information structure
diff --git a/drivers/net/ice/base/ice_sched.h b/drivers/net/ice/base/ice_sched.h
index e1dc6e18a4..454a1570bb 100644
--- a/drivers/net/ice/base/ice_sched.h
+++ b/drivers/net/ice/base/ice_sched.h
@@ -172,6 +172,9 @@ enum ice_status
 ice_cfg_vsi_q_priority(struct ice_port_info *pi, u16 num_qs, u32 *q_ids,
   u8 *q_prio);
 enum ice_status
+ice_cfg_q_bw_alloc(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+  u16 q_handle, enum ice_rl_type rl_type, u32 bw_alloc);
+enum ice_status
 ice_cfg_node_priority(struct ice_port_info *pi,
  struct ice_sched_node *node, u8 priority);
 enum ice_status
-- 
2.25.1



[PATCH v6 04/10] net/ice: support queue bandwidth limit

2022-04-07 Thread Wenjun Wu
From: Ting Xu 

Enable the basic TM API for PF only. Support adding shaper profiles and
queue nodes. Only max bandwidth is supported in profiles, and profiles
can be assigned to target queues. Only TC0 is valid.
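
For reference, a hedged application-side sketch of the generic rte_tm API
that this patch hooks up; the node IDs, level numbers and the assumption
that a plain port/TC/queue hierarchy is accepted are illustrative only:

	#include <rte_tm.h>

	/* Illustrative sketch: cap Tx queue 0 of port_id at ~100 Mbps. */
	static int
	queue_rate_limit_example(uint16_t port_id)
	{
		struct rte_tm_error error;
		struct rte_tm_shaper_params shaper = {
			/* token bucket rate is in bytes per second */
			.peak = { .rate = 100 * 1000 * 1000 / 8 },
		};
		struct rte_tm_node_params np = {
			.shaper_profile_id = RTE_TM_SHAPER_PROFILE_ID_NONE,
		};
		int ret;

		ret = rte_tm_shaper_profile_add(port_id, 1, &shaper, &error);
		if (ret)
			return ret;

		/* root (port) node, then TC0 node; priority 0, weight 1 only */
		ret = rte_tm_node_add(port_id, 1000, RTE_TM_NODE_ID_NULL, 0, 1,
				      0 /* assumed port level */, &np, &error);
		if (ret)
			return ret;
		ret = rte_tm_node_add(port_id, 900, 1000, 0, 1,
				      1 /* assumed TC level */, &np, &error);
		if (ret)
			return ret;

		/* leaf node for Tx queue 0 with the shaper profile attached */
		np.shaper_profile_id = 1;
		ret = rte_tm_node_add(port_id, 0, 900, 0, 1,
				      2 /* assumed queue level */, &np, &error);
		if (ret)
			return ret;

		return rte_tm_hierarchy_commit(port_id, 1 /* clear_on_fail */,
					       &error);
	}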

Signed-off-by: Wenjun Wu 
Signed-off-by: Ting Xu 
---
 drivers/net/ice/ice_ethdev.c |  19 ++
 drivers/net/ice/ice_ethdev.h |  48 +++
 drivers/net/ice/ice_tm.c | 599 +++
 drivers/net/ice/meson.build  |   1 +
 4 files changed, 667 insertions(+)
 create mode 100644 drivers/net/ice/ice_tm.c

diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
index 13adcf90ed..37897765c8 100644
--- a/drivers/net/ice/ice_ethdev.c
+++ b/drivers/net/ice/ice_ethdev.c
@@ -205,6 +205,18 @@ static const struct rte_pci_id pci_id_ice_map[] = {
{ .vendor_id = 0, /* sentinel */ },
 };
 
+static int
+ice_tm_ops_get(struct rte_eth_dev *dev __rte_unused,
+   void *arg)
+{
+   if (!arg)
+   return -EINVAL;
+
+   *(const void **)arg = &ice_tm_ops;
+
+   return 0;
+}
+
 static const struct eth_dev_ops ice_eth_dev_ops = {
.dev_configure= ice_dev_configure,
.dev_start= ice_dev_start,
@@ -267,6 +279,7 @@ static const struct eth_dev_ops ice_eth_dev_ops = {
.timesync_read_time   = ice_timesync_read_time,
.timesync_write_time  = ice_timesync_write_time,
.timesync_disable = ice_timesync_disable,
+   .tm_ops_get   = ice_tm_ops_get,
 };
 
 /* store statistics names and its offset in stats structure */
@@ -2312,6 +2325,9 @@ ice_dev_init(struct rte_eth_dev *dev)
/* Initialize RSS context for gtpu_eh */
ice_rss_ctx_init(pf);
 
+   /* Initialize TM configuration */
+   ice_tm_conf_init(dev);
+
if (!ad->is_safe_mode) {
ret = ice_flow_init(ad);
if (ret) {
@@ -2492,6 +2508,9 @@ ice_dev_close(struct rte_eth_dev *dev)
rte_free(pf->proto_xtr);
pf->proto_xtr = NULL;
 
+   /* Uninit TM configuration */
+   ice_tm_conf_uninit(dev);
+
if (ad->devargs.pps_out_ena) {
ICE_WRITE_REG(hw, GLTSYN_AUX_OUT(pin_idx, timer), 0);
ICE_WRITE_REG(hw, GLTSYN_CLKO(pin_idx, timer), 0);
diff --git a/drivers/net/ice/ice_ethdev.h b/drivers/net/ice/ice_ethdev.h
index 3ed580d438..0841e1866c 100644
--- a/drivers/net/ice/ice_ethdev.h
+++ b/drivers/net/ice/ice_ethdev.h
@@ -9,10 +9,12 @@
 #include 
 
 #include 
+#include 
 
 #include "base/ice_common.h"
 #include "base/ice_adminq_cmd.h"
 #include "base/ice_flow.h"
+#include "base/ice_sched.h"
 
 #define ICE_ADMINQ_LEN   32
 #define ICE_SBIOQ_LEN32
@@ -453,6 +455,48 @@ struct ice_acl_info {
uint64_t hw_entry_id[MAX_ACL_NORMAL_ENTRIES];
 };
 
+TAILQ_HEAD(ice_shaper_profile_list, ice_tm_shaper_profile);
+TAILQ_HEAD(ice_tm_node_list, ice_tm_node);
+
+struct ice_tm_shaper_profile {
+   TAILQ_ENTRY(ice_tm_shaper_profile) node;
+   uint32_t shaper_profile_id;
+   uint32_t reference_count;
+   struct rte_tm_shaper_params profile;
+};
+
+/* Struct to store Traffic Manager node configuration. */
+struct ice_tm_node {
+   TAILQ_ENTRY(ice_tm_node) node;
+   uint32_t id;
+   uint32_t tc;
+   uint32_t priority;
+   uint32_t weight;
+   uint32_t reference_count;
+   struct ice_tm_node *parent;
+   struct ice_tm_shaper_profile *shaper_profile;
+   struct rte_tm_node_params params;
+};
+
+/* node type of Traffic Manager */
+enum ice_tm_node_type {
+   ICE_TM_NODE_TYPE_PORT,
+   ICE_TM_NODE_TYPE_TC,
+   ICE_TM_NODE_TYPE_QUEUE,
+   ICE_TM_NODE_TYPE_MAX,
+};
+
+/* Struct to store all the Traffic Manager configuration. */
+struct ice_tm_conf {
+   struct ice_shaper_profile_list shaper_profile_list;
+   struct ice_tm_node *root; /* root node - vf vsi */
+   struct ice_tm_node_list tc_list; /* node list for all the TCs */
+   struct ice_tm_node_list queue_list; /* node list for all the queues */
+   uint32_t nb_tc_node;
+   uint32_t nb_queue_node;
+   bool committed;
+};
+
 struct ice_pf {
struct ice_adapter *adapter; /* The adapter this PF associate to */
struct ice_vsi *main_vsi; /* pointer to main VSI structure */
@@ -497,6 +541,7 @@ struct ice_pf {
uint64_t old_tx_bytes;
uint64_t supported_rxdid; /* bitmap for supported RXDID */
uint64_t rss_hf;
+   struct ice_tm_conf tm_conf;
 };
 
 #define ICE_MAX_QUEUE_NUM  2048
@@ -620,6 +665,9 @@ int ice_add_rss_cfg_wrap(struct ice_pf *pf, uint16_t vsi_id,
 struct ice_rss_hash_cfg *cfg);
 int ice_rem_rss_cfg_wrap(struct ice_pf *pf, uint16_t vsi_id,
 struct ice_rss_hash_cfg *cfg);
+void ice_tm_conf_init(struct rte_eth_dev *dev);
+void ice_tm_conf_uninit(struct rte_eth_dev *dev);
+extern const struct rte_tm_ops ice_tm_ops;
 
 static inline int
 ice_align_floor(

[PATCH v6 05/10] net/ice: support queue group bandwidth limit

2022-04-07 Thread Wenjun Wu
To set up the exact queue group, we need to reconfigure the topology by
deleting and then recreating the queue nodes.

This patch adds queue group configuration support and queue group
bandwidth limit support.
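
Building on the application-side sketch in patch 04, the hierarchy now also
needs VSI and queue-group nodes between the TC node and the queues; a hedged
fragment, where the node IDs and level numbers are illustrative assumptions:

	#include <rte_tm.h>

	/* Illustrative only: insert the two new levels under an existing
	 * TC node before adding queue nodes beneath the queue group. */
	static int
	add_vsi_and_qgroup_example(uint16_t port_id, uint32_t tc_node_id,
				   uint32_t vsi_node_id, uint32_t qgroup_node_id)
	{
		struct rte_tm_error error;
		struct rte_tm_node_params np = {
			.shaper_profile_id = RTE_TM_SHAPER_PROFILE_ID_NONE,
		};
		int ret;

		ret = rte_tm_node_add(port_id, vsi_node_id, tc_node_id, 0, 1,
				      2 /* assumed VSI level */, &np, &error);
		if (ret)
			return ret;

		/* a shaper profile could be attached here to cap the group */
		return rte_tm_node_add(port_id, qgroup_node_id, vsi_node_id, 0, 1,
				       3 /* assumed queue-group level */,
				       &np, &error);
	}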

Signed-off-by: Wenjun Wu 
---
 drivers/net/ice/ice_ethdev.h |   9 +-
 drivers/net/ice/ice_tm.c | 239 ---
 2 files changed, 232 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ice/ice_ethdev.h b/drivers/net/ice/ice_ethdev.h
index 0841e1866c..6ddbcc9972 100644
--- a/drivers/net/ice/ice_ethdev.h
+++ b/drivers/net/ice/ice_ethdev.h
@@ -474,6 +474,7 @@ struct ice_tm_node {
uint32_t weight;
uint32_t reference_count;
struct ice_tm_node *parent;
+   struct ice_tm_node **children;
struct ice_tm_shaper_profile *shaper_profile;
struct rte_tm_node_params params;
 };
@@ -482,6 +483,8 @@ struct ice_tm_node {
 enum ice_tm_node_type {
ICE_TM_NODE_TYPE_PORT,
ICE_TM_NODE_TYPE_TC,
+   ICE_TM_NODE_TYPE_VSI,
+   ICE_TM_NODE_TYPE_QGROUP,
ICE_TM_NODE_TYPE_QUEUE,
ICE_TM_NODE_TYPE_MAX,
 };
@@ -489,10 +492,14 @@ enum ice_tm_node_type {
 /* Struct to store all the Traffic Manager configuration. */
 struct ice_tm_conf {
struct ice_shaper_profile_list shaper_profile_list;
-   struct ice_tm_node *root; /* root node - vf vsi */
+   struct ice_tm_node *root; /* root node - port */
struct ice_tm_node_list tc_list; /* node list for all the TCs */
+   struct ice_tm_node_list vsi_list; /* node list for all the VSIs */
+   struct ice_tm_node_list qgroup_list; /* node list for all the queue groups */
struct ice_tm_node_list queue_list; /* node list for all the queues */
uint32_t nb_tc_node;
+   uint32_t nb_vsi_node;
+   uint32_t nb_qgroup_node;
uint32_t nb_queue_node;
bool committed;
 };
diff --git a/drivers/net/ice/ice_tm.c b/drivers/net/ice/ice_tm.c
index 383af88981..d70d077286 100644
--- a/drivers/net/ice/ice_tm.c
+++ b/drivers/net/ice/ice_tm.c
@@ -44,8 +44,12 @@ ice_tm_conf_init(struct rte_eth_dev *dev)
TAILQ_INIT(&pf->tm_conf.shaper_profile_list);
pf->tm_conf.root = NULL;
TAILQ_INIT(&pf->tm_conf.tc_list);
+   TAILQ_INIT(&pf->tm_conf.vsi_list);
+   TAILQ_INIT(&pf->tm_conf.qgroup_list);
TAILQ_INIT(&pf->tm_conf.queue_list);
pf->tm_conf.nb_tc_node = 0;
+   pf->tm_conf.nb_vsi_node = 0;
+   pf->tm_conf.nb_qgroup_node = 0;
pf->tm_conf.nb_queue_node = 0;
pf->tm_conf.committed = false;
 }
@@ -62,6 +66,16 @@ ice_tm_conf_uninit(struct rte_eth_dev *dev)
rte_free(tm_node);
}
pf->tm_conf.nb_queue_node = 0;
+   while ((tm_node = TAILQ_FIRST(&pf->tm_conf.qgroup_list))) {
+   TAILQ_REMOVE(&pf->tm_conf.qgroup_list, tm_node, node);
+   rte_free(tm_node);
+   }
+   pf->tm_conf.nb_qgroup_node = 0;
+   while ((tm_node = TAILQ_FIRST(&pf->tm_conf.vsi_list))) {
+   TAILQ_REMOVE(&pf->tm_conf.vsi_list, tm_node, node);
+   rte_free(tm_node);
+   }
+   pf->tm_conf.nb_vsi_node = 0;
while ((tm_node = TAILQ_FIRST(&pf->tm_conf.tc_list))) {
TAILQ_REMOVE(&pf->tm_conf.tc_list, tm_node, node);
rte_free(tm_node);
@@ -79,6 +93,8 @@ ice_tm_node_search(struct rte_eth_dev *dev,
 {
struct ice_pf *pf = ICE_DEV_PRIVATE_TO_PF(dev->data->dev_private);
struct ice_tm_node_list *tc_list = &pf->tm_conf.tc_list;
+   struct ice_tm_node_list *vsi_list = &pf->tm_conf.vsi_list;
+   struct ice_tm_node_list *qgroup_list = &pf->tm_conf.qgroup_list;
struct ice_tm_node_list *queue_list = &pf->tm_conf.queue_list;
struct ice_tm_node *tm_node;
 
@@ -94,6 +110,20 @@ ice_tm_node_search(struct rte_eth_dev *dev,
}
}
 
+   TAILQ_FOREACH(tm_node, vsi_list, node) {
+   if (tm_node->id == node_id) {
+   *node_type = ICE_TM_NODE_TYPE_VSI;
+   return tm_node;
+   }
+   }
+
+   TAILQ_FOREACH(tm_node, qgroup_list, node) {
+   if (tm_node->id == node_id) {
+   *node_type = ICE_TM_NODE_TYPE_QGROUP;
+   return tm_node;
+   }
+   }
+
TAILQ_FOREACH(tm_node, queue_list, node) {
if (tm_node->id == node_id) {
*node_type = ICE_TM_NODE_TYPE_QUEUE;
@@ -354,6 +384,7 @@ ice_tm_node_add(struct rte_eth_dev *dev, uint32_t node_id,
struct ice_tm_node *tm_node;
struct ice_tm_node *parent_node;
uint16_t tc_nb = 1;
+   uint16_t vsi_nb = 1;
int ret;
 
if (!params || !error)
@@ -415,6 +446,8 @@ ice_tm_node_add(struct rte_eth_dev *dev, uint32_t node_id,
tm_node->id = node_id;
tm_node->parent = NULL;
tm_node->reference_count = 0;
+   tm_node->children = (struct ice_tm_node **)
+ 

[PATCH v6 06/10] net/ice: support queue priority configuration

2022-04-07 Thread Wenjun Wu
This patch adds queue priority configuration support.
The highest priority is 0, and the lowest priority is 7.
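
From the application side, the queue node's priority argument to
rte_tm_node_add() can now take 0..7; the driver inverts it onto the hardware
scale (7 - priority) at commit time. A hedged fragment, where the node IDs
and the queue level number are illustrative assumptions:

	#include <rte_tm.h>

	/* Illustrative only: add Tx queue 'queue_id' with strict priority 2. */
	static int
	add_priority_queue_example(uint16_t port_id, uint32_t queue_id,
				   uint32_t qgroup_node_id)
	{
		struct rte_tm_error error;
		struct rte_tm_node_params np = {
			.shaper_profile_id = RTE_TM_SHAPER_PROFILE_ID_NONE,
		};

		return rte_tm_node_add(port_id, queue_id, qgroup_node_id,
				       2 /* priority: 0 highest .. 7 lowest */,
				       1 /* weight */,
				       4 /* assumed queue level */, &np, &error);
	}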

Signed-off-by: Wenjun Wu 
---
 drivers/net/ice/ice_tm.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ice/ice_tm.c b/drivers/net/ice/ice_tm.c
index d70d077286..91e420d653 100644
--- a/drivers/net/ice/ice_tm.c
+++ b/drivers/net/ice/ice_tm.c
@@ -147,9 +147,9 @@ ice_node_param_check(struct ice_pf *pf, uint32_t node_id,
return -EINVAL;
}
 
-   if (priority) {
+   if (priority >= 8) {
error->type = RTE_TM_ERROR_TYPE_NODE_PRIORITY;
-   error->message = "priority should be 0";
+   error->message = "priority should be less than 8";
return -EINVAL;
}
 
@@ -684,6 +684,7 @@ static int ice_hierarchy_commit(struct rte_eth_dev *dev,
struct ice_vsi *vsi;
int ret_val = ICE_SUCCESS;
uint64_t peak = 0;
+   uint8_t priority;
uint32_t i;
uint32_t idx_vsi_child;
uint32_t idx_qg;
@@ -779,6 +780,7 @@ static int ice_hierarchy_commit(struct rte_eth_dev *dev,
qid = tm_node->id;
txq = dev->data->tx_queues[qid];
vsi = txq->vsi;
+   q_teid = txq->q_teid;
if (tm_node->shaper_profile) {
/* Transfer from Byte per seconds to Kbps */
peak = tm_node->shaper_profile->profile.peak.rate;
@@ -794,6 +796,14 @@ static int ice_hierarchy_commit(struct rte_eth_dev *dev,
goto fail_clear;
}
}
+   priority = 7 - tm_node->priority;
+   ret_val = ice_cfg_vsi_q_priority(hw->port_info, 1,
+&q_teid, &priority);
+   if (ret_val) {
+   error->type = RTE_TM_ERROR_TYPE_NODE_PRIORITY;
+   PMD_DRV_LOG(ERR, "configure queue %u priority failed", tm_node->priority);
+   goto fail_clear;
+   }
}
 
return ret_val;
-- 
2.25.1



[PATCH v6 07/10] net/ice: support queue weight configuration

2022-04-07 Thread Wenjun Wu
This patch adds queue weight configuration support.
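
From the application side, the queue node's weight in rte_tm_node_add() can
now be 1..200 and is programmed as the queue's max-BW allocation (WFQ share)
at hierarchy commit. A hedged fragment, where the node IDs and the queue
level number are illustrative assumptions:

	#include <rte_tm.h>

	/* Illustrative only: add Tx queue 'queue_id' with WFQ weight 150. */
	static int
	add_weighted_queue_example(uint16_t port_id, uint32_t queue_id,
				   uint32_t qgroup_node_id)
	{
		struct rte_tm_error error;
		struct rte_tm_node_params np = {
			.shaper_profile_id = RTE_TM_SHAPER_PROFILE_ID_NONE,
		};

		return rte_tm_node_add(port_id, queue_id, qgroup_node_id,
				       0 /* priority */,
				       150 /* weight: 1..200 */,
				       4 /* assumed queue level */, &np, &error);
	}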

Signed-off-by: Wenjun Wu 
---
 drivers/net/ice/ice_tm.c | 13 +++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ice/ice_tm.c b/drivers/net/ice/ice_tm.c
index 91e420d653..4d7bb9102c 100644
--- a/drivers/net/ice/ice_tm.c
+++ b/drivers/net/ice/ice_tm.c
@@ -153,9 +153,9 @@ ice_node_param_check(struct ice_pf *pf, uint32_t node_id,
return -EINVAL;
}
 
-   if (weight != 1) {
+   if (weight > 200 || weight < 1) {
error->type = RTE_TM_ERROR_TYPE_NODE_WEIGHT;
-   error->message = "weight must be 1";
+   error->message = "weight must be between 1 and 200";
return -EINVAL;
}
 
@@ -804,6 +804,15 @@ static int ice_hierarchy_commit(struct rte_eth_dev *dev,
PMD_DRV_LOG(ERR, "configure queue %u priority failed", tm_node->priority);
goto fail_clear;
}
+
+   ret_val = ice_cfg_q_bw_alloc(hw->port_info, vsi->idx,
+tm_node->tc, tm_node->id,
+ICE_MAX_BW, (u32)tm_node->weight);
+   if (ret_val) {
+   error->type = RTE_TM_ERROR_TYPE_NODE_WEIGHT;
+   PMD_DRV_LOG(ERR, "configure queue %u weight failed", tm_node->weight);
+   goto fail_clear;
+   }
}
 
return ret_val;
-- 
2.25.1


