Re: [dpdk-dev] [PATCH v2 5/7] power: support callbacks for multiple Rx queues

2021-06-28 Thread David Marchand
On Fri, Jun 25, 2021 at 4:01 PM Anatoly Burakov
 wrote:
> diff --git a/doc/guides/prog_guide/power_man.rst b/doc/guides/prog_guide/power_man.rst
> index c70ae128ac..38f876466a 100644
> --- a/doc/guides/prog_guide/power_man.rst
> +++ b/doc/guides/prog_guide/power_man.rst

[snip]

> +* Not all Ethernet devices support monitoring, even if the underlying
> +  platform may support the necessary CPU instructions. Support for
> +  monitoring is currently implemented in the following DPDK drivers:
> +
> +  * net/ixgbe
> +  * net/i40e
> +  * net/ice
> +  * net/iavf
> +  * net/mlx5
> +  * net/af_xdp

This list will get obsolete.

It looks like a driver capability, so can we have an ethdev feature added?
Then mark the drivers that support this feature.

And the power lib documentation will have a reference to
doc/guides/nics/features.rst.


-- 
David Marchand
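
For reference, David's suggestion points at the NIC feature matrix: each
driver declares its features in doc/guides/nics/features/<driver>.ini, and
the features themselves are defined via doc/guides/nics/features.rst and
default.ini. Marking the capability would then be a one-line entry per
driver, roughly like this (the feature name here is hypothetical until one
is actually defined):

; doc/guides/nics/features/ixgbe.ini (sketch)
[Features]
Power mgmt address monitor = Y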



[dpdk-dev] [PATCH v2 0/2] add Rx/Tx offload paths for ICE AVX2

2021-06-28 Thread Wenzhuo Lu
Add specific paths for Rx/Tx AVX2, called offload paths.
These paths support the HW offload features, such as checksum, VLAN and
RSS offload, and are chosen automatically according to the configuration.

v2:
 - FDIR should be supported by both the offload and the normal path.

Wenzhuo Lu (2):
  net/ice: add Tx AVX2 offload path
  net/ice: add Rx AVX2 offload path

 doc/guides/rel_notes/release_21_08.rst |   6 +
 drivers/net/ice/ice_rxtx.c |  96 ++---
 drivers/net/ice/ice_rxtx.h |   7 +
 drivers/net/ice/ice_rxtx_vec_avx2.c| 350 +++--
 4 files changed, 286 insertions(+), 173 deletions(-)

-- 
1.8.3.1



[dpdk-dev] [PATCH v2 1/2] net/ice: add Tx AVX2 offload path

2021-06-28 Thread Wenzhuo Lu
Add a specific path for Tx AVX2.
This path supports the HW offload features, such as
checksum insertion and VLAN insertion, and is chosen
automatically according to the configuration.

'inline' is used so that the duplicated code is generated
by the compiler.

Signed-off-by: Wenzhuo Lu 
---
 drivers/net/ice/ice_rxtx.c  | 46 ++-
 drivers/net/ice/ice_rxtx.h  |  2 ++
 drivers/net/ice/ice_rxtx_vec_avx2.c | 54 ++---
 3 files changed, 69 insertions(+), 33 deletions(-)

diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
index 49abcb2..5d7ca60 100644
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -3294,9 +3294,9 @@
 #ifdef RTE_ARCH_X86
struct ice_tx_queue *txq;
int i;
-   int tx_check_ret;
-   bool use_avx512 = false;
-   bool use_avx2 = false;
+   int tx_check_ret = -1;
+   bool cap_avx512 = false;
+   bool cap_avx2 = false;
 
if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
tx_check_ret = ice_tx_vec_dev_check(dev);
@@ -3308,18 +3308,18 @@
rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 &&
rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1)
 #ifdef CC_AVX512_SUPPORT
-   use_avx512 = true;
+   cap_avx512 = true;
 #else
PMD_DRV_LOG(NOTICE,
"AVX512 is not supported in build env");
 #endif
-   if (!use_avx512 && tx_check_ret == ICE_VECTOR_PATH &&
-   (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-   rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
-   rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256)
-   use_avx2 = true;
+   if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
+   rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256)
+   cap_avx2 = true;
 
-   if (!use_avx512 && tx_check_ret == ICE_VECTOR_OFFLOAD_PATH)
+   if (!cap_avx2 && !cap_avx512 &&
+   tx_check_ret == ICE_VECTOR_OFFLOAD_PATH)
ad->tx_vec_allowed = false;
 
if (ad->tx_vec_allowed) {
@@ -3337,7 +3337,8 @@
}
 
if (ad->tx_vec_allowed) {
-   if (use_avx512) {
+   dev->tx_pkt_prepare = NULL;
+   if (cap_avx512) {
 #ifdef CC_AVX512_SUPPORT
if (tx_check_ret == ICE_VECTOR_OFFLOAD_PATH) {
PMD_DRV_LOG(NOTICE,
@@ -3345,6 +3346,7 @@
dev->data->port_id);
dev->tx_pkt_burst =
ice_xmit_pkts_vec_avx512_offload;
+   dev->tx_pkt_prepare = ice_prep_pkts;
} else {
PMD_DRV_LOG(NOTICE,
"Using AVX512 Vector Tx (port %d).",
@@ -3353,14 +3355,22 @@
}
 #endif
} else {
-   PMD_DRV_LOG(DEBUG, "Using %sVector Tx (port %d).",
-   use_avx2 ? "avx2 " : "",
-   dev->data->port_id);
-   dev->tx_pkt_burst = use_avx2 ?
-   ice_xmit_pkts_vec_avx2 :
-   ice_xmit_pkts_vec;
+   if (tx_check_ret == ICE_VECTOR_OFFLOAD_PATH) {
+   PMD_DRV_LOG(NOTICE,
+   "Using AVX2 OFFLOAD Vector Tx (port %d).",
+   dev->data->port_id);
+   dev->tx_pkt_burst =
+   ice_xmit_pkts_vec_avx2_offload;
+   dev->tx_pkt_prepare = ice_prep_pkts;
+   } else {
+   PMD_DRV_LOG(DEBUG, "Using %sVector Tx (port %d).",
+   cap_avx2 ? "avx2 " : "",
+   dev->data->port_id);
+   dev->tx_pkt_burst = cap_avx2 ?
+   ice_xmit_pkts_vec_avx2 :
+   ice_xmit_pkts_vec;
+   }
}
-   dev->tx_pkt_prepare = NULL;
 
return;
}
diff --git a/drivers/net/ice/ice_rxtx.h b/drivers/net/ice/ice_rxtx.h
index b29387c..595dc66 100644
--- a/drivers/net/ice/ice_rxtx.h
+++ b/drivers/net/ice/ice_rxtx.h
@@ -255,6 +2
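
The 'inline' technique mentioned in the commit message can be sketched as
follows (a simplified, hypothetical shape; the real transmit bodies are
much larger): a single always-inline function is specialized by a constant
flag, so the compiler emits two copies, one with offload handling compiled
in and one without, with no run-time branching cost.

#include <stdbool.h>
#include <stdint.h>
#include <rte_common.h>
#include <rte_mbuf.h>

static __rte_always_inline uint16_t
ice_xmit_pkts_vec_avx2_common(void *tx_queue, struct rte_mbuf **tx_pkts,
			      uint16_t nb_pkts, bool do_offload)
{
	uint16_t nb_tx;

	RTE_SET_USED(tx_queue);
	RTE_SET_USED(tx_pkts);
	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		if (do_offload) {
			/* checksum/VLAN descriptor set-up would go here;
			 * compiled out entirely when do_offload is false */
		}
		/* common AVX2 descriptor-filling work would go here */
	}
	return nb_tx;
}

uint16_t
ice_xmit_pkts_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
		       uint16_t nb_pkts)
{
	return ice_xmit_pkts_vec_avx2_common(tx_queue, tx_pkts, nb_pkts, false);
}

uint16_t
ice_xmit_pkts_vec_avx2_offload(void *tx_queue, struct rte_mbuf **tx_pkts,
			       uint16_t nb_pkts)
{
	return ice_xmit_pkts_vec_avx2_common(tx_queue, tx_pkts, nb_pkts, true);
}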

[dpdk-dev] [PATCH v2 2/2] net/ice: add Rx AVX2 offload path

2021-06-28 Thread Wenzhuo Lu
Add a specific path for Rx AVX2.
This path supports the HW offload features, such as
checksum, VLAN stripping and RSS hash, and is chosen
automatically according to the configuration.

'inline' is used so that the duplicated code is generated
by the compiler.

Signed-off-by: Wenzhuo Lu 
---
 doc/guides/rel_notes/release_21_08.rst |   6 +
 drivers/net/ice/ice_rxtx.c |  50 --
 drivers/net/ice/ice_rxtx.h |   5 +
 drivers/net/ice/ice_rxtx_vec_avx2.c| 296 +++--
 4 files changed, 217 insertions(+), 140 deletions(-)

diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index a6ecfdf..203b772 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -55,6 +55,12 @@ New Features
  Also, make sure to start the actual text at the margin.
  ===
 
+* **Updated Intel ice driver.**
+
+  * Added new Rx and Tx AVX2 paths that use the HW offload features.
+    When the HW offload features are configured, the offload paths are
+    chosen automatically. In parallel, support for the HW offload features
+    was removed from the legacy AVX2 paths.
 
 Removed Items
 -
diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
index 5d7ca60..27fd248 100644
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -1999,7 +1999,9 @@
dev->rx_pkt_burst == ice_recv_scattered_pkts_vec_avx512_offload ||
 #endif
dev->rx_pkt_burst == ice_recv_pkts_vec_avx2 ||
-   dev->rx_pkt_burst == ice_recv_scattered_pkts_vec_avx2)
+   dev->rx_pkt_burst == ice_recv_pkts_vec_avx2_offload ||
+   dev->rx_pkt_burst == ice_recv_scattered_pkts_vec_avx2 ||
+   dev->rx_pkt_burst == ice_recv_scattered_pkts_vec_avx2_offload)
return ptypes;
 #endif
 
@@ -3058,7 +3060,7 @@
 #ifdef RTE_ARCH_X86
struct ice_rx_queue *rxq;
int i;
-   int rx_check_ret;
+   int rx_check_ret = -1;
bool use_avx512 = false;
bool use_avx2 = false;
 
@@ -3113,14 +3115,25 @@
ice_recv_scattered_pkts_vec_avx512;
}
 #endif
+   } else if (use_avx2) {
+   if (rx_check_ret == ICE_VECTOR_OFFLOAD_PATH) {
+   PMD_DRV_LOG(NOTICE,
+   "Using AVX2 OFFLOAD Vector Scattered Rx (port %d).",
+   dev->data->port_id);
+   dev->rx_pkt_burst =
+   ice_recv_scattered_pkts_vec_avx2_offload;
+   } else {
+   PMD_DRV_LOG(NOTICE,
+   "Using AVX2 Vector Scattered Rx (port %d).",
+   dev->data->port_id);
+   dev->rx_pkt_burst =
+   ice_recv_scattered_pkts_vec_avx2;
+   }
} else {
PMD_DRV_LOG(DEBUG,
-   "Using %sVector Scattered Rx (port %d).",
-   use_avx2 ? "avx2 " : "",
+   "Using Vector Scattered Rx (port %d).",
dev->data->port_id);
-   dev->rx_pkt_burst = use_avx2 ?
-   ice_recv_scattered_pkts_vec_avx2 :
-   ice_recv_scattered_pkts_vec;
+   dev->rx_pkt_burst = ice_recv_scattered_pkts_vec;
}
} else {
if (use_avx512) {
@@ -3139,14 +3152,25 @@
ice_recv_pkts_vec_avx512;
}
 #endif
+   } else if (use_avx2) {
+   if (rx_check_ret == ICE_VECTOR_OFFLOAD_PATH) {
+   PMD_DRV_LOG(NOTICE,
+   "Using AVX2 OFFLOAD Vector Rx (port %d).",
+   dev->data->port_id);
+   dev->rx_pkt_burst =
+   ice_recv_pkts_vec_avx2_offload;
+   } else {
+   PMD_DRV_LOG(NOTICE,
+   "Using AVX2 Vector Rx (port %d).",
+   dev->data->port_id);
+   dev->rx_pkt_burst =
+

Re: [dpdk-dev] [PATCH v2 1/2] net/ice: factorize firmware loading

2021-06-28 Thread David Marchand
On Thu, Jun 3, 2021 at 6:55 PM David Marchand  wrote:
>
> Both "normal" and "dcf" inits have their copy of some firmware loading
> code.
>
> The DSN query is moved into specific parts of the "normal" and "dcf" init.
>
> A common helper ice_load_pkg is then introduced and takes an adapter
> pointer as its main input.
>
> This helper takes care of finding the right firmware file and loading
> it.
> The adapter active_pkg_type field is set by this helper.
>
> The ice_access macro is removed from the osdep.h header: osdep.h should
> only host wrappers for base driver code.
>
> Signed-off-by: David Marchand 

Please, can you confirm nothing breaks for net/ice with this series?
Thanks.


-- 
David Marchand



Re: [dpdk-dev] [PATCH v3] net/mlx5: add TCP and IPv6 to supported flow items list in Windows

2021-06-28 Thread Raslan Darawsheh
Hi,

> -Original Message-
> From: Tal Shnaiderman 
> Sent: Tuesday, June 22, 2021 6:35 PM
> To: dev@dpdk.org
> Cc: NBU-Contact-Thomas Monjalon ; Matan Azrad
> ; Raslan Darawsheh ; Asaf Penso
> ; Odi Assli 
> Subject: [PATCH v3] net/mlx5: add TCP and IPv6 to supported flow items list
> in Windows
Title too long; changing it to:
net/mlx5: add TCP and IPv6 to supported items in Windows
> 
> WINOF2 2.70 Windows kernel driver allows DevX rule creation of types TCP
> and IPv6.
> 
> Added the types to the supported items in mlx5_flow_os_item_supported
> to allow them to be created in the PMD.
> 
> Added description of new rules support in Windows kernel driver WINOF2
> 2.70 to the mlx5 driver guide.
> 
> Signed-off-by: Tal Shnaiderman 
> 
> ---
> v3 merge code and docu changes to a single patch.
> ---
> ---
>  doc/guides/nics/mlx5.rst| 3 +++
>  drivers/net/mlx5/windows/mlx5_flow_os.h | 2 ++
>  2 files changed, 5 insertions(+)

Patch applied to next-net-mlx,

Kindest regards,
Raslan Darawsheh


Re: [dpdk-dev] [PATCH] net/mlx5: fix meter policy with RSS action

2021-06-28 Thread Raslan Darawsheh
Hi,

> -Original Message-
> From: Li Zhang 
> Sent: Wednesday, June 23, 2021 10:25 AM
> To: Ori Kam ; Slava Ovsiienko
> ; Matan Azrad ; Shahaf
> Shuler 
> Cc: dev@dpdk.org; NBU-Contact-Thomas Monjalon
> ; Raslan Darawsheh ; Roni
> Bar Yanai ; sta...@dpdk.org
> Subject: [PATCH] net/mlx5: fix meter policy with RSS action
> 
> When creating the meter sub-policy RSS rule,
> the RSS descriptor was used before its update.
> The tunnel bit in the RSS descriptor also needs
> to be updated after flow translation.
> 
> Use it only when it is updated.
> 
> Fixes: ec962bad14e ("net/mlx5: fix metering cleanup on stop")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Li Zhang 
> Acked-by: Matan Azrad 
> ---

Patch applied to next-net-mlx,

Kindest regards,
Raslan Darawsheh


Re: [dpdk-dev] [PATCH v2 00/17] Add CPT in Marvell CNXK common driver

2021-06-28 Thread Akhil Goyal
> This patchset adds initial support for CPT in common code for Marvell CN10K
> SoC.
> 
> CPT is the hardware cryptographic block available in the 'cnxk' family SoC.
> CPT, with its microcoded engines, can support symmetric, asymmetric and IPsec
> operations. CPT can associate with NIX (rte_ethdev) to enable inline IPsec
> functionality. Similarly, CPT can associate with SSO (rte_eventdev) to
> enable crypto adapter.
> 
> Based on CNXK common driver, new crypto PMDs would be added under
> 'crypto/cnxk'.
> 
> Changes in v2
> - Moved FPM & EC tables to RoC
> - Moved set_key routines to RoC
> - Added inline IPsec required mboxes and framework
> - Added security common code
> - Added plt_dp_* log based on Akhil's comment
> 
Series Acked-by: Akhil Goyal 


[dpdk-dev] [PATCH] mem: fix mem leak issue when attaching a PCI device without driver to DPDK

2021-06-28 Thread RickZhong
When trying to attach a PCI device without a driver to DPDK,
'dev' is allocated with malloc() but never freed.

Signed-off-by: RickZhong 
---
 drivers/bus/pci/linux/pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index 0dc99e9cb..909502ecc 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -332,6 +332,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr)
dev->kdrv = RTE_PCI_KDRV_UNKNOWN;
} else {
dev->kdrv = RTE_PCI_KDRV_NONE;
+   free(dev);
return 0;
}
/* device is valid, add in list (sorted) */
-- 
2.18.4




Re: [dpdk-dev] [PATCH v2 00/17] Add CPT in Marvell CNXK common driver

2021-06-28 Thread Akhil Goyal
> > This patchset adds initial support for CPT in common code for Marvell
> > CN10K SoC.
> >
> > CPT is the hardware cryptographic block available in 'cnxk' family SoC. CPT,
> > with its microcoded engines can support symmetric, asymmetric and IPsec
> > operations. CPT can associate with NIX (rte_ethdev) to enable inline IPsec
> > functionality. Similarly, CPT can associate with SSO (rte_eventdev) to
> > enable crypto adapter.
> >
> > Based on CNXK common driver, new crypto PMDs would be added under
> > 'crypto/cnxk'.
> >
> > Changes in v2
> > - Moved FPM & EC tables to RoC
> > - Moved set_key routines to RoC
> > - Added inline IPsec required mboxes and framework
> > - Added security common code
> > - Added plt_dp_* log based on Akhil's comment
> >
> Series Acked-by: Akhil Goyal 

A couple of small issues were fixed in the patch title/description while applying.

Applied to dpdk-next-crypto

Thanks.


[dpdk-dev] [PATCH] fix l2fwd --no-mac-updating not working issue

2021-06-28 Thread RickZhong
The original code that disables MAC updating was removed by mistake.

Signed-off-by: RickZhong 
---
 examples/l2fwd/main.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/examples/l2fwd/main.c b/examples/l2fwd/main.c
index 32d405e65..69d0087dc 100644
--- a/examples/l2fwd/main.c
+++ b/examples/l2fwd/main.c
@@ -491,6 +491,10 @@ l2fwd_parse_args(int argc, char **argv)
timer_period = timer_secs;
break;
 
+   /* no-mac-updating */
+   case 0:
+   break;
+
/* long options */
case CMD_LINE_OPT_PORTMAP_NUM:
ret = l2fwd_parse_port_pair_config(optarg);
-- 
2.27.0
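
For background, the reason a bare "case 0" is needed (an illustrative
sketch, assuming l2fwd registers the long options with a flag pointer,
which is what makes getopt_long() return 0):

#include <getopt.h>

static int mac_updating = 1;

static const struct option lgopts[] = {
	{ "mac-updating", no_argument, &mac_updating, 1 },
	{ "no-mac-updating", no_argument, &mac_updating, 0 },
	{ NULL, 0, NULL, 0 },
};

static void
parse_long_opts(int argc, char **argv)
{
	int opt;

	while ((opt = getopt_long(argc, argv, "", lgopts, NULL)) != -1) {
		switch (opt) {
		case 0:
			/* getopt_long() already stored the option's 'val'
			 * into mac_updating via the flag pointer, so there
			 * is nothing left to do -- but without this case,
			 * the option would fall into the error path */
			break;
		default:
			/* usage / error handling */
			break;
		}
	}
}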




Re: [dpdk-dev] [PATCH] mem: fix mem leak issue when attaching a PCI device without driver to DPDK

2021-06-28 Thread David Marchand
On Mon, Jun 28, 2021 at 11:03 AM RickZhong  wrote:
>
> When trying to attach a PCI device without a driver to DPDK,
> 'dev' is allocated with malloc() but never freed.
>
> Signed-off-by: RickZhong 

Thanks for the patch.

I already submitted a similar fix.
https://patchwork.dpdk.org/project/dpdk/patch/20210616065257.16044-1-david.march...@redhat.com/


-- 
David Marchand



Re: [dpdk-dev] [PATCH] fix l2fwd --no-mac-updating not working issue

2021-06-28 Thread David Marchand
On Mon, Jun 28, 2021 at 11:09 AM RickZhong  wrote:
>
> The original code that disables MAC updating was removed by mistake.
>
> Signed-off-by: RickZhong 
> ---
>  examples/l2fwd/main.c | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/examples/l2fwd/main.c b/examples/l2fwd/main.c
> index 32d405e65..69d0087dc 100644
> --- a/examples/l2fwd/main.c
> +++ b/examples/l2fwd/main.c
> @@ -491,6 +491,10 @@ l2fwd_parse_args(int argc, char **argv)
> timer_period = timer_secs;
> break;
>
> +   /* no-mac-updating */
> +   case 0:
> +   break;
> +
> /* long options */
> case CMD_LINE_OPT_PORTMAP_NUM:
> ret = l2fwd_parse_port_pair_config(optarg);

This issue should be fixed with these patches:
http://patchwork.dpdk.org/project/dpdk/patch/20210622024706.1179945-1-sunchengl...@loongson.cn/
http://patchwork.dpdk.org/project/dpdk/patch/20210622024944.1180109-2-sunchengl...@loongson.cn/

Can you test/review them?

Thanks.

-- 
David Marchand



Re: [dpdk-dev] [PATCH v2 5/7] power: support callbacks for multiple Rx queues

2021-06-28 Thread Burakov, Anatoly

On 28-Jun-21 8:10 AM, David Marchand wrote:

On Fri, Jun 25, 2021 at 4:01 PM Anatoly Burakov
 wrote:

diff --git a/doc/guides/prog_guide/power_man.rst b/doc/guides/prog_guide/power_man.rst
index c70ae128ac..38f876466a 100644
--- a/doc/guides/prog_guide/power_man.rst
+++ b/doc/guides/prog_guide/power_man.rst


[snip]


+* Not all Ethernet devices support monitoring, even if the underlying
+  platform may support the necessary CPU instructions. Support for
+  monitoring is currently implemented in the following DPDK drivers:
+
+  * net/ixgbe
+  * net/i40e
+  * net/ice
+  * net/iavf
+  * net/mlx5
+  * net/af_xdp


This list will get obsolete.

It looks like a driver capability, so can we have a ethdev feature added?
Then mark drivers that supports this feature.

And the power lib documentation will have a reference to
doc/guides/nics/features.rst.




Good idea, thanks for the suggestion! Will fix in v3.

--
Thanks,
Anatoly


Re: [dpdk-dev] dmadev discussion summary

2021-06-28 Thread Bruce Richardson
On Sat, Jun 26, 2021 at 11:59:49AM +0800, fengchengwen wrote:
> Hi, all
>   I analyzed the current DPAM DMA driver and drew up this summary in
> conjunction with the previous discussion, and this will serve as a basis
> for the V2 implementation.
>   Feedback is welcome, thanks
>
Fantastic review and summary, many thanks for the work. Some comments
inline in the API part below, but nothing too major, I hope.

/Bruce
 
 
> 
> Summary:
>   1) The dpaa2/octeontx2/Kunpeng are all ARM SoCs; they may act as endpoints
>  of an x86 host (e.g. a smart NIC), and multiple memory transfer
>  requirements may exist, e.g. local-to-host/local-to-host..., so from
>  the point of view of API design, I think we should adopt a similar
>  'channel' or 'virt-queue' concept.
>   2) Whether to create a separate dmadev for each HW-queue? We previously
>  discussed this, and because HW-queues can be managed independently (like
>  Kunpeng_dma and Intel DSA), we preferred to create a separate dmadev for
>  each HW-queue. But I'm not sure if that's the case with dpaa. I think
>  that can be left to the specific driver; no restriction is imposed on
>  the framework API layer.
>   3) I think we could set up the following abstraction at the dmadev device:
>   ------------   ------------
>   |virt-queue|   |virt-queue|
>   ------------   ------------
>            \       /
>             \     /
>              \   /
>   ------------   ------------
>   | HW-queue |   | HW-queue |
>   ------------   ------------
>            \       /
>             \     /
>              \   /
>             dmadev
>   4) The driver's ops design (here we only list key points):
>  [dev_info_get]: mainly return the number of HW-queues
>  [dev_configure]: nothing important
>  [queue_setup]: create one virt-queue, with the following main parameters:
>  HW-queue-index: the HW-queue index used
>  nb_desc: the number of HW descriptors
>  opaque: driver's specific info
>  Note1: this API returns the virt-queue index, which will be used in
> later APIs. If the user wants to create multiple virt-queues on
> the same HW-queue, that can be achieved by calling queue_setup
> with the same HW-queue-index.
>  Note2: I think it's hard to define the queue_setup config parameter,
> and also this is a control API, so I think it's OK to use an
> opaque pointer to implement it.
I'm not sure opaque pointer will work in practice, so I think we should try
and standardize the parameters as much as possible. Since it's a control
plane API, using a struct with a superset of parameters may be workable.
Let's start with a minimum set and build up from there.
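
One possible shape for such a standardized config struct (a sketch; all
field names are hypothetical, nothing here exists in DPDK yet):

struct rte_dmadev_queue_conf {
	uint16_t hw_queue_index; /* HW queue backing this virt-queue */
	uint16_t nb_desc;        /* number of HW descriptors */
	uint64_t flags;          /* standardized capability/behavior flags */
	void *priv;              /* optional driver-specific extension */
};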

>   [dma_copy/memset/sg]: all have a vq_id input parameter.
>  Note: I notice dpaa can't support single and sg in one virt-queue, and
>I think it's maybe a software implementation policy rather than a
>HW restriction, because virt-queues could share the same HW-queue.
Presumably for queues which support sg, the single-enqueue APIs can use a
single sg list internally?

>   Here we use vq_id to tackle different scenarios, like local-to-local/
>   local-to-host, etc.
>   5) And the dmadev public data-plane API (just prototype):
>  dma_cookie_t rte_dmadev_memset(dev, vq_id, pattern, dst, len, flags)
>-- flags: used as an extended parameter, it could be uint32_t

Suggest uint64_t rather than uint32_t to ensure we have expansion room?
Otherwise +1

>  dma_cookie_t rte_dmadev_memcpy(dev, vq_id, src, dst, len, flags)
+1

>  dma_cookie_t rte_dmadev_memcpy_sg(dev, vq_id, sg, sg_len, flags)
>-- sg: struct dma_scatterlist array
I don't think our drivers will be directly implementing this API, but so
long as SG support is listed as a capability flag I'm fine with this as an
API. [We can't fudge it as a bunch of single copies, because that would
cause us to have multiple cookies rather than one]

>  uint16_t rte_dmadev_completed(dev, vq_id, dma_cookie_t *cookie,
>uint16_t nb_cpls, bool *has_error)
>-- nb_cpls: indicate max process operations number
>-- has_error: indicate if there is an error
>-- return value: the number of successful completed operations.
>-- example:
>   1) If there are already 32 completed ops, and 4th is error, and
>  nb_cpls is 32, then the ret will be 3(because 1/2/3th is OK), and
>  has_error will be true.
>   2) If there are already 32 completed ops, and all successful
>  completed, then the ret will be min(32, nb_cpls), and has_error
>  will be false.
>   3) If there are already 32 completed ops, and all failed completed,
>  then the ret will be 0, and has_error will be true.
+1 for t
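
For reference, a usage sketch of the prototype API proposed above (none of
these functions exist in DPDK yet; names and semantics follow the thread):

bool has_error = false;
dma_cookie_t last_done;
dma_cookie_t c;

/* enqueue one copy on virt-queue vq_id and remember its cookie */
c = rte_dmadev_memcpy(dev, vq_id, src, dst, len, 0);

/* ... later, reap up to 32 completions ... */
uint16_t n = rte_dmadev_completed(dev, vq_id, &last_done, 32, &has_error);
/* n operations completed successfully; if has_error is true, the
 * completed operation following those n failed */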

Re: [dpdk-dev] [PATCH v14 1/9] lib: build libraries that testpmd depends on

2021-06-28 Thread Andrew Rybchenko

On 6/24/21 1:34 AM, Jie Zhou wrote:

Enable building libraries that testpmd depends on for Windows

Signed-off-by: Jie Zhou 
Signed-off-by: Jie Zhou 


Why are two slightly different Signed-off-by tags used in these
patches? Can we drop the first one and keep the second one,
which matches the e-mail From?


Re: [dpdk-dev] [PATCH v14 1/9] lib: build libraries that testpmd depends on

2021-06-28 Thread Andrew Rybchenko

On 6/28/21 1:01 PM, Andrew Rybchenko wrote:

On 6/24/21 1:34 AM, Jie Zhou wrote:

Enable building libraries that testpmd depends on for Windows

Signed-off-by: Jie Zhou 
Signed-off-by: Jie Zhou 


Why are two slightly different Signed-off-by tags used in these
patches? Can we drop the first one and keep the second one,
which matches the e-mail From?


In fact, looking at the git log I see both e-mail addresses
present, but it looks like the first one (w/o linux.) is
used more often.


Re: [dpdk-dev] [PATCH v14 6/9] app/testpmd: fix parse_fec_mode return type name

2021-06-28 Thread Andrew Rybchenko

On 6/24/21 1:34 AM, Jie Zhou wrote:

Replace parse_fec_mode misleading return type name mode with fec_capa

Fixes: b19da32e3151 ("app/testpmd: add FEC command")
Cc: sta...@dpdk.org

Signed-off-by: Jie Zhou 
Signed-off-by: Jie Zhou 


[snip]


diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 283b5e3680..9ae4d90dd1 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -885,7 +885,7 @@ void show_tx_pkt_segments(void);
  void set_tx_pkt_times(unsigned int *tx_times);
  void show_tx_pkt_times(void);
  void set_tx_pkt_split(const char *name);
-int parse_fec_mode(const char *name, enum rte_eth_fec_mode *mode);
+int parse_fec_mode(const char *name, uint32_t *fec_capa);


I guess that the real reason behind this is to fix the implicit
conversion of an enum pointer to/from a uint32_t pointer.
I guess the problem is the different signedness of enums on
the Windows compiler.

If so, please put the real motivation of the changeset in the summary.
It should be human-readable (and not contain a function name).
Explain the details in the description.

Yes, I agree that 'mode' is misleading here and should be mentioned
in the description, but I guess it is not the root cause.
Maybe I'm wrong.
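
To make the suspected root cause concrete (a minimal sketch, not taken
from the patch): callers keep FEC capabilities in a plain uint32_t bit
mask, and the old enum-pointer prototype forced an incompatible pointer
conversion whose behavior depends on the enum's implementation-defined
underlying type (and signedness).

/* old prototype:
 *     int parse_fec_mode(const char *name, enum rte_eth_fec_mode *mode);
 * passing a uint32_t * there requires a cast that is not portable.
 * With the new prototype the natural call is clean: */
uint32_t fec_capa = 0;

if (parse_fec_mode("auto", &fec_capa) < 0)
	fprintf(stderr, "invalid FEC mode\n");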


[dpdk-dev] [PATCH] net/mlx5: fix the modify field action flag checking

2021-06-28 Thread Jiawei Wang
The introduced MODIFY_FIELD action is used to manipulate
packet header fields through copy or set operations.

These modify header actions should be counted as one action
at the low level; the current code used the wrong action flags
check for the modify field action.

This patch updates the action flags check to use the correct
MODIFY_HDR_ACTIONS set.

Fixes: 641dbe4fb053 ("net/mlx5: support modify field flow action")
Cc: sta...@dpdk.org

Signed-off-by: Jiawei Wang 
Acked-by: Viacheslav Ovsiienko 
---
 drivers/net/mlx5/mlx5_flow_dv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index c5d4b01e57..20f068c522 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -7573,7 +7573,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
if (action_flags & MLX5_FLOW_ACTION_SAMPLE)
modify_after_mirror = 1;
/* Count all modify-header actions as one action. */
-   if (!(action_flags & MLX5_FLOW_ACTION_MODIFY_FIELD))
+   if (!(action_flags & MLX5_FLOW_MODIFY_HDR_ACTIONS))
++actions_n;
action_flags |= MLX5_FLOW_ACTION_MODIFY_FIELD;
rw_act_num += ret;
-- 
2.18.1



Re: [dpdk-dev] [PATCH v3] app/testpmd: send failure logs to stderr

2021-06-28 Thread Andrew Rybchenko

On 6/18/21 11:32 AM, Li, Xiaoyun wrote:

-Original Message-
From: Andrew Rybchenko 
Sent: Thursday, June 17, 2021 22:20
To: Li, Xiaoyun ; Ori Kam 
Cc: dev@dpdk.org; Richardson, Bruce ; Yigit,
Ferruh ; Singh, Aman Deep

Subject: [PATCH v3] app/testpmd: send failure logs to stderr

Running with stdout suppressed or redirected for further processing
is very confusing in the case of errors. Fix it by logging errors and
warnings to stderr.

Since lines with log messages are touched anyway concatenate split
format strings to make it easier to search using grep.

Fix indent of format string arguments.

Signed-off-by: Andrew Rybchenko 
---
v3:
  - fix typo in the patch description
  - fix applicable coding style issues reported by patchwork

v2:
  - switch from printf() to fpritnf(stderr, ...) in more cases
  - do not inherit acks from the previous version since the patch is
much bigger
  - fix style in few cases (TAB vs spaces, missing space separtor etc)
  - still don't use TESTPMD_LOG() since the patch does not add new logs.
Also switching to TESTPMD_LOG() will add "testpmd: " prefix to log
messages and it is a real change and could be a pain for automation.

  app/test-pmd/bpf_cmd.c |   6 +-
  app/test-pmd/cmdline.c | 957 ++---
  app/test-pmd/cmdline_flow.c|  20 +-
  app/test-pmd/cmdline_mtr.c |   8 +-
  app/test-pmd/cmdline_tm.c  |  33 +-
  app/test-pmd/config.c  | 455 ++--
  app/test-pmd/csumonly.c|   5 +-
  app/test-pmd/parameters.c  |  21 +-
  app/test-pmd/testpmd.c | 298 
  app/test-pmd/util.c|  19 +-
  doc/guides/rel_notes/release_21_08.rst |   5 +
  11 files changed, 1004 insertions(+), 823 deletions(-)


Acked-by: Xiaoyun Li 



It looks like Mellanox test failures are unrelated to the patch.

There are a number of patches in the queue. Which order is the right one now?
Apply this one and require appropriate changes in the other testpmd patches?
Or vice versa?
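
The change itself is mechanical; schematically (illustrative only, not an
actual hunk from the patch):

printf("Invalid port %d\n", port_id);          /* before: lost when stdout
						* is redirected */
fprintf(stderr, "Invalid port %d\n", port_id); /* after: still visible */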


Re: [dpdk-dev] [PATCH] net/nfp: remove compile time log

2021-06-28 Thread Andrew Rybchenko

Hi Heinrich,

could you take a look at the patch?

Thanks,
Andrew.

On 5/18/21 1:41 PM, Ferruh Yigit wrote:

Logging should be converted to dynamic log.

Signed-off-by: Ferruh Yigit 
---
  drivers/net/nfp/nfp_net_logs.h | 7 ---
  1 file changed, 7 deletions(-)

diff --git a/drivers/net/nfp/nfp_net_logs.h b/drivers/net/nfp/nfp_net_logs.h
index 27dd87611b94..76cc94cb6565 100644
--- a/drivers/net/nfp/nfp_net_logs.h
+++ b/drivers/net/nfp/nfp_net_logs.h
@@ -30,14 +30,7 @@ extern int nfp_logtype_init;
  #define ASSERT(x) do { } while (0)
  #endif
  
-#define RTE_LIBRTE_NFP_NET_DEBUG_CPP

-
-#ifdef RTE_LIBRTE_NFP_NET_DEBUG_CPP
-#define PMD_CPP_LOG(level, fmt, args...) \
-   RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
-#else
  #define PMD_CPP_LOG(level, fmt, args...) do { } while (0)
-#endif
  
  extern int nfp_logtype_driver;

  #define PMD_DRV_LOG(level, fmt, args...) \





Re: [dpdk-dev] dmadev discussion summary

2021-06-28 Thread Ananyev, Konstantin


Hi everyone,

> On Sat, Jun 26, 2021 at 11:59:49AM +0800, fengchengwen wrote:
> > Hi, all
> >   I analyzed the current DPAM DMA driver and drew this summary in 
> > conjunction
> > with the previous discussion, and this will as a basis for the V2 
> > implementation.
> >   Feedback is welcome, thanks
> >
> Fantastic review and summary, many thanks for the work. Some comments
> inline in API part below, but nothing too major, I hope.
> 
> /Bruce
> 
> 
> >
> > Summary:
> >   1) The dpaa2/octeontx2/Kunpeng are all ARM soc, there may acts as 
> > endpoint of
> >  x86 host (e.g. smart NIC), multiple memory transfer requirements may 
> > exist,
> >  e.g. local-to-host/local-to-host..., from the point of view of API 
> > design,
> >  I think we should adopt a similar 'channel' or 'virt-queue' concept.
> >   2) Whether to create a separate dmadev for each HW-queue? We previously
> >  discussed this, and due HW-queue could indepent management (like
> >  Kunpeng_dma and Intel DSA), we prefer create a separate dmadev for each
> >  HW-queue before. But I'm not sure if that's the case with dpaa. I think
> >  that can be left to the specific driver, no restriction is imposed on 
> > the
> >  framework API layer.
> >   3) I think we could setup following abstraction at dmadev device:
> >   
> >   |virt-queue||virt-queue|
> >   
> >  \   /
> >   \ /
> >\   /
> >   
> >  | HW-queue | | HW-queue |
> >   
> > \/
> >  \  /
> >   \/
> > dmadev
> >   4) The driver's ops design (here we only list key points):
> >  [dev_info_get]: mainly return the number of HW-queues
> >  [dev_configure]: nothing important
> >  [queue_setup]: create one virt-queue, has following main parameters:
> >  HW-queue-index: the HW-queue index used
> >  nb_desc: the number of HW descriptors
> >  opaque: driver's specific info
> >  Note1: this API return virt-queue index which will used in later 
> > API.
> > If user want create multiple virt-queue one the same 
> > HW-queue,
> > they could achieved by call queue_setup with the same
> > HW-queue-index.
> >  Note2: I think it's hard to define queue_setup config paramter, and
> > also this is control API, so I think it's OK to use opaque
> > pointer to implement it.
> I'm not sure opaque pointer will work in practice, so I think we should try
> and standardize the parameters as much as possible. Since it's a control
> plane API, using a struct with a superset of parameters may be workable.
> Let's start with a minimum set and build up from there.
> 
> >   [dma_copy/memset/sg]: all has vq_id input parameter.
> >  Note: I notice dpaa can't support single and sg in one virt-queue, 
> > and
> >I think it's maybe software implement policy other than HW
> >restriction because virt-queue could share the same HW-queue.
> Presumably for queues which support sq, the single-enqueue APIs can use a
> single sg list internally?
> 
> >   Here we use vq_id to tackle different scenario, like local-to-local/
> >   local-to-host and etc.
> >   5) And the dmadev public data-plane API (just prototype):
> >  dma_cookie_t rte_dmadev_memset(dev, vq_id, pattern, dst, len, flags)
> >-- flags: used as an extended parameter, it could be uint32_t
> 
> Suggest uint64_t rather than uint32_t to ensure we have expansion room?
> Otherwise +1
> 
> >  dma_cookie_t rte_dmadev_memcpy(dev, vq_id, src, dst, len, flags)
> +1
> 
> >  dma_cookie_t rte_dmadev_memcpy_sg(dev, vq_id, sg, sg_len, flags)
> >-- sg: struct dma_scatterlist array
> I don't think our drivers will be directly implementing this API, but so
> long as SG support is listed as a capability flag I'm fine with this as an
> API. [We can't fudge it as a bunch of single copies, because that would
> cause us to have multiple cookies rather than one]
> 
> >  uint16_t rte_dmadev_completed(dev, vq_id, dma_cookie_t *cookie,
> >uint16_t nb_cpls, bool *has_error)
> >-- nb_cpls: indicate max process operations number
> >-- has_error: indicate if there is an error
> >-- return value: the number of successful completed operations.
> >-- example:
> >   1) If there are already 32 completed ops, and 4th is error, and
> >  nb_cpls is 32, then the ret will be 3(because 1/2/3th is OK), 
> > and
> >  has_error will be true.
> >   2) If there are already 32 completed ops, and all successful
> >  completed, th

[dpdk-dev] double free mbuf causing Data Coverage Problem

2021-06-28 Thread ??????
At DPDK 19.08, I executed rte_pktmbuf_free_seg() twice for the same mbuf,
which stores two identical pointers in the mempool cache and causes the data
to be overwritten when I subsequently allocate mbufs, because
rte_pktmbuf_free_seg() guarantees refcnt == 1 after the mbuf is released.
Why do you want to do this? This was not a problem in 17.02, when the refcnt
was 0 after the mbuf was released.
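
What the report describes can be reduced to the following sequence (a
sketch; behavior as of the mbuf refcount rework around 17.05, where free
mbufs are kept in the pool with refcnt == 1). Note that freeing the same
segment twice was never legal; the refcount change just made the resulting
corruption visible:

#include <rte_mbuf.h>

static int
double_free_demo(struct rte_mempool *mp)
{
	struct rte_mbuf *m = rte_pktmbuf_alloc(mp);

	rte_pktmbuf_free_seg(m); /* OK: refcnt was 1, m returns to the pool */
	rte_pktmbuf_free_seg(m); /* bug: refcnt is 1 again, so the same
				  * pointer enters the mempool cache twice */

	struct rte_mbuf *a = rte_pktmbuf_alloc(mp);
	struct rte_mbuf *b = rte_pktmbuf_alloc(mp);

	return a == b; /* true: two "owners" of one buffer, so their data
			* overwrites each other */
}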

Re: [dpdk-dev] [PATCH v2 1/7] power_intrinsics: use callbacks for comparison

2021-06-28 Thread Ananyev, Konstantin


 
> Previously, the semantics of power monitor were such that we were
> checking current value against the expected value, and if they matched,
> then the sleep was aborted. This is somewhat inflexible, because it only
> allowed us to check for a specific value.
> 
> This commit replaces the comparison with a user callback mechanism, so
> that any PMD (or other code) using `rte_power_monitor()` can define
> their own comparison semantics and decision making on how to detect the
> need to abort the entering of power optimized state.
> 
> Existing implementations are adjusted to follow the new semantics.
> 
> Suggested-by: Konstantin Ananyev 
> Signed-off-by: Anatoly Burakov 
> ---
> 
> Notes:
> v2:
> - Use callback mechanism for more flexibility
> - Address feedback from Konstantin
> 
>  doc/guides/rel_notes/release_21_08.rst|  1 +
>  drivers/event/dlb2/dlb2.c | 16 --
>  drivers/net/i40e/i40e_rxtx.c  | 19 
>  drivers/net/iavf/iavf_rxtx.c  | 19 
>  drivers/net/ice/ice_rxtx.c| 19 
>  drivers/net/ixgbe/ixgbe_rxtx.c| 19 
>  drivers/net/mlx5/mlx5_rx.c| 16 --
>  .../include/generic/rte_power_intrinsics.h| 29 ++-
>  lib/eal/x86/rte_power_intrinsics.c|  9 ++
>  9 files changed, 106 insertions(+), 41 deletions(-)
> 
> diff --git a/lib/eal/include/generic/rte_power_intrinsics.h b/lib/eal/include/generic/rte_power_intrinsics.h
> index dddca3d41c..046667ade6 100644
> --- a/lib/eal/include/generic/rte_power_intrinsics.h
> +++ b/lib/eal/include/generic/rte_power_intrinsics.h
> @@ -18,19 +18,34 @@
>   * which are architecture-dependent.
>   */
> 
> +/**
> + * Callback definition for monitoring conditions. Callbacks with this 
> signature
> + * will be used by `rte_power_monitor()` to check if the entering of power
> + * optimized state should be aborted.
> + *
> + * @param val
> + *   The value read from memory.
> + * @param opaque
> + *   Callback-specific data.
> + *
> + * @return
> + *   0 if entering of power optimized state should proceed
> + *   -1 if entering of power optimized state should be aborted
> + */
> +typedef int (*rte_power_monitor_clb_t)(const uint64_t val,
> + const uint64_t opaque[4]);
>  struct rte_power_monitor_cond {
>   volatile void *addr;  /**< Address to monitor for changes */
> - uint64_t val; /**< If the `mask` is non-zero, location pointed
> -*   to by `addr` will be read and compared
> -*   against this value.
> -*/
> - uint64_t mask;   /**< 64-bit mask to extract value read from `addr` */
> - uint8_t size;/**< Data size (in bytes) that will be used to compare
> -   *   expected value (`val`) with data read from the
> + uint8_t size;/**< Data size (in bytes) that will be read from the
> *   monitored memory location (`addr`). Can be 1, 2,
> *   4, or 8. Supplying any other value will result in
> *   an error.
> */
> + rte_power_monitor_clb_t fn; /**< Callback to be used to check if
> +  *   entering power optimized state should
> +  *   be aborted.
> +  */
> + uint64_t opaque[4]; /**< Callback-specific data */


As a nit - would be good to add some new macro for '4'.
Apart from that - LGTM.
Acked-by: Konstantin Ananyev 

>  };
> 
>  /**
> diff --git a/lib/eal/x86/rte_power_intrinsics.c b/lib/eal/x86/rte_power_intrinsics.c
> index 39ea9fdecd..3c5c9ce7ad 100644
> --- a/lib/eal/x86/rte_power_intrinsics.c
> +++ b/lib/eal/x86/rte_power_intrinsics.c
> @@ -110,14 +110,11 @@ rte_power_monitor(const struct rte_power_monitor_cond *pmc,
>   /* now that we've put this address into monitor, we can unlock */
>   rte_spinlock_unlock(&s->lock);
> 
> - /* if we have a comparison mask, we might not need to sleep at all */
> - if (pmc->mask) {
> + /* if we have a callback, we might not need to sleep at all */
> + if (pmc->fn) {
>   const uint64_t cur_value = __get_umwait_val(
>   pmc->addr, pmc->size);
> - const uint64_t masked = cur_value & pmc->mask;
> -
> - /* if the masked value is already matching, abort */
> - if (masked == pmc->val)
> + if (pmc->fn(cur_value, pmc->opaque) != 0)
>   goto end;
>   }
> 
> --
> 2.25.1
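
For reference, the nit about the magic '4' could be addressed along these
lines (the macro name is hypothetical):

#define RTE_POWER_MONITOR_OPAQUE_SZ 4

typedef int (*rte_power_monitor_clb_t)(const uint64_t val,
		const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ]);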



Re: [dpdk-dev] Experimental symbols in kni lib

2021-06-28 Thread Ferruh Yigit
On 6/25/2021 2:26 PM, Igor Ryzhov wrote:
> Hi Ferruh, all,
> 
> Let's please discuss another approach to setting KNI link status before
> making this API stable:
> http://patches.dpdk.org/project/dpdk/patch/20190925093623.18419-1-iryz...@nfware.com/
> 
> I explained the problem with the current implementation there.
> More than that, using ioctl approach makes it possible to set also speed
> and duplex and use them to implement get_link_ksettings callback.
> I can send patches for both features.
> 

Hi Igor, agree to discuss your patch before promoting the API, I will comment on
the outstanding patch.

> Igor
> 
> On Thu, Jun 24, 2021 at 4:54 PM Kinsella, Ray  wrote:
> 
>> Sounds more than reasonable, +1 from me.
>>
>> Ray K
>>
>> On 24/06/2021 14:24, Ferruh Yigit wrote:
>>> On 6/24/2021 11:42 AM, Kinsella, Ray wrote:
 Hi Ferruh,

 The following kni experimental symbols are present in both v21.05 and
>> v19.11 release. These symbols should be considered for promotion to stable
>> as part of the v22 ABI in DPDK 21.11, as they have been experimental for >=
>> 2yrs at this point.

  * rte_kni_update_link

 Ray K

>>>
>>> Hi Ray,
>>>
>>> Thanks for follow up.
>>>
>>> I just checked the API and planning a small behavior update to it.
>>> If the update is accepted, I suggest keeping the API experimental for
>> 21.08 too,
>>> but can mature it on v21.11.
>>>
>>> Thanks,
>>> ferruh
>>>
>>



Re: [dpdk-dev] [PATCH v2 3/7] eal: add power monitor for multiple events

2021-06-28 Thread Ananyev, Konstantin


> Use RTM and WAITPKG instructions to perform a wait-for-writes similar to
> what UMWAIT does, but without the limitation of having to listen for
> just one event. This works because the optimized power state used by the
> TPAUSE instruction will cause a wake up on RTM transaction abort, so if
> we add the addresses we're interested in to the read-set, any write to
> those addresses will wake us up.
> 
> Signed-off-by: Konstantin Ananyev 
> Signed-off-by: Anatoly Burakov 
> ---
> 
> Notes:
> v2:
> - Adapt to callback mechanism
> 
>  doc/guides/rel_notes/release_21_08.rst|  2 +
>  lib/eal/arm/rte_power_intrinsics.c| 11 +++
>  lib/eal/include/generic/rte_cpuflags.h|  2 +
>  .../include/generic/rte_power_intrinsics.h| 35 ++
>  lib/eal/ppc/rte_power_intrinsics.c| 11 +++
>  lib/eal/version.map   |  3 +
>  lib/eal/x86/rte_cpuflags.c|  2 +
>  lib/eal/x86/rte_power_intrinsics.c| 69 +++
>  8 files changed, 135 insertions(+)
> 
...

> diff --git a/lib/eal/x86/rte_power_intrinsics.c 
> b/lib/eal/x86/rte_power_intrinsics.c
> index 3c5c9ce7ad..3fc6f62ef5 100644
> --- a/lib/eal/x86/rte_power_intrinsics.c
> +++ b/lib/eal/x86/rte_power_intrinsics.c
> @@ -4,6 +4,7 @@
> 
>  #include 
>  #include 
> +#include 
>  #include 
> 
>  #include "rte_power_intrinsics.h"
> @@ -28,6 +29,7 @@ __umwait_wakeup(volatile void *addr)
>  }
> 
>  static bool wait_supported;
> +static bool wait_multi_supported;
> 
>  static inline uint64_t
>  __get_umwait_val(const volatile void *p, const uint8_t sz)
> @@ -164,6 +166,8 @@ RTE_INIT(rte_power_intrinsics_init) {
> 
>   if (i.power_monitor && i.power_pause)
>   wait_supported = 1;
> + if (i.power_monitor_multi)
> + wait_multi_supported = 1;
>  }
> 
>  int
> @@ -202,6 +206,9 @@ rte_power_monitor_wakeup(const unsigned int lcore_id)
>* In this case, since we've already woken up, the "wakeup" was
>* unneeded, and since T1 is still waiting on T2 releasing the lock, the
>* wakeup address is still valid so it's perfectly safe to write it.
> +  *
> +  * For multi-monitor case, the act of locking will in itself trigger the
> +  * wakeup, so no additional writes necessary.
>*/
>   rte_spinlock_lock(&s->lock);
>   if (s->monitor_addr != NULL)
> @@ -210,3 +217,65 @@ rte_power_monitor_wakeup(const unsigned int lcore_id)
> 
>   return 0;
>  }
> +
> +int
> +rte_power_monitor_multi(const struct rte_power_monitor_cond pmc[],
> + const uint32_t num, const uint64_t tsc_timestamp)
> +{
> + const unsigned int lcore_id = rte_lcore_id();
> + struct power_wait_status *s = &wait_status[lcore_id];
> + uint32_t i, rc;
> +
> + /* check if supported */
> + if (!wait_multi_supported)
> + return -ENOTSUP;
> +
> + if (pmc == NULL || num == 0)
> + return -EINVAL;
> +
> + /* we are already inside transaction region, return */
> + if (rte_xtest() != 0)
> + return 0;
> +
> + /* start new transaction region */
> + rc = rte_xbegin();
> +
> + /* transaction abort, possible write to one of wait addresses */
> + if (rc != RTE_XBEGIN_STARTED)
> + return 0;
> +
> + /*
> +  * the mere act of reading the lock status here adds the lock to
> +  * the read set. This means that when we trigger a wakeup from another
> +  * thread, even if we don't have a defined wakeup address and thus don't
> +  * actually cause any writes, the act of locking our lock will itself
> +  * trigger the wakeup and abort the transaction.
> +  */
> + rte_spinlock_is_locked(&s->lock);
> +
> + /*
> +  * add all addresses to wait on into transaction read-set and check if
> +  * any of wakeup conditions are already met.
> +  */
> + for (i = 0; i < num; i++) {
> + const struct rte_power_monitor_cond *c = &pmc[i];
> +
> + if (pmc->fn == NULL)

Should be c->fn, I believe.

> + continue;

Actually, that way, if c->fn == NULL, we'll never add our c->addr to the
monitored addresses.
Is that what we really want?
My thought was that if the callback is not set, we'll just go to the
power-save state without extra checking, no?
Something like that:

const struct rte_power_monitor_cond *c = &pmc[i];
const uint64_t val = __get_umwait_val(c->addr, c->size);

if (c->fn && c->fn(val, c->opaque) != 0)
   break;

Same thought for rte_power_monitor().

> + const uint64_t val = __get_umwait_val(pmc->addr, pmc->size);

Same thing: s/pmc->/c->/

> +
> + /* abort if callback indicates that we need to stop */
> + if (c->fn(val, c->opaque) != 0)
> + break;
> + }
> +
> + /* none of the conditions were met, sleep until timeout */
> + if (i == num)
> + rte_power_pause(tsc_timestamp);
> +
> + /* end transactio

[dpdk-dev] [PATCH v3 0/7] Enhancements for PMD power management

2021-06-28 Thread Anatoly Burakov
This patchset introduces several changes related to PMD power management:

- Changed monitoring intrinsics to use callbacks as a comparison function, based
  on previous patchset [1] but incorporating feedback [2] - this hopefully will
  make it possible to add support for .get_monitor_addr in virtio
- Add a new intrinsic to monitor multiple addresses, based on RTM instruction
  set and the TPAUSE instruction
- Add support for PMD power management on multiple queues, as well as all
  accompanying infrastructure and example apps changes

v3:
- Moved some doc updates to NIC features list

v2:
- Changed check inversion to callbacks
- Addressed feedback from Konstantin
- Added doc updates where necessary

[1] http://patches.dpdk.org/project/dpdk/list/?series=16930&state=*
[2] 
http://patches.dpdk.org/project/dpdk/patch/819ef1ace187365a615d3383e54579e3d9fb216e.1620747068.git.anatoly.bura...@intel.com/#133274

Anatoly Burakov (7):
  power_intrinsics: use callbacks for comparison
  net/af_xdp: add power monitor support
  eal: add power monitor for multiple events
  power: remove thread safety from PMD power API's
  power: support callbacks for multiple Rx queues
  power: support monitoring multiple Rx queues
  l3fwd-power: support multiqueue in PMD pmgmt modes

 doc/guides/nics/features.rst  |  10 +
 doc/guides/prog_guide/power_man.rst   |  78 ++-
 doc/guides/rel_notes/release_21_08.rst|  11 +
 drivers/event/dlb2/dlb2.c |  16 +-
 drivers/net/af_xdp/rte_eth_af_xdp.c   |  33 +
 drivers/net/i40e/i40e_rxtx.c  |  19 +-
 drivers/net/iavf/iavf_rxtx.c  |  19 +-
 drivers/net/ice/ice_rxtx.c|  19 +-
 drivers/net/ixgbe/ixgbe_rxtx.c|  19 +-
 drivers/net/mlx5/mlx5_rx.c|  16 +-
 examples/l3fwd-power/main.c   |  39 +-
 lib/eal/arm/rte_power_intrinsics.c|  11 +
 lib/eal/include/generic/rte_cpuflags.h|   2 +
 .../include/generic/rte_power_intrinsics.h|  64 +-
 lib/eal/ppc/rte_power_intrinsics.c|  11 +
 lib/eal/version.map   |   3 +
 lib/eal/x86/rte_cpuflags.c|   2 +
 lib/eal/x86/rte_power_intrinsics.c|  78 ++-
 lib/power/meson.build |   3 +
 lib/power/rte_power_pmd_mgmt.c| 574 +-
 lib/power/rte_power_pmd_mgmt.h|  40 ++
 lib/power/version.map |   3 +
 22 files changed, 846 insertions(+), 224 deletions(-)

-- 
2.25.1



[dpdk-dev] [PATCH v3 1/7] power_intrinsics: use callbacks for comparison

2021-06-28 Thread Anatoly Burakov
Previously, the semantics of power monitor were such that we were
checking current value against the expected value, and if they matched,
then the sleep was aborted. This is somewhat inflexible, because it only
allowed us to check for a specific value.

This commit replaces the comparison with a user callback mechanism, so
that any PMD (or other code) using `rte_power_monitor()` can define
their own comparison semantics and decision making on how to detect the
need to abort the entering of power optimized state.

Existing implementations are adjusted to follow the new semantics.

Suggested-by: Konstantin Ananyev 
Signed-off-by: Anatoly Burakov 
---

Notes:
v2:
- Use callback mechanism for more flexibility
- Address feedback from Konstantin

 doc/guides/rel_notes/release_21_08.rst|  1 +
 drivers/event/dlb2/dlb2.c | 16 --
 drivers/net/i40e/i40e_rxtx.c  | 19 
 drivers/net/iavf/iavf_rxtx.c  | 19 
 drivers/net/ice/ice_rxtx.c| 19 
 drivers/net/ixgbe/ixgbe_rxtx.c| 19 
 drivers/net/mlx5/mlx5_rx.c| 16 --
 .../include/generic/rte_power_intrinsics.h| 29 ++-
 lib/eal/x86/rte_power_intrinsics.c|  9 ++
 9 files changed, 106 insertions(+), 41 deletions(-)

diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index a6ecfdf3ce..c84ac280f5 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -84,6 +84,7 @@ API Changes
Also, make sure to start the actual text at the margin.
===
 
+* eal: the ``rte_power_intrinsics`` API changed to use a callback mechanism.
 
 ABI Changes
 ---
diff --git a/drivers/event/dlb2/dlb2.c b/drivers/event/dlb2/dlb2.c
index eca183753f..14dfac257c 100644
--- a/drivers/event/dlb2/dlb2.c
+++ b/drivers/event/dlb2/dlb2.c
@@ -3154,6 +3154,15 @@ dlb2_port_credits_inc(struct dlb2_port *qm_port, int num)
}
 }
 
+#define CLB_MASK_IDX 0
+#define CLB_VAL_IDX 1
+static int
+dlb2_monitor_callback(const uint64_t val, const uint64_t opaque[4])
+{
+   /* abort if the value matches */
+   return (val & opaque[CLB_MASK_IDX]) == opaque[CLB_VAL_IDX] ? -1 : 0;
+}
+
 static inline int
 dlb2_dequeue_wait(struct dlb2_eventdev *dlb2,
  struct dlb2_eventdev_port *ev_port,
@@ -3194,8 +3203,11 @@ dlb2_dequeue_wait(struct dlb2_eventdev *dlb2,
expected_value = 0;
 
pmc.addr = monitor_addr;
-   pmc.val = expected_value;
-   pmc.mask = qe_mask.raw_qe[1];
+   /* store expected value and comparison mask in opaque data */
+   pmc.opaque[CLB_VAL_IDX] = expected_value;
+   pmc.opaque[CLB_MASK_IDX] = qe_mask.raw_qe[1];
+   /* set up callback */
+   pmc.fn = dlb2_monitor_callback;
pmc.size = sizeof(uint64_t);
 
rte_power_monitor(&pmc, timeout + start_ticks);
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 6c58decece..45f3fbf4ec 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -81,6 +81,17 @@
 #define I40E_TX_OFFLOAD_SIMPLE_NOTSUP_MASK \
(PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_SIMPLE_SUP_MASK)
 
+static int
+i40e_monitor_callback(const uint64_t value, const uint64_t arg[4] __rte_unused)
+{
+   const uint64_t m = rte_cpu_to_le_64(1 << I40E_RX_DESC_STATUS_DD_SHIFT);
+   /*
+* we expect the DD bit to be set to 1 if this descriptor was already
+* written to.
+*/
+   return (value & m) == m ? -1 : 0;
+}
+
 int
 i40e_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 {
@@ -93,12 +104,8 @@ i40e_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
/* watch for changes in status bit */
pmc->addr = &rxdp->wb.qword1.status_error_len;
 
-   /*
-* we expect the DD bit to be set to 1 if this descriptor was already
-* written to.
-*/
-   pmc->val = rte_cpu_to_le_64(1 << I40E_RX_DESC_STATUS_DD_SHIFT);
-   pmc->mask = rte_cpu_to_le_64(1 << I40E_RX_DESC_STATUS_DD_SHIFT);
+   /* comparison callback */
+   pmc->fn = i40e_monitor_callback;
 
/* registers are 64-bit */
pmc->size = sizeof(uint64_t);
diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 0361af0d85..6e12ecce07 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -57,6 +57,17 @@ iavf_proto_xtr_type_to_rxdid(uint8_t flex_type)
rxdid_map[flex_type] : IAVF_RXDID_COMMS_OVS_1;
 }
 
+static int
+iavf_monitor_callback(const uint64_t value, const uint64_t arg[4] __rte_unused)
+{
+   const uint64_t m = rte_cpu_to_le_64(1 << IAVF_RX_DESC_STAT

[dpdk-dev] [PATCH v3 2/7] net/af_xdp: add power monitor support

2021-06-28 Thread Anatoly Burakov
Implement support for .get_monitor_addr in AF_XDP driver.

Signed-off-by: Anatoly Burakov 
---

Notes:
v2:
- Rewrite using the callback mechanism

 drivers/net/af_xdp/rte_eth_af_xdp.c | 33 +
 1 file changed, 33 insertions(+)

diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index eb5660a3dc..8b9c89c3e8 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -37,6 +37,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "compat.h"
 
@@ -788,6 +789,37 @@ eth_dev_configure(struct rte_eth_dev *dev)
return 0;
 }
 
+#define CLB_VAL_IDX 0
+static int
+eth_monitor_callback(const uint64_t value, const uint64_t opaque[4])
+{
+   const uint64_t v = opaque[CLB_VAL_IDX];
+   const uint64_t m = (uint32_t)~0;
+
+   /* if the value has changed, abort entering power optimized state */
+   return (value & m) == v ? 0 : -1;
+}
+
+static int
+eth_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
+{
+   struct pkt_rx_queue *rxq = rx_queue;
+   unsigned int *prod = rxq->rx.producer;
+   const uint32_t cur_val = rxq->rx.cached_prod; /* use cached value */
+
+   /* watch for changes in producer ring */
+   pmc->addr = (void*)prod;
+
+   /* store current value */
+   pmc->opaque[CLB_VAL_IDX] = cur_val;
+   pmc->fn = eth_monitor_callback;
+
+   /* AF_XDP producer ring index is 32-bit */
+   pmc->size = sizeof(uint32_t);
+
+   return 0;
+}
+
 static int
 eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 {
@@ -1448,6 +1480,7 @@ static const struct eth_dev_ops ops = {
.link_update = eth_link_update,
.stats_get = eth_stats_get,
.stats_reset = eth_stats_reset,
+   .get_monitor_addr = eth_get_monitor_addr
 };
 
 /** parse busy_budget argument */
-- 
2.25.1



[dpdk-dev] [PATCH v3 3/7] eal: add power monitor for multiple events

2021-06-28 Thread Anatoly Burakov
Use RTM and WAITPKG instructions to perform a wait-for-writes similar to
what UMWAIT does, but without the limitation of having to listen for
just one event. This works because the optimized power state used by the
TPAUSE instruction will cause a wake up on RTM transaction abort, so if
we add the addresses we're interested in to the read-set, any write to
those addresses will wake us up.

Signed-off-by: Konstantin Ananyev 
Signed-off-by: Anatoly Burakov 
---

Notes:
v2:
- Adapt to callback mechanism

 doc/guides/rel_notes/release_21_08.rst|  2 +
 lib/eal/arm/rte_power_intrinsics.c| 11 +++
 lib/eal/include/generic/rte_cpuflags.h|  2 +
 .../include/generic/rte_power_intrinsics.h| 35 ++
 lib/eal/ppc/rte_power_intrinsics.c| 11 +++
 lib/eal/version.map   |  3 +
 lib/eal/x86/rte_cpuflags.c|  2 +
 lib/eal/x86/rte_power_intrinsics.c| 69 +++
 8 files changed, 135 insertions(+)

diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index c84ac280f5..9d1cfac395 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -55,6 +55,8 @@ New Features
  Also, make sure to start the actual text at the margin.
  ===
 
+* eal: added ``rte_power_monitor_multi`` to support waiting for multiple events.
+
 
 Removed Items
 -
diff --git a/lib/eal/arm/rte_power_intrinsics.c b/lib/eal/arm/rte_power_intrinsics.c
index e83f04072a..78f55b7203 100644
--- a/lib/eal/arm/rte_power_intrinsics.c
+++ b/lib/eal/arm/rte_power_intrinsics.c
@@ -38,3 +38,14 @@ rte_power_monitor_wakeup(const unsigned int lcore_id)
 
return -ENOTSUP;
 }
+
+int
+rte_power_monitor_multi(const struct rte_power_monitor_cond pmc[],
+   const uint32_t num, const uint64_t tsc_timestamp)
+{
+   RTE_SET_USED(pmc);
+   RTE_SET_USED(num);
+   RTE_SET_USED(tsc_timestamp);
+
+   return -ENOTSUP;
+}
diff --git a/lib/eal/include/generic/rte_cpuflags.h b/lib/eal/include/generic/rte_cpuflags.h
index 28a5aecde8..d35551e931 100644
--- a/lib/eal/include/generic/rte_cpuflags.h
+++ b/lib/eal/include/generic/rte_cpuflags.h
@@ -24,6 +24,8 @@ struct rte_cpu_intrinsics {
/**< indicates support for rte_power_monitor function */
uint32_t power_pause : 1;
/**< indicates support for rte_power_pause function */
+   uint32_t power_monitor_multi : 1;
+   /**< indicates support for rte_power_monitor_multi function */
 };
 
 /**
diff --git a/lib/eal/include/generic/rte_power_intrinsics.h b/lib/eal/include/generic/rte_power_intrinsics.h
index 046667ade6..877fb282cb 100644
--- a/lib/eal/include/generic/rte_power_intrinsics.h
+++ b/lib/eal/include/generic/rte_power_intrinsics.h
@@ -124,4 +124,39 @@ int rte_power_monitor_wakeup(const unsigned int lcore_id);
 __rte_experimental
 int rte_power_pause(const uint64_t tsc_timestamp);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Monitor a set of addresses for changes. This will cause the CPU to enter an
+ * architecture-defined optimized power state until either one of the specified
+ * memory addresses is written to, a certain TSC timestamp is reached, or other
+ * reasons cause the CPU to wake up.
+ *
+ * Additionally, `expected` 64-bit values and 64-bit masks are provided. If
+ * mask is non-zero, the current value pointed to by the `p` pointer will be
+ * checked against the expected value, and if they do not match, the entering of
+ * optimized power state may be aborted.
+ *
+ * @warning It is responsibility of the user to check if this function is
+ *   supported at runtime using `rte_cpu_get_intrinsics_support()` API call.
+ *   Failing to do so may result in an illegal CPU instruction error.
+ *
+ * @param pmc
+ *   An array of monitoring condition structures.
+ * @param num
+ *   Length of the `pmc` array.
+ * @param tsc_timestamp
+ *   Maximum TSC timestamp to wait for. Note that the wait behavior is
+ *   architecture-dependent.
+ *
+ * @return
+ *   0 on success
+ *   -EINVAL on invalid parameters
+ *   -ENOTSUP if unsupported
+ */
+__rte_experimental
+int rte_power_monitor_multi(const struct rte_power_monitor_cond pmc[],
+   const uint32_t num, const uint64_t tsc_timestamp);
+
 #endif /* _RTE_POWER_INTRINSIC_H_ */
diff --git a/lib/eal/ppc/rte_power_intrinsics.c b/lib/eal/ppc/rte_power_intrinsics.c
index 7fc9586da7..f00b58ade5 100644
--- a/lib/eal/ppc/rte_power_intrinsics.c
+++ b/lib/eal/ppc/rte_power_intrinsics.c
@@ -38,3 +38,14 @@ rte_power_monitor_wakeup(const unsigned int lcore_id)
 
return -ENOTSUP;
 }
+
+int
+rte_power_monitor_multi(const struct rte_power_monitor_cond pmc[],
+   const uint32_t num, const uint64_t tsc_timestamp)
+{
+   RTE_SET_USED(pmc);
+   RTE_SET_USED(num);
+   RTE_SET_USED(tsc_times
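
A usage sketch for the new call (assuming both ports' PMDs implement
.get_monitor_addr; port/queue values are illustrative and error handling
is omitted):

struct rte_power_monitor_cond pmc[2];
const uint64_t timeout_cycles = rte_get_tsc_hz() / 10; /* ~100 ms */

/* each PMD fills in the address, size and wake-up callback to monitor */
rte_eth_get_monitor_addr(0, 0, &pmc[0]);
rte_eth_get_monitor_addr(1, 0, &pmc[1]);

/* sleep until one of the monitored addresses is written to,
 * or until the TSC deadline expires */
rte_power_monitor_multi(pmc, RTE_DIM(pmc), rte_rdtsc() + timeout_cycles);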

[dpdk-dev] [PATCH v3 4/7] power: remove thread safety from PMD power API's

2021-06-28 Thread Anatoly Burakov
Currently, we expect that only one callback can be active at any given
moment, for a particular queue configuration, which is relatively easy
to implement in a thread-safe way. However, we're about to add support
for multiple queues per lcore, which will greatly increase the
possibility of various race conditions.

We could have used something like an RCU for this use case, but absent
a pressing need for thread safety we'll go the easy way and just
mandate that the API's are to be called when all affected ports are
stopped, and document this limitation. This greatly simplifies the
`rte_power_monitor`-related code.
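
In application terms the new contract is roughly the following (a sketch;
the mode constant is assumed from the library's enum and error checks are
omitted):

	/* the affected Rx queue must be stopped while (re)configuring */
	rte_eth_dev_rx_queue_stop(port_id, queue_id);
	rte_power_ethdev_pmgmt_queue_enable(lcore_id, port_id, queue_id,
			RTE_POWER_MGMT_TYPE_MONITOR);
	rte_eth_dev_rx_queue_start(port_id, queue_id);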

Signed-off-by: Anatoly Burakov 
---

Notes:
v2:
- Add check for stopped queue
- Clarified doc message
- Added release notes

 doc/guides/rel_notes/release_21_08.rst |   5 +
 lib/power/meson.build  |   3 +
 lib/power/rte_power_pmd_mgmt.c | 133 ++---
 lib/power/rte_power_pmd_mgmt.h |   6 ++
 4 files changed, 67 insertions(+), 80 deletions(-)

diff --git a/doc/guides/rel_notes/release_21_08.rst 
b/doc/guides/rel_notes/release_21_08.rst
index 9d1cfac395..f015c509fc 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -88,6 +88,11 @@ API Changes
 
 * eal: the ``rte_power_intrinsics`` API changed to use a callback mechanism.
 
+* rte_power: The experimental PMD power management API is no longer considered
+  to be thread safe; all Rx queues affected by the API will now need to be
+  stopped before making any changes to the power management scheme.
+
+
 ABI Changes
 ---
 
diff --git a/lib/power/meson.build b/lib/power/meson.build
index c1097d32f1..4f6a242364 100644
--- a/lib/power/meson.build
+++ b/lib/power/meson.build
@@ -21,4 +21,7 @@ headers = files(
 'rte_power_pmd_mgmt.h',
 'rte_power_guest_channel.h',
 )
+if cc.has_argument('-Wno-cast-qual')
+cflags += '-Wno-cast-qual'
+endif
 deps += ['timer', 'ethdev']
diff --git a/lib/power/rte_power_pmd_mgmt.c b/lib/power/rte_power_pmd_mgmt.c
index db03cbf420..9b95cf1794 100644
--- a/lib/power/rte_power_pmd_mgmt.c
+++ b/lib/power/rte_power_pmd_mgmt.c
@@ -40,8 +40,6 @@ struct pmd_queue_cfg {
/**< Callback mode for this queue */
const struct rte_eth_rxtx_callback *cur_cb;
/**< Callback instance */
-   volatile bool umwait_in_progress;
-   /**< are we currently sleeping? */
uint64_t empty_poll_stats;
/**< Number of empty polls */
 } __rte_cache_aligned;
@@ -92,30 +90,11 @@ clb_umwait(uint16_t port_id, uint16_t qidx, struct rte_mbuf **pkts __rte_unused,
struct rte_power_monitor_cond pmc;
uint16_t ret;
 
-   /*
-* we might get a cancellation request while being
-* inside the callback, in which case the wakeup
-* wouldn't work because it would've arrived too early.
-*
-* to get around this, we notify the other thread that
-* we're sleeping, so that it can spin until we're done.
-* unsolicited wakeups are perfectly safe.
-*/
-   q_conf->umwait_in_progress = true;
-
-   rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
-
-   /* check if we need to cancel sleep */
-   if (q_conf->pwr_mgmt_state == PMD_MGMT_ENABLED) {
-   /* use monitoring condition to sleep */
-   ret = rte_eth_get_monitor_addr(port_id, qidx,
-   &pmc);
-   if (ret == 0)
-   rte_power_monitor(&pmc, UINT64_MAX);
-   }
-   q_conf->umwait_in_progress = false;
-
-   rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
+   /* use monitoring condition to sleep */
+   ret = rte_eth_get_monitor_addr(port_id, qidx,
+   &pmc);
+   if (ret == 0)
+   rte_power_monitor(&pmc, UINT64_MAX);
}
} else
q_conf->empty_poll_stats = 0;
@@ -177,12 +156,24 @@ clb_scale_freq(uint16_t port_id, uint16_t qidx,
return nb_rx;
 }
 
+static int
+queue_stopped(const uint16_t port_id, const uint16_t queue_id)
+{
+   struct rte_eth_rxq_info qinfo;
+
+   if (rte_eth_rx_queue_info_get(port_id, queue_id, &qinfo) < 0)
+   return -1;
+
+   return qinfo.queue_state == RTE_ETH_QUEUE_STATE_STOPPED;
+}
+
 int
 rte_power_ethdev_pmgmt_queue_enable(unsigned int lcore_id, uint16_t port_id,
uint16_t queue_id, enum rte_power_pmd_mgmt_type mode)
 {
struct pmd_queue_cfg *queue_cfg;
struct rte_eth_dev_

[dpdk-dev] [PATCH v3 5/7] power: support callbacks for multiple Rx queues

2021-06-28 Thread Anatoly Burakov
Currently, there is a hard limitation on the PMD power management
support that only allows it to support a single queue per lcore. This is
not ideal as most DPDK use cases will poll multiple queues per core.

The PMD power management mechanism relies on ethdev Rx callbacks, so it
is very difficult to implement such support because callbacks are
effectively stateless and have no visibility into what the other ethdev
devices are doing. This places limitations on what we can do within the
framework of Rx callbacks, but the basics of this implementation are as
follows:

- Replace per-queue structures with per-lcore ones, so that any device
  polled from the same lcore can share data
- Any queue that is going to be polled from a specific lcore has to be
  added to the list of cores to poll, so that the callback is aware of
  other queues being polled by the same lcore
- Both the empty poll counter and the actual power saving mechanism is
  shared between all queues polled on a particular lcore, and is only
  activated when a special designated "power saving" queue is polled. To
  put it another way, we have no idea which queue the user will poll in
  what order, so we rely on them telling us that queue X is the last one
  in the polling loop, so any power management should happen there.
- A new API is added to mark a specific Rx queue as "power saving".
  Failing to call this API will result in no power management, however
  when having only one queue per core it is obvious which queue is the
  "power saving" one, so things will still work without this new API for
  use cases that were previously working without it.
- The limitation on UMWAIT-based polling is not removed because UMWAIT
  is incapable of monitoring more than one address.

Also, while we're at it, update and improve the docs.
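
A sketch of the intended usage, with made-up port/queue/lcore numbers,
error handling omitted, and the mode constant assumed from the library's
enum:

	/* lcore 1 polls queues 0..2 of port 0; queue 2 is polled last */
	uint16_t q;

	for (q = 0; q < 3; q++)
		rte_power_ethdev_pmgmt_queue_enable(1, 0, q,
				RTE_POWER_MGMT_TYPE_MONITOR);
	/* designate the last-polled queue as the "power saving" queue */
	rte_power_ethdev_pmgmt_queue_set_power_save(1, 0, 2);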

Signed-off-by: Anatoly Burakov 
---

Notes:
v3:
- Move the list of supported NICs to NIC feature table

v2:
- Use a TAILQ for queues instead of a static array
- Address feedback from Konstantin
- Add additional checks for stopped queues

 doc/guides/nics/features.rst   |  10 +
 doc/guides/prog_guide/power_man.rst|  75 +++--
 doc/guides/rel_notes/release_21_08.rst |   3 +
 lib/power/rte_power_pmd_mgmt.c | 381 -
 lib/power/rte_power_pmd_mgmt.h |  34 +++
 lib/power/version.map  |   3 +
 6 files changed, 412 insertions(+), 94 deletions(-)

diff --git a/doc/guides/nics/features.rst b/doc/guides/nics/features.rst
index 403c2b03a3..a96e12d155 100644
--- a/doc/guides/nics/features.rst
+++ b/doc/guides/nics/features.rst
@@ -912,6 +912,16 @@ Supports to get Rx/Tx packet burst mode information.
 * **[implements] eth_dev_ops**: ``rx_burst_mode_get``, ``tx_burst_mode_get``.
 * **[related] API**: ``rte_eth_rx_burst_mode_get()``, ``rte_eth_tx_burst_mode_get()``.
 
+.. _nic_features_get_monitor_addr:
+
+PMD power management using monitor addresses
+
+
+Supports getting a monitoring condition to use together with Ethernet PMD power
+management (see :doc:`../prog_guide/power_man` for more details).
+
+* **[implements] eth_dev_ops**: ``get_monitor_addr``
+
 .. _nic_features_other:
 
 Other dev ops not represented by a Feature
diff --git a/doc/guides/prog_guide/power_man.rst 
b/doc/guides/prog_guide/power_man.rst
index c70ae128ac..fac2c19516 100644
--- a/doc/guides/prog_guide/power_man.rst
+++ b/doc/guides/prog_guide/power_man.rst
@@ -198,34 +198,41 @@ Ethernet PMD Power Management API
 Abstract
 
 
-Existing power management mechanisms require developers
-to change application design or change code to make use of it.
-The PMD power management API provides a convenient alternative
-by utilizing Ethernet PMD RX callbacks,
-and triggering power saving whenever empty poll count reaches a certain number.
-
-Monitor
-   This power saving scheme will put the CPU into optimized power state
-   and use the ``rte_power_monitor()`` function
-   to monitor the Ethernet PMD RX descriptor address,
-   and wake the CPU up whenever there's new traffic.
-
-Pause
-   This power saving scheme will avoid busy polling
-   by either entering power-optimized sleep state
-   with ``rte_power_pause()`` function,
-   or, if it's not available, use ``rte_pause()``.
-
-Frequency scaling
-   This power saving scheme will use ``librte_power`` library
-   functionality to scale the core frequency up/down
-   depending on traffic volume.
-
-.. note::
-
-   Currently, this power management API is limited to mandatory mapping
-   of 1 queue to 1 core (multiple queues are supported,
-   but they must be polled from different cores).
+Existing power management mechanisms require developers to change application
+design or change code to make use of it. The PMD power management API provides a
+convenient alternative by utilizing Ethernet PMD RX callbacks, and triggering
+power saving whenever empty poll count reaches a certain number.

[dpdk-dev] [PATCH v3 6/7] power: support monitoring multiple Rx queues

2021-06-28 Thread Anatoly Burakov
Use the new multi-monitor intrinsic to allow monitoring multiple ethdev
Rx queues while entering the energy efficient power state. The multi
version will be used unconditionally if supported, and the UMWAIT one
will only be used when multi-monitor is not supported by the hardware.

Signed-off-by: Anatoly Burakov 
---
 doc/guides/prog_guide/power_man.rst |  9 ++--
 lib/power/rte_power_pmd_mgmt.c  | 76 -
 2 files changed, 80 insertions(+), 5 deletions(-)

diff --git a/doc/guides/prog_guide/power_man.rst 
b/doc/guides/prog_guide/power_man.rst
index fac2c19516..3245a5ebed 100644
--- a/doc/guides/prog_guide/power_man.rst
+++ b/doc/guides/prog_guide/power_man.rst
@@ -221,13 +221,16 @@ power saving whenever empty poll count reaches a certain number.
 The "monitor" mode is only supported in the following configurations and scenarios:
 
 * If ``rte_cpu_get_intrinsics_support()`` function indicates that
+  ``rte_power_monitor_multi()`` function is supported by the platform, then
+  monitoring multiple Ethernet Rx queues for traffic will be supported.
+
+* If ``rte_cpu_get_intrinsics_support()`` function indicates that only
   ``rte_power_monitor()`` is supported by the platform, then monitoring will be
   limited to a mapping of 1 core 1 queue (thus, each Rx queue will have to be
   monitored from a different lcore).
 
-* If ``rte_cpu_get_intrinsics_support()`` function indicates that the
-  ``rte_power_monitor()`` function is not supported, then monitor mode will not
-  be supported.
+* If ``rte_cpu_get_intrinsics_support()`` function indicates that neither of the
+  two monitoring functions are supported, then monitor mode will not be supported.
 
 * Not all Ethernet devices support monitoring, even if the underlying
   platform may support the necessary CPU instructions. Please refer to
diff --git a/lib/power/rte_power_pmd_mgmt.c b/lib/power/rte_power_pmd_mgmt.c
index 7762cd39b8..aab2d4f1ee 100644
--- a/lib/power/rte_power_pmd_mgmt.c
+++ b/lib/power/rte_power_pmd_mgmt.c
@@ -155,6 +155,24 @@ queue_list_remove(struct pmd_core_cfg *cfg, const union queue *q)
return 0;
 }
 
+static inline int
+get_monitor_addresses(struct pmd_core_cfg *cfg,
+   struct rte_power_monitor_cond *pmc)
+{
+   const struct queue_list_entry *qle;
+   size_t i = 0;
+   int ret;
+
+   TAILQ_FOREACH(qle, &cfg->head, next) {
+   struct rte_power_monitor_cond *cur = &pmc[i];
+   const union queue *q = &qle->queue;
+   ret = rte_eth_get_monitor_addr(q->portid, q->qid, cur);
+   if (ret < 0)
+   return ret;
+   }
+   return 0;
+}
+
 static void
 calc_tsc(void)
 {
@@ -183,6 +201,48 @@ calc_tsc(void)
}
 }
 
+static uint16_t
+clb_multiwait(uint16_t port_id, uint16_t qidx,
+   struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx,
+   uint16_t max_pkts __rte_unused, void *addr __rte_unused)
+{
+   const unsigned int lcore = rte_lcore_id();
+   const union queue q = {.portid = port_id, .qid = qidx};
+   const bool empty = nb_rx == 0;
+   struct pmd_core_cfg *q_conf;
+
+   q_conf = &lcore_cfg[lcore];
+
+   /* early exit */
+   if (likely(!empty)) {
+   q_conf->empty_poll_stats = 0;
+   } else {
+   /* do we care about this particular queue? */
+   if (!queue_is_power_save(q_conf, &q))
+   return nb_rx;
+
+   /*
+* we can increment unconditionally here because if there were
+* non-empty polls in other queues assigned to this core, we
+* dropped the counter to zero anyway.
+*/
+   q_conf->empty_poll_stats++;
+   if (unlikely(q_conf->empty_poll_stats > EMPTYPOLL_MAX)) {
+   struct rte_power_monitor_cond pmc[RTE_MAX_ETHPORTS];
+   uint16_t ret;
+
+   /* gather all monitoring conditions */
+   ret = get_monitor_addresses(q_conf, pmc);
+
+   if (ret == 0)
+   rte_power_monitor_multi(pmc,
+   q_conf->n_queues, UINT64_MAX);
+   }
+   }
+
+   return nb_rx;
+}
+
 static uint16_t
 clb_umwait(uint16_t port_id, uint16_t qidx, struct rte_mbuf **pkts __rte_unused,
uint16_t nb_rx, uint16_t max_pkts __rte_unused,
@@ -348,14 +408,19 @@ static int
 check_monitor(struct pmd_core_cfg *cfg, const union queue *qdata)
 {
struct rte_power_monitor_cond dummy;
+   bool multimonitor_supported;
 
/* check if rte_power_monitor is supported */
if (!global_data.intrinsics_support.power_monitor) {
RTE_LOG(DEBUG, POWER, "Monitoring intrinsics are not supported\n");
return -ENOTSUP;
}
+   /* check if multi-monitor is supported */
+   multimonitor_supported

[dpdk-dev] [PATCH v3 7/7] l3fwd-power: support multiqueue in PMD pmgmt modes

2021-06-28 Thread Anatoly Burakov
Currently, l3fwd-power enforces the limitation of having one queue per
lcore. This is no longer necessary, so remove the limitation, and always
mark the last queue in qconf as the power save queue.

Signed-off-by: Anatoly Burakov 
---
 examples/l3fwd-power/main.c | 39 +++--
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c
index f8dfed1634..3057c06936 100644
--- a/examples/l3fwd-power/main.c
+++ b/examples/l3fwd-power/main.c
@@ -2498,6 +2498,27 @@ mode_to_str(enum appmode mode)
}
 }
 
+static void
+pmd_pmgmt_set_up(unsigned int lcore, uint16_t portid, uint16_t qid, bool last)
+{
+   int ret;
+
+   ret = rte_power_ethdev_pmgmt_queue_enable(lcore, portid,
+   qid, pmgmt_type);
+   if (ret < 0)
+   rte_exit(EXIT_FAILURE,
+   "rte_power_ethdev_pmgmt_queue_enable: err=%d, port=%d\n",
+   ret, portid);
+
+   if (!last)
+   return;
+   ret = rte_power_ethdev_pmgmt_queue_set_power_save(lcore, portid, qid);
+   if (ret < 0)
+   rte_exit(EXIT_FAILURE,
+   "rte_power_ethdev_pmgmt_queue_set_power_save: err=%d, port=%d\n",
+   ret, portid);
+}
+
 int
 main(int argc, char **argv)
 {
@@ -2723,12 +2744,6 @@ main(int argc, char **argv)
printf("\nInitializing rx queues on lcore %u ... ", lcore_id );
fflush(stdout);
 
-   /* PMD power management mode can only do 1 queue per core */
-   if (app_mode == APP_MODE_PMD_MGMT && qconf->n_rx_queue > 1) {
-   rte_exit(EXIT_FAILURE,
-   "In PMD power management mode, only one queue per lcore is allowed\n");
-   }
-
/* init RX queues */
for(queue = 0; queue < qconf->n_rx_queue; ++queue) {
struct rte_eth_rxconf rxq_conf;
@@ -2767,15 +2782,9 @@ main(int argc, char **argv)
 "Fail to add ptype cb\n");
}
 
-   if (app_mode == APP_MODE_PMD_MGMT) {
-   ret = rte_power_ethdev_pmgmt_queue_enable(
-   lcore_id, portid, queueid,
-   pmgmt_type);
-   if (ret < 0)
-   rte_exit(EXIT_FAILURE,
-   "rte_power_ethdev_pmgmt_queue_enable: err=%d, port=%d\n",
-   ret, portid);
-   }
+   if (app_mode == APP_MODE_PMD_MGMT)
+   pmd_pmgmt_set_up(lcore_id, portid, queueid,
+   queue == (qconf->n_rx_queue - 1));
}
}
 
-- 
2.25.1



Re: [dpdk-dev] [PATCH v2 3/7] eal: add power monitor for multiple events

2021-06-28 Thread Burakov, Anatoly

On 28-Jun-21 1:37 PM, Ananyev, Konstantin wrote:



Use RTM and WAITPKG instructions to perform a wait-for-writes similar to
what UMWAIT does, but without the limitation of having to listen for
just one event. This works because the optimized power state used by the
TPAUSE instruction will cause a wake up on RTM transaction abort, so if
we add the addresses we're interested in to the read-set, any write to
those addresses will wake us up.

Signed-off-by: Konstantin Ananyev 
Signed-off-by: Anatoly Burakov 
---

Notes:
 v2:
 - Adapt to callback mechanism

  doc/guides/rel_notes/release_21_08.rst|  2 +
  lib/eal/arm/rte_power_intrinsics.c| 11 +++
  lib/eal/include/generic/rte_cpuflags.h|  2 +
  .../include/generic/rte_power_intrinsics.h| 35 ++
  lib/eal/ppc/rte_power_intrinsics.c| 11 +++
  lib/eal/version.map   |  3 +
  lib/eal/x86/rte_cpuflags.c|  2 +
  lib/eal/x86/rte_power_intrinsics.c| 69 +++
  8 files changed, 135 insertions(+)


...


diff --git a/lib/eal/x86/rte_power_intrinsics.c 
b/lib/eal/x86/rte_power_intrinsics.c
index 3c5c9ce7ad..3fc6f62ef5 100644
--- a/lib/eal/x86/rte_power_intrinsics.c
+++ b/lib/eal/x86/rte_power_intrinsics.c
@@ -4,6 +4,7 @@

  #include 
  #include 
+#include 
  #include 

  #include "rte_power_intrinsics.h"
@@ -28,6 +29,7 @@ __umwait_wakeup(volatile void *addr)
  }

  static bool wait_supported;
+static bool wait_multi_supported;

  static inline uint64_t
  __get_umwait_val(const volatile void *p, const uint8_t sz)
@@ -164,6 +166,8 @@ RTE_INIT(rte_power_intrinsics_init) {

   if (i.power_monitor && i.power_pause)
   wait_supported = 1;
+ if (i.power_monitor_multi)
+ wait_multi_supported = 1;
  }

  int
@@ -202,6 +206,9 @@ rte_power_monitor_wakeup(const unsigned int lcore_id)
* In this case, since we've already woken up, the "wakeup" was
* unneeded, and since T1 is still waiting on T2 releasing the lock, the
* wakeup address is still valid so it's perfectly safe to write it.
+  *
+  * For multi-monitor case, the act of locking will in itself trigger the
+  * wakeup, so no additional writes necessary.
*/
   rte_spinlock_lock(&s->lock);
   if (s->monitor_addr != NULL)
@@ -210,3 +217,65 @@ rte_power_monitor_wakeup(const unsigned int lcore_id)

   return 0;
  }
+
+int
+rte_power_monitor_multi(const struct rte_power_monitor_cond pmc[],
+ const uint32_t num, const uint64_t tsc_timestamp)
+{
+ const unsigned int lcore_id = rte_lcore_id();
+ struct power_wait_status *s = &wait_status[lcore_id];
+ uint32_t i, rc;
+
+ /* check if supported */
+ if (!wait_multi_supported)
+ return -ENOTSUP;
+
+ if (pmc == NULL || num == 0)
+ return -EINVAL;
+
+ /* we are already inside transaction region, return */
+ if (rte_xtest() != 0)
+ return 0;
+
+ /* start new transaction region */
+ rc = rte_xbegin();
+
+ /* transaction abort, possible write to one of wait addresses */
+ if (rc != RTE_XBEGIN_STARTED)
+ return 0;
+
+ /*
+  * the mere act of reading the lock status here adds the lock to
+  * the read set. This means that when we trigger a wakeup from another
+  * thread, even if we don't have a defined wakeup address and thus don't
+  * actually cause any writes, the act of locking our lock will itself
+  * trigger the wakeup and abort the transaction.
+  */
+ rte_spinlock_is_locked(&s->lock);
+
+ /*
+  * add all addresses to wait on into transaction read-set and check if
+  * any of wakeup conditions are already met.
+  */
+ for (i = 0; i < num; i++) {
+ const struct rte_power_monitor_cond *c = &pmc[i];
+
+ if (pmc->fn == NULL)


Should be c->fn, I believe.


Yep, will fix.




+ continue;


Actually that way, if c->fn == NULL, we'll never add  our c->addr to monitored 
addresses.
Is that what we really want?
My thought was, that if callback is not set, we'll just go to power-save state 
without extra checking, no?
Something like that:

const struct rte_power_monitor_cond *c = &pmc[i];
const uint64_t val = __get_umwait_val(c->addr, c->size);

if (c->fn && c->fn(val, c->opaque) != 0)
break;


This is consistent with previous behavior of rte_power_monitor where if 
mask wasn't set we entered power save mode without any checks. If we do 
a break, that means the check condition has failed somewhere and we have 
to abort the sleep. Continue keeps the sleep.




Same thought for rte_power_monitor().


+ const uint64_t val = __get_umwait_val(pmc->addr, pmc->size);


Same thing: s/pmc->/c->/


Yep, you're right.




+
+ /* abort if callback indicates that we need to stop */
+ if (c->fn(val, c->opaque) != 0)
+ break;
+ }
+
+   

Re: [dpdk-dev] dmadev discussion summary

2021-06-28 Thread Bruce Richardson
On Mon, Jun 28, 2021 at 12:14:31PM +0100, Ananyev, Konstantin wrote:
> 
> Hi everyone,
> 
> > On Sat, Jun 26, 2021 at 11:59:49AM +0800, fengchengwen wrote:
> > > Hi, all
> > >   I analyzed the current DPAM DMA driver and drew this summary in 
> > > conjunction
> > > with the previous discussion, and this will as a basis for the V2 
> > > implementation.
> > >   Feedback is welcome, thanks
> > >
> > Fantastic review and summary, many thanks for the work. Some comments
> > inline in API part below, but nothing too major, I hope.
> >
> > /Bruce
> >
> > 
> > >
> > > Summary:
> > >   1) The dpaa2/octeontx2/Kunpeng are all ARM soc, there may acts as 
> > > endpoint of
> > >  x86 host (e.g. smart NIC), multiple memory transfer requirements may 
> > > exist,
> > >  e.g. local-to-host/local-to-host..., from the point of view of API 
> > > design,
> > >  I think we should adopt a similar 'channel' or 'virt-queue' concept.
> > >   2) Whether to create a separate dmadev for each HW-queue? We previously
> > >  discussed this, and due HW-queue could indepent management (like
> > >  Kunpeng_dma and Intel DSA), we prefer create a separate dmadev for 
> > > each
> > >  HW-queue before. But I'm not sure if that's the case with dpaa. I 
> > > think
> > >  that can be left to the specific driver, no restriction is imposed 
> > > on the
> > >  framework API layer.
> > >   3) I think we could setup following abstraction at dmadev device:
> > >   
> > >   |virt-queue||virt-queue|
> > >   
> > >  \   /
> > >   \ /
> > >\   /
> > >   
> > >  | HW-queue | | HW-queue |
> > >   
> > > \/
> > >  \  /
> > >   \/
> > > dmadev
> > >   4) The driver's ops design (here we only list key points):
> > >  [dev_info_get]: mainly return the number of HW-queues
> > >  [dev_configure]: nothing important
> > >  [queue_setup]: create one virt-queue, has following main parameters:
> > >  HW-queue-index: the HW-queue index used
> > >  nb_desc: the number of HW descriptors
> > >  opaque: driver's specific info
> > >  Note1: this API return virt-queue index which will used in later 
> > > API.
> > > If user want create multiple virt-queue one the same 
> > > HW-queue,
> > > they could achieved by call queue_setup with the same
> > > HW-queue-index.
> > >  Note2: I think it's hard to define queue_setup config paramter, 
> > > and
> > > also this is control API, so I think it's OK to use opaque
> > > pointer to implement it.
> > I'm not sure opaque pointer will work in practice, so I think we should try
> > and standardize the parameters as much as possible. Since it's a control
> > plane API, using a struct with a superset of parameters may be workable.
> > Let's start with a minimum set and build up from there.
> >
> > >   [dma_copy/memset/sg]: all has vq_id input parameter.
> > >  Note: I notice dpaa can't support single and sg in one 
> > > virt-queue, and
> > >I think it's maybe software implement policy other than HW
> > >restriction because virt-queue could share the same 
> > > HW-queue.
> > Presumably for queues which support sq, the single-enqueue APIs can use a
> > single sg list internally?
> >
> > >   Here we use vq_id to tackle different scenario, like local-to-local/
> > >   local-to-host and etc.
> > >   5) And the dmadev public data-plane API (just prototype):
> > >  dma_cookie_t rte_dmadev_memset(dev, vq_id, pattern, dst, len, flags)
> > >-- flags: used as an extended parameter, it could be uint32_t
> >
> > Suggest uint64_t rather than uint32_t to ensure we have expansion room?
> > Otherwise +1
> >
> > >  dma_cookie_t rte_dmadev_memcpy(dev, vq_id, src, dst, len, flags)
> > +1
> >
> > >  dma_cookie_t rte_dmadev_memcpy_sg(dev, vq_id, sg, sg_len, flags)
> > >-- sg: struct dma_scatterlist array
> > I don't think our drivers will be directly implementing this API, but so
> > long as SG support is listed as a capability flag I'm fine with this as an
> > API. [We can't fudge it as a bunch of single copies, because that would
> > cause us to have multiple cookies rather than one]
> >
> > >  uint16_t rte_dmadev_completed(dev, vq_id, dma_cookie_t *cookie,
> > >uint16_t nb_cpls, bool *has_error)
> > >-- nb_cpls: indicate max process operations number
> > >-- has_error: indicate if there is an error
> > >-- return value: the number of successful completed operations.
> > >-- example:
> > >   1) If 
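
To make the proposal concrete, a possible calling sequence for the
prototypes sketched above. The API is still under discussion, so every
name here is provisional, and handle_error() is a made-up placeholder:

	dma_cookie_t cookie;
	bool has_error = false;
	uint16_t n;

	/* enqueue one copy on virt-queue 0 of this dmadev */
	cookie = rte_dmadev_memcpy(dev, 0, src, dst, len, 0);

	/* ... later: reap up to 32 completed operations ... */
	n = rte_dmadev_completed(dev, 0, &cookie, 32, &has_error);
	if (has_error)
		handle_error(cookie); /* app-specific recovery, not in the API */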

Re: [dpdk-dev] [PATCH v3] kni: rework rte_kni_update_link using ioctl

2021-06-28 Thread Ferruh Yigit
On 10/27/2019 8:16 PM, Igor Ryzhov wrote:
> Hi Ferruh, Dan,
> 
> Sure, I remember last year discussion but now I see the problem in current
> implementation.
> 
> Ferruh, here is an example:
> 
> We have a thread in the application that processes KNI commands from the
> kernel.
> It receives config_network_if command to set interface up, calls
> rte_eth_dev_start, and here is the problem.
> We cannot call current rte_kni_update_link from here as the interface is
> not yet up in the kernel,
> as we didn't send a response for config_network_if yet. So we need to send
> a response first and only
> after that, we can use rte_kni_update_link. Actually, we don't even know
> the exact time between we
> send a response and the moment when the kernel receives it and the
> interface becomes up.
> We always have a dependency on the interface state in the kernel. With
> ioctl approach, we don't
> have such dependency - we can call rte_kni_update_link whenever we want,
> even when the interface is
> down in the kernel. As I explained, it's common when processing
> config_network_if to set interface up.
> 

Hi Igor,

I agree with the mentioned problem. When the KNI interface is down, not being
able to update the link carrier status is inconvenient.

For a physical interface this may make sense: since the interface won't be used
by the OS, there is no need to power on the PHY and track the carrier status.
But the intention of the original link set feature requires being able to
update the carrier status independently of the interface up/down status.

Overall, I also agree it is better not to introduce a new ioctl when a generic
interface exists, but in this case the existing interface doesn't exactly fit
the intended use case, so I am OK with having the ioctl.

Can you please send a new version rebasing latest head, we can continue on that 
one?

Thanks,
ferruh


> Igor
> 
> On Mon, Oct 14, 2019 at 11:56 PM Dan Gora  wrote:
> 
>> Here's another link to the thread where this was discussed last year..
>> Igor was actually on this thread as well...
>>
>> https://mails.dpdk.org/archives/dev/2018-August/110383.html
>>
>> On Mon, Oct 14, 2019 at 4:01 PM Dan Gora  wrote:
>>>
>>> My original patch to add this feature was basically the same thing as
>>> this: setting the link status via a KNI ioctl. That method was
>>> rejected after _much_ discussion and we eventually settled on the
>>> currently implementation.
>>>
>>> My original patch was here: Message-Id: <
>> 20180628225548.21885-1...@adax.com>
>>>
>>> If you search for KNI and d...@adax.com in the DPDK devel list you
>>> should be able to suss out the whole discussion that lead to the
>>> current implementation.
>>>
>>> thanks
>>> dan
>>>
>>> On Mon, Oct 14, 2019 at 1:17 PM Ferruh Yigit 
>> wrote:

 On 10/14/2019 5:10 PM, Ferruh Yigit wrote:
> On 9/25/2019 10:36 AM, Igor Ryzhov wrote:
>> Current implementation doesn't allow us to update KNI carrier if the
>> interface is not yet UP in kernel. It means that we can't use it in
>> the
>> same thread which is processing rte_kni_ops.config_network_if,
>> which is
>> very convenient, because it allows us to have correct carrier status
>> of the interface right after we enabled it and we don't have to use
>> any
>> additional thread to track link status.
>
> Hi Igor,
>
> The existing thread tracks the link status of the physical device
>> and reflects
> the changes to the kni netdev, but the "struct rte_kni_ops"
> (rte_kni_ops.config_network_if) works other way around, it captures
>> (some)
> requests to kni netdev and reflects them to the underlying physical
>> device.
> Even 'rte_kni_update_link()' updated to use ioctl, the thread still
>> looks
> required and this patch doesn't really changes that part.
>
> Also I am reluctant to extend the KNI ioctl interface when there is
>> a generic
> way to do that work.
>
> What is the use case of updating kni netdev carrier status when the
>> interface is
> down?

 btw, if the problem is status of the interface being 'no-carrier' by
>> default,
 this can be changed by "carrier=on" parameter of the kni kernel module:
 "insmod ./build/kmod/rte_kni.ko carrier=on"
>>



Re: [dpdk-dev] [PATCH v2 3/7] eal: add power monitor for multiple events

2021-06-28 Thread Ananyev, Konstantin

> On 28-Jun-21 1:37 PM, Ananyev, Konstantin wrote:
> >
> >> Use RTM and WAITPKG instructions to perform a wait-for-writes similar to
> >> what UMWAIT does, but without the limitation of having to listen for
> >> just one event. This works because the optimized power state used by the
> >> TPAUSE instruction will cause a wake up on RTM transaction abort, so if
> >> we add the addresses we're interested in to the read-set, any write to
> >> those addresses will wake us up.
> >>
> >> Signed-off-by: Konstantin Ananyev 
> >> Signed-off-by: Anatoly Burakov 
> >> ---
> >>
> >> Notes:
> >>  v2:
> >>  - Adapt to callback mechanism
> >>
> >>   doc/guides/rel_notes/release_21_08.rst|  2 +
> >>   lib/eal/arm/rte_power_intrinsics.c| 11 +++
> >>   lib/eal/include/generic/rte_cpuflags.h|  2 +
> >>   .../include/generic/rte_power_intrinsics.h| 35 ++
> >>   lib/eal/ppc/rte_power_intrinsics.c| 11 +++
> >>   lib/eal/version.map   |  3 +
> >>   lib/eal/x86/rte_cpuflags.c|  2 +
> >>   lib/eal/x86/rte_power_intrinsics.c| 69 +++
> >>   8 files changed, 135 insertions(+)
> >>
> > ...
> >
> >> diff --git a/lib/eal/x86/rte_power_intrinsics.c 
> >> b/lib/eal/x86/rte_power_intrinsics.c
> >> index 3c5c9ce7ad..3fc6f62ef5 100644
> >> --- a/lib/eal/x86/rte_power_intrinsics.c
> >> +++ b/lib/eal/x86/rte_power_intrinsics.c
> >> @@ -4,6 +4,7 @@
> >>
> >>   #include 
> >>   #include 
> >> +#include 
> >>   #include 
> >>
> >>   #include "rte_power_intrinsics.h"
> >> @@ -28,6 +29,7 @@ __umwait_wakeup(volatile void *addr)
> >>   }
> >>
> >>   static bool wait_supported;
> >> +static bool wait_multi_supported;
> >>
> >>   static inline uint64_t
> >>   __get_umwait_val(const volatile void *p, const uint8_t sz)
> >> @@ -164,6 +166,8 @@ RTE_INIT(rte_power_intrinsics_init) {
> >>
> >>if (i.power_monitor && i.power_pause)
> >>wait_supported = 1;
> >> + if (i.power_monitor_multi)
> >> + wait_multi_supported = 1;
> >>   }
> >>
> >>   int
> >> @@ -202,6 +206,9 @@ rte_power_monitor_wakeup(const unsigned int lcore_id)
> >> * In this case, since we've already woken up, the "wakeup" was
> >> * unneeded, and since T1 is still waiting on T2 releasing the 
> >> lock, the
> >> * wakeup address is still valid so it's perfectly safe to write it.
> >> +  *
> >> +  * For multi-monitor case, the act of locking will in itself trigger 
> >> the
> >> +  * wakeup, so no additional writes necessary.
> >> */
> >>rte_spinlock_lock(&s->lock);
> >>if (s->monitor_addr != NULL)
> >> @@ -210,3 +217,65 @@ rte_power_monitor_wakeup(const unsigned int lcore_id)
> >>
> >>return 0;
> >>   }
> >> +
> >> +int
> >> +rte_power_monitor_multi(const struct rte_power_monitor_cond pmc[],
> >> + const uint32_t num, const uint64_t tsc_timestamp)
> >> +{
> >> + const unsigned int lcore_id = rte_lcore_id();
> >> + struct power_wait_status *s = &wait_status[lcore_id];
> >> + uint32_t i, rc;
> >> +
> >> + /* check if supported */
> >> + if (!wait_multi_supported)
> >> + return -ENOTSUP;
> >> +
> >> + if (pmc == NULL || num == 0)
> >> + return -EINVAL;
> >> +
> >> + /* we are already inside transaction region, return */
> >> + if (rte_xtest() != 0)
> >> + return 0;
> >> +
> >> + /* start new transaction region */
> >> + rc = rte_xbegin();
> >> +
> >> + /* transaction abort, possible write to one of wait addresses */
> >> + if (rc != RTE_XBEGIN_STARTED)
> >> + return 0;
> >> +
> >> + /*
> >> +  * the mere act of reading the lock status here adds the lock to
> >> +  * the read set. This means that when we trigger a wakeup from 
> >> another
> >> +  * thread, even if we don't have a defined wakeup address and thus 
> >> don't
> >> +  * actually cause any writes, the act of locking our lock will itself
> >> +  * trigger the wakeup and abort the transaction.
> >> +  */
> >> + rte_spinlock_is_locked(&s->lock);
> >> +
> >> + /*
> >> +  * add all addresses to wait on into transaction read-set and check 
> >> if
> >> +  * any of wakeup conditions are already met.
> >> +  */
> >> + for (i = 0; i < num; i++) {
> >> + const struct rte_power_monitor_cond *c = &pmc[i];
> >> +
> >> + if (pmc->fn == NULL)
> >
> > Should be c->fn, I believe.
> 
> Yep, will fix.
> 
> >
> >> + continue;
> >
> > Actually that way, if c->fn == NULL, we'll never add  our c->addr to 
> > monitored addresses.
> > Is that what we really want?
> > My thought was, that if callback is not set, we'll just go to power-save 
> > state without extra checking, no?
> > Something like that:
> >
> > const struct rte_power_monitor_cond *c = &pmc[i];
> > const uint64_t val = __get_umwait_val(c->addr, c->size);
> >
> 

Re: [dpdk-dev] [PATCH v3] kni: rework rte_kni_update_link using ioctl

2021-06-28 Thread Igor Ryzhov
Thanks Ferruh,

I'll send an update later this week.
I also want to add a "Suggested-by: Dan Gora " as it was his
idea.

Dan, please let me know if you don't want this tag to be added.

Thanks,
Igor

On Mon, Jun 28, 2021 at 3:55 PM Ferruh Yigit  wrote:

> On 10/27/2019 8:16 PM, Igor Ryzhov wrote:
> > Hi Ferruh, Dan,
> >
> > Sure, I remember last year discussion but now I see the problem in
> current
> > implementation.
> >
> > Ferruh, here is an example:
> >
> > We have a thread in the application that processes KNI commands from the
> > kernel.
> > It receives config_network_if command to set interface up, calls
> > rte_eth_dev_start, and here is the problem.
> > We cannot call current rte_kni_update_link from here as the interface is
> > not yet up in the kernel,
> > as we didn't send a response for config_network_if yet. So we need to
> send
> > a response first and only
> > after that, we can use rte_kni_update_link. Actually, we don't even know
> > the exact time between we
> > send a response and the moment when the kernel receives it and the
> > interface becomes up.
> > We always have a dependency on the interface state in the kernel. With
> > ioctl approach, we don't
> > have such dependency - we can call rte_kni_update_link whenever we want,
> > even when the interface is
> > down in the kernel. As I explained, it's common when processing
> > config_network_if to set interface up.
> >
>
> Hi Igor,
>
> I agree with the mentioned problem. When the KNI interface is down, not
> being able to update the link carrier status is inconvenient.
>
> For a physical interface this may make sense: since the interface won't be
> used by the OS, there is no need to power on the PHY and track the carrier
> status. But the intention of the original link set feature requires being
> able to update the carrier status independently of the interface up/down
> status.
>
> Overall, I also agree it is better not to introduce a new ioctl when a
> generic interface exists, but in this case the existing interface doesn't
> exactly fit the intended use case, so I am OK with having the ioctl.
>
> Can you please send a new version rebasing latest head, we can continue on
> that one?
>
> Thanks,
> ferruh
>
>
> > Igor
> >
> > On Mon, Oct 14, 2019 at 11:56 PM Dan Gora  wrote:
> >
> >> Here's another link to the thread where this was discussed last year..
> >> Igor was actually on this thread as well...
> >>
> >> https://mails.dpdk.org/archives/dev/2018-August/110383.html
> >>
> >> On Mon, Oct 14, 2019 at 4:01 PM Dan Gora  wrote:
> >>>
> >>> My original patch to add this feature was basically the same thing as
> >>> this: setting the link status via a KNI ioctl. That method was
> >>> rejected after _much_ discussion and we eventually settled on the
> >>> currently implementation.
> >>>
> >>> My original patch was here: Message-Id: <
> >> 20180628225548.21885-1...@adax.com>
> >>>
> >>> If you search for KNI and d...@adax.com in the DPDK devel list you
> >>> should be able to suss out the whole discussion that lead to the
> >>> current implementation.
> >>>
> >>> thanks
> >>> dan
> >>>
> >>> On Mon, Oct 14, 2019 at 1:17 PM Ferruh Yigit 
> >> wrote:
> 
>  On 10/14/2019 5:10 PM, Ferruh Yigit wrote:
> > On 9/25/2019 10:36 AM, Igor Ryzhov wrote:
> >> Current implementation doesn't allow us to update KNI carrier if the
> >> interface is not yet UP in kernel. It means that we can't use it in
> >> the
> >> same thread which is processing rte_kni_ops.config_network_if,
> >> which is
> >> very convenient, because it allows us to have correct carrier status
> >> of the interface right after we enabled it and we don't have to use
> >> any
> >> additional thread to track link status.
> >
> > Hi Igor,
> >
> > The existing thread tracks the link status of the physical device
> >> and reflects
> > the changes to the kni netdev, but the "struct rte_kni_ops"
> > (rte_kni_ops.config_network_if) works other way around, it captures
> >> (some)
> > requests to kni netdev and reflects them to the underlying physical
> >> device.
> > Even 'rte_kni_update_link()' updated to use ioctl, the thread still
> >> looks
> > required and this patch doesn't really changes that part.
> >
> > Also I am reluctant to extend the KNI ioctl interface when there is
> >> a generic
> > way to do that work.
> >
> > What is the use case of updating kni netdev carrier status when the
> >> interface is
> > down?
> 
>  btw, if the problem is status of the interface being 'no-carrier' by
> >> default,
>  this can be changed by "carrier=on" parameter of the kni kernel
> module:
>  "insmod ./build/kmod/rte_kni.ko carrier=on"
> >>
>
>


Re: [dpdk-dev] [PATCH v2 3/7] eal: add power monitor for multiple events

2021-06-28 Thread Burakov, Anatoly

On 28-Jun-21 1:58 PM, Ananyev, Konstantin wrote:



On 28-Jun-21 1:37 PM, Ananyev, Konstantin wrote:



Use RTM and WAITPKG instructions to perform a wait-for-writes similar to
what UMWAIT does, but without the limitation of having to listen for
just one event. This works because the optimized power state used by the
TPAUSE instruction will cause a wake up on RTM transaction abort, so if
we add the addresses we're interested in to the read-set, any write to
those addresses will wake us up.

Signed-off-by: Konstantin Ananyev 
Signed-off-by: Anatoly Burakov 
---

Notes:
  v2:
  - Adapt to callback mechanism

   doc/guides/rel_notes/release_21_08.rst|  2 +
   lib/eal/arm/rte_power_intrinsics.c| 11 +++
   lib/eal/include/generic/rte_cpuflags.h|  2 +
   .../include/generic/rte_power_intrinsics.h| 35 ++
   lib/eal/ppc/rte_power_intrinsics.c| 11 +++
   lib/eal/version.map   |  3 +
   lib/eal/x86/rte_cpuflags.c|  2 +
   lib/eal/x86/rte_power_intrinsics.c| 69 +++
   8 files changed, 135 insertions(+)


...


diff --git a/lib/eal/x86/rte_power_intrinsics.c 
b/lib/eal/x86/rte_power_intrinsics.c
index 3c5c9ce7ad..3fc6f62ef5 100644
--- a/lib/eal/x86/rte_power_intrinsics.c
+++ b/lib/eal/x86/rte_power_intrinsics.c
@@ -4,6 +4,7 @@

   #include 
   #include 
+#include 
   #include 

   #include "rte_power_intrinsics.h"
@@ -28,6 +29,7 @@ __umwait_wakeup(volatile void *addr)
   }

   static bool wait_supported;
+static bool wait_multi_supported;

   static inline uint64_t
   __get_umwait_val(const volatile void *p, const uint8_t sz)
@@ -164,6 +166,8 @@ RTE_INIT(rte_power_intrinsics_init) {

if (i.power_monitor && i.power_pause)
wait_supported = 1;
+ if (i.power_monitor_multi)
+ wait_multi_supported = 1;
   }

   int
@@ -202,6 +206,9 @@ rte_power_monitor_wakeup(const unsigned int lcore_id)
 * In this case, since we've already woken up, the "wakeup" was
 * unneeded, and since T1 is still waiting on T2 releasing the lock, the
 * wakeup address is still valid so it's perfectly safe to write it.
+  *
+  * For multi-monitor case, the act of locking will in itself trigger the
+  * wakeup, so no additional writes necessary.
 */
rte_spinlock_lock(&s->lock);
if (s->monitor_addr != NULL)
@@ -210,3 +217,65 @@ rte_power_monitor_wakeup(const unsigned int lcore_id)

return 0;
   }
+
+int
+rte_power_monitor_multi(const struct rte_power_monitor_cond pmc[],
+ const uint32_t num, const uint64_t tsc_timestamp)
+{
+ const unsigned int lcore_id = rte_lcore_id();
+ struct power_wait_status *s = &wait_status[lcore_id];
+ uint32_t i, rc;
+
+ /* check if supported */
+ if (!wait_multi_supported)
+ return -ENOTSUP;
+
+ if (pmc == NULL || num == 0)
+ return -EINVAL;
+
+ /* we are already inside transaction region, return */
+ if (rte_xtest() != 0)
+ return 0;
+
+ /* start new transaction region */
+ rc = rte_xbegin();
+
+ /* transaction abort, possible write to one of wait addresses */
+ if (rc != RTE_XBEGIN_STARTED)
+ return 0;
+
+ /*
+  * the mere act of reading the lock status here adds the lock to
+  * the read set. This means that when we trigger a wakeup from another
+  * thread, even if we don't have a defined wakeup address and thus don't
+  * actually cause any writes, the act of locking our lock will itself
+  * trigger the wakeup and abort the transaction.
+  */
+ rte_spinlock_is_locked(&s->lock);
+
+ /*
+  * add all addresses to wait on into transaction read-set and check if
+  * any of wakeup conditions are already met.
+  */
+ for (i = 0; i < num; i++) {
+ const struct rte_power_monitor_cond *c = &pmc[i];
+
+ if (pmc->fn == NULL)


Should be c->fn, I believe.


Yep, will fix.




+ continue;


Actually that way, if c->fn == NULL, we'll never add  our c->addr to monitored 
addresses.
Is that what we really want?
My thought was, that if callback is not set, we'll just go to power-save state 
without extra checking, no?
Something like that:

const struct rte_power_monitor_cond *c = &pmc[i];
const uint64_t val = __get_umwait_val(c->addr, c->size);

if (c->fn && c->fn(val, c->opaque) != 0)
 break;


This is consistent with previous behavior of rte_power_monitor where if
mask wasn't set we entered power save mode without any checks. If we do
a break, that means the check condition has failed somewhere and we have
to abort the sleep. Continue keeps the sleep.


Ok, so what is the current intention?
If pmc->fn == NULL what does it mean:
1) pmc->addr shouldn't be monitored at all?
2) pmc->addr should be monitored unconditionally?
3) pmc->fn should never be NULL and monitor should return an error?
4) something else?

Re: [dpdk-dev] [PATCH v3 6/7] power: support monitoring multiple Rx queues

2021-06-28 Thread Ananyev, Konstantin



> Use the new multi-monitor intrinsic to allow monitoring multiple ethdev
> Rx queues while entering the energy efficient power state. The multi
> version will be used unconditionally if supported, and the UMWAIT one
> will only be used when multi-monitor is not supported by the hardware.
> 
> Signed-off-by: Anatoly Burakov 
> ---
>  doc/guides/prog_guide/power_man.rst |  9 ++--
>  lib/power/rte_power_pmd_mgmt.c  | 76 -
>  2 files changed, 80 insertions(+), 5 deletions(-)
> 
> diff --git a/doc/guides/prog_guide/power_man.rst 
> b/doc/guides/prog_guide/power_man.rst
> index fac2c19516..3245a5ebed 100644
> --- a/doc/guides/prog_guide/power_man.rst
> +++ b/doc/guides/prog_guide/power_man.rst
> @@ -221,13 +221,16 @@ power saving whenever empty poll count reaches a 
> certain number.
>  The "monitor" mode is only supported in the following configurations and 
> scenarios:
> 
>  * If ``rte_cpu_get_intrinsics_support()`` function indicates that
> +  ``rte_power_monitor_multi()`` function is supported by the platform, then
> +  monitoring multiple Ethernet Rx queues for traffic will be supported.
> +
> +* If ``rte_cpu_get_intrinsics_support()`` function indicates that only
>``rte_power_monitor()`` is supported by the platform, then monitoring will 
> be
>limited to a mapping of 1 core 1 queue (thus, each Rx queue will have to be
>monitored from a different lcore).
> 
> -* If ``rte_cpu_get_intrinsics_support()`` function indicates that the
> -  ``rte_power_monitor()`` function is not supported, then monitor mode will 
> not
> -  be supported.
> +* If ``rte_cpu_get_intrinsics_support()`` function indicates that neither of 
> the
> +  two monitoring functions are supported, then monitor mode will not be 
> supported.
> 
>  * Not all Ethernet devices support monitoring, even if the underlying
>platform may support the necessary CPU instructions. Please refer to
> diff --git a/lib/power/rte_power_pmd_mgmt.c b/lib/power/rte_power_pmd_mgmt.c
> index 7762cd39b8..aab2d4f1ee 100644
> --- a/lib/power/rte_power_pmd_mgmt.c
> +++ b/lib/power/rte_power_pmd_mgmt.c
> @@ -155,6 +155,24 @@ queue_list_remove(struct pmd_core_cfg *cfg, const union 
> queue *q)
>   return 0;
>  }
> 
> +static inline int
> +get_monitor_addresses(struct pmd_core_cfg *cfg,
> + struct rte_power_monitor_cond *pmc)
> +{
> + const struct queue_list_entry *qle;
> + size_t i = 0;
> + int ret;
> +
> + TAILQ_FOREACH(qle, &cfg->head, next) {
> + struct rte_power_monitor_cond *cur = &pmc[i];

Looks like you never increment 'i' value inside that function.
Also it probably will be safer to add 'num' parameter to check that
we will never over-run pmc[] boundaries.

> + const union queue *q = &qle->queue;
> + ret = rte_eth_get_monitor_addr(q->portid, q->qid, cur);
> + if (ret < 0)
> + return ret;
> + }
> + return 0;
> +}
> +
>  static void
>  calc_tsc(void)
>  {
> @@ -183,6 +201,48 @@ calc_tsc(void)
>   }
>  }
> 
> +static uint16_t
> +clb_multiwait(uint16_t port_id, uint16_t qidx,
> + struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx,
> + uint16_t max_pkts __rte_unused, void *addr __rte_unused)
> +{
> + const unsigned int lcore = rte_lcore_id();
> + const union queue q = {.portid = port_id, .qid = qidx};
> + const bool empty = nb_rx == 0;
> + struct pmd_core_cfg *q_conf;
> +
> + q_conf = &lcore_cfg[lcore];
> +
> + /* early exit */
> + if (likely(!empty)) {
> + q_conf->empty_poll_stats = 0;
> + } else {
> + /* do we care about this particular queue? */
> + if (!queue_is_power_save(q_conf, &q))
> + return nb_rx;

I still don't understand the need of 'special' power_save queue here...
Why we can't just have a function:

get_number_of_queues_whose_sequential_empty_polls_less_then_threshold(struct 
pmd_core_cfg *lcore_cfg),
and then just:

/* all queues have at least EMPTYPOLL_MAX sequential empty polls */
if 
(get_number_of_queues_whose_sequential_empty_polls_less_then_threshold(q_conf) 
== 0) {
/* go into power-save mode here */
}

> +
> + /*
> +  * we can increment unconditionally here because if there were
> +  * non-empty polls in other queues assigned to this core, we
> +  * dropped the counter to zero anyway.
> +  */
> + q_conf->empty_poll_stats++;
> + if (unlikely(q_conf->empty_poll_stats > EMPTYPOLL_MAX)) {
> + struct rte_power_monitor_cond pmc[RTE_MAX_ETHPORTS];

I think you need here:
struct rte_power_monitor_cond pmc[q_conf->n_queues];


> + uint16_t ret;
> +
> + /* gather all monitoring conditions */
> + ret = get_monitor_addresses(q_conf, pmc);
> +
> + if (ret == 0)
> + rte_power_monitor_multi(pmc, q_conf->n_queues, UINT64_MAX);

Re: [dpdk-dev] [PATCH v3 6/7] power: support monitoring multiple Rx queues

2021-06-28 Thread Burakov, Anatoly

On 28-Jun-21 2:29 PM, Ananyev, Konstantin wrote:




Use the new multi-monitor intrinsic to allow monitoring multiple ethdev
Rx queues while entering the energy efficient power state. The multi
version will be used unconditionally if supported, and the UMWAIT one
will only be used when multi-monitor is not supported by the hardware.

Signed-off-by: Anatoly Burakov 
---
  doc/guides/prog_guide/power_man.rst |  9 ++--
  lib/power/rte_power_pmd_mgmt.c  | 76 -
  2 files changed, 80 insertions(+), 5 deletions(-)

diff --git a/doc/guides/prog_guide/power_man.rst 
b/doc/guides/prog_guide/power_man.rst
index fac2c19516..3245a5ebed 100644
--- a/doc/guides/prog_guide/power_man.rst
+++ b/doc/guides/prog_guide/power_man.rst
@@ -221,13 +221,16 @@ power saving whenever empty poll count reaches a certain 
number.
  The "monitor" mode is only supported in the following configurations and 
scenarios:

  * If ``rte_cpu_get_intrinsics_support()`` function indicates that
+  ``rte_power_monitor_multi()`` function is supported by the platform, then
+  monitoring multiple Ethernet Rx queues for traffic will be supported.
+
+* If ``rte_cpu_get_intrinsics_support()`` function indicates that only
``rte_power_monitor()`` is supported by the platform, then monitoring will 
be
limited to a mapping of 1 core 1 queue (thus, each Rx queue will have to be
monitored from a different lcore).

-* If ``rte_cpu_get_intrinsics_support()`` function indicates that the
-  ``rte_power_monitor()`` function is not supported, then monitor mode will not
-  be supported.
+* If ``rte_cpu_get_intrinsics_support()`` function indicates that neither of 
the
+  two monitoring functions are supported, then monitor mode will not be 
supported.

  * Not all Ethernet devices support monitoring, even if the underlying
platform may support the necessary CPU instructions. Please refer to
diff --git a/lib/power/rte_power_pmd_mgmt.c b/lib/power/rte_power_pmd_mgmt.c
index 7762cd39b8..aab2d4f1ee 100644
--- a/lib/power/rte_power_pmd_mgmt.c
+++ b/lib/power/rte_power_pmd_mgmt.c
@@ -155,6 +155,24 @@ queue_list_remove(struct pmd_core_cfg *cfg, const union 
queue *q)
   return 0;
  }

+static inline int
+get_monitor_addresses(struct pmd_core_cfg *cfg,
+ struct rte_power_monitor_cond *pmc)
+{
+ const struct queue_list_entry *qle;
+ size_t i = 0;
+ int ret;
+
+ TAILQ_FOREACH(qle, &cfg->head, next) {
+ struct rte_power_monitor_cond *cur = &pmc[i];


Looks like you never increment 'i' value inside that function.
Also it probably will be safer to add 'num' parameter to check that
we will never over-run pmc[] boundaries.


Will fix in v4, good catch!




+ const union queue *q = &qle->queue;
+ ret = rte_eth_get_monitor_addr(q->portid, q->qid, cur);
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
  static void
  calc_tsc(void)
  {
@@ -183,6 +201,48 @@ calc_tsc(void)
   }
  }

+static uint16_t
+clb_multiwait(uint16_t port_id, uint16_t qidx,
+ struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx,
+ uint16_t max_pkts __rte_unused, void *addr __rte_unused)
+{
+ const unsigned int lcore = rte_lcore_id();
+ const union queue q = {.portid = port_id, .qid = qidx};
+ const bool empty = nb_rx == 0;
+ struct pmd_core_cfg *q_conf;
+
+ q_conf = &lcore_cfg[lcore];
+
+ /* early exit */
+ if (likely(!empty)) {
+ q_conf->empty_poll_stats = 0;
+ } else {
+ /* do we care about this particular queue? */
+ if (!queue_is_power_save(q_conf, &q))
+ return nb_rx;


I still don't understand the need of 'special' power_save queue here...
Why we can't just have a function:

get_number_of_queues_whose_sequential_empty_polls_less_then_threshold(struct 
pmd_core_cfg *lcore_cfg),
and then just:

/* all queues have at least EMPTYPOLL_MAX sequential empty polls */
if 
(get_number_of_queues_whose_sequential_empty_polls_less_then_threshold(q_conf) 
== 0) {
 /* go into power-save mode here */
}


Okay, let's go through this step by step :)

Let's suppose we have three queues - q0, q1 and q2. We want to sleep 
whenever there's no traffic on *all of them*, however we cannot know 
that until we have checked all of them.


So, let's suppose that q0, q1 and q2 were empty all this time, but now 
some traffic arrived at q2 while we're still checking q0. We see that q0 
is empty, and all of the queues were empty for the last N polls, so we 
think we will be safe to sleep at q0 despite the fact that traffic has 
just arrived at q2.


This is not an issue with MONITOR mode because we will be able to see if 
current Rx ring descriptor is busy or not via the NIC callback, *but 
this is not possible* with PAUSE and SCALE modes, because they don't 
have the sneaky lookahead function of MONITOR! So, with PAUSE and SCALE 
modes, it is possible to end up in a situation where we enter the sleep even 
though traffic has just arrived at one of the other queues.

Re: [dpdk-dev] [PATCH v14 1/9] lib: build libraries that testpmd depends on

2021-06-28 Thread Tyler Retzlaff
On Mon, Jun 28, 2021 at 01:35:09PM +0300, Andrew Rybchenko wrote:
> On 6/28/21 1:01 PM, Andrew Rybchenko wrote:
> >On 6/24/21 1:34 AM, Jie Zhou wrote:
> >>Enable building libraries that testpmd depends on for Windows
> >>
> >>Signed-off-by: Jie Zhou 
> >>Signed-off-by: Jie Zhou 
> >
> >Why are two a bit different Signed-off-by used in these
> >patches? Can we drop the first one and keep the second one
> >which matches E-mail From?
> 
> In fact looking at git log I see both E-mail addresses
> present, but it looks like the first one (w/o linux.) is
> used more often.

yes, the CI system rejects the patch without the first because it
doesn't match the original commit. it also rejects without the second
because it needs to match the dev email list subscriber mail so both
have been provided to pass CI.

i think future patches submitted won't be an issue because git local
config for the dpdk clone has been changed to match the second which is
also the subscription mail but changing the config doesn't re-write
the history on the commits (and it shouldn't).


Re: [dpdk-dev] [dpdk-stable] [PATCH 2/4] compress/mlx5: fix constant size in QP creation

2021-06-28 Thread Michael Baum

> -Original Message-
> From: Thomas Monjalon 
> Sent: Wednesday, June 23, 2021 9:48 AM
> To: Michael Baum 
> Cc: dev@dpdk.org; sta...@dpdk.org; Matan Azrad ;
> Raslan Darawsheh ; Slava Ovsiienko
> 
> Subject: Re: [dpdk-stable] [PATCH 2/4] compress/mlx5: fix constant size in
> QP creation
> 
> 01/06/2021 09:11, Michael Baum:
> > The mlx5_compress_qp_setup function makes shifting to the numeric
> > constant 1, then sends it as a parameter to rte_calloc function.
> >
> > The rte_calloc function expects to get size_t (64 bits, unsigned) and
> 
> No on 32-bit systems, size_t is 32 bits.

Thanks for the comment, I'll send v2.

> > instead gets a 32-bit variable, because the numeric constant size is a
> > 32-bit.
> 
> Most of the patches of this series say "constant" where it is a variable.
> 
> > In case the shift is greater than 32 the variable will lose its value
> > even though the function can get 64-bit argument.
> >
> > Change the size of the numeric constant 1 to 64-bit.
> [...]
> > - opaq_buf = rte_calloc(__func__, 1u << log_ops_n,
> > + opaq_buf = rte_calloc(__func__, RTE_BIT64(log_ops_n),
> 
> 
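
The pitfall in isolation, as a sketch (RTE_BIT64() is the existing macro
from rte_bitops.h; the shift count is made up):

	uint32_t log_ops_n = 40;
	/* 1u is 32 bits wide: shifting it by 32 or more is undefined */
	uint64_t bad = 1u << log_ops_n;
	/* RTE_BIT64() expands to UINT64_C(1) << (n), keeping all 64 bits */
	uint64_t good = RTE_BIT64(log_ops_n);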



Re: [dpdk-dev] [RFC v2] porting AddressSanitizer feature to DPDK

2021-06-28 Thread Burakov, Anatoly

On 18-Jun-21 10:04 AM, David Marchand wrote:

On Fri, Jun 18, 2021 at 9:49 AM Lin, Xueqin  wrote:

Suggest listing demo code and tool capture information for user to try if

tool works, also add this part into doc.




# Also, Please update the release note for this feature.

Sure, we can update the release note if code merge.


Probably you can send v1 version next i.e change the RFC status to get
merged.


Sure, we will send v1 patch if no obvious objection for that, hope patch could 
receive some ACKs and could success to merge, thanks.


How did you test this work?

UNH recently started testing with ASAN and it reveals leaks just in
the unit test.

Merging these annotations will help catch more issues.
But users will hit the current issues that we must fix first.



As far as i can tell, the regular build is not affected by this patch, 
so no issues will be hit until someone actually runs the test. IMO it's 
better to merge it early to catch more issues than to gate the feature 
on the condition that we fix all bugs unrelated to this feature first.


--
Thanks,
Anatoly


Re: [dpdk-dev] [RFC v2] porting AddressSanitizer feature to DPDK

2021-06-28 Thread Jerin Jacob
On Mon, Jun 28, 2021 at 7:52 PM Burakov, Anatoly
 wrote:
>
> On 18-Jun-21 10:04 AM, David Marchand wrote:
> > On Fri, Jun 18, 2021 at 9:49 AM Lin, Xueqin  wrote:
>  Suggest listing demo code and tool capture information for user to try if
> >>> tool works, also add this part into doc.
> 
> >
> > # Also, Please update the release note for this feature.
>  Sure, we can update the release note if code merge.
> >>>
> >>> Probably you can send v1 version next i.e change the RFC status to get
> >>> merged.
> >>
> >> Sure, we will send v1 patch if no obvious objection for that, hope patch 
> >> could receive some ACKs and could success to merge, thanks.
> >
> > How did you test this work?
> >
> > UNH recently started testing with ASAN and it reveals leaks just in
> > the unit test.
> >
> > Merging these annotations will help catch more issues.
> > But users will hit the current issues that we must fix first.
> >
>
> As far as I can tell, the regular build is not affected by this patch,
> so no issues will be hit until someone actually runs the test. IMO it's
> better to merge it early to catch more issues than to gate the feature
> on the condition that we fix all bugs unrelated to this feature first.

+1


>
> --
> Thanks,
> Anatoly


Re: [dpdk-dev] [PATCH v14 6/9] app/testpmd: fix parse_fec_mode return type name

2021-06-28 Thread Tyler Retzlaff
On Mon, Jun 28, 2021 at 01:55:02PM +0300, Andrew Rybchenko wrote:
> On 6/24/21 1:34 AM, Jie Zhou wrote:
> >Replace parse_fec_mode misleading return type name mode with fec_capa
> >
> >Fixes: b19da32e3151 ("app/testpmd: add FEC command")
> >Cc: sta...@dpdk.org
> >
> >Signed-off-by: Jie Zhou 
> >Signed-off-by: Jie Zhou 
> 
> [snip]
> 
> >diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
> >index 283b5e3680..9ae4d90dd1 100644
> >--- a/app/test-pmd/testpmd.h
> >+++ b/app/test-pmd/testpmd.h
> >@@ -885,7 +885,7 @@ void show_tx_pkt_segments(void);
> >  void set_tx_pkt_times(unsigned int *tx_times);
> >  void show_tx_pkt_times(void);
> >  void set_tx_pkt_split(const char *name);
> >-int parse_fec_mode(const char *name, enum rte_eth_fec_mode *mode);
> >+int parse_fec_mode(const char *name, uint32_t *fec_capa);
> 
> I guess that the real reason behind is to fix implicit
> conversion of enum pointer to/from uint32_t pointer.
> I guess the problem is different signness of enum on
> Windows compiler.

yes, compilers targeting such platforms will select `int' once all constants
of the enumeration list are defined.

> 
> If so, please, put real motivation of the changeset in summary.
> It should be human-readable (and do not contain function name).
> Explain details in the description.
> 
> Yes, I agree that mode is misleading here and should be mentioned
> in the description, but I guess it is not the root cause.
> Maybe I'm wrong.
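As a hedged illustration of the root cause (this snippet is invented for the
purpose, not taken from the patch): the underlying type of an enum is
implementation-defined, so an enum pointer cannot portably stand in for a
uint32_t pointer:

#include <stdint.h>

enum fec_mode { FEC_NOFEC, FEC_AUTO, FEC_RS }; /* illustrative enum */

static void fill_capa(uint32_t *fec_capa)
{
    *fec_capa = 1u << FEC_AUTO;
}

void caller(void)
{
    enum fec_mode mode;

    /* Some compilers pick a signed int as the underlying type, so
     * passing &mode where uint32_t * is expected needs a cast and is
     * flagged by stricter toolchains; hence the switch to uint32_t. */
    fill_capa((uint32_t *)&mode);
}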


Re: [dpdk-dev] [RFC] toolchain specific macro expansion

2021-06-28 Thread Tyler Retzlaff
On Thu, Jun 24, 2021 at 06:29:20PM +0200, Thomas Monjalon wrote:
> 24/06/2021 18:02, Tyler Retzlaff:
> > On Thu, Jun 24, 2021 at 08:54:49AM +0200, Thomas Monjalon wrote:
> > > 23/06/2021 20:26, Tyler Retzlaff:
> > > > // lib/eal/gcc/rte_toolchain_common.h
> > > > #define __rte_noreturn __attribute__((noreturn))
> > > 
> > > We should keep a macro in rte_common.h which triggers an explicit error
> > 
> > i think that's relatively trivial to do. rte_common.h could after
> > toolchain specific include do a simple test.
> > 
> > #ifndef __rte_no_return
> > #error no __rte_no_return defined for toolchain
> > #endif
> 
> No I was thinking of:
> 
> /** Doxygen comment for the attribute below */
> #define __rte_no_return RTE_ATTR_NOT_SUPPORTED

oh, didn't know about this. it sounds better.

> 
> This way we have a documentation in a single place for the macro,
> and compilation fails if it is not implemented for the toolchain.

yes, I was thinking about this.  I'm glad you suggested it because
a single source of documentation in rte_common.h would be better than
having to maintain redundant copies.
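A sketch of the layout being discussed (the file paths and
RTE_ATTR_NOT_SUPPORTED are proposals from this thread, not existing DPDK code):

/* lib/eal/gcc/rte_toolchain_common.h: per-toolchain definition */
#define __rte_noreturn __attribute__((noreturn))

/*
 * lib/eal/include/rte_common.h: single documented fallback. If the
 * toolchain header did not define the macro, any use of __rte_noreturn
 * expands to the intentionally undefined RTE_ATTR_NOT_SUPPORTED and
 * compilation fails at the point of use.
 */
#ifndef __rte_noreturn
/** Hint that the function never returns. */
#define __rte_noreturn RTE_ATTR_NOT_SUPPORTED
#endif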



[dpdk-dev] [PATCH] common/mlx5: share memory free callback

2021-06-28 Thread Michael Baum
All the mlx5 drivers using MRs for data-path must unregister the mapped
memory when it is freed by the dpdk process.

Currently, only the net/eth driver unregisters MRs in free event.

Move the net callback handler from net driver to common.

Cc: sta...@dpdk.org

Signed-off-by: Michael Baum 
Acked-by: Matan Azrad 
---
 drivers/common/mlx5/mlx5_common_mr.c | 89 +++
 drivers/common/mlx5/mlx5_common_mr.h |  3 +
 drivers/common/mlx5/version.map  |  1 +
 drivers/net/mlx5/mlx5_mr.c   | 90 +---
 4 files changed, 95 insertions(+), 88 deletions(-)

diff --git a/drivers/common/mlx5/mlx5_common_mr.c b/drivers/common/mlx5/mlx5_common_mr.c
index afb5b3d0a7..98fe8698e2 100644
--- a/drivers/common/mlx5/mlx5_common_mr.c
+++ b/drivers/common/mlx5/mlx5_common_mr.c
@@ -1062,6 +1062,95 @@ mlx5_create_mr_ext(void *pd, uintptr_t addr, size_t len, int socket_id,
return mr;
 }
 
+/**
+ * Callback for memory free event. Iterate freed memsegs and check whether it
+ * belongs to an existing MR. If found, clear the bit from bitmap of MR. As a
+ * result, the MR would be fragmented. If it becomes empty, the MR will be freed
+ * later by mlx5_mr_garbage_collect(). Even if this callback is called from a
+ * secondary process, the garbage collector will be called in primary process
+ * as the secondary process can't call mlx5_mr_create().
+ *
+ * The global cache must be rebuilt if there's any change and this event has to
+ * be propagated to dataplane threads to flush the local caches.
+ *
+ * @param share_cache
+ *   Pointer to a global shared MR cache.
+ * @param ibdev_name
+ *   Name of ibv device.
+ * @param addr
+ *   Address of freed memory.
+ * @param len
+ *   Size of freed memory.
+ */
+void
+mlx5_free_mr_by_addr(struct mlx5_mr_share_cache *share_cache,
+const char *ibdev_name, const void *addr, size_t len)
+{
+   const struct rte_memseg_list *msl;
+   struct mlx5_mr *mr;
+   int ms_n;
+   int i;
+   int rebuild = 0;
+
+   DRV_LOG(DEBUG, "device %s free callback: addr=%p, len=%zu",
+   ibdev_name, addr, len);
+   msl = rte_mem_virt2memseg_list(addr);
+   /* addr and len must be page-aligned. */
+   MLX5_ASSERT((uintptr_t)addr ==
+   RTE_ALIGN((uintptr_t)addr, msl->page_sz));
+   MLX5_ASSERT(len == RTE_ALIGN(len, msl->page_sz));
+   ms_n = len / msl->page_sz;
+   rte_rwlock_write_lock(&share_cache->rwlock);
+   /* Clear bits of freed memsegs from MR. */
+   for (i = 0; i < ms_n; ++i) {
+   const struct rte_memseg *ms;
+   struct mr_cache_entry entry;
+   uintptr_t start;
+   int ms_idx;
+   uint32_t pos;
+
+   /* Find MR having this memseg. */
+   start = (uintptr_t)addr + i * msl->page_sz;
+   mr = mlx5_mr_lookup_list(share_cache, &entry, start);
+   if (mr == NULL)
+   continue;
+   MLX5_ASSERT(mr->msl); /* Can't be external memory. */
+   ms = rte_mem_virt2memseg((void *)start, msl);
+   MLX5_ASSERT(ms != NULL);
+   MLX5_ASSERT(msl->page_sz == ms->hugepage_sz);
+   ms_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
+   pos = ms_idx - mr->ms_base_idx;
+   MLX5_ASSERT(rte_bitmap_get(mr->ms_bmp, pos));
+   MLX5_ASSERT(pos < mr->ms_bmp_n);
+   DRV_LOG(DEBUG, "device %s MR(%p): clear bitmap[%u] for addr %p",
+   ibdev_name, (void *)mr, pos, (void *)start);
+   rte_bitmap_clear(mr->ms_bmp, pos);
+   if (--mr->ms_n == 0) {
+   LIST_REMOVE(mr, mr);
+   LIST_INSERT_HEAD(&share_cache->mr_free_list, mr, mr);
+   DRV_LOG(DEBUG, "device %s remove MR(%p) from list",
+   ibdev_name, (void *)mr);
+   }
+   /*
+* MR is fragmented or will be freed. the global cache must be
+* rebuilt.
+*/
+   rebuild = 1;
+   }
+   if (rebuild) {
+   mlx5_mr_rebuild_cache(share_cache);
+   /*
+* No explicit wmb is needed after updating dev_gen due to
+* store-release ordering in unlock that provides the
+* implicit barrier at the software visible level.
+*/
+   ++share_cache->dev_gen;
+   DRV_LOG(DEBUG, "broadcasting local cache flush, gen=%d",
+   share_cache->dev_gen);
+   }
+   rte_rwlock_write_unlock(&share_cache->rwlock);
+}
+
 /**
  * Dump all the created MRs and the global cache entries.
  *
diff --git a/drivers/common/mlx5/mlx5_common_mr.h b/drivers/common/mlx5/mlx5_common_mr.h
index 5cc3f097c2..6e465a05e9 100644
--- a/drivers/common/mlx5/mlx5_common_mr.h
+++ b/drivers/common/mlx5/mlx5_com
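For orientation, a hedged sketch of how a handler like this is typically
attached to EAL's memory event notifications (the callback and registration
names here are illustrative; the ibdev name would come from the device
context, not a literal string):

#include <rte_memory.h>

static void
mr_mem_event_cb(enum rte_mem_event event_type, const void *addr,
        size_t len, void *arg)
{
    struct mlx5_mr_share_cache *share_cache = arg;

    /* only the free event requires MR cleanup */
    if (event_type == RTE_MEM_EVENT_FREE)
        mlx5_free_mr_by_addr(share_cache, "mlx5_0" /* example */,
                addr, len);
}

static int
register_mr_mem_event(struct mlx5_mr_share_cache *share_cache)
{
    return rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
            mr_mem_event_cb, share_cache);
}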

[dpdk-dev] DPDK Release Status Meeting 24/06/2021

2021-06-28 Thread Mcnamara, John
Release status meeting minutes 24 June 2021
===========================================
:Date: 24 June 2021
:toc:

.Agenda:
* Release Dates
* Subtrees
* Roadmaps
* LTS
* Defects
* Opens

.Participants:
* Arm
* Debian/Microsoft
* Intel
* Marvell
* Nvidia
* Red Hat


Release Dates
-

* `v21.08` dates
  - Proposal/V1: Wednesday, 2 June (completed)
  - -rc1:        Monday, 5 July
  - Release:     Tuesday, 3 August

* Note: We need to hold to the early August release date since
  several of the maintainers will be on holidays after that.

* `v21.11` dates (proposed and subject to discussion)
  - Proposal/V1: Friday, 10 September
  - -rc1:        Friday, 15 October
  - Release:     Friday, 19 November

Subtrees


* main
  - Unit test fixes for issues with Address Sanitizer.
  - Build issues with Solarflare patches - under investigation.
  - Some of the sub-trees already pulled as part of incremental merging prior to RC1.
  - RC1 targeted for Monday 5th July. Subtree maintainers should submit PRs on
    Wednesday 30th June to allow time for RC1 merge.

* next-net
  - Andrew working on testpmd patches for Windows - needs ack
  - Bonding patches also need reviews

* next-crypto
  - 4 new PMDs in this release:
** CNXK - reviewed - awaiting final version for RC1.
** MLX - still in progress. New version with unit tests due in next few days.
** Intel QAT - under review.
** NXP baseband - requires new version.

* next-eventdev
  - CNXK eventdev driver - should be available for RC1.
  - Baseband patches reviewed.

* next-virtio
  - 4 series from Intel around the async data - new series under review.
  - Patchset on numa reallocation under review.

* next-net-brcm
  - Big patchset with 58 patches merged into dpdk-next-net-brcm sub tree.

* next-net-intel
  - Proceeding okay. No issues

* next-net-mlx
  - Should be a small release for Mellanox
  - Waiting for PR to be pulled.

* next-net-mrvl
  - CNXK ethernet driver (from last release).
  - V4 received and under review. Should be merged next week.


LTS
---

* `v19.11` (next version is `v19.11.9`)
  - RC3 tagged
  - Target release date July 2

* `v20.11` (next version is `v20.11.2`)
  - RC2 tagged

* Distros
  - v20.11 in Debian 11
  - v20.11 in Ubuntu 21.04


Defects
---

* Bugzilla links, 'Bugs',  added for hosted projects
  - https://www.dpdk.org/hosted-projects/


Opens
-

* None


.DPDK Release Status Meetings
*
The DPDK Release Status Meeting is intended for DPDK Committers to discuss the 
status of the master tree and sub-trees, and for project managers to track 
progress or milestone dates.

The meeting occurs every Thursday at 8:30 UTC on https://meet.jit.si/DPDK

If you wish to attend, just send an email to "John McNamara" for the invite.
*


[dpdk-dev] [PATCH v4 0/7] Enhancements for PMD power management

2021-06-28 Thread Anatoly Burakov
This patchset introduces several changes related to PMD power management:

- Changed monitoring intrinsics to use callbacks as a comparison function, based
  on previous patchset [1] but incorporating feedback [2] - this hopefully will
  make it possible to add support for .get_monitor_addr in virtio
- Add a new intrinsic to monitor multiple addresses, based on RTM instruction
  set and the TPAUSE instruction
- Add support for PMD power management on multiple queues, as well as all
  accompanying infrastructure and example apps changes

v4:
- Replaced raw number with a macro
- Fixed all the bugs found by Konstantin
- Some other minor corrections

v3:
- Moved some doc updates to NIC features list

v2:
- Changed check inversion to callbacks
- Addressed feedback from Konstantin
- Added doc updates where necessary

[1] http://patches.dpdk.org/project/dpdk/list/?series=16930&state=*
[2] http://patches.dpdk.org/project/dpdk/patch/819ef1ace187365a615d3383e54579e3d9fb216e.1620747068.git.anatoly.bura...@intel.com/#133274

Anatoly Burakov (7):
  power_intrinsics: use callbacks for comparison
  net/af_xdp: add power monitor support
  eal: add power monitor for multiple events
  power: remove thread safety from PMD power API's
  power: support callbacks for multiple Rx queues
  power: support monitoring multiple Rx queues
  l3fwd-power: support multiqueue in PMD pmgmt modes

 doc/guides/nics/features.rst  |  10 +
 doc/guides/prog_guide/power_man.rst   |  78 ++-
 doc/guides/rel_notes/release_21_08.rst|  11 +
 drivers/event/dlb2/dlb2.c |  17 +-
 drivers/net/af_xdp/rte_eth_af_xdp.c   |  34 +
 drivers/net/i40e/i40e_rxtx.c  |  20 +-
 drivers/net/iavf/iavf_rxtx.c  |  20 +-
 drivers/net/ice/ice_rxtx.c|  20 +-
 drivers/net/ixgbe/ixgbe_rxtx.c|  20 +-
 drivers/net/mlx5/mlx5_rx.c|  17 +-
 examples/l3fwd-power/main.c   |  39 +-
 lib/eal/arm/rte_power_intrinsics.c|  11 +
 lib/eal/include/generic/rte_cpuflags.h|   2 +
 .../include/generic/rte_power_intrinsics.h|  68 +-
 lib/eal/ppc/rte_power_intrinsics.c|  11 +
 lib/eal/version.map   |   3 +
 lib/eal/x86/rte_cpuflags.c|   2 +
 lib/eal/x86/rte_power_intrinsics.c|  90 ++-
 lib/power/meson.build |   3 +
 lib/power/rte_power_pmd_mgmt.c| 582 +-
 lib/power/rte_power_pmd_mgmt.h|  40 ++
 lib/power/version.map |   3 +
 22 files changed, 874 insertions(+), 227 deletions(-)

-- 
2.25.1



[dpdk-dev] [PATCH v4 1/7] power_intrinsics: use callbacks for comparison

2021-06-28 Thread Anatoly Burakov
Previously, the semantics of power monitor were such that we were
checking current value against the expected value, and if they matched,
then the sleep was aborted. This is somewhat inflexible, because it only
allowed us to check for a specific value in a specific way.

This commit replaces the comparison with a user callback mechanism, so
that any PMD (or other code) using `rte_power_monitor()` can define
their own comparison semantics and decision making on how to detect the
need to abort the entering of power optimized state.

Existing implementations are adjusted to follow the new semantics.

Suggested-by: Konstantin Ananyev 
Signed-off-by: Anatoly Burakov 
Acked-by: Konstantin Ananyev 
---

Notes:
v4:
- Return error if callback is set to NULL
- Replace raw number with a macro in monitor condition opaque data

v2:
- Use callback mechanism for more flexibility
- Address feedback from Konstantin

 doc/guides/rel_notes/release_21_08.rst|  1 +
 drivers/event/dlb2/dlb2.c | 17 --
 drivers/net/i40e/i40e_rxtx.c  | 20 +++
 drivers/net/iavf/iavf_rxtx.c  | 20 +++
 drivers/net/ice/ice_rxtx.c| 20 +++
 drivers/net/ixgbe/ixgbe_rxtx.c| 20 +++
 drivers/net/mlx5/mlx5_rx.c| 17 --
 .../include/generic/rte_power_intrinsics.h| 33 +++
 lib/eal/x86/rte_power_intrinsics.c| 17 +-
 9 files changed, 121 insertions(+), 44 deletions(-)

diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index a6ecfdf3ce..c84ac280f5 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -84,6 +84,7 @@ API Changes
Also, make sure to start the actual text at the margin.
===
 
+* eal: the ``rte_power_intrinsics`` API changed to use a callback mechanism.
 
 ABI Changes
 ---
diff --git a/drivers/event/dlb2/dlb2.c b/drivers/event/dlb2/dlb2.c
index eca183753f..252bbd8d5e 100644
--- a/drivers/event/dlb2/dlb2.c
+++ b/drivers/event/dlb2/dlb2.c
@@ -3154,6 +3154,16 @@ dlb2_port_credits_inc(struct dlb2_port *qm_port, int num)
}
 }
 
+#define CLB_MASK_IDX 0
+#define CLB_VAL_IDX 1
+static int
+dlb2_monitor_callback(const uint64_t val,
+   const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
+{
+   /* abort if the value matches */
+   return (val & opaque[CLB_MASK_IDX]) == opaque[CLB_VAL_IDX] ? -1 : 0;
+}
+
 static inline int
 dlb2_dequeue_wait(struct dlb2_eventdev *dlb2,
  struct dlb2_eventdev_port *ev_port,
@@ -3194,8 +3204,11 @@ dlb2_dequeue_wait(struct dlb2_eventdev *dlb2,
expected_value = 0;
 
pmc.addr = monitor_addr;
-   pmc.val = expected_value;
-   pmc.mask = qe_mask.raw_qe[1];
+   /* store expected value and comparison mask in opaque data */
+   pmc.opaque[CLB_VAL_IDX] = expected_value;
+   pmc.opaque[CLB_MASK_IDX] = qe_mask.raw_qe[1];
+   /* set up callback */
+   pmc.fn = dlb2_monitor_callback;
pmc.size = sizeof(uint64_t);
 
rte_power_monitor(&pmc, timeout + start_ticks);
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 6c58decece..081682f88b 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -81,6 +81,18 @@
 #define I40E_TX_OFFLOAD_SIMPLE_NOTSUP_MASK \
(PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_SIMPLE_SUP_MASK)
 
+static int
+i40e_monitor_callback(const uint64_t value,
+   const uint64_t arg[RTE_POWER_MONITOR_OPAQUE_SZ] __rte_unused)
+{
+   const uint64_t m = rte_cpu_to_le_64(1 << I40E_RX_DESC_STATUS_DD_SHIFT);
+   /*
+* we expect the DD bit to be set to 1 if this descriptor was already
+* written to.
+*/
+   return (value & m) == m ? -1 : 0;
+}
+
 int
 i40e_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 {
@@ -93,12 +105,8 @@ i40e_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
/* watch for changes in status bit */
pmc->addr = &rxdp->wb.qword1.status_error_len;
 
-   /*
-* we expect the DD bit to be set to 1 if this descriptor was already
-* written to.
-*/
-   pmc->val = rte_cpu_to_le_64(1 << I40E_RX_DESC_STATUS_DD_SHIFT);
-   pmc->mask = rte_cpu_to_le_64(1 << I40E_RX_DESC_STATUS_DD_SHIFT);
+   /* comparison callback */
+   pmc->fn = i40e_monitor_callback;
 
/* registers are 64-bit */
pmc->size = sizeof(uint64_t);
diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 0361af0d85..7ed196ec22 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -57,6 +57,18 @@ iavf_proto_xtr_type_to_rxdid(uin
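To summarize the new contract in one place, here is a minimal hedged sketch of
a user-defined callback (the my_* names and the mask/value are invented for
illustration; the shape mirrors the driver callbacks above):

#include <stdint.h>
#include <rte_power_intrinsics.h>

#define MY_MASK_IDX 0
#define MY_VAL_IDX 1

/* return -1 to abort entering the optimized state, 0 to proceed */
static int
my_monitor_callback(const uint64_t value,
        const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
{
    return (value & opaque[MY_MASK_IDX]) == opaque[MY_VAL_IDX] ? -1 : 0;
}

static void
setup_condition(struct rte_power_monitor_cond *pmc, volatile void *addr)
{
    pmc->addr = addr;
    pmc->size = sizeof(uint64_t);
    pmc->opaque[MY_MASK_IDX] = UINT64_C(0xff); /* example mask */
    pmc->opaque[MY_VAL_IDX] = UINT64_C(0x01);  /* example value */
    pmc->fn = my_monitor_callback;
}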

[dpdk-dev] [PATCH v4 2/7] net/af_xdp: add power monitor support

2021-06-28 Thread Anatoly Burakov
Implement support for .get_monitor_addr in AF_XDP driver.

Signed-off-by: Anatoly Burakov 
---

Notes:
v2:
- Rewrite using the callback mechanism

 drivers/net/af_xdp/rte_eth_af_xdp.c | 34 +
 1 file changed, 34 insertions(+)

diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index eb5660a3dc..7830d0c23a 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -37,6 +37,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "compat.h"
 
@@ -788,6 +789,38 @@ eth_dev_configure(struct rte_eth_dev *dev)
return 0;
 }
 
+#define CLB_VAL_IDX 0
+static int
+eth_monitor_callback(const uint64_t value,
+   const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
+{
+   const uint64_t v = opaque[CLB_VAL_IDX];
+   const uint64_t m = (uint32_t)~0;
+
+   /* if the value has changed, abort entering power optimized state */
+   return (value & m) == v ? 0 : -1;
+}
+
+static int
+eth_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
+{
+   struct pkt_rx_queue *rxq = rx_queue;
+   unsigned int *prod = rxq->rx.producer;
+   const uint32_t cur_val = rxq->rx.cached_prod; /* use cached value */
+
+   /* watch for changes in producer ring */
+   pmc->addr = (void*)prod;
+
+   /* store current value */
+   pmc->opaque[CLB_VAL_IDX] = cur_val;
+   pmc->fn = eth_monitor_callback;
+
+   /* AF_XDP producer ring index is 32-bit */
+   pmc->size = sizeof(uint32_t);
+
+   return 0;
+}
+
 static int
 eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 {
@@ -1448,6 +1481,7 @@ static const struct eth_dev_ops ops = {
.link_update = eth_link_update,
.stats_get = eth_stats_get,
.stats_reset = eth_stats_reset,
+   .get_monitor_addr = eth_get_monitor_addr
 };
 
 /** parse busy_budget argument */
-- 
2.25.1



[dpdk-dev] [PATCH v4 3/7] eal: add power monitor for multiple events

2021-06-28 Thread Anatoly Burakov
Use RTM and WAITPKG instructions to perform a wait-for-writes similar to
what UMWAIT does, but without the limitation of having to listen for
just one event. This works because the optimized power state used by the
TPAUSE instruction will cause a wake up on RTM transaction abort, so if
we add the addresses we're interested in to the read-set, any write to
those addresses will wake us up.

Signed-off-by: Konstantin Ananyev 
Signed-off-by: Anatoly Burakov 
---

Notes:
v4:
- Fixed bugs in accessing the monitor condition
- Abort on any monitor condition not having a defined callback

v2:
- Adapt to callback mechanism

 doc/guides/rel_notes/release_21_08.rst|  2 +
 lib/eal/arm/rte_power_intrinsics.c| 11 +++
 lib/eal/include/generic/rte_cpuflags.h|  2 +
 .../include/generic/rte_power_intrinsics.h| 35 +
 lib/eal/ppc/rte_power_intrinsics.c| 11 +++
 lib/eal/version.map   |  3 +
 lib/eal/x86/rte_cpuflags.c|  2 +
 lib/eal/x86/rte_power_intrinsics.c| 73 +++
 8 files changed, 139 insertions(+)

diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index c84ac280f5..9d1cfac395 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -55,6 +55,8 @@ New Features
  Also, make sure to start the actual text at the margin.
  ===
 
+* eal: added ``rte_power_monitor_multi`` to support waiting for multiple events.
+
 
 Removed Items
 -
diff --git a/lib/eal/arm/rte_power_intrinsics.c b/lib/eal/arm/rte_power_intrinsics.c
index e83f04072a..78f55b7203 100644
--- a/lib/eal/arm/rte_power_intrinsics.c
+++ b/lib/eal/arm/rte_power_intrinsics.c
@@ -38,3 +38,14 @@ rte_power_monitor_wakeup(const unsigned int lcore_id)
 
return -ENOTSUP;
 }
+
+int
+rte_power_monitor_multi(const struct rte_power_monitor_cond pmc[],
+   const uint32_t num, const uint64_t tsc_timestamp)
+{
+   RTE_SET_USED(pmc);
+   RTE_SET_USED(num);
+   RTE_SET_USED(tsc_timestamp);
+
+   return -ENOTSUP;
+}
diff --git a/lib/eal/include/generic/rte_cpuflags.h b/lib/eal/include/generic/rte_cpuflags.h
index 28a5aecde8..d35551e931 100644
--- a/lib/eal/include/generic/rte_cpuflags.h
+++ b/lib/eal/include/generic/rte_cpuflags.h
@@ -24,6 +24,8 @@ struct rte_cpu_intrinsics {
/**< indicates support for rte_power_monitor function */
uint32_t power_pause : 1;
/**< indicates support for rte_power_pause function */
+   uint32_t power_monitor_multi : 1;
+   /**< indicates support for rte_power_monitor_multi function */
 };
 
 /**
diff --git a/lib/eal/include/generic/rte_power_intrinsics.h b/lib/eal/include/generic/rte_power_intrinsics.h
index c9aa52a86d..04e8c2ab37 100644
--- a/lib/eal/include/generic/rte_power_intrinsics.h
+++ b/lib/eal/include/generic/rte_power_intrinsics.h
@@ -128,4 +128,39 @@ int rte_power_monitor_wakeup(const unsigned int lcore_id);
 __rte_experimental
 int rte_power_pause(const uint64_t tsc_timestamp);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Monitor a set of addresses for changes. This will cause the CPU to enter an
+ * architecture-defined optimized power state until either one of the specified
+ * memory addresses is written to, a certain TSC timestamp is reached, or other
+ * reasons cause the CPU to wake up.
+ *
+ * Additionally, `expected` 64-bit values and 64-bit masks are provided. If
+ * mask is non-zero, the current value pointed to by the `p` pointer will be
+ * checked against the expected value, and if they do not match, the entering of
+ * optimized power state may be aborted.
+ *
+ * @warning It is responsibility of the user to check if this function is
+ *   supported at runtime using `rte_cpu_get_intrinsics_support()` API call.
+ *   Failing to do so may result in an illegal CPU instruction error.
+ *
+ * @param pmc
+ *   An array of monitoring condition structures.
+ * @param num
+ *   Length of the `pmc` array.
+ * @param tsc_timestamp
+ *   Maximum TSC timestamp to wait for. Note that the wait behavior is
+ *   architecture-dependent.
+ *
+ * @return
+ *   0 on success
+ *   -EINVAL on invalid parameters
+ *   -ENOTSUP if unsupported
+ */
+__rte_experimental
+int rte_power_monitor_multi(const struct rte_power_monitor_cond pmc[],
+   const uint32_t num, const uint64_t tsc_timestamp);
+
 #endif /* _RTE_POWER_INTRINSIC_H_ */
diff --git a/lib/eal/ppc/rte_power_intrinsics.c b/lib/eal/ppc/rte_power_intrinsics.c
index 7fc9586da7..f00b58ade5 100644
--- a/lib/eal/ppc/rte_power_intrinsics.c
+++ b/lib/eal/ppc/rte_power_intrinsics.c
@@ -38,3 +38,14 @@ rte_power_monitor_wakeup(const unsigned int lcore_id)
 
return -ENOTSUP;
 }
+
+int
+rte_power_monitor_multi(const struct rte_power_monitor_cond pmc[],
+   const 
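A hedged usage sketch combining the new API with its runtime support check
(the two conditions are assumed to have been filled in beforehand, e.g. by
rte_eth_get_monitor_addr(); the infinite timeout is arbitrary):

#include <stdint.h>
#include <rte_cpuflags.h>
#include <rte_power_intrinsics.h>

static void
wait_on_two_queues(struct rte_power_monitor_cond pmc[2])
{
    struct rte_cpu_intrinsics intr;

    rte_cpu_get_intrinsics_support(&intr);
    if (intr.power_monitor_multi)
        /* sleep until either monitored address is written to */
        rte_power_monitor_multi(pmc, 2, UINT64_MAX);
}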

[dpdk-dev] [PATCH v4 4/7] power: remove thread safety from PMD power API's

2021-06-28 Thread Anatoly Burakov
Currently, we expect that only one callback can be active at any given
moment, for a particular queue configuration, which is relatively easy
to implement in a thread-safe way. However, we're about to add support
for multiple queues per lcore, which will greatly increase the
possibility of various race conditions.

We could have used something like an RCU for this use case, but absent
of a pressing need for thread safety we'll go the easy way and just
mandate that the API's are to be called when all affected ports are
stopped, and document this limitation. This greatly simplifies the
`rte_power_monitor`-related code.

Signed-off-by: Anatoly Burakov 
---

Notes:
v2:
- Add check for stopped queue
- Clarified doc message
- Added release notes

 doc/guides/rel_notes/release_21_08.rst |   5 +
 lib/power/meson.build  |   3 +
 lib/power/rte_power_pmd_mgmt.c | 133 ++---
 lib/power/rte_power_pmd_mgmt.h |   6 ++
 4 files changed, 67 insertions(+), 80 deletions(-)

diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index 9d1cfac395..f015c509fc 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -88,6 +88,11 @@ API Changes
 
 * eal: the ``rte_power_intrinsics`` API changed to use a callback mechanism.
 
+* rte_power: The experimental PMD power management API is no longer considered
+  to be thread safe; all Rx queues affected by the API will now need to be
+  stopped before making any changes to the power management scheme.
+
+
 ABI Changes
 ---
 
diff --git a/lib/power/meson.build b/lib/power/meson.build
index c1097d32f1..4f6a242364 100644
--- a/lib/power/meson.build
+++ b/lib/power/meson.build
@@ -21,4 +21,7 @@ headers = files(
 'rte_power_pmd_mgmt.h',
 'rte_power_guest_channel.h',
 )
+if cc.has_argument('-Wno-cast-qual')
+cflags += '-Wno-cast-qual'
+endif
 deps += ['timer', 'ethdev']
diff --git a/lib/power/rte_power_pmd_mgmt.c b/lib/power/rte_power_pmd_mgmt.c
index db03cbf420..9b95cf1794 100644
--- a/lib/power/rte_power_pmd_mgmt.c
+++ b/lib/power/rte_power_pmd_mgmt.c
@@ -40,8 +40,6 @@ struct pmd_queue_cfg {
/**< Callback mode for this queue */
const struct rte_eth_rxtx_callback *cur_cb;
/**< Callback instance */
-   volatile bool umwait_in_progress;
-   /**< are we currently sleeping? */
uint64_t empty_poll_stats;
/**< Number of empty polls */
 } __rte_cache_aligned;
@@ -92,30 +90,11 @@ clb_umwait(uint16_t port_id, uint16_t qidx, struct rte_mbuf **pkts __rte_unused,
struct rte_power_monitor_cond pmc;
uint16_t ret;
 
-   /*
-* we might get a cancellation request while being
-* inside the callback, in which case the wakeup
-* wouldn't work because it would've arrived too early.
-*
-* to get around this, we notify the other thread that
-* we're sleeping, so that it can spin until we're done.
-* unsolicited wakeups are perfectly safe.
-*/
-   q_conf->umwait_in_progress = true;
-
-   rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
-
-   /* check if we need to cancel sleep */
-   if (q_conf->pwr_mgmt_state == PMD_MGMT_ENABLED) {
-   /* use monitoring condition to sleep */
-   ret = rte_eth_get_monitor_addr(port_id, qidx,
-   &pmc);
-   if (ret == 0)
-   rte_power_monitor(&pmc, UINT64_MAX);
-   }
-   q_conf->umwait_in_progress = false;
-
-   rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
+   /* use monitoring condition to sleep */
+   ret = rte_eth_get_monitor_addr(port_id, qidx,
+   &pmc);
+   if (ret == 0)
+   rte_power_monitor(&pmc, UINT64_MAX);
}
} else
q_conf->empty_poll_stats = 0;
@@ -177,12 +156,24 @@ clb_scale_freq(uint16_t port_id, uint16_t qidx,
return nb_rx;
 }
 
+static int
+queue_stopped(const uint16_t port_id, const uint16_t queue_id)
+{
+   struct rte_eth_rxq_info qinfo;
+
+   if (rte_eth_rx_queue_info_get(port_id, queue_id, &qinfo) < 0)
+   return -1;
+
+   return qinfo.queue_state == RTE_ETH_QUEUE_STATE_STOPPED;
+}
+
 int
 rte_power_ethdev_pmgmt_queue_enable(unsigned int lcore_id, uint16_t port_id,
uint16_t queue_id, enum rte_power_pmd_mgmt_type mode)
 {
struct pmd_queue_cfg *queue_cfg;
struct rte_eth_dev_
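Under the new contract, the caller quiesces the queue before reconfiguring it;
a minimal hedged sketch (port, queue and lcore ids are placeholders):

#include <rte_ethdev.h>
#include <rte_power_pmd_mgmt.h>

static int
enable_pmgmt(unsigned int lcore_id, uint16_t port_id, uint16_t queue_id)
{
    int ret;

    /* the Rx queue must be stopped before changing the scheme */
    ret = rte_eth_dev_rx_queue_stop(port_id, queue_id);
    if (ret < 0)
        return ret;
    ret = rte_power_ethdev_pmgmt_queue_enable(lcore_id, port_id,
            queue_id, RTE_POWER_MGMT_TYPE_MONITOR);
    if (ret < 0)
        return ret;
    return rte_eth_dev_rx_queue_start(port_id, queue_id);
}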

[dpdk-dev] [PATCH v4 5/7] power: support callbacks for multiple Rx queues

2021-06-28 Thread Anatoly Burakov
Currently, there is a hard limitation on the PMD power management
support that only allows it to support a single queue per lcore. This is
not ideal as most DPDK use cases will poll multiple queues per core.

The PMD power management mechanism relies on ethdev Rx callbacks, so it
is very difficult to implement such support because callbacks are
effectively stateless and have no visibility into what the other ethdev
devices are doing. This places limitations on what we can do within the
framework of Rx callbacks, but the basics of this implementation are as
follows:

- Replace per-queue structures with per-lcore ones, so that any device
  polled from the same lcore can share data
- Any queue that is going to be polled from a specific lcore has to be
  added to the list of cores to poll, so that the callback is aware of
  other queues being polled by the same lcore
- Both the empty poll counter and the actual power saving mechanism is
  shared between all queues polled on a particular lcore, and is only
  activated when a special designated "power saving" queue is polled. To
  put it another way, we have no idea which queue the user will poll in
  what order, so we rely on them telling us that queue X is the last one
  in the polling loop, so any power management should happen there.
- A new API is added to mark a specific Rx queue as "power saving".
  Failing to call this API will result in no power management, however
  when having only one queue per core it is obvious which queue is the
  "power saving" one, so things will still work without this new API for
  use cases that were previously working without it.
- The limitation on UMWAIT-based polling is not removed because UMWAIT
  is incapable of monitoring more than one address.

Also, while we're at it, update and improve the docs.

Signed-off-by: Anatoly Burakov 
---

Notes:
v3:
- Move the list of supported NICs to NIC feature table

v2:
- Use a TAILQ for queues instead of a static array
- Address feedback from Konstantin
- Add additional checks for stopped queues

 doc/guides/nics/features.rst   |  10 +
 doc/guides/prog_guide/power_man.rst|  75 +++--
 doc/guides/rel_notes/release_21_08.rst |   3 +
 lib/power/rte_power_pmd_mgmt.c | 381 -
 lib/power/rte_power_pmd_mgmt.h |  34 +++
 lib/power/version.map  |   3 +
 6 files changed, 412 insertions(+), 94 deletions(-)

diff --git a/doc/guides/nics/features.rst b/doc/guides/nics/features.rst
index 403c2b03a3..a96e12d155 100644
--- a/doc/guides/nics/features.rst
+++ b/doc/guides/nics/features.rst
@@ -912,6 +912,16 @@ Supports to get Rx/Tx packet burst mode information.
 * **[implements] eth_dev_ops**: ``rx_burst_mode_get``, ``tx_burst_mode_get``.
 * **[related] API**: ``rte_eth_rx_burst_mode_get()``, ``rte_eth_tx_burst_mode_get()``.
 
+.. _nic_features_get_monitor_addr:
+
+PMD power management using monitor addresses
+
+
+Supports getting a monitoring condition to use together with Ethernet PMD power
+management (see :doc:`../prog_guide/power_man` for more details).
+
+* **[implements] eth_dev_ops**: ``get_monitor_addr``
+
 .. _nic_features_other:
 
 Other dev ops not represented by a Feature
diff --git a/doc/guides/prog_guide/power_man.rst b/doc/guides/prog_guide/power_man.rst
index c70ae128ac..fac2c19516 100644
--- a/doc/guides/prog_guide/power_man.rst
+++ b/doc/guides/prog_guide/power_man.rst
@@ -198,34 +198,41 @@ Ethernet PMD Power Management API
 Abstract
 
 
-Existing power management mechanisms require developers
-to change application design or change code to make use of it.
-The PMD power management API provides a convenient alternative
-by utilizing Ethernet PMD RX callbacks,
-and triggering power saving whenever empty poll count reaches a certain number.
-
-Monitor
-   This power saving scheme will put the CPU into optimized power state
-   and use the ``rte_power_monitor()`` function
-   to monitor the Ethernet PMD RX descriptor address,
-   and wake the CPU up whenever there's new traffic.
-
-Pause
-   This power saving scheme will avoid busy polling
-   by either entering power-optimized sleep state
-   with ``rte_power_pause()`` function,
-   or, if it's not available, use ``rte_pause()``.
-
-Frequency scaling
-   This power saving scheme will use ``librte_power`` library
-   functionality to scale the core frequency up/down
-   depending on traffic volume.
-
-.. note::
-
-   Currently, this power management API is limited to mandatory mapping
-   of 1 queue to 1 core (multiple queues are supported,
-   but they must be polled from different cores).
+Existing power management mechanisms require developers to change application
+design or change code to make use of it. The PMD power management API provides a
+convenient alternative by utilizing Ethernet PMD RX callbacks, and triggering
+power saving whenever empty poll count reaches a certain numb
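A hedged sketch of the intended per-lcore setup with the new API (it mirrors
the l3fwd-power change in patch 7/7; the counts and ids are placeholders):

#include <rte_power_pmd_mgmt.h>

static void
setup_lcore_queues(unsigned int lcore_id, uint16_t port_id, uint16_t n_rxq)
{
    uint16_t q;

    /* enable power management on every queue this lcore polls */
    for (q = 0; q < n_rxq; q++)
        rte_power_ethdev_pmgmt_queue_enable(lcore_id, port_id, q,
                RTE_POWER_MGMT_TYPE_MONITOR);

    /* the last queue in the polling loop triggers the power saving */
    rte_power_ethdev_pmgmt_queue_set_power_save(lcore_id, port_id,
            n_rxq - 1);
}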

[dpdk-dev] [PATCH v4 6/7] power: support monitoring multiple Rx queues

2021-06-28 Thread Anatoly Burakov
Use the new multi-monitor intrinsic to allow monitoring multiple ethdev
Rx queues while entering the energy efficient power state. The multi
version will be used unconditionally if supported, and the UMWAIT one
will only be used when multi-monitor is not supported by the hardware.

Signed-off-by: Anatoly Burakov 
---

Notes:
v4:
- Fix possible out of bounds access
- Added missing index increment

 doc/guides/prog_guide/power_man.rst |  9 ++--
 lib/power/rte_power_pmd_mgmt.c  | 84 -
 2 files changed, 88 insertions(+), 5 deletions(-)

diff --git a/doc/guides/prog_guide/power_man.rst b/doc/guides/prog_guide/power_man.rst
index fac2c19516..3245a5ebed 100644
--- a/doc/guides/prog_guide/power_man.rst
+++ b/doc/guides/prog_guide/power_man.rst
@@ -221,13 +221,16 @@ power saving whenever empty poll count reaches a certain number.
 The "monitor" mode is only supported in the following configurations and scenarios:
 
 * If ``rte_cpu_get_intrinsics_support()`` function indicates that
+  ``rte_power_monitor_multi()`` function is supported by the platform, then
+  monitoring multiple Ethernet Rx queues for traffic will be supported.
+
+* If ``rte_cpu_get_intrinsics_support()`` function indicates that only
   ``rte_power_monitor()`` is supported by the platform, then monitoring will be
   limited to a mapping of 1 core 1 queue (thus, each Rx queue will have to be
   monitored from a different lcore).
 
-* If ``rte_cpu_get_intrinsics_support()`` function indicates that the
-  ``rte_power_monitor()`` function is not supported, then monitor mode will not
-  be supported.
+* If ``rte_cpu_get_intrinsics_support()`` function indicates that neither of the
+  two monitoring functions are supported, then monitor mode will not be supported.
 
 * Not all Ethernet devices support monitoring, even if the underlying
   platform may support the necessary CPU instructions. Please refer to
diff --git a/lib/power/rte_power_pmd_mgmt.c b/lib/power/rte_power_pmd_mgmt.c
index 7762cd39b8..97c9f1ea36 100644
--- a/lib/power/rte_power_pmd_mgmt.c
+++ b/lib/power/rte_power_pmd_mgmt.c
@@ -155,6 +155,32 @@ queue_list_remove(struct pmd_core_cfg *cfg, const union queue *q)
return 0;
 }
 
+static inline int
+get_monitor_addresses(struct pmd_core_cfg *cfg,
+   struct rte_power_monitor_cond *pmc, size_t len)
+{
+   const struct queue_list_entry *qle;
+   size_t i = 0;
+   int ret;
+
+   TAILQ_FOREACH(qle, &cfg->head, next) {
+   const union queue *q = &qle->queue;
+   struct rte_power_monitor_cond *cur;
+
+   /* attempted out of bounds access */
+   if (i >= len) {
+   RTE_LOG(ERR, POWER, "Too many queues being monitored\n");
+   return -1;
+   }
+
+   cur = &pmc[i++];
+   ret = rte_eth_get_monitor_addr(q->portid, q->qid, cur);
+   if (ret < 0)
+   return ret;
+   }
+   return 0;
+}
+
 static void
 calc_tsc(void)
 {
@@ -183,6 +209,48 @@ calc_tsc(void)
}
 }
 
+static uint16_t
+clb_multiwait(uint16_t port_id, uint16_t qidx,
+   struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx,
+   uint16_t max_pkts __rte_unused, void *addr __rte_unused)
+{
+   const unsigned int lcore = rte_lcore_id();
+   const union queue q = {.portid = port_id, .qid = qidx};
+   const bool empty = nb_rx == 0;
+   struct pmd_core_cfg *q_conf;
+
+   q_conf = &lcore_cfg[lcore];
+
+   /* early exit */
+   if (likely(!empty)) {
+   q_conf->empty_poll_stats = 0;
+   } else {
+   /* do we care about this particular queue? */
+   if (!queue_is_power_save(q_conf, &q))
+   return nb_rx;
+
+   /*
+* we can increment unconditionally here because if there were
+* non-empty polls in other queues assigned to this core, we
+* dropped the counter to zero anyway.
+*/
+   q_conf->empty_poll_stats++;
+   if (unlikely(q_conf->empty_poll_stats > EMPTYPOLL_MAX)) {
+   struct rte_power_monitor_cond pmc[RTE_MAX_ETHPORTS];
+   uint16_t ret;
+
+   /* gather all monitoring conditions */
+   ret = get_monitor_addresses(q_conf, pmc, RTE_DIM(pmc));
+
+   if (ret == 0)
+   rte_power_monitor_multi(pmc,
+   q_conf->n_queues, UINT64_MAX);
+   }
+   }
+
+   return nb_rx;
+}
+
 static uint16_t
 clb_umwait(uint16_t port_id, uint16_t qidx, struct rte_mbuf **pkts __rte_unused,
uint16_t nb_rx, uint16_t max_pkts __rte_unused,
@@ -348,14 +416,19 @@ static int
 check_monitor(struct pmd_core_cfg *cfg, const union queue *qdata)
 {
struct rte_power_monitor_cond du

[dpdk-dev] [PATCH v4 7/7] l3fwd-power: support multiqueue in PMD pmgmt modes

2021-06-28 Thread Anatoly Burakov
Currently, l3fwd-power enforces the limitation of having one queue per
lcore. This is no longer necessary, so remove the limitation, and always
mark the last queue in qconf as the power save queue.

Signed-off-by: Anatoly Burakov 
---
 examples/l3fwd-power/main.c | 39 +++--
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c
index f8dfed1634..3057c06936 100644
--- a/examples/l3fwd-power/main.c
+++ b/examples/l3fwd-power/main.c
@@ -2498,6 +2498,27 @@ mode_to_str(enum appmode mode)
}
 }
 
+static void
+pmd_pmgmt_set_up(unsigned int lcore, uint16_t portid, uint16_t qid, bool last)
+{
+   int ret;
+
+   ret = rte_power_ethdev_pmgmt_queue_enable(lcore, portid,
+   qid, pmgmt_type);
+   if (ret < 0)
+   rte_exit(EXIT_FAILURE,
+   "rte_power_ethdev_pmgmt_queue_enable: err=%d, 
port=%d\n",
+   ret, portid);
+
+   if (!last)
+   return;
+   ret = rte_power_ethdev_pmgmt_queue_set_power_save(lcore, portid, qid);
+   if (ret < 0)
+   rte_exit(EXIT_FAILURE,
+   "rte_power_ethdev_pmgmt_queue_set_power_save: err=%d, port=%d\n",
+   ret, portid);
+}
+
 int
 main(int argc, char **argv)
 {
@@ -2723,12 +2744,6 @@ main(int argc, char **argv)
printf("\nInitializing rx queues on lcore %u ... ", lcore_id );
fflush(stdout);
 
-   /* PMD power management mode can only do 1 queue per core */
-   if (app_mode == APP_MODE_PMD_MGMT && qconf->n_rx_queue > 1) {
-   rte_exit(EXIT_FAILURE,
-   "In PMD power management mode, only one queue per lcore is allowed\n");
-   }
-
/* init RX queues */
for(queue = 0; queue < qconf->n_rx_queue; ++queue) {
struct rte_eth_rxconf rxq_conf;
@@ -2767,15 +2782,9 @@ main(int argc, char **argv)
 "Fail to add ptype cb\n");
}
 
-   if (app_mode == APP_MODE_PMD_MGMT) {
-   ret = rte_power_ethdev_pmgmt_queue_enable(
-   lcore_id, portid, queueid,
-   pmgmt_type);
-   if (ret < 0)
-   rte_exit(EXIT_FAILURE,
-   "rte_power_ethdev_pmgmt_queue_enable: err=%d, port=%d\n",
-   ret, portid);
-   }
+   if (app_mode == APP_MODE_PMD_MGMT)
+   pmd_pmgmt_set_up(lcore_id, portid, queueid,
+   queue == (qconf->n_rx_queue - 1));
}
}
 
-- 
2.25.1



[dpdk-dev] [PATCH v2 00/16] Add support for fourth generation of Intel QuickAssist Technology devices

2021-06-28 Thread Arek Kusztal
This patchset adds support for fourth generation (GEN4) of Intel QuickAssist 
Technology (QAT) devices.
Symmetric crypto PMD is enabled with following algorithms:

* AES-CBC
* AES-CMAC
* AES-XCBC MAC
* NULL (auth, cipher)
* SHA1-HMAC
* SHA2-HMAC (224, 256, 384, 512)
* Chacha20-Poly1305
* AES-CCM
* AES-GCM

Other services (compression, asymmetric crypto) are not added with this 
patchset.

v2:
- add service discovery

Adam Dybkowski (2):
  crypto/qat: enable RAW API on QAT GEN1-3 only
  test/crypto: check if RAW API is supported

Arek Kusztal (13):
  common/qat: rework qp per service function
  crypto/qat: add support for generation 4 devices
  crypto/qat: enable gen4 legacy algorithms
  crypto/qat: add gen4 ucs slice type, add ctr mode
  crypto/qat: rename content descriptor functions
  crypto/qat: add legacy gcm and ccm
  crypto/qat: rework init common header function
  crypto/qat: add aes gcm in ucs spc mode
  crypto/qat: add chacha-poly in ucs spc mode
  crypto/qat: add gmac in legacy mode on gen 4
  common/qat: add pf2vf communication in qat
  common/qat: reset ring pairs before setting gen4
  common/qat: add service discovery to qat gen4

Fan Zhang (1):
  crypto/qat: update raw dp api

 app/test/test_cryptodev.c |  34 +-
 doc/guides/cryptodevs/qat.rst |  10 +-
 doc/guides/rel_notes/release_21_08.rst|   6 +
 drivers/common/qat/meson.build|   1 +
 drivers/common/qat/qat_adf/adf_pf2vf_msg.h| 154 ++
 .../adf_transport_access_macros_gen4.h|  52 ++
 .../adf_transport_access_macros_gen4vf.h  |  48 ++
 drivers/common/qat/qat_adf/icp_qat_fw_la.h|  28 ++
 drivers/common/qat/qat_adf/icp_qat_hw.h   |  10 +
 drivers/common/qat/qat_common.h   |  11 +-
 drivers/common/qat/qat_device.c   |  88 
 drivers/common/qat/qat_device.h   |  18 +
 drivers/common/qat/qat_pf2vf.c|  80 +++
 drivers/common/qat/qat_pf2vf.h|  19 +
 drivers/common/qat/qat_qp.c   | 269 +++---
 drivers/common/qat/qat_qp.h   |  30 +-
 drivers/compress/qat/qat_comp_pmd.c   |  16 +-
 drivers/crypto/qat/qat_asym_pmd.c |  16 +-
 drivers/crypto/qat/qat_sym.c  |  57 ++-
 drivers/crypto/qat/qat_sym_capabilities.h | 472 ++
 drivers/crypto/qat/qat_sym_hw_dp.c| 419 
 drivers/crypto/qat/qat_sym_pmd.c  |  53 +-
 drivers/crypto/qat/qat_sym_session.c  | 335 -
 drivers/crypto/qat/qat_sym_session.h  |  31 +-
 24 files changed, 1782 insertions(+), 475 deletions(-)
 create mode 100644 drivers/common/qat/qat_adf/adf_pf2vf_msg.h
 create mode 100644 drivers/common/qat/qat_adf/adf_transport_access_macros_gen4.h
 create mode 100644 drivers/common/qat/qat_adf/adf_transport_access_macros_gen4vf.h
 create mode 100644 drivers/common/qat/qat_pf2vf.c
 create mode 100644 drivers/common/qat/qat_pf2vf.h

-- 
2.30.2



[dpdk-dev] [PATCH v2 01/16] common/qat: rework qp per service function

2021-06-28 Thread Arek Kusztal
Different generations of Intel QuickAssist Technology devices may
differ in their approach to allocating queues. The function that
counts queue pairs per service therefore needs to be more generic.

Signed-off-by: Arek Kusztal 
---
 drivers/common/qat/qat_qp.c | 15 ++-
 drivers/common/qat/qat_qp.h |  2 +-
 drivers/compress/qat/qat_comp_pmd.c |  9 -
 drivers/crypto/qat/qat_asym_pmd.c   |  9 -
 drivers/crypto/qat/qat_sym_pmd.c|  9 -
 5 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/drivers/common/qat/qat_qp.c b/drivers/common/qat/qat_qp.c
index 4a8078541c..aa64d2e168 100644
--- a/drivers/common/qat/qat_qp.c
+++ b/drivers/common/qat/qat_qp.c
@@ -145,14 +145,19 @@ static void adf_queue_arb_disable(struct qat_queue *txq, 
void *base_addr,
rte_spinlock_t *lock);
 
 
-int qat_qps_per_service(const struct qat_qp_hw_data *qp_hw_data,
+int qat_qps_per_service(struct qat_pci_device *qat_dev,
enum qat_service_type service)
 {
-   int i, count;
-
-   for (i = 0, count = 0; i < ADF_MAX_QPS_ON_ANY_SERVICE; i++)
-   if (qp_hw_data[i].service_type == service)
+   int i = 0, count = 0, max_ops_per_srv = 0;
+   const struct qat_qp_hw_data*
+   sym_hw_qps = qat_gen_config[qat_dev->qat_dev_gen]
+   .qp_hw_data[service];
+
+   max_ops_per_srv = ADF_MAX_QPS_ON_ANY_SERVICE;
+   for (; i < max_ops_per_srv; i++)
+   if (sym_hw_qps[i].service_type == service)
count++;
+
return count;
 }
 
diff --git a/drivers/common/qat/qat_qp.h b/drivers/common/qat/qat_qp.h
index 74f7e7daee..d353e8552b 100644
--- a/drivers/common/qat/qat_qp.h
+++ b/drivers/common/qat/qat_qp.h
@@ -98,7 +98,7 @@ qat_qp_setup(struct qat_pci_device *qat_dev,
struct qat_qp_config *qat_qp_conf);
 
 int
-qat_qps_per_service(const struct qat_qp_hw_data *qp_hw_data,
+qat_qps_per_service(struct qat_pci_device *qat_dev,
enum qat_service_type service);
 
 int
diff --git a/drivers/compress/qat/qat_comp_pmd.c b/drivers/compress/qat/qat_comp_pmd.c
index 8de41f6b6e..6eb1ae3a21 100644
--- a/drivers/compress/qat/qat_comp_pmd.c
+++ b/drivers/compress/qat/qat_comp_pmd.c
@@ -106,6 +106,7 @@ qat_comp_qp_setup(struct rte_compressdev *dev, uint16_t qp_id,
struct qat_qp **qp_addr =
(struct qat_qp **)&(dev->data->queue_pairs[qp_id]);
struct qat_comp_dev_private *qat_private = dev->data->dev_private;
+   struct qat_pci_device *qat_dev = qat_private->qat_dev;
const struct qat_qp_hw_data *comp_hw_qps =
qat_gen_config[qat_private->qat_dev->qat_dev_gen]
  .qp_hw_data[QAT_SERVICE_COMPRESSION];
@@ -117,7 +118,7 @@ qat_comp_qp_setup(struct rte_compressdev *dev, uint16_t qp_id,
if (ret < 0)
return ret;
}
-   if (qp_id >= qat_qps_per_service(comp_hw_qps,
+   if (qp_id >= qat_qps_per_service(qat_dev,
 QAT_SERVICE_COMPRESSION)) {
QAT_LOG(ERR, "qp_id %u invalid for this device", qp_id);
return -EINVAL;
@@ -592,13 +593,11 @@ qat_comp_dev_info_get(struct rte_compressdev *dev,
struct rte_compressdev_info *info)
 {
struct qat_comp_dev_private *comp_dev = dev->data->dev_private;
-   const struct qat_qp_hw_data *comp_hw_qps =
-   qat_gen_config[comp_dev->qat_dev->qat_dev_gen]
- .qp_hw_data[QAT_SERVICE_COMPRESSION];
+   struct qat_pci_device *qat_dev = comp_dev->qat_dev;
 
if (info != NULL) {
info->max_nb_queue_pairs =
-   qat_qps_per_service(comp_hw_qps,
+   qat_qps_per_service(qat_dev,
QAT_SERVICE_COMPRESSION);
info->feature_flags = dev->feature_flags;
info->capabilities = comp_dev->qat_dev_capabilities;
diff --git a/drivers/crypto/qat/qat_asym_pmd.c b/drivers/crypto/qat/qat_asym_pmd.c
index a2c8aca2c1..f0c8ed1bcf 100644
--- a/drivers/crypto/qat/qat_asym_pmd.c
+++ b/drivers/crypto/qat/qat_asym_pmd.c
@@ -54,12 +54,10 @@ static void qat_asym_dev_info_get(struct rte_cryptodev *dev,
  struct rte_cryptodev_info *info)
 {
struct qat_asym_dev_private *internals = dev->data->dev_private;
-   const struct qat_qp_hw_data *asym_hw_qps =
-   qat_gen_config[internals->qat_dev->qat_dev_gen]
- .qp_hw_data[QAT_SERVICE_ASYMMETRIC];
+   struct qat_pci_device *qat_dev = internals->qat_dev;
 
if (info != NULL) {
-   info->max_nb_queue_pairs = qat_qps_per_service(asym_hw_qps,
+   info->max_nb_queue_pairs = qat_qps_per_service(qat_dev,
QAT_SERVICE_ASYMMETRIC);

[dpdk-dev] [PATCH v2 02/16] crypto/qat: add support for generation 4 devices

2021-06-28 Thread Arek Kusztal
This commit adds support for fourth generation (GEN4) of
Intel QuickAssist Technology (QAT) devices.

Signed-off-by: Arek Kusztal 
---
 doc/guides/cryptodevs/qat.rst |  10 +-
 doc/guides/rel_notes/release_21_08.rst|   6 +
 .../adf_transport_access_macros_gen4.h|  52 
 .../adf_transport_access_macros_gen4vf.h  |  48 
 drivers/common/qat/qat_common.h   |   3 +-
 drivers/common/qat/qat_device.c   |  22 ++
 drivers/common/qat/qat_device.h   |   3 +
 drivers/common/qat/qat_qp.c   | 243 +-
 drivers/common/qat/qat_qp.h   |  29 ++-
 drivers/compress/qat/qat_comp_pmd.c   |   7 +-
 drivers/crypto/qat/qat_asym_pmd.c |   7 +-
 drivers/crypto/qat/qat_sym_pmd.c  |  33 ++-
 drivers/crypto/qat/qat_sym_session.c  |   1 +
 13 files changed, 386 insertions(+), 78 deletions(-)
 create mode 100644 drivers/common/qat/qat_adf/adf_transport_access_macros_gen4.h
 create mode 100644 drivers/common/qat/qat_adf/adf_transport_access_macros_gen4vf.h

diff --git a/doc/guides/cryptodevs/qat.rst b/doc/guides/cryptodevs/qat.rst
index 96f5ab6afe..666a01df33 100644
--- a/doc/guides/cryptodevs/qat.rst
+++ b/doc/guides/cryptodevs/qat.rst
@@ -25,6 +25,7 @@ poll mode crypto driver support for the following hardware accelerator devices:
 * ``Intel QuickAssist Technology 200xx``
 * ``Intel QuickAssist Technology D15xx``
 * ``Intel QuickAssist Technology C4xxx``
+* ``Intel QuickAssist Technology 4xxx``
 
 
 Features
@@ -94,15 +95,16 @@ All the usual chains are supported and also some mixed chains:
+==+===+=+==+==+
| NULL CIPHER  | Y | 2&3 | 2&3  | Y|
+--+---+-+--+--+
-   | SNOW3G UEA2  | 2&3   | Y   | 2&3  | 2&3  |
+   | SNOW3G UEA2  | 2&3   | 1&2&3   | 2&3  | 2&3  |
+--+---+-+--+--+
| ZUC EEA3 | 2&3   | 2&3 | 2&3  | 2&3  |
+--+---+-+--+--+
-   | AES CTR  | Y | 2&3 | 2&3  | Y|
+   | AES CTR  | 1&2&3 | 2&3 | 2&3  | Y|
+--+---+-+--+--+
 
 * The combinations marked as "Y" are supported on all QAT hardware versions.
-* The combinations marked as "2&3" are supported on GEN2/GEN3 QAT hardware only.
+* The combinations marked as "2&3" are supported on GEN2 and GEN3 QAT hardware only.
+* The combinations marked as "1&2&3" are supported on GEN1, GEN2 and GEN3 QAT hardware only.
 
 
 Limitations
@@ -373,6 +375,8 @@ to see the full table)

+-+-+-+-+--+---+---+++--+++
| Yes | No  | No  | 3   | C4xxx| p | qat_c4xxx | c4xxx  | 18a0   | 1| 18a1   | 128|
+-+-+-+-+--+---+---+++--+++
+   | Yes | No  | No  | 4   | 4xxx | N/A   | qat_4xxx  | 4xxx   | 4940   | 4| 4941   | 16 |
+   +-+-+-+-+--+---+---+++--+++
 
 * Note: Symmetric mixed crypto algorithms feature on Gen 2 works only with 01.org driver version 4.9.0+
 
diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index a6ecfdf3ce..69ef43acf6 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -55,6 +55,12 @@ New Features
  Also, make sure to start the actual text at the margin.
  ===
 
+* **Updated Intel QuickAssist PMD.**
+
+  Added support for fourth generation QuickAssist Technology devices.
+  Only symmetric crypto is currently enabled; the compression and asymmetric
+  crypto PMDs will fail to create.
+
 
 Removed Items
 -
diff --git a/drivers/common/qat/qat_adf/adf_transport_access_macros_gen4.h b/drivers/common/qat/qat_adf/adf_transport_access_macros_gen4.h
new file mode 100644
index 00..3ab873db5e
--- /dev/null
+++ b/drivers/common/qat/qat_adf/adf_transport_access_macros_gen4.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0)
+ * Copyright(c) 2021 Intel Corporation
+ */
+
+#ifndef ADF_TRANSPORT_ACCESS_MACROS_GEN4_H
+#define ADF_TRANSPORT_ACCESS_MACROS_GEN4_H
+
+#include "adf_transport_access_macros.h"
+
+#define ADF_RINGS_PER_INT_SRCSEL_GEN4 2
+#define ADF_BANK_INT_SRC_SEL_MASK_GEN4 0x44UL
+#define ADF_BANK_INT_FLAG_CLEAR_MASK_GEN4 0x3
+#define ADF_RING_BUNDLE_SIZE_GEN4 0x2000
+#define ADF_RING_CSR_ADDR_OFFSET_GEN4 0x10
+#define ADF_RING_CSR_RING_CONFIG_GEN

[dpdk-dev] [PATCH v2 04/16] crypto/qat: add gen4 ucs slice type, add ctr mode

2021-06-28 Thread Arek Kusztal
This commit adds unified cipher slice to Intel QuickAssist
Technology PMD and enables AES-CTR algorithm.

Signed-off-by: Arek Kusztal 
---
 drivers/common/qat/qat_adf/icp_qat_fw_la.h | 28 ++
 drivers/common/qat/qat_adf/icp_qat_hw.h| 10 
 drivers/crypto/qat/qat_sym_capabilities.h  | 20 
 drivers/crypto/qat/qat_sym_session.c   | 27 -
 drivers/crypto/qat/qat_sym_session.h   |  1 +
 5 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/drivers/common/qat/qat_adf/icp_qat_fw_la.h b/drivers/common/qat/qat_adf/icp_qat_fw_la.h
index 20eb145def..c4901eb869 100644
--- a/drivers/common/qat/qat_adf/icp_qat_fw_la.h
+++ b/drivers/common/qat/qat_adf/icp_qat_fw_la.h
@@ -371,4 +371,32 @@ struct icp_qat_fw_la_resp {
& ICP_QAT_FW_COMN_NEXT_ID_MASK) | \
((val) & ICP_QAT_FW_COMN_CURR_ID_MASK)) }
 
+#define ICP_QAT_FW_LA_USE_WIRELESS_SLICE_TYPE 2
+#define ICP_QAT_FW_LA_USE_UCS_SLICE_TYPE 1
+#define ICP_QAT_FW_LA_USE_LEGACY_SLICE_TYPE 0
+#define QAT_LA_SLICE_TYPE_BITPOS 14
+#define QAT_LA_SLICE_TYPE_MASK 0x3
+#define ICP_QAT_FW_LA_SLICE_TYPE_SET(flags, val)   \
+   QAT_FIELD_SET(flags, val, QAT_LA_SLICE_TYPE_BITPOS, \
+   QAT_LA_SLICE_TYPE_MASK)
+
+struct icp_qat_fw_la_cipher_20_req_params {
+   uint32_t cipher_offset;
+   uint32_t cipher_length;
+   union {
+   uint32_t cipher_IV_array[ICP_QAT_FW_NUM_LONGWORDS_4];
+   struct {
+   uint64_t cipher_IV_ptr;
+   uint64_t resrvd1;
+   } s;
+
+   } u;
+   uint32_t   spc_aad_offset;
+   uint32_t   spc_aad_sz;
+   uint64_t   spc_aad_addr;
+   uint64_t   spc_auth_res_addr;
+   uint8_treserved[3];
+   uint8_tspc_auth_res_sz;
+};
+
 #endif
diff --git a/drivers/common/qat/qat_adf/icp_qat_hw.h b/drivers/common/qat/qat_adf/icp_qat_hw.h
index fdc0f191a2..b1e6a1fa15 100644
--- a/drivers/common/qat/qat_adf/icp_qat_hw.h
+++ b/drivers/common/qat/qat_adf/icp_qat_hw.h
@@ -342,6 +342,16 @@ struct icp_qat_hw_cipher_algo_blk {
uint8_t key[ICP_QAT_HW_CIPHER_MAX_KEY_SZ];
 } __rte_cache_aligned;
 
+struct icp_qat_hw_ucs_cipher_config {
+   uint32_t val;
+   uint32_t reserved[3];
+};
+
+struct icp_qat_hw_cipher_algo_blk20 {
+   struct icp_qat_hw_ucs_cipher_config cipher_config;
+   uint8_t key[ICP_QAT_HW_CIPHER_MAX_KEY_SZ];
+} __rte_cache_aligned;
+
 /* = */
 /*COMPRESSION SLICE  */
 /* = */
diff --git a/drivers/crypto/qat/qat_sym_capabilities.h b/drivers/crypto/qat/qat_sym_capabilities.h
index 21c817bccc..aca528b991 100644
--- a/drivers/crypto/qat/qat_sym_capabilities.h
+++ b/drivers/crypto/qat/qat_sym_capabilities.h
@@ -1064,6 +1064,26 @@
.iv_size = { 0 }\
}, }\
}, }\
+   },  \
+   {   /* AES CTR */   \
+   .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC, \
+   {.sym = {   \
+   .xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER,  \
+   {.cipher = {\
+   .algo = RTE_CRYPTO_CIPHER_AES_CTR,  \
+   .block_size = 16,   \
+   .key_size = {   \
+   .min = 16,  \
+   .max = 32,  \
+   .increment = 8  \
+   },  \
+   .iv_size = {\
+   .min = 16,  \
+   .max = 16,  \
+   .increment = 0  \
+   }   \
+   }, }\
+   }, }\
}   \
 
 
diff --git a/drivers/crypto/qat/qat_sym_session.c b/drivers/crypto/qat/qat_sym_session.c
index 506ffddd20..2c44b1f1aa 100644
--- a/drivers/crypto/qat/qat_sym_session.c
+++ b/drivers/crypto/qat/qat_sym_session.c
@@ -246,6 +246,8 @@ qat_sym_session_configure_cipher(struct rte_cryptodev *dev
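A hedged fragment showing how the new slice-type field composes into the LA
flags word (the helper function is invented for illustration; QAT_FIELD_SET
comes from the existing firmware headers):

#include "icp_qat_fw_la.h"

static uint16_t
build_la_flags_ucs(void)
{
    uint16_t la_flags = 0;

    /* request the unified cipher slice (UCS) on GEN4 hardware */
    ICP_QAT_FW_LA_SLICE_TYPE_SET(la_flags,
            ICP_QAT_FW_LA_USE_UCS_SLICE_TYPE);
    return la_flags;
}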

[dpdk-dev] [PATCH v2 03/16] crypto/qat: enable gen4 legacy algorithms

2021-06-28 Thread Arek Kusztal
This commit enables algorithms labeled as 'legacy'
on QAT generation 4 devices.
Following algorithms were enabled:
* AES-CBC
* AES-CMAC
* AES-XCBC MAC
* NULL (auth, cipher)
* SHA1-HMAC
* SHA2-HMAC (224, 256, 384, 512)

Signed-off-by: Arek Kusztal 
---
 drivers/crypto/qat/qat_sym_capabilities.h | 337 ++
 drivers/crypto/qat/qat_sym_pmd.c  |   9 +-
 2 files changed, 344 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/qat/qat_sym_capabilities.h b/drivers/crypto/qat/qat_sym_capabilities.h
index f7cab2f471..21c817bccc 100644
--- a/drivers/crypto/qat/qat_sym_capabilities.h
+++ b/drivers/crypto/qat/qat_sym_capabilities.h
@@ -731,6 +731,343 @@
}, }\
}
 
+#define QAT_BASE_GEN4_SYM_CAPABILITIES \
+   {   /* AES CBC */   \
+   .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC, \
+   {.sym = {   \
+   .xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER,  \
+   {.cipher = {\
+   .algo = RTE_CRYPTO_CIPHER_AES_CBC,  \
+   .block_size = 16,   \
+   .key_size = {   \
+   .min = 16,  \
+   .max = 32,  \
+   .increment = 8  \
+   },  \
+   .iv_size = {\
+   .min = 16,  \
+   .max = 16,  \
+   .increment = 0  \
+   }   \
+   }, }\
+   }, }\
+   },  \
+   {   /* SHA1 HMAC */ \
+   .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC, \
+   {.sym = {   \
+   .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,\
+   {.auth = {  \
+   .algo = RTE_CRYPTO_AUTH_SHA1_HMAC,  \
+   .block_size = 64,   \
+   .key_size = {   \
+   .min = 1,   \
+   .max = 64,  \
+   .increment = 1  \
+   },  \
+   .digest_size = {\
+   .min = 1,   \
+   .max = 20,  \
+   .increment = 1  \
+   },  \
+   .iv_size = { 0 }\
+   }, }\
+   }, }\
+   },  \
+   {   /* SHA224 HMAC */   \
+   .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC, \
+   {.sym = {   \
+   .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,\
+   {.auth = {  \
+   .algo = RTE_CRYPTO_AUTH_SHA224_HMAC,\
+   .block_size = 64,   \
+   .key_size = {   \
+   .min = 1,   \
+   .max = 64,  \
+   .increment = 1  \
+   },  \
+   .digest_size = {\
+   .min = 1,   \
+   .max = 28,  \
+   

[dpdk-dev] [PATCH v2 05/16] crypto/qat: rename content descriptor functions

2021-06-28 Thread Arek Kusztal
Content descriptor functions are incorrectly named;
giving them proper names will improve readability and
facilitate further work.

Signed-off-by: Arek Kusztal 
---
 drivers/crypto/qat/qat_sym_session.c | 39 ++--
 drivers/crypto/qat/qat_sym_session.h | 13 --
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/drivers/crypto/qat/qat_sym_session.c b/drivers/crypto/qat/qat_sym_session.c
index 2c44b1f1aa..56c85e8435 100644
--- a/drivers/crypto/qat/qat_sym_session.c
+++ b/drivers/crypto/qat/qat_sym_session.c
@@ -57,6 +57,19 @@ static const uint8_t sha512InitialState[] = {
0x2b, 0x3e, 0x6c, 0x1f, 0x1f, 0x83, 0xd9, 0xab, 0xfb, 0x41, 0xbd,
0x6b, 0x5b, 0xe0, 0xcd, 0x19, 0x13, 0x7e, 0x21, 0x79};
 
+static int
+qat_sym_cd_cipher_set(struct qat_sym_session *cd,
+   const uint8_t *enckey,
+   uint32_t enckeylen);
+
+static int
+qat_sym_cd_auth_set(struct qat_sym_session *cdesc,
+   const uint8_t *authkey,
+   uint32_t authkeylen,
+   uint32_t aad_length,
+   uint32_t digestsize,
+   unsigned int operation);
+
 /** Frees a context previously created
  *  Depends on openssl libcrypto
  */
@@ -420,7 +433,7 @@ qat_sym_session_configure_cipher(struct rte_cryptodev *dev,
else
session->qat_dir = ICP_QAT_HW_CIPHER_DECRYPT;
 
-   if (qat_sym_session_aead_create_cd_cipher(session,
+   if (qat_sym_cd_cipher_set(session,
cipher_xform->key.data,
cipher_xform->key.length)) {
ret = -EINVAL;
@@ -669,7 +682,7 @@ qat_sym_session_handle_single_pass(struct qat_sym_session 
*session,
}
session->cipher_iv.offset = aead_xform->iv.offset;
session->cipher_iv.length = aead_xform->iv.length;
-   if (qat_sym_session_aead_create_cd_cipher(session,
+   if (qat_sym_cd_cipher_set(session,
aead_xform->key.data, aead_xform->key.length))
return -EINVAL;
session->aad_len = aead_xform->aad_length;
@@ -825,12 +838,12 @@ qat_sym_session_configure_auth(struct rte_cryptodev *dev,
 * then authentication
 */
 
-   if (qat_sym_session_aead_create_cd_cipher(session,
+   if (qat_sym_cd_cipher_set(session,
auth_xform->key.data,
auth_xform->key.length))
return -EINVAL;
 
-   if (qat_sym_session_aead_create_cd_auth(session,
+   if (qat_sym_cd_auth_set(session,
key_data,
key_length,
0,
@@ -845,7 +858,7 @@ qat_sym_session_configure_auth(struct rte_cryptodev *dev,
 * then cipher
 */
 
-   if (qat_sym_session_aead_create_cd_auth(session,
+   if (qat_sym_cd_auth_set(session,
key_data,
key_length,
0,
@@ -853,7 +866,7 @@ qat_sym_session_configure_auth(struct rte_cryptodev *dev,
auth_xform->op))
return -EINVAL;
 
-   if (qat_sym_session_aead_create_cd_cipher(session,
+   if (qat_sym_cd_cipher_set(session,
auth_xform->key.data,
auth_xform->key.length))
return -EINVAL;
@@ -861,7 +874,7 @@ qat_sym_session_configure_auth(struct rte_cryptodev *dev,
	/* Restore to authentication only */
session->qat_cmd = ICP_QAT_FW_LA_CMD_AUTH;
} else {
-   if (qat_sym_session_aead_create_cd_auth(session,
+   if (qat_sym_cd_auth_set(session,
key_data,
key_length,
0,
@@ -948,12 +961,12 @@ qat_sym_session_configure_aead(struct rte_cryptodev *dev,
crypto_operation = aead_xform->algo == RTE_CRYPTO_AEAD_AES_GCM ?
RTE_CRYPTO_AUTH_OP_GENERATE : RTE_CRYPTO_AUTH_OP_VERIFY;
 
-   if (qat_sym_session_aead_create_cd_cipher(session,
+   if (qat_sym_cd_cipher_set(session,
aead_xform->key.data,
   

[dpdk-dev] [PATCH v2 06/16] crypto/qat: add legacy gcm and ccm

2021-06-28 Thread Arek Kusztal
Add AES-GCM, AES-CCM algorithms in legacy mode.

Signed-off-by: Arek Kusztal 
---
 drivers/crypto/qat/qat_sym_capabilities.h | 60 +++
 drivers/crypto/qat/qat_sym_session.c  | 27 +-
 drivers/crypto/qat/qat_sym_session.h  |  3 +-
 3 files changed, 78 insertions(+), 12 deletions(-)

diff --git a/drivers/crypto/qat/qat_sym_capabilities.h 
b/drivers/crypto/qat/qat_sym_capabilities.h
index aca528b991..fc8e667687 100644
--- a/drivers/crypto/qat/qat_sym_capabilities.h
+++ b/drivers/crypto/qat/qat_sym_capabilities.h
@@ -1084,6 +1084,66 @@
}   \
}, }\
}, }\
+   },  \
+   {   /* AES GCM */   \
+   .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC, \
+   {.sym = {   \
+   .xform_type = RTE_CRYPTO_SYM_XFORM_AEAD,\
+   {.aead = {  \
+   .algo = RTE_CRYPTO_AEAD_AES_GCM,\
+   .block_size = 16,   \
+   .key_size = {   \
+   .min = 16,  \
+   .max = 32,  \
+   .increment = 8  \
+   },  \
+   .digest_size = {\
+   .min = 8,   \
+   .max = 16,  \
+   .increment = 4  \
+   },  \
+   .aad_size = {   \
+   .min = 0,   \
+   .max = 240, \
+   .increment = 1  \
+   },  \
+   .iv_size = {\
+   .min = 0,   \
+   .max = 12,  \
+   .increment = 12 \
+   },  \
+   }, }\
+   }, }\
+   },  \
+   {   /* AES CCM */   \
+   .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC, \
+   {.sym = {   \
+   .xform_type = RTE_CRYPTO_SYM_XFORM_AEAD,\
+   {.aead = {  \
+   .algo = RTE_CRYPTO_AEAD_AES_CCM,\
+   .block_size = 16,   \
+   .key_size = {   \
+   .min = 16,  \
+   .max = 16,  \
+   .increment = 0  \
+   },  \
+   .digest_size = {\
+   .min = 4,   \
+   .max = 16,  \
+   .increment = 2  \
+   },  \
+   .aad_size = {   \
+   .min = 0,   \
+   .max = 224, \
+   .increment = 1  \
+   },  \
+   .iv_size = {\
+   .min = 7,   \
+   .max = 13,  \
+   .increment = 1   

[dpdk-dev] [PATCH v2 07/16] crypto/qat: rework init common header function

2021-06-28 Thread Arek Kusztal
Rework the init common header function for the request
descriptor so that it needs to be called only once.

Signed-off-by: Arek Kusztal 
---
 drivers/crypto/qat/qat_sym.c |  25 +--
 drivers/crypto/qat/qat_sym_session.c | 265 ++-
 drivers/crypto/qat/qat_sym_session.h |  12 ++
 3 files changed, 158 insertions(+), 144 deletions(-)

diff --git a/drivers/crypto/qat/qat_sym.c b/drivers/crypto/qat/qat_sym.c
index 9415ec7d32..eef4a886c5 100644
--- a/drivers/crypto/qat/qat_sym.c
+++ b/drivers/crypto/qat/qat_sym.c
@@ -289,8 +289,9 @@ qat_sym_build_request(void *in_op, uint8_t *out_msg,
auth_param = (void *)((uint8_t *)cipher_param +
ICP_QAT_FW_HASH_REQUEST_PARAMETERS_OFFSET);
 
-   if (ctx->qat_cmd == ICP_QAT_FW_LA_CMD_HASH_CIPHER ||
-   ctx->qat_cmd == ICP_QAT_FW_LA_CMD_CIPHER_HASH) {
+   if ((ctx->qat_cmd == ICP_QAT_FW_LA_CMD_HASH_CIPHER ||
+   ctx->qat_cmd == ICP_QAT_FW_LA_CMD_CIPHER_HASH) &&
+   !ctx->is_gmac) {
/* AES-GCM or AES-CCM */
if (ctx->qat_hash_alg == ICP_QAT_HW_AUTH_ALGO_GALOIS_128 ||
ctx->qat_hash_alg == ICP_QAT_HW_AUTH_ALGO_GALOIS_64 ||
@@ -303,7 +304,7 @@ qat_sym_build_request(void *in_op, uint8_t *out_msg,
do_auth = 1;
do_cipher = 1;
}
-   } else if (ctx->qat_cmd == ICP_QAT_FW_LA_CMD_AUTH) {
+   } else if (ctx->qat_cmd == ICP_QAT_FW_LA_CMD_AUTH || ctx->is_gmac) {
do_auth = 1;
do_cipher = 0;
} else if (ctx->qat_cmd == ICP_QAT_FW_LA_CMD_CIPHER) {
@@ -383,15 +384,6 @@ qat_sym_build_request(void *in_op, uint8_t *out_msg,
auth_param->u1.aad_adr = 0;
auth_param->u2.aad_sz = 0;
 
-   /*
-* If len(iv)==12B fw computes J0
-*/
-   if (ctx->auth_iv.length == 12) {
-   ICP_QAT_FW_LA_GCM_IV_LEN_FLAG_SET(
-   qat_req->comn_hdr.serv_specif_flags,
-   ICP_QAT_FW_LA_GCM_IV_LEN_12_OCTETS);
-
-   }
} else {
auth_ofs = op->sym->auth.data.offset;
auth_len = op->sym->auth.data.length;
@@ -416,14 +408,7 @@ qat_sym_build_request(void *in_op, uint8_t *out_msg,
ICP_QAT_HW_AUTH_ALGO_GALOIS_128 ||
ctx->qat_hash_alg ==
ICP_QAT_HW_AUTH_ALGO_GALOIS_64) {
-   /*
-* If len(iv)==12B fw computes J0
-*/
-   if (ctx->cipher_iv.length == 12) {
-   ICP_QAT_FW_LA_GCM_IV_LEN_FLAG_SET(
-   qat_req->comn_hdr.serv_specif_flags,
-   ICP_QAT_FW_LA_GCM_IV_LEN_12_OCTETS);
-   }
+
set_cipher_iv(ctx->cipher_iv.length,
ctx->cipher_iv.offset,
cipher_param, op, qat_req);
diff --git a/drivers/crypto/qat/qat_sym_session.c 
b/drivers/crypto/qat/qat_sym_session.c
index 5140d61a9c..fd6fe4423d 100644
--- a/drivers/crypto/qat/qat_sym_session.c
+++ b/drivers/crypto/qat/qat_sym_session.c
@@ -69,6 +69,16 @@ qat_sym_cd_auth_set(struct qat_sym_session *cdesc,
uint32_t aad_length,
uint32_t digestsize,
unsigned int operation);
+static void
+qat_sym_session_init_common_hdr(struct qat_sym_session *session);
+
+/* Req/cd init functions */
+
+static void
+qat_sym_session_finalize(struct qat_sym_session *session)
+{
+   qat_sym_session_init_common_hdr(session);
+}
 
 /** Frees a context previously created
  *  Depends on openssl libcrypto
@@ -558,6 +568,7 @@ qat_sym_session_set_parameters(struct rte_cryptodev *dev,
enum qat_device_gen qat_dev_gen = internals->qat_dev->qat_dev_gen;
int ret;
int qat_cmd_id;
+   int handle_mixed = 0;
 
/* Verify the session physical address is known */
rte_iova_t session_paddr = rte_mempool_virt2iova(session);
@@ -573,6 +584,7 @@ qat_sym_session_set_parameters(struct rte_cryptodev *dev,
offsetof(struct qat_sym_session, cd);
 
session->min_qat_dev_gen = QAT_GEN1;
+   session->qat_proto_flag = QAT_CRYPTO_PROTO_FLAG_NONE;
session->is_ucs = 0;
 
/* Get requested QAT command id */
@@ -612,8 +624,7 @@ qat_sym_session_set_parameters(struct rte_cryptodev *dev,
xform, session);
if (ret < 0)
return ret;
-   

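For reference, the rework converges on a build-then-finalize shape: the
per-xform setup only mutates the session, and the common request header is
initialized exactly once at the end. A minimal sketch of that call shape,
where configure_xforms() is a hypothetical stand-in for the cipher/auth/aead
branches in the diff above:

/* Simplified flow mirroring qat_sym_session_set_parameters() above;
 * configure_xforms() is a hypothetical stand-in for the per-xform setup.
 */
static int
configure_session(struct qat_sym_session *s,
		const struct rte_crypto_sym_xform *xf)
{
	int ret = configure_xforms(s, xf); /* cipher/auth/aead specific */

	if (ret < 0)
		return ret;
	qat_sym_session_finalize(s); /* init the common header once */
	return 0;
}
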
[dpdk-dev] [PATCH v2 08/16] crypto/qat: add aes gcm in ucs spc mode

2021-06-28 Thread Arek Kusztal
This commit adds the AES-GCM algorithm working
in UCS (Unified Crypto Slice) SPC (Single-Pass) mode.

Signed-off-by: Arek Kusztal 
---
 drivers/crypto/qat/qat_sym.c | 32 
 drivers/crypto/qat/qat_sym_session.c |  9 
 2 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/drivers/crypto/qat/qat_sym.c b/drivers/crypto/qat/qat_sym.c
index eef4a886c5..00fc4d6b1a 100644
--- a/drivers/crypto/qat/qat_sym.c
+++ b/drivers/crypto/qat/qat_sym.c
@@ -217,6 +217,7 @@ qat_sym_build_request(void *in_op, uint8_t *out_msg,
int ret = 0;
struct qat_sym_session *ctx = NULL;
struct icp_qat_fw_la_cipher_req_params *cipher_param;
+   struct icp_qat_fw_la_cipher_20_req_params *cipher_param20;
struct icp_qat_fw_la_auth_req_params *auth_param;
register struct icp_qat_fw_la_bulk_req *qat_req;
uint8_t do_auth = 0, do_cipher = 0, do_aead = 0;
@@ -286,6 +287,7 @@ qat_sym_build_request(void *in_op, uint8_t *out_msg,
rte_mov128((uint8_t *)qat_req, (const uint8_t *)&(ctx->fw_req));
qat_req->comn_mid.opaque_data = (uint64_t)(uintptr_t)op;
cipher_param = (void *)&qat_req->serv_specif_rqpars;
+   cipher_param20 = (void *)&qat_req->serv_specif_rqpars;
auth_param = (void *)((uint8_t *)cipher_param +
ICP_QAT_FW_HASH_REQUEST_PARAMETERS_OFFSET);
 
@@ -563,13 +565,17 @@ qat_sym_build_request(void *in_op, uint8_t *out_msg,
cipher_param->cipher_length = 0;
}
 
-   if (do_auth || do_aead) {
-   auth_param->auth_off = (uint32_t)rte_pktmbuf_iova_offset(
+   if (!ctx->is_single_pass) {
+   /* Do not overwrite the spc_aad length */
+   if (do_auth || do_aead) {
+   auth_param->auth_off =
+   (uint32_t)rte_pktmbuf_iova_offset(
op->sym->m_src, auth_ofs) - src_buf_start;
-   auth_param->auth_len = auth_len;
-   } else {
-   auth_param->auth_off = 0;
-   auth_param->auth_len = 0;
+   auth_param->auth_len = auth_len;
+   } else {
+   auth_param->auth_off = 0;
+   auth_param->auth_len = 0;
+   }
}
 
qat_req->comn_mid.dst_length =
@@ -675,10 +681,18 @@ qat_sym_build_request(void *in_op, uint8_t *out_msg,
}
 
if (ctx->is_single_pass) {
-   /* Handle Single-Pass GCM */
-   cipher_param->spc_aad_addr = op->sym->aead.aad.phys_addr;
-   cipher_param->spc_auth_res_addr =
+   if (ctx->is_ucs) {
+   /* GEN 4 */
+   cipher_param20->spc_aad_addr =
+   op->sym->aead.aad.phys_addr;
+   cipher_param20->spc_auth_res_addr =
op->sym->aead.digest.phys_addr;
+   } else {
+   cipher_param->spc_aad_addr =
+   op->sym->aead.aad.phys_addr;
+   cipher_param->spc_auth_res_addr =
+   op->sym->aead.digest.phys_addr;
+   }
} else if (ctx->is_single_pass_gmac &&
   op->sym->auth.data.length <= QAT_AES_GMAC_SPC_MAX_SIZE) {
/* Handle Single-Pass AES-GMAC */
diff --git a/drivers/crypto/qat/qat_sym_session.c 
b/drivers/crypto/qat/qat_sym_session.c
index fd6fe4423d..019c9f4f02 100644
--- a/drivers/crypto/qat/qat_sym_session.c
+++ b/drivers/crypto/qat/qat_sym_session.c
@@ -898,16 +898,15 @@ qat_sym_session_configure_aead(struct rte_cryptodev *dev,
 
if (qat_dev_gen == QAT_GEN4)
session->is_ucs = 1;
-
if (session->cipher_iv.length == 0) {
session->cipher_iv.length = AES_GCM_J0_LEN;
break;
}
session->is_iv12B = 1;
-   if (qat_dev_gen == QAT_GEN3) {
-   qat_sym_session_handle_single_pass(session,
-   aead_xform);
-   }
+   if (qat_dev_gen < QAT_GEN3)
+   break;
+   qat_sym_session_handle_single_pass(session,
+   aead_xform);
break;
case RTE_CRYPTO_AEAD_AES_CCM:
if (qat_sym_validate_aes_key(aead_xform->key.length,
-- 
2.30.2


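For reference, the J0 comments removed above refer to the GCM pre-counter
block: per NIST SP 800-38D, when the IV is exactly 12 bytes J0 is simply
IV || 0^31 || 1, which the firmware can derive by itself. A standalone
illustrative sketch (generic GCM, not QAT code):

#include <stdint.h>
#include <string.h>

/* Build GCM J0 for the 96-bit IV case: J0 = IV || 0^31 || 1.
 * For other IV lengths, J0 = GHASH_H(IV padded with zeroes followed by
 * the 64-bit IV length), which requires the hash subkey H.
 */
static void
gcm_j0_from_12byte_iv(const uint8_t iv[12], uint8_t j0[16])
{
	memcpy(j0, iv, 12);    /* first 96 bits are the IV itself */
	memset(j0 + 12, 0, 3); /* 31 zero bits */
	j0[15] = 1;            /* 32-bit counter part starts at 1 */
}
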

[dpdk-dev] [PATCH v2 09/16] crypto/qat: add chacha-poly in ucs spc mode

2021-06-28 Thread Arek Kusztal
This commit adds the Chacha20-Poly1305 algorithm working
in UCS (Unified Crypto Slice) SPC (Single-Pass) mode.

Signed-off-by: Arek Kusztal 
---
 drivers/crypto/qat/qat_sym_capabilities.h | 32 ++-
 drivers/crypto/qat/qat_sym_session.c  |  2 ++
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/qat/qat_sym_capabilities.h 
b/drivers/crypto/qat/qat_sym_capabilities.h
index fc8e667687..5c6e723466 100644
--- a/drivers/crypto/qat/qat_sym_capabilities.h
+++ b/drivers/crypto/qat/qat_sym_capabilities.h
@@ -1144,7 +1144,37 @@
},  \
}, }\
}, }\
-   }   \
+   },  \
+   {   /* Chacha20-Poly1305 */ \
+   .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC, \
+   {.sym = {   \
+   .xform_type = RTE_CRYPTO_SYM_XFORM_AEAD,\
+   {.aead = {  \
+   .algo = RTE_CRYPTO_AEAD_CHACHA20_POLY1305, \
+   .block_size = 64,   \
+   .key_size = {   \
+   .min = 32,  \
+   .max = 32,  \
+   .increment = 0  \
+   },  \
+   .digest_size = {\
+   .min = 16,  \
+   .max = 16,  \
+   .increment = 0  \
+   },  \
+   .aad_size = {   \
+   .min = 0,   \
+   .max = 240, \
+   .increment = 1  \
+   },  \
+   .iv_size = {\
+   .min = 12,  \
+   .max = 12,  \
+   .increment = 0  \
+   },  \
+   }, }\
+   }, }\
+   }
 
 
 
diff --git a/drivers/crypto/qat/qat_sym_session.c 
b/drivers/crypto/qat/qat_sym_session.c
index 019c9f4f02..a49da8e364 100644
--- a/drivers/crypto/qat/qat_sym_session.c
+++ b/drivers/crypto/qat/qat_sym_session.c
@@ -922,6 +922,8 @@ qat_sym_session_configure_aead(struct rte_cryptodev *dev,
case RTE_CRYPTO_AEAD_CHACHA20_POLY1305:
if (aead_xform->key.length != ICP_QAT_HW_CHACHAPOLY_KEY_SZ)
return -EINVAL;
+   if (qat_dev_gen == QAT_GEN4)
+   session->is_ucs = 1;
session->qat_cipher_alg =
ICP_QAT_HW_CIPHER_ALGO_CHACHA20_POLY1305;
qat_sym_session_handle_single_pass(session,
-- 
2.30.2


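As a usage note, an application can verify that a device actually exposes
the Chacha20-Poly1305 entry above through the standard cryptodev capability
API; a minimal sketch (the device id and the checked sizes are illustrative
assumptions):

#include <rte_cryptodev.h>

/* Return non-zero if dev_id supports CHACHA20-POLY1305 with these sizes. */
static int
chachapoly_supported(uint8_t dev_id)
{
	const struct rte_cryptodev_symmetric_capability *cap;
	struct rte_cryptodev_sym_capability_idx idx = {
		.type = RTE_CRYPTO_SYM_XFORM_AEAD,
		.algo.aead = RTE_CRYPTO_AEAD_CHACHA20_POLY1305,
	};

	cap = rte_cryptodev_sym_capability_get(dev_id, &idx);
	if (cap == NULL)
		return 0;
	/* key 32B, digest 16B, AAD 16B, IV 12B, as in the table above */
	return rte_cryptodev_sym_capability_check_aead(cap, 32, 16, 16, 12) == 0;
}
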

[dpdk-dev] [PATCH v2 10/16] crypto/qat: add gmac in legacy mode on gen 4

2021-06-28 Thread Arek Kusztal
Add AES-GMAC algorithm in legacy mode to generation 4 devices.

Signed-off-by: Arek Kusztal 
---
 drivers/crypto/qat/qat_sym_capabilities.h | 27 ++-
 drivers/crypto/qat/qat_sym_session.c  |  9 +++-
 drivers/crypto/qat/qat_sym_session.h  |  2 ++
 3 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/qat/qat_sym_capabilities.h 
b/drivers/crypto/qat/qat_sym_capabilities.h
index 5c6e723466..cfb176ca94 100644
--- a/drivers/crypto/qat/qat_sym_capabilities.h
+++ b/drivers/crypto/qat/qat_sym_capabilities.h
@@ -1174,7 +1174,32 @@
},  \
}, }\
}, }\
-   }
+   },  \
+   {   /* AES GMAC (AUTH) */   \
+   .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC, \
+   {.sym = {   \
+   .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,\
+   {.auth = {  \
+   .algo = RTE_CRYPTO_AUTH_AES_GMAC,   \
+   .block_size = 16,   \
+   .key_size = {   \
+   .min = 16,  \
+   .max = 32,  \
+   .increment = 8  \
+   },  \
+   .digest_size = {\
+   .min = 8,   \
+   .max = 16,  \
+   .increment = 4  \
+   },  \
+   .iv_size = {\
+   .min = 0,   \
+   .max = 12,  \
+   .increment = 12 \
+   }   \
+   }, }\
+   }, }\
+   }   \
 
 
 
diff --git a/drivers/crypto/qat/qat_sym_session.c 
b/drivers/crypto/qat/qat_sym_session.c
index a49da8e364..03514ca073 100644
--- a/drivers/crypto/qat/qat_sym_session.c
+++ b/drivers/crypto/qat/qat_sym_session.c
@@ -710,6 +710,8 @@ qat_sym_session_configure_auth(struct rte_cryptodev *dev,
struct qat_sym_dev_private *internals = dev->data->dev_private;
const uint8_t *key_data = auth_xform->key.data;
uint8_t key_length = auth_xform->key.length;
+   enum qat_device_gen qat_dev_gen =
+   internals->qat_dev->qat_dev_gen;
 
session->aes_cmac = 0;
session->auth_key_length = auth_xform->key.length;
@@ -717,6 +719,7 @@ qat_sym_session_configure_auth(struct rte_cryptodev *dev,
session->auth_iv.length = auth_xform->iv.length;
session->auth_mode = ICP_QAT_HW_AUTH_MODE1;
session->is_auth = 1;
+   session->digest_length = auth_xform->digest_length;
 
switch (auth_xform->algo) {
case RTE_CRYPTO_AUTH_SHA1:
@@ -773,6 +776,10 @@ qat_sym_session_configure_auth(struct rte_cryptodev *dev,
session->auth_iv.length = AES_GCM_J0_LEN;
else
session->is_iv12B = 1;
+   if (qat_dev_gen == QAT_GEN4) {
+   session->is_cnt_zero = 1;
+   session->is_ucs = 1;
+   }
break;
case RTE_CRYPTO_AUTH_SNOW3G_UIA2:
session->qat_hash_alg = ICP_QAT_HW_AUTH_ALGO_SNOW_3G_UIA2;
@@ -858,7 +865,6 @@ qat_sym_session_configure_auth(struct rte_cryptodev *dev,
return -EINVAL;
}
 
-   session->digest_length = auth_xform->digest_length;
return 0;
 }
 
@@ -1811,6 +1817,7 @@ int qat_sym_cd_auth_set(struct qat_sym_session *cdesc,
|| cdesc->qat_hash_alg == ICP_QAT_HW_AUTH_ALGO_AES_XCBC_MAC
|| cdesc->qat_hash_alg == ICP_QAT_HW_AUTH_ALGO_AES_CBC_MAC
|| cdesc->qat_hash_alg == ICP_QAT_HW_AUTH_ALGO_NULL
+   || cdesc->is_cnt_zero
)
hash->auth_counter.counter = 0;
else {
diff --git a/drivers/crypto/qat/qat_sym_session.h 
b/drivers/crypto/qat/qat_sym_session.h
in

[dpdk-dev] [PATCH v2 11/16] common/qat: add pf2vf communication in qat

2021-06-28 Thread Arek Kusztal
Add communication between the physical device and the virtual function
in the Intel QuickAssist Technology PMD.

Signed-off-by: Arek Kusztal 
---
 drivers/common/qat/meson.build |   1 +
 drivers/common/qat/qat_adf/adf_pf2vf_msg.h | 154 +
 drivers/common/qat/qat_device.c|  22 ++-
 drivers/common/qat/qat_device.h|  12 ++
 drivers/common/qat/qat_pf2vf.c |  80 +++
 drivers/common/qat/qat_pf2vf.h |  19 +++
 6 files changed, 287 insertions(+), 1 deletion(-)
 create mode 100644 drivers/common/qat/qat_adf/adf_pf2vf_msg.h
 create mode 100644 drivers/common/qat/qat_pf2vf.c
 create mode 100644 drivers/common/qat/qat_pf2vf.h

diff --git a/drivers/common/qat/meson.build b/drivers/common/qat/meson.build
index 479a46f9f0..11ed37c910 100644
--- a/drivers/common/qat/meson.build
+++ b/drivers/common/qat/meson.build
@@ -49,6 +49,7 @@ sources += files(
 'qat_qp.c',
 'qat_device.c',
 'qat_logs.c',
+'qat_pf2vf.c'
 )
 includes += include_directories(
 'qat_adf',
diff --git a/drivers/common/qat/qat_adf/adf_pf2vf_msg.h 
b/drivers/common/qat/qat_adf/adf_pf2vf_msg.h
new file mode 100644
index 00..4029b1c14a
--- /dev/null
+++ b/drivers/common/qat/qat_adf/adf_pf2vf_msg.h
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0)
+ * Copyright(c) 2021 Intel Corporation
+ */
+#ifndef ADF_PF2VF_MSG_H_
+#define ADF_PF2VF_MSG_H_
+
+/* VF/PF compatibility version. */
+/* ADF_PFVF_COMPATIBILITY_EXT_CAP: Support for extended capabilities */
+#define ADF_PFVF_COMPATIBILITY_CAPABILITIES2
+/* ADF_PFVF_COMPATIBILITY_FAST_ACK: In-use pattern cleared by receiver */
+#define ADF_PFVF_COMPATIBILITY_FAST_ACK3
+#define ADF_PFVF_COMPATIBILITY_RING_TO_SVC_MAP 4
+#define ADF_PFVF_COMPATIBILITY_VERSION 4   /* PF<->VF compat */
+
+#define ADF_PFVF_INT   1
+#define ADF_PFVF_MSGORIGIN_SYSTEM  2
+#define ADF_PFVF_1X_MSGTYPE_SHIFT  2
+#define ADF_PFVF_1X_MSGTYPE_MASK   0xF
+#define ADF_PFVF_1X_MSGDATA_SHIFT  6
+#define ADF_PFVF_1X_MSGDATA_MASK   0x3FF
+#define ADF_PFVF_2X_MSGTYPE_SHIFT  2
+#define ADF_PFVF_2X_MSGTYPE_MASK   0x3F
+#define ADF_PFVF_2X_MSGDATA_SHIFT  8
+#define ADF_PFVF_2X_MSGDATA_MASK   0xFF
+
+#define ADF_PFVF_IN_USE0x6AC2
+#define ADF_PFVF_IN_USE_MASK   0xFFFE
+#define ADF_PFVF_VF_MSG_SHIFT  16
+
+/* PF->VF messages */
+#define ADF_PF2VF_MSGTYPE_RESTARTING   0x01
+#define ADF_PF2VF_MSGTYPE_VERSION_RESP 0x02
+#define ADF_PF2VF_MSGTYPE_BLOCK_RESP   0x03
+#define ADF_PF2VF_MSGTYPE_FATAL_ERROR  0x04
+/* Do not use message types starting from 0x10 with 1.x, as 1.x uses
+ * only 4 bits for message types. Hence they are only applicable to 2.0.
+ */
+#define ADF_PF2VF_MSGTYPE_RP_RESET_RESP0x10
+
+/* PF->VF Version Response - ADF_PF2VF_MSGTYPE_VERSION_RESP */
+#define ADF_PF2VF_VERSION_RESP_VERS_MASK   0xFF
+#define ADF_PF2VF_VERSION_RESP_VERS_SHIFT  0
+#define ADF_PF2VF_VERSION_RESP_RESULT_MASK 0x03
+#define ADF_PF2VF_VERSION_RESP_RESULT_SHIFT8
+#define ADF_PF2VF_MINORVERSION_SHIFT   0
+#define ADF_PF2VF_MAJORVERSION_SHIFT   4
+#define ADF_PF2VF_VF_COMPATIBLE1
+#define ADF_PF2VF_VF_INCOMPATIBLE  2
+#define ADF_PF2VF_VF_COMPAT_UNKNOWN3
+
+/* PF->VF Block Response Type - ADF_PF2VF_MSGTYPE_BLOCK_RESP */
+#define ADF_PF2VF_BLOCK_RESP_TYPE_DATA 0x0
+#define ADF_PF2VF_BLOCK_RESP_TYPE_CRC  0x1
+#define ADF_PF2VF_BLOCK_RESP_TYPE_ERROR0x2
+#define ADF_PF2VF_BLOCK_RESP_TYPE_MASK 0x03
+#define ADF_PF2VF_BLOCK_RESP_TYPE_SHIFT0
+#define ADF_PF2VF_BLOCK_RESP_DATA_MASK 0xFF
+#define ADF_PF2VF_BLOCK_RESP_DATA_SHIFT2
+
+/*
+ * PF->VF Block Error Code - Returned in data field when the
+ * response type indicates an error
+ */
+#define ADF_PF2VF_INVALID_BLOCK_TYPE   0x0
+#define ADF_PF2VF_INVALID_BYTE_NUM_REQ 0x1
+#define ADF_PF2VF_PAYLOAD_TRUNCATED0x2
+#define ADF_PF2VF_UNSPECIFIED_ERROR0x3
+
+/* VF->PF messages */
+#define ADF_VF2PF_MSGTYPE_INIT 0x3
+#define ADF_VF2PF_MSGTYPE_SHUTDOWN 0x4
+#define ADF_VF2PF_MSGTYPE_VERSION_REQ  0x5
+#define ADF_VF2PF_MSGTYPE_COMPAT_VER_REQ   0x6
+#define ADF_VF2PF_MSGTYPE_GET_LARGE_BLOCK_REQ  0x7
+#define ADF_VF2PF_MSGTYPE_GET_MEDIUM_BLOCK_REQ 0x8
+#define ADF_VF2PF_MSGTYPE_GET_SMALL_BLOCK_REQ  0x9
+/* Do not use message types starting from 0x10 with 1.x, as 1.x uses
+ * only 4 bits for message types. Hence they are only applicable to 2.0.
+ */
+#define ADF_VF2PF_MSGTYPE_RP_RESET 0x10
+
+/* VF->PF Block Request Type - ADF_VF2PF_MSGTYPE_GET_xxx_BLOCK_REQ  */
+#define ADF_VF2PF_MIN_SMALL_MESS

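To make the register layout above concrete: a 1.x-format doorbell word packs
an interrupt bit, a "system origin" bit, a 4-bit message type and 10 bits of
data. A minimal sketch of composing such a word from the macros in this
header (illustrative helper, not part of the patch):

#include <stdint.h>
#include "adf_pf2vf_msg.h"

/* Compose a 1.x-format PF<->VF message word from its fields. */
static uint32_t
adf_pfvf_1x_build_msg(uint32_t type, uint32_t data)
{
	uint32_t msg = ADF_PFVF_INT | ADF_PFVF_MSGORIGIN_SYSTEM;

	msg |= (type & ADF_PFVF_1X_MSGTYPE_MASK) << ADF_PFVF_1X_MSGTYPE_SHIFT;
	msg |= (data & ADF_PFVF_1X_MSGDATA_MASK) << ADF_PFVF_1X_MSGDATA_SHIFT;
	return msg;
}

/* e.g. a VF version request:
 * adf_pfvf_1x_build_msg(ADF_VF2PF_MSGTYPE_VERSION_REQ, 0)
 */
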
[dpdk-dev] [PATCH v2 12/16] common/qat: reset ring pairs before setting gen4

2021-06-28 Thread Arek Kusztal
This commit resets the ring pairs of a particular VF before
setting up the PMD.

Signed-off-by: Arek Kusztal 
---
 drivers/common/qat/qat_device.c | 32 
 1 file changed, 32 insertions(+)

diff --git a/drivers/common/qat/qat_device.c b/drivers/common/qat/qat_device.c
index 5ee441171e..e52d90fcd7 100644
--- a/drivers/common/qat/qat_device.c
+++ b/drivers/common/qat/qat_device.c
@@ -11,6 +11,7 @@
 #include "qat_sym_pmd.h"
 #include "qat_comp_pmd.h"
 #include "adf_pf2vf_msg.h"
+#include "qat_pf2vf.h"
 
 /* pf2vf data Gen 4 */
 struct qat_pf2vf_dev qat_pf2vf_gen4 = {
@@ -125,6 +126,28 @@ qat_get_qat_dev_from_pci_dev(struct rte_pci_device 
*pci_dev)
return qat_pci_get_named_dev(name);
 }
 
+static int
+qat_gen4_reset_ring_pair(struct qat_pci_device *qat_pci_dev)
+{
+   int ret = 0, i;
+   uint8_t data[4];
+   struct qat_pf2vf_msg pf2vf_msg;
+
+   pf2vf_msg.msg_type = ADF_VF2PF_MSGTYPE_RP_RESET;
+   pf2vf_msg.block_hdr = -1;
+   for (i = 0; i < QAT_GEN4_BUNDLE_NUM; i++) {
+   pf2vf_msg.msg_data = i;
+   ret = qat_pf2vf_exch_msg(qat_pci_dev, pf2vf_msg, 1, data);
+   if (ret) {
+   QAT_LOG(ERR, "QAT error when reset bundle no %d",
+   i);
+   return ret;
+   }
+   }
+
+   return 0;
+}
+
 static void qat_dev_parse_cmd(const char *str, struct qat_dev_cmd_param
*qat_dev_cmd_param)
 {
@@ -371,6 +394,15 @@ static int qat_pci_probe(struct rte_pci_driver *pci_drv 
__rte_unused,
if (qat_pci_dev == NULL)
return -ENODEV;
 
+   if (qat_pci_dev->qat_dev_gen == QAT_GEN4) {
+   if (qat_gen4_reset_ring_pair(qat_pci_dev)) {
+   QAT_LOG(ERR,
+   "Cannot reset ring pairs, does pf driver 
supports pf2vf comms?"
+   );
+   return -ENODEV;
+   }
+   }
+
sym_ret = qat_sym_dev_create(qat_pci_dev, qat_dev_cmd_param);
if (sym_ret == 0) {
num_pmds_created++;
-- 
2.30.2



[dpdk-dev] [PATCH v2 13/16] common/qat: add service discovery to qat gen4

2021-06-28 Thread Arek Kusztal
This commit adds service discovery to generation four
of Intel QuickAssist Technology devices.

Signed-off-by: Arek Kusztal 
---
 drivers/common/qat/qat_common.h |  8 ++
 drivers/common/qat/qat_device.c | 20 ---
 drivers/common/qat/qat_device.h |  3 +++
 drivers/common/qat/qat_qp.c | 43 +
 drivers/common/qat/qat_qp.h |  3 +--
 5 files changed, 62 insertions(+), 15 deletions(-)

diff --git a/drivers/common/qat/qat_common.h b/drivers/common/qat/qat_common.h
index 845c8d99ab..23715085f4 100644
--- a/drivers/common/qat/qat_common.h
+++ b/drivers/common/qat/qat_common.h
@@ -29,6 +29,14 @@ enum qat_service_type {
QAT_SERVICE_INVALID
 };
 
+enum qat_svc_list {
+   QAT_SVC_UNUSED = 0,
+   QAT_SVC_CRYPTO = 1,
+   QAT_SVC_COMPRESSION = 2,
+   QAT_SVC_SYM = 3,
+   QAT_SVC_ASYM = 4,
+};
+
 #define QAT_MAX_SERVICES   (QAT_SERVICE_INVALID)
 
 /**< Common struct for scatter-gather list operations */
diff --git a/drivers/common/qat/qat_device.c b/drivers/common/qat/qat_device.c
index e52d90fcd7..1b967cbcf7 100644
--- a/drivers/common/qat/qat_device.c
+++ b/drivers/common/qat/qat_device.c
@@ -148,6 +148,22 @@ qat_gen4_reset_ring_pair(struct qat_pci_device 
*qat_pci_dev)
return 0;
 }
 
+int qat_query_svc(struct qat_pci_device *qat_dev, uint8_t *val)
+{
+   int ret = -(EINVAL);
+   struct qat_pf2vf_msg pf2vf_msg;
+
+   if (qat_dev->qat_dev_gen == QAT_GEN4) {
+   pf2vf_msg.msg_type = ADF_VF2PF_MSGTYPE_GET_SMALL_BLOCK_REQ;
+   pf2vf_msg.block_hdr = ADF_VF2PF_BLOCK_MSG_GET_RING_TO_SVC_REQ;
+   pf2vf_msg.msg_data = 2;
+   ret = qat_pf2vf_exch_msg(qat_dev, pf2vf_msg, 2, val);
+   }
+
+   return ret;
+}
+
+
 static void qat_dev_parse_cmd(const char *str, struct qat_dev_cmd_param
*qat_dev_cmd_param)
 {
@@ -296,9 +312,7 @@ qat_pci_device_allocate(struct rte_pci_device *pci_dev,
qat_dev_parse_cmd(devargs->drv_str, qat_dev_cmd_param);
 
if (qat_dev->qat_dev_gen >= QAT_GEN4) {
-   int ret = qat_read_qp_config(qat_dev, qat_dev->qat_dev_gen);
-
-   if (ret) {
+   if (qat_read_qp_config(qat_dev)) {
QAT_LOG(ERR,
"Cannot acquire ring configuration for QAT_%d",
qat_dev_id);
diff --git a/drivers/common/qat/qat_device.h b/drivers/common/qat/qat_device.h
index 05e164baa7..228c057d1e 100644
--- a/drivers/common/qat/qat_device.h
+++ b/drivers/common/qat/qat_device.h
@@ -159,4 +159,7 @@ qat_comp_dev_create(struct qat_pci_device *qat_pci_dev 
__rte_unused,
 int
 qat_comp_dev_destroy(struct qat_pci_device *qat_pci_dev __rte_unused);
 
+int
+qat_query_svc(struct qat_pci_device *qat_pci_dev, uint8_t *ret);
+
 #endif /* _QAT_DEVICE_H_ */
diff --git a/drivers/common/qat/qat_qp.c b/drivers/common/qat/qat_qp.c
index 8be59779f9..026ea5ee01 100644
--- a/drivers/common/qat/qat_qp.c
+++ b/drivers/common/qat/qat_qp.c
@@ -504,20 +504,43 @@ qat_select_valid_queue(struct qat_pci_device *qat_dev, 
int qp_id,
 }
 
 int
-qat_read_qp_config(struct qat_pci_device *qat_dev,
-   enum qat_device_gen qat_dev_gen)
+qat_read_qp_config(struct qat_pci_device *qat_dev)
 {
+   int i = 0;
+   enum qat_device_gen qat_dev_gen = qat_dev->qat_dev_gen;
+
if (qat_dev_gen == QAT_GEN4) {
-   /* Read default configuration,
-* until some probe of it can be done
-*/
-   int i = 0;
+   uint16_t svc = 0;
 
+   if (qat_query_svc(qat_dev, (uint8_t *)&svc))
+   return -(EFAULT);
for (; i < QAT_GEN4_BUNDLE_NUM; i++) {
struct qat_qp_hw_data *hw_data =
&qat_dev->qp_gen4_data[i][0];
-   enum qat_service_type service_type =
-   (QAT_GEN4_QP_DEFCON >> (8 * i)) & 0xFF;
+   uint8_t svc1 = (svc >> (3 * i)) & 0x7;
+   enum qat_service_type service_type = 
QAT_SERVICE_INVALID;
+
+   if (svc1 == QAT_SVC_SYM) {
+   service_type = QAT_SERVICE_SYMMETRIC;
+   QAT_LOG(DEBUG,
+   "Discovered SYMMETRIC service on bundle 
%d",
+   i);
+   } else if (svc1 == QAT_SVC_COMPRESSION) {
+   service_type = QAT_SERVICE_COMPRESSION;
+   QAT_LOG(DEBUG,
+   "Discovered COPRESSION service on 
bundle %d",
+   i);
+   } else if (svc1 == QAT_SVC_ASYM) {
+   service_type = QAT_SERVICE_ASYMMETRIC;
+   QAT_LOG(DEBUG,
+  

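The ring-to-service map returned by qat_query_svc() packs one 3-bit
qat_svc_list value per bundle, as the loop above decodes. A self-contained
sketch of that decoding (the bundle count of 4 is an assumption for
illustration):

#include <stdint.h>
#include <stdio.h>

/* Values mirror enum qat_svc_list from qat_common.h. */
enum qat_svc_list { QAT_SVC_UNUSED, QAT_SVC_CRYPTO, QAT_SVC_COMPRESSION,
		QAT_SVC_SYM, QAT_SVC_ASYM };

#define QAT_GEN4_BUNDLE_NUM 4 /* assumed bundle count, for illustration */

/* Print the service discovered on each bundle from the 16-bit map. */
static void
qat_dump_svc_map(uint16_t svc)
{
	static const char * const names[] = {
		"UNUSED", "CRYPTO", "COMPRESSION", "SYM", "ASYM" };
	int i;

	for (i = 0; i < QAT_GEN4_BUNDLE_NUM; i++) {
		uint8_t s = (svc >> (3 * i)) & 0x7;

		printf("bundle %d: %s\n", i,
			s <= QAT_SVC_ASYM ? names[s] : "unknown");
	}
}
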
[dpdk-dev] [PATCH v2 14/16] crypto/qat: update raw dp api

2021-06-28 Thread Arek Kusztal
From: Fan Zhang 

This commit updates the QAT raw data-path API to support the
changes made to device and sessions. The QAT RAW data-path API
now works on Generation 1-3 devices.

Signed-off-by: Fan Zhang 
---
 drivers/crypto/qat/qat_sym_hw_dp.c | 419 +++--
 1 file changed, 216 insertions(+), 203 deletions(-)

diff --git a/drivers/crypto/qat/qat_sym_hw_dp.c 
b/drivers/crypto/qat/qat_sym_hw_dp.c
index 2f64de44a1..4305579b54 100644
--- a/drivers/crypto/qat/qat_sym_hw_dp.c
+++ b/drivers/crypto/qat/qat_sym_hw_dp.c
@@ -101,204 +101,6 @@ qat_sym_dp_fill_vec_status(int32_t *sta, int status, 
uint32_t n)
 #define QAT_SYM_DP_GET_MAX_ENQ(q, c, n) \
RTE_MIN((q->max_inflights - q->enqueued + q->dequeued - c), n)
 
-static __rte_always_inline void
-enqueue_one_aead_job(struct qat_sym_session *ctx,
-   struct icp_qat_fw_la_bulk_req *req,
-   struct rte_crypto_va_iova_ptr *iv,
-   struct rte_crypto_va_iova_ptr *digest,
-   struct rte_crypto_va_iova_ptr *aad,
-   union rte_crypto_sym_ofs ofs, uint32_t data_len)
-{
-   struct icp_qat_fw_la_cipher_req_params *cipher_param =
-   (void *)&req->serv_specif_rqpars;
-   struct icp_qat_fw_la_auth_req_params *auth_param =
-   (void *)((uint8_t *)&req->serv_specif_rqpars +
-   ICP_QAT_FW_HASH_REQUEST_PARAMETERS_OFFSET);
-   uint8_t *aad_data;
-   uint8_t aad_ccm_real_len;
-   uint8_t aad_len_field_sz;
-   uint32_t msg_len_be;
-   rte_iova_t aad_iova = 0;
-   uint8_t q;
-
-   switch (ctx->qat_hash_alg) {
-   case ICP_QAT_HW_AUTH_ALGO_GALOIS_128:
-   case ICP_QAT_HW_AUTH_ALGO_GALOIS_64:
-   ICP_QAT_FW_LA_GCM_IV_LEN_FLAG_SET(
-   req->comn_hdr.serv_specif_flags,
-   ICP_QAT_FW_LA_GCM_IV_LEN_12_OCTETS);
-   rte_memcpy(cipher_param->u.cipher_IV_array, iv->va,
-   ctx->cipher_iv.length);
-   aad_iova = aad->iova;
-   break;
-   case ICP_QAT_HW_AUTH_ALGO_AES_CBC_MAC:
-   aad_data = aad->va;
-   aad_iova = aad->iova;
-   aad_ccm_real_len = 0;
-   aad_len_field_sz = 0;
-   msg_len_be = rte_bswap32((uint32_t)data_len -
-   ofs.ofs.cipher.head);
-
-   if (ctx->aad_len > ICP_QAT_HW_CCM_AAD_DATA_OFFSET) {
-   aad_len_field_sz = ICP_QAT_HW_CCM_AAD_LEN_INFO;
-   aad_ccm_real_len = ctx->aad_len -
-   ICP_QAT_HW_CCM_AAD_B0_LEN -
-   ICP_QAT_HW_CCM_AAD_LEN_INFO;
-   } else {
-   aad_data = iv->va;
-   aad_iova = iv->iova;
-   }
-
-   q = ICP_QAT_HW_CCM_NQ_CONST - ctx->cipher_iv.length;
-   aad_data[0] = ICP_QAT_HW_CCM_BUILD_B0_FLAGS(
-   aad_len_field_sz, ctx->digest_length, q);
-   if (q > ICP_QAT_HW_CCM_MSG_LEN_MAX_FIELD_SIZE) {
-   memcpy(aad_data + ctx->cipher_iv.length +
-   ICP_QAT_HW_CCM_NONCE_OFFSET + (q -
-   ICP_QAT_HW_CCM_MSG_LEN_MAX_FIELD_SIZE),
-   (uint8_t *)&msg_len_be,
-   ICP_QAT_HW_CCM_MSG_LEN_MAX_FIELD_SIZE);
-   } else {
-   memcpy(aad_data + ctx->cipher_iv.length +
-   ICP_QAT_HW_CCM_NONCE_OFFSET,
-   (uint8_t *)&msg_len_be +
-   (ICP_QAT_HW_CCM_MSG_LEN_MAX_FIELD_SIZE
-   - q), q);
-   }
-
-   if (aad_len_field_sz > 0) {
-   *(uint16_t *)&aad_data[ICP_QAT_HW_CCM_AAD_B0_LEN] =
-   rte_bswap16(aad_ccm_real_len);
-
-   if ((aad_ccm_real_len + aad_len_field_sz)
-   % ICP_QAT_HW_CCM_AAD_B0_LEN) {
-   uint8_t pad_len = 0;
-   uint8_t pad_idx = 0;
-
-   pad_len = ICP_QAT_HW_CCM_AAD_B0_LEN -
-   ((aad_ccm_real_len +
-   aad_len_field_sz) %
-   ICP_QAT_HW_CCM_AAD_B0_LEN);
-   pad_idx = ICP_QAT_HW_CCM_AAD_B0_LEN +
-   aad_ccm_real_len +
-   aad_len_field_sz;
-   memset(&aad_data[pad_idx], 0, pad_len);
-   }
-   }
-
-   rte_memcpy(((uint8_t *)cipher_param->u.cipher_IV_array)
-   + ICP_QAT_HW_CCM_NONCE_OFFSET,
-   (uint8_t *)iv->va +
-   ICP_QAT_HW_CCM_NONCE_OFFSET, ctx->cipher_iv.length);
-   *(uint8_t *)&cip

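The removed CCM branch above builds the B0 block whose leading flags octet
is defined by RFC 3610 (and mirrored by the ICP_QAT_HW_CCM_BUILD_B0_FLAGS
macro). For reference, a standalone sketch of that octet (generic CCM, not
the QAT macro):

#include <stdint.h>

/* RFC 3610 B0 flags octet:
 *   bit 6     - Adata, set when additional authenticated data is present
 *   bits 5..3 - M' = (tag_len - 2) / 2
 *   bits 2..0 - L' = q - 1, where q is the message-length field size
 */
static uint8_t
ccm_b0_flags(int has_aad, uint8_t tag_len, uint8_t q)
{
	return (uint8_t)((has_aad ? 0x40 : 0x00) |
			(((tag_len - 2) / 2) << 3) |
			(q - 1));
}
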
[dpdk-dev] [PATCH v2 15/16] crypto/qat: enable RAW API on QAT GEN1-3 only

2021-06-28 Thread Arek Kusztal
From: Adam Dybkowski 

This patch enables the RAW API in the feature flags on QAT generations
1 to 3 only, and disables it for later generations.

Signed-off-by: Adam Dybkowski 
---
 drivers/crypto/qat/qat_sym_pmd.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/qat/qat_sym_pmd.c b/drivers/crypto/qat/qat_sym_pmd.c
index 0097ee210f..1c7b142511 100644
--- a/drivers/crypto/qat/qat_sym_pmd.c
+++ b/drivers/crypto/qat/qat_sym_pmd.c
@@ -409,8 +409,10 @@ qat_sym_dev_create(struct qat_pci_device *qat_pci_dev,
RTE_CRYPTODEV_FF_OOP_SGL_IN_LB_OUT |
RTE_CRYPTODEV_FF_OOP_LB_IN_SGL_OUT |
RTE_CRYPTODEV_FF_OOP_LB_IN_LB_OUT |
-   RTE_CRYPTODEV_FF_DIGEST_ENCRYPTED |
-   RTE_CRYPTODEV_FF_SYM_RAW_DP;
+   RTE_CRYPTODEV_FF_DIGEST_ENCRYPTED;
+
+   if (qat_pci_dev->qat_dev_gen < QAT_GEN4)
+   cryptodev->feature_flags |= RTE_CRYPTODEV_FF_SYM_RAW_DP;
 
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
return 0;
-- 
2.30.2



[dpdk-dev] [PATCH v2 16/16] test/crypto: check if RAW API is supported

2021-06-28 Thread Arek Kusztal
From: Adam Dybkowski 

This patch adds a check of whether the RAW API is supported at the start
of the test command "cryptodev_qat_raw_api_autotest".

Signed-off-by: Adam Dybkowski 
---
 app/test/test_cryptodev.c | 34 +-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c
index 39db52b17a..64b6cc0db7 100644
--- a/app/test/test_cryptodev.c
+++ b/app/test/test_cryptodev.c
@@ -14769,7 +14769,39 @@ test_cryptodev_bcmfs(void)
 static int
 test_cryptodev_qat_raw_api(void /*argv __rte_unused, int argc __rte_unused*/)
 {
-   int ret;
+   static const char *pmd_name = RTE_STR(CRYPTODEV_NAME_QAT_SYM_PMD);
+   struct rte_cryptodev_info dev_info;
+   uint8_t i, nb_devs, found = 0;
+   int driver_id, ret;
+
+   driver_id = rte_cryptodev_driver_id_get(pmd_name);
+   if (driver_id == -1) {
+   RTE_LOG(WARNING, USER1, "%s PMD must be loaded.\n", pmd_name);
+   return TEST_SKIPPED;
+   }
+
+   nb_devs = rte_cryptodev_count();
+   if (nb_devs < 1) {
+   RTE_LOG(WARNING, USER1, "No crypto devices found?\n");
+   return TEST_SKIPPED;
+   }
+
+   for (i = 0; i < nb_devs; i++) {
+   rte_cryptodev_info_get(i, &dev_info);
+   if (dev_info.driver_id == driver_id) {
+   if (!(dev_info.feature_flags &
+   RTE_CRYPTODEV_FF_SYM_RAW_DP)) {
+   RTE_LOG(INFO, USER1, "RAW API not supported\n");
+   return TEST_SKIPPED;
+   }
+   found = 1;
+   break;
+   }
+   }
+   if (!found) {
+   RTE_LOG(INFO, USER1, "RAW API not supported\n");
+   return TEST_SKIPPED;
+   }
 
global_api_test_type = CRYPTODEV_RAW_API_TEST;
ret = run_cryptodev_testsuite(RTE_STR(CRYPTODEV_NAME_QAT_SYM_PMD));
-- 
2.30.2



Re: [dpdk-dev] [PATCH v1 2/2] linux/kni: Added support for KNI multiple fifos

2021-06-28 Thread Ferruh Yigit
On 12/10/2020 11:15 PM, dheemanth wrote:
> In order to improve performance, the KNI is made to
> support multiple FIFOs, so that multiple threads pinned
> to multiple cores can process packets in parallel.
> 
> Signed-off-by: dheemanth 

Hi Dheemanth,

I didn't check the patch yet, but as a very high-level comment:
it is possible to create multiple KNI interfaces and use multiple cores for each,
instead of multiple FIFOs in a single interface. The KNI example uses this approach.
Did you investigate this approach? What is the benefit of multiple FIFOs over
multiple KNI interfaces?

Thanks,
ferruh



Re: [dpdk-dev] [PATCH v2 00/20] Add Marvell CNXK crypto PMDs

2021-06-28 Thread Akhil Goyal
> Add cnxk crypto PMDs supporting Marvell CN106XX SoC, based on
> 'common/cnxk'.
> 
> This series utilizes 'common/cnxk' to register cn9k & cn10k crypto PMDs and
> add symmetric cryptographic features for the same.
> 
> Depends-on: series-17482 ("Add CPT in Marvell CNXK common driver")
> 
> Changes in v2:
> - Added documentation & updated release notes
> - Reworked DP logs as suggested by Akhil
> - Rearranged capability additions & feature flag updates as suggested by
> Akhil
> - Rebased on v2 of dependent series
> 
Series Acked-by: Akhil Goyal 


[dpdk-dev] [PATCH 1/3] regex/mlx5: fix memory region unregistration

2021-06-28 Thread Michael Baum
The issue can cause an illegal physical address access when huge-page A
is released and huge-page B is allocated on the same virtual address.
The old MR can be matched using the virtual address of huge-page B, but
the HW will access the physical address of huge-page A, which is no longer
part of the DPDK process.

Register a driver callback for memory events in order to free all the
MRs of memory that is going to be freed from the DPDK process.

Fixes: cda883bbb655 ("regex/mlx5: add dynamic memory registration to datapath")
Cc: sta...@dpdk.org

Signed-off-by: Michael Baum 
---

This series depends on this patch:
https://patchwork.dpdk.org/project/dpdk/patch/20210628150614.1769507-1-michae...@nvidia.com/
Please don't apply this series before that patch is integrated.

 drivers/regex/mlx5/mlx5_regex.c  | 55 
 drivers/regex/mlx5/mlx5_regex.h  |  2 +
 drivers/regex/mlx5/mlx5_regex_fastpath.c | 39 +++--
 3 files changed, 92 insertions(+), 4 deletions(-)

diff --git a/drivers/regex/mlx5/mlx5_regex.c b/drivers/regex/mlx5/mlx5_regex.c
index dcb2ced88e..0f12d94d7e 100644
--- a/drivers/regex/mlx5/mlx5_regex.c
+++ b/drivers/regex/mlx5/mlx5_regex.c
@@ -11,6 +11,7 @@
 #include 
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -24,6 +25,10 @@
 
 int mlx5_regex_logtype;
 
+TAILQ_HEAD(regex_mem_event, mlx5_regex_priv) mlx5_mem_event_list =
+   TAILQ_HEAD_INITIALIZER(mlx5_mem_event_list);
+static pthread_mutex_t mem_event_list_lock = PTHREAD_MUTEX_INITIALIZER;
+
 const struct rte_regexdev_ops mlx5_regexdev_ops = {
.dev_info_get = mlx5_regex_info_get,
.dev_configure = mlx5_regex_configure,
@@ -82,6 +87,40 @@ mlx5_regex_get_name(char *name, struct rte_pci_device 
*pci_dev __rte_unused)
pci_dev->addr.devid, pci_dev->addr.function);
 }
 
+/**
+ * Callback for memory event.
+ *
+ * @param event_type
+ *   Memory event type.
+ * @param addr
+ *   Address of memory.
+ * @param len
+ *   Size of memory.
+ */
+static void
+mlx5_regex_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr,
+  size_t len, void *arg __rte_unused)
+{
+   struct mlx5_regex_priv *priv;
+
+   /* Must be called from the primary process. */
+   MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
+   switch (event_type) {
+   case RTE_MEM_EVENT_FREE:
+   pthread_mutex_lock(&mem_event_list_lock);
+   /* Iterate all the existing mlx5 devices. */
+   TAILQ_FOREACH(priv, &mlx5_mem_event_list, mem_event_cb)
+   mlx5_free_mr_by_addr(&priv->mr_scache,
+priv->ctx->device->name,
+addr, len);
+   pthread_mutex_unlock(&mem_event_list_lock);
+   break;
+   case RTE_MEM_EVENT_ALLOC:
+   default:
+   break;
+   }
+}
+
 static int
 mlx5_regex_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 struct rte_pci_device *pci_dev)
@@ -193,6 +232,15 @@ mlx5_regex_pci_probe(struct rte_pci_driver *pci_drv 
__rte_unused,
rte_errno = ENOMEM;
goto error;
}
+   /* Register callback function for global shared MR cache management. */
+   if (TAILQ_EMPTY(&mlx5_mem_event_list))
+   rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
+   mlx5_regex_mr_mem_event_cb,
+   NULL);
+   /* Add device to memory callback list. */
+   pthread_mutex_lock(&mem_event_list_lock);
+   TAILQ_INSERT_TAIL(&mlx5_mem_event_list, priv, mem_event_cb);
+   pthread_mutex_unlock(&mem_event_list_lock);
DRV_LOG(INFO, "RegEx GGA is %s.",
priv->has_umr ? "supported" : "unsupported");
return 0;
@@ -225,6 +273,13 @@ mlx5_regex_pci_remove(struct rte_pci_device *pci_dev)
return 0;
priv = dev->data->dev_private;
if (priv) {
+   /* Remove from memory callback device list. */
+   pthread_mutex_lock(&mem_event_list_lock);
+   TAILQ_REMOVE(&mlx5_mem_event_list, priv, mem_event_cb);
+   pthread_mutex_unlock(&mem_event_list_lock);
+   if (TAILQ_EMPTY(&mlx5_mem_event_list))
+   rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB",
+ NULL);
if (priv->pd)
mlx5_glue->dealloc_pd(priv->pd);
if (priv->uar)
diff --git a/drivers/regex/mlx5/mlx5_regex.h b/drivers/regex/mlx5/mlx5_regex.h
index 51a2101e53..61f59ba873 100644
--- a/drivers/regex/mlx5/mlx5_regex.h
+++ b/drivers/regex/mlx5/mlx5_regex.h
@@ -70,6 +70,8 @@ struct mlx5_regex_priv {
uint32_t nb_engines; /* Number of RegEx engines. */
struct mlx5dv_devx_uar *uar; /* UAR obje

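For context, the callback registration used above comes from the EAL memory
subsystem; a self-contained sketch of the same register-a-callback pattern
(toy callback, not the driver's):

#include <stdio.h>
#include <rte_memory.h>

/* Toy callback with the same shape as mlx5_regex_mr_mem_event_cb(). */
static void
toy_mem_event_cb(enum rte_mem_event event_type, const void *addr,
		size_t len, void *arg)
{
	(void)arg;
	if (event_type == RTE_MEM_EVENT_FREE)
		printf("memory freed: %zu bytes at %p\n", len, addr);
}

/* Registered once, e.g. at probe time:
 * rte_mem_event_callback_register("TOY_MEM_EVENT_CB", toy_mem_event_cb, NULL);
 */
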
[dpdk-dev] [PATCH 2/3] regex/mlx5: fix leak in PCI remove function

2021-06-28 Thread Michael Baum
In the PCI removal function, the PMD releases all driver resources allocated
in the probe function.

The MR btree memory is allocated in the probe function, but it is not
freed in the remove function, which causes a memory leak.

Release it.

Fixes: cda883bbb655 ("regex/mlx5: add dynamic memory registration to datapath")
Cc: sta...@dpdk.org

Signed-off-by: Michael Baum 
---
 drivers/regex/mlx5/mlx5_regex.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/regex/mlx5/mlx5_regex.c b/drivers/regex/mlx5/mlx5_regex.c
index 0f12d94d7e..f64dc2824c 100644
--- a/drivers/regex/mlx5/mlx5_regex.c
+++ b/drivers/regex/mlx5/mlx5_regex.c
@@ -280,6 +280,8 @@ mlx5_regex_pci_remove(struct rte_pci_device *pci_dev)
if (TAILQ_EMPTY(&mlx5_mem_event_list))
rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB",
  NULL);
+   if (priv->mr_scache.cache.table)
+   mlx5_mr_release_cache(&priv->mr_scache);
if (priv->pd)
mlx5_glue->dealloc_pd(priv->pd);
if (priv->uar)
-- 
2.25.1



[dpdk-dev] [PATCH 3/3] regex/mlx5: fix redundancy in PCI remove function

2021-06-28 Thread Michael Baum
In the PCI removal function, the PMD releases all driver resources and
unregisters the regexdev.

However, the regexdev registration is accidentally canceled twice.

Remove one of the calls.

Fixes: b34d816363b5 ("regex/mlx5: support rules import")
Cc: sta...@dpdk.org

Signed-off-by: Michael Baum 
---
 drivers/regex/mlx5/mlx5_regex.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/regex/mlx5/mlx5_regex.c b/drivers/regex/mlx5/mlx5_regex.c
index f64dc2824c..1c5bf930ad 100644
--- a/drivers/regex/mlx5/mlx5_regex.c
+++ b/drivers/regex/mlx5/mlx5_regex.c
@@ -290,8 +290,6 @@ mlx5_regex_pci_remove(struct rte_pci_device *pci_dev)
rte_regexdev_unregister(priv->regexdev);
if (priv->ctx)
mlx5_glue->close_device(priv->ctx);
-   if (priv->regexdev)
-   rte_regexdev_unregister(priv->regexdev);
rte_free(priv);
}
return 0;
-- 
2.25.1



[dpdk-dev] [PATCH v4 1/6] net/cnxk: add multi seg Rx vector routine

2021-06-28 Thread pbhagavatula
From: Pavan Nikhilesh 

Add a multi-segment Rx vector routine: form the primary mbufs using
the vector path and switch to the scalar path when extracting segments.

Signed-off-by: Pavan Nikhilesh 
---
 v4 Changes:
 - Split patches for easier merge.
 - Rebase on dpdk-next-net-mrvl.
 v3 Changes:
 - Spell check.

 drivers/net/cnxk/cn10k_rx.c  | 31 +++--
 drivers/net/cnxk/cn10k_rx.h  | 51 +---
 drivers/net/cnxk/cn10k_rx_vec_mseg.c | 17 ++
 drivers/net/cnxk/cn9k_rx.c   | 31 +++--
 drivers/net/cnxk/cn9k_rx.h   | 51 +---
 drivers/net/cnxk/cn9k_rx_vec_mseg.c  | 18 ++
 drivers/net/cnxk/meson.build |  2 ++
 7 files changed, 157 insertions(+), 44 deletions(-)
 create mode 100644 drivers/net/cnxk/cn10k_rx_vec_mseg.c
 create mode 100644 drivers/net/cnxk/cn9k_rx_vec_mseg.c

diff --git a/drivers/net/cnxk/cn10k_rx.c b/drivers/net/cnxk/cn10k_rx.c
index 5c956c06b..3a9fd7130 100644
--- a/drivers/net/cnxk/cn10k_rx.c
+++ b/drivers/net/cnxk/cn10k_rx.c
@@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev,
[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)]
[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)]
[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)];
+
+   rte_atomic_thread_fence(__ATOMIC_RELEASE);
 }

 void
@@ -60,20 +62,29 @@ cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
 #undef R
};

-   /* For PTP enabled, scalar rx function should be chosen as most of the
-* PTP apps are implemented to rx burst 1 pkt.
-*/
-   if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP)
-   pick_rx_func(eth_dev, nix_eth_rx_burst);
-   else
-   pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
+   const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) 
\
+   [f5][f4][f3][f2][f1][f0] = cn10k_nix_recv_pkts_vec_mseg_##name,

-   if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
-   pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+   NIX_RX_FASTPATH_MODES
+#undef R
+   };

/* Copy multi seg version with no offload for tear down sequence */
if (rte_eal_process_type() == RTE_PROC_PRIMARY)
dev->rx_pkt_burst_no_offload =
nix_eth_rx_burst_mseg[0][0][0][0][0][0];
-   rte_mb();
+
+   /* For PTP enabled, scalar rx function should be chosen as most of the
+* PTP apps are implemented to rx burst 1 pkt.
+*/
+   if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+   if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+   return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+   return pick_rx_func(eth_dev, nix_eth_rx_burst);
+   }
+
+   if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+   return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg);
+   return pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
 }
diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
index 1cc37cbaa..5926ff7f4 100644
--- a/drivers/net/cnxk/cn10k_rx.h
+++ b/drivers/net/cnxk/cn10k_rx.h
@@ -119,8 +119,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct 
rte_mbuf *mbuf,

sg = *(const uint64_t *)(rx + 1);
nb_segs = (sg >> 48) & 0x3;
-   mbuf->nb_segs = nb_segs;
+
+   if (nb_segs == 1) {
+   mbuf->next = NULL;
+   return;
+   }
+
+   mbuf->pkt_len = rx->pkt_lenm1 + 1;
mbuf->data_len = sg & 0x;
+   mbuf->nb_segs = nb_segs;
sg = sg >> 16;

eol = ((const rte_iova_t *)(rx + 1) + ((rx->desc_sizem1 + 1) << 1));
@@ -195,15 +202,14 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, 
const uint32_t tag,
ol_flags = nix_update_match_id(rx->match_id, ol_flags, mbuf);

mbuf->ol_flags = ol_flags;
-   *(uint64_t *)(&mbuf->rearm_data) = val;
mbuf->pkt_len = len;
+   mbuf->data_len = len;
+   *(uint64_t *)(&mbuf->rearm_data) = val;

-   if (flag & NIX_RX_MULTI_SEG_F) {
+   if (flag & NIX_RX_MULTI_SEG_F)
nix_cqe_xtract_mseg(rx, mbuf, val);
-   } else {
-   mbuf->data_len = len;
+   else
mbuf->next = NULL;
-   }
 }

 static inline uint16_t
@@ -481,16 +487,34 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct 
rte_mbuf **rx_pkts,
vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2);
vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);

-   /* Update that no more segments */
-   mbuf0->next = NULL;
-   mbuf1->next = NULL;
-   mbuf2->next = NULL;
-   mbuf3->next = NULL;
-
/* Store the mbufs to rx_pkts */
vst1q_u64((uint64_t *)&rx_pk

[dpdk-dev] [PATCH v4 2/6] net/cnxk: enable ptp processing in vector Rx

2021-06-28 Thread pbhagavatula
From: Pavan Nikhilesh 

Enable PTP offload in the vector Rx burst function: use the vector path
for processing mbufs and finally switch to scalar when extracting the
timestamp.

Signed-off-by: Pavan Nikhilesh 
---
 drivers/net/cnxk/cn10k_ethdev.c |   1 -
 drivers/net/cnxk/cn10k_rx.c |   5 +-
 drivers/net/cnxk/cn10k_rx.h | 124 
 drivers/net/cnxk/cn10k_rx_vec.c |   3 -
 drivers/net/cnxk/cn9k_ethdev.c  |   1 -
 drivers/net/cnxk/cn9k_rx.c  |   5 +-
 drivers/net/cnxk/cn9k_rx.h  | 124 
 drivers/net/cnxk/cn9k_rx_vec.c  |   3 -
 drivers/net/cnxk/cnxk_ethdev.h  |  19 ++---
 9 files changed, 232 insertions(+), 53 deletions(-)

diff --git a/drivers/net/cnxk/cn10k_ethdev.c b/drivers/net/cnxk/cn10k_ethdev.c
index b079edbd3..7caec6cf1 100644
--- a/drivers/net/cnxk/cn10k_ethdev.c
+++ b/drivers/net/cnxk/cn10k_ethdev.c
@@ -301,7 +301,6 @@ nix_ptp_enable_vf(struct rte_eth_dev *eth_dev)
if (nix_recalc_mtu(eth_dev))
plt_err("Failed to set MTU size for ptp");
 
-   dev->scalar_ena = true;
dev->rx_offload_flags |= NIX_RX_OFFLOAD_TSTAMP_F;
 
/* Setting up the function pointers as per new offload flags */
diff --git a/drivers/net/cnxk/cn10k_rx.c b/drivers/net/cnxk/cn10k_rx.c
index 3a9fd7130..69e767ac3 100644
--- a/drivers/net/cnxk/cn10k_rx.c
+++ b/drivers/net/cnxk/cn10k_rx.c
@@ -75,10 +75,7 @@ cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
dev->rx_pkt_burst_no_offload =
nix_eth_rx_burst_mseg[0][0][0][0][0][0];
 
-   /* For PTP enabled, scalar rx function should be chosen as most of the
-* PTP apps are implemented to rx burst 1 pkt.
-*/
-   if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+   if (dev->scalar_ena) {
if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
return pick_rx_func(eth_dev, nix_eth_rx_burst);
diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
index 5926ff7f4..d9572b19e 100644
--- a/drivers/net/cnxk/cn10k_rx.h
+++ b/drivers/net/cnxk/cn10k_rx.h
@@ -109,7 +109,7 @@ nix_update_match_id(const uint16_t match_id, uint64_t 
ol_flags,
 
 static __rte_always_inline void
 nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
-   uint64_t rearm)
+   uint64_t rearm, const uint16_t flags)
 {
const rte_iova_t *iova_list;
struct rte_mbuf *head;
@@ -125,8 +125,10 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct 
rte_mbuf *mbuf,
return;
}
 
-   mbuf->pkt_len = rx->pkt_lenm1 + 1;
-   mbuf->data_len = sg & 0x;
+   mbuf->pkt_len = (rx->pkt_lenm1 + 1) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ?
+  CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
+   mbuf->data_len = (sg & 0x) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ?
+ CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
mbuf->nb_segs = nb_segs;
sg = sg >> 16;
 
@@ -207,7 +209,7 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const 
uint32_t tag,
*(uint64_t *)(&mbuf->rearm_data) = val;
 
if (flag & NIX_RX_MULTI_SEG_F)
-   nix_cqe_xtract_mseg(rx, mbuf, val);
+   nix_cqe_xtract_mseg(rx, mbuf, val, flag);
else
mbuf->next = NULL;
 }
@@ -272,8 +274,9 @@ cn10k_nix_recv_pkts(void *rx_queue, struct rte_mbuf 
**rx_pkts, uint16_t pkts,
  flags);
cnxk_nix_mbuf_to_tstamp(mbuf, rxq->tstamp,
(flags & NIX_RX_OFFLOAD_TSTAMP_F),
-   (uint64_t *)((uint8_t *)mbuf + data_off)
-   );
+   (flags & NIX_RX_MULTI_SEG_F),
+   (uint64_t *)((uint8_t *)mbuf
+   + data_off));
rx_pkts[packets++] = mbuf;
roc_prefetch_store_keep(mbuf);
head++;
@@ -469,6 +472,99 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf 
**rx_pkts,
mbuf3);
}
 
+   if (flags & NIX_RX_OFFLOAD_TSTAMP_F) {
+   const uint16x8_t len_off = {
+   0,   /* ptype   0:15 */
+   0,   /* ptype  16:32 */
+   CNXK_NIX_TIMESYNC_RX_OFFSET, /* pktlen  0:15*/
+   0,   /* pktlen 16:32 */
+   CNXK_NIX_TIMESYNC_RX_OFFSET, /* datalen 0:15 */
+   0,
+   0,
+   0};
+   

[dpdk-dev] [PATCH v4 3/6] net/cnxk: enable VLAN processing in vector Tx

2021-06-28 Thread pbhagavatula
From: Pavan Nikhilesh 

Enable VLAN offload in vector Tx burst function.

Signed-off-by: Pavan Nikhilesh 
---
 drivers/net/cnxk/cn10k_tx.c |   3 +-
 drivers/net/cnxk/cn10k_tx.h | 125 +++
 drivers/net/cnxk/cn10k_tx_vec.c |   3 +-
 drivers/net/cnxk/cn9k_tx.c  |   3 +-
 drivers/net/cnxk/cn9k_tx.h  | 128 
 drivers/net/cnxk/cn9k_tx_vec.c  |   3 +-
 6 files changed, 227 insertions(+), 38 deletions(-)

diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c
index 18694dc70..05bc163a4 100644
--- a/drivers/net/cnxk/cn10k_tx.c
+++ b/drivers/net/cnxk/cn10k_tx.c
@@ -69,8 +69,7 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
 
if (dev->scalar_ena ||
(dev->tx_offload_flags &
-(NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |
- NIX_TX_OFFLOAD_TSO_F)))
+(NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F)))
pick_tx_func(eth_dev, nix_eth_tx_burst);
else
pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index 8b1446f25..1e1697858 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -62,9 +62,14 @@ cn10k_nix_tx_ext_subs(const uint16_t flags)
 static __rte_always_inline uint8_t
 cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
 {
-   RTE_SET_USED(flags);
-   /* We can pack up to 4 packets per LMTLINE if there are no offloads. */
-   return 4 << ROC_LMT_LINES_PER_CORE_LOG2;
+   return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4)
+  << ROC_LMT_LINES_PER_CORE_LOG2;
+}
+
+static __rte_always_inline uint8_t
+cn10k_nix_tx_dwords_per_line(const uint16_t flags)
+{
+   return (flags & NIX_TX_NEED_EXT_HDR) ? 6 : 8;
 }
 
 static __rte_always_inline uint64_t
@@ -98,10 +103,9 @@ cn10k_nix_tx_steor_data(const uint16_t flags)
 static __rte_always_inline uint64_t
 cn10k_nix_tx_steor_vec_data(const uint16_t flags)
 {
-   const uint64_t dw_m1 = 0x7;
+   const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1;
uint64_t data;
 
-   RTE_SET_USED(flags);
/* This will be moved to addr area */
data = dw_m1;
/* 15 vector sizes for single seg */
@@ -690,11 +694,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct 
rte_mbuf **tx_pkts,
 {
uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
-   uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP];
+   uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
+   cmd2[NIX_DESCS_PER_LOOP];
uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa;
uint64x2_t senddesc01_w0, senddesc23_w0;
uint64x2_t senddesc01_w1, senddesc23_w1;
uint16_t left, scalar, burst, i, lmt_id;
+   uint64x2_t sendext01_w0, sendext23_w0;
+   uint64x2_t sendext01_w1, sendext23_w1;
uint64x2_t sgdesc01_w0, sgdesc23_w0;
uint64x2_t sgdesc01_w1, sgdesc23_w1;
struct cn10k_eth_txq *txq = tx_queue;
@@ -720,6 +727,14 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf 
**tx_pkts,
sgdesc01_w0 = vld1q_dup_u64(&txq->sg_w0);
sgdesc23_w0 = sgdesc01_w0;
 
+   /* Load command defaults into vector variables. */
+   if (flags & NIX_TX_NEED_EXT_HDR) {
+   sendext01_w0 = vld1q_dup_u64(&txq->cmd[0]);
+   sendext23_w0 = sendext01_w0;
+   sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
+   sendext23_w1 = sendext01_w1;
+   }
+
/* Get LMT base address and LMT ID as lcore id */
ROC_LMT_BASE_ID_GET(laddr, lmt_id);
left = pkts;
@@ -738,6 +753,13 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf 
**tx_pkts,
senddesc23_w0 = senddesc01_w0;
sgdesc23_w0 = sgdesc01_w0;
 
+   /* Clear vlan enables. */
+   if (flags & NIX_TX_NEED_EXT_HDR) {
+   sendext01_w1 = vbicq_u64(sendext01_w1,
+vdupq_n_u64(0x30000));
+   sendext23_w1 = sendext01_w1;
+   }
+
/* Move mbufs to iova */
mbuf0 = (uint64_t *)tx_pkts[0];
mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1303,6 +1325,52 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct 
rte_mbuf **tx_pkts,
senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
 
+   if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
+   /* Tx ol_flag for vlan. */
+   const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN};
+   /* Bit enable for VLAN1 */
+   const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
+   /* Tx 

[dpdk-dev] [PATCH v4 4/6] net/cnxk: enable ptp processing in vector Tx

2021-06-28 Thread pbhagavatula
From: Pavan Nikhilesh 

Enable PTP offload in the vector Tx burst function. Since we can
no longer use a single LMT line for a burst of 4, split the LMT
into two and transmit twice.

Signed-off-by: Pavan Nikhilesh 
---
 drivers/net/cnxk/cn10k_tx.c |   4 +-
 drivers/net/cnxk/cn10k_tx.h | 109 +++-
 drivers/net/cnxk/cn10k_tx_vec.c |   5 +-
 drivers/net/cnxk/cn9k_tx.c  |   4 +-
 drivers/net/cnxk/cn9k_tx.h  | 105 ++
 drivers/net/cnxk/cn9k_tx_vec.c  |   5 +-
 6 files changed, 192 insertions(+), 40 deletions(-)

diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c
index 05bc163a4..c4c3e6570 100644
--- a/drivers/net/cnxk/cn10k_tx.c
+++ b/drivers/net/cnxk/cn10k_tx.c
@@ -67,9 +67,7 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
 #undef T
};
 
-   if (dev->scalar_ena ||
-   (dev->tx_offload_flags &
-(NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F)))
+   if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F))
pick_tx_func(eth_dev, nix_eth_tx_burst);
else
pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index 1e1697858..8af6799ff 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -69,7 +69,9 @@ cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
 static __rte_always_inline uint8_t
 cn10k_nix_tx_dwords_per_line(const uint16_t flags)
 {
-   return (flags & NIX_TX_NEED_EXT_HDR) ? 6 : 8;
+   return (flags & NIX_TX_NEED_EXT_HDR) ?
+((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) :
+8;
 }
 
 static __rte_always_inline uint64_t
@@ -695,13 +697,15 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct 
rte_mbuf **tx_pkts,
uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
-   cmd2[NIX_DESCS_PER_LOOP];
+   cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa;
uint64x2_t senddesc01_w0, senddesc23_w0;
uint64x2_t senddesc01_w1, senddesc23_w1;
uint16_t left, scalar, burst, i, lmt_id;
uint64x2_t sendext01_w0, sendext23_w0;
uint64x2_t sendext01_w1, sendext23_w1;
+   uint64x2_t sendmem01_w0, sendmem23_w0;
+   uint64x2_t sendmem01_w1, sendmem23_w1;
uint64x2_t sgdesc01_w0, sgdesc23_w0;
uint64x2_t sgdesc01_w1, sgdesc23_w1;
struct cn10k_eth_txq *txq = tx_queue;
@@ -733,6 +737,12 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf 
**tx_pkts,
sendext23_w0 = sendext01_w0;
sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
sendext23_w1 = sendext01_w1;
+   if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+   sendmem01_w0 = vld1q_dup_u64(&txq->cmd[2]);
+   sendmem23_w0 = sendmem01_w0;
+   sendmem01_w1 = vld1q_dup_u64(&txq->cmd[3]);
+   sendmem23_w1 = sendmem01_w1;
+   }
}
 
/* Get LMT base address and LMT ID as lcore id */
@@ -760,6 +770,17 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf 
**tx_pkts,
sendext23_w1 = sendext01_w1;
}
 
+   if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+   /* Reset send mem alg to SETTSTMP from SUB */
+   sendmem01_w0 = vbicq_u64(sendmem01_w0,
+vdupq_n_u64(BIT_ULL(59)));
+   /* Reset send mem address to default. */
+   sendmem01_w1 =
+   vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
+   sendmem23_w0 = sendmem01_w0;
+   sendmem23_w1 = sendmem01_w1;
+   }
+
/* Move mbufs to iova */
mbuf0 = (uint64_t *)tx_pkts[0];
mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1371,6 +1392,44 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct 
rte_mbuf **tx_pkts,
sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
}
 
+   if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
+   /* Tx ol_flag for timestamp. */
+   const uint64x2_t olf = {PKT_TX_IEEE1588_TMST,
+   PKT_TX_IEEE1588_TMST};
+   /* Set send mem alg to SUB. */
+   const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
+   /* Increment send mem address by 8. */
+   const uint64x2_t addr = {0x8, 0x8};
+
+   xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
+ 

[dpdk-dev] [PATCH v4 5/6] net/cnxk: enable TSO processing in vector Tx

2021-06-28 Thread pbhagavatula
From: Pavan Nikhilesh 

Enable TSO offload in vector Tx burst function.

Signed-off-by: Pavan Nikhilesh 
---
 drivers/net/cnxk/cn10k_tx.c |  2 +-
 drivers/net/cnxk/cn10k_tx.h | 97 +
 drivers/net/cnxk/cn10k_tx_vec.c |  5 +-
 drivers/net/cnxk/cn9k_tx.c  |  2 +-
 drivers/net/cnxk/cn9k_tx.h  | 94 
 drivers/net/cnxk/cn9k_tx_vec.c  |  5 +-
 6 files changed, 199 insertions(+), 6 deletions(-)

diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c
index c4c3e6570..d06879163 100644
--- a/drivers/net/cnxk/cn10k_tx.c
+++ b/drivers/net/cnxk/cn10k_tx.c
@@ -67,7 +67,7 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
 #undef T
};
 
-   if (dev->scalar_ena || (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F))
+   if (dev->scalar_ena)
pick_tx_func(eth_dev, nix_eth_tx_burst);
else
pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index 8af6799ff..26797581e 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -689,6 +689,46 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf 
**tx_pkts,
 
 #if defined(RTE_ARCH_ARM64)
 
+static __rte_always_inline void
+cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
+ union nix_send_ext_w0_u *w0, uint64_t ol_flags,
+ const uint64_t flags, const uint64_t lso_tun_fmt)
+{
+   uint16_t lso_sb;
+   uint64_t mask;
+
+   if (!(ol_flags & PKT_TX_TCP_SEG))
+   return;
+
+   mask = -(!w1->il3type);
+   lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;
+
+   w0->u |= BIT(14);
+   w0->lso_sb = lso_sb;
+   w0->lso_mps = m->tso_segsz;
+   w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & PKT_TX_IPV6);
+   w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
+
+   /* Handle tunnel tso */
+   if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
+   (ol_flags & PKT_TX_TUNNEL_MASK)) {
+   const uint8_t is_udp_tun =
+   (CNXK_NIX_UDP_TUN_BITMASK >>
+((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) &
+   0x1;
+   uint8_t shift = is_udp_tun ? 32 : 0;
+
+   shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
+   shift += (!!(ol_flags & PKT_TX_IPV6) << 3);
+
+   w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
+   w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
+   /* Update format for UDP tunneled packet */
+
+   w0->lso_format = (lso_tun_fmt >> shift);
+   }
+}
+
 #define NIX_DESCS_PER_LOOP 4
 static __rte_always_inline uint16_t
 cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
@@ -723,6 +763,11 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf 
**tx_pkts,
 
/* Reduce the cached count */
txq->fc_cache_pkts -= pkts;
+   /* Perform header writes before barrier for TSO */
+   if (flags & NIX_TX_OFFLOAD_TSO_F) {
+   for (i = 0; i < pkts; i++)
+   cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
+   }
 
senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
senddesc23_w0 = senddesc01_w0;
@@ -781,6 +826,13 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf 
**tx_pkts,
sendmem23_w1 = sendmem01_w1;
}
 
+   if (flags & NIX_TX_OFFLOAD_TSO_F) {
+   /* Clear the LSO enable bit. */
+   sendext01_w0 = vbicq_u64(sendext01_w0,
+vdupq_n_u64(BIT_ULL(14)));
+   sendext23_w0 = sendext01_w0;
+   }
+
/* Move mbufs to iova */
mbuf0 = (uint64_t *)tx_pkts[0];
mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1430,6 +1482,51 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct 
rte_mbuf **tx_pkts,
cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
}
 
+   if (flags & NIX_TX_OFFLOAD_TSO_F) {
+   const uint64_t lso_fmt = txq->lso_tun_fmt;
+   uint64_t sx_w0[NIX_DESCS_PER_LOOP];
+   uint64_t sd_w1[NIX_DESCS_PER_LOOP];
+
+   /* Extract SD W1 as we need to set L4 types. */
+   vst1q_u64(sd_w1, senddesc01_w1);
+   vst1q_u64(sd_w1 + 2, senddesc23_w1);
+
+   /* Extract SX W0 as we need to set LSO fields. */
+   vst1q_u64(sx_w0, sendext01_w0);
+   vst1q_u64(sx_w0 + 2, sendext23_w0);
+
+   /* Extract ol_flags. */
+   xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
+   ytmp128 = vzip1q_u64(len_olflags2, len_olfla

[dpdk-dev] [PATCH v4 6/6] net/cnxk: add multi seg Tx vector routine

2021-06-28 Thread pbhagavatula
From: Pavan Nikhilesh 

Add multi-segment Tx vector routine.

Signed-off-by: Pavan Nikhilesh 
---
 drivers/net/cnxk/cn10k_tx.c  |  20 +-
 drivers/net/cnxk/cn10k_tx.h  | 388 +--
 drivers/net/cnxk/cn10k_tx_vec_mseg.c |  24 ++
 drivers/net/cnxk/cn9k_tx.c   |  20 +-
 drivers/net/cnxk/cn9k_tx.h   | 272 ++-
 drivers/net/cnxk/cn9k_tx_vec_mseg.c  |  24 ++
 drivers/net/cnxk/meson.build |   6 +-
 7 files changed, 709 insertions(+), 45 deletions(-)
 create mode 100644 drivers/net/cnxk/cn10k_tx_vec_mseg.c
 create mode 100644 drivers/net/cnxk/cn9k_tx_vec_mseg.c

diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c
index d06879163..1f30bab59 100644
--- a/drivers/net/cnxk/cn10k_tx.c
+++ b/drivers/net/cnxk/cn10k_tx.c
@@ -67,13 +67,23 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
 #undef T
};
 
-   if (dev->scalar_ena)
+   const eth_tx_burst_t nix_eth_tx_vec_burst_mseg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) 
\
+   [f5][f4][f3][f2][f1][f0] = cn10k_nix_xmit_pkts_vec_mseg_##name,
+
+   NIX_TX_FASTPATH_MODES
+#undef T
+   };
+
+   if (dev->scalar_ena) {
pick_tx_func(eth_dev, nix_eth_tx_burst);
-   else
+   if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
+   pick_tx_func(eth_dev, nix_eth_tx_burst_mseg);
+   } else {
pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
-
-   if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
-   pick_tx_func(eth_dev, nix_eth_tx_burst_mseg);
+   if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
+   pick_tx_func(eth_dev, nix_eth_tx_vec_burst_mseg);
+   }
 
rte_mb();
 }
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index 26797581e..532b53b31 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -42,6 +42,13 @@
}  \
} while (0)
 
+/* Encoded number of segments to number of dwords macro, each value of nb_segs
+ * is encoded as 4 bits.
+ */
+#define NIX_SEGDW_MAGIC 0x76654432210ULL
+
+#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)
+
 #define LMT_OFF(lmt_addr, lmt_num, offset) 
\
(void *)((lmt_addr) + ((lmt_num) << ROC_LMT_LINE_SIZE_LOG2) + (offset))
 
@@ -102,6 +109,14 @@ cn10k_nix_tx_steor_data(const uint16_t flags)
return data;
 }
 
+static __rte_always_inline uint8_t
+cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags)
+{
+   return ((flags & NIX_TX_NEED_EXT_HDR) ?
+ (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6 :
+ 4);
+}
+
 static __rte_always_inline uint64_t
 cn10k_nix_tx_steor_vec_data(const uint16_t flags)
 {
@@ -729,7 +744,244 @@ cn10k_nix_prepare_tso(struct rte_mbuf *m, union 
nix_send_hdr_w1_u *w1,
}
 }
 
+static __rte_always_inline void
+cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
+   union nix_send_hdr_w0_u *sh,
+   union nix_send_sg_s *sg, const uint32_t flags)
+{
+   struct rte_mbuf *m_next;
+   uint64_t *slist, sg_u;
+   uint16_t nb_segs;
+   int i = 1;
+
+   sh->total = m->pkt_len;
+   /* Clear sg->u header before use */
+   sg->u &= 0xFC00;
+   sg_u = sg->u;
+   slist = &cmd[0];
+
+   sg_u = sg_u | ((uint64_t)m->data_len);
+
+   nb_segs = m->nb_segs - 1;
+   m_next = m->next;
+
+   /* Set invert df if buffer is not to be freed by H/W */
+   if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
+   sg_u |= (cnxk_nix_prefree_seg(m) << 55);
+   /* Mark mempool object as "put" since it is freed by NIX */
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+   if (!(sg_u & (1ULL << 55)))
+   __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
+   rte_io_wmb();
+#endif
+
+   m = m_next;
+   /* Fill mbuf segments */
+   do {
+   m_next = m->next;
+   sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
+   *slist = rte_mbuf_data_iova(m);
+   /* Set invert df if buffer is not to be freed by H/W */
+   if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
+   sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
+   /* Mark mempool object as "put" since it is freed by NIX
+*/
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+   if (!(sg_u & (1ULL << (i + 55
+   __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
+   rte_io_wmb();
+#endif
+   slist++;
+   i++;
+   nb_segs--;
+   if (i > 2 && nb_segs) {
+   i = 0;
+   
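For reference, the NIX_SEGDW_MAGIC encoding above can be sanity-checked in
isolation. A minimal standalone sketch follows; interpreting a "dword" as a
16-byte LMT unit is an inference from the 128-byte LMT line holding 8 of
them (one SG header covers up to three segment pointers):

/* Standalone check of the nb_segs -> SG-dwords nibble table. */
#include <stdint.h>
#include <stdio.h>

#define NIX_SEGDW_MAGIC 0x76654432210ULL
#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)

int main(void)
{
	unsigned int n;

	for (n = 1; n <= 9; n++)
		printf("nb_segs=%u -> segdw=%u\n", n,
		       (unsigned int)NIX_NB_SEGS_TO_SEGDW(n));
	/* 1->1, 2->2, 3->2, 4->3, 5->4, 6->4, 7->5, 8->6, 9->6 */
	return 0;
}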

[dpdk-dev] [PATCH v4 1/7] event/cnxk: add Rx adapter support

2021-06-28 Thread pbhagavatula
From: Pavan Nikhilesh 

Add support for event eth Rx adapter.
Resize the cn10k workslot fastpath structure to fit in a 64B cacheline.

Signed-off-by: Pavan Nikhilesh 
---
 v4 Changes:
 - Split patches for easier merge.
 v3 Changes:
 - Spell check.

 doc/guides/eventdevs/cnxk.rst|  28 
 doc/guides/rel_notes/release_21_08.rst   |   5 +
 drivers/common/cnxk/roc_nix.h|   3 +
 drivers/common/cnxk/roc_nix_fc.c |  78 ++
 drivers/common/cnxk/roc_nix_priv.h   |   3 +-
 drivers/common/cnxk/version.map  |   1 +
 drivers/event/cnxk/cn10k_eventdev.c  | 107 +++---
 drivers/event/cnxk/cn10k_worker.c|   7 +-
 drivers/event/cnxk/cn10k_worker.h|  32 +++--
 drivers/event/cnxk/cn9k_eventdev.c   |  89 
 drivers/event/cnxk/cn9k_worker.h |   4 +
 drivers/event/cnxk/cnxk_eventdev.c   |   2 +
 drivers/event/cnxk/cnxk_eventdev.h   |  43 --
 drivers/event/cnxk/cnxk_eventdev_adptr.c | 176 +++
 drivers/event/cnxk/meson.build   |   9 +-
 15 files changed, 540 insertions(+), 47 deletions(-)

diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index 36da3800c..b7e82c127 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -39,6 +39,10 @@ Features of the OCTEON cnxk SSO PMD are:
   time granularity of 2.5us on CN9K and 1us on CN10K.
 - Up to 256 TIM rings a.k.a event timer adapters.
 - Up to 8 rings traversed in parallel.
+- HW managed packets enqueued from ethdev to eventdev exposed through event eth
+  RX adapter.
+- N:1 ethernet device Rx queue to Event queue mapping.
+- Full Rx offload support defined through ethdev queue configuration.

 Prerequisites and Compilation procedure
 ---
@@ -93,6 +97,15 @@ Runtime Config Options

 -a 0002:0e:00.0,qos=[1-50-50-50]

+- ``Force Rx Back pressure``
+
+   Force Rx back pressure when the same mempool is used across the ethernet
+   devices connected to the event device.
+
+   For example::
+
+  -a 0002:0e:00.0,force_rx_bp=1
+
 - ``TIM disable NPA``

   By default chunks are allocated from NPA then TIM can automatically free
@@ -160,3 +173,18 @@ Debugging Options
+---++---+
| 2 | TIM| --log-level='pmd\.event\.cnxk\.timer,8'   |
+---++---+
+
+Limitations
+---
+
+Rx adapter support
+~~
+
+Using the same mempool for all the ethernet device ports connected to the
+event device would cause back pressure to be asserted only on the first
+ethernet device.
+Back pressure is automatically disabled when the same mempool is used for
+all the ethernet devices connected to the event device; to override this,
+applications can use the `force_rx_bp=1` device argument.
+Using a unique mempool per ethernet device is recommended when they are
+connected to the event device.
diff --git a/doc/guides/rel_notes/release_21_08.rst 
b/doc/guides/rel_notes/release_21_08.rst
index 31e49e1a5..3892c8017 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -60,6 +60,11 @@ New Features
   * Added net/cnxk driver which provides the support for the integrated 
ethernet
 device.

+* **Added support for Marvell CN10K, CN9K, event Rx adapter.**
+
+  * Added Rx adapter support for event/cnxk when the ethernet device requested 
is
+net/cnxk.
+

 Removed Items
 -
diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h
index bb6902795..76613fe84 100644
--- a/drivers/common/cnxk/roc_nix.h
+++ b/drivers/common/cnxk/roc_nix.h
@@ -514,6 +514,9 @@ int __roc_api roc_nix_fc_mode_set(struct roc_nix *roc_nix,

 enum roc_nix_fc_mode __roc_api roc_nix_fc_mode_get(struct roc_nix *roc_nix);

+void __roc_api rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id,
+uint8_t ena, uint8_t force);
+
 /* NPC */
 int __roc_api roc_nix_npc_promisc_ena_dis(struct roc_nix *roc_nix, int enable);

diff --git a/drivers/common/cnxk/roc_nix_fc.c b/drivers/common/cnxk/roc_nix_fc.c
index 47be8aa3f..f17eba416 100644
--- a/drivers/common/cnxk/roc_nix_fc.c
+++ b/drivers/common/cnxk/roc_nix_fc.c
@@ -249,3 +249,81 @@ roc_nix_fc_mode_set(struct roc_nix *roc_nix, enum 
roc_nix_fc_mode mode)
 exit:
return rc;
 }
+
+void
+rox_nix_fc_npa_bp_cfg(struct roc_nix *roc_nix, uint64_t pool_id, uint8_t ena,
+ uint8_t force)
+{
+   struct nix *nix = roc_nix_to_nix_priv(roc_nix);
+   struct npa_lf *lf = idev_npa_obj_get();
+   struct npa_aq_enq_req *req;
+   struct npa_aq_enq_rsp *rsp;
+   struct mbox *mbox;
+   uint32_t limit;
+   int rc;
+
+   if (roc_nix_is_sdp(roc_nix))
+   return;
+
+   if (!lf)
+   return;
+   mbox = lf->mbox;
+
+   req = mbox_alloc_msg_npa_aq_enq(mbox);

[dpdk-dev] [PATCH v4 2/7] event/cnxk: add Rx adapter fastpath ops

2021-06-28 Thread pbhagavatula
From: Pavan Nikhilesh 

Add support for event eth Rx adapter fastpath operations.

Signed-off-by: Pavan Nikhilesh 
---
 drivers/event/cnxk/cn10k_eventdev.c   | 136 +++-
 drivers/event/cnxk/cn10k_worker.c |  54 
 drivers/event/cnxk/cn10k_worker.h |  97 +-
 drivers/event/cnxk/cn10k_worker_deq.c |  44 +++
 drivers/event/cnxk/cn10k_worker_deq_burst.c   |  29 ++
 drivers/event/cnxk/cn10k_worker_deq_tmo.c |  72 +
 drivers/event/cnxk/cn9k_eventdev.c| 305 +-
 drivers/event/cnxk/cn9k_worker.c  | 117 ---
 drivers/event/cnxk/cn9k_worker.h  | 174 --
 drivers/event/cnxk/cn9k_worker_deq.c  |  44 +++
 drivers/event/cnxk/cn9k_worker_deq_burst.c|  29 ++
 drivers/event/cnxk/cn9k_worker_deq_tmo.c  |  72 +
 drivers/event/cnxk/cn9k_worker_dual_deq.c |  53 +++
 .../event/cnxk/cn9k_worker_dual_deq_burst.c   |  30 ++
 drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c |  89 +
 drivers/event/cnxk/cnxk_eventdev.h|   1 +
 drivers/event/cnxk/meson.build|   9 +
 17 files changed, 1124 insertions(+), 231 deletions(-)
 create mode 100644 drivers/event/cnxk/cn10k_worker_deq.c
 create mode 100644 drivers/event/cnxk/cn10k_worker_deq_burst.c
 create mode 100644 drivers/event/cnxk/cn10k_worker_deq_tmo.c
 create mode 100644 drivers/event/cnxk/cn9k_worker_deq.c
 create mode 100644 drivers/event/cnxk/cn9k_worker_deq_burst.c
 create mode 100644 drivers/event/cnxk/cn9k_worker_deq_tmo.c
 create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq.c
 create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_burst.c
 create mode 100644 drivers/event/cnxk/cn9k_worker_dual_deq_tmo.c

diff --git a/drivers/event/cnxk/cn10k_eventdev.c 
b/drivers/event/cnxk/cn10k_eventdev.c
index 2060c8fe8..ba7d95fff 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -237,17 +237,141 @@ static void
 cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 {
struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+   const event_dequeue_t sso_hws_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) 
\
+   [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_##name,
+   NIX_RX_FASTPATH_MODES
+#undef R
+   };
+
+   const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) 
\
+   [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_burst_##name,
+   NIX_RX_FASTPATH_MODES
+#undef R
+   };
+
+   const event_dequeue_t sso_hws_tmo_deq[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) 
\
+   [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_##name,
+   NIX_RX_FASTPATH_MODES
+#undef R
+   };
+
+   const event_dequeue_burst_t sso_hws_tmo_deq_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) 
\
+   [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_burst_##name,
+   NIX_RX_FASTPATH_MODES
+#undef R
+   };
+
+   const event_dequeue_t sso_hws_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) 
\
+   [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_##name,
+   NIX_RX_FASTPATH_MODES
+#undef R
+   };
+
+   const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) 
\
+   [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_burst_##name,
+   NIX_RX_FASTPATH_MODES
+#undef R
+   };
+
+   const event_dequeue_t sso_hws_tmo_deq_seg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) 
\
+   [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_##name,
+   NIX_RX_FASTPATH_MODES
+#undef R
+   };
+
+   const event_dequeue_burst_t
+   sso_hws_tmo_deq_seg_burst[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) 
\
+   [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_deq_tmo_seg_burst_##name,
+   NIX_RX_FASTPATH_MODES
+#undef R
+   };
 
event_dev->enqueue = cn10k_sso_hws_enq;
event_dev->enqueue_burst = cn10k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst;
event_dev->enqueue_forward_burst = cn10k_sso_hws_enq_fwd_burst;
-
-   event_dev->dequeue = cn10k_sso_hws_deq;
-   event_dev->dequeue_burst = cn10k_sso_hws_deq_burst;
-   if (dev->is_timeout_deq) {
-   event_dev->dequeue = cn10k_sso_hws_tmo_deq;
-   event_dev->dequeue_burst = cn10k_sso_hws_tmo_deq_burst;
+   if (dev->rx_offloads & NIX_RX_MULTI_SEG_F
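The [2][2][2][2][2][2] tables above all follow the same dispatch scheme; a
minimal, self-contained illustration of it, with hypothetical flag names
(not the driver's actual flags), is sketched below:

/* Pick a specialized burst function once, from a table indexed by the
 * boolean presence of each offload flag. Two flags shown for brevity. */
#include <stdint.h>

#define MY_OFFLOAD_CSUM (1U << 0) /* hypothetical */
#define MY_OFFLOAD_MSEG (1U << 1) /* hypothetical */

typedef uint16_t (*burst_fn_t)(void *q, void **pkts, uint16_t nb);

static uint16_t burst_plain(void *q, void **p, uint16_t n) { (void)q; (void)p; return n; }
static uint16_t burst_csum(void *q, void **p, uint16_t n) { (void)q; (void)p; return n; }
static uint16_t burst_mseg(void *q, void **p, uint16_t n) { (void)q; (void)p; return n; }
static uint16_t burst_mseg_csum(void *q, void **p, uint16_t n) { (void)q; (void)p; return n; }

static burst_fn_t
pick_burst_fn(uint32_t offloads)
{
	/* [MSEG][CSUM], mirroring the [f5]..[f0] indexing above */
	static const burst_fn_t tbl[2][2] = {
		{ burst_plain, burst_csum },
		{ burst_mseg, burst_mseg_csum },
	};

	return tbl[!!(offloads & MY_OFFLOAD_MSEG)]
		  [!!(offloads & MY_OFFLOAD_CSUM)];
}

Each macro-generated variant in the real driver is an inline-expanded
specialization, so every table slot points at code compiled with its flag
combination constant-folded.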

[dpdk-dev] [PATCH v4 3/7] event/cnxk: add Tx adapter support

2021-06-28 Thread pbhagavatula
From: Pavan Nikhilesh 

Add support for event eth Tx adapter.

Signed-off-by: Pavan Nikhilesh 
---
 doc/guides/eventdevs/cnxk.rst|   4 +-
 doc/guides/rel_notes/release_21_08.rst   |   6 +-
 drivers/event/cnxk/cn10k_eventdev.c  |  91 ++
 drivers/event/cnxk/cn9k_eventdev.c   | 117 +++
 drivers/event/cnxk/cnxk_eventdev.h   |  21 +++-
 drivers/event/cnxk/cnxk_eventdev_adptr.c | 106 
 6 files changed, 339 insertions(+), 6 deletions(-)

diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index b7e82c127..6fdccc2ab 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -42,7 +42,9 @@ Features of the OCTEON cnxk SSO PMD are:
 - HW managed packets enqueued from ethdev to eventdev exposed through event eth
   RX adapter.
 - N:1 ethernet device Rx queue to Event queue mapping.
-- Full Rx offload support defined through ethdev queue configuration.
+- Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE``
+  capability while maintaining receive packet order.
+- Full Rx/Tx offload support defined through ethdev queue configuration.
 
 Prerequisites and Compilation procedure
 ---
diff --git a/doc/guides/rel_notes/release_21_08.rst 
b/doc/guides/rel_notes/release_21_08.rst
index 3892c8017..80ff93269 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -60,10 +60,10 @@ New Features
   * Added net/cnxk driver which provides the support for the integrated 
ethernet
 device.
 
-* **Added support for Marvell CN10K, CN9K, event Rx adapter.**
+* **Added support for Marvell CN10K, CN9K, event Rx/Tx adapter.**
 
-  * Added Rx adapter support for event/cnxk when the ethernet device requested 
is
-net/cnxk.
+  * Added Rx/Tx adapter support for event/cnxk when the ethernet device 
requested
+is net/cnxk.
 
 
 Removed Items
diff --git a/drivers/event/cnxk/cn10k_eventdev.c 
b/drivers/event/cnxk/cn10k_eventdev.c
index ba7d95fff..8a9b04a3d 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -44,6 +44,7 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
/* First cache line is reserved for cookie */
ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE);
ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
+   ws->tx_base = ws->base;
ws->hws_id = port_id;
ws->swtag_req = 0;
ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
@@ -233,6 +234,39 @@ cn10k_sso_rsrc_init(void *arg, uint8_t hws, uint8_t hwgrp)
return roc_sso_rsrc_init(&dev->sso, hws, hwgrp);
 }
 
+static int
+cn10k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
+{
+   struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+   int i;
+
+   if (dev->tx_adptr_data == NULL)
+   return 0;
+
+   for (i = 0; i < dev->nb_event_ports; i++) {
+   struct cn10k_sso_hws *ws = event_dev->data->ports[i];
+   void *ws_cookie;
+
+   ws_cookie = cnxk_sso_hws_get_cookie(ws);
+   ws_cookie = rte_realloc_socket(
+   ws_cookie,
+   sizeof(struct cnxk_sso_hws_cookie) +
+   sizeof(struct cn10k_sso_hws) +
+   (sizeof(uint64_t) * (dev->max_port_id + 1) *
+RTE_MAX_QUEUES_PER_PORT),
+   RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+   if (ws_cookie == NULL)
+   return -ENOMEM;
+   ws = RTE_PTR_ADD(ws_cookie, sizeof(struct cnxk_sso_hws_cookie));
+   memcpy(&ws->tx_adptr_data, dev->tx_adptr_data,
+  sizeof(uint64_t) * (dev->max_port_id + 1) *
+  RTE_MAX_QUEUES_PER_PORT);
+   event_dev->data->ports[i] = ws;
+   }
+
+   return 0;
+}
+
 static void
 cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 {
@@ -493,6 +527,10 @@ cn10k_sso_start(struct rte_eventdev *event_dev)
 {
int rc;
 
+   rc = cn10k_sso_updt_tx_adptr_data(event_dev);
+   if (rc < 0)
+   return rc;
+
rc = cnxk_sso_start(event_dev, cn10k_sso_hws_reset,
cn10k_sso_hws_flush_events);
if (rc < 0)
@@ -595,6 +633,55 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev 
*event_dev,
return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
 }
 
+static int
+cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev *dev,
+ const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+   int ret;
+
+   RTE_SET_USED(dev);
+   ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+   if (ret)
+   *caps = 0;
+   else
+   *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+
+   return 0;
+

[dpdk-dev] [PATCH v4 4/7] event/cnxk: add Tx adapter fastpath ops

2021-06-28 Thread pbhagavatula
From: Pavan Nikhilesh 

Add support for event eth Tx adapter fastpath operations.

Signed-off-by: Pavan Nikhilesh 
---
 drivers/event/cnxk/cn10k_eventdev.c   | 38 
 drivers/event/cnxk/cn10k_worker.h | 67 ++
 drivers/event/cnxk/cn10k_worker_tx_enq.c  | 23 +
 drivers/event/cnxk/cn10k_worker_tx_enq_seg.c  | 23 +
 drivers/event/cnxk/cn9k_eventdev.c| 81 +
 drivers/event/cnxk/cn9k_worker.h  | 87 +++
 drivers/event/cnxk/cn9k_worker_dual_tx_enq.c  | 23 +
 .../event/cnxk/cn9k_worker_dual_tx_enq_seg.c  | 23 +
 drivers/event/cnxk/cn9k_worker_tx_enq.c   | 23 +
 drivers/event/cnxk/cn9k_worker_tx_enq_seg.c   | 23 +
 drivers/event/cnxk/meson.build|  6 ++
 11 files changed, 417 insertions(+)
 create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq.c
 create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq_seg.c
 create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq.c
 create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c
 create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq.c
 create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq_seg.c

diff --git a/drivers/event/cnxk/cn10k_eventdev.c 
b/drivers/event/cnxk/cn10k_eventdev.c
index 8a9b04a3d..e462f770c 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -328,6 +328,23 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 #undef R
};
 
+   /* Tx modes */
+   const event_tx_adapter_enqueue
+   sso_hws_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) 
\
+   [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_##name,
+   NIX_TX_FASTPATH_MODES
+#undef T
+   };
+
+   const event_tx_adapter_enqueue
+   sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags) 
\
+   [f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_seg_##name,
+   NIX_TX_FASTPATH_MODES
+#undef T
+   };
+
event_dev->enqueue = cn10k_sso_hws_enq;
event_dev->enqueue_burst = cn10k_sso_hws_enq_burst;
event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst;
@@ -407,6 +424,27 @@ cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
}
}
+
+   if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
+   /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */
+   event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg
+   [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+   [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+   [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+   [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+   [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+   [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+   } else {
+   event_dev->txa_enqueue = sso_hws_tx_adptr_enq
+   [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+   [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+   [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+   [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+   [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+   [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+   }
+
+   event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue;
 }
 
 static void
diff --git a/drivers/event/cnxk/cn10k_worker.h 
b/drivers/event/cnxk/cn10k_worker.h
index b724083ca..3c90c8500 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -11,6 +11,7 @@
 
 #include "cn10k_ethdev.h"
 #include "cn10k_rx.h"
+#include "cn10k_tx.h"
 
 /* SSO Operations */
 
@@ -251,4 +252,70 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port,
 NIX_RX_FASTPATH_MODES
 #undef R
 
+static __rte_always_inline const struct cn10k_eth_txq *
+cn10k_sso_hws_xtract_meta(struct rte_mbuf *m,
+ const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
+{
+   return (const struct cn10k_eth_txq *)
+   txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
+}
+
+static __rte_always_inline uint16_t
+cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
+  uint64_t *cmd,
+  const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+  const uint32_t flags)
+{
+   const struct cn10k_eth_txq *txq;
+   struct rte_mbuf *m = ev->mbuf;
+   uint16_t ref_cnt = m->refcnt;
+   uintptr_t lm

[dpdk-dev] [PATCH v4 5/7] event/cnxk: add Rx adapter vector support

2021-06-28 Thread pbhagavatula
From: Pavan Nikhilesh 

Add event vector support for the cnxk event Rx adapter: add control path
APIs to get vector limits and the ability to configure event vectorization
on a given Rx queue.

Signed-off-by: Pavan Nikhilesh 
---
 doc/guides/eventdevs/cnxk.rst|   2 +
 drivers/event/cnxk/cn10k_eventdev.c  | 106 ++-
 drivers/event/cnxk/cnxk_eventdev.h   |   2 +
 drivers/event/cnxk/cnxk_eventdev_adptr.c |  25 ++
 drivers/net/cnxk/cnxk_ethdev.h   |   2 +-
 5 files changed, 135 insertions(+), 2 deletions(-)

diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index 6fdccc2ab..0297cd3d5 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -45,6 +45,8 @@ Features of the OCTEON cnxk SSO PMD are:
 - Lockfree Tx from event eth Tx adapter using ``DEV_TX_OFFLOAD_MT_LOCKFREE``
   capability while maintaining receive packet order.
 - Full Rx/Tx offload support defined through ethdev queue configuration.
+- HW managed event vectorization on CN10K for packets enqueued from ethdev to
+  eventdev configurable per each Rx queue in Rx adapter.
 
 Prerequisites and Compilation procedure
 ---
diff --git a/drivers/event/cnxk/cn10k_eventdev.c 
b/drivers/event/cnxk/cn10k_eventdev.c
index e462f770c..e85fa4785 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -610,7 +610,8 @@ cn10k_sso_rx_adapter_caps_get(const struct rte_eventdev 
*event_dev,
else
*caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT |
RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ |
-   RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID;
+   RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID |
+   RTE_EVENT_ETH_RX_ADAPTER_CAP_EVENT_VECTOR;
 
return 0;
 }
@@ -671,6 +672,105 @@ cn10k_sso_rx_adapter_queue_del(const struct rte_eventdev 
*event_dev,
return cnxk_sso_rx_adapter_queue_del(event_dev, eth_dev, rx_queue_id);
 }
 
+static int
+cn10k_sso_rx_adapter_vector_limits(
+   const struct rte_eventdev *dev, const struct rte_eth_dev *eth_dev,
+   struct rte_event_eth_rx_adapter_vector_limits *limits)
+{
+   struct cnxk_eth_dev *cnxk_eth_dev;
+   int ret;
+
+   RTE_SET_USED(dev);
+   ret = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+   if (ret)
+   return -ENOTSUP;
+
+   cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev);
+   limits->log2_sz = true;
+   limits->min_sz = 1 << ROC_NIX_VWQE_MIN_SIZE_LOG2;
+   limits->max_sz = 1 << ROC_NIX_VWQE_MAX_SIZE_LOG2;
+   limits->min_timeout_ns =
+   (roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100;
+   limits->max_timeout_ns = BITMASK_ULL(8, 0) * limits->min_timeout_ns;
+
+   return 0;
+}
+
+static int
+cnxk_sso_rx_adapter_vwqe_enable(struct cnxk_eth_dev *cnxk_eth_dev,
+   uint16_t port_id, uint16_t rq_id, uint16_t sz,
+   uint64_t tmo_ns, struct rte_mempool *vmp)
+{
+   struct roc_nix_rq *rq;
+
+   rq = &cnxk_eth_dev->rqs[rq_id];
+
+   if (!rq->sso_ena)
+   return -EINVAL;
+   if (rq->flow_tag_width == 0)
+   return -EINVAL;
+
+   rq->vwqe_ena = 1;
+   rq->vwqe_first_skip = 0;
+   rq->vwqe_aura_handle = roc_npa_aura_handle_to_aura(vmp->pool_id);
+   rq->vwqe_max_sz_exp = rte_log2_u32(sz);
+   rq->vwqe_wait_tmo =
+   tmo_ns /
+   ((roc_nix_get_vwqe_interval(&cnxk_eth_dev->nix) + 1) * 100);
+   rq->tag_mask = (port_id & 0xF) << 20;
+   rq->tag_mask |=
+   (((port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV_VECTOR << 4))
+   << 24;
+
+   return roc_nix_rq_modify(&cnxk_eth_dev->nix, rq, 0);
+}
+
+static int
+cn10k_sso_rx_adapter_vector_config(
+   const struct rte_eventdev *event_dev, const struct rte_eth_dev *eth_dev,
+   int32_t rx_queue_id,
+   const struct rte_event_eth_rx_adapter_event_vector_config *config)
+{
+   struct cnxk_eth_dev *cnxk_eth_dev;
+   struct cnxk_sso_evdev *dev;
+   int i, rc;
+
+   rc = strncmp(eth_dev->device->driver->name, "net_cn10k", 8);
+   if (rc)
+   return -EINVAL;
+
+   dev = cnxk_sso_pmd_priv(event_dev);
+   cnxk_eth_dev = cnxk_eth_pmd_priv(eth_dev);
+   if (rx_queue_id < 0) {
+   for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
+   cnxk_sso_updt_xae_cnt(dev, config->vector_mp,
+ RTE_EVENT_TYPE_ETHDEV_VECTOR);
+   rc = cnxk_sso_xae_reconfigure(
+   (struct rte_eventdev *)(uintptr_t)event_dev);
+   rc = cnxk_sso_rx_adapter_vwqe_enable(
+   cnxk_eth_dev, eth_dev->data->port_id, i,
+   config->vector_sz, config->

[dpdk-dev] [PATCH v4 6/7] event/cnxk: add Rx event vector fastpath

2021-06-28 Thread pbhagavatula
From: Pavan Nikhilesh 

Add Rx event vector fastpath to convert HW defined metadata into
rte_mbuf and rte_event_vector.

Signed-off-by: Pavan Nikhilesh 
---
 doc/guides/rel_notes/release_21_08.rst |   1 +
 drivers/event/cnxk/cn10k_worker.h  |  56 +++
 drivers/net/cnxk/cn10k_rx.h| 200 +++--
 drivers/net/cnxk/cn10k_rx_vec.c|   2 +-
 drivers/net/cnxk/cn10k_rx_vec_mseg.c   |   5 +-
 5 files changed, 179 insertions(+), 85 deletions(-)

diff --git a/doc/guides/rel_notes/release_21_08.rst 
b/doc/guides/rel_notes/release_21_08.rst
index 80ff93269..11ccc9bcb 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -64,6 +64,7 @@ New Features
 
   * Added Rx/Tx adapter support for event/cnxk when the ethernet device 
requested
 is net/cnxk.
+  * Add support for event vectorization for Rx adapter.
 
 
 Removed Items
diff --git a/drivers/event/cnxk/cn10k_worker.h 
b/drivers/event/cnxk/cn10k_worker.h
index 3c90c8500..7a48a6b17 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -5,6 +5,8 @@
 #ifndef __CN10K_WORKER_H__
 #define __CN10K_WORKER_H__
 
+#include 
+
 #include "cnxk_ethdev.h"
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
@@ -101,6 +103,49 @@ cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, 
uint8_t port_id,
  mbuf_init | ((uint64_t)port_id) << 48, flags);
 }
 
+static __rte_always_inline void
+cn10k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags,
+  void *lookup_mem, void *tstamp)
+{
+   uint64_t mbuf_init = 0x10001ULL | RTE_PKTMBUF_HEADROOM |
+(flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+   struct rte_event_vector *vec;
+   uint16_t nb_mbufs, non_vec;
+   uint64_t **wqe;
+
+   mbuf_init |= ((uint64_t)port_id) << 48;
+   vec = (struct rte_event_vector *)vwqe;
+   wqe = vec->u64s;
+
+   nb_mbufs = RTE_ALIGN_FLOOR(vec->nb_elem, NIX_DESCS_PER_LOOP);
+   nb_mbufs = cn10k_nix_recv_pkts_vector(&mbuf_init, vec->mbufs, nb_mbufs,
+ flags | NIX_RX_VWQE_F, lookup_mem,
+ tstamp);
+   wqe += nb_mbufs;
+   non_vec = vec->nb_elem - nb_mbufs;
+
+   while (non_vec) {
+   struct nix_cqe_hdr_s *cqe = (struct nix_cqe_hdr_s *)wqe[0];
+   struct rte_mbuf *mbuf;
+   uint64_t tstamp_ptr;
+
+   mbuf = (struct rte_mbuf *)((char *)cqe -
+  sizeof(struct rte_mbuf));
+   cn10k_nix_cqe_to_mbuf(cqe, cqe->tag, mbuf, lookup_mem,
+ mbuf_init, flags);
+   /* Extracting tstamp, if PTP enabled */
+   tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)cqe) +
+  CNXK_SSO_WQE_SG_PTR);
+   cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp,
+   flags & NIX_RX_OFFLOAD_TSTAMP_F,
+   flags & NIX_RX_MULTI_SEG_F,
+   (uint64_t *)tstamp_ptr);
+   wqe[0] = (uint64_t *)mbuf;
+   non_vec--;
+   wqe++;
+   }
+}
+
 static __rte_always_inline uint16_t
 cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
   const uint32_t flags, void *lookup_mem)
@@ -152,6 +197,17 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct 
rte_event *ev,
flags & NIX_RX_MULTI_SEG_F,
(uint64_t *)tstamp_ptr);
gw.u64[1] = mbuf;
+   } else if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+  RTE_EVENT_TYPE_ETHDEV_VECTOR) {
+   uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+   __uint128_t vwqe_hdr = *(__uint128_t *)gw.u64[1];
+
+   vwqe_hdr = ((vwqe_hdr >> 64) & 0xFFF) | BIT_ULL(31) |
+  ((vwqe_hdr & 0x) << 48) |
+  ((uint64_t)port << 32);
+   *(uint64_t *)gw.u64[1] = (uint64_t)vwqe_hdr;
+   cn10k_process_vwqe(gw.u64[1], port, flags, lookup_mem,
+  ws->tstamp);
}
}
 
diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
index d9572b19e..a506a867c 100644
--- a/drivers/net/cnxk/cn10k_rx.h
+++ b/drivers/net/cnxk/cn10k_rx.h
@@ -21,6 +21,7 @@
  * Defining it from backwards to denote its been
  * not used as offload flags to pick function
  */
+#define NIX_RX_VWQE_F BIT(14)
 #define NIX_RX_MULTI_SEG_F BIT(15)
 
 #define CNXK_NIX_CQ_ENTRY_SZ 128
@@ -28,6 +29,11 @@
 #define CQE_CAST(x) ((struct nix_cqe_hdr_s *)(x))
 #de

[dpdk-dev] [PATCH v4 7/7] event/cnxk: add Tx event vector fastpath

2021-06-28 Thread pbhagavatula
From: Pavan Nikhilesh 

Add Tx event vector fastpath, integrate event vector Tx routine
into Tx burst.

Signed-off-by: Pavan Nikhilesh 
---
 doc/guides/eventdevs/cnxk.rst  |   1 +
 doc/guides/rel_notes/release_21_08.rst |   2 +-
 drivers/common/cnxk/roc_sso.h  |  23 ++
 drivers/event/cnxk/cn10k_eventdev.c|   3 +-
 drivers/event/cnxk/cn10k_worker.h  | 104 +++--
 drivers/event/cnxk/cn9k_worker.h   |   4 +-
 drivers/event/cnxk/cnxk_worker.h   |  22 --
 drivers/net/cnxk/cn10k_tx.c|   2 +-
 drivers/net/cnxk/cn10k_tx.h|  52 +
 drivers/net/cnxk/cn10k_tx_mseg.c   |   3 +-
 drivers/net/cnxk/cn10k_tx_vec.c|   2 +-
 drivers/net/cnxk/cn10k_tx_vec_mseg.c   |   2 +-
 12 files changed, 167 insertions(+), 53 deletions(-)

diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index 0297cd3d5..53560d383 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -47,6 +47,7 @@ Features of the OCTEON cnxk SSO PMD are:
 - Full Rx/Tx offload support defined through ethdev queue configuration.
 - HW managed event vectorization on CN10K for packets enqueued from ethdev to
   eventdev configurable per each Rx queue in Rx adapter.
+- Event vector transmission via Tx adapter.
 
 Prerequisites and Compilation procedure
 ---
diff --git a/doc/guides/rel_notes/release_21_08.rst 
b/doc/guides/rel_notes/release_21_08.rst
index 11ccc9bcb..9e49cb27d 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -64,7 +64,7 @@ New Features
 
   * Added Rx/Tx adapter support for event/cnxk when the ethernet device 
requested
 is net/cnxk.
-  * Add support for event vectorization for Rx adapter.
+  * Add support for event vectorization for Rx/Tx adapter.
 
 
 Removed Items
diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index a6030e7d8..316c6ccd5 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -44,6 +44,29 @@ struct roc_sso {
uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned;
 } __plt_cache_aligned;
 
+static __rte_always_inline void
+roc_sso_hws_head_wait(uintptr_t tag_op)
+{
+#ifdef RTE_ARCH_ARM64
+   uint64_t tag;
+
+   asm volatile(PLT_CPU_FEATURE_PREAMBLE
+"  ldr %[tag], [%[tag_op]] \n"
+"  tbnz %[tag], 35, done%= \n"
+"  sevl\n"
+"rty%=:wfe \n"
+"  ldr %[tag], [%[tag_op]] \n"
+"  tbz %[tag], 35, rty%=   \n"
+"done%=:   \n"
+: [tag] "=&r"(tag)
+: [tag_op] "r"(tag_op));
+#else
+   /* Wait for the SWTAG/SWTAG_FULL operation */
+   while (!(plt_read64(tag_op) & BIT_ULL(35)))
+   ;
+#endif
+}
+
 /* SSO device initialization */
 int __roc_api roc_sso_dev_init(struct roc_sso *roc_sso);
 int __roc_api roc_sso_dev_fini(struct roc_sso *roc_sso);
diff --git a/drivers/event/cnxk/cn10k_eventdev.c 
b/drivers/event/cnxk/cn10k_eventdev.c
index e85fa4785..6f37c5bd2 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -782,7 +782,8 @@ cn10k_sso_tx_adapter_caps_get(const struct rte_eventdev 
*dev,
if (ret)
*caps = 0;
else
-   *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+   *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT |
+   RTE_EVENT_ETH_TX_ADAPTER_CAP_EVENT_VECTOR;
 
return 0;
 }
diff --git a/drivers/event/cnxk/cn10k_worker.h 
b/drivers/event/cnxk/cn10k_worker.h
index 7a48a6b17..9cc099206 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -308,29 +308,120 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void 
*port,
 NIX_RX_FASTPATH_MODES
 #undef R
 
-static __rte_always_inline const struct cn10k_eth_txq *
+static __rte_always_inline struct cn10k_eth_txq *
 cn10k_sso_hws_xtract_meta(struct rte_mbuf *m,
  const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
 {
-   return (const struct cn10k_eth_txq *)
+   return (struct cn10k_eth_txq *)
txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
 }
 
+static __rte_always_inline void
+cn10k_sso_vwqe_split_tx(struct rte_mbuf **mbufs, uint16_t nb_mbufs,
+   uint64_t *cmd, uint16_t lmt_id, uintptr_t lmt_addr,
+   uint8_t sched_type, uintptr_t base,
+   const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+   const uint32_t flags)
+{
+   uint16_t port[4], queue[4];
+   struct cn10k_eth_txq *txq;
+   uint16_t i, j;
+   uintptr_t pa;
+
+   for (i = 0; i <

Re: [dpdk-dev] [PATCH v2 00/20] Add Marvell CNXK crypto PMDs

2021-06-28 Thread Akhil Goyal
> > Add cnxk crypto PMDs supporting Marvell CN106XX SoC, based on
> > 'common/cnxk'.
> >
> > This series utilizes 'common/cnxk' to register cn9k & cn10k crypto PMDs
> and
> > add symmetric cryptographic features for the same.
> >
> > Depends-on: series-17482 ("Add CPT in Marvell CNXK common driver")
> >
> > Changes in v2:
> > - Added documentation & updated release notes
> > - Reworked DP logs as suggested by Akhil
> > - Rearranged capability additions & feature flag updates as suggested by
> > Akhil
> > - Rebased on v2 of dependent series
> >
> Series Acked-by: Akhil Goyal 

Applied to dpdk-next-crypto
Fixed minor issues while merging.

Thanks.


Re: [dpdk-dev] [PATCH v3 6/7] power: support monitoring multiple Rx queues

2021-06-28 Thread Ananyev, Konstantin


> >> Use the new multi-monitor intrinsic to allow monitoring multiple ethdev
> >> Rx queues while entering the energy efficient power state. The multi
> >> version will be used unconditionally if supported, and the UMWAIT one
> >> will only be used when multi-monitor is not supported by the hardware.
> >>
> >> Signed-off-by: Anatoly Burakov 
> >> ---
> >>   doc/guides/prog_guide/power_man.rst |  9 ++--
> >>   lib/power/rte_power_pmd_mgmt.c  | 76 -
> >>   2 files changed, 80 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/doc/guides/prog_guide/power_man.rst 
> >> b/doc/guides/prog_guide/power_man.rst
> >> index fac2c19516..3245a5ebed 100644
> >> --- a/doc/guides/prog_guide/power_man.rst
> >> +++ b/doc/guides/prog_guide/power_man.rst
> >> @@ -221,13 +221,16 @@ power saving whenever empty poll count reaches a 
> >> certain number.
> >>   The "monitor" mode is only supported in the following configurations and 
> >> scenarios:
> >>
> >>   * If ``rte_cpu_get_intrinsics_support()`` function indicates that
> >> +  ``rte_power_monitor_multi()`` function is supported by the platform, 
> >> then
> >> +  monitoring multiple Ethernet Rx queues for traffic will be supported.
> >> +
> >> +* If ``rte_cpu_get_intrinsics_support()`` function indicates that only
> >> ``rte_power_monitor()`` is supported by the platform, then monitoring 
> >> will be
> >> limited to a mapping of 1 core 1 queue (thus, each Rx queue will have 
> >> to be
> >> monitored from a different lcore).
> >>
> >> -* If ``rte_cpu_get_intrinsics_support()`` function indicates that the
> >> -  ``rte_power_monitor()`` function is not supported, then monitor mode 
> >> will not
> >> -  be supported.
> >> +* If ``rte_cpu_get_intrinsics_support()`` function indicates that neither 
> >> of the
> >> +  two monitoring functions are supported, then monitor mode will not be 
> >> supported.
> >>
> >>   * Not all Ethernet devices support monitoring, even if the underlying
> >> platform may support the necessary CPU instructions. Please refer to
> >> diff --git a/lib/power/rte_power_pmd_mgmt.c 
> >> b/lib/power/rte_power_pmd_mgmt.c
> >> index 7762cd39b8..aab2d4f1ee 100644
> >> --- a/lib/power/rte_power_pmd_mgmt.c
> >> +++ b/lib/power/rte_power_pmd_mgmt.c
> >> @@ -155,6 +155,24 @@ queue_list_remove(struct pmd_core_cfg *cfg, const 
> >> union queue *q)
> >>return 0;
> >>   }
> >>
> >> +static inline int
> >> +get_monitor_addresses(struct pmd_core_cfg *cfg,
> >> + struct rte_power_monitor_cond *pmc)
> >> +{
> >> + const struct queue_list_entry *qle;
> >> + size_t i = 0;
> >> + int ret;
> >> +
> >> + TAILQ_FOREACH(qle, &cfg->head, next) {
> >> + struct rte_power_monitor_cond *cur = &pmc[i];
> >
> > Looks like you never increment 'i' value inside that function.
> > Also it probably will be safer to add 'num' parameter to check that
> > we will never over-run pmc[] boundaries.
> 
> Will fix in v4, good catch!
> 
> >
> >> + const union queue *q = &qle->queue;
> >> + ret = rte_eth_get_monitor_addr(q->portid, q->qid, cur);
> >> + if (ret < 0)
> >> + return ret;
> >> + }
> >> + return 0;
> >> +}
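For reference, a corrected sketch of the loop reflecting both review
comments above (the missing increment and a bounds check); the 'num'
parameter is a hypothetical addition, not necessarily what landed in v4:

static inline int
get_monitor_addresses(struct pmd_core_cfg *cfg,
		      struct rte_power_monitor_cond *pmc, size_t num)
{
	const struct queue_list_entry *qle;
	size_t i = 0;
	int ret;

	TAILQ_FOREACH(qle, &cfg->head, next) {
		const union queue *q = &qle->queue;

		if (i >= num) /* never over-run pmc[] boundaries */
			return -ERANGE;
		ret = rte_eth_get_monitor_addr(q->portid, q->qid, &pmc[i]);
		if (ret < 0)
			return ret;
		i++; /* the increment missing in the version above */
	}
	return 0;
}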
> >> +
> >>   static void
> >>   calc_tsc(void)
> >>   {
> >> @@ -183,6 +201,48 @@ calc_tsc(void)
> >>}
> >>   }
> >>
> >> +static uint16_t
> >> +clb_multiwait(uint16_t port_id, uint16_t qidx,
> >> + struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx,
> >> + uint16_t max_pkts __rte_unused, void *addr __rte_unused)
> >> +{
> >> + const unsigned int lcore = rte_lcore_id();
> >> + const union queue q = {.portid = port_id, .qid = qidx};
> >> + const bool empty = nb_rx == 0;
> >> + struct pmd_core_cfg *q_conf;
> >> +
> >> + q_conf = &lcore_cfg[lcore];
> >> +
> >> + /* early exit */
> >> + if (likely(!empty)) {
> >> + q_conf->empty_poll_stats = 0;
> >> + } else {
> >> + /* do we care about this particular queue? */
> >> + if (!queue_is_power_save(q_conf, &q))
> >> + return nb_rx;
> >
> > I still don't understand the need of 'special' power_save queue here...
> > Why we can't just have a function:
> >
> > get_number_of_queues_whose_sequential_empty_polls_less_then_threshold(struct
> >  pmd_core_cfg *lcore_cfg),
> > and then just:
> >
> > /* all queues have at least EMPTYPOLL_MAX sequential empty polls */
> > if 
> > (get_number_of_queues_whose_sequential_empty_polls_less_then_threshold(q_conf)
> >  == 0) {
> >  /* go into power-save mode here */
> > }
> 
> Okay, let's go through this step by step :)
> 
> Let's suppose we have three queues - q0, q1 and q2. We want to sleep
> whenever there's no traffic on *all of them*, however we cannot know
> that until we have checked all of them.
> 
> So, let's suppose that q0, q1 and q2 were empty all this time, but now
> some traffic arrived at q2 while we're still ch
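A minimal sketch of the aggregate check suggested above — sleep only when
every monitored queue on the lcore has crossed the empty-poll threshold.
All names here are illustrative, not the library's actual API:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define EMPTYPOLL_MAX 512 /* example threshold */

struct queue_stats {
	uint64_t empty_poll_cnt; /* consecutive empty polls on this queue */
};

struct lcore_queues {
	struct queue_stats *q;
	size_t nb_q;
};

/* Count queues still below the threshold; zero means every monitored
 * queue has been empty long enough to justify entering power save. */
static size_t
nb_busy_queues(const struct lcore_queues *cfg)
{
	size_t i, busy = 0;

	for (i = 0; i < cfg->nb_q; i++)
		if (cfg->q[i].empty_poll_cnt < EMPTYPOLL_MAX)
			busy++;
	return busy;
}

static bool
can_enter_power_save(const struct lcore_queues *cfg)
{
	return nb_busy_queues(cfg) == 0;
}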

Re: [dpdk-dev] [RFC] lib/ethdev: add dev configured flag

2021-06-28 Thread Huisong Li



On 2021/6/14 23:37, Andrew Rybchenko wrote:

Summary should start from "ethdev: "

Don't forget to include all maintainers in Cc the next time.
Just use --cc-cmd or --to-cmd options.

ok, thanks!


Adding Thomas.

On 5/8/21 11:00 AM, Huisong Li wrote:

Currently, if dev_configure is not invoked or fails to be invoked, users
can still invoke dev_start successfully. This patch adds a "dev_configured"
flag in "rte_eth_dev_data" to control whether dev_start can be invoked.


In theory there is an indirect condition. If number of configured Tx
*and* Rx queues is 0, device is not configured.


That's true. If the framework doesn't have this check, each driver needs 
to do this.


But it's a common thing, and it's probably more reasonable to put it in 
the ethdev layer.
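A sketch of that indirect condition, for comparison with the explicit flag
(it relies only on fields already in rte_eth_dev_data; requires
<rte_ethdev.h> and <stdbool.h>):

/* Infer "configured" from existing queue counts instead of a new flag. */
static inline bool
eth_dev_is_configured(const struct rte_eth_dev *dev)
{
	return dev->data->nb_rx_queues != 0 ||
	       dev->data->nb_tx_queues != 0;
}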




I have no strong opinion on the topic. Extra flag requires
extra housekeeping. Indirect conditions are not always good
and could be a subject to change.


Signed-off-by: Huisong Li 
---
  lib/ethdev/rte_ethdev.c  | 11 +++
  lib/ethdev/rte_ethdev_core.h |  6 +-
  2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index a187976..7d74b17 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -1604,6 +1604,8 @@ rte_eth_dev_configure(uint16_t port_id, 
uint16_t nb_rx_q, uint16_t nb_tx_q,

  }
    rte_ethdev_trace_configure(port_id, nb_rx_q, nb_tx_q, 
dev_conf, 0);

+    dev->data->dev_configured = 1;
+
  return 0;
  reset_queues:
  eth_dev_rx_queue_config(dev, 0);
@@ -1614,6 +1616,8 @@ rte_eth_dev_configure(uint16_t port_id, 
uint16_t nb_rx_q, uint16_t nb_tx_q,

  dev->data->mtu = old_mtu;
    rte_ethdev_trace_configure(port_id, nb_rx_q, nb_tx_q, 
dev_conf, ret);

+    dev->data->dev_configured = 0;
+
  return ret;
  }
  @@ -1749,6 +1753,13 @@ rte_eth_dev_start(uint16_t port_id)
    RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_start, -ENOTSUP);
  +    if (dev->data->dev_configured == 0) {
+    RTE_ETHDEV_LOG(INFO,
+    "Device with port_id=%"PRIu16" is not configured.\n",
+    port_id);
+    return -EINVAL;
+    }
+
  if (dev->data->dev_started != 0) {
  RTE_ETHDEV_LOG(INFO,
  "Device with port_id=%"PRIu16" already started\n",
diff --git a/lib/ethdev/rte_ethdev_core.h b/lib/ethdev/rte_ethdev_core.h
index 4679d94..b508769 100644
--- a/lib/ethdev/rte_ethdev_core.h
+++ b/lib/ethdev/rte_ethdev_core.h
@@ -167,7 +167,11 @@ struct rte_eth_dev_data {
  scattered_rx : 1,  /**< RX of scattered packets is ON(1) / 
OFF(0) */
  all_multicast : 1, /**< RX all multicast mode ON(1) / 
OFF(0). */
  dev_started : 1,   /**< Device state: STARTED(1) / 
STOPPED(0). */

-    lro : 1;   /**< RX LRO is ON(1) / OFF(0) */
+    lro : 1,  /**< RX LRO is ON(1) / OFF(0) */
+    dev_configured : 1;
+    /**< Device configuration state:
+ * CONFIGURED(1) / NOT CONFIGURED(0).
+ */
  uint8_t rx_queue_state[RTE_MAX_QUEUES_PER_PORT];
  /**< Queues state: HAIRPIN(2) / STARTED(1) / STOPPED(0). */
  uint8_t tx_queue_state[RTE_MAX_QUEUES_PER_PORT];



.


[dpdk-dev] [PATCH v3 0/2] add Rx/Tx offload paths for ICE AVX2

2021-06-28 Thread Wenzhuo Lu
Add specific paths for RX/TX AVX2, called offload paths.
These paths support the HW offload features, such as checksum, VLAN and RSS
offload.
These paths are chosen automatically according to the configuration.

v2:
 - fdir should be supported by offload and normal path.

v3:
 - rebased on the newest code.

Wenzhuo Lu (2):
  net/ice: add Tx AVX2 offload path
  net/ice: add Rx AVX2 offload path

 doc/guides/rel_notes/release_21_08.rst |   6 +
 drivers/net/ice/ice_rxtx.c |  87 +---
 drivers/net/ice/ice_rxtx.h |   7 +
 drivers/net/ice/ice_rxtx_vec_avx2.c| 350 +++--
 4 files changed, 282 insertions(+), 168 deletions(-)

-- 
1.8.3.1



[dpdk-dev] [PATCH v3 1/2] net/ice: add Tx AVX2 offload path

2021-06-28 Thread Wenzhuo Lu
Add a specific path for Tx AVX2.
This path supports the HW offload features, such as
checksum insertion and VLAN insertion.
It is chosen automatically according to the
configuration.

'inline' is used so that the duplicated code is
generated by the compiler.

Signed-off-by: Wenzhuo Lu 
---
 drivers/net/ice/ice_rxtx.c  | 37 -
 drivers/net/ice/ice_rxtx.h  |  2 ++
 drivers/net/ice/ice_rxtx_vec_avx2.c | 54 ++---
 3 files changed, 65 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
index fc9bb5a..5419047 100644
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -3288,7 +3288,7 @@
 #ifdef RTE_ARCH_X86
struct ice_tx_queue *txq;
int i;
-   int tx_check_ret = 0;
+   int tx_check_ret = -1;
 
if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
ad->tx_use_avx2 = false;
@@ -3307,13 +3307,14 @@
PMD_DRV_LOG(NOTICE,
"AVX512 is not supported in build env");
 #endif
-   if (!ad->tx_use_avx512 && tx_check_ret == 
ICE_VECTOR_PATH &&
-   (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
-   rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&
-   rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256)
+   if (!ad->tx_use_avx512 &&
+   (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 
1 ||
+   rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) 
== 1) &&
+   rte_vect_get_max_simd_bitwidth() >= 
RTE_VECT_SIMD_256)
ad->tx_use_avx2 = true;
 
-   if (!ad->tx_use_avx512 && tx_check_ret == 
ICE_VECTOR_OFFLOAD_PATH)
+   if (!ad->tx_use_avx2 && !ad->tx_use_avx512 &&
+   tx_check_ret == ICE_VECTOR_OFFLOAD_PATH)
ad->tx_vec_allowed = false;
 
if (ad->tx_vec_allowed) {
@@ -3331,6 +3332,7 @@
}
 
if (ad->tx_vec_allowed) {
+   dev->tx_pkt_prepare = NULL;
if (ad->tx_use_avx512) {
 #ifdef CC_AVX512_SUPPORT
if (tx_check_ret == ICE_VECTOR_OFFLOAD_PATH) {
@@ -3339,6 +3341,7 @@
dev->data->port_id);
dev->tx_pkt_burst =
ice_xmit_pkts_vec_avx512_offload;
+   dev->tx_pkt_prepare = ice_prep_pkts;
} else {
PMD_DRV_LOG(NOTICE,
"Using AVX512 Vector Tx (port %d).",
@@ -3347,14 +3350,22 @@
}
 #endif
} else {
-   PMD_DRV_LOG(DEBUG, "Using %sVector Tx (port %d).",
-   ad->tx_use_avx2 ? "avx2 " : "",
-   dev->data->port_id);
-   dev->tx_pkt_burst = ad->tx_use_avx2 ?
-   ice_xmit_pkts_vec_avx2 :
-   ice_xmit_pkts_vec;
+   if (tx_check_ret == ICE_VECTOR_OFFLOAD_PATH) {
+   PMD_DRV_LOG(NOTICE,
+   "Using AVX2 OFFLOAD Vector Tx (port 
%d).",
+   dev->data->port_id);
+   dev->tx_pkt_burst =
+   ice_xmit_pkts_vec_avx2_offload;
+   dev->tx_pkt_prepare = ice_prep_pkts;
+   } else {
+   PMD_DRV_LOG(DEBUG, "Using %sVector Tx (port 
%d).",
+   ad->tx_use_avx2 ? "avx2 " : "",
+   dev->data->port_id);
+   dev->tx_pkt_burst = ad->tx_use_avx2 ?
+   ice_xmit_pkts_vec_avx2 :
+   ice_xmit_pkts_vec;
+   }
}
-   dev->tx_pkt_prepare = NULL;
 
return;
}
diff --git a/drivers/net/ice/ice_rxtx.h b/drivers/net/ice/ice_rxtx.h
index 86b6f3d..f0536f7 100644
--- a/drivers/net/ice/ice_rxtx.h
+++ b/drivers/net/ice/ice_rxtx.h
@@ -255,6 +255,8 @@ uint16_t ice_recv_scattered_pkts_vec_avx2(void *rx_queue,
  uint16_t nb_pkts);
 uint16_t ice_xmit_pkts_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
+uint16_t ice_xmit_pkts_vec_avx2_offload(void *tx_queue, struct rte_mbuf 
**tx_pkts,
+   uint16_t nb_pkts);
 uint16_t ice_recv_pkts_vec_avx512(vo

[dpdk-dev] [PATCH v3 2/2] net/ice: add Rx AVX2 offload path

2021-06-28 Thread Wenzhuo Lu
Add a specific path for Rx AVX2.
This path supports the HW offload features, such as
checksum, VLAN stripping and RSS hash.
It is chosen automatically according to the
configuration.

'inline' is used so that the duplicated code is
generated by the compiler.

Signed-off-by: Wenzhuo Lu 
---
 doc/guides/rel_notes/release_21_08.rst |   6 +
 drivers/net/ice/ice_rxtx.c |  50 --
 drivers/net/ice/ice_rxtx.h |   5 +
 drivers/net/ice/ice_rxtx_vec_avx2.c| 296 +++--
 4 files changed, 217 insertions(+), 140 deletions(-)

diff --git a/doc/guides/rel_notes/release_21_08.rst 
b/doc/guides/rel_notes/release_21_08.rst
index a6ecfdf..203b772 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -55,6 +55,12 @@ New Features
  Also, make sure to start the actual text at the margin.
  ===
 
+* **Updated Intel ice driver.**
+
+  * In AVX2 code, added the new RX and TX paths to use the HW offload
+features. When the HW offload features are configured to be used, the
+offload paths are chosen automatically. In parallel, the support for HW
+offload features was removed from the legacy AVX2 paths.
 
 Removed Items
 -
diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
index 5419047..97c3d80 100644
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -1995,7 +1995,9 @@
dev->rx_pkt_burst == ice_recv_scattered_pkts_vec_avx512_offload ||
 #endif
dev->rx_pkt_burst == ice_recv_pkts_vec_avx2 ||
-   dev->rx_pkt_burst == ice_recv_scattered_pkts_vec_avx2)
+   dev->rx_pkt_burst == ice_recv_pkts_vec_avx2_offload ||
+   dev->rx_pkt_burst == ice_recv_scattered_pkts_vec_avx2 ||
+   dev->rx_pkt_burst == ice_recv_scattered_pkts_vec_avx2_offload)
return ptypes;
 #endif
 
@@ -3052,7 +3054,7 @@
 #ifdef RTE_ARCH_X86
struct ice_rx_queue *rxq;
int i;
-   int rx_check_ret = 0;
+   int rx_check_ret = -1;
 
if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
ad->rx_use_avx512 = false;
@@ -3107,14 +3109,25 @@

ice_recv_scattered_pkts_vec_avx512;
}
 #endif
+   } else if (ad->rx_use_avx2) {
+   if (rx_check_ret == ICE_VECTOR_OFFLOAD_PATH) {
+   PMD_DRV_LOG(NOTICE,
+   "Using AVX2 OFFLOAD Vector 
Scattered Rx (port %d).",
+   dev->data->port_id);
+   dev->rx_pkt_burst =
+   
ice_recv_scattered_pkts_vec_avx2_offload;
+   } else {
+   PMD_DRV_LOG(NOTICE,
+   "Using AVX2 Vector 
Scattered Rx (port %d).",
+   dev->data->port_id);
+   dev->rx_pkt_burst =
+   
ice_recv_scattered_pkts_vec_avx2;
+   }
} else {
PMD_DRV_LOG(DEBUG,
-   "Using %sVector Scattered Rx (port 
%d).",
-   ad->rx_use_avx2 ? "avx2 " : "",
+   "Using Vector Scattered Rx (port %d).",
dev->data->port_id);
-   dev->rx_pkt_burst = ad->rx_use_avx2 ?
-   ice_recv_scattered_pkts_vec_avx2 :
-   ice_recv_scattered_pkts_vec;
+   dev->rx_pkt_burst = ice_recv_scattered_pkts_vec;
}
} else {
if (ad->rx_use_avx512) {
@@ -3133,14 +3146,25 @@
ice_recv_pkts_vec_avx512;
}
 #endif
+   } else if (ad->rx_use_avx2) {
+   if (rx_check_ret == ICE_VECTOR_OFFLOAD_PATH) {
+   PMD_DRV_LOG(NOTICE,
+   "Using AVX2 OFFLOAD Vector 
Rx (port %d).",
+   dev->data->port_id);
+   dev->rx_pkt_burst =
+   ice_recv_pkts_vec_avx2_offload;
+   } else {
+   PMD_DRV_LOG(NOTICE,
+   "Using AVX2 Vector Rx (port 
%d).",
+   dev->data-

Re: [dpdk-dev] [RFC 2/2] app/testpmd: support VXLAN last 8-bits field matching

2021-06-28 Thread Rongwei Liu
Hi Singh:
This update only provides the capability to match the last 8-bit
reserved field of the VXLAN header.
It does not modify the packet at all; the packet content is up to the
end user's decision.
BR
Rongwei

> -Original Message-
> From: dev  On Behalf Of Singh, Aman Deep
> Sent: Wednesday, June 16, 2021 1:16 AM
> To: dev@dpdk.org
> Subject: Re: [dpdk-dev] [RFC 2/2] app/testpmd: support VXLAN last 8-bits
> field matching
> 
> 
> 
> Hi Rongwei,
> 
> As per VxLAN RFC 7348- 
> 
> Reserved fields (24 bits and 8 bits): MUST be set to zero on transmission and
> ignored on receipt.
> 
> I hope this criterion will be met for external packets.
> 
> On 5/31/2021 3:49 PM, rongwei liu wrote:
> > Add a new testpmd pattern field 'last_rsvd' that supports matching
> > the last 8 bits of the VXLAN header.
> >
> > The examples for the "last_rsvd" pattern field are as below:
> >
> > 1. ...pattern eth / ipv4 / udp / vxlan last_rsvd is 0x80 / end ...
> >
> > This flow will exactly match the last 8-bits to be 0x80.
> >
> > 2. ...pattern eth / ipv4 / udp / vxlan last_rsvd spec 0x80 vxlan mask
> > 0x80 / end ...
> Shouldn't this be as below, (or I am mistaken)-
> 
> 2. ...pattern eth / ipv4 / udp / vxlan last_rsvd spec 0x80 last_rsvd mask 
> 0x80 /
> end ...
> 
> > This flow will only match the MSB of the last 8-bits to be 1.
> >
> > Signed-off-by: rongwei liu 
> > ---
> >   app/test-pmd/cmdline_flow.c | 9 +
> >   app/test-pmd/util.c | 5 +++--
> >   doc/guides/testpmd_app_ug/testpmd_funcs.rst | 1 +
> >   3 files changed, 13 insertions(+), 2 deletions(-)
> >
> > diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
> > index 1c587bb7b8..6e76a625ca 100644
> > --- a/app/test-pmd/cmdline_flow.c
> > +++ b/app/test-pmd/cmdline_flow.c
> > @@ -207,6 +207,7 @@ enum index {
> >   ITEM_SCTP_CKSUM,
> >   ITEM_VXLAN,
> >   ITEM_VXLAN_VNI,
> > + ITEM_VXLAN_LAST_RSVD,
> >   ITEM_E_TAG,
> >   ITEM_E_TAG_GRP_ECID_B,
> >   ITEM_NVGRE,
> > @@ -1129,6 +1130,7 @@ static const enum index item_sctp[] = {
> >
> >   static const enum index item_vxlan[] = {
> >   ITEM_VXLAN_VNI,
> > + ITEM_VXLAN_LAST_RSVD,
> >   ITEM_NEXT,
> >   ZERO,
> >   };
> > @@ -2806,6 +2808,13 @@ static const struct token token_list[] = {
> >   .next = NEXT(item_vxlan, NEXT_ENTRY(UNSIGNED), item_param),
> >   .args = ARGS(ARGS_ENTRY_HTON(struct rte_flow_item_vxlan, 
> > vni)),
> >   },
> > + [ITEM_VXLAN_LAST_RSVD] = {
> > + .name = "last_rsvd",
> > + .help = "VXLAN last reserved bits",
> > + .next = NEXT(item_vxlan, NEXT_ENTRY(UNSIGNED), item_param),
> > + .args = ARGS(ARGS_ENTRY_HTON(struct rte_flow_item_vxlan,
> > +  rsvd1)),
> > + },
> >   [ITEM_E_TAG] = {
> >   .name = "e_tag",
> >   .help = "match E-Tag header", diff --git
> > a/app/test-pmd/util.c b/app/test-pmd/util.c index
> > a9e431a8b2..59626518d5 100644
> > --- a/app/test-pmd/util.c
> > +++ b/app/test-pmd/util.c
> > @@ -266,8 +266,9 @@ dump_pkt_burst(uint16_t port_id, uint16_t queue,
> struct rte_mbuf *pkts[],
> >   vx_vni = rte_be_to_cpu_32(vxlan_hdr->vx_vni);
> >   MKDUMPSTR(print_buf, buf_size, cur_len,
> > " - VXLAN packet: packet type =%d, "
> > -   "Destination UDP port =%d, VNI = 
> > %d",
> > -   packet_type, udp_port, vx_vni >> 8);
> > +   "Destination UDP port =%d, VNI = 
> > %d, "
> > +   "last_rsvd = %d", packet_type,
> > +   udp_port, vx_vni >> 8, vx_vni
> > + & 0xff);
> >   }
> >   }
> >   MKDUMPSTR(print_buf, buf_size, cur_len, diff --git
> > a/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> > b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> > index 33857acf54..4ca3103067 100644
> > --- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> > +++ b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> > @@ -3694,6 +3694,7 @@ This section lists supported pattern items and
> their attributes, if any.
> >   - ``vxlan``: match VXLAN header.
> >
> > - ``vni {unsigned}``: VXLAN identifier.
> > +  - ``last_rsvd {unsigned}``: VXLAN last reserved 8-bits.
> >
> >   - ``e_tag``: match IEEE 802.1BR E-Tag header.
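For reference, the util.c decode above is plain byte-splitting of the
32-bit vx_vni word (upper 24 bits VNI, low 8 bits reserved, per RFC 7348);
a worked example with an arbitrary value:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t vx_vni = 0x00123480; /* arbitrary example */

	printf("VNI = 0x%06x, last_rsvd = 0x%02x\n",
	       (unsigned int)(vx_vni >> 8), (unsigned int)(vx_vni & 0xff));
	/* prints: VNI = 0x001234, last_rsvd = 0x80 */
	return 0;
}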
> >

