[PATCH v2] drivers: remove support to limit XAQ in cnxk event driver

2022-09-20 Thread Shijith Thotton
Removed support for limiting XAQ via devargs. If XAQ is limited, new add-work
operations could run out of XAQ entries and disable the queue.

Signed-off-by: Shijith Thotton 
---
v2:
 * Removed unused function parameter.

 doc/guides/eventdevs/cnxk.rst  |  5 ++---
 drivers/common/cnxk/roc_mbox.h |  2 +-
 drivers/common/cnxk/roc_sso.c  |  4 +---
 drivers/common/cnxk/roc_sso.h  |  2 +-
 drivers/event/cnxk/cnxk_eventdev.c | 11 ---
 drivers/event/cnxk/cnxk_eventdev.h |  1 -
 6 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/doc/guides/eventdevs/cnxk.rst b/doc/guides/eventdevs/cnxk.rst
index 8537f6257e..3baf26fb54 100644
--- a/doc/guides/eventdevs/cnxk.rst
+++ b/doc/guides/eventdevs/cnxk.rst
@@ -95,12 +95,11 @@ Runtime Config Options
   We can control the QoS of SSO GGRP by modifying the above mentioned
   thresholds. GGRPs that have higher importance can be assigned higher
   thresholds than the rest. The dictionary format is as follows
-  [Qx-XAQ-TAQ-IAQ][Qz-XAQ-TAQ-IAQ] expressed in percentages, 0 represents
-  default.
+  [Qx-TAQ-IAQ][Qz-TAQ-IAQ] expressed in percentages, 0 represents default.
 
   For example::
 
--a 0002:0e:00.0,qos=[1-50-50-50]
+-a 0002:0e:00.0,qos=[1-50-50]
 
 - ``Force Rx Back pressure``
 
diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index 965c704322..d07c8be9d9 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -1330,7 +1330,7 @@ struct sso_grp_priority {
 struct sso_grp_qos_cfg {
struct mbox_msghdr hdr;
uint16_t __io grp;
-   uint32_t __io xaq_limit;
+   uint32_t __io rsvd;
uint16_t __io taq_thr;
uint16_t __io iaq_thr;
 };
diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index 126a9cba99..9d5efe848e 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -377,7 +377,7 @@ roc_sso_hwgrp_hws_link_status(struct roc_sso *roc_sso, 
uint8_t hws,
 
 int
 roc_sso_hwgrp_qos_config(struct roc_sso *roc_sso, struct roc_sso_hwgrp_qos 
*qos,
-uint8_t nb_qos, uint32_t nb_xaq)
+uint8_t nb_qos)
 {
struct sso *sso = roc_sso_to_sso_priv(roc_sso);
struct dev *dev = &sso->dev;
@@ -386,7 +386,6 @@ roc_sso_hwgrp_qos_config(struct roc_sso *roc_sso, struct 
roc_sso_hwgrp_qos *qos,
 
plt_spinlock_lock(&sso->mbox_lock);
for (i = 0; i < nb_qos; i++) {
-   uint8_t xaq_prcnt = qos[i].xaq_prcnt;
uint8_t iaq_prcnt = qos[i].iaq_prcnt;
uint8_t taq_prcnt = qos[i].taq_prcnt;
 
@@ -405,7 +404,6 @@ roc_sso_hwgrp_qos_config(struct roc_sso *roc_sso, struct 
roc_sso_hwgrp_qos *qos,
}
}
req->grp = qos[i].hwgrp;
-   req->xaq_limit = (nb_xaq * (xaq_prcnt ? xaq_prcnt : 100)) / 100;
req->iaq_thr = (SSO_HWGRP_IAQ_MAX_THR_MASK *
(iaq_prcnt ? iaq_prcnt : 100)) /
   100;
diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index ab7cee1c60..5075991ef7 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -89,7 +89,7 @@ int __roc_api roc_sso_rsrc_init(struct roc_sso *roc_sso, 
uint8_t nb_hws,
 void __roc_api roc_sso_rsrc_fini(struct roc_sso *roc_sso);
 int __roc_api roc_sso_hwgrp_qos_config(struct roc_sso *roc_sso,
   struct roc_sso_hwgrp_qos *qos,
-  uint8_t nb_qos, uint32_t nb_xaq);
+  uint8_t nb_qos);
 int __roc_api roc_sso_hwgrp_alloc_xaq(struct roc_sso *roc_sso,
  uint32_t npa_aura_id, uint16_t hwgrps);
 int __roc_api roc_sso_hwgrp_release_xaq(struct roc_sso *roc_sso,
diff --git a/drivers/event/cnxk/cnxk_eventdev.c 
b/drivers/event/cnxk/cnxk_eventdev.c
index 8923e94824..db62d32a81 100644
--- a/drivers/event/cnxk/cnxk_eventdev.c
+++ b/drivers/event/cnxk/cnxk_eventdev.c
@@ -400,10 +400,8 @@ cnxk_sso_start(struct rte_eventdev *event_dev, 
cnxk_sso_hws_reset_t reset_fn,
qos[i].hwgrp = dev->qos_parse_data[i].queue;
qos[i].iaq_prcnt = dev->qos_parse_data[i].iaq_prcnt;
qos[i].taq_prcnt = dev->qos_parse_data[i].taq_prcnt;
-   qos[i].xaq_prcnt = dev->qos_parse_data[i].xaq_prcnt;
}
-   rc = roc_sso_hwgrp_qos_config(&dev->sso, qos, dev->qos_queue_cnt,
- dev->xae_cnt);
+   rc = roc_sso_hwgrp_qos_config(&dev->sso, qos, dev->qos_queue_cnt);
if (rc < 0) {
plt_sso_dbg("failed to configure HWGRP QoS rc = %d", rc);
return -EINVAL;
@@ -477,7 +475,7 @@ parse_queue_param(char *value, void *opaque)
}
 
if (val != (&queue_qos.iaq_prcnt + 1)) {
-   plt_err("Invalid QoS parameter expected [Qx-XAQ-TAQ-IAQ]");
+   plt_err("Invalid 

[PATCH v1] ethdev: add async flow connection tracking configuration

2022-09-20 Thread Suanming Mou
In the queue-based async flow engine, in order to optimize the flow
insertion rate, the PMD can use hints from the application to
pre-allocate resources during the initialization phase for actions
such as count/meter/aging.

This commit adds the connection tracking action hints.

Signed-off-by: Suanming Mou 
Acked-by: Ori Kam 
---
 app/test-pmd/cmdline_flow.c| 10 ++
 doc/guides/rel_notes/release_22_11.rst |  6 ++
 lib/ethdev/rte_flow.h  | 10 ++
 3 files changed, 26 insertions(+)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 7f50028eb7..c9cbf381c4 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -219,6 +219,7 @@ enum index {
CONFIG_COUNTERS_NUMBER,
CONFIG_AGING_OBJECTS_NUMBER,
CONFIG_METERS_NUMBER,
+   CONFIG_CONN_TRACK_NUMBER,
 
/* Indirect action arguments */
INDIRECT_ACTION_CREATE,
@@ -1081,6 +1082,7 @@ static const enum index next_config_attr[] = {
CONFIG_COUNTERS_NUMBER,
CONFIG_AGING_OBJECTS_NUMBER,
CONFIG_METERS_NUMBER,
+   CONFIG_CONN_TRACK_NUMBER,
END,
ZERO,
 };
@@ -2667,6 +2669,14 @@ static const struct token token_list[] = {
.args = ARGS(ARGS_ENTRY(struct buffer,
args.configure.port_attr.nb_meters)),
},
+   [CONFIG_CONN_TRACK_NUMBER] = {
+   .name = "conn_tracks_number",
+   .help = "number of connection trackings",
+   .next = NEXT(next_config_attr,
+NEXT_ENTRY(COMMON_UNSIGNED)),
+   .args = ARGS(ARGS_ENTRY(struct buffer,
+   
args.configure.port_attr.nb_conn_tracks)),
+   },
/* Top-level command. */
[PATTERN_TEMPLATE] = {
.name = "pattern_template",
diff --git a/doc/guides/rel_notes/release_22_11.rst 
b/doc/guides/rel_notes/release_22_11.rst
index 8c021cf050..d5e64ff9a1 100644
--- a/doc/guides/rel_notes/release_22_11.rst
+++ b/doc/guides/rel_notes/release_22_11.rst
@@ -55,6 +55,12 @@ New Features
  Also, make sure to start the actual text at the margin.
  ===
 
+* **Added configuration for asynchronous flow connection tracking.**
+
+  Added connection tracking action number hint to ``rte_flow_configure``
+  and ``rte_flow_info_get``.
+  PMD can prepare the connection tracking resources according to the hint.
+
 
 Removed Items
 -
diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h
index a79f1e7ef0..c2747abc55 100644
--- a/lib/ethdev/rte_flow.h
+++ b/lib/ethdev/rte_flow.h
@@ -4902,6 +4902,11 @@ struct rte_flow_port_info {
 * @see RTE_FLOW_ACTION_TYPE_METER
 */
uint32_t max_nb_meters;
+   /**
+* Maximum number of connection trackings.
+* @see RTE_FLOW_ACTION_TYPE_CONNTRACK
+*/
+   uint32_t max_nb_conn_tracks;
 };
 
 /**
@@ -4971,6 +4976,11 @@ struct rte_flow_port_attr {
 * @see RTE_FLOW_ACTION_TYPE_METER
 */
uint32_t nb_meters;
+   /**
+* Number of connection trackings to configure.
+* @see RTE_FLOW_ACTION_TYPE_CONNTRACK
+*/
+   uint32_t nb_conn_tracks;
 };
 
 /**
-- 
2.25.1



[PATCH v1] ethdev: add indirect action async query

2022-09-20 Thread Suanming Mou
rte_flow_action_handle_create/destroy/update() each have their own
asynchronous rte_flow_async_action_handle_create/destroy/update()
counterparts to accelerate indirect action operations in the
queue-based flow engine. However, the asynchronous version of the query
function for indirect actions was missing.

This patch adds the rte_flow_async_action_handle_query() function,
which corresponds to rte_flow_action_handle_query(). The new asynchronous
function enqueues the query to the hardware, similar to what
asynchronous flow management does, and returns immediately to free
the CPU for other tasks. The application can get the query results from
rte_flow_pull() when the hardware completes its work.

Signed-off-by: Suanming Mou 
Acked-by: Ori Kam 
---
 app/test-pmd/cmdline_flow.c |  34 +++
 app/test-pmd/config.c   | 240 ++--
 app/test-pmd/testpmd.h  |  28 +++
 doc/guides/prog_guide/rte_flow.rst  |  16 ++
 doc/guides/rel_notes/release_22_11.rst  |   5 +
 doc/guides/testpmd_app_ug/testpmd_funcs.rst |  19 ++
 lib/ethdev/rte_flow.c   |  18 ++
 lib/ethdev/rte_flow.h   |  44 
 lib/ethdev/rte_flow_driver.h|   9 +
 lib/ethdev/version.map  |   3 +
 10 files changed, 345 insertions(+), 71 deletions(-)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 7f50028eb7..0223286c1a 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -145,6 +145,7 @@ enum index {
QUEUE_INDIRECT_ACTION_CREATE,
QUEUE_INDIRECT_ACTION_UPDATE,
QUEUE_INDIRECT_ACTION_DESTROY,
+   QUEUE_INDIRECT_ACTION_QUERY,
 
/* Queue indirect action create arguments */
QUEUE_INDIRECT_ACTION_CREATE_ID,
@@ -161,6 +162,9 @@ enum index {
QUEUE_INDIRECT_ACTION_DESTROY_ID,
QUEUE_INDIRECT_ACTION_DESTROY_POSTPONE,
 
+   /* Queue indirect action query arguments */
+   QUEUE_INDIRECT_ACTION_QUERY_POSTPONE,
+
/* Push arguments. */
PUSH_QUEUE,
 
@@ -1171,6 +1175,7 @@ static const enum index next_qia_subcmd[] = {
QUEUE_INDIRECT_ACTION_CREATE,
QUEUE_INDIRECT_ACTION_UPDATE,
QUEUE_INDIRECT_ACTION_DESTROY,
+   QUEUE_INDIRECT_ACTION_QUERY,
ZERO,
 };
 
@@ -1197,6 +1202,12 @@ static const enum index next_qia_destroy_attr[] = {
ZERO,
 };
 
+static const enum index next_qia_query_attr[] = {
+   QUEUE_INDIRECT_ACTION_QUERY_POSTPONE,
+   END,
+   ZERO,
+};
+
 static const enum index next_ia_create_attr[] = {
INDIRECT_ACTION_CREATE_ID,
INDIRECT_ACTION_INGRESS,
@@ -3013,6 +3024,14 @@ static const struct token token_list[] = {
.next = NEXT(next_qia_destroy_attr),
.call = parse_qia_destroy,
},
+   [QUEUE_INDIRECT_ACTION_QUERY] = {
+   .name = "query",
+   .help = "query indirect action",
+   .next = NEXT(next_qia_query_attr,
+NEXT_ENTRY(COMMON_INDIRECT_ACTION_ID)),
+   .args = ARGS(ARGS_ENTRY(struct buffer, args.vc.attr.group)),
+   .call = parse_qia,
+   },
/* Indirect action destroy arguments. */
[QUEUE_INDIRECT_ACTION_DESTROY_POSTPONE] = {
.name = "postpone",
@@ -3038,6 +3057,14 @@ static const struct token token_list[] = {
 NEXT_ENTRY(COMMON_BOOLEAN)),
.args = ARGS(ARGS_ENTRY(struct buffer, postpone)),
},
+   /* Indirect action query arguments. */
+   [QUEUE_INDIRECT_ACTION_QUERY_POSTPONE] = {
+   .name = "postpone",
+   .help = "postpone query operation",
+   .next = NEXT(next_qia_query_attr,
+NEXT_ENTRY(COMMON_BOOLEAN)),
+   .args = ARGS(ARGS_ENTRY(struct buffer, postpone)),
+   },
/* Indirect action create arguments. */
[QUEUE_INDIRECT_ACTION_CREATE_ID] = {
.name = "action_id",
@@ -6682,6 +6709,8 @@ parse_qia(struct context *ctx, const struct token *token,
(void *)RTE_ALIGN_CEIL((uintptr_t)(out + 1),
   sizeof(double));
out->args.vc.attr.group = UINT32_MAX;
+   /* fallthrough */
+   case QUEUE_INDIRECT_ACTION_QUERY:
out->command = ctx->curr;
ctx->objdata = 0;
ctx->object = out;
@@ -10509,6 +10538,11 @@ cmd_flow_parsed(const struct buffer *in)
in->args.vc.attr.group,
in->args.vc.actions);
break;
+   case QUEUE_INDIRECT_ACTION_QUERY:
+   port_queue_action_handle_query(in->port,
+  in->queue, in->postpone,
+  in->args.vc.attr.group);
+

Re: [PATCH] vhost: use try_lock in rte_vhost_vring_call

2022-09-20 Thread Maxime Coquelin




On 9/7/22 02:40, Liu, Changpeng wrote:




-Original Message-
From: Stephen Hemminger 
Sent: Wednesday, September 7, 2022 5:16 AM
To: Liu, Changpeng 
Cc: dev@dpdk.org; Maxime Coquelin ; Xia,
Chenbo 
Subject: Re: [PATCH] vhost: use try_lock in rte_vhost_vring_call

On Tue,  6 Sep 2022 10:22:25 +0800
Changpeng Liu  wrote:


Note that this function is in data path, so the thread context
may not same as socket messages processing context, by using
try_lock here, users can have another try in case of VQ's access
lock is held by `vhost-events` thread.

Signed-off-by: Changpeng Liu 
---
  lib/vhost/vhost.c | 6 +-
  1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
index 60cb05a0ff..072d2acb7b 100644
--- a/lib/vhost/vhost.c
+++ b/lib/vhost/vhost.c
@@ -1329,7 +1329,11 @@ rte_vhost_vring_call(int vid, uint16_t vring_idx)
if (!vq)
return -1;

-   rte_spinlock_lock(&vq->access_lock);
+   if (!rte_spinlock_trylock(&vq->access_lock)) {
+   VHOST_LOG_CONFIG(dev->ifname, DEBUG,
+   "failed to kick guest, virtqueue busy.\n");
+   return -1;
+   }



If it is a race, logging a message is not a good idea; the log will fill
with this noise.

Instead, make it a statistic that can be seen via xstats.

It's a DEBUG log, users can't see it in practice.



Having an xstat would enable live debugging & post-mortem analysis.
You can have both the stats and the debug log.

Regards,
Maxime



Re: [PATCH] vhost: use try_lock in rte_vhost_vring_call

2022-09-20 Thread Maxime Coquelin




On 9/6/22 04:22, Changpeng Liu wrote:

Note that this function is in data path, so the thread context
may not same as socket messages processing context, by using
try_lock here, users can have another try in case of VQ's access
lock is held by `vhost-events` thread.

Signed-off-by: Changpeng Liu 
---
  lib/vhost/vhost.c | 6 +-
  1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
index 60cb05a0ff..072d2acb7b 100644
--- a/lib/vhost/vhost.c
+++ b/lib/vhost/vhost.c
@@ -1329,7 +1329,11 @@ rte_vhost_vring_call(int vid, uint16_t vring_idx)
if (!vq)
return -1;
  
-	rte_spinlock_lock(&vq->access_lock);

+   if (!rte_spinlock_trylock(&vq->access_lock)) {
+   VHOST_LOG_CONFIG(dev->ifname, DEBUG,
+   "failed to kick guest, virtqueue busy.\n");
+   return -1;
+   }
  
  	if (vq_is_packed(dev))

vhost_vring_call_packed(dev, vq);


I think that's problematic, because it will break other applications
that currently rely on the API to block until the call is done.

Just some internal DPDK usage of this API:
./drivers/vdpa/ifc/ifcvf_vdpa.c:871:	rte_vhost_vring_call(internal->vid, 
qid);

./examples/vhost/virtio_net.c:236:  rte_vhost_vring_call(dev->vid, 
queue_id);
./examples/vhost/virtio_net.c:446:  rte_vhost_vring_call(dev->vid, 
queue_id);
./examples/vhost_blk/vhost_blk.c:99: 
rte_vhost_vring_call(task->ctrlr->vid, vq->id);
./examples/vhost_blk/vhost_blk.c:134: 
rte_vhost_vring_call(task->ctrlr->vid, vq->id);


This change will break all the above uses.

And that's not counting external projects.

You should rather introduce a new API that does not block.

Regards,
Maxime



Re: [PATCH] vhost: use try_lock in rte_vhost_vring_call

2022-09-20 Thread Maxime Coquelin




On 9/20/22 04:53, Xia, Chenbo wrote:

-Original Message-
From: Liu, Changpeng 
Sent: Tuesday, September 20, 2022 10:34 AM
To: Xia, Chenbo ; dev@dpdk.org
Cc: Maxime Coquelin 
Subject: RE: [PATCH] vhost: use try_lock in rte_vhost_vring_call

Hi Bo,


-Original Message-
From: Xia, Chenbo 
Sent: Tuesday, September 20, 2022 10:25 AM
To: Liu, Changpeng ; dev@dpdk.org
Cc: Maxime Coquelin 
Subject: RE: [PATCH] vhost: use try_lock in rte_vhost_vring_call

Hi Changpeng,


-Original Message-
From: Liu, Changpeng 
Sent: Tuesday, September 6, 2022 10:22 AM
To: dev@dpdk.org
Cc: Liu, Changpeng ; Maxime Coquelin
; Xia, Chenbo 
Subject: [PATCH] vhost: use try_lock in rte_vhost_vring_call

Note that this function is in data path, so the thread context
may not same as socket messages processing context, by using
try_lock here, users can have another try in case of VQ's access
lock is held by `vhost-events` thread.


It would be better to describe the issue this patch fixes and how
it fixes it.

I remember it's a bz issue, do you want to backport? And it has
some bz ID, we need to add it in commit message.

Actually it's my intention not to add bz ID, as I think for this bz ID,
It's better not to lock all VQ's access lock for KICK/CALLFD messages,


Do you plan to add this change? I think that may be an improvement to current
locking implementation.

Maxime, what do you think of this idea about only locking specific queue when
handling vring related message (not global config like mem table)?


I think this is not a good idea.
For example SET_VRING_KICK can currently call
translate_ring_addresses(), which itself can call numa_realloc().

numa_realloc() may reallocate the dev, so you don't want it to be used
by other queues while it happens.


What do you think? If this is identified as a fix, I can backport it to
22.05.


You can decide, if this is planned to be the fix, just backport. I am just
thinking if this is not the fix for the bz, do we still need this?

Thanks,
Chenbo





Signed-off-by: Changpeng Liu 
---
  lib/vhost/vhost.c | 6 +-
  1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
index 60cb05a0ff..072d2acb7b 100644
--- a/lib/vhost/vhost.c
+++ b/lib/vhost/vhost.c
@@ -1329,7 +1329,11 @@ rte_vhost_vring_call(int vid, uint16_t

vring_idx)

if (!vq)
return -1;

-   rte_spinlock_lock(&vq->access_lock);
+   if (!rte_spinlock_trylock(&vq->access_lock)) {
+   VHOST_LOG_CONFIG(dev->ifname, DEBUG,


Should use VHOST_LOG_DATA

OK.


Thanks,
Chenbo


+   "failed to kick guest, virtqueue busy.\n");
+   return -1;
+   }

if (vq_is_packed(dev))
vhost_vring_call_packed(dev, vq);
--
2.21.3






RE: [PATCH] vhost: use try_lock in rte_vhost_vring_call

2022-09-20 Thread Liu, Changpeng
Hi Maxime,

> -Original Message-
> From: Maxime Coquelin 
> Sent: Tuesday, September 20, 2022 3:19 PM
> To: Liu, Changpeng ; dev@dpdk.org
> Cc: Xia, Chenbo 
> Subject: Re: [PATCH] vhost: use try_lock in rte_vhost_vring_call
> 
> 
> 
> On 9/6/22 04:22, Changpeng Liu wrote:
> > Note that this function is in data path, so the thread context
> > may not same as socket messages processing context, by using
> > try_lock here, users can have another try in case of VQ's access
> > lock is held by `vhost-events` thread.
> >
> > Signed-off-by: Changpeng Liu 
> > ---
> >   lib/vhost/vhost.c | 6 +-
> >   1 file changed, 5 insertions(+), 1 deletion(-)
> >
> > diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
> > index 60cb05a0ff..072d2acb7b 100644
> > --- a/lib/vhost/vhost.c
> > +++ b/lib/vhost/vhost.c
> > @@ -1329,7 +1329,11 @@ rte_vhost_vring_call(int vid, uint16_t vring_idx)
> > if (!vq)
> > return -1;
> >
> > -   rte_spinlock_lock(&vq->access_lock);
> > +   if (!rte_spinlock_trylock(&vq->access_lock)) {
> > +   VHOST_LOG_CONFIG(dev->ifname, DEBUG,
> > +   "failed to kick guest, virtqueue busy.\n");
> > +   return -1;
> > +   }
> >
> > if (vq_is_packed(dev))
> > vhost_vring_call_packed(dev, vq);
> 
> I think that's problematic, because it will break other applications
> that currently rely on the API to block until the call is done.
> 
> Just some internal DPDK usage of this API:
> ./drivers/vdpa/ifc/ifcvf_vdpa.c:871:  rte_vhost_vring_call(internal->vid,
> qid);
> ./examples/vhost/virtio_net.c:236:rte_vhost_vring_call(dev->vid, 
> queue_id);
> ./examples/vhost/virtio_net.c:446:rte_vhost_vring_call(dev->vid, 
> queue_id);
> ./examples/vhost_blk/vhost_blk.c:99:
> rte_vhost_vring_call(task->ctrlr->vid, vq->id);
> ./examples/vhost_blk/vhost_blk.c:134:
> rte_vhost_vring_call(task->ctrlr->vid, vq->id);
> 
> This change will break all the above uses.
> 
> And that's not counting external projects.
> 
> You should rather introduce a new API that does not block.
Could you add a new API to do this?
I think we can use the new API in SPDK as a workaround. Note that the SPDK
project has been blocked for a while, since it can't be used with DPDK 22.05
or newer.
Vhost-blk and SCSI devices are not the same as vhost-net: we need to cover
the SeaBIOS and VM cases, so we need to start processing vrings after
1 vring is ready.
> 
> Regards,
> Maxime



Re: [PATCH] vhost: use try_lock in rte_vhost_vring_call

2022-09-20 Thread Maxime Coquelin




On 9/20/22 09:23, Maxime Coquelin wrote:



On 9/20/22 04:53, Xia, Chenbo wrote:

-Original Message-
From: Liu, Changpeng 
Sent: Tuesday, September 20, 2022 10:34 AM
To: Xia, Chenbo ; dev@dpdk.org
Cc: Maxime Coquelin 
Subject: RE: [PATCH] vhost: use try_lock in rte_vhost_vring_call

Hi Bo,


-Original Message-
From: Xia, Chenbo 
Sent: Tuesday, September 20, 2022 10:25 AM
To: Liu, Changpeng ; dev@dpdk.org
Cc: Maxime Coquelin 
Subject: RE: [PATCH] vhost: use try_lock in rte_vhost_vring_call

Hi Changpeng,


-Original Message-
From: Liu, Changpeng 
Sent: Tuesday, September 6, 2022 10:22 AM
To: dev@dpdk.org
Cc: Liu, Changpeng ; Maxime Coquelin
; Xia, Chenbo 
Subject: [PATCH] vhost: use try_lock in rte_vhost_vring_call

Note that this function is in data path, so the thread context
may not same as socket messages processing context, by using
try_lock here, users can have another try in case of VQ's access
lock is held by `vhost-events` thread.


It would be better to describe the issue this patch fixes and how
it fixes it.

I remember it's a bz issue, do you want to backport? And it has
some bz ID, we need to add it in commit message.

Actually it's my intention not to add bz ID, as I think for this bz ID,
It's better not to lock all VQ's access lock for KICK/CALLFD messages,


Do you plan to add this change? I think that may be an improvement to 
current

locking implementation.

Maxime, what do you think of this idea about only locking specific 
queue when

handling vring related message (not global config like mem table)?


I think this is not a good idea.
For example SET_VRING_KICK can currently call
translate_ring_addresses(), which itself can call numa_realloc().

numa_realloc() may reallocate the dev, so you don't want it to be used
by other queues while it happens.


Hmm, actually that may be possible because numa_realloc() reallocs the 
dev only if it is not running.


So maybe you can propose something, but you will have to test it
carefully with use-cases involving NUMA reallocation.


What do you think? If this is identified as a fix, I can backport it to
22.05.


You can decide, if this is planned to be the fix, just backport. I am 
just

thinking if this is not the fix for the bz, do we still need this?

Thanks,
Chenbo





Signed-off-by: Changpeng Liu 
---
  lib/vhost/vhost.c | 6 +-
  1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
index 60cb05a0ff..072d2acb7b 100644
--- a/lib/vhost/vhost.c
+++ b/lib/vhost/vhost.c
@@ -1329,7 +1329,11 @@ rte_vhost_vring_call(int vid, uint16_t

vring_idx)

  if (!vq)
  return -1;

-    rte_spinlock_lock(&vq->access_lock);
+    if (!rte_spinlock_trylock(&vq->access_lock)) {
+    VHOST_LOG_CONFIG(dev->ifname, DEBUG,


Should use VHOST_LOG_DATA

OK.


Thanks,
Chenbo


+    "failed to kick guest, virtqueue busy.\n");
+    return -1;
+    }

  if (vq_is_packed(dev))
  vhost_vring_call_packed(dev, vq);
--
2.21.3






Re: [PATCH] vhost: use try_lock in rte_vhost_vring_call

2022-09-20 Thread Maxime Coquelin




On 9/20/22 09:29, Liu, Changpeng wrote:

Hi Maxime,


-Original Message-
From: Maxime Coquelin 
Sent: Tuesday, September 20, 2022 3:19 PM
To: Liu, Changpeng ; dev@dpdk.org
Cc: Xia, Chenbo 
Subject: Re: [PATCH] vhost: use try_lock in rte_vhost_vring_call



On 9/6/22 04:22, Changpeng Liu wrote:

Note that this function is in data path, so the thread context
may not same as socket messages processing context, by using
try_lock here, users can have another try in case of VQ's access
lock is held by `vhost-events` thread.

Signed-off-by: Changpeng Liu 
---
   lib/vhost/vhost.c | 6 +-
   1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
index 60cb05a0ff..072d2acb7b 100644
--- a/lib/vhost/vhost.c
+++ b/lib/vhost/vhost.c
@@ -1329,7 +1329,11 @@ rte_vhost_vring_call(int vid, uint16_t vring_idx)
if (!vq)
return -1;

-   rte_spinlock_lock(&vq->access_lock);
+   if (!rte_spinlock_trylock(&vq->access_lock)) {
+   VHOST_LOG_CONFIG(dev->ifname, DEBUG,
+   "failed to kick guest, virtqueue busy.\n");
+   return -1;
+   }

if (vq_is_packed(dev))
vhost_vring_call_packed(dev, vq);


I think that's problematic, because it will break other applications
that currently rely on the API to block until the call is done.

Just some internal DPDK usage of this API:
./drivers/vdpa/ifc/ifcvf_vdpa.c:871:rte_vhost_vring_call(internal->vid,
qid);
./examples/vhost/virtio_net.c:236:  rte_vhost_vring_call(dev->vid, 
queue_id);
./examples/vhost/virtio_net.c:446:  rte_vhost_vring_call(dev->vid, 
queue_id);
./examples/vhost_blk/vhost_blk.c:99:
rte_vhost_vring_call(task->ctrlr->vid, vq->id);
./examples/vhost_blk/vhost_blk.c:134:
rte_vhost_vring_call(task->ctrlr->vid, vq->id);

This change will break all the above uses.

And that's not counting external projects.

You should rather introduce a new API that does not block.

Could you add a new API to do this?

>

I think we can use the new API in SPDK as a workaround, note that SPDK project 
is blocked for
a while which can't be used with DPDK 22.05 or newer.


DPDK v22.05?
What is the commit introducing the regression?

Note that if we introduce a new API, it won't be backported to stable
branches.



Vhost-blk and scsi devices are not same with vhost-net, we need to cover 
SeaBIOS and VM
cases, so we need to start processing vrings after 1 vring is ready.


Regards,
Maxime






RE: [PATCH] vhost: use try_lock in rte_vhost_vring_call

2022-09-20 Thread Liu, Changpeng


> -Original Message-
> From: Maxime Coquelin 
> Sent: Tuesday, September 20, 2022 3:35 PM
> To: Liu, Changpeng ; dev@dpdk.org
> Cc: Xia, Chenbo 
> Subject: Re: [PATCH] vhost: use try_lock in rte_vhost_vring_call
> 
> 
> 
> On 9/20/22 09:29, Liu, Changpeng wrote:
> > Hi Maxime,
> >
> >> -Original Message-
> >> From: Maxime Coquelin 
> >> Sent: Tuesday, September 20, 2022 3:19 PM
> >> To: Liu, Changpeng ; dev@dpdk.org
> >> Cc: Xia, Chenbo 
> >> Subject: Re: [PATCH] vhost: use try_lock in rte_vhost_vring_call
> >>
> >>
> >>
> >> On 9/6/22 04:22, Changpeng Liu wrote:
> >>> Note that this function is in data path, so the thread context
> >>> may not same as socket messages processing context, by using
> >>> try_lock here, users can have another try in case of VQ's access
> >>> lock is held by `vhost-events` thread.
> >>>
> >>> Signed-off-by: Changpeng Liu 
> >>> ---
> >>>lib/vhost/vhost.c | 6 +-
> >>>1 file changed, 5 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
> >>> index 60cb05a0ff..072d2acb7b 100644
> >>> --- a/lib/vhost/vhost.c
> >>> +++ b/lib/vhost/vhost.c
> >>> @@ -1329,7 +1329,11 @@ rte_vhost_vring_call(int vid, uint16_t vring_idx)
> >>>   if (!vq)
> >>>   return -1;
> >>>
> >>> - rte_spinlock_lock(&vq->access_lock);
> >>> + if (!rte_spinlock_trylock(&vq->access_lock)) {
> >>> + VHOST_LOG_CONFIG(dev->ifname, DEBUG,
> >>> + "failed to kick guest, virtqueue busy.\n");
> >>> + return -1;
> >>> + }
> >>>
> >>>   if (vq_is_packed(dev))
> >>>   vhost_vring_call_packed(dev, vq);
> >>
> >> I think that's problematic, because it will break other applications
> >> that currently rely on the API to block until the call is done.
> >>
> >> Just some internal DPDK usage of this API:
> >> ./drivers/vdpa/ifc/ifcvf_vdpa.c:871:   
> >> rte_vhost_vring_call(internal->vid,
> >> qid);
> >> ./examples/vhost/virtio_net.c:236: rte_vhost_vring_call(dev->vid, 
> >> queue_id);
> >> ./examples/vhost/virtio_net.c:446: rte_vhost_vring_call(dev->vid, 
> >> queue_id);
> >> ./examples/vhost_blk/vhost_blk.c:99:
> >> rte_vhost_vring_call(task->ctrlr->vid, vq->id);
> >> ./examples/vhost_blk/vhost_blk.c:134:
> >> rte_vhost_vring_call(task->ctrlr->vid, vq->id);
> >>
> >> This change will break all the above uses.
> >>
> >> And that's not counting external projects.
> >>
> >> You should rather introduce a new API that does not block.
> > Could you add a new API to do this?
>  >
> > I think we can use the new API in SPDK as a workaround, note that SPDK 
> > project
> is blocked for
> > a while which can't be used with DPDK 22.05 or newer.
> 
> DPDK v22.05?
> What is the commit introducing the regression?
Here is the commit introducing this issue
c5736998305d ("vhost: fix missing virtqueue lock protection")
Bugzilla ID: 1015
> 
> Note that if we introduce a new API, it won't be backported to stable
> branches.
I understand, but do we have a better idea for the short term? We're planning
to release SPDK 22.09 soon.
> 
> 
> > Vhost-blk and scsi devices are not same with vhost-net, we need to cover
> SeaBIOS and VM
> > cases, so we need to start processing vrings after 1 vring is ready.
> >>
> >> Regards,
> >> Maxime
> >



RE: [EXT] [PATCH v2] graph: fix out of bounds access when re-allocate node objs

2022-09-20 Thread Jerin Jacob Kollanukkaran



> -Original Message-
> From: Zhirun Yan 
> Sent: Thursday, August 4, 2022 11:33 AM
> To: dev@dpdk.org; Jerin Jacob Kollanukkaran ; Kiran
> Kumar Kokkilagadda 
> Cc: Zhirun Yan ; Cunming Liang
> 
> Subject: [EXT] [PATCH v2] graph: fix out of bounds access when re-allocate
> node objs
> 
> External Email
> 
> --
> For __rte_node_enqueue_prologue(), if the number of objs is more than
> node->size * 2, the extra objs will be written out of bounds.
> It should use __rte_node_stream_alloc_size() to request enough memory.
> 
> And for rte_node_next_stream_put(), it will re-allocate to a smaller size when
> the node's free space is small and the number of new objs is less than the
> current node->size. Some obj pointers beyond the new size may be lost, which
> will cause a memory leak. It should request enough memory to hold at least
> the original objs and the new objs.
> 
> Fixes: 40d4f51403ec ("graph: implement fastpath routines")
> 
> Signed-off-by: Zhirun Yan 
> Signed-off-by: Cunming Liang 


Acked-by: Jerin Jacob 

> ---
>  lib/graph/rte_graph_worker.h | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/graph/rte_graph_worker.h b/lib/graph/rte_graph_worker.h index
> 0c0b9c095a..6dc7461659 100644
> --- a/lib/graph/rte_graph_worker.h
> +++ b/lib/graph/rte_graph_worker.h
> @@ -224,7 +224,7 @@ __rte_node_enqueue_prologue(struct rte_graph
> *graph, struct rte_node *node,
>   __rte_node_enqueue_tail_update(graph, node);
> 
>   if (unlikely(node->size < (idx + space)))
> - __rte_node_stream_alloc(graph, node);
> + __rte_node_stream_alloc_size(graph, node, node->size +
> space);
>  }
> 
>  /**
> @@ -432,7 +432,7 @@ rte_node_next_stream_get(struct rte_graph *graph,
> struct rte_node *node,
>   uint16_t free_space = node->size - idx;
> 
>   if (unlikely(free_space < nb_objs))
> - __rte_node_stream_alloc_size(graph, node, nb_objs);
> + __rte_node_stream_alloc_size(graph, node, node->size +
> nb_objs);
> 
>   return &node->objs[idx];
>  }
> --
> 2.25.1



Re: [PATCH] vhost: use try_lock in rte_vhost_vring_call

2022-09-20 Thread Maxime Coquelin




On 9/20/22 09:45, Liu, Changpeng wrote:




-Original Message-
From: Maxime Coquelin 
Sent: Tuesday, September 20, 2022 3:35 PM
To: Liu, Changpeng ; dev@dpdk.org
Cc: Xia, Chenbo 
Subject: Re: [PATCH] vhost: use try_lock in rte_vhost_vring_call



On 9/20/22 09:29, Liu, Changpeng wrote:

Hi Maxime,


-Original Message-
From: Maxime Coquelin 
Sent: Tuesday, September 20, 2022 3:19 PM
To: Liu, Changpeng ; dev@dpdk.org
Cc: Xia, Chenbo 
Subject: Re: [PATCH] vhost: use try_lock in rte_vhost_vring_call



On 9/6/22 04:22, Changpeng Liu wrote:

Note that this function is in data path, so the thread context
may not same as socket messages processing context, by using
try_lock here, users can have another try in case of VQ's access
lock is held by `vhost-events` thread.

Signed-off-by: Changpeng Liu 
---
lib/vhost/vhost.c | 6 +-
1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
index 60cb05a0ff..072d2acb7b 100644
--- a/lib/vhost/vhost.c
+++ b/lib/vhost/vhost.c
@@ -1329,7 +1329,11 @@ rte_vhost_vring_call(int vid, uint16_t vring_idx)
if (!vq)
return -1;

-   rte_spinlock_lock(&vq->access_lock);
+   if (!rte_spinlock_trylock(&vq->access_lock)) {
+   VHOST_LOG_CONFIG(dev->ifname, DEBUG,
+   "failed to kick guest, virtqueue busy.\n");
+   return -1;
+   }

if (vq_is_packed(dev))
vhost_vring_call_packed(dev, vq);


I think that's problematic, because it will break other applications
that currently rely on the API to block until the call is done.

Just some internal DPDK usage of this API:
./drivers/vdpa/ifc/ifcvf_vdpa.c:871:rte_vhost_vring_call(internal->vid,
qid);
./examples/vhost/virtio_net.c:236:  rte_vhost_vring_call(dev->vid, 
queue_id);
./examples/vhost/virtio_net.c:446:  rte_vhost_vring_call(dev->vid, 
queue_id);
./examples/vhost_blk/vhost_blk.c:99:
rte_vhost_vring_call(task->ctrlr->vid, vq->id);
./examples/vhost_blk/vhost_blk.c:134:
rte_vhost_vring_call(task->ctrlr->vid, vq->id);

This change will break all the above uses.

And that's not counting external projects.

You should better introduce a new API that does not block.

Could you add a new API to do this?

  >

I think we can use the new API in SPDK as a workaround, note that SPDK project

is blocked for

a while which can't be used with DPDK 22.05 or newer.


DPDK v22.05?
What is the commit introducing the regression?

Here is the commit introducing this issue
c5736998305d ("vhost: fix missing virtqueue lock protection")
Bugzilla ID: 1015


Ok, it cannot be reverted, as it prevents some undefined
behaviors/crashes.



Note that if we introduce a new API, it won't be backported to stable
branches.

I understand, but do we have better idea in short time? we're planning
to release SPDK 22.09 recently.


You can have another thread that sends the call?





Vhost-blk and scsi devices are not same with vhost-net, we need to cover

SeaBIOS and VM

cases, so we need to start processing vrings after 1 vring is ready.


Regards,
Maxime








[PATCH] net/iavf: fix outer checksum flags

2022-09-20 Thread Zhichao Zeng
When receiving tunneled packets, the testpmd output log shows 'ol_flags'
value always as 'RTE_MBUF_F_RX_OUTER_L4_CKSUM_UNKNOWN', but expected value
should be 'RX_OUTER_L4_CKSUM_GOOD' or 'RX_OUTER_L4_CKSUM_BAD'.

Adding 'RX_OUTER_L4_CKSUM_GOOD' and 'RX_OUTER_L4_CKSUM_BAD' to 'flags' for
normal path, 'l3_l4_flags_shuf' for AVX2 and AVX512 vector path and
'cksum_flags' for SSE vector path to ensure that the 'ol_flags'
can match correct flags.

Fixes: b8b4c54ef9b0 ("net/iavf: support flexible Rx descriptor in normal path")
Fixes: 1162f5a0ef31 ("net/iavf: support flexible Rx descriptor in SSE path")
Fixes: 5b6e8859081d ("net/iavf: support flexible Rx descriptor in AVX path")
Fixes: 9c9aa0040344 ("net/iavf: add offload path for Rx AVX512 flex descriptor")
Cc: sta...@dpdk.org

Signed-off-by: Zhichao Zeng 
---
 drivers/net/iavf/iavf_rxtx.c|   9 +-
 drivers/net/iavf/iavf_rxtx_vec_avx2.c   | 118 +++--
 drivers/net/iavf/iavf_rxtx_vec_avx512.c | 133 ++--
 drivers/net/iavf/iavf_rxtx_vec_sse.c|  77 ++
 4 files changed, 252 insertions(+), 85 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 3deabe1d7e..e1681024a6 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -1277,7 +1277,9 @@ iavf_flex_rxd_error_to_pkt_flags(uint16_t stat_err0)
return 0;
 
if (likely(!(stat_err0 & IAVF_RX_FLEX_ERR0_BITS))) {
-   flags |= (RTE_MBUF_F_RX_IP_CKSUM_GOOD | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD);
+   flags |= (RTE_MBUF_F_RX_IP_CKSUM_GOOD |
+   RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+   RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD);
return flags;
}
 
@@ -1294,6 +1296,11 @@ iavf_flex_rxd_error_to_pkt_flags(uint16_t stat_err0)
if (unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))
flags |= RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD;
 
+   if (unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S)))
+   flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD;
+   else
+   flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD;
+
return flags;
 }
 
diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c 
b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
index d6243b96e2..862f6eb0c0 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
@@ -622,43 +622,88 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct 
iavf_rx_queue *rxq,
 * bit13 is for VLAN indication.
 */
const __m256i flags_mask =
-_mm256_set1_epi32((7 << 4) | (1 << 12) | (1 << 13));
+_mm256_set1_epi32((0xF << 4) | (1 << 12) | (1 << 13));
/**
 * data to be shuffled by the result of the flags mask shifted by 4
 * bits.  This gives use the l3_l4 flags.
 */
-   const __m256i l3_l4_flags_shuf = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
-   /* shift right 1 bit to make sure it not exceed 255 */
-   (RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
-RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
-   (RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
-RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
-   (RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
-RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
-   (RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
-RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
-   (RTE_MBUF_F_RX_L4_CKSUM_BAD | 
RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
-   (RTE_MBUF_F_RX_L4_CKSUM_BAD | 
RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
-   (RTE_MBUF_F_RX_L4_CKSUM_GOOD | 
RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
-   (RTE_MBUF_F_RX_L4_CKSUM_GOOD | 
RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
-   /* second 128-bits */
-   0, 0, 0, 0, 0, 0, 0, 0,
-   (RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
-RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
-   (RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_BAD |
-RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
-   (RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
-RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
-   (RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | 
RTE_MBUF_F_RX_L4_CKSUM_GOOD |
-RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
-   (RTE_MBUF_F_RX_L4_CKSUM_BAD | 
RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
-   (RTE_MBUF_F_RX_L4_CKSUM_BAD | 
RTE_MBUF_F_RX_IP_CKSUM_GOOD) >> 1,
-   (RTE_MBUF_F_RX_L4_CKSUM_GOOD | 
RTE_MBUF_F_RX_IP_CKSUM_BAD) >> 1,
- 

RE: [PATCH] vhost: use try_lock in rte_vhost_vring_call

2022-09-20 Thread Liu, Changpeng


> -Original Message-
> From: Maxime Coquelin 
> Sent: Tuesday, September 20, 2022 4:13 PM
> To: Liu, Changpeng ; dev@dpdk.org
> Cc: Xia, Chenbo 
> Subject: Re: [PATCH] vhost: use try_lock in rte_vhost_vring_call
> 
> 
> 
> On 9/20/22 09:45, Liu, Changpeng wrote:
> >
> >
> >> -Original Message-
> >> From: Maxime Coquelin 
> >> Sent: Tuesday, September 20, 2022 3:35 PM
> >> To: Liu, Changpeng ; dev@dpdk.org
> >> Cc: Xia, Chenbo 
> >> Subject: Re: [PATCH] vhost: use try_lock in rte_vhost_vring_call
> >>
> >>
> >>
> >> On 9/20/22 09:29, Liu, Changpeng wrote:
> >>> Hi Maxime,
> >>>
>  -Original Message-
>  From: Maxime Coquelin 
>  Sent: Tuesday, September 20, 2022 3:19 PM
>  To: Liu, Changpeng ; dev@dpdk.org
>  Cc: Xia, Chenbo 
>  Subject: Re: [PATCH] vhost: use try_lock in rte_vhost_vring_call
> 
> 
> 
>  On 9/6/22 04:22, Changpeng Liu wrote:
> > Note that this function is in data path, so the thread context
> > may not same as socket messages processing context, by using
> > try_lock here, users can have another try in case of VQ's access
> > lock is held by `vhost-events` thread.
> >
> > Signed-off-by: Changpeng Liu 
> > ---
> > lib/vhost/vhost.c | 6 +-
> > 1 file changed, 5 insertions(+), 1 deletion(-)
> >
> > diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
> > index 60cb05a0ff..072d2acb7b 100644
> > --- a/lib/vhost/vhost.c
> > +++ b/lib/vhost/vhost.c
> > @@ -1329,7 +1329,11 @@ rte_vhost_vring_call(int vid, uint16_t vring_idx)
> > if (!vq)
> > return -1;
> >
> > -   rte_spinlock_lock(&vq->access_lock);
> > +   if (!rte_spinlock_trylock(&vq->access_lock)) {
> > +   VHOST_LOG_CONFIG(dev->ifname, DEBUG,
> > +   "failed to kick guest, virtqueue busy.\n");
> > +   return -1;
> > +   }
> >
> > if (vq_is_packed(dev))
> > vhost_vring_call_packed(dev, vq);
> 
>  I think that's problematic, because it will break other applications
>  that currently rely on the API to block until the call is done.
> 
>  Just some internal DPDK usage of this API:
>  ./drivers/vdpa/ifc/ifcvf_vdpa.c:871: 
>  rte_vhost_vring_call(internal->vid,
>  qid);
>  ./examples/vhost/virtio_net.c:236:   rte_vhost_vring_call(dev->vid,
> queue_id);
>  ./examples/vhost/virtio_net.c:446:   rte_vhost_vring_call(dev->vid,
> queue_id);
>  ./examples/vhost_blk/vhost_blk.c:99:
>  rte_vhost_vring_call(task->ctrlr->vid, vq->id);
>  ./examples/vhost_blk/vhost_blk.c:134:
>  rte_vhost_vring_call(task->ctrlr->vid, vq->id);
> 
>  This change will break all the above uses.
> 
>  And that's not counting external projects.
> 
>  You should better introduce a new API that does not block.
> >>> Could you add a new API to do this?
> >>   >
> >>> I think we can use the new API in SPDK as a workaround, note that SPDK
> project
> >> is blocked for
> >>> a while which can't be used with DPDK 22.05 or newer.
> >>
> >> DPDK v22.05?
> >> What is the commit introducing the regression?
> > Here is the commit introducing this issue
> > c5736998305d ("vhost: fix missing virtqueue lock protection")
> > Bugzilla ID: 1015
> 
> Ok, it cannot be reverted, as it prevents some undefined
> behaviors/crashes.
> 
> >>
> >> Note that if we introduce a new API, it won't be backported to stable
> >> branches.
> > I understand, but do we have better idea in short time? we're planning
> > to release SPDK 22.09 recently.
> 
> You can have another thread that sends the call?
We already use two threads to do this. Here is the example for existing code in 
SPDK:

DPDK vhost-events thread        SPDK thread

SET_VRING_KICK VQ1   ---->      Start polling VQ1
Reply to DPDK        <----      Done
SET_VRING_KICK VQ2   ---->      thread is blocked on VQ's access lock,
                                SPDK thread can't provide reply message
 
For example, we can just return for  SET_VRING_KICK VQ2 message without 
checking SPDK thread, but this leave
uncertain replies to VM.
> 
> >>
> >>
> >>> Vhost-blk and scsi devices are not same with vhost-net, we need to cover
> >> SeaBIOS and VM
> >>> cases, so we need to start processing vrings after 1 vring is ready.
> 
>  Regards,
>  Maxime
> >>>
> >



RE: [PATCH] doc: relate bifurcated driver and flow isolated mode

2022-09-20 Thread Dariusz Sosnowski
Hi Thomas,

> -Original Message-
> From: Thomas Monjalon 
> Sent: Wednesday, September 14, 2022 23:30
> To: dev@dpdk.org
> Cc: Michael Savisko ; Slava Ovsiienko
> ; Matan Azrad ; Dariusz
> Sosnowski ; Asaf Penso ; Ori
> Kam ; Ferruh Yigit ; Andrew
> Rybchenko 
> Subject: [PATCH] doc: relate bifurcated driver and flow isolated mode
> 
> External email: Use caution opening links or attachments
> 
> 
> The relation between the isolated mode in ethdev flow API and bifurcated
> driver behaviour was not clearly explained.
> 
> It is made clear in the how-to guide that isolated mode is required for flow
> bifurcation to the kernel.
> On the other side, the impact of the isolated mode on a bifurcated driver is
> made more explicit.
> 
> Signed-off-by: Thomas Monjalon 
> ---
>  doc/guides/howto/flow_bifurcation.rst | 3 ++-
>  lib/ethdev/rte_flow.h | 4 
>  2 files changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/doc/guides/howto/flow_bifurcation.rst
> b/doc/guides/howto/flow_bifurcation.rst
> index 7ba66b9003..79cf4f1e64 100644
> --- a/doc/guides/howto/flow_bifurcation.rst
> +++ b/doc/guides/howto/flow_bifurcation.rst
> @@ -55,7 +55,8 @@ The full device is already shared with the kernel driver.
>  The DPDK application can setup some flow steering rules,  and let the rest go
> to the kernel stack.
>  In order to define the filters strictly with flow rules, -the
> :ref:`flow_isolated_mode` can be configured.
> +the :ref:`flow_isolated_mode` must be configured, so there is no
> +default rule routing traffic to userspace.
> 
>  There is no specific instructions to follow.
>  The recommended reading is the :doc:`../prog_guide/rte_flow` guide.
> diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h index
> a79f1e7ef0..1bac3fd9ec 100644
> --- a/lib/ethdev/rte_flow.h
> +++ b/lib/ethdev/rte_flow.h
> @@ -4254,6 +4254,10 @@ rte_flow_query(uint16_t port_id,
>   *
>   * Isolated mode guarantees that all ingress traffic comes from defined flow
>   * rules only (current and future).
> + * When enabled with a bifurcated driver,
> + * non-matched packets are routed to the kernel driver interface.
> + * When disabled (the default),
> + * there may be some default rules routing traffic to the DPDK port.
>   *
>   * Besides making ingress more deterministic, it allows PMDs to safely reuse
>   * resources otherwise assigned to handle the remaining traffic, such as
> --
> 2.36.1

Looks good to me. Thank you.

Reviewed-by: Dariusz Sosnowski 

Best regards,
Dariusz Sosnowski


[PATCH v3] usertools: rewrite pmdinfo

2022-09-20 Thread Robin Jarry
dpdk-pmdinfo.py does not produce any parseable output. The -r/--raw flag
merely prints multiple independent JSON lines which cannot be fed
directly to any JSON parser. Moreover, the script complexity is rather
high for such a simple task: extracting PMD_INFO_STRING from .rodata ELF
sections. Rewrite it so that it can produce valid JSON.

Remove the PCI database parsing for PCI-ID to Vendor-Device names
conversion. This should be done by external scripts (if really needed).

Here are some examples of use with jq:

Get the complete info for a given driver:

 ~$ usertools/dpdk-pmdinfo.py build/app/dpdk-testpmd | \
   jq '.[] | select(.name == "dmadev_idxd_pci")'
 {
   "name": "dmadev_idxd_pci",
   "params": "max_queues=0",
   "kmod": "vfio-pci",
   "pci_ids": [
 {
   "vendor": "8086",
   "device": "0b25",
   "subsystem_vendor": "",
   "subsystem_device": ""
 }
   ]
 }

Get only the required kernel modules for a given driver:

 ~$ usertools/dpdk-pmdinfo.py build/app/dpdk-testpmd | \
   jq '.[] | select(.name == "net_i40e").kmod'
 "* igb_uio | uio_pci_generic | vfio-pci"

Get only the required kernel modules for a given device:

 ~$ usertools/dpdk-pmdinfo.py build/app/dpdk-testpmd | \
   jq '.[] | select(.pci_ids[] | .vendor == "15b3" and .device == "1013").kmod'
 "* ib_uverbs & mlx5_core & mlx5_ib"

Print the list of drivers which define multiple parameters without
string separators:

 ~$ usertools/dpdk-pmdinfo.py build/app/dpdk-testpmd | \
   jq '.[] | select(.params!=null and (.params|test("=[^ ]+="))) | {name, 
params}'
 ...

The script passes flake8, black, isort and pylint checks.

I have tested this with a matrix of python/pyelftools versions:

 pyelftools
   0.22 0.23 0.24 0.25 0.26 0.27 0.28 0.29
 3.6 ok   ok   ok   ok   ok   ok   ok   ok
 3.7 ok   ok   ok   ok   ok   ok   ok   ok
  Python 3.8 ok   ok   ok   ok   ok   ok   ok   ok
 3.9 ok   ok   ok   ok   ok   ok   ok   ok
 3.10  fail fail fail fail   ok   ok   ok   ok

All failures with python 3.10 are related to the same issue:

  File "elftools/construct/lib/container.py", line 5, in 
from collections import MutableMapping
  ImportError: cannot import name 'MutableMapping' from 'collections'

Python 3.10 support is only available since pyelftools 0.26. The script
will only work with Python 3.6 and later. Update the minimal system
requirements and release notes.

NB: The output produced by the legacy -r/--raw flag can be obtained with
the following command:

  strings build/app/dpdk-testpmd | sed -n 's/^PMD_INFO_STRING= //p'

Cc: Olivier Matz 
Cc: Ferruh Yigit 
Cc: Bruce Richardson 
Signed-off-by: Robin Jarry 
---
v2 -> v3:

* strip "pci_ids" when it is empty (some drivers do not support any pci
  devices)

v1 -> v2:

* update release notes and minimal python version requirement
* hide warnings by default (-v/--verbose to show them)
* show debug messages with -vv
* also search libs in folders listed in /etc/ld.so.conf/*.conf
* only search for DT_NEEDED on executables, not on dynamic libraries
* take DT_RUNPATH into account for searching libraries
* fix weird broken pipe error
* fix some typos:
s/begining/beginning/
s/subsystem_device/subsystem_vendor/
s/subsystem_system/subsystem_device/
* change field names for pci_ids elements (remove _id suffixes)
* DT_NEEDED of files are analyzed. There is no way to differentiate
  between dynamically linked executables and dynamic libraries.

 doc/guides/linux_gsg/sys_reqs.rst  |   2 +-
 doc/guides/rel_notes/release_22_11.rst |   5 +
 usertools/dpdk-pmdinfo.py  | 914 +
 3 files changed, 314 insertions(+), 607 deletions(-)

diff --git a/doc/guides/linux_gsg/sys_reqs.rst 
b/doc/guides/linux_gsg/sys_reqs.rst
index 08d45898f025..f842105eeda7 100644
--- a/doc/guides/linux_gsg/sys_reqs.rst
+++ b/doc/guides/linux_gsg/sys_reqs.rst
@@ -41,7 +41,7 @@ Compilation of the DPDK
resulting in statically linked applications not being linked properly.
Use an updated version of ``pkg-config`` or ``pkgconf`` instead when 
building applications
 
-*   Python 3.5 or later.
+*   Python 3.6 or later.
 
 *   Meson (version 0.49.2+) and ninja
 
diff --git a/doc/guides/rel_notes/release_22_11.rst 
b/doc/guides/rel_notes/release_22_11.rst
index 8c021cf0505e..67054f5acdc9 100644
--- a/doc/guides/rel_notes/release_22_11.rst
+++ b/doc/guides/rel_notes/release_22_11.rst
@@ -84,6 +84,11 @@ API Changes
Also, make sure to start the actual text at the margin.
===
 
+* The ``dpdk-pmdinfo.py`` script was rewritten to produce valid JSON only.
+  PCI-IDs parsing has been removed.
+  To get a similar output to the (now removed) ``-r/--raw`` flag, you may use 
the following command::
+
+ strings $dpdk_binary_or_driver | sed -n 's/^PMD_INFO_STRING= //p'
 
 ABI Changes
 ---
diff --git a/usertools/dpdk-pm

[PATCH v3] net/iavf: enable inner and outer Tx checksum offload

2022-09-20 Thread Zhichao Zeng
This patch is to enable scalar path inner and outer Tx checksum offload
for tunnel packet by configure ol_flags.

Signed-off-by: Peng Zhang 
Signed-off-by: Zhichao Zeng 

---
v2: add outer udp cksum flag and remove unrelated code
---
v3: specify the patch scope and update document
---
 doc/guides/nics/features/iavf.ini |  2 ++
 drivers/net/iavf/iavf_ethdev.c|  1 +
 drivers/net/iavf/iavf_rxtx.c  | 48 +--
 drivers/net/iavf/iavf_rxtx.h  |  9 +-
 4 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/doc/guides/nics/features/iavf.ini 
b/doc/guides/nics/features/iavf.ini
index dfaa82b83d..eeda6b7210 100644
--- a/doc/guides/nics/features/iavf.ini
+++ b/doc/guides/nics/features/iavf.ini
@@ -25,6 +25,8 @@ VLAN offload = Y
 L3 checksum offload  = P
 L4 checksum offload  = P
 Timestamp offload= P
+Inner L3 checksum= P
+Inner L4 checksum= P
 Packet type parsing  = Y
 Rx descriptor status = Y
 Tx descriptor status = Y
diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c
index 506fcff6e3..fa040766e5 100644
--- a/drivers/net/iavf/iavf_ethdev.c
+++ b/drivers/net/iavf/iavf_ethdev.c
@@ -1134,6 +1134,7 @@ iavf_dev_info_get(struct rte_eth_dev *dev, struct 
rte_eth_dev_info *dev_info)
RTE_ETH_TX_OFFLOAD_TCP_CKSUM |
RTE_ETH_TX_OFFLOAD_SCTP_CKSUM |
RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM |
+   RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM |
RTE_ETH_TX_OFFLOAD_TCP_TSO |
RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO |
RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO |
diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 3deabe1d7e..b784c5cc18 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -2334,7 +2334,8 @@ static inline uint16_t
 iavf_calc_context_desc(uint64_t flags, uint8_t vlan_flag)
 {
if (flags & (RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_UDP_SEG |
-   RTE_MBUF_F_TX_TUNNEL_MASK))
+   RTE_MBUF_F_TX_TUNNEL_MASK | RTE_MBUF_F_TX_OUTER_IP_CKSUM |
+   RTE_MBUF_F_TX_OUTER_UDP_CKSUM))
return 1;
if (flags & RTE_MBUF_F_TX_VLAN &&
vlan_flag & IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2)
@@ -2399,6 +2400,44 @@ iavf_fill_ctx_desc_tunnelling_field(volatile uint64_t 
*qw0,
break;
}
 
+   /* L4TUNT: L4 Tunneling Type */
+   switch (m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) {
+   case RTE_MBUF_F_TX_TUNNEL_IPIP:
+   /* for non UDP / GRE tunneling, set to 00b */
+   break;
+   case RTE_MBUF_F_TX_TUNNEL_VXLAN:
+   case RTE_MBUF_F_TX_TUNNEL_GTP:
+   case RTE_MBUF_F_TX_TUNNEL_GENEVE:
+   eip_typ |= IAVF_TXD_CTX_UDP_TUNNELING;
+   break;
+   case RTE_MBUF_F_TX_TUNNEL_GRE:
+   eip_typ |= IAVF_TXD_CTX_GRE_TUNNELING;
+   break;
+   default:
+   PMD_TX_LOG(ERR, "Tunnel type not supported");
+   return;
+   }
+
+   /* L4TUNLEN: L4 Tunneling Length, in Words
+*
+* We depend on app to set rte_mbuf.l2_len correctly.
+* For IP in GRE it should be set to the length of the GRE
+* header;
+* For MAC in GRE or MAC in UDP it should be set to the length
+* of the GRE or UDP headers plus the inner MAC up to including
+* its last Ethertype.
+* If MPLS labels exists, it should include them as well.
+*/
+   eip_typ |= (m->l2_len >> 1) << IAVF_TXD_CTX_QW0_NATLEN_SHIFT;
+
+   /**
+* Calculate the tunneling UDP checksum.
+* Shall be set only if L4TUNT = 01b and EIPT is not zero
+*/
+   if (!(eip_typ & IAVF_TX_CTX_EXT_IP_NONE) &&
+   (eip_typ & IAVF_TXD_CTX_UDP_TUNNELING))
+   eip_typ |= IAVF_TXD_CTX_QW0_L4T_CS_MASK;
+
*qw0 = eip_typ << IAVF_TXD_CTX_QW0_TUN_PARAMS_EIPT_SHIFT |
eip_len << IAVF_TXD_CTX_QW0_TUN_PARAMS_EIPLEN_SHIFT |
eip_noinc << IAVF_TXD_CTX_QW0_TUN_PARAMS_EIP_NOINC_SHIFT;
@@ -2535,7 +2574,12 @@ iavf_build_data_desc_cmd_offset_fields(volatile uint64_t 
*qw1,
}
 
/* Set MACLEN */
-   offset |= (m->l2_len >> 1) << IAVF_TX_DESC_LENGTH_MACLEN_SHIFT;
+   if (m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)
+   offset |= (m->outer_l2_len >> 1)
+   << IAVF_TX_DESC_LENGTH_MACLEN_SHIFT;
+   else
+   offset |= (m->l2_len >> 1)
+   << IAVF_TX_DESC_LENGTH_MACLEN_SHIFT;
 
/* Enable L3 checksum offloading inner */
if (m->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) {
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index 1695e43cd5..66e832713c 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -26,6 +26,8 @@
 #define IAVF_TX_NO_VECTOR_FLAGS (   \
RTE_ETH_TX_OFFLOAD_MULTI_SEGS |  \
 

Re: [RFC, v1 0/6] graph enhancement for multi-core dispatch

2022-09-20 Thread Jerin Jacob
On Thu, Sep 8, 2022 at 7:40 AM Zhirun Yan  wrote:
>
> Currently, the rte_graph_walk() and rte_node_enqueue* fast path API
> functions in graph lib implementation are designed to work on single-core.
>
> This solution(RFC) proposes usage of cross-core dispatching mechanism to
> enhance the graph scaling strategy. We introduce Scheduler Workqueue
> then we could directly dispatch streams to another worker core which is
> affinity with a specific node.
>
> This RFC:
>   1. Introduce core affinity API and graph clone API.
>   2. Introduce key functions to enqueue/dequeue for dispatching streams.
>   3. Enhance rte_graph_walk by cross-core dispatch.
>   4. Add l2fwd-graph example and stats for cross-core dispatching.
>
> With this patch set, it could easily plan and orchestrate stream on
> multi-core systems.
>
> Future work:
>   1. Support to affinity lcore set for one node.
>   2. Use l3fwd-graph instead of l2fwd-graph as example in patch 06.
>   3. Add new parameter, like --node(nodeid, lcoreid) to config node for core
>   affinity.
>
> Comments and suggestions are welcome. Thanks!

Some top level comments.

1)Yes it makes sense to not create the l2fwd-graph, Please enhance the
l3fwd-graph and compare the performance with multi core scenarios.

2) It is good to have multiple graph walk schemes like the one you
have introduced now.
Though I am not sure about performance aspects, specifically, it is
used with multiple producers and multi consumers with node.

If you have a use case for the new worker scheme, then we can add it.
I think, it would call for

a) We need to have separate rte_graph_worker.h for each implementation
to avoid the performance impact for each other.
That may boil down to
i) Create lib/graph/rte_graph_worker_common.h
ii) Treat existing rte_graph_worker.h as default scheme and include
rte_graph_worker_common.h
iii) Add new rte_graph_worker_xxx.h for the new scheme(diff between
default worker) while leveraging rte_graph_worker_common.h

Application can select the worker by

#define RTE_GRAPH_WORKER_MODEL_XXX
//#define RTE_GRAPH_WORKER_MODEL_YYY
#include 

b) Introduce a new enum rte_graph_model or so to express this new
model and other models in future

c) Each core has its own node instance so we don't need explicit
critical section management when dealing with node instances.
In this new scheme, Can we leverage the existing node implementation?
If not, we need to have separate node
implementation for different graph models. It will be a maintenance
issue. But if we really need to take this path,
Probably on each node's capability, the node needs to declare the
models supported(Use enum rte_graph_model).
This can be used for sanity checking when we clone the graph etc and
check the compatibility for creating the graph etc.
I think this is the biggest issue with adding a new model. Where nodes
need to be written based on the model. I think this could
be the reason for VPP not adding other models.

d) All new slowpath APIs like rte_node_set_lcore_affinity,
rte_graph_clone, We need to fix the namespace by
rte_graph_model__ or so to make sure
application writer understand this APIs
are only for this model.(Also we can use "enum rte_graph_model" for
sanity check etc)



>


RE: [PATCH v1] ethdev: add direction info when creating the transfer table

2022-09-20 Thread Ori Kam
Hi Ivan, Thomas and Rongwei

> -Original Message-
> From: Thomas Monjalon 
> Sent: Thursday, 15 September 2022 14:16
> 
> 15/09/2022 12:59, Ivan Malov:
> > Hi Rongwei,
> >
> > In this reply, I do not include the previous mail because the amount
> > of inline commentary has gone haywire over the past couple of days.
> > Let's re-iterate.
> >
> > But before I get to that, I'd like to offer a fresh perspective:
> >
> > Perhaps, if we all agree that term "vport" means an endpoint which
> > can stand for any "port" except for physical one, then it should
> > be possible to use term ANY_VPORTS rather than ANY_GUEST_PORTS.
> 
> The opposite of "physical" is "virtual" indeed.
> 
> > But that's tricky, of course. I don't have a way with naming,
> > so more opinions are welcome and very-very desirable here.
> >
> > So:
> >
> > 1) Do you agree that, in your proposal, the new "wire_orig" / "vf_orig"
> > primitives are in fact yet another match criteria?
> >
> > ..
> >
> > To me, it looks so. If they are match criteria, then they belong
> > in match pattern, that is, they should be expressed as new items.
> >
> > For "transfer" rules, the *existing* attributes are: "group"
> > and "priority". As you may note, these are clearly not match
> > criteria. They control the look-up order. So, to this day,
> > there're no match criteria in DPDK expressed as attributes.
> >
> > If these "wire_orig" / "vf_orig" are going to be introduced
> > as attributes, that should be backed with strong motivation.
> 
> I prefer we keep matching in a single place, not in attributes.
> 

I think we are talking about two different features.
Feature 1:
Allow matching on all vports that are not wire
Feature 2:
Save allocation space and allow fast insertion.
In this case, the matching is not on all vports it can be just part of the 
vports
but it will never be the wire port.
For example:
port 0 - wire
ports 1,2,3,4,5  - vports
the application wants to insert only those rules:
represented_port(port_id=2) / eth / ipv4 (src==xx)
represented_port(port_id=4) / eth / ipv4 (src==xx)
represented_port(port_id=4) / eth / ipv4 (src==yy)

For feature 1 I fully agree with you Ivan, this should be added as an item.
For feature 2 I think Rongwei's suggestion is the better option.
If I understand correctly the idea is to give hint to the PMD on where to 
allocate memory
and how to insert the rules most optimally. Since this is shared for all rules 
it makes more sense
to add it as an attribute, just like we don’t have an ingress item (maybe we 
should?)

Ivan we have the item RTE_FLOW_ITEM_TYPE_PF and RTE_FLOW_ITEM_TYPE_VF which are 
deprecated,
So do you want to un-deprecate them?

To summarize, if the PMD can use such a hint during rule creation and save memory, 
I vote
to allow it.
if the idea is to match on all vports then it should be an item.

> 
> > 2) From your viewpoint, why items "ANY_PHYS_PORTS" and
> "ANY_VPORTS"
> > won't do? Or, which problems do you think they may inflict?
> >
> > ..
> >
> > Previously, you explained why REPRESENTED_PORT would not
> > fit your needs. And I understand your point: to async API,
> > two pattern templates which both have item REPRESENTED_PORT
> > in them cannot be clearly distinguished and are in fact the
> > same set of criteria (provided that all other items are also
> > the same and have the same masks). Templates are, well,
> > templates (or shapes) of the rules to come later and
> > do not include exact "spec" for the "ethdev_id".
> > Got it.
> >
> > But that's not going to be the case with items ANY_PHYS_PORTS and
> > ANY_VPORTS, is it? In one async table template, the user submits
> > item ANY_PHYS_PORTS (instead of table attribute "wire_orig").
> > In another template, the user submits item ANY_VPORTS to
> > state that they want to match only traffic transmitted
> > software endpoints (DPDK ethdevs, guest VFs, etc.)
> > connected to the switch.
> >
> > In this example, the PMD will clearly see that the two templates
> > differ. So it will be able to allocate separate resources, each
> > one "cutting one half of traffic" (as per your concept).
> >
> > 3) In your most recent response, you suggested that one might have
> > had the attributes occupied for some other purposes. To me,
> > they're not. Neither me nor my closest colleagues have
> > any plans on them. When I advocate using item approach
> > over the attribute approach, I do this to ensure
> > a) clarity of the API contract and b) robustness.

If something is shared for all rules in the same table, it should be a table
property.

> >
> > 4) Also, in your response, you suggested that I might have
> > confused item mask and spec. That is not the case.
> > If we agree, that switch domain ID is unneeded in
> > the new items, then these items will have no
> > fields in them (like item PF had not had

[PATCH v4 0/3] add uncore api to be called through l3fwd-power

2022-09-20 Thread Tadhg Kearney
This is targeting 22.11 and aims to add an API to DPDK power library to allow 
uncore frequency
adjustment. This will be called through the l3fwd-power app and gives the 
ability to set the 
minimum and maximum uncore frequency to both min, max or specific frequency 
index.

Signed-off-by: tadhgkearney 
---

v2:
Fix compilation warnings and errors.
v3:
Remove addition of x86 global macros.
Add 2 new API's for getting package and die numbers from system.
Address comments from mailing list.
Improve efficiency of code and code quality.
v4:
Fix compilation warnings and errors.

Tadhg Kearney (3):
  power: add uncore frequency control API to the power library
  l3fwd-power: add option to call uncore API
  test/power: add unit tests for uncore API

 app/test/meson.build  |   2 +
 app/test/test_power_uncore.c  | 299 
 doc/guides/prog_guide/power_man.rst   |  37 ++
 doc/guides/rel_notes/release_22_11.rst|   5 +
 .../sample_app_ug/l3_forward_power_man.rst|  29 ++
 examples/l3fwd-power/main.c   | 122 -
 lib/power/meson.build |   2 +
 lib/power/rte_power_uncore.c  | 447 ++
 lib/power/rte_power_uncore.h  | 194 
 lib/power/version.map |  11 +
 10 files changed, 1145 insertions(+), 3 deletions(-)
 create mode 100644 app/test/test_power_uncore.c
 create mode 100644 lib/power/rte_power_uncore.c
 create mode 100644 lib/power/rte_power_uncore.h

-- 
2.25.1



[PATCH v4 1/3] power: add uncore frequency control API to the power library

2022-09-20 Thread Tadhg Kearney
Add API to allow uncore frequency adjustment. This is done through
manipulating related uncore frequency control sysfs entries to
adjust the minimum and maximum uncore frequency values.
Nine API's are being added that are all public and experimental.

Signed-off-by: Tadhg Kearney 
---
 doc/guides/prog_guide/power_man.rst|  37 ++
 doc/guides/rel_notes/release_22_11.rst |   5 +
 lib/power/meson.build  |   2 +
 lib/power/rte_power_uncore.c   | 447 +
 lib/power/rte_power_uncore.h   | 194 +++
 lib/power/version.map  |  11 +
 6 files changed, 696 insertions(+)
 create mode 100644 lib/power/rte_power_uncore.c
 create mode 100644 lib/power/rte_power_uncore.h

diff --git a/doc/guides/prog_guide/power_man.rst 
b/doc/guides/prog_guide/power_man.rst
index 98cfd3c1f3..49ff3edef0 100644
--- a/doc/guides/prog_guide/power_man.rst
+++ b/doc/guides/prog_guide/power_man.rst
@@ -276,6 +276,43 @@ API Overview for Ethernet PMD Power Management
 * **Set Scaling Max Freq**: Set the maximum frequency (kHz) to be used in 
Frequency
   Scaling mode.
 
+Uncore API
+--
+
+Abstract
+
+
+Uncore is a term used by Intel to describe the functions of a microprocessor 
that are
+not in the core, but which must be closely connected to the core to achieve 
high performance;
+L3 cache, on-die memory controller, etc.
+Significant power savings can be achieved by reducing the uncore frequency to 
its lowest value.
+
+The Linux kernel provides the driver "intel-uncore-frequency" to control the 
uncore frequency limits
+for x86 platform. The driver is available from kernel version 5.6 and above.
+This manipulates the contents of MSR 0x620, which sets min/max of the uncore 
for the SKU.
+
+
+API Overview for Uncore
+~~~
+* **Uncore Power Init**: Initialise uncore power, populate frequency array and 
record
+  original min & max for pkg & die.
+
+* **Uncore Power Exit**: Exit uncore power, restoring original min & max for 
pkg & die.
+
+* **Get Uncore Power Freq**: Get current uncore freq index for pkg & die.
+
+* **Set Uncore Power Freq**: Set min & max uncore freq index for pkg & die 
(min and max will be the same).
+
+* **Uncore Power Max**: Set max uncore freq index for pkg & die.
+
+* **Uncore Power Min**: Set min uncore freq index for pkg & die.
+
+* **Get Num Freqs**: Get the number of frequencies in the index array.
+
+* **Get Num Pkgs**: Get the number of packages (CPUs) on the system.
+
+* **Get Num Dies**: Get the number of dies on a given package.
+
 References
 --
 
diff --git a/doc/guides/rel_notes/release_22_11.rst 
b/doc/guides/rel_notes/release_22_11.rst
index 8c021cf050..8e184034d8 100644
--- a/doc/guides/rel_notes/release_22_11.rst
+++ b/doc/guides/rel_notes/release_22_11.rst
@@ -55,6 +55,11 @@ New Features
  Also, make sure to start the actual text at the margin.
  ===
 
+* **Added uncore frequency control API to the power library.**
+
+  Add API to allow uncore frequency adjustment. This is done through
+  manipulating related uncore frequency control sysfs entries to
+  adjust the minimum and maximum uncore frequency values.
 
 Removed Items
 -
diff --git a/lib/power/meson.build b/lib/power/meson.build
index ba8d66074b..80cdeb72d4 100644
--- a/lib/power/meson.build
+++ b/lib/power/meson.build
@@ -21,12 +21,14 @@ sources = files(
 'rte_power.c',
 'rte_power_empty_poll.c',
 'rte_power_pmd_mgmt.c',
+'rte_power_uncore.c',
 )
 headers = files(
 'rte_power.h',
 'rte_power_empty_poll.h',
 'rte_power_pmd_mgmt.h',
 'rte_power_guest_channel.h',
+'rte_power_uncore.h',
 )
 if cc.has_argument('-Wno-cast-qual')
 cflags += '-Wno-cast-qual'
diff --git a/lib/power/rte_power_uncore.c b/lib/power/rte_power_uncore.c
new file mode 100644
index 00..b3004e5bfc
--- /dev/null
+++ b/lib/power/rte_power_uncore.c
@@ -0,0 +1,447 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include 
+#include 
+#include 
+
+#include 
+
+#include "rte_power_uncore.h"
+#include "power_common.h"
+
+#define MAX_UNCORE_FREQS 32
+#define MAX_NUMA_DIE 8
+#define BUS_FREQ 10
+#define FILTER_LENGTH 18
+#define PACKAGE_FILTER "package_%02u_die_*"
+#define DIE_FILTER "package_%02u_die_%02u"
+#define UNCORE_FREQUENCY_DIR "/sys/devices/system/cpu/intel_uncore_frequency"
+#define POWER_GOVERNOR_PERF "performance"
+#define POWER_UNCORE_SYSFILE_MAX_FREQ \
+   
"/sys/devices/system/cpu/intel_uncore_frequency/package_%02u_die_%02u/max_freq_khz"
+#define POWER_UNCORE_SYSFILE_MIN_FREQ  \
+   
"/sys/devices/system/cpu/intel_uncore_frequency/package_%02u_die_%02u/min_freq_khz"
+#define POWER_UNCORE_SYSFILE_BASE_MAX_FREQ \
+   
"/sys/devices/system/cpu/intel_uncore_frequency/package_%02u_die_%02u/initial_max_freq_khz"
+#defin

[PATCH v4 2/3] l3fwd-power: add option to call uncore API

2022-09-20 Thread Tadhg Kearney
Add option for setting uncore frequency min/max/index, through uncore API.
This will be set for each package and die on the SKU. On exit, uncore min
and max frequency will be reverted back to previous frequencies.

Signed-off-by: Tadhg Kearney 
---
 .../sample_app_ug/l3_forward_power_man.rst|  29 +
 examples/l3fwd-power/main.c   | 122 +-
 2 files changed, 148 insertions(+), 3 deletions(-)

diff --git a/doc/guides/sample_app_ug/l3_forward_power_man.rst 
b/doc/guides/sample_app_ug/l3_forward_power_man.rst
index 8f6d906200..08ac8ef369 100644
--- a/doc/guides/sample_app_ug/l3_forward_power_man.rst
+++ b/doc/guides/sample_app_ug/l3_forward_power_man.rst
@@ -97,6 +97,12 @@ where,
 *   -P: Sets all ports to promiscuous mode so that packets are accepted 
regardless of the packet's Ethernet MAC destination address.
 Without this option, only packets with the Ethernet MAC destination 
address set to the Ethernet address of the port are accepted.
 
+*   -u: optional, sets uncore min/max frequency to minimum value.
+
+*   -U: optional, sets uncore min/max frequency to maximum value.
+
+*   -i (frequency index): optional, sets uncore frequency to frequency index 
value, by setting min and max values to be the same.
+
 *   --config (port,queue,lcore)[,(port,queue,lcore)]: determines which queues 
from which ports are mapped to which cores.
 
 *   --max-pkt-len: optional, maximum packet length in decimal (64-9600)
@@ -364,3 +370,26 @@ in the DPDK Programmer's Guide for more details on PMD 
power management.
 .. code-block:: console
 
 .//examples/dpdk-l3fwd-power -l 1-3 -- -p 0x0f 
--config="(0,0,2),(0,1,3)" --pmd-mgmt=scale
+
+Setting Uncore Values
+-
+
+Uncore frequency can be adjusted through manipulating related sysfs entries to 
adjust the minimum and maximum uncore values.
+This will be set for each package and die on the SKU. The driver for enabling 
this is available from kernel version 5.6 and above.
+Three options are available for setting uncore frequency:
+
+``-u``
+  This will set uncore minimum and maximum frequencies to minimum possible 
value.
+
+``-U``
+  This will set uncore minimum and maximum frequencies to maximum possible 
value.
+
+``-i``
+  This will allow you to set the specific uncore frequency index that you 
want, by setting
+  the uncore frequency to a frequency pointed to by index. Frequency indexes are 
set 100MHz apart from
+  maximum to minimum.
+  Frequency index values are in descending order, i.e., index 0 is the maximum 
frequency index.
+
+.. code-block:: console
+
+.//examples/dpdk-l3fwd-power -l 1-3 -- -p 0x0f 
--config="(0,0,2),(0,1,3)" -i 1
diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c
index 887c6eae3f..d1a32594c0 100644
--- a/examples/l3fwd-power/main.c
+++ b/examples/l3fwd-power/main.c
@@ -47,6 +47,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "perf_core.h"
 #include "main.h"
@@ -179,6 +180,12 @@ enum busy_rate {
FULL = 100
 };
 
+enum uncore_choice {
+   UNCORE_MIN = 0,
+   UNCORE_MAX = 1,
+   UNCORE_IDX = 2
+};
+
 /* reference poll count to measure core busyness */
 #define DEFAULT_COUNT 1
 /*
@@ -1616,6 +1623,9 @@ print_usage(const char *prgname)
"  [--max-pkt-len PKTLEN]\n"
"  -p PORTMASK: hexadecimal bitmask of ports to configure\n"
"  -P: enable promiscuous mode\n"
+   "  -u: set min/max frequency for uncore to minimum value\n"
+   "  -U: set min/max frequency for uncore to maximum value\n"
+   "  -i (frequency index): set min/max frequency for uncore to 
specified frequency index\n"
"  --config (port,queue,lcore): rx queues configuration\n"
"  --high-perf-cores CORELIST: list of high performance cores\n"
"  --perf-config: similar as config, cores specified as indices"
@@ -1672,6 +1682,74 @@ static int parse_max_pkt_len(const char *pktlen)
return len;
 }
 
+static int
+parse_uncore_options(enum uncore_choice choice, const char *argument)
+{
+   unsigned int die, pkg, max_pkg, max_die;
+   int ret = 0;
+   max_pkg = rte_power_uncore_get_num_pkgs();
+   if (max_pkg == 0)
+   return -1;
+
+   for (pkg = 0; pkg < max_pkg; pkg++) {
+   max_die = rte_power_uncore_get_num_dies(pkg);
+   if (max_die == 0)
+   return -1;
+   for (die = 0; die < max_die; die++) {
+   ret = rte_power_uncore_init(pkg, die);
+   if (ret == -1) {
+   RTE_LOG(INFO, L3FWD_POWER, "Unable to 
initialize uncore for pkg %02u die %02u\n"
+   , pkg, die);
+   return ret;
+   }
+   if (choice == UNCORE_MIN) {
+   ret = rte_power_uncore_freq_min(pkg, die);
+  

[PATCH v4 3/3] test/power: add unit tests for uncore API

2022-09-20 Thread Tadhg Kearney
Add basic unit tests covering all nine uncore APIs.

Signed-off-by: Tadhg Kearney 
---
 app/test/meson.build |   2 +
 app/test/test_power_uncore.c | 299 +++
 2 files changed, 301 insertions(+)
 create mode 100644 app/test/test_power_uncore.c

diff --git a/app/test/meson.build b/app/test/meson.build
index bf1d81f84a..170401ccdc 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -100,6 +100,7 @@ test_sources = files(
 'test_power.c',
 'test_power_cpufreq.c',
 'test_power_kvm_vm.c',
+'test_power_uncore.c',
 'test_prefetch.c',
 'test_rand_perf.c',
 'test_rawdev.c',
@@ -240,6 +241,7 @@ fast_tests = [
 ['power_cpufreq_autotest', false, true],
 ['power_autotest', true, true],
 ['power_kvm_vm_autotest', false, true],
+['power_uncore_autotest', true, true],
 ['reorder_autotest', true, true],
 ['service_autotest', true, true],
 ['thash_autotest', true, true],
diff --git a/app/test/test_power_uncore.c b/app/test/test_power_uncore.c
new file mode 100644
index 00..7bc3ed7260
--- /dev/null
+++ b/app/test/test_power_uncore.c
@@ -0,0 +1,299 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2022 Intel Corporation
+ */
+
+#include "test.h"
+
+#ifndef RTE_LIB_POWER
+
+static int
+test_power_uncore(void)
+{
+   printf("Power management library not supported, skipping test\n");
+   return TEST_SKIPPED;
+}
+
+#else
+#include 
+#include 
+
+#define MAX_UNCORE_FREQS 32
+
+#define VALID_PKG 0
+#define VALID_DIE 0
+#define INVALID_PKG (rte_power_uncore_get_num_pkgs() + 1)
+#define INVALID_DIE (rte_power_uncore_get_num_dies(VALID_PKG) + 1)
+#define VALID_INDEX 1
+#define INVALID_INDEX (MAX_UNCORE_FREQS + 1)
+
+static int check_power_uncore_init(void)
+{
+   int ret;
+
+   /* Test initialisation of uncore configuration*/
+   ret = rte_power_uncore_init(VALID_PKG, VALID_DIE);
+   if (ret < 0) {
+   printf("Cannot initialise uncore power management for pkg %u 
die %u, this "
+   "may occur if environment is not configured "
+   "correctly(APCI cpufreq) or operating in another valid "
+   "Power management environment\n", VALID_PKG, VALID_DIE);
+   return -1;
+   }
+
+   /* Unsuccessful Test */
+   ret = rte_power_uncore_init(INVALID_PKG, INVALID_DIE);
+   if (ret == 0) {
+   printf("Unexpectedly was able to initialise uncore power 
management "
+   "for pkg %u die %u\n", INVALID_PKG, INVALID_DIE);
+   return -1;
+   }
+
+   return 0;
+}
+
+static int
+check_power_get_uncore_freq(void)
+{
+   int ret;
+
+   /* Successfully get uncore freq */
+   ret = rte_power_get_uncore_freq(VALID_PKG, VALID_DIE);
+   if (ret < 0) {
+   printf("Failed to get uncore frequency for pkg %u die %u\n",
+   VALID_PKG, VALID_DIE);
+   return -1;
+   }
+
+   /* Unsuccessful Test */
+   ret = rte_power_get_uncore_freq(INVALID_PKG, INVALID_DIE);
+   if (ret >= 0) {
+   printf("Unexpectedly got invalid uncore frequency for pkg %u 
die %u\n",
+   INVALID_PKG, 
INVALID_DIE);
+   return -1;
+   }
+
+   return 0;
+}
+
+static int
+check_power_set_uncore_freq(void)
+{
+   int ret;
+
+   /* Successfully set uncore freq */
+   ret = rte_power_set_uncore_freq(VALID_PKG, VALID_DIE, VALID_INDEX);
+   if (ret < 0) {
+   printf("Failed to set uncore frequency for pkg %u die %u index 
%u\n",
+   VALID_PKG, VALID_DIE, 
VALID_INDEX);
+   return -1;
+   }
+
+   /* Try to unsuccessfully set invalid uncore freq index */
+   ret = rte_power_set_uncore_freq(VALID_PKG, VALID_DIE, INVALID_INDEX);
+   if (ret == 0) {
+   printf("Unexpectedly set invalid uncore index for pkg %u die %u 
index %u\n",
+   VALID_PKG, VALID_DIE, 
INVALID_INDEX);
+   return -1;
+   }
+
+   /* Unsuccessful Test */
+   ret = rte_power_set_uncore_freq(INVALID_PKG, INVALID_DIE, VALID_INDEX);
+   if (ret == 0) {
+   printf("Unexpectedly set invalid uncore frequency for pkg %u 
die %u index %u\n",
+   INVALID_PKG, 
INVALID_DIE, VALID_INDEX);
+   return -1;
+   }
+
+   return 0;
+}
+
+static int
+check_power_uncore_freq_max(void)
+{
+   int ret;
+
+   /* Successfully get max uncore freq */
+   ret = rte_power_uncore_freq_max(VALID_PKG, VALID_DIE);
+   if (ret < 0) {
+   printf("Failed to set max uncore frequency for pkg %u die %u\n",
+   

Re: [PATCH] net/nfp: improve readability NFP HWINFO header

2022-09-20 Thread Niklas Söderlund
Hi all,

Gentle ping.

On 2022-08-26 13:39:03 +0800, Chaoyong He wrote:
> From: James Hershaw 
> 
> Prepend `0x` to the NFP HWINFO header value that is printed to improve
> the readability of the printed statement.
> 
> Signed-off-by: James Hershaw 
> Reviewed-by: Chaoyong He 
> Reviewed-by: Niklas Söderlund 
> ---
>  drivers/net/nfp/nfpcore/nfp_hwinfo.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/net/nfp/nfpcore/nfp_hwinfo.c 
> b/drivers/net/nfp/nfpcore/nfp_hwinfo.c
> index c0516bf..9f848bd 100644
> --- a/drivers/net/nfp/nfpcore/nfp_hwinfo.c
> +++ b/drivers/net/nfp/nfpcore/nfp_hwinfo.c
> @@ -108,7 +108,7 @@
>   goto exit_free;
>  
>   header = (void *)db;
> - printf("NFP HWINFO header: %08x\n", *(uint32_t *)header);
> + printf("NFP HWINFO header: %#08x\n", *(uint32_t *)header);
>   if (nfp_hwinfo_is_updating(header))
>   goto exit_free;
>  
> -- 
> 1.8.3.1
> 

-- 
Kind Regards,
Niklas Söderlund


Re: [PATCH 0/2] upgrade the log system of nfp PMD

2022-09-20 Thread Niklas Söderlund
Hi everyone,

A gentle ping on this.

On 2022-08-26 14:03:04 +0800, Chaoyong He wrote:
> This patch series do some upgrade of the log system of nfp PMD:
> Use DPDK debug macro to control the nfp Rx/Tx log.
> Add the support of nfp cpp log macro.
> 
> Long Wu (2):
>   net/nfp: add support for nfp cpp log print
>   net/nfp: use dpdk debug macro to control nfp Rx/Tx log print
> 
>  drivers/net/nfp/nfp_common.c | 1 +
>  drivers/net/nfp/nfp_logs.h   | 9 ++---
>  2 files changed, 7 insertions(+), 3 deletions(-)
> 
> -- 
> 1.8.3.1
> 

-- 
Kind Regards,
Niklas Söderlund


Re: [PATCH v3] usertools: rewrite pmdinfo

2022-09-20 Thread Ferruh Yigit

On 9/20/2022 10:08 AM, Robin Jarry wrote:

CAUTION: This message has originated from an External Source. Please use proper 
judgment and caution when opening attachments, clicking links, or responding to 
this email.


dpdk-pmdinfo.py does not produce any parseable output. The -r/--raw flag
merely prints multiple independent JSON lines which cannot be fed
directly to any JSON parser. Moreover, the script complexity is rather
high for such a simple task: extracting PMD_INFO_STRING from .rodata ELF
sections. Rewrite it so that it can produce valid JSON.

Remove the PCI database parsing for PCI-ID to Vendor-Device names
conversion. This should be done by external scripts (if really needed).

Here are some examples of use with jq:

Get the complete info for a given driver:

  ~$ usertools/dpdk-pmdinfo.py build/app/dpdk-testpmd | \
jq '.[] | select(.name == "dmadev_idxd_pci")'
  {
"name": "dmadev_idxd_pci",
"params": "max_queues=0",
"kmod": "vfio-pci",
"pci_ids": [
  {
"vendor": "8086",
"device": "0b25",
"subsystem_vendor": "",
"subsystem_device": ""
  }
]
  }

Get only the required kernel modules for a given driver:

  ~$ usertools/dpdk-pmdinfo.py build/app/dpdk-testpmd | \
jq '.[] | select(.name == "net_i40e").kmod'
  "* igb_uio | uio_pci_generic | vfio-pci"

Get only the required kernel modules for a given device:

  ~$ usertools/dpdk-pmdinfo.py build/app/dpdk-testpmd | \
jq '.[] | select(.pci_ids[] | .vendor == "15b3" and .device == "1013").kmod'
  "* ib_uverbs & mlx5_core & mlx5_ib"

Print the list of drivers which define multiple parameters without
string separators:

  ~$ usertools/dpdk-pmdinfo.py build/app/dpdk-testpmd | \
jq '.[] | select(.params!=null and (.params|test("=[^ ]+="))) | {name, 
params}'
  ...

The script passes flake8, black, isort and pylint checks.

I have tested this with a matrix of python/pyelftools versions:

  pyelftools
0.22 0.23 0.24 0.25 0.26 0.27 0.28 0.29
  3.6 ok   ok   ok   ok   ok   ok   ok   ok
  3.7 ok   ok   ok   ok   ok   ok   ok   ok
   Python 3.8 ok   ok   ok   ok   ok   ok   ok   ok
  3.9 ok   ok   ok   ok   ok   ok   ok   ok
  3.10  fail fail fail fail   ok   ok   ok   ok

All failures with python 3.10 are related to the same issue:

   File "elftools/construct/lib/container.py", line 5, in 
 from collections import MutableMapping
   ImportError: cannot import name 'MutableMapping' from 'collections'

Python 3.10 support is only available since pyelftools 0.26. The script
will only work with Python 3.6 and later. Update the minimal system
requirements and release notes.

NB: The output produced by the legacy -r/--raw flag can be obtained with
the following command:

   strings build/app/dpdk-testpmd | sed -n 's/^PMD_INFO_STRING= //p'

Cc: Olivier Matz 
Cc: Ferruh Yigit 
Cc: Bruce Richardson 
Signed-off-by: Robin Jarry 



For 'subsystem_vendor' & 'subsystem_device', the value "" means it 
is not explicitly defined, so it gets default value.
What do you think to omit those as well, when value is "", to reduce 
noise on the output?




BTW, I have detected some duplicates in the output, like [1], [2] & [3]. 
It seems related to the duplicates in the code, cc'ed maintainers.


[1]:
  {
"name": "net_qede",
"kmod": "* igb_uio | uio_pci_generic | vfio-pci",
"pci_ids": [
  {
"vendor": "1077",
"device": "1634",
"subsystem_vendor": "",
"subsystem_device": ""
  },
  {
"vendor": "1077",
"device": "1629",
"subsystem_vendor": "",
"subsystem_device": ""
  },
  {
"vendor": "1077",
"device": "1634",
"subsystem_vendor": "",
"subsystem_device": ""
  },
...

[2]
  {
"name": "mempool_cnxk",
"params": "max_pools=<128-1048576>",
"kmod": "vfio-pci",
"pci_ids": [
  {
"vendor": "177d",
"device": "a0fb",
"subsystem_vendor": "",
"subsystem_device": "b900"
  },
  {
"vendor": "177d",
"device": "a0fb",
"subsystem_vendor": "",
"subsystem_device": "b900"
  },
...

[3]
  {
"name": "net_cn10k",
"kmod": "vfio-pci",
"pci_ids": [
  {
"vendor": "177d",
"device": "a063",
"subsystem_vendor": "",
"subsystem_device": "b900"
  },
  {
"vendor": "177d",
"device": "a063",
"subsystem_vendor": "",
"subsystem_device": "b900"
  },
...


Re: FW: [PATCH v1] buildtools: ensure the NUMA nodes are counted correct

2022-09-20 Thread Niklas Soderlund
Hi Thomas,

Have you checked whether this addresses the same issue you were seeing? Do you 
think we can move forward with this fix?

On 2022-08-31 10:47:24 +0200, Nole Zhang wrote:
> 
> 
> 
> > -Original Message-
> > From: Thomas Monjalon 
> > Sent: 2022年8月29日 21:15
> > To: Nole Zhang ; Chaoyong He 
> > 
> > Subject: Re: [PATCH v1] buildtools: ensure the NUMA nodes are counted 
> > correct
> > 
> > 29/08/2022 13:17, Nole Zhang:
> > > From: Thomas Monjalon 
> > > > 02/08/2022 09:54, Chaoyong He:
> > > > > From: Peng Zhang 
> > > > >
> > > > > Sorting a list of strings with the format "node[0-9]+" in order 
> > > > > to find the largest integer by looking at the last item after 
> > > > > the sort breaks. But if there are more then 10 items as a string 
> > > > > sort will sort "node10" before "node2", it will get the error NUMA 
> > > > > nodes.
> > > >
> > > > What is the error you are seeing?
> > > >
> > > >
> > > We get the error NUMA, in this example, we get the NUMA nodes is 10, 
> > > But at fact, it has 11 NUMA.
> > 
> > Please give more details, where do you see this error?
> > We should know how to reproduce and check we have the same issue.
> > Thanks
> > 
> > Please reply with a detailed answer on the mailing list.
> > 
> In the China Phytium S2500 CPU + INSPUR server, it has 16 NUMA.
> The details are as follows:
> 
> ~#: lscpu
> 
> Architecture:aarch64
> CPU op-mode(s):  64-bit
> Byte Order:  Little Endian
> CPU(s):  128
> On-line CPU(s) list: 0-127
> Thread(s) per core:  1
> Core(s) per socket:  64
> Socket(s):   2
> NUMA node(s):16
> Vendor ID:   0x70
> Model:   3
> Model name:  S2500
> Stepping:0x1
> BogoMIPS:100.00
> L1d cache:   4 MiB
> L1i cache:   4 MiB
> L2 cache:64 MiB
> L3 cache:128 MiB
> NUMA node0 CPU(s):   0-7
> NUMA node1 CPU(s):   8-15
> NUMA node2 CPU(s):   16-23
> NUMA node3 CPU(s):   24-31
> NUMA node4 CPU(s):   32-39
> NUMA node5 CPU(s):   40-47
> NUMA node6 CPU(s):   48-55
> NUMA node7 CPU(s):   56-63
> NUMA node8 CPU(s):   64-71
> NUMA node9 CPU(s):   72-79
> NUMA node10 CPU(s):  80-87
> NUMA node11 CPU(s):  88-95
> NUMA node12 CPU(s):  96-103
> NUMA node13 CPU(s):  104-111
> NUMA node14 CPU(s):  112-119
> NUMA node15 CPU(s):  120-127
> Flags:   half thumb fastmult vfp edsp neon vfpv3 tls vfpv4 idiva 
> idivt lpae evtstrm
> 
> 
> I use  meson build -Dmax_lcores=detect -Dmax_numa_nodes=detect to compile, 
> then dpdk initialization only shows 10 numa. 

-- 
Kind Regards,
Niklas Söderlund


Re: [PATCH v3] usertools: rewrite pmdinfo

2022-09-20 Thread Robin Jarry
Ferruh Yigit, Sep 20, 2022 at 12:10:
> For 'subsystem_vendor' & 'subsystem_device', the value "" means it 
> is not explicitly defined, so it gets default value.
> What do you think to omit those as well, when value is "", to reduce 
> noise on the output?

Sure, I could strip those as well.



[PATCH v2] net/nfp: support Corigine PCIe ID for the nfp PMD

2022-09-20 Thread Niklas Söderlund
From: James Hershaw 

Previously the nfp driver has supported NFP chips with the Netronome PCIe
ID. This patch extends the PMD to also support NFP chips with the
Corigine PCIe vendor ID (0x1da8), which at this point are assumed to be
otherwise identical from a software perspective.

Signed-off-by: James Hershaw 
Reviewed-by: Niklas Söderlund 
---
 drivers/net/nfp/nfp_common.h|  2 ++
 drivers/net/nfp/nfp_ethdev.c| 12 
 drivers/net/nfp/nfp_ethdev_vf.c |  8 
 3 files changed, 22 insertions(+)

diff --git a/drivers/net/nfp/nfp_common.h b/drivers/net/nfp/nfp_common.h
index 6d917e4b4acd..6ceb7e9e28a1 100644
--- a/drivers/net/nfp/nfp_common.h
+++ b/drivers/net/nfp/nfp_common.h
@@ -16,6 +16,8 @@
 
 #define NFP_NET_PMD_VERSION "0.1"
 #define PCI_VENDOR_ID_NETRONOME 0x19ee
+#define PCI_VENDOR_ID_CORIGINE  0x1da8
+
 #define PCI_DEVICE_ID_NFP3800_PF_NIC0x3800
 #define PCI_DEVICE_ID_NFP3800_VF_NIC0x3803
 #define PCI_DEVICE_ID_NFP4000_PF_NIC0x4000
diff --git a/drivers/net/nfp/nfp_ethdev.c b/drivers/net/nfp/nfp_ethdev.c
index e9d01f4414a3..2e546cfc5ffc 100644
--- a/drivers/net/nfp/nfp_ethdev.c
+++ b/drivers/net/nfp/nfp_ethdev.c
@@ -1072,6 +1072,18 @@ static const struct rte_pci_id pci_id_nfp_pf_net_map[] = 
{
RTE_PCI_DEVICE(PCI_VENDOR_ID_NETRONOME,
   PCI_DEVICE_ID_NFP6000_PF_NIC)
},
+   {
+   RTE_PCI_DEVICE(PCI_VENDOR_ID_CORIGINE,
+  PCI_DEVICE_ID_NFP3800_PF_NIC)
+   },
+   {
+   RTE_PCI_DEVICE(PCI_VENDOR_ID_CORIGINE,
+  PCI_DEVICE_ID_NFP4000_PF_NIC)
+   },
+   {
+   RTE_PCI_DEVICE(PCI_VENDOR_ID_CORIGINE,
+  PCI_DEVICE_ID_NFP6000_PF_NIC)
+   },
{
.vendor_id = 0,
},
diff --git a/drivers/net/nfp/nfp_ethdev_vf.c b/drivers/net/nfp/nfp_ethdev_vf.c
index d304d78d3448..affea9d17cdf 100644
--- a/drivers/net/nfp/nfp_ethdev_vf.c
+++ b/drivers/net/nfp/nfp_ethdev_vf.c
@@ -504,6 +504,14 @@ static const struct rte_pci_id pci_id_nfp_vf_net_map[] = {
RTE_PCI_DEVICE(PCI_VENDOR_ID_NETRONOME,
   PCI_DEVICE_ID_NFP6000_VF_NIC)
},
+   {
+   RTE_PCI_DEVICE(PCI_VENDOR_ID_CORIGINE,
+  PCI_DEVICE_ID_NFP3800_VF_NIC)
+   },
+   {
+   RTE_PCI_DEVICE(PCI_VENDOR_ID_CORIGINE,
+  PCI_DEVICE_ID_NFP6000_VF_NIC)
+   },
{
.vendor_id = 0,
},
-- 
2.37.3



[PATCH v4] usertools: rewrite pmdinfo

2022-09-20 Thread Robin Jarry
dpdk-pmdinfo.py does not produce any parseable output. The -r/--raw flag
merely prints multiple independent JSON lines which cannot be fed
directly to any JSON parser. Moreover, the script complexity is rather
high for such a simple task: extracting PMD_INFO_STRING from .rodata ELF
sections. Rewrite it so that it can produce valid JSON.

Remove the PCI database parsing for PCI-ID to Vendor-Device names
conversion. This should be done by external scripts (if really needed).

Here are some examples of use with jq:

Get the complete info for a given driver:

 ~$ usertools/dpdk-pmdinfo.py build/app/dpdk-testpmd | \
   jq '.[] | select(.name == "dmadev_idxd_pci")'
 {
   "name": "dmadev_idxd_pci",
   "params": "max_queues=0",
   "kmod": "vfio-pci",
   "pci_ids": [
 {
   "vendor": "8086",
   "device": "0b25"
 }
   ]
 }

Get only the required kernel modules for a given driver:

 ~$ usertools/dpdk-pmdinfo.py build/app/dpdk-testpmd | \
   jq '.[] | select(.name == "net_i40e").kmod'
 "* igb_uio | uio_pci_generic | vfio-pci"

Get only the required kernel modules for a given device:

 ~$ usertools/dpdk-pmdinfo.py build/app/dpdk-testpmd | \
   jq '.[] | select(.pci_ids[] | .vendor == "15b3" and .device == "1013").kmod'
 "* ib_uverbs & mlx5_core & mlx5_ib"

Print the list of drivers which define multiple parameters without
space separators:

 ~$ usertools/dpdk-pmdinfo.py build/app/dpdk-testpmd | \
   jq '.[] | select(.params!=null and (.params|test("=[^ ]+="))) | {name, 
params}'
 ...

The script passes flake8, black, isort and pylint checks.

I have tested this with a matrix of python/pyelftools versions:

 pyelftools
   0.22 0.23 0.24 0.25 0.26 0.27 0.28 0.29
 3.6 ok   ok   ok   ok   ok   ok   ok   ok
 3.7 ok   ok   ok   ok   ok   ok   ok   ok
  Python 3.8 ok   ok   ok   ok   ok   ok   ok   ok
 3.9 ok   ok   ok   ok   ok   ok   ok   ok
 3.10  fail fail fail fail   ok   ok   ok   ok

All failures with python 3.10 are related to the same issue:

  File "elftools/construct/lib/container.py", line 5, in 
from collections import MutableMapping
  ImportError: cannot import name 'MutableMapping' from 'collections'

Python 3.10 support is only available since pyelftools 0.26. The script
will only work with Python 3.6 and later. Update the minimal system
requirements and release notes.

NB: The output produced by the legacy -r/--raw flag can be obtained with
the following command:

  strings build/app/dpdk-testpmd | sed -n 's/^PMD_INFO_STRING= //p'

Cc: Olivier Matz 
Cc: Ferruh Yigit 
Cc: Bruce Richardson 
Signed-off-by: Robin Jarry 
---
v3 -> v4:

* also strip pci_id fields when they have the wildcard 0xFFFF value.

v2 -> v3:

* strip "pci_ids" when it is empty (some drivers do not support any
  pci devices)

v1 -> v2:

* update release notes and minimal python version requirement
* hide warnings by default (-v/--verbose to show them)
* show debug messages with -vv
* also search libs in folders listed in /etc/ld.so.conf/*.conf
* only search for DT_NEEDED on executables, not on dynamic libraries
* take DT_RUNPATH into account for searching libraries
* fix weird broken pipe error
* fix some typos:
s/begining/beginning/
s/subsystem_device/subsystem_vendor/
s/subsystem_system/subsystem_device/
* change field names for pci_ids elements (remove _id suffixes)
* DT_NEEDED of files are analyzed. There is no way to differentiate
  between dynamically linked executables and dynamic libraries.

 doc/guides/linux_gsg/sys_reqs.rst  |   2 +-
 doc/guides/rel_notes/release_22_11.rst |   5 +
 usertools/dpdk-pmdinfo.py  | 924 +
 3 files changed, 324 insertions(+), 607 deletions(-)

diff --git a/doc/guides/linux_gsg/sys_reqs.rst 
b/doc/guides/linux_gsg/sys_reqs.rst
index 08d45898f025..f842105eeda7 100644
--- a/doc/guides/linux_gsg/sys_reqs.rst
+++ b/doc/guides/linux_gsg/sys_reqs.rst
@@ -41,7 +41,7 @@ Compilation of the DPDK
resulting in statically linked applications not being linked properly.
Use an updated version of ``pkg-config`` or ``pkgconf`` instead when 
building applications
 
-*   Python 3.5 or later.
+*   Python 3.6 or later.
 
 *   Meson (version 0.49.2+) and ninja
 
diff --git a/doc/guides/rel_notes/release_22_11.rst 
b/doc/guides/rel_notes/release_22_11.rst
index 8c021cf0505e..67054f5acdc9 100644
--- a/doc/guides/rel_notes/release_22_11.rst
+++ b/doc/guides/rel_notes/release_22_11.rst
@@ -84,6 +84,11 @@ API Changes
Also, make sure to start the actual text at the margin.
===
 
+* The ``dpdk-pmdinfo.py`` script was rewritten to produce valid JSON only.
+  PCI-IDs parsing has been removed.
+  To get a similar output to the (now removed) ``-r/--raw`` flag, you may use 
the following command::
+
+ strings $dpdk_binary_or_driver | sed -n 's/^PMD_INFO_STRING= //p'
 
 ABI Changes
 ---
diff --git a/usertoo

[PATCH v2 0/6] app/procinfo: add some extended features

2022-09-20 Thread Dongdong Liu
This patchset is to add some extended features for dpdk-proc-info.

v1->v2: Fix some comments from Reshma.

Dongdong Liu (2):
  app/procinfo: fix some wrong doxygen syntax
  doc: add some extended features in procinfo guide

Jie Hai (1):
  app/proc-info: add dump of Rx/Tx burst mode

Min Hu (Connor) (3):
  app/procinfo: add version dump
  app/procinfo: add RSS RETA dump
  app/procinfo: add module info dump

 app/proc-info/main.c   | 208 +
 doc/guides/tools/proc_info.rst |  12 ++
 2 files changed, 197 insertions(+), 23 deletions(-)

--
2.22.0



[PATCH v2 2/6] app/procinfo: add RSS RETA dump

2022-09-20 Thread Dongdong Liu
From: "Min Hu (Connor)" 

This patch adds support for dumping the RSS RETA.

The command is like:
dpdk-proc-info -a :xx:xx.x --file-prefix=xxx -- --show-rss-reta

Signed-off-by: Min Hu (Connor) 
Signed-off-by: Dongdong Liu 
---
 app/proc-info/main.c | 58 
 1 file changed, 58 insertions(+)

diff --git a/app/proc-info/main.c b/app/proc-info/main.c
index da67155007..a718c201ce 100644
--- a/app/proc-info/main.c
+++ b/app/proc-info/main.c
@@ -46,6 +46,8 @@
 #define MAX_STRING_LEN 256
 
 #define ETHDEV_FWVERS_LEN 32
+#define RTE_RETA_CONF_GROUP_NUM 32
+#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
 
 #define STATS_BDR_FMT ""
 #define STATS_BDR_STR(w, s) printf("%.*s%s%.*s\n", w, \
@@ -107,6 +109,8 @@ static uint32_t enable_dump_regs;
 static char *dump_regs_file_prefix;
 /* Enable show version. */
 static uint32_t enable_shw_version;
+/* Enable show RSS reta. */
+static uint32_t enable_shw_rss_reta;
 
 /**< display usage */
 static void
@@ -136,6 +140,7 @@ proc_info_usage(const char *prgname)
"  --show-ring[=name]: to display ring information\n"
"  --show-mempool[=name]: to display mempool information\n"
"  --show-version: to display DPDK version and ethdev firmware 
version\n"
+   "  --show-rss-reta: to display ports redirection table\n"
"  --iter-mempool=name: iterate mempool elements to display 
content\n"
"  --dump-regs=file-prefix: dump registers to file with the 
file-prefix\n",
prgname);
@@ -249,6 +254,7 @@ proc_info_parse_args(int argc, char **argv)
{"iter-mempool", required_argument, NULL, 0},
{"dump-regs", required_argument, NULL, 0},
{"show-version", 0, NULL, 0},
+   {"show-rss-reta", 0, NULL, 0},
{NULL, 0, 0, 0}
};
 
@@ -323,6 +329,9 @@ proc_info_parse_args(int argc, char **argv)
} else if (!strncmp(long_option[option_index].name,
"show-version", MAX_LONG_OPT_SZ))
enable_shw_version = 1;
+   else if (!strncmp(long_option[option_index].name,
+   "show-rss-reta", MAX_LONG_OPT_SZ))
+   enable_shw_rss_reta = 1;
break;
case 1:
/* Print xstat single value given by name*/
@@ -1511,6 +1520,53 @@ show_version(void)
}
 }
 
+static void
+show_port_rss_reta_info(void)
+{
+   struct rte_eth_rss_reta_entry64 reta_conf[RTE_RETA_CONF_GROUP_NUM + 1];
+   struct rte_eth_dev_info dev_info;
+   uint16_t i, idx, shift;
+   uint16_t num;
+   uint16_t id;
+   int ret;
+
+   RTE_ETH_FOREACH_DEV(id) {
+   /* Skip if port is not in mask */
+   if ((enabled_port_mask & (1ul << id)) == 0)
+   continue;
+
+   if (!rte_eth_dev_is_valid_port(id))
+   continue;
+
+   snprintf(bdr_str, MAX_STRING_LEN, " Port %u ", id);
+   STATS_BDR_STR(5, bdr_str);
+
+   ret = rte_eth_dev_info_get(id, &dev_info);
+   if (ret < 0) {
+   printf("Error getting device info, ret = %d\n", ret);
+   return;
+   }
+
+   num = DIV_ROUND_UP(dev_info.reta_size, RTE_ETH_RETA_GROUP_SIZE);
+   memset(reta_conf, 0, sizeof(reta_conf));
+   for (i = 0; i < num; i++)
+   reta_conf[i].mask = ~0ULL;
+
+   ret = rte_eth_dev_rss_reta_query(id, reta_conf, 
dev_info.reta_size);
+   if (ret < 0) {
+   printf("Failed to get RSS RETA info, ret = %d\n", ret);
+   return;
+   }
+
+   for (i = 0; i < dev_info.reta_size; i++) {
+   idx = i / RTE_ETH_RETA_GROUP_SIZE;
+   shift = i % RTE_ETH_RETA_GROUP_SIZE;
+   printf("RSS RETA configuration: hash index=%u, 
queue=%u\n",
+   i, reta_conf[idx].reta[shift]);
+   }
+   }
+}
+
 int
 main(int argc, char **argv)
 {
@@ -1626,6 +1682,8 @@ main(int argc, char **argv)
dump_regs(dump_regs_file_prefix);
if (enable_shw_version)
show_version();
+   if (enable_shw_rss_reta)
+   show_port_rss_reta_info();
RTE_ETH_FOREACH_DEV(i)
rte_eth_dev_close(i);
 
-- 
2.22.0



[PATCH v2 1/6] app/procinfo: add version dump

2022-09-20 Thread Dongdong Liu
From: "Min Hu (Connor)" 

This patch adds support for dumping the DPDK version and ethdev firmware version.

The command is like:
dpdk-proc-info -a :xx:xx.x --file-prefix=xxx -- --show-version

Signed-off-by: Min Hu (Connor) 
Signed-off-by: Dongdong Liu 
---
 app/proc-info/main.c | 40 ++--
 1 file changed, 38 insertions(+), 2 deletions(-)

diff --git a/app/proc-info/main.c b/app/proc-info/main.c
index 1bfba5f60d..da67155007 100644
--- a/app/proc-info/main.c
+++ b/app/proc-info/main.c
@@ -39,11 +39,14 @@
 #include 
 #include 
 #include 
+#include 
 
 /* Maximum long option length for option parsing. */
 #define MAX_LONG_OPT_SZ 64
 #define MAX_STRING_LEN 256
 
+#define ETHDEV_FWVERS_LEN 32
+
 #define STATS_BDR_FMT ""
 #define STATS_BDR_STR(w, s) printf("%.*s%s%.*s\n", w, \
STATS_BDR_FMT, s, w, STATS_BDR_FMT)
@@ -102,6 +105,8 @@ static char *mempool_iter_name;
 /**< Enable dump regs. */
 static uint32_t enable_dump_regs;
 static char *dump_regs_file_prefix;
+/* Enable show version. */
+static uint32_t enable_shw_version;
 
 /**< display usage */
 static void
@@ -130,6 +135,7 @@ proc_info_usage(const char *prgname)
"  --show-crypto: to display crypto information\n"
"  --show-ring[=name]: to display ring information\n"
"  --show-mempool[=name]: to display mempool information\n"
+   "  --show-version: to display DPDK version and ethdev firmware 
version\n"
"  --iter-mempool=name: iterate mempool elements to display 
content\n"
"  --dump-regs=file-prefix: dump registers to file with the 
file-prefix\n",
prgname);
@@ -242,6 +248,7 @@ proc_info_parse_args(int argc, char **argv)
{"show-mempool", optional_argument, NULL, 0},
{"iter-mempool", required_argument, NULL, 0},
{"dump-regs", required_argument, NULL, 0},
+   {"show-version", 0, NULL, 0},
{NULL, 0, 0, 0}
};
 
@@ -313,7 +320,9 @@ proc_info_parse_args(int argc, char **argv)
"dump-regs", MAX_LONG_OPT_SZ)) {
enable_dump_regs = 1;
dump_regs_file_prefix = optarg;
-   }
+   } else if (!strncmp(long_option[option_index].name,
+   "show-version", MAX_LONG_OPT_SZ))
+   enable_shw_version = 1;
break;
case 1:
/* Print xstat single value given by name*/
@@ -1476,6 +1485,32 @@ dump_regs(char *file_prefix)
}
 }
 
+static void
+show_version(void)
+{
+   char fw_version[ETHDEV_FWVERS_LEN];
+   uint16_t i;
+
+   snprintf(bdr_str, MAX_STRING_LEN, " show - version ");
+   STATS_BDR_STR(10, bdr_str);
+   printf("DPDK version: %s\n", rte_version());
+
+   RTE_ETH_FOREACH_DEV(i) {
+   /* Skip if port is not in mask */
+   if ((enabled_port_mask & (1ul << i)) == 0)
+   continue;
+
+   if (!rte_eth_dev_is_valid_port(i))
+   continue;
+
+   if (rte_eth_dev_fw_version_get(i, fw_version,
+   ETHDEV_FWVERS_LEN) == 0)
+   printf("Ethdev port %u firmware version: %s\n", i, 
fw_version);
+   else
+   printf("Ethdev port %u firmware version: %s\n", i, "not 
available");
+   }
+}
+
 int
 main(int argc, char **argv)
 {
@@ -1589,7 +1624,8 @@ main(int argc, char **argv)
iter_mempool(mempool_iter_name);
if (enable_dump_regs)
dump_regs(dump_regs_file_prefix);
-
+   if (enable_shw_version)
+   show_version();
RTE_ETH_FOREACH_DEV(i)
rte_eth_dev_close(i);
 
-- 
2.22.0



[PATCH v2 4/6] app/proc-info: add dump of Rx/Tx burst mode

2022-09-20 Thread Dongdong Liu
From: Jie Hai 

Add dump of Rx/Tx burst mode in --show-port.

Sample output changes:
   - rx queue
- -- 0 descriptors 0/1024 drop_en rx buffer size 2048 \
mempool mb_pool_0 socket 0
+ -- 0 descriptors 0/1024 drop_en rx buffer size 2048 \
mempool mb_pool_0 socket 0 burst mode : Vector Neon
   - tx queue
- -- 0 descriptors 1024 thresh 32/928 \
offloads : MBUF_FAST_FREE
+ -- 0 descriptors 1024 thresh 32/928 \
offloads : MBUF_FAST_FREE burst mode : Scalar

Signed-off-by: Jie Hai 
Signed-off-by: Dongdong Liu 
---
 app/proc-info/main.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/app/proc-info/main.c b/app/proc-info/main.c
index bc93af51df..9f708421c5 100644
--- a/app/proc-info/main.c
+++ b/app/proc-info/main.c
@@ -838,6 +838,7 @@ show_port(void)
 
for (j = 0; j < dev_info.nb_rx_queues; j++) {
struct rte_eth_rxq_info queue_info;
+   struct rte_eth_burst_mode mode;
int count;
 
ret = rte_eth_rx_queue_info_get(i, j, &queue_info);
@@ -873,11 +874,18 @@ show_port(void)
if (queue_info.conf.offloads != 0)
show_offloads(queue_info.conf.offloads, 
rte_eth_dev_rx_offload_name);
 
+   if (rte_eth_rx_burst_mode_get(i, j, &mode) == 0)
+   printf(" burst mode : %s%s",
+  mode.info,
+  mode.flags & 
RTE_ETH_BURST_FLAG_PER_QUEUE ?
+   " (per queue)" : "");
+
printf("\n");
}
 
for (j = 0; j < dev_info.nb_tx_queues; j++) {
struct rte_eth_txq_info queue_info;
+   struct rte_eth_burst_mode mode;
 
ret = rte_eth_tx_queue_info_get(i, j, &queue_info);
if (ret != 0)
@@ -898,6 +906,13 @@ show_port(void)
 
if (queue_info.conf.offloads != 0)
show_offloads(queue_info.conf.offloads, 
rte_eth_dev_tx_offload_name);
+
+   if (rte_eth_tx_burst_mode_get(i, j, &mode) == 0)
+   printf(" burst mode : %s%s",
+  mode.info,
+  mode.flags & 
RTE_ETH_BURST_FLAG_PER_QUEUE ?
+   " (per queue)" : "");
+
printf("\n");
}
 
-- 
2.22.0



[PATCH v2 3/6] app/procinfo: add module info dump

2022-09-20 Thread Dongdong Liu
From: "Min Hu (Connor)" 

This patch adds support for module info dump.

The command is like:
dpdk-proc-info -a :xx:xx.x --file-prefix=xxx -- --show-module-info

Signed-off-by: Min Hu (Connor) 
Signed-off-by: Dongdong Liu 
---
 app/proc-info/main.c | 53 
 1 file changed, 53 insertions(+)

diff --git a/app/proc-info/main.c b/app/proc-info/main.c
index a718c201ce..bc93af51df 100644
--- a/app/proc-info/main.c
+++ b/app/proc-info/main.c
@@ -48,6 +48,7 @@
 #define ETHDEV_FWVERS_LEN 32
 #define RTE_RETA_CONF_GROUP_NUM 32
 #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+#define EEPROM_DUMP_CHUNKSIZE 1024
 
 #define STATS_BDR_FMT ""
 #define STATS_BDR_STR(w, s) printf("%.*s%s%.*s\n", w, \
@@ -111,6 +112,8 @@ static char *dump_regs_file_prefix;
 static uint32_t enable_shw_version;
 /* Enable show RSS reta. */
 static uint32_t enable_shw_rss_reta;
+/* Enable show module info. */
+static uint32_t enable_shw_module_info;
 
 /**< display usage */
 static void
@@ -141,6 +144,7 @@ proc_info_usage(const char *prgname)
"  --show-mempool[=name]: to display mempool information\n"
"  --show-version: to display DPDK version and ethdev firmware 
version\n"
"  --show-rss-reta: to display ports redirection table\n"
+   "  --show-module-info: to display ports module info\n"
"  --iter-mempool=name: iterate mempool elements to display 
content\n"
"  --dump-regs=file-prefix: dump registers to file with the 
file-prefix\n",
prgname);
@@ -255,6 +259,7 @@ proc_info_parse_args(int argc, char **argv)
{"dump-regs", required_argument, NULL, 0},
{"show-version", 0, NULL, 0},
{"show-rss-reta", 0, NULL, 0},
+   {"show-module-info", 0, NULL, 0},
{NULL, 0, 0, 0}
};
 
@@ -332,6 +337,9 @@ proc_info_parse_args(int argc, char **argv)
else if (!strncmp(long_option[option_index].name,
"show-rss-reta", MAX_LONG_OPT_SZ))
enable_shw_rss_reta = 1;
+   else if (!strncmp(long_option[option_index].name,
+   "show-module-info", MAX_LONG_OPT_SZ))
+   enable_shw_module_info = 1;
break;
case 1:
/* Print xstat single value given by name*/
@@ -1567,6 +1575,49 @@ show_port_rss_reta_info(void)
}
 }
 
+static void
+show_module_eeprom_info(void)
+{
+   unsigned char bytes_eeprom[EEPROM_DUMP_CHUNKSIZE];
+   struct rte_eth_dev_module_info module_info;
+   struct rte_dev_eeprom_info eeprom_info;
+   uint16_t i;
+   int ret;
+
+   RTE_ETH_FOREACH_DEV(i) {
+   /* Skip if port is not in mask */
+   if ((enabled_port_mask & (1ul << i)) == 0)
+   continue;
+
+   if (!rte_eth_dev_is_valid_port(i))
+   continue;
+
+   snprintf(bdr_str, MAX_STRING_LEN, " Port %u ", i);
+   STATS_BDR_STR(5, bdr_str);
+
+   ret = rte_eth_dev_get_module_info(i, &module_info);
+   if (ret != 0) {
+   printf("Module EEPROM information read error %d\n", 
ret);
+   return;
+   }
+
+   eeprom_info.offset = 0;
+   eeprom_info.length = module_info.eeprom_len;
+   eeprom_info.data = bytes_eeprom;
+
+   ret = rte_eth_dev_get_module_eeprom(i, &eeprom_info);
+   if (ret != 0) {
+   printf("Module EEPROM read error %d\n", ret);
+   return;
+   }
+
+   rte_hexdump(stdout, "hexdump", eeprom_info.data,
+   eeprom_info.length);
+   printf("Finish -- Port: %u MODULE EEPROM length: %d bytes\n",
+  i, eeprom_info.length);
+   }
+}
+
 int
 main(int argc, char **argv)
 {
@@ -1684,6 +1735,8 @@ main(int argc, char **argv)
show_version();
if (enable_shw_rss_reta)
show_port_rss_reta_info();
+   if (enable_shw_module_info)
+   show_module_eeprom_info();
RTE_ETH_FOREACH_DEV(i)
rte_eth_dev_close(i);
 
-- 
2.22.0



[PATCH v2 5/6] app/procinfo: fix some wrong doxygen syntax

2022-09-20 Thread Dongdong Liu
This patch cleans up comments that use wrong doxygen syntax.

Signed-off-by: Dongdong Liu 
---
 app/proc-info/main.c | 40 
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/app/proc-info/main.c b/app/proc-info/main.c
index 9f708421c5..d8d8c843d1 100644
--- a/app/proc-info/main.c
+++ b/app/proc-info/main.c
@@ -54,33 +54,33 @@
 #define STATS_BDR_STR(w, s) printf("%.*s%s%.*s\n", w, \
STATS_BDR_FMT, s, w, STATS_BDR_FMT)
 
-/**< mask of enabled ports */
+/* mask of enabled ports */
 static unsigned long enabled_port_mask;
-/**< Enable stats. */
+/* Enable stats. */
 static uint32_t enable_stats;
-/**< Enable xstats. */
+/* Enable xstats. */
 static uint32_t enable_xstats;
-/**< Enable collectd format*/
+/* Enable collectd format */
 static uint32_t enable_collectd_format;
-/**< FD to send collectd format messages to STDOUT*/
+/* FD to send collectd format messages to STDOUT */
 static int stdout_fd;
-/**< Host id process is running on */
+/* Host id process is running on */
 static char host_id[MAX_LONG_OPT_SZ];
 #ifdef RTE_LIB_METRICS
-/**< Enable metrics. */
+/* Enable metrics. */
 static uint32_t enable_metrics;
 #endif
-/**< Enable stats reset. */
+/* Enable stats reset. */
 static uint32_t reset_stats;
-/**< Enable xstats reset. */
+/* Enable xstats reset. */
 static uint32_t reset_xstats;
-/**< Enable memory info. */
+/* Enable memory info. */
 static uint32_t mem_info;
-/**< Enable displaying xstat name. */
+/* Enable displaying xstat name. */
 static uint32_t enable_xstats_name;
 static char *xstats_name;
 
-/**< Enable xstats by ids. */
+/* Enable xstats by ids. */
 #define MAX_NB_XSTATS_IDS 1024
 static uint32_t nb_xstats_ids;
 static uint64_t xstats_ids[MAX_NB_XSTATS_IDS];
@@ -88,24 +88,24 @@ static uint64_t xstats_ids[MAX_NB_XSTATS_IDS];
 /* show border */
 static char bdr_str[MAX_STRING_LEN];
 
-/**< Enable show port. */
+/* Enable show port. */
 static uint32_t enable_shw_port;
 /* Enable show port private info. */
 static uint32_t enable_shw_port_priv;
-/**< Enable show tm. */
+/* Enable show tm. */
 static uint32_t enable_shw_tm;
-/**< Enable show crypto. */
+/* Enable show crypto. */
 static uint32_t enable_shw_crypto;
-/**< Enable show ring. */
+/* Enable show ring. */
 static uint32_t enable_shw_ring;
 static char *ring_name;
-/**< Enable show mempool. */
+/* Enable show mempool. */
 static uint32_t enable_shw_mempool;
 static char *mempool_name;
-/**< Enable iter mempool. */
+/* Enable iter mempool. */
 static uint32_t enable_iter_mempool;
 static char *mempool_iter_name;
-/**< Enable dump regs. */
+/* Enable dump regs. */
 static uint32_t enable_dump_regs;
 static char *dump_regs_file_prefix;
 /* Enable show version. */
@@ -115,7 +115,7 @@ static uint32_t enable_shw_rss_reta;
 /* Enable show module info. */
 static uint32_t enable_shw_module_info;
 
-/**< display usage */
+/* display usage */
 static void
 proc_info_usage(const char *prgname)
 {
-- 
2.22.0



[PATCH v2 6/6] doc: add some extended features in procinfo guide

2022-09-20 Thread Dongdong Liu
Add the below extended features to the procinfo guide.

--show-port-private
--show-version
--show-rss-reta
--show-module-info

Signed-off-by: Dongdong Liu 
---
 doc/guides/tools/proc_info.rst | 12 
 1 file changed, 12 insertions(+)

diff --git a/doc/guides/tools/proc_info.rst b/doc/guides/tools/proc_info.rst
index 9772d97ef0..417fb9f308 100644
--- a/doc/guides/tools/proc_info.rst
+++ b/doc/guides/tools/proc_info.rst
@@ -69,6 +69,18 @@ mempool. For invalid or no mempool name, whole list is dump.
 The iter-mempool parameter iterates and displays mempool elements specified
 by name. For invalid or no mempool name no elements are displayed.
 
+**--show-port-private**
+The show-port-private parameter displays ports private information.
+
+**--show-version**
+The show-version parameter displays DPDK version and ethdev firmware version.
+
+**--show-rss-reta**
+The show-rss-reta parameter displays ports rss redirection table.
+
+**--show-module-info**
+The show-module-info parameter displays ports module eeprom information.
+
 Limitations
 ---
 
-- 
2.22.0



RE: [PATCH] doc: relate bifurcated driver and flow isolated mode

2022-09-20 Thread Ori Kam
Hi,

> -Original Message-
> From: Dariusz Sosnowski 
> Sent: Tuesday, 20 September 2022 11:49
> 
> Hi Thomas,
> 
> > -Original Message-
> > From: Thomas Monjalon 
> > Sent: Wednesday, September 14, 2022 23:30
> > To: dev@dpdk.org
> > Cc: Michael Savisko ; Slava Ovsiienko
> > ; Matan Azrad ; Dariusz
> > Sosnowski ; Asaf Penso ;
> Ori
> > Kam ; Ferruh Yigit ; Andrew
> > Rybchenko 
> > Subject: [PATCH] doc: relate bifurcated driver and flow isolated mode
> >
> > External email: Use caution opening links or attachments
> >
> >
> > The relation between the isolated mode in ethdev flow API and bifurcated
> > driver behaviour was not clearly explained.
> >
> > It is made clear in the how-to guide that isolated mode is required for flow
> > bifurcation to the kernel.
> > On the other side, the impact of the isolated mode on a bifurcated driver is
> > made more explicit.
> >
> > Signed-off-by: Thomas Monjalon 
> > ---
> >  doc/guides/howto/flow_bifurcation.rst | 3 ++-
> >  lib/ethdev/rte_flow.h | 4 
> >  2 files changed, 6 insertions(+), 1 deletion(-)
> >
> > diff --git a/doc/guides/howto/flow_bifurcation.rst
> > b/doc/guides/howto/flow_bifurcation.rst
> > index 7ba66b9003..79cf4f1e64 100644
> > --- a/doc/guides/howto/flow_bifurcation.rst
> > +++ b/doc/guides/howto/flow_bifurcation.rst
> > @@ -55,7 +55,8 @@ The full device is already shared with the kernel driver.
> >  The DPDK application can setup some flow steering rules,  and let the rest
> go
> > to the kernel stack.
> >  In order to define the filters strictly with flow rules, -the
> > :ref:`flow_isolated_mode` can be configured.
> > +the :ref:`flow_isolated_mode` must be configured, so there is no
> > +default rule routing traffic to userspace.
> >
> >  There is no specific instructions to follow.
> >  The recommended reading is the :doc:`../prog_guide/rte_flow` guide.
> > diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h index
> > a79f1e7ef0..1bac3fd9ec 100644
> > --- a/lib/ethdev/rte_flow.h
> > +++ b/lib/ethdev/rte_flow.h
> > @@ -4254,6 +4254,10 @@ rte_flow_query(uint16_t port_id,
> >   *
> >   * Isolated mode guarantees that all ingress traffic comes from defined
> flow
> >   * rules only (current and future).
> > + * When enabled with a bifurcated driver,
> > + * non-matched packets are routed to the kernel driver interface.
> > + * When disabled (the default),
> > + * there may be some default rules routing traffic to the DPDK port.
> >   *
> >   * Besides making ingress more deterministic, it allows PMDs to safely
> reuse
> >   * resources otherwise assigned to handle the remaining traffic, such as
> > --
> > 2.36.1
> 
> Looks good to me. Thank you.
> 
> Reviewed-by: Dariusz Sosnowski 
> 
> Best regards,
> Dariusz Sosnowski

Acked-by: Ori Kam 
Best,
Ori


RE: [PATCH v2] ethdev: add send to kernel action

2022-09-20 Thread Ori Kam
Hi Michael,

> -Original Message-
> From: Michael Savisko 
> Sent: Wednesday, 14 September 2022 12:32
> 
> In some cases application may receive a packet that should have been
> received by the kernel. In this case application uses KNI or other means
> to transfer the packet to the kernel.
> 
> With bifurcated driver we can have a rule to route packets matching
> a pattern (example: IPv4 packets) to the DPDK application and the rest
> of the traffic will be received by the kernel.
> But if we want to receive most of the traffic in DPDK except specific
> pattern (example: ICMP packets) that should be processed by the kernel,
> then it's easier to re-route these packets with a single rule.
> 
> This commit introduces new rte_flow action which allows application to
> re-route packets directly to the kernel without software involvement.
> 
> Add new testpmd rte_flow action 'send_to_kernel'. The application
> may use this action to route the packet to the kernel while still
> in the HW.
> 
> Example with testpmd command:
> 
> flow create 0 ingress priority 0 group 1 pattern eth type spec 0x0800
> type mask 0x / end actions send_to_kernel / end
> 
> Signed-off-by: Michael Savisko 
> ---
>  app/test-pmd/cmdline_flow.c | 9 +
>  doc/guides/testpmd_app_ug/testpmd_funcs.rst | 2 ++
>  lib/ethdev/rte_flow.c   | 1 +
>  lib/ethdev/rte_flow.h   | 9 +
>  4 files changed, 21 insertions(+)
> 
> diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
> index 7f50028eb7..042f6b34a6 100644
> --- a/app/test-pmd/cmdline_flow.c
> +++ b/app/test-pmd/cmdline_flow.c
> @@ -612,6 +612,7 @@ enum index {
>   ACTION_PORT_REPRESENTOR_PORT_ID,
>   ACTION_REPRESENTED_PORT,
>   ACTION_REPRESENTED_PORT_ETHDEV_PORT_ID,
> + ACTION_SEND_TO_KERNEL,
>  };
> 
>  /** Maximum size for pattern in struct rte_flow_item_raw. */
> @@ -1872,6 +1873,7 @@ static const enum index next_action[] = {
>   ACTION_CONNTRACK_UPDATE,
>   ACTION_PORT_REPRESENTOR,
>   ACTION_REPRESENTED_PORT,
> + ACTION_SEND_TO_KERNEL,
>   ZERO,
>  };
> 
> @@ -6341,6 +6343,13 @@ static const struct token token_list[] = {
>   .help = "submit a list of associated actions for red",
>   .next = NEXT(next_action),
>   },
> + [ACTION_SEND_TO_KERNEL] = {
> + .name = "send_to_kernel",
> + .help = "send packets to kernel",
> + .priv = PRIV_ACTION(SEND_TO_KERNEL, 0),
> + .next = NEXT(NEXT_ENTRY(ACTION_NEXT)),
> + .call = parse_vc,
> + },
> 
>   /* Top-level command. */
>   [ADD] = {
> diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> index 330e34427d..c259c8239a 100644
> --- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> +++ b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> @@ -4189,6 +4189,8 @@ This section lists supported actions and their
> attributes, if any.
> 
>- ``ethdev_port_id {unsigned}``: ethdev port ID
> 
> +- ``send_to_kernel``: send packets to kernel.
> +
>  Destroying flow rules
>  ~
> 
> diff --git a/lib/ethdev/rte_flow.c b/lib/ethdev/rte_flow.c
> index 501be9d602..627c671ce4 100644
> --- a/lib/ethdev/rte_flow.c
> +++ b/lib/ethdev/rte_flow.c
> @@ -259,6 +259,7 @@ static const struct rte_flow_desc_data
> rte_flow_desc_action[] = {
>   MK_FLOW_ACTION(CONNTRACK, sizeof(struct
> rte_flow_action_conntrack)),
>   MK_FLOW_ACTION(PORT_REPRESENTOR, sizeof(struct
> rte_flow_action_ethdev)),
>   MK_FLOW_ACTION(REPRESENTED_PORT, sizeof(struct
> rte_flow_action_ethdev)),
> + MK_FLOW_ACTION(SEND_TO_KERNEL, 0),
>  };
> 
>  int
> diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h
> index a79f1e7ef0..a82992a6ae 100644
> --- a/lib/ethdev/rte_flow.h
> +++ b/lib/ethdev/rte_flow.h
> @@ -2879,6 +2879,15 @@ enum rte_flow_action_type {
>* @see struct rte_flow_action_ethdev
>*/
>   RTE_FLOW_ACTION_TYPE_REPRESENTED_PORT,
> +
> + /*
> +  * Send packets to the kernel, without going to userspace at all.
> +  * The packets will be received by the kernel driver sharing
> +  * the same device as the DPDK port.
> +  *
> +  * No associated configuration structure.
> +  */
> + RTE_FLOW_ACTION_TYPE_SEND_TO_KERNEL,
>  };
> 
>  /**
> --
> 2.27.0

Acked-by: Ori Kam 
Best,
Ori


RE: [PATCH v3] ethdev: add send to kernel action

2022-09-20 Thread Ori Kam
Hi Michael

> -Original Message-
> From: Michael Savisko 
> Sent: Monday, 19 September 2022 18:50
> 
> In some cases application may receive a packet that should have been
> received by the kernel. In this case application uses KNI or other means
> to transfer the packet to the kernel.
> 
> With bifurcated driver we can have a rule to route packets matching
> a pattern (example: IPv4 packets) to the DPDK application and the rest
> of the traffic will be received by the kernel.
> But if we want to receive most of the traffic in DPDK except specific
> pattern (example: ICMP packets) that should be processed by the kernel,
> then it's easier to re-route these packets with a single rule.
> 
> This commit introduces new rte_flow action which allows application to
> re-route packets directly to the kernel without software involvement.
> 
> Add new testpmd rte_flow action 'send_to_kernel'. The application
> may use this action to route the packet to the kernel while still
> in the HW.
> 
> Example with testpmd command:
> 
> flow create 0 ingress priority 0 group 1 pattern eth type spec 0x0800
> type mask 0x / end actions send_to_kernel / end
> 
> Signed-off-by: Michael Savisko 
> ---
>  app/test-pmd/cmdline_flow.c |  9 +
>  doc/guides/testpmd_app_ug/testpmd_funcs.rst |  2 ++
>  lib/ethdev/rte_flow.c   |  1 +
>  lib/ethdev/rte_flow.h   | 10 ++
>  4 files changed, 22 insertions(+)
> 
> diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
> index 7f50028eb7..042f6b34a6 100644
> --- a/app/test-pmd/cmdline_flow.c
> +++ b/app/test-pmd/cmdline_flow.c
> @@ -612,6 +612,7 @@ enum index {
>   ACTION_PORT_REPRESENTOR_PORT_ID,
>   ACTION_REPRESENTED_PORT,
>   ACTION_REPRESENTED_PORT_ETHDEV_PORT_ID,
> + ACTION_SEND_TO_KERNEL,
>  };
> 
>  /** Maximum size for pattern in struct rte_flow_item_raw. */
> @@ -1872,6 +1873,7 @@ static const enum index next_action[] = {
>   ACTION_CONNTRACK_UPDATE,
>   ACTION_PORT_REPRESENTOR,
>   ACTION_REPRESENTED_PORT,
> + ACTION_SEND_TO_KERNEL,
>   ZERO,
>  };
> 
> @@ -6341,6 +6343,13 @@ static const struct token token_list[] = {
>   .help = "submit a list of associated actions for red",
>   .next = NEXT(next_action),
>   },
> + [ACTION_SEND_TO_KERNEL] = {
> + .name = "send_to_kernel",
> + .help = "send packets to kernel",
> + .priv = PRIV_ACTION(SEND_TO_KERNEL, 0),
> + .next = NEXT(NEXT_ENTRY(ACTION_NEXT)),
> + .call = parse_vc,
> + },
> 
>   /* Top-level command. */
>   [ADD] = {
> diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> index 330e34427d..c259c8239a 100644
> --- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> +++ b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> @@ -4189,6 +4189,8 @@ This section lists supported actions and their
> attributes, if any.
> 
>- ``ethdev_port_id {unsigned}``: ethdev port ID
> 
> +- ``send_to_kernel``: send packets to kernel.
> +
>  Destroying flow rules
>  ~
> 
> diff --git a/lib/ethdev/rte_flow.c b/lib/ethdev/rte_flow.c
> index 501be9d602..627c671ce4 100644
> --- a/lib/ethdev/rte_flow.c
> +++ b/lib/ethdev/rte_flow.c
> @@ -259,6 +259,7 @@ static const struct rte_flow_desc_data
> rte_flow_desc_action[] = {
>   MK_FLOW_ACTION(CONNTRACK, sizeof(struct
> rte_flow_action_conntrack)),
>   MK_FLOW_ACTION(PORT_REPRESENTOR, sizeof(struct
> rte_flow_action_ethdev)),
>   MK_FLOW_ACTION(REPRESENTED_PORT, sizeof(struct
> rte_flow_action_ethdev)),
> + MK_FLOW_ACTION(SEND_TO_KERNEL, 0),
>  };
> 
>  int
> diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h
> index a79f1e7ef0..bf076087b3 100644
> --- a/lib/ethdev/rte_flow.h
> +++ b/lib/ethdev/rte_flow.h
> @@ -2879,6 +2879,16 @@ enum rte_flow_action_type {
>* @see struct rte_flow_action_ethdev
>*/
>   RTE_FLOW_ACTION_TYPE_REPRESENTED_PORT,
> +
> + /**
> +  * Send packets to the kernel, without going to userspace at all.
> +  * The packets will be received by the kernel driver sharing
> +  * the same device as the DPDK port.
> +  * This is an ingress action only.
> +  *
> +  * No associated configuration structure.
> +  */
> + RTE_FLOW_ACTION_TYPE_SEND_TO_KERNEL,
>  };
> 
>  /**
> --
> 2.27.0

Acked-by: Ori Kam 
Best,
Ori


Re: [PATCH 1/8] memarea: introduce memory area library

2022-09-20 Thread Dmitry Kozlyuk
2022-09-20 03:46 (UTC+), Chengwen Feng:
> The memarea library is an allocator of variable-size object. It is a
> collection of allocated objects that can be efficiently alloc or free
> all at once, the main features are as follows:
> a) it facilitate alloc and free of memory with low overhead.

Yet, the overhead is 64B per element, just like rte_malloc.

> b) it provides refcnt feature which could be useful in some scenes.

Are you sure refcnt should be in this library?
I've expressed my concerns here:

https://inbox.dpdk.org/dev/caeyuuwbpc-9dcqkj0lzi6rkcuwyeyeghlrbmbubtux4ljg+...@mail.gmail.com

There are more unanswered questions in that mail,
it would be good to clarify them before reviewing these patches
in order to understand all the intentions.

> +static int
> +memarea_check_param(const struct rte_memarea_param *init)
> +{
> + size_t len;
> +
> + len = strnlen(init->name, RTE_MEMAREA_NAMESIZE);
> + if (len == 0 || len >= RTE_MEMAREA_NAMESIZE) {
> + RTE_LOG(ERR, MEMAREA, "memarea name invalid!\n");
> + return -EINVAL;
> + }

Please check init->name == NULL first.

> +struct rte_memarea *
> +rte_memarea_create(const struct rte_memarea_param *init)
> +{
[...]
> + RTE_LOG(ERR, MEMAREA, "malloc memarea management obj fail!\n");

In all error messages, it would be useful to provide details:
the name of the area, what size was requested, etc.

> +/**
> + * Memarea memory source.
> + */
> +enum rte_memarea_source {
> + /** Memory source comes from system API (e.g. malloc). */
> + RTE_MEMAREA_SOURCE_SYSTEM_API,
> + /** Memory source comes from user-provided address. */
> + RTE_MEMAREA_SOURCE_USER_ADDR,
> + /** Memory source comes from user-provided memarea. */
> + RTE_MEMAREA_SOURCE_USER_MEMAREA,
> +
> + RTE_MEMAREA_SOURCE_BUTT

DPDK enumerations must not include an item to hold the element count,
because it is harmful for ABI (e.g. developers create arrays of this size
and when a new item is added in a new DPDK version, the array overflows).

If it's supposed to mean "the end of item list",
the proper word would be "last" or "max" BTW :)

> +};
> +
> +struct rte_memarea {
> + void *private_data; /**< private management data pointer. */
> +};

Jerin and Stephen suggested to make the structure opaque,
i.e. only declare the struct and define it privately.
It would reduce ABI and simplify allocation.
Any justification to expose it?


Re: [PATCH 1/8] memarea: introduce memory area library

2022-09-20 Thread Dmitry Kozlyuk
2022-09-20 14:30 (UTC+0300), Dmitry Kozlyuk:
> 2022-09-20 03:46 (UTC+), Chengwen Feng:
> > The memarea library is an allocator of variable-size object. It is a
> > collection of allocated objects that can be efficiently alloc or free
> > all at once, the main features are as follows:
> > a) it facilitate alloc and free of memory with low overhead.  
> 
> Yet, the overhead is 64B per element, just like rte_malloc.

Disregard this one, rte_malloc overhead is 128B (2 cache lines).


Re: [PATCH 1/8] memarea: introduce memory area library

2022-09-20 Thread fengchengwen

Hi Dmitry,

On 2022/9/20 19:30, Dmitry Kozlyuk wrote:

2022-09-20 03:46 (UTC+), Chengwen Feng:

The memarea library is an allocator of variable-size object. It is a
collection of allocated objects that can be efficiently alloc or free
all at once, the main features are as follows:
a) it facilitate alloc and free of memory with low overhead.

Yet, the overhead is 64B per element, just like rte_malloc.


b) it provides refcnt feature which could be useful in some scenes.

Are you sure refcnt should be in this library?
I've expressed my concerns here:

https://inbox.dpdk.org/dev/caeyuuwbpc-9dcqkj0lzi6rkcuwyeyeghlrbmbubtux4ljg+...@mail.gmail.com

There are more unanswered questions in that mail,
it would be good to clarify them before reviewing these patches
in order to understand all the intentions.


Sorry, I forgot to reply to it.

We have the following scenario which uses refcnt:

    nic-rx  ->  decoder ->  process

   |

  ->  recording

As shown above, the process and recording modules both use the decoder's
output; they are just readers.


so in this case, the refcnt is useful.




+static int
+memarea_check_param(const struct rte_memarea_param *init)
+{
+   size_t len;
+
+   len = strnlen(init->name, RTE_MEMAREA_NAMESIZE);
+   if (len == 0 || len >= RTE_MEMAREA_NAMESIZE) {
+   RTE_LOG(ERR, MEMAREA, "memarea name invalid!\n");
+   return -EINVAL;
+   }

Please check init->name == NULL first.


No need to check because name is an array.

Maybe I should check init == NULL here.




+struct rte_memarea *
+rte_memarea_create(const struct rte_memarea_param *init)
+{

[...]

+   RTE_LOG(ERR, MEMAREA, "malloc memarea management obj fail!\n");

In all error messages, it would be useful to provide details:
the name of the area, what size was requested, etc.


will fix in v2.




+/**
+ * Memarea memory source.
+ */
+enum rte_memarea_source {
+   /** Memory source comes from system API (e.g. malloc). */
+   RTE_MEMAREA_SOURCE_SYSTEM_API,
+   /** Memory source comes from user-provided address. */
+   RTE_MEMAREA_SOURCE_USER_ADDR,
+   /** Memory source comes from user-provided memarea. */
+   RTE_MEMAREA_SOURCE_USER_MEMAREA,
+
+   RTE_MEMAREA_SOURCE_BUTT

DPDK enumerations must not include an item to hold the element count,
because it is harmful for ABI (e.g. developers create arrays of this size
and when a new item is added in a new DPDK version, the array overflows).

If it's supposed to mean "the end of item list",
the proper word would be "last" or "max" BTW :)

will fix in v2



+};
+
+struct rte_memarea {
+   void *private_data; /**< private management data pointer. */
+};

Jerin and Stephen suggested to make the structure opaque,
i.e. only declare the struct and define it privately.
It would reduce ABI and simplify allocation.
Any justification to expose it?


Do you mean that rte_memarea is just a void *? Is it just
(void *)(memarea_private *)priv?


It's another popular way to implement ABI compatibility.

It's simpler, will fix in v2



Re: [PATCH v3 2/2] net: have checksum routines accept unaligned data

2022-09-20 Thread Mattias Rönnblom
On 2022-07-11 15:25, Olivier Matz wrote:
> On Mon, Jul 11, 2022 at 02:11:32PM +0200, Mattias Rönnblom wrote:
>> __rte_raw_cksum() (used by rte_raw_cksum() among others) accessed its
>> data through an uint16_t pointer, which allowed the compiler to assume
>> the data was 16-bit aligned. This in turn would, with certain
>> architectures and compiler flag combinations, result in code with SIMD
>> load or store instructions with restrictions on data alignment.
>>
>> This patch keeps the old algorithm, but data is read using memcpy()
>> instead of direct pointer access, forcing the compiler to always
>> generate code that handles unaligned input. The __may_alias__ GCC
>> attribute is no longer needed.
>>
>> The data on which the Internet checksum functions operates are almost
>> always 16-bit aligned, but there are exceptions. In particular, the
>> PDCP protocol header may (literally) have an odd size.
>>
>> Performance impact seems to range from none to a very slight
>> regression.
>>
>> Bugzilla ID: 1035
>> Cc: sta...@dpdk.org
> 
> Fixes: 6006818cfb26 ("net: new checksum functions")
> 
>> ---
>>
>> v3:
>>* Use RTE_ALIGN_FLOOR() in the pointer arithmetic (Olivier Matz).
>> v2:
>>* Simplified the odd-length conditional (Morten Brørup).
>>
>> Reviewed-by: Morten Brørup 
>>
>> Signed-off-by: Mattias Rönnblom 
> 
> Acked-by: Olivier Matz 
> 
> Thank you!

Are there any plans to merge this patchset?



RE: [PATCH v4 2/9] dts: add developer tools

2022-09-20 Thread Juraj Linkeš


> -Original Message-
> From: Honnappa Nagarahalli 
> Sent: Wednesday, September 14, 2022 9:08 PM
> To: tho...@monjalon.net; Juraj Linkeš ; Bruce
> Richardson 
> Cc: david.march...@redhat.com; ronan.rand...@intel.com;
> ohily...@iol.unh.edu; lijuan...@intel.com; dev@dpdk.org; nd ;
> nd 
> Subject: RE: [PATCH v4 2/9] dts: add developer tools
> 
> 
> 
> >
> > 14/09/2022 15:13, Bruce Richardson:
> > > On Wed, Sep 14, 2022 at 12:45:00PM +, Juraj Linkeš wrote:
> > > > From: Honnappa Nagarahalli
> > > > > > > On Fri, Jul 29, 2022 at 10:55:43AM +, Juraj Linkeš wrote:
> > > 
> > > > > > > > diff --git a/dts/format.sh b/dts/format.sh new file mode
> > > > > > > > 100755
> > > > > Should this be in dpdk/devtools directory? If yes, need a
> > > > > different name for the script, dts-fix-format.sh?
> > > >
> > > > We should decide where we'll put it, either to dpdk/devtools or
> > > > dpdk/dts/devtools. So far I have it in dpdk/dts/devtools, but it
> > > > may make more sense to put it into dpdk/devtools.
> > >
> > > Third option of "devtools/dts". I'd like to have it either in the
> > > devtools directory directly, or in a subdirectory of devtools.
> >
> > I am OK with devtools/dts/ or directly in dts/ DTS *is* a developer tool, 
> > right?
> > So why some scripts should be in a different directory?
> 
> +1 for devtools/dts
> >

I like this as well, I'll move them there.


RE: [PATCH v4 2/9] dts: add developer tools

2022-09-20 Thread Tu, Lijuan


> -Original Message-
> From: Juraj Linkeš 
> Sent: Tuesday, September 20, 2022 8:14 PM
> To: Honnappa Nagarahalli ;
> tho...@monjalon.net; Richardson, Bruce 
> Cc: david.march...@redhat.com; Randles, Ronan ;
> ohily...@iol.unh.edu; Tu, Lijuan ; dev@dpdk.org; nd
> ; nd 
> Subject: RE: [PATCH v4 2/9] dts: add developer tools
> 
> 
> 
> > -Original Message-
> > From: Honnappa Nagarahalli 
> > Sent: Wednesday, September 14, 2022 9:08 PM
> > To: tho...@monjalon.net; Juraj Linkeš ;
> > Bruce Richardson 
> > Cc: david.march...@redhat.com; ronan.rand...@intel.com;
> > ohily...@iol.unh.edu; lijuan...@intel.com; dev@dpdk.org; nd
> > ; nd 
> > Subject: RE: [PATCH v4 2/9] dts: add developer tools
> >
> > 
> >
> > >
> > > 14/09/2022 15:13, Bruce Richardson:
> > > > On Wed, Sep 14, 2022 at 12:45:00PM +, Juraj Linkeš wrote:
> > > > > From: Honnappa Nagarahalli
> > > > > > > > On Fri, Jul 29, 2022 at 10:55:43AM +, Juraj Linkeš wrote:
> > > > 
> > > > > > > > > diff --git a/dts/format.sh b/dts/format.sh new file mode
> > > > > > > > > 100755
> > > > > > Should this be in dpdk/devtools directory? If yes, need a
> > > > > > different name for the script, dts-fix-format.sh?
> > > > >
> > > > > We should decide where we'll put it, either to dpdk/devtools or
> > > > > dpdk/dts/devtools. So far I have it in dpdk/dts/devtools, but it
> > > > > may make more sense to put it into dpdk/devtools.
> > > >
> > > > Third option of "devtools/dts". I'd like to have it either in the
> > > > devtools directory directly, or in a subdirectory of devtools.
> > >
> > > I am OK with devtools/dts/ or directly in dts/ DTS *is* a developer tool, 
> > > right?
> > > So why some scripts should be in a different directory?
> >
> > +1 for devtools/dts
> > >
> 
> I like this as well, I'll move them there.

Do we expect all Python files to follow the same format? If yes, I prefer
devtools; otherwise I prefer devtools/dts.


Re: [PATCH v6 1/2] Fix build of apps with external dependencies

2022-09-20 Thread Maxime Coquelin




On 9/2/22 10:45, Felix Moessbauer wrote:

This fix initializes the dependency object with the external
dependency list. Previously, the external dependencies were
just ignored.

Signed-off-by: Felix Moessbauer 
Acked-by: Bruce Richardson 
---
  app/meson.build | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/meson.build b/app/meson.build
index 93d8c15032..0ea04cadeb 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -44,7 +44,7 @@ foreach app:apps
  subdir(name)
  
  if build

-dep_objs = []
+dep_objs = ext_deps
  foreach d:deps
  var_name = get_option('default_library') + '_rte_' + d
  if not is_variable(var_name)


Reviewed-by: Maxime Coquelin 

Thanks,
Maxime



RE: [PATCH v1] ethdev: add direction info when creating the transfer table

2022-09-20 Thread Ivan Malov

Hi Ori,

On Tue, 20 Sep 2022, Ori Kam wrote:


Hi Ivan, Thomas and Rongwei


-Original Message-
From: Thomas Monjalon 
Sent: Thursday, 15 September 2022 14:16

15/09/2022 12:59, Ivan Malov:

Hi Rongwei,

In this reply, I do not include the previous mail because the amount
of inline commentary has gone haywire over the past couple of days.
Let's re-iterate.

But before I get to that, I'd like to offer a fresh perspective:

Perhaps, if we all agree that term "vport" means an endpoint which
can stand for any "port" except for physical one, then it should
be possible to use term ANY_VPORTS rather than ANY_GUEST_PORTS.


The opposite of "physical" is "virtual" indeed.


But that's tricky, of course. I don't have a way with naming,
so more opinions are welcome and very-very desirable here.

So:

1) Do you agree that, in your proposal, the new "wire_orig" / "vf_orig"
primitives are in fact yet another match criteria?

..

To me, it looks so. If they are match criteria, then they belong
in match pattern, that is, they should be expressed as new items.

For "transfer" rules, the *existing* attributes are: "group"
and "priority". As you may note, these are clearly not match
criteria. They control the look-up order. So, to this day,
there're no match criteria in DPDK expressed as attributes.

If these "wire_orig" / "vf_orig" are going to be introduced
as attributes, that should be backed with strong motivation.


I prefer we keep matching in a single place, not in attributes.



I think we are talking about two different features.
Feature 1:
Allow matching on all vports that are not wire
Feature 2:
Save allocation space and allow fast insertion.
In this case, the matching is not on all vports it can be just part of the 
vports
but it will never be the wire port.
For example:
port 0 - wire
ports 1,2,3,4,5  - vports
the application wants to insert only these rules:
represented_port(port_id=2) / eth / ipv4 (src==xx)
represented_port(port_id=4) / eth / ipv4 (src==xx)
represented_port(port_id=4) / eth / ipv4 (src==yy)

For feature 1 I fully agree with you Ivan, this should be added as an item.


Thank you.


For feature 2 I think Rongwei's suggestion is the better option.
If I understand correctly the idea is to give hint to the PMD on where to 
allocate memory
and how to insert the rules most optimally. Since this is shared for all rules 
it makes more sense
to add it as an attribute, just like we don’t have an ingress item (maybe we 
should?)


But isn't pattern template also supposed to be shared for all rules
in the table? I.e., the user creates an async flow table and submits
a flow "shape" (which consists of attrs, pattern template and action
template). So why should "giving a hint" via an item template be
considered worse than doing so via an attribute?

As for "ingress" item, - no, one should not add such. We have had
many discussions concerning this bit in the past. Ingress/egress
are non-transfer terms. They belong in the scope of vNIC / ethdev
filtering, not to embedded switch rules.

In my opinion, in the embedded switch, one should either point to
some precise switch ports (using REPRESENTOR / REPRESENTED items)
or use another kind of item to refer to a "super set" of ports
which have something in common ("all wire ports", "all NON-wire ports").



Ivan we have the item RTE_FLOW_ITEM_TYPE_PF and RTE_FLOW_ITEM_TYPE_VF which are 
deprecated,
So do you want to un-deprecate them?


No. These items are deprecated because:

a) their names suggest that application knows whether an ethdev
   sits on top of a PF or that the application has some
   knowledge of existence of particular VFs, but in
   reality applications should not be worried of
   the underlying function type = to them, all
   ethdevs are just representors of something,
   and if the application needs to refer to
   VFs (or other PFs, - doesn't matter), it
   should do that via REPRESENTOR items;

b) such items would duplicate REPRESENTOR / REPRESENTED.



To summarize, if PMD can use such an hint during rule creation and save memory, 
I vote
to allow it.
if the idea is to match on all vports then it should be an item.


But such a hint would effectively be a match criterion, too, right?
So, in fact it's a combined use case: a match criterion which is
flexible enough to be a "hint" = i.e. the PMD can see it when
processing the pattern *template* and treat it as a hint.






2) From your viewpoint, why items "ANY_PHYS_PORTS" and

"ANY_VPORTS"

won't do? Or, which problems do you think they may inflict?

..

Previously, you explained why REPRESENTED_PORT would not
fit your needs. And I understand your point: to async API,
two pattern templates which both have item REPRESENTED_PORT
in them cannot be clearly distinguished and are in fact the
same set of criteria (provided that all other items are also
the same and have the same masks). Templates are, well,
templates (or shap

Re: FW: [PATCH v1] buildtools: ensure the NUMA nodes are counted correct

2022-09-20 Thread Thomas Monjalon
Hi,

20/09/2022 12:11, Niklas Soderlund:
> Hi Thomas,
> 
> Have you checked if this addresses the same issue you were seeing? Do you 
> think we can move forward with this fix?

Yes. No need to show lscpu output,
but the output of the DPDK initialization would be more interesting.


> On 2022-08-31 10:47:24 +0200, Nole Zhang wrote:
> > 
> > 
> > 
> > > -Original Message-
> > > From: Thomas Monjalon 
> > > Sent: 2022年8月29日 21:15
> > > To: Nole Zhang ; Chaoyong He 
> > > 
> > > Subject: Re: [PATCH v1] buildtools: ensure the NUMA nodes are counted 
> > > correct
> > > 
> > > 29/08/2022 13:17, Nole Zhang:
> > > > From: Thomas Monjalon 
> > > > > 02/08/2022 09:54, Chaoyong He:
> > > > > > From: Peng Zhang 
> > > > > >
> > > > > > Sorting a list of strings with the format "node[0-9]+" in order 
> > > > > > to find the largest integer by looking at the last item after 
> > > > > > the sort breaks. But if there are more than 10 items as a string 
> > > > > > sort will sort "node10" before "node2", it will get the error NUMA 
> > > > > > nodes.
> > > > >
> > > > > What is the error you are seeing?
> > > > >
> > > > >
> > > > We get the wrong NUMA node count: in this example, the detected number of
> > > > NUMA nodes is 10, but in fact the system has 11 NUMA nodes.
> > > 
> > > Please give more details, where do you see this error?
> > > We should know how to reproduce and check we have the same issue.
> > > Thanks
> > > 
> > > Please reply with a detailed answer on the mailing list.
> > > 
> > In the China Phytium S2500 CPU + INSPUR server, it has 16 NUMA.
> > The details are as follows:
> > 
> > ~#: lscpu
> > 
> > Architecture:aarch64
> > CPU op-mode(s):  64-bit
> > Byte Order:  Little Endian
> > CPU(s):  128
> > On-line CPU(s) list: 0-127
> > Thread(s) per core:  1
> > Core(s) per socket:  64
> > Socket(s):   2
> > NUMA node(s):16
> > Vendor ID:   0x70
> > Model:   3
> > Model name:  S2500
> > Stepping:0x1
> > BogoMIPS:100.00
> > L1d cache:   4 MiB
> > L1i cache:   4 MiB
> > L2 cache:64 MiB
> > L3 cache:128 MiB
> > NUMA node0 CPU(s):   0-7
> > NUMA node1 CPU(s):   8-15
> > NUMA node2 CPU(s):   16-23
> > NUMA node3 CPU(s):   24-31
> > NUMA node4 CPU(s):   32-39
> > NUMA node5 CPU(s):   40-47
> > NUMA node6 CPU(s):   48-55
> > NUMA node7 CPU(s):   56-63
> > NUMA node8 CPU(s):   64-71
> > NUMA node9 CPU(s):   72-79
> > NUMA node10 CPU(s):  80-87
> > NUMA node11 CPU(s):  88-95
> > NUMA node12 CPU(s):  96-103
> > NUMA node13 CPU(s):  104-111
> > NUMA node14 CPU(s):  112-119
> > NUMA node15 CPU(s):  120-127
> > Flags:   half thumb fastmult vfp edsp neon vfpv3 tls vfpv4 
> > idiva idivt lpae evtstrm
> > 
> > 
> > I use  meson build -Dmax_lcores=detect -Dmax_numa_nodes=detect to compile, 
> > then dpdk initialization only shows 10 numa. 
> 
> 







Re: Cannot set affinity - pthread_setaffinity_np - DPDK21

2022-09-20 Thread Kamaraj P
Thank you for the clarification.

On Mon, 19 Sep 2022, 10:34 pm Stephen Hemminger, 
wrote:

> On Sun, 18 Sep 2022 22:05:57 +0530
> Kamaraj P  wrote:
>
> > Yes. The core mask we have aligned to separate cgroup. Probably this
> might
> > be an issue here. We will change the alignment in  our DPDK application.
> > BTW is there a way to check the cores affinity set to DPDK after
> > rte_eal_init ? Do we have to use the taskset command ?
>
> DPDK doesn't (and shouldn't) know about what cgroups has set.
> You need to align the cgroup and the startup flags of the application.
>


RE: [PATCH v1] ethdev: add direction info when creating the transfer table

2022-09-20 Thread Ori Kam
Hi Ivan,

> -Original Message-
> From: Ivan Malov 
> Sent: Tuesday, 20 September 2022 15:46
> 
> Hi Ori,
> 
> On Tue, 20 Sep 2022, Ori Kam wrote:
> 
> > Hi Ivan, Thomas and Rongwei
> >
> >> -Original Message-
> >> From: Thomas Monjalon 
> >> Sent: Thursday, 15 September 2022 14:16
> >>
> >> 15/09/2022 12:59, Ivan Malov:
> >>> Hi Rongwei,
> >>>
> >>> In this reply, I do not include the previous mail because the amount
> >>> of inline commentary has gone haywire over the past couple of days.
> >>> Let's re-iterate.
> >>>
> >>> But before I get to that, I'd like to offer a fresh perspective:
> >>>
> >>> Perhaps, if we all agree that term "vport" means an endpoint which
> >>> can stand for any "port" except for physical one, then it should
> >>> be possible to use term ANY_VPORTS rather than ANY_GUEST_PORTS.
> >>
> >> The opposite of "physical" is "virtual" indeed.
> >>
> >>> But that's tricky, of course. I don't have a way with naming,
> >>> so more opinions are welcome and very-very desirable here.
> >>>
> >>> So:
> >>>
> >>> 1) Do you agree that, in your proposal, the new "wire_orig" / "vf_orig"
> >>> primitives are in fact yet another match criteria?
> >>>
> >>> ..
> >>>
> >>> To me, it looks so. If they are match criteria, then they belong
> >>> in match pattern, that is, they should be expressed as new items.
> >>>
> >>> For "transfer" rules, the *existing* attributes are: "group"
> >>> and "priority". As you may note, these are clearly not match
> >>> criteria. They control the look-up order. So, to this day,
> >>> there're no match criteria in DPDK expressed as attributes.
> >>>
> >>> If these "wire_orig" / "vf_orig" are going to be introduced
> >>> as attributes, that should be backed with strong motivation.
> >>
> >> I prefer we keep matching in a single place, not in attributes.
> >>
> >
> > I think we are talking about two different features.
> > Feature 1:
> > Allow matching on all vports that are not wire
> > Feature 2:
> > Save allocation space and allow fast insertion.
> > In this case, the matching is not on all vports it can be just part of the 
> > vports
> > but it will never be the wire port.
> > For example:
> > port 0 - wire
> > ports 1,2,3,4,5  - vports
> > the application wants to insert only these rules:
> > represented_port(port_id=2) / eth / ipv4 (src==xx)
> > represented_port(port_id=4) / eth / ipv4 (src==xx)
> > represented_port(port_id=4) / eth / ipv4 (src==yy)
> >
> > For feature 1 I fully agree with you Ivan, this should be added as an item.
> 
> Thank you.
> 
> > For feature 2 I think Rongwei's suggestion is the better option.
> > If I understand correctly the idea is to give hint to the PMD on where to
> allocate memory
> > and how to insert the rules most optimally. Since this is shared for all 
> > rules it
> makes more sense
> > to add it as an attribute, just like we don’t have an ingress item (maybe we
> should?)
> 
> But isn't pattern template also supposed to be shared for all rules
> in the table? I.e., the user creates an async flow table and submits
> a flow "shape" (which consists of attrs, pattern template and action
> template). So why should "giving a hint" via an item template be
> considered worse than doing so via an attribute?
> 

The same item template may be used elsewhere; for example, the following
pattern eth / ipv4(src, dst) / udp(sport, dport) can be used in a number of
different tables.
I think that the main difference between us is that from my point of view this 
value is just
where to allocate resources / how to better insert the rule. It is not related 
to matching.
From Nvidia viewpoint we need this information so we can allocate the resource 
at the correct
place and avoid inserting duplication of rules.
I agree that by using the item we can get the same results, but it is incorrect 
since we are not matching on it.
Part of the idea of template API is to give as many hints as possible to the 
PMD so the insertion will be optimized.


> As for "ingress" item, - no, one should not add such. We have had
> many discussions concerning this bit in the past. Ingress/egress
> are non-transfer terms. They belong in the scope of vNIC / ethdev
> filtering, not to embedded switch rules.
> 
> In my opinion, in the embedded switch, one should either point to
> some precise switch ports (using REPRESENTOR / REPRESENTED items)
> or use another kind of item to refer to a "super set" of ports
> which have something in common ("all wire ports", "all NON-wire ports").
> 

But this is my point we don't want all wire ports or all NON-wire ports, we 
just know that in this table
we will have only non-wire / wire ports.

> >
> > Ivan we have the item RTE_FLOW_ITEM_TYPE_PF and
> RTE_FLOW_ITEM_TYPE_VF which are deprecated,
> > So do you want to un-deprecate them?
> 
> No. These items are deprecated because:
> 
> a) their names suggest that application knows whether an ethdev
> sits on top of a PF or that the appli

Re: [PATCH v6 2/2] Add l2reflect measurement application

2022-09-20 Thread Maxime Coquelin

Hi Felix,

First, I support the idea of having the l2reflect application part of
the DPDK repository.

Please note CI failed to build it on different platforms:
http://mails.dpdk.org/archives/test-report/2022-September/304617.html

It also fails to build on my Fc35 machine:
[3237/3537] Compiling C object app/dpdk-l2reflect.p/l2reflect_main.c.o
../app/l2reflect/main.c: In function ‘l2reflect_main_loop’:
../app/l2reflect/main.c:560:19: warning: array subscript ‘uint64_t {aka 
long unsigned int}[0]’ is partly outside array bounds of ‘struct 
rte_ether_addr[1]’ [-Warray-bounds]
  560 | i_win = ((*((uint64_t *)&l2reflect_port_eth_addr)   & 
MAC_ADDR_CMP) >

  |   ^~~
../app/l2reflect/main.c:110:23: note: while referencing 
‘l2reflect_port_eth_addr’

  110 | struct rte_ether_addr l2reflect_port_eth_addr;
  |   ^~~
../app/l2reflect/main.c:561:27: warning: array subscript ‘uint64_t {aka 
long unsigned int}[0]’ is partly outside array bounds of ‘struct 
rte_ether_addr[1]’ [-Warray-bounds]
  561 |  (*((uint64_t 
*)&l2reflect_remote_eth_addr) & MAC_ADDR_CMP));

  |   ^
../app/l2reflect/main.c:111:23: note: while referencing 
‘l2reflect_remote_eth_addr’

  111 | struct rte_ether_addr l2reflect_remote_eth_addr;
  |   ^

Some more comments inline:

On 9/2/22 10:45, Felix Moessbauer wrote:

The l2reflect application implements a ping-pong benchmark to
measure the latency between two instances. For communication,
we use raw ethernet and send one packet at a time. The timing data
is collected locally and min/max/avg values are displayed in a TUI.
Finally, a histogram of the latencies is printed which can be
further processed with the jitterdebugger visualization scripts.
To debug latency spikes, a max threshold can be defined.
If it is hit, a trace point is created on both instances.

Signed-off-by: Felix Moessbauer 
Signed-off-by: Henning Schild 
---
  app/l2reflect/colors.c|   34 ++
  app/l2reflect/colors.h|   19 +
  app/l2reflect/l2reflect.h |   53 ++
  app/l2reflect/main.c  | 1007 +
  app/l2reflect/meson.build |   21 +
  app/l2reflect/payload.h   |   26 +
  app/l2reflect/stats.c |  225 +
  app/l2reflect/stats.h |   67 +++
  app/l2reflect/utils.c |   67 +++
  app/l2reflect/utils.h |   20 +
  app/meson.build   |1 +
  11 files changed, 1540 insertions(+)
  create mode 100644 app/l2reflect/colors.c
  create mode 100644 app/l2reflect/colors.h
  create mode 100644 app/l2reflect/l2reflect.h
  create mode 100644 app/l2reflect/main.c
  create mode 100644 app/l2reflect/meson.build
  create mode 100644 app/l2reflect/payload.h
  create mode 100644 app/l2reflect/stats.c
  create mode 100644 app/l2reflect/stats.h
  create mode 100644 app/l2reflect/utils.c
  create mode 100644 app/l2reflect/utils.h


If we agree to have this application in app/ directory,
I think you'll have to add documentation for this new tool in
doc/guides/tools/.


diff --git a/app/l2reflect/colors.c b/app/l2reflect/colors.c
new file mode 100644
index 00..af881d8788
--- /dev/null
+++ b/app/l2reflect/colors.c
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Siemens AG
+ */
+
+#include "colors.h"
+
+const struct color_palette *colors;
+
+static const struct color_palette color_palette_default = {
+   .red = "\x1b[01;31m",
+   .green = "\x1b[01;32m",
+   .yellow = "\x1b[01;33m",
+   .blue = "\x1b[01;34m",
+   .magenta = "\x1b[01;35m",
+   .cyan = "\x1b[01;36m",
+   .reset = "\x1b[0m"
+};
+
+static const struct color_palette color_palette_bw = { .red = "",
+  .green = "",
+  .yellow = "",
+  .blue = "",
+  .magenta = "",
+  .cyan = "",
+  .reset = "" };
+
+void
+enable_colors(int enable)
+{
+   if (enable)
+   colors = &color_palette_default;
+   else
+   colors = &color_palette_bw;
+}
diff --git a/app/l2reflect/colors.h b/app/l2reflect/colors.h
new file mode 100644
index 00..346547138b
--- /dev/null
+++ b/app/l2reflect/colors.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Siemens AG
+ */
+#ifndef _L2REFLECT_COLORS_H_
+#define _L2REFLECT_COLORS_H_
+
+/* posix terminal colors */
+struct color_palette {
+   const char *red, *green, *yellow, *blue, *magenta, *cyan, *reset;
+};
+
+/* ptr to the current tui color palette */
+extern const struct color_palette *colors;
+
+/* disable colored output */
+v

[PATCH 00/10] net/mlx5: implement send to kernel action

2022-09-20 Thread Michael Savisko
This patchset provides mlx5 implementation of new rte_flow
action RTE_FLOW_ACTION_TYPE_SEND_TO_KERNEL.

Depends-on: series=24713 ("ethdev: add send to kernel action")

Signed-off-by: Michael Savisko 

Michael Savisko (10):
  common/mlx5: update meson build file
  net/mlx5: disable send to kernel action in HW steering
  common/mlx5: new glue callback for send to kernel action
  net/mlx5: add function to create send to kernel action
  net/mlx5: introduce new mlx5 action flag
  net/mlx5: introduce new mlx5 flow fate
  net/mlx5: get priority to send traffic to kernel
  net/mlx5: expose table resource release function
  net/mlx5: add send to kernel action resource holder
  net/mlx5: translation of rte flow send to kernel action

 drivers/common/mlx5/linux/meson.build |  2 +
 drivers/common/mlx5/linux/mlx5_glue.c | 17 ++
 drivers/common/mlx5/linux/mlx5_glue.h |  2 +
 drivers/net/mlx5/linux/mlx5_flow_os.h | 22 
 drivers/net/mlx5/linux/mlx5_os.c  | 11 
 drivers/net/mlx5/mlx5.h   |  6 ++
 drivers/net/mlx5/mlx5_flow.c  | 33 +++
 drivers/net/mlx5/mlx5_flow.h  | 10 +++-
 drivers/net/mlx5/mlx5_flow_dv.c   | 79 +--
 drivers/net/mlx5/mlx5_flow_hw.c   |  3 +
 10 files changed, 178 insertions(+), 7 deletions(-)

-- 
2.27.0



[PATCH 02/10] net/mlx5: disable send to kernel action in HW steering

2022-09-20 Thread Michael Savisko
Fail translation of RTE_FLOW_ACTION_TYPE_SEND_TO_KERNEL action in
HW steering.

Signed-off-by: Michael Savisko 
---
 drivers/net/mlx5/mlx5_flow_hw.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c
index 12498794a5..b168ff9e7e 100644
--- a/drivers/net/mlx5/mlx5_flow_hw.c
+++ b/drivers/net/mlx5/mlx5_flow_hw.c
@@ -707,6 +707,9 @@ flow_hw_actions_translate(struct rte_eth_dev *dev,
reformat_pos = i++;
refmt_type = MLX5DR_ACTION_REFORMAT_TYPE_TNL_L2_TO_L2;
break;
+   case RTE_FLOW_ACTION_TYPE_SEND_TO_KERNEL:
+   DRV_LOG(ERR, "send to kernel action is not supported in 
HW steering.");
+   goto err;
case RTE_FLOW_ACTION_TYPE_END:
actions_end = true;
break;
-- 
2.27.0



[PATCH 01/10] common/mlx5: update meson build file

2022-09-20 Thread Michael Savisko
Define HAVE_MLX5DV_DR_ACTION_CREATE_DEST_ROOT_TABLE macro if function
mlx5dv_dr_action_create_dest_root_table exists in infiniband/mlx5dv.h

Signed-off-by: Michael Savisko 
---
 drivers/common/mlx5/linux/meson.build | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/common/mlx5/linux/meson.build 
b/drivers/common/mlx5/linux/meson.build
index f9d1937571..e13ee55819 100644
--- a/drivers/common/mlx5/linux/meson.build
+++ b/drivers/common/mlx5/linux/meson.build
@@ -213,6 +213,8 @@ has_sym_args = [
 'ibv_reg_mr_iova' ],
 [ 'HAVE_MLX5_IBV_IMPORT_CTX_PD_AND_MR', 'infiniband/verbs.h',
 'ibv_import_device' ],
+[ 'HAVE_MLX5DV_DR_ACTION_CREATE_DEST_ROOT_TABLE', 
'infiniband/mlx5dv.h',
+'mlx5dv_dr_action_create_dest_root_table' ],
 ]
 if  libmtcr_ul_found
 has_sym_args += [
-- 
2.27.0



[PATCH 03/10] common/mlx5: new glue callback for send to kernel action

2022-09-20 Thread Michael Savisko
Add new glue callback dr_create_flow_action_send_to_kernel.
Default callback invokes mlx5dv_dr_action_create_dest_root_table().

Signed-off-by: Michael Savisko 
---
 drivers/common/mlx5/linux/mlx5_glue.c | 17 +
 drivers/common/mlx5/linux/mlx5_glue.h |  2 ++
 2 files changed, 19 insertions(+)

diff --git a/drivers/common/mlx5/linux/mlx5_glue.c 
b/drivers/common/mlx5/linux/mlx5_glue.c
index 450dd6a06a..b954df0784 100644
--- a/drivers/common/mlx5/linux/mlx5_glue.c
+++ b/drivers/common/mlx5/linux/mlx5_glue.c
@@ -1434,6 +1434,21 @@ mlx5_glue_dv_free_pp(struct mlx5dv_pp *pp)
 #endif
 }
 
+static void *
+mlx5_glue_dr_create_flow_action_send_to_kernel(void *tbl, uint16_t priority)
+{
+#ifdef HAVE_MLX5DV_DR_ACTION_CREATE_DEST_ROOT_TABLE
+   struct mlx5dv_dr_table *table = (struct mlx5dv_dr_table *)tbl;
+
+   return mlx5dv_dr_action_create_dest_root_table(table, priority);
+#else
+   RTE_SET_USED(tbl);
+   RTE_SET_USED(priority);
+   errno = ENOTSUP;
+   return NULL;
+#endif
+}
+
 __rte_cache_aligned
 const struct mlx5_glue *mlx5_glue = &(const struct mlx5_glue) {
.version = MLX5_GLUE_VERSION,
@@ -1561,4 +1576,6 @@ const struct mlx5_glue *mlx5_glue = &(const struct 
mlx5_glue) {
.dv_free_var = mlx5_glue_dv_free_var,
.dv_alloc_pp = mlx5_glue_dv_alloc_pp,
.dv_free_pp = mlx5_glue_dv_free_pp,
+   .dr_create_flow_action_send_to_kernel =
+   mlx5_glue_dr_create_flow_action_send_to_kernel,
 };
diff --git a/drivers/common/mlx5/linux/mlx5_glue.h 
b/drivers/common/mlx5/linux/mlx5_glue.h
index c4903a6dce..9616dfdd06 100644
--- a/drivers/common/mlx5/linux/mlx5_glue.h
+++ b/drivers/common/mlx5/linux/mlx5_glue.h
@@ -373,6 +373,8 @@ struct mlx5_glue {
void *(*dv_create_flow_action_aso)
(struct mlx5dv_dr_domain *domain, void *aso_obj,
 uint32_t offset, uint32_t flags, uint8_t return_reg_c);
+   void *(*dr_create_flow_action_send_to_kernel)(void *tbl,
+ uint16_t priority);
 };
 
 extern const struct mlx5_glue *mlx5_glue;
-- 
2.27.0



[PATCH 04/10] net/mlx5: add function to create send to kernel action

2022-09-20 Thread Michael Savisko
Add static inline mlx5_flow_os_create_flow_action_send_to_kernel(),
which calls dr_create_flow_action_send_to_kernel glue callback.

Signed-off-by: Michael Savisko 
---
 drivers/net/mlx5/linux/mlx5_flow_os.h | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/drivers/net/mlx5/linux/mlx5_flow_os.h 
b/drivers/net/mlx5/linux/mlx5_flow_os.h
index bcb48b3e56..ed71289322 100644
--- a/drivers/net/mlx5/linux/mlx5_flow_os.h
+++ b/drivers/net/mlx5/linux/mlx5_flow_os.h
@@ -368,6 +368,28 @@ mlx5_flow_os_create_flow_action_default_miss(void **action)
return (*action) ? 0 : -1;
 }
 
+/**
+ * Create flow action: send_to_kernel.
+ *
+ * @param[in] tbl
+ *   Pointer to destination root table.
+ * @param[in] priority
+ *   Priority to which traffic will arrive.
+ * @param[out] action
+ *   Pointer to a valid action on success, NULL otherwise.
+ *
+ * @return
+ *   0 on success, or -1 on failure and errno is set.
+ */
+static inline int
+mlx5_flow_os_create_flow_action_send_to_kernel(void *tbl, uint16_t priority,
+ void **action)
+{
+   *action = mlx5_glue->dr_create_flow_action_send_to_kernel(tbl,
+ priority);
+   return (*action) ? 0 : -1;
+}
+
 /**
  * Create flow action: dest_devx_tir
  *
-- 
2.27.0



[PATCH 05/10] net/mlx5: introduce new mlx5 action flag

2022-09-20 Thread Michael Savisko
Add new mlx5 action flag MLX5_FLOW_ACTION_SEND_TO_KERNEL.

Signed-off-by: Michael Savisko 
---
 drivers/net/mlx5/mlx5_flow.h|  4 +++-
 drivers/net/mlx5/mlx5_flow_dv.c | 11 +++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 4c233cd94a..3afdd46421 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -274,12 +274,14 @@ enum mlx5_feature_name {
 #define MLX5_FLOW_ACTION_MODIFY_FIELD (1ull << 39)
 #define MLX5_FLOW_ACTION_METER_WITH_TERMINATED_POLICY (1ull << 40)
 #define MLX5_FLOW_ACTION_CT (1ull << 41)
+#define MLX5_FLOW_ACTION_SEND_TO_KERNEL (1ull << 42)
 
 #define MLX5_FLOW_FATE_ACTIONS \
(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | \
 MLX5_FLOW_ACTION_RSS | MLX5_FLOW_ACTION_JUMP | \
 MLX5_FLOW_ACTION_DEFAULT_MISS | \
-MLX5_FLOW_ACTION_METER_WITH_TERMINATED_POLICY)
+MLX5_FLOW_ACTION_METER_WITH_TERMINATED_POLICY | \
+MLX5_FLOW_ACTION_SEND_TO_KERNEL)
 
 #define MLX5_FLOW_FATE_ESWITCH_ACTIONS \
(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index bee9363515..7e0b13b8b1 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -7953,6 +7953,12 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct 
rte_flow_attr *attr,
 * list it here as a supported type
 */
break;
+#ifdef HAVE_MLX5DV_DR_ACTION_CREATE_DEST_ROOT_TABLE
+   case RTE_FLOW_ACTION_TYPE_SEND_TO_KERNEL:
+   action_flags |= MLX5_FLOW_ACTION_SEND_TO_KERNEL;
+   ++actions_n;
+   break;
+#endif
default:
return rte_flow_error_set(error, ENOTSUP,
  RTE_FLOW_ERROR_TYPE_ACTION,
@@ -13677,6 +13683,11 @@ flow_dv_translate(struct rte_eth_dev *dev,
actions_n++;
action_flags |= MLX5_FLOW_ACTION_CT;
break;
+   case RTE_FLOW_ACTION_TYPE_SEND_TO_KERNEL:
+   return rte_flow_error_set(error, ENOTSUP,
+   RTE_FLOW_ERROR_TYPE_ACTION,
+   NULL, "send to kernel action is not 
supported.");
+   break;
case RTE_FLOW_ACTION_TYPE_END:
actions_end = true;
if (mhdr_res->actions_num) {
-- 
2.27.0



[PATCH 06/10] net/mlx5: introduce new mlx5 flow fate

2022-09-20 Thread Michael Savisko
Add element MLX5_FLOW_FATE_SEND_TO_KERNEL in enum mlx5_flow_fate_type.

For that purpose field 'fate_action' in structure mlx5_flow_handle must be
expanded from 3 bits to 4 bits.

Signed-off-by: Michael Savisko 
---
 drivers/net/mlx5/mlx5_flow.h| 3 ++-
 drivers/net/mlx5/mlx5_flow_dv.c | 6 ++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 3afdd46421..860c6cd2ad 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -476,6 +476,7 @@ enum mlx5_flow_fate_type {
MLX5_FLOW_FATE_DEFAULT_MISS,
MLX5_FLOW_FATE_SHARED_RSS,
MLX5_FLOW_FATE_MTR,
+   MLX5_FLOW_FATE_SEND_TO_KERNEL,
MLX5_FLOW_FATE_MAX,
 };
 
@@ -708,7 +709,7 @@ struct mlx5_flow_handle {
void *drv_flow; /**< pointer to driver flow object. */
uint32_t split_flow_id:27; /**< Sub flow unique match flow id. */
uint32_t is_meter_flow_id:1; /**< Indicate if flow_id is for meter. */
-   uint32_t fate_action:3; /**< Fate action type. */
+   uint32_t fate_action:4; /**< Fate action type. */
union {
uint32_t rix_hrxq; /**< Hash Rx queue object index. */
uint32_t rix_jump; /**< Index to the jump action resource. */
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 7e0b13b8b1..8f8a1208e9 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -14735,6 +14735,12 @@ flow_dv_fate_resource_release(struct rte_eth_dev *dev,
flow_dv_port_id_action_resource_release(dev,
handle->rix_port_id_action);
break;
+   case MLX5_FLOW_FATE_SEND_TO_KERNEL:
+   /* In case of send_to_kernel action the actual release of
+* resource is done when all shared DR resources are released
+* since this resource is created once and always reused.
+*/
+   break;
default:
DRV_LOG(DEBUG, "Incorrect fate action:%d", handle->fate_action);
break;
-- 
2.27.0



[PATCH 07/10] net/mlx5: get priority to send traffic to kernel

2022-09-20 Thread Michael Savisko
Introduce mlx5_get_send_to_kernel_priority() function which returns
value of priority which must be used to jump back to table 0 in order
to send traffic to kernel. This function returns lowest priority.

Signed-off-by: Michael Savisko 
---
 drivers/net/mlx5/mlx5_flow.c | 33 +
 drivers/net/mlx5/mlx5_flow.h |  1 +
 2 files changed, 34 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 8c93a3f2e5..c9e2b68957 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -10968,6 +10968,39 @@ mlx5_flow_adjust_priority(struct rte_eth_dev *dev, 
int32_t priority,
return  res;
 }
 
+/**
+ * Get the priority for sending traffic to kernel table.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ *
+ * @return
+ *   On success: the value of priority for sending traffic to kernel table
+ *   On failure: -1
+ */
+uint32_t
+mlx5_get_send_to_kernel_priority(struct rte_eth_dev *dev)
+{
+   struct mlx5_priv *priv = dev->data->dev_private;
+   uint32_t res;
+
+   switch (priv->sh->flow_max_priority) {
+   case RTE_DIM(priority_map_5):
+   res = 15;
+   break;
+   case RTE_DIM(priority_map_3):
+   res = 7;
+   break;
+   default:
+   DRV_LOG(ERR,
+   "port %u maximum priority: %d expected 8/16",
+   dev->data->port_id, priv->sh->flow_max_priority);
+   res = (uint32_t)-1;
+   }
+
+   return res;
+}
+
 /**
  * Get the E-Switch Manager vport id.
  *
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 860c6cd2ad..f4228a5549 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -1750,6 +1750,7 @@ uint32_t mlx5_get_lowest_priority(struct rte_eth_dev *dev,
 uint16_t mlx5_get_matcher_priority(struct rte_eth_dev *dev,
   const struct rte_flow_attr *attr,
   uint32_t subpriority, bool external);
+uint32_t mlx5_get_send_to_kernel_priority(struct rte_eth_dev *dev);
 int mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
 enum mlx5_feature_name feature,
 uint32_t id,
-- 
2.27.0



[PATCH 08/10] net/mlx5: expose table resource release function

2022-09-20 Thread Michael Savisko
Change function flow_dv_tbl_resource_release() from
static to external. Prestep for next commit.

Signed-off-by: Michael Savisko 
---
 drivers/net/mlx5/mlx5_flow.h| 2 ++
 drivers/net/mlx5/mlx5_flow_dv.c | 6 +-
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index f4228a5549..dac65a640c 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -1916,6 +1916,8 @@ struct mlx5_flow_tbl_resource 
*flow_dv_tbl_resource_get(struct rte_eth_dev *dev,
bool external, const struct mlx5_flow_tunnel *tunnel,
uint32_t group_id, uint8_t dummy,
uint32_t table_id, struct rte_flow_error *error);
+int flow_dv_tbl_resource_release(struct mlx5_dev_ctx_shared *sh,
+struct mlx5_flow_tbl_resource *tbl);
 
 struct mlx5_list_entry *flow_dv_tag_create_cb(void *tool_ctx, void *cb_ctx);
 int flow_dv_tag_match_cb(void *tool_ctx, struct mlx5_list_entry *entry,
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 8f8a1208e9..01bdd34d1d 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -75,10 +75,6 @@ union flow_dv_attr {
uint32_t attr;
 };
 
-static int
-flow_dv_tbl_resource_release(struct mlx5_dev_ctx_shared *sh,
-struct mlx5_flow_tbl_resource *tbl);
-
 static int
 flow_dv_encap_decap_resource_release(struct rte_eth_dev *dev,
 uint32_t encap_decap_idx);
@@ -10911,7 +10907,7 @@ flow_dv_tbl_remove_cb(void *tool_ctx, struct 
mlx5_list_entry *entry)
  * @return
  *   Returns 0 if table was released, else return 1;
  */
-static int
+int
 flow_dv_tbl_resource_release(struct mlx5_dev_ctx_shared *sh,
 struct mlx5_flow_tbl_resource *tbl)
 {
-- 
2.27.0



[PATCH 09/10] net/mlx5: add send to kernel action resource holder

2022-09-20 Thread Michael Savisko
Add new structure mlx5_send_to_kernel_action which will hold
together allocated action resource and a reference to used table.
A new structure member of this type is added to struct mlx5_dev_ctx_shared.
The member will be initialized upon first created send_to_kernel
action and will be reused for all future actions of this type.
Release of these resources will be done when all shared DR
resources are being released in mlx5_os_free_shared_dr().

Signed-off-by: Michael Savisko 
---
 drivers/net/mlx5/linux/mlx5_os.c | 11 +++
 drivers/net/mlx5/mlx5.h  |  6 ++
 2 files changed, 17 insertions(+)

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 0741028dab..840e650045 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -708,6 +708,17 @@ mlx5_os_free_shared_dr(struct mlx5_priv *priv)
mlx5_glue->destroy_flow_action(sh->pop_vlan_action);
sh->pop_vlan_action = NULL;
}
+   if (sh->send_to_kernel_action.action) {
+   void *action = sh->send_to_kernel_action.action;
+   mlx5_glue->destroy_flow_action(action);
+   sh->send_to_kernel_action.action = NULL;
+   }
+   if (sh->send_to_kernel_action.tbl) {
+   struct mlx5_flow_tbl_resource *tbl =
+   sh->send_to_kernel_action.tbl;
+   flow_dv_tbl_resource_release(sh, tbl);
+   sh->send_to_kernel_action.tbl = NULL;
+   }
 #endif /* HAVE_MLX5DV_DR */
if (sh->default_miss_action)
mlx5_glue->destroy_flow_action
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 8af84aef50..b93d451af6 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1178,6 +1178,11 @@ struct mlx5_flex_item {
struct mlx5_flex_pattern_field map[MLX5_FLEX_ITEM_MAPPING_NUM];
 };
 
+struct mlx5_send_to_kernel_action {
+   void *action;
+   void *tbl;
+};
+
 /*
  * Shared Infiniband device context for Master/Representors
  * which belong to same IB device with multiple IB ports.
@@ -1229,6 +1234,7 @@ struct mlx5_dev_ctx_shared {
/* Direct Rules tables for FDB, NIC TX+RX */
void *dr_drop_action; /* Pointer to DR drop action, any domain. */
void *pop_vlan_action; /* Pointer to DR pop VLAN action. */
+   struct mlx5_send_to_kernel_action send_to_kernel_action;
struct mlx5_hlist *encaps_decaps; /* Encap/decap action hash list. */
struct mlx5_hlist *modify_cmds;
struct mlx5_hlist *tag_table;
-- 
2.27.0



[PATCH 10/10] net/mlx5: translation of rte flow send to kernel action

2022-09-20 Thread Michael Savisko
Add flow_dv_translate_action_send_to_kernel() function which
will allocate rdma-core send_to_kernel action object.
Called from flow_dv_translate().

Signed-off-by: Michael Savisko 
---
 drivers/net/mlx5/mlx5_flow_dv.c | 62 +++--
 1 file changed, 59 insertions(+), 3 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 01bdd34d1d..bb9b8f9800 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -12175,6 +12175,56 @@ flow_dv_translate_action_sample(struct rte_eth_dev 
*dev,
return 0;
 }
 
+static void *
+flow_dv_translate_action_send_to_kernel(struct rte_eth_dev *dev,
+   struct rte_flow_error *error)
+{
+   struct mlx5_flow_tbl_resource *tbl;
+   struct mlx5_dev_ctx_shared *sh;
+   uint32_t priority;
+   void *action;
+   int ret;
+
+   sh = MLX5_SH(dev);
+   if (sh->send_to_kernel_action.action)
+   return sh->send_to_kernel_action.action;
+
+   priority = mlx5_get_send_to_kernel_priority(dev);
+   if (priority == (uint32_t)-1) {
+   rte_flow_error_set(error, ENOTSUP,
+  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+  "required priority is not available");
+   return NULL;
+   }
+
+   tbl = flow_dv_tbl_resource_get(dev, 0, 0, 0, false, NULL, 0, 0, 0,
+  error);
+   if (!tbl) {
+   rte_flow_error_set(error, ENODATA,
+  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+  "cannot find destination root table");
+   return NULL;
+   }
+
+   ret = mlx5_flow_os_create_flow_action_send_to_kernel(tbl->obj,
+   priority, &action);
+   if (ret) {
+   rte_flow_error_set(error, ENOMEM,
+  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+  "cannot create action");
+   goto err;
+   }
+
+   MLX5_ASSERT(action);
+   sh->send_to_kernel_action.action = action;
+   sh->send_to_kernel_action.tbl = tbl;
+   return action;
+
+err:
+   flow_dv_tbl_resource_release(sh, tbl);
+   return NULL;
+}
+
 /**
  * Convert Sample action to DV specification.
  *
@@ -13680,9 +13730,15 @@ flow_dv_translate(struct rte_eth_dev *dev,
action_flags |= MLX5_FLOW_ACTION_CT;
break;
case RTE_FLOW_ACTION_TYPE_SEND_TO_KERNEL:
-   return rte_flow_error_set(error, ENOTSUP,
-   RTE_FLOW_ERROR_TYPE_ACTION,
-   NULL, "send to kernel action is not 
supported.");
+   dev_flow->dv.actions[actions_n] =
+   flow_dv_translate_action_send_to_kernel(dev,
+   error);
+   if (!dev_flow->dv.actions[actions_n])
+   return -rte_errno;
+   actions_n++;
+   action_flags |= MLX5_FLOW_ACTION_SEND_TO_KERNEL;
+   dev_flow->handle->fate_action =
+   MLX5_FLOW_FATE_SEND_TO_KERNEL;
break;
case RTE_FLOW_ACTION_TYPE_END:
actions_end = true;
-- 
2.27.0



Re: [PATCH v4] usertools: rewrite pmdinfo

2022-09-20 Thread Olivier Matz
On Tue, Sep 20, 2022 at 12:42:12PM +0200, Robin Jarry wrote:
> dpdk-pmdinfo.py does not produce any parseable output. The -r/--raw flag
> merely prints multiple independent JSON lines which cannot be fed
> directly to any JSON parser. Moreover, the script complexity is rather
> high for such a simple task: extracting PMD_INFO_STRING from .rodata ELF
> sections. Rewrite it so that it can produce valid JSON.
> 
> Remove the PCI database parsing for PCI-ID to Vendor-Device names
> conversion. This should be done by external scripts (if really needed).
> 
> Here are some examples of use with jq:
> 
> Get the complete info for a given driver:
> 
>  ~$ usertools/dpdk-pmdinfo.py build/app/dpdk-testpmd | \
>jq '.[] | select(.name == "dmadev_idxd_pci")'
>  {
>"name": "dmadev_idxd_pci",
>"params": "max_queues=0",
>"kmod": "vfio-pci",
>"pci_ids": [
>  {
>"vendor": "8086",
>"device": "0b25"
>  }
>]
>  }
> 
> Get only the required kernel modules for a given driver:
> 
>  ~$ usertools/dpdk-pmdinfo.py build/app/dpdk-testpmd | \
>jq '.[] | select(.name == "net_i40e").kmod'
>  "* igb_uio | uio_pci_generic | vfio-pci"
> 
> Get only the required kernel modules for a given device:
> 
>  ~$ usertools/dpdk-pmdinfo.py build/app/dpdk-testpmd | \
>jq '.[] | select(.pci_ids[] | .vendor == "15b3" and .device == 
> "1013").kmod'
>  "* ib_uverbs & mlx5_core & mlx5_ib"
> 
> Print the list of drivers which define multiple parameters without
> space separators:
> 
>  ~$ usertools/dpdk-pmdinfo.py build/app/dpdk-testpmd | \
>jq '.[] | select(.params!=null and (.params|test("=[^ ]+="))) | {name, 
> params}'
>  ...
> 
> The script passes flake8, black, isort and pylint checks.
> 
> I have tested this with a matrix of python/pyelftools versions:
> 
>  pyelftools
>0.22 0.23 0.24 0.25 0.26 0.27 0.28 0.29
>  3.6 ok   ok   ok   ok   ok   ok   ok   ok
>  3.7 ok   ok   ok   ok   ok   ok   ok   ok
>   Python 3.8 ok   ok   ok   ok   ok   ok   ok   ok
>  3.9 ok   ok   ok   ok   ok   ok   ok   ok
>  3.10  fail fail fail fail   ok   ok   ok   ok
> 
> All failures with python 3.10 are related to the same issue:
> 
>   File "elftools/construct/lib/container.py", line 5, in 
> from collections import MutableMapping
>   ImportError: cannot import name 'MutableMapping' from 'collections'
> 
> Python 3.10 support is only available since pyelftools 0.26. The script
> will only work with Python 3.6 and later. Update the minimal system
> requirements and release notes.
> 
> NB: The output produced by the legacy -r/--raw flag can be obtained with
> the following command:
> 
>   strings build/app/dpdk-testpmd | sed -n 's/^PMD_INFO_STRING= //p'
> 
> Cc: Olivier Matz 
> Cc: Ferruh Yigit 
> Cc: Bruce Richardson 
> Signed-off-by: Robin Jarry 

Tested-by: Olivier Matz 


Re: [PATCH v9 00/12] preparation for the rte_flow offload of nfp PMD

2022-09-20 Thread Ferruh Yigit

On 9/15/2022 11:44 AM, Chaoyong He wrote:

* Changes since v8
- Update the nfp.rst
- Fix the 'app_hw' to 'app_fw'
- Remove the ovs compatible header file
- Remove the use of 
rte_eth_dev_configure()/rte_eth_rx_burst()/rte_eth_dev_start() API

* Changes since v7
- Adjust the logics to make sure not break the pci probe process
- Change 'app' to 'app_fw' in all logic to avoid confusion
- Fix problem about log level

* Changes since v6
- Fix the compile error

* Changes since v5
- Compare integer with 0 explicitly
- Change helper macro to function
- Implement the dummy functions
- Remove some unnecessary logics

* Changes since v4
- Remove the unneeded '__rte_unused' attribute
- Fixup a potential memory leak problem

* Changes since v3
- Add the 'Depends-on' tag

* Changes since v2
- Remove the use of rte_panic()

* Changes since v1
- Fix the compile error

Depends-on: series-23707 ("Add support of NFP3800 chip and firmware with NFDk")

Chaoyong He (12):
   net/nfp: move app specific attributes to own struct
   net/nfp: simplify initialization and remove dead code
   net/nfp: move app specific init logic to own function
   net/nfp: add initial flower firmware support
   net/nfp: add flower PF setup logic
   net/nfp: add flower PF related routines
   net/nfp: add flower ctrl VNIC related logics
   net/nfp: move common rxtx function for flower use
   net/nfp: add flower ctrl VNIC rxtx logic
   net/nfp: add flower representor framework
   net/nfp: move rxtx function to header file
   net/nfp: add flower PF rxtx logic



Hi Chaoyong,

Patchset looks good, except for the two issues we have discussed before;
those issues are:


* Creating a new ethdev just for driver-FW control communication
* Application (OvS) specific code in the driver

I commented them separately and cc'ed more folks, we can proceed when 
above are resolved.


Thanks,
ferruh



Re: [PATCH v9 07/12] net/nfp: add flower ctrl VNIC related logics

2022-09-20 Thread Ferruh Yigit

On 9/15/2022 11:44 AM, Chaoyong He wrote:

Adds the setup/start logic for the ctrl vNIC. This vNIC is used by
the PMD and flower firmware application as a communication channel
between driver and firmware. In the case of OVS it is also used to
communicate flow statistics from hardware to the driver.

A rte_eth device is not exposed to DPDK for this vNIC as it is strictly
used internally by flower logic.



Hi Chaoyong,

Similar comment to previous versions: the interface is created using the
regular 'rte_eth_dev_allocate()' API, so I think the interface will be visible
to the application. I can't understand the need for creating an interface for
control.


What is the communication method between the driver and the FW?
Since one of the following patches (09/12) introduces Rx/Tx for the ctrl
interface, does the device interface use control packets (similar to network
data packets)?



Because of the addition of the ctrl vNIC, a new PCItoCPPBar is needed. Modify
the related logic.

Signed-off-by: Chaoyong He 
Reviewed-by: Niklas Söderlund 


<...>



Re: [PATCH v9 05/12] net/nfp: add flower PF setup logic

2022-09-20 Thread Ferruh Yigit

On 9/15/2022 11:44 AM, Chaoyong He wrote:

Adds the vNIC initialization logic for the flower PF vNIC. The flower
firmware application exposes this vNIC for the purposes of fallback
traffic in the switchdev use-case.

Adds minimal dev_ops for this PF vNIC device. Because the device is
being exposed externally to DPDK it needs to implement a minimal set
of dev_ops.

Signed-off-by: Chaoyong He 
Reviewed-by: Niklas Söderlund 


<...>


+
+struct dp_packet {
+   struct rte_mbuf mbuf;
+   uint32_t source;
+};
+
+static void
+nfp_flower_pf_mp_init(__rte_unused struct rte_mempool *mp,
+   __rte_unused void *opaque_arg,
+   void *packet,
+   __rte_unused unsigned int i)
+{
+   struct dp_packet *pkt = packet;
+   /* Indicate that this pkt is from DPDK */
+   pkt->source = 3;
+}
+
+static struct rte_mempool *
+nfp_flower_pf_mp_create(void)
+{
+   uint32_t nb_mbufs;
+   unsigned int numa_node;
+   struct rte_mempool *pktmbuf_pool;
+   uint32_t n_rxd = PF_VNIC_NB_DESC;
+   uint32_t n_txd = PF_VNIC_NB_DESC;
+
+   nb_mbufs = RTE_MAX(n_rxd + n_txd + MAX_PKT_BURST + MEMPOOL_CACHE_SIZE, 
81920U);
+
+   numa_node = rte_socket_id();
+   pktmbuf_pool = rte_pktmbuf_pool_create("flower_pf_mbuf_pool", nb_mbufs,
+   MEMPOOL_CACHE_SIZE, MBUF_PRIV_SIZE,
+   RTE_MBUF_DEFAULT_BUF_SIZE, numa_node);
+   if (pktmbuf_pool == NULL) {
+   PMD_INIT_LOG(ERR, "Cannot init pf vnic mbuf pool");
+   return NULL;
+   }
+
+   rte_mempool_obj_iter(pktmbuf_pool, nfp_flower_pf_mp_init, NULL);
+
+   return pktmbuf_pool;
+}
+


Hi Chaoyong,

Again, a similar comment to previous versions: what I understand is that this
new flower FW supports HW flow filtering and the intended use case is OvS
HW acceleration.
But does the DPDK driver need to know OvS data structures, like "struct
dp_packet"? Can it be transparent to the application? I am sure there are
other devices offloading some OvS tasks to HW.


@Ian, @David,

Can you please comment on above usage, do you guys see any way to escape 
from OvS specific code in the driver?




[PATCH] raw/ioat: remove deprecated driver

2022-09-20 Thread Bruce Richardson
The ioat rawdev driver has been superseded by the ioat and idxd dmadev
drivers, and has been deprecated for some time, so remove it.

Signed-off-by: Bruce Richardson 
---
 doc/guides/rawdevs/index.rst   |   1 -
 doc/guides/rawdevs/ioat.rst| 333 ---
 doc/guides/rel_notes/deprecation.rst   |   7 -
 drivers/raw/ioat/dpdk_idxd_cfg.py  |   1 -
 drivers/raw/ioat/idxd_bus.c| 365 
 drivers/raw/ioat/idxd_pci.c| 380 -
 drivers/raw/ioat/ioat_common.c | 273 -
 drivers/raw/ioat/ioat_private.h|  84 ---
 drivers/raw/ioat/ioat_rawdev.c | 332 ---
 drivers/raw/ioat/ioat_rawdev_test.c| 734 -
 drivers/raw/ioat/ioat_spec.h   | 336 ---
 drivers/raw/ioat/meson.build   |  36 --
 drivers/raw/ioat/rte_idxd_rawdev_fns.h | 394 -
 drivers/raw/ioat/rte_ioat_rawdev.h | 214 ---
 drivers/raw/ioat/rte_ioat_rawdev_fns.h | 379 -
 drivers/raw/ioat/version.map   |   3 -
 drivers/raw/meson.build|   1 -
 17 files changed, 3873 deletions(-)
 delete mode 100644 doc/guides/rawdevs/ioat.rst
 delete mode 120000 drivers/raw/ioat/dpdk_idxd_cfg.py
 delete mode 100644 drivers/raw/ioat/idxd_bus.c
 delete mode 100644 drivers/raw/ioat/idxd_pci.c
 delete mode 100644 drivers/raw/ioat/ioat_common.c
 delete mode 100644 drivers/raw/ioat/ioat_private.h
 delete mode 100644 drivers/raw/ioat/ioat_rawdev.c
 delete mode 100644 drivers/raw/ioat/ioat_rawdev_test.c
 delete mode 100644 drivers/raw/ioat/ioat_spec.h
 delete mode 100644 drivers/raw/ioat/meson.build
 delete mode 100644 drivers/raw/ioat/rte_idxd_rawdev_fns.h
 delete mode 100644 drivers/raw/ioat/rte_ioat_rawdev.h
 delete mode 100644 drivers/raw/ioat/rte_ioat_rawdev_fns.h
 delete mode 100644 drivers/raw/ioat/version.map

diff --git a/doc/guides/rawdevs/index.rst b/doc/guides/rawdevs/index.rst
index cf69633064..f34315f051 100644
--- a/doc/guides/rawdevs/index.rst
+++ b/doc/guides/rawdevs/index.rst
@@ -15,5 +15,4 @@ application through rawdev API.
 cnxk_gpio
 dpaa2_cmdif
 ifpga
-ioat
 ntb
diff --git a/doc/guides/rawdevs/ioat.rst b/doc/guides/rawdevs/ioat.rst
deleted file mode 100644
index 98d15dd032..0000000000
--- a/doc/guides/rawdevs/ioat.rst
+++ /dev/null
@@ -1,333 +0,0 @@
-..  SPDX-License-Identifier: BSD-3-Clause
-Copyright(c) 2019 Intel Corporation.
-
-.. include:: 
-
-IOAT Rawdev Driver
-===
-
-.. warning::
-As of DPDK 21.11 the rawdev implementation of the IOAT driver has been 
deprecated.
-Please use the dmadev library instead.
-
-The ``ioat`` rawdev driver provides a poll-mode driver (PMD) for Intel\ |reg|
-Data Streaming Accelerator `(Intel DSA)
-`_ and 
for Intel\ |reg|
-QuickData Technology, part of Intel\ |reg| I/O Acceleration Technology
-`(Intel I/OAT)
-`_.
-This PMD, when used on supported hardware, allows data copies, for example,
-cloning packet data, to be accelerated by that hardware rather than having to
-be done by software, freeing up CPU cycles for other tasks.
-
-Hardware Requirements
---
-
-The ``dpdk-devbind.py`` script, included with DPDK,
-can be used to show the presence of supported hardware.
-Running ``dpdk-devbind.py --status-dev misc`` will show all the miscellaneous,
-or rawdev-based devices on the system.
-For Intel\ |reg| QuickData Technology devices, the hardware will be often 
listed as "Crystal Beach DMA",
-or "CBDMA".
-For Intel\ |reg| DSA devices, they are currently (at time of writing) 
appearing as devices with type "0b25",
-due to the absence of pci-id database entries for them at this point.
-
-Compilation
-
-
-For builds using ``meson`` and ``ninja``, the driver will be built when the 
target platform is x86-based.
-No additional compilation steps are necessary.
-
-.. note::
-Since the addition of the dmadev library, the ``ioat`` and ``idxd`` 
parts of this driver
-will only be built if their ``dmadev`` counterparts are not built.
-The following can be used to disable the ``dmadev`` drivers,
-if the raw drivers are to be used instead::
-
-$ meson -Ddisable_drivers=dma/* 
-
-Device Setup
--
-
-Depending on support provided by the PMD, HW devices can either use the kernel 
configured driver
-or be bound to a user-space IO driver for use.
-For example, Intel\ |reg| DSA devices can use the IDXD kernel driver or 
DPDK-supported drivers,
-such as ``vfio-pci``.
-
-Intel\ |reg| DSA devices using idxd kernel driver
-~~
-
-To use a Intel\ |reg| DSA device bound to the IDXD kernel driver, the device 
must first be configured.
-The `accel-config `_ utility library can 
be used

[PATCH] examples/kni: remove deprecated kni example

2022-09-20 Thread Bruce Richardson
As part of the agreed process for deprecating KNI in DPDK, the example
app is scheduled for removal as part of the 22.11 release.

Signed-off-by: Bruce Richardson 
---
 doc/guides/rel_notes/deprecation.rst  |1 -
 doc/guides/sample_app_ug/index.rst|1 -
 .../sample_app_ug/kernel_nic_interface.rst|  318 -
 examples/kni/Makefile |   52 -
 examples/kni/main.c   | 1140 -
 examples/kni/meson.build  |   13 -
 examples/meson.build  |1 -
 7 files changed, 1526 deletions(-)
 delete mode 100644 doc/guides/sample_app_ug/kernel_nic_interface.rst
 delete mode 100644 examples/kni/Makefile
 delete mode 100644 examples/kni/main.c
 delete mode 100644 examples/kni/meson.build

diff --git a/doc/guides/rel_notes/deprecation.rst 
b/doc/guides/rel_notes/deprecation.rst
index c1975d6c3e..2ded2d69d1 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -70,7 +70,6 @@ Deprecation Notices
   and `refinement `_:
 
   * Some deprecation warnings will be added in DPDK 22.11.
-  * The KNI example application will be removed from DPDK 22.11.
   * The KNI kernel module, library and PMD will be removed from the DPDK 23.11.
 
 * lib: will fix extending some enum/define breaking the ABI. There are multiple
diff --git a/doc/guides/sample_app_ug/index.rst 
b/doc/guides/sample_app_ug/index.rst
index cc9fae1e8c..6e1e83d7d7 100644
--- a/doc/guides/sample_app_ug/index.rst
+++ b/doc/guides/sample_app_ug/index.rst
@@ -20,7 +20,6 @@ Sample Applications User Guides
 ip_frag
 ipv4_multicast
 ip_reassembly
-kernel_nic_interface
 keep_alive
 dma
 l2_forward_crypto
diff --git a/doc/guides/sample_app_ug/kernel_nic_interface.rst 
b/doc/guides/sample_app_ug/kernel_nic_interface.rst
deleted file mode 100644
index ef441cc1e5..0000000000
--- a/doc/guides/sample_app_ug/kernel_nic_interface.rst
+++ /dev/null
@@ -1,318 +0,0 @@
-..  SPDX-License-Identifier: BSD-3-Clause
-Copyright(c) 2010-2014 Intel Corporation.
-
-Kernel NIC Interface Sample Application
-===
-
-The Kernel NIC Interface (KNI) is a DPDK control plane solution that
-allows userspace applications to exchange packets with the kernel networking 
stack.
-To accomplish this, DPDK userspace applications use an IOCTL call
-to request the creation of a KNI virtual device in the Linux* kernel.
-The IOCTL call provides interface information and the DPDK's physical address 
space,
-which is re-mapped into the kernel address space by the KNI kernel loadable 
module
-that saves the information to a virtual device context.
-The DPDK creates FIFO queues for packet ingress and egress
-to the kernel module for each device allocated.
-
-The KNI kernel loadable module is a standard net driver,
-which upon receiving the IOCTL call access the DPDK's FIFO queue to
-receive/transmit packets from/to the DPDK userspace application.
-The FIFO queues contain pointers to data packets in the DPDK. This:
-
-*   Provides a faster mechanism to interface with the kernel net stack and 
eliminates system calls
-
-*   Facilitates the DPDK using standard Linux* userspace net tools (tshark, 
rsync, and so on)
-
-*   Eliminate the copy_to_user and copy_from_user operations on packets.
-
-The Kernel NIC Interface sample application is a simple example that 
demonstrates the use
-of the DPDK to create a path for packets to go through the Linux* kernel.
-This is done by creating one or more kernel net devices for each of the DPDK 
ports.
-The application allows the use of standard Linux tools (ethtool, iproute, 
tshark) with the DPDK ports and
-also the exchange of packets between the DPDK application and the Linux* 
kernel.
-
-The Kernel NIC Interface sample application requires that the
-KNI kernel module ``rte_kni`` be loaded into the kernel.  See
-:doc:`../prog_guide/kernel_nic_interface` for more information on loading
-the ``rte_kni`` kernel module.
-
-Overview
-
-
-The Kernel NIC Interface sample application ``kni`` allocates one or more
-KNI interfaces for each physical NIC port.  For each physical NIC port,
-``kni`` uses two DPDK threads in user space; one thread reads from the port and
-writes to the corresponding KNI interfaces and the other thread reads from
-the KNI interfaces and writes the data unmodified to the physical NIC port.
-
-It is recommended to configure one KNI interface for each physical NIC port.
-The application can be configured with more than one KNI interface for
-each physical NIC port for performance testing or it can work together with
-VMDq support in future.
-
-The packet flow through the Kernel NIC Interface application is as shown
-in the following figure.
-
-.. _figure_kernel_nic:
-
-.. figure:: img/kernel_nic.*
-
-   Kernel NIC Application Packet Flow
-
-If link monitoring is enabled 

Re: [PATCH v2 1/6] app/procinfo: add version dump

2022-09-20 Thread Stephen Hemminger
On Tue, 20 Sep 2022 18:51:42 +0800
Dongdong Liu  wrote:

> From: "Min Hu (Connor)" 
> 
> This patch adds support for dumping the DPDK version and ethdev firmware version.
> 
> The command is like:
> dpdk-proc-info -a :xx:xx.x --file-prefix=xxx -- --show-version
> 
> Signed-off-by: Min Hu (Connor) 
> Signed-off-by: Dongdong Liu 
> ---

Why mix DPDK and firmware version in one option?

Why not use semi-standard convention of -V --version option for DPDK version
and add --firmware-version option to show firmware.


Re: [PATCH v2 3/6] app/procinfo: add module info dump

2022-09-20 Thread Stephen Hemminger
On Tue, 20 Sep 2022 18:51:44 +0800
Dongdong Liu  wrote:

> +
> + RTE_ETH_FOREACH_DEV(i) {
> + /* Skip if port is not in mask */
> + if ((enabled_port_mask & (1ul << i)) == 0)
> + continue;
> +
> + if (!rte_eth_dev_is_valid_port(i))
> + continue;

There is no way RTE_ETH_FOREACH_DEV would iterate
over an invalid port.  If it did the macro would be seriously
broken.

That code is unnecessary.


Re: [PATCH v2 2/6] app/procinfo: add RSS RETA dump

2022-09-20 Thread Stephen Hemminger
On Tue, 20 Sep 2022 18:51:43 +0800
Dongdong Liu  wrote:

> + ret = rte_eth_dev_info_get(id, &dev_info);
> + if (ret < 0) {
> + printf("Error getting device info, ret = %d\n", ret);

Proc-info should be showing all errors on stderr, not stdout.
And in case of error should exit with non-zero status.

But this may be a generic problem in lots of places in the application.


Re: [PATCH] examples/kni: remove deprecated kni example

2022-09-20 Thread Stephen Hemminger
On Tue, 20 Sep 2022 16:14:53 +0100
Bruce Richardson  wrote:

> As part of the agreed process for deprecating KNI in DPDK, the example
> app is scheduled for removal as part of the 22.11 release.
> 
> Signed-off-by: Bruce Richardson 

And as expected, no one will notice until it is gone!

Acked-by: Stephen Hemminger 


RE: [PATCH v1] ethdev: add direction info when creating the transfer table

2022-09-20 Thread Ivan Malov

Hi Ori,

On Tue, 20 Sep 2022, Ori Kam wrote:


Hi Ivan,


-Original Message-
From: Ivan Malov 
Sent: Tuesday, 20 September 2022 15:46

Hi Ori,

On Tue, 20 Sep 2022, Ori Kam wrote:


Hi Ivan, Thomas and Rongwei


-Original Message-
From: Thomas Monjalon 
Sent: Thursday, 15 September 2022 14:16

15/09/2022 12:59, Ivan Malov:

Hi Rongwei,

In this reply, I do not include the previous mail because the amount
of inline commentary has gone haywire over the past couple of days.
Let's re-iterate.

But before I get to that, I'd like to offer a fresh perspective:

Perhaps, if we all agree that term "vport" means an endpoint which
can stand for any "port" except for physical one, then it should
be possible to use term ANY_VPORTS rather than ANY_GUEST_PORTS.


The opposite of "physical" is "virtual" indeed.


But that's tricky, of course. I don't have a way with naming,
so more opinions are welcome and very-very desirable here.

So:

1) Do you agree that, in your proposal, the new "wire_orig" / "vf_orig"
primitives are in fact yet another match criteria?

..

To me, it looks so. If they are match criteria, then they belong
in match pattern, that is, they should be expressed as new items.

For "transfer" rules, the *existing* attributes are: "group"
and "priority". As you may note, these are clearly not match
criteria. They control the look-up order. So, to this day,
there're no match criteria in DPDK expressed as attributes.

If these "wire_orig" / "vf_orig" are going to be introduced
as attributes, that should be backed with strong motivation.


I prefer we keep matching in a single place, not in attributes.



I think we are talking about two different features.
Feature 1:
Allow matching on all vports that are not wire
Feature 2:
Save allocation space and allow fast insertion.
In this case, the matching is not on all vports it can be just part of the 
vports
but it will never be the wire port.
For example:
port 0 - wire
ports 1,2,3,4,5  - vports
the application wants to insert only these rules:
represented_port(port_id=2) / eth / ipv4 (src==xx)
represented_port(port_id=4) / eth / ipv4 (src==xx)
represented_port(port_id=4) / eth / ipv4 (src==yy)

For feature 1 I fully agree with you Ivan, this should be added as an item.


Thank you.


For feature 2 I think Rongwei's suggestion is the better option.
If I understand correctly the idea is to give hint to the PMD on where to

allocate memory

and how to insert the rules most optimally. Since this is shared for all rules 
it

makes more sense

to add it as an attribute, just like we don’t have an ingress item (maybe we

should?)

But isn't pattern template also supposed to be shared for all rules
in the table? I.e., the user creates an async flow table and submits
a flow "shape" (which consists of attrs, pattern template and action
template). So why should "giving a hint" via an item template be
considered worse than doing so via an attribute?



The same item template maybe used elsewhere, for example, the following
pattern  eth / ipv4(src, dst) / udp(sport, dport), can be used on number of 
different
tables.


In my understanding, the user may want to create flow table A
and use pattern template A' for it, which is as follows:

any_vports / eth / ipv4 / udp

The PMD can see this item and treat it exactly the same
way as it could treat such attribute ("where to allocate
resources, etc.").

Then the user may want to create flow table B and
use pattern template B' for it:

any_phy_ports / eth / ipv4 / udp

Once again, the PMD can clearly see the difference between
the A' and B' templates and, this time, allocate resources
the other way (as per efficiency requirements).

By saying "can be used on number of different tables", do you mean
that it is important to make the *network* part of the pattern
shareable between flow tables? I.e. are you saying that
templates A' and B' cause resource duplication just
because of the same *network* part in your case?


I think that the main difference between us is that from my point of view this 
value is just
where to allocate resources / how to better insert the rule. It is not related 
to matching.


To me, it *is* the match criterion which, at the same time, serves
as a value indicating the way how resources should be allocated.
But before all, it is a match criterion.

If it refers to a group of ports = in order to ditch "the other half"
of traffic from consideration (like Rongwei explained), then it
looks like a match criterion.


From Nvidia viewpoint we need this information so we can allocate the resource 
at the correct
place and avoid inserting duplication of rules.


I see.


I agree that by using the item we can get the same results, but it is incorrect 
since we are not matching on it.


If one provides item UDP in the pattern and does not match on any UDP
fields, doing so nevertheless *is* matching on particular packet type.

The same seemingly goes for the new attribu

[PATCH v2] examples/kni: remove deprecated kni example

2022-09-20 Thread Bruce Richardson
As part of the agreed process for deprecating KNI in DPDK, the example
app is scheduled for removal as part of the 22.11 release.

Signed-off-by: Bruce Richardson 
---
V2: Correct old release note links to KNI guide by referencing older doc
---
 doc/guides/rel_notes/deprecation.rst  |1 -
 doc/guides/rel_notes/release_18_11.rst|4 +-
 doc/guides/sample_app_ug/index.rst|1 -
 .../sample_app_ug/kernel_nic_interface.rst|  318 -
 examples/kni/Makefile |   52 -
 examples/kni/main.c   | 1140 -
 examples/kni/meson.build  |   13 -
 examples/meson.build  |1 -
 8 files changed, 2 insertions(+), 1528 deletions(-)
 delete mode 100644 doc/guides/sample_app_ug/kernel_nic_interface.rst
 delete mode 100644 examples/kni/Makefile
 delete mode 100644 examples/kni/main.c
 delete mode 100644 examples/kni/meson.build

diff --git a/doc/guides/rel_notes/deprecation.rst 
b/doc/guides/rel_notes/deprecation.rst
index c1975d6c3e..2ded2d69d1 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -70,7 +70,6 @@ Deprecation Notices
   and `refinement `_:

   * Some deprecation warnings will be added in DPDK 22.11.
-  * The KNI example application will be removed from DPDK 22.11.
   * The KNI kernel module, library and PMD will be removed from the DPDK 23.11.

 * lib: will fix extending some enum/define breaking the ABI. There are multiple
diff --git a/doc/guides/rel_notes/release_18_11.rst 
b/doc/guides/rel_notes/release_18_11.rst
index be52bf0803..708bc9d4dd 100644
--- a/doc/guides/rel_notes/release_18_11.rst
+++ b/doc/guides/rel_notes/release_18_11.rst
@@ -265,7 +265,7 @@ New Features
   carrier state is set to ``on`` via ``rte_kni_update_link`` or
   by writing ``1`` to ``/sys/devices/virtual/net//carrier``.
   In previous versions the default carrier state was left undefined.
-  See :doc:`../prog_guide/kernel_nic_interface` for more information.
+  See `Kernel NIC Interface 
`_ 
for more information.

   Also added the new API function ``rte_kni_update_link()`` to allow the user
   to set the carrier state of the KNI kernel network interface.
@@ -273,7 +273,7 @@ New Features
   Also added a new command line flag ``-m`` to the KNI sample application to
   monitor and automatically reflect the physical NIC carrier state to the
   KNI kernel network interface with the new ``rte_kni_update_link()`` API.
-  See :doc:`../sample_app_ug/kernel_nic_interface` for more information.
+  See `Kernel NIC Interface 
`_ 
for more information.

 * **Added ability to switch queue deferred start flag on testpmd app.**

diff --git a/doc/guides/sample_app_ug/index.rst 
b/doc/guides/sample_app_ug/index.rst
index cc9fae1e8c..6e1e83d7d7 100644
--- a/doc/guides/sample_app_ug/index.rst
+++ b/doc/guides/sample_app_ug/index.rst
@@ -20,7 +20,6 @@ Sample Applications User Guides
 ip_frag
 ipv4_multicast
 ip_reassembly
-kernel_nic_interface
 keep_alive
 dma
 l2_forward_crypto
diff --git a/doc/guides/sample_app_ug/kernel_nic_interface.rst 
b/doc/guides/sample_app_ug/kernel_nic_interface.rst
deleted file mode 100644
index ef441cc1e5..00
--- a/doc/guides/sample_app_ug/kernel_nic_interface.rst
+++ /dev/null
@@ -1,318 +0,0 @@
-..  SPDX-License-Identifier: BSD-3-Clause
-Copyright(c) 2010-2014 Intel Corporation.
-
-Kernel NIC Interface Sample Application
-===
-
-The Kernel NIC Interface (KNI) is a DPDK control plane solution that
-allows userspace applications to exchange packets with the kernel networking 
stack.
-To accomplish this, DPDK userspace applications use an IOCTL call
-to request the creation of a KNI virtual device in the Linux* kernel.
-The IOCTL call provides interface information and the DPDK's physical address 
space,
-which is re-mapped into the kernel address space by the KNI kernel loadable 
module
-that saves the information to a virtual device context.
-The DPDK creates FIFO queues for packet ingress and egress
-to the kernel module for each device allocated.
-
-The KNI kernel loadable module is a standard net driver,
-which upon receiving the IOCTL call access the DPDK's FIFO queue to
-receive/transmit packets from/to the DPDK userspace application.
-The FIFO queues contain pointers to data packets in the DPDK. This:
-
-*   Provides a faster mechanism to interface with the kernel net stack and 
eliminates system calls
-
-*   Facilitates the DPDK using standard Linux* userspace net tools (tshark, 
rsync, and so on)
-
-*   Eliminate the copy_to_user and copy_from_user operations on packets.
-
-The Kernel NIC Interface sample application is a simple example that 
demonstrates t

[PATCH v2] raw/ioat: remove deprecated driver

2022-09-20 Thread Bruce Richardson
The ioat driver has been superseded by the ioat and idxd dmadev drivers,
and has been deprecated for some time, so remove it.

Signed-off-by: Bruce Richardson 
---
V2: remove references to ioat rawdev from API guide
---
 doc/api/doxy-api-index.md  |   1 -
 doc/api/doxy-api.conf.in   |   1 -
 doc/guides/rawdevs/index.rst   |   1 -
 doc/guides/rawdevs/ioat.rst| 333 ---
 doc/guides/rel_notes/deprecation.rst   |   7 -
 drivers/raw/ioat/dpdk_idxd_cfg.py  |   1 -
 drivers/raw/ioat/idxd_bus.c| 365 
 drivers/raw/ioat/idxd_pci.c| 380 -
 drivers/raw/ioat/ioat_common.c | 273 -
 drivers/raw/ioat/ioat_private.h|  84 ---
 drivers/raw/ioat/ioat_rawdev.c | 332 ---
 drivers/raw/ioat/ioat_rawdev_test.c| 734 -
 drivers/raw/ioat/ioat_spec.h   | 336 ---
 drivers/raw/ioat/meson.build   |  36 --
 drivers/raw/ioat/rte_idxd_rawdev_fns.h | 394 -
 drivers/raw/ioat/rte_ioat_rawdev.h | 214 ---
 drivers/raw/ioat/rte_ioat_rawdev_fns.h | 379 -
 drivers/raw/ioat/version.map   |   3 -
 drivers/raw/meson.build|   1 -
 19 files changed, 3875 deletions(-)
 delete mode 100644 doc/guides/rawdevs/ioat.rst
 delete mode 12 drivers/raw/ioat/dpdk_idxd_cfg.py
 delete mode 100644 drivers/raw/ioat/idxd_bus.c
 delete mode 100644 drivers/raw/ioat/idxd_pci.c
 delete mode 100644 drivers/raw/ioat/ioat_common.c
 delete mode 100644 drivers/raw/ioat/ioat_private.h
 delete mode 100644 drivers/raw/ioat/ioat_rawdev.c
 delete mode 100644 drivers/raw/ioat/ioat_rawdev_test.c
 delete mode 100644 drivers/raw/ioat/ioat_spec.h
 delete mode 100644 drivers/raw/ioat/meson.build
 delete mode 100644 drivers/raw/ioat/rte_idxd_rawdev_fns.h
 delete mode 100644 drivers/raw/ioat/rte_ioat_rawdev.h
 delete mode 100644 drivers/raw/ioat/rte_ioat_rawdev_fns.h
 delete mode 100644 drivers/raw/ioat/version.map

diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
index 186a258be4..e5c7aceaaf 100644
--- a/doc/api/doxy-api-index.md
+++ b/doc/api/doxy-api-index.md
@@ -46,7 +46,6 @@ The public API headers are grouped by topics:
   [i40e](@ref rte_pmd_i40e.h),
   [ice](@ref rte_pmd_ice.h),
   [iavf](@ref rte_pmd_iavf.h),
-  [ioat](@ref rte_ioat_rawdev.h),
   [bnxt](@ref rte_pmd_bnxt.h),
   [cnxk](@ref rte_pmd_cnxk.h),
   [dpaa](@ref rte_pmd_dpaa.h),
diff --git a/doc/api/doxy-api.conf.in b/doc/api/doxy-api.conf.in
index 608494a7c0..c556995c74 100644
--- a/doc/api/doxy-api.conf.in
+++ b/doc/api/doxy-api.conf.in
@@ -24,7 +24,6 @@ INPUT   = @TOPDIR@/doc/api/doxy-api-index.md \
   @TOPDIR@/drivers/net/softnic \
   @TOPDIR@/drivers/raw/dpaa2_cmdif \
   @TOPDIR@/drivers/raw/ifpga \
-  @TOPDIR@/drivers/raw/ioat \
   @TOPDIR@/lib/eal/include \
   @TOPDIR@/lib/eal/include/generic \
   @TOPDIR@/lib/acl \
diff --git a/doc/guides/rawdevs/index.rst b/doc/guides/rawdevs/index.rst
index cf69633064..f34315f051 100644
--- a/doc/guides/rawdevs/index.rst
+++ b/doc/guides/rawdevs/index.rst
@@ -15,5 +15,4 @@ application through rawdev API.
 cnxk_gpio
 dpaa2_cmdif
 ifpga
-ioat
 ntb
diff --git a/doc/guides/rawdevs/ioat.rst b/doc/guides/rawdevs/ioat.rst
deleted file mode 100644
index 98d15dd032..00
--- a/doc/guides/rawdevs/ioat.rst
+++ /dev/null
@@ -1,333 +0,0 @@
-..  SPDX-License-Identifier: BSD-3-Clause
-Copyright(c) 2019 Intel Corporation.
-
-.. include:: 
-
-IOAT Rawdev Driver
-===
-
-.. warning::
-As of DPDK 21.11 the rawdev implementation of the IOAT driver has been 
deprecated.
-Please use the dmadev library instead.
-
-The ``ioat`` rawdev driver provides a poll-mode driver (PMD) for Intel\ |reg|
-Data Streaming Accelerator `(Intel DSA)
-`_ and 
for Intel\ |reg|
-QuickData Technology, part of Intel\ |reg| I/O Acceleration Technology
-`(Intel I/OAT)
-`_.
-This PMD, when used on supported hardware, allows data copies, for example,
-cloning packet data, to be accelerated by that hardware rather than having to
-be done by software, freeing up CPU cycles for other tasks.
-
-Hardware Requirements
---
-
-The ``dpdk-devbind.py`` script, included with DPDK,
-can be used to show the presence of supported hardware.
-Running ``dpdk-devbind.py --status-dev misc`` will show all the miscellaneous,
-or rawdev-based devices on the system.
-For Intel\ |reg| QuickData Technology devices, the hardware will be often 
listed as "Crystal Beach DMA",
-or "CBDMA".
-For Intel\ |reg| DSA devices, they are currently (at time of writing) 
appearing as devices wi

Re: [PATCH v3 2/2] net: have checksum routines accept unaligned data

2022-09-20 Thread Thomas Monjalon
20/09/2022 14:09, Mattias Rönnblom:
> On 2022-07-11 15:25, Olivier Matz wrote:
> > On Mon, Jul 11, 2022 at 02:11:32PM +0200, Mattias Rönnblom wrote:
> >> __rte_raw_cksum() (used by rte_raw_cksum() among others) accessed its
> >> data through an uint16_t pointer, which allowed the compiler to assume
> >> the data was 16-bit aligned. This in turn would, with certain
> >> architectures and compiler flag combinations, result in code with SIMD
> >> load or store instructions with restrictions on data alignment.
> >>
> >> This patch keeps the old algorithm, but data is read using memcpy()
> >> instead of direct pointer access, forcing the compiler to always
> >> generate code that handles unaligned input. The __may_alias__ GCC
> >> attribute is no longer needed.
> >>
> >> The data on which the Internet checksum functions operates are almost
> >> always 16-bit aligned, but there are exceptions. In particular, the
> >> PDCP protocol header may (literally) have an odd size.
> >>
> >> Performance impact seems to range from none to a very slight
> >> regression.
> >>
> >> Bugzilla ID: 1035
> >> Cc: sta...@dpdk.org
> > 
> > Fixes: 6006818cfb26 ("net: new checksum functions")
> > 
> >> ---
> >>
> >> v3:
> >>* Use RTE_ALIGN_FLOOR() in the pointer arithmetic (Olivier Matz).
> >> v2:
> >>* Simplified the odd-length conditional (Morten Brørup).
> >>
> >> Reviewed-by: Morten Brørup 
> >>
> >> Signed-off-by: Mattias Rönnblom 
> > 
> > Acked-by: Olivier Matz 
> > 
> > Thank you!
> 
> Are there any plans to merge this patchset?

Applied, thanks.
Sorry for the delay.





Re: [PATCH] net/nfp: improve readability NFP HWINFO header

2022-09-20 Thread Ferruh Yigit

On 8/26/2022 6:39 AM, Chaoyong He wrote:

From: James Hershaw 

Prepend `0x` to the NFP HWINFO header value that is printed to improve
the readability of the printed statement.

Signed-off-by: James Hershaw 
Reviewed-by: Chaoyong He 
Reviewed-by: Niklas Söderlund 
---
  drivers/net/nfp/nfpcore/nfp_hwinfo.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/nfp/nfpcore/nfp_hwinfo.c 
b/drivers/net/nfp/nfpcore/nfp_hwinfo.c
index c0516bf..9f848bd 100644
--- a/drivers/net/nfp/nfpcore/nfp_hwinfo.c
+++ b/drivers/net/nfp/nfpcore/nfp_hwinfo.c
@@ -108,7 +108,7 @@
goto exit_free;
  
  	header = (void *)db;

-   printf("NFP HWINFO header: %08x\n", *(uint32_t *)header);
+   printf("NFP HWINFO header: %#08x\n", *(uint32_t *)header);


Why is the driver using 'printf' directly instead of the rte_log APIs?

I can see there are already 'PMD_INIT_LOG' & 'PMD_DRV_LOG' macros for this.



Re: [PATCH v4] usertools: rewrite pmdinfo

2022-09-20 Thread Ferruh Yigit

On 9/20/2022 11:42 AM, Robin Jarry wrote:



dpdk-pmdinfo.py does not produce any parseable output. The -r/--raw flag
merely prints multiple independent JSON lines which cannot be fed
directly to any JSON parser. Moreover, the script complexity is rather
high for such a simple task: extracting PMD_INFO_STRING from .rodata ELF
sections. Rewrite it so that it can produce valid JSON.

Remove the PCI database parsing for PCI-ID to Vendor-Device names
conversion. This should be done by external scripts (if really needed).

Here are some examples of use with jq:

Get the complete info for a given driver:

  ~$ usertools/dpdk-pmdinfo.py build/app/dpdk-testpmd | \
jq '.[] | select(.name == "dmadev_idxd_pci")'
  {
"name": "dmadev_idxd_pci",
"params": "max_queues=0",
"kmod": "vfio-pci",
"pci_ids": [
  {
"vendor": "8086",
"device": "0b25"
  }
]
  }

Get only the required kernel modules for a given driver:

  ~$ usertools/dpdk-pmdinfo.py build/app/dpdk-testpmd | \
jq '.[] | select(.name == "net_i40e").kmod'
  "* igb_uio | uio_pci_generic | vfio-pci"

Get only the required kernel modules for a given device:

  ~$ usertools/dpdk-pmdinfo.py build/app/dpdk-testpmd | \
jq '.[] | select(.pci_ids[] | .vendor == "15b3" and .device == "1013").kmod'
  "* ib_uverbs & mlx5_core & mlx5_ib"

Print the list of drivers which define multiple parameters without
space separators:

  ~$ usertools/dpdk-pmdinfo.py build/app/dpdk-testpmd | \
jq '.[] | select(.params!=null and (.params|test("=[^ ]+="))) | {name, 
params}'
  ...

The script passes flake8, black, isort and pylint checks.

I have tested this with a matrix of python/pyelftools versions:

  pyelftools
0.22 0.23 0.24 0.25 0.26 0.27 0.28 0.29
  3.6 ok   ok   ok   ok   ok   ok   ok   ok
  3.7 ok   ok   ok   ok   ok   ok   ok   ok
   Python 3.8 ok   ok   ok   ok   ok   ok   ok   ok
  3.9 ok   ok   ok   ok   ok   ok   ok   ok
  3.10  fail fail fail fail   ok   ok   ok   ok

All failures with python 3.10 are related to the same issue:

   File "elftools/construct/lib/container.py", line 5, in 
 from collections import MutableMapping
   ImportError: cannot import name 'MutableMapping' from 'collections'

Python 3.10 support is only available since pyelftools 0.26. The script
will only work with Python 3.6 and later. Update the minimal system
requirements and release notes.

NB: The output produced by the legacy -r/--raw flag can be obtained with
the following command:

   strings build/app/dpdk-testpmd | sed -n 's/^PMD_INFO_STRING= //p'

Cc: Olivier Matz 
Cc: Ferruh Yigit 
Cc: Bruce Richardson 
Signed-off-by: Robin Jarry 


<...>


diff --git a/doc/guides/rel_notes/release_22_11.rst 
b/doc/guides/rel_notes/release_22_11.rst
index 8c021cf0505e..67054f5acdc9 100644
--- a/doc/guides/rel_notes/release_22_11.rst
+++ b/doc/guides/rel_notes/release_22_11.rst
@@ -84,6 +84,11 @@ API Changes
 Also, make sure to start the actual text at the margin.
 ===

+* The ``dpdk-pmdinfo.py`` script was rewritten to produce valid JSON only.
+  PCI-IDs parsing has been removed.
+  To get a similar output to the (now removed) ``-r/--raw`` flag, you may use 
the following command::
+
+ strings $dpdk_binary_or_driver | sed -n 's/^PMD_INFO_STRING= //p'



Empty line is missing (in case there will be a new version for some 
other reason).



Thanks for the update,
Tested-by: Ferruh Yigit 



Re: [PATCH v4] usertools: rewrite pmdinfo

2022-09-20 Thread Ferruh Yigit

On 9/20/2022 6:48 PM, Ferruh Yigit wrote:

On 9/20/2022 11:42 AM, Robin Jarry wrote:



dpdk-pmdinfo.py does not produce any parseable output. The -r/--raw flag
merely prints multiple independent JSON lines which cannot be fed
directly to any JSON parser. Moreover, the script complexity is rather
high for such a simple task: extracting PMD_INFO_STRING from .rodata ELF
sections. Rewrite it so that it can produce valid JSON.

Remove the PCI database parsing for PCI-ID to Vendor-Device names
conversion. This should be done by external scripts (if really needed).

Here are some examples of use with jq:

Get the complete info for a given driver:

  ~$ usertools/dpdk-pmdinfo.py build/app/dpdk-testpmd | \
    jq '.[] | select(.name == "dmadev_idxd_pci")'
  {
    "name": "dmadev_idxd_pci",
    "params": "max_queues=0",
    "kmod": "vfio-pci",
    "pci_ids": [
  {
    "vendor": "8086",
    "device": "0b25"
  }
    ]
  }

Get only the required kernel modules for a given driver:

  ~$ usertools/dpdk-pmdinfo.py build/app/dpdk-testpmd | \
    jq '.[] | select(.name == "net_i40e").kmod'
  "* igb_uio | uio_pci_generic | vfio-pci"

Get only the required kernel modules for a given device:

  ~$ usertools/dpdk-pmdinfo.py build/app/dpdk-testpmd | \
    jq '.[] | select(.pci_ids[] | .vendor == "15b3" and .device == 
"1013").kmod'

  "* ib_uverbs & mlx5_core & mlx5_ib"

Print the list of drivers which define multiple parameters without
space separators:

  ~$ usertools/dpdk-pmdinfo.py build/app/dpdk-testpmd | \
    jq '.[] | select(.params!=null and (.params|test("=[^ ]+="))) | 
{name, params}'

  ...

The script passes flake8, black, isort and pylint checks.

I have tested this with a matrix of python/pyelftools versions:

  pyelftools
    0.22 0.23 0.24 0.25 0.26 0.27 0.28 0.29
  3.6 ok   ok   ok   ok   ok   ok   ok   ok
  3.7 ok   ok   ok   ok   ok   ok   ok   ok
   Python 3.8 ok   ok   ok   ok   ok   ok   ok   ok
  3.9 ok   ok   ok   ok   ok   ok   ok   ok
  3.10  fail fail fail fail   ok   ok   ok   ok

All failures with python 3.10 are related to the same issue:

   File "elftools/construct/lib/container.py", line 5, in 
 from collections import MutableMapping
   ImportError: cannot import name 'MutableMapping' from 'collections'

Python 3.10 support is only available since pyelftools 0.26. The script
will only work with Python 3.6 and later. Update the minimal system
requirements and release notes.

NB: The output produced by the legacy -r/--raw flag can be obtained with
the following command:

   strings build/app/dpdk-testpmd | sed -n 's/^PMD_INFO_STRING= //p'

Cc: Olivier Matz 
Cc: Ferruh Yigit 
Cc: Bruce Richardson 
Signed-off-by: Robin Jarry 


<...>

diff --git a/doc/guides/rel_notes/release_22_11.rst 
b/doc/guides/rel_notes/release_22_11.rst

index 8c021cf0505e..67054f5acdc9 100644
--- a/doc/guides/rel_notes/release_22_11.rst
+++ b/doc/guides/rel_notes/release_22_11.rst
@@ -84,6 +84,11 @@ API Changes
 Also, make sure to start the actual text at the margin.
 ===

+* The ``dpdk-pmdinfo.py`` script was rewritten to produce valid JSON 
only.

+  PCI-IDs parsing has been removed.
+  To get a similar output to the (now removed) ``-r/--raw`` flag, you 
may use the following command::

+
+ strings $dpdk_binary_or_driver | sed -n 's/^PMD_INFO_STRING= //p'



Empty line is missing (in case there will be a new version for some 
other reason).



Thanks for the update,
Tested-by: Ferruh Yigit 



Thomas, John,

Should we have documentation for usertools, since they are user facing?
What do you think?

Would it be possible to find resources for it?

Thanks,
ferruh


Re: [PATCH] net/nfp: improve readability NFP HWINFO header

2022-09-20 Thread Niklas Söderlund
Hi Ferruh,

Thanks for your feedback.

On 2022-09-20 18:33:02 +0100, Ferruh Yigit wrote:
> On 8/26/2022 6:39 AM, Chaoyong He wrote:
> > From: James Hershaw 
> > 
> > Prepend `0x` to the NFP HWINFO header value that is printed to improve
> > the readability of the printed statement.
> > 
> > Signed-off-by: James Hershaw 
> > Reviewed-by: Chaoyong He 
> > Reviewed-by: Niklas Söderlund 
> > ---
> >   drivers/net/nfp/nfpcore/nfp_hwinfo.c | 2 +-
> >   1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/drivers/net/nfp/nfpcore/nfp_hwinfo.c 
> > b/drivers/net/nfp/nfpcore/nfp_hwinfo.c
> > index c0516bf..9f848bd 100644
> > --- a/drivers/net/nfp/nfpcore/nfp_hwinfo.c
> > +++ b/drivers/net/nfp/nfpcore/nfp_hwinfo.c
> > @@ -108,7 +108,7 @@
> > goto exit_free;
> > header = (void *)db;
> > -   printf("NFP HWINFO header: %08x\n", *(uint32_t *)header);
> > +   printf("NFP HWINFO header: %#08x\n", *(uint32_t *)header);
> 
> Why is the driver using 'printf' directly instead of the rte_log APIs?
> 
> I can see there are already 'PMD_INIT_LOG' & 'PMD_DRV_LOG' macros for this.

We have a series ready to convert all printf style logging into rte_log 
APIs as well as fix some other style issues.

We also have a few other things in our internal patch queue waiting to
be sent out. To reduce conflicts in patchwork we are sending them out in
order, as some of them depend on each other. And the one cleaning up
log messages is at the end of the pile, unfortunately.

Do you think it's acceptable to take this fix as-is and then a patch
that converts all printf in one go, or would you prefer that we touch
this line only once and create a v2 of this fix that also moves it to
the rte_log APIs?

-- 
Kind Regards,
Niklas Söderlund


RE: [PATCH v4 4/9] dts: add ssh pexpect library

2022-09-20 Thread Honnappa Nagarahalli

> >
> >  On Fri, Jul 29, 2022 at 10:55:45AM +, Juraj Linkeš wrote:
> >  
> >  > +self.session = pxssh.pxssh(encoding="utf-8")
> >  > +self.session.login(
> >  > +self.node,
> >  > +self.username,
> >  > +self.password,
> >  > +original_prompt="[$#>]",
> >  > +
> >  password_regex=r"(?i)(?:password:)|(?:passphrase for
> >  key)|(?i)(password for .+:)",
> >  > +)
> >  > +
> > [1]self.logger.info(f"Connection to {self.node}
> >  succeeded")
> >  > +self.send_expect("stty -echo", "#")
> >  > +self.send_expect("stty columns 1000", "#")
> >  First of all, thanks for those changes! Having DTS inside DPDK makes
> >  test synchronization a lot easier. I'm happy to say (unsurprisingly)
> >  that it works with my RISC-V HiFive Unmatched board like a charm.
> >
> >
> >  Though there is a small issue with the lines above. They assume "#"
> >  as
> >  the prompt sign, even though original_prompt was set to "[$#>]".
> >  This
> >  touches on two problems:
> >  1. # is usually a root prompt - is DTS assumed to be run with root
> > privileges? DPDK may (in theory) run without them with some
> >  permission
> > adjustment (hugetlb, VFIO container, etc.). If we assume DTS
> >  needs
> > root access, this has to be both documented and validated before
> > running the whole suite. Otherwise it'll be hard to debug.
> >
> >
> >Around a year ago there were some attempts to get DTS to not require
> >root. This ended up running into issues because DTS sets up drivers for
> >you, which requires root as far as I know, as well as setting up
> >hugepages, which I think also requires root. The current version of DTS
> >can probably run without root, but it will probably stop working as
> >soon as DTS starts interacting with PCI devices. Elevating privileges
> >using pkexec or sudo is less portable and would require supporting a
> >lot more forms of authentication (kerberos/ldap for enterprise
> >deployments, passwords, 2fa, etc). It is much easier to say that the
> >default SSH agent must provide root access to the SUT and Traffic
> >Generator either with a password or pre-configured passwordless
> >authentication (ssh keys, kerberos, etc).
> >
> >[Honnappa] One piece of feedback we collected asks us to deprecate the use
> >of clear-text passwords in config files and of the root user. It suggests
> >using keys and sudo. It is a ‘Must Have’ item.
> >
> >
> >I agree it should be documented. I honestly didn't consider that anyone
> >would try running DTS as a non-root user.
> >
> >[Honnappa] +1 for supporting root users for now and documenting.
> >
> >
> >  2. Different shells use different prompts on different distros.
> >  Hence
> > perhaps there should be a regex here (same as with
> >  original_prompt)
> > and there could be a conf.yaml option to modify it on a per-host
> > basis?
> >
> >
> >As far as customizing the prompts, I think that is doable via a
> >configuration option.
> >As far as different shells, I don't think we were planning to support
> >anything besides either bash or posix-compatible shells. At the moment
> >all of the community lab systems use bash, and for ease of test
> >development it will be easier to mandate that everyone uses one shell.
> >Otherwise DTS CI will need to run once for each shell to catch issues,
> >which in my opinion are resources better spent on more in-depth testing
> >of DTS and DPDK.
> >
> >[Honnappa] +1 for using just bash, we can document this as well.
> >
>
> I would agree overall. Just supporting one shell is fine - certainly for now. 
> Also
> completely agree that we need to remove hard-coded passwords and ideally
> non-root. However, I think for the initial versions the main thing should be
> removing the passwords so I would be ok for keeping the "root"
> login requirement, so long as we support using ssh keys for login rather than
> hard-coded passwords.
I would be for dropping support for the hard-coded passwords completely. 
Setting up the password-less SSH is straightforward (not sure if you meant the 
same).

>
> /Bruce

I think the question is whether there are any platforms/devices that should be 
tested by DTS that do not support passwordless SSH.  Right now, the community 
lab is using SSH keys for everything. If Intel also doesn't need passwords, 
then it's up to the community whether to support them at all. It does make it a 
lot easier on DTS if we can just require that the active OpenSSH agent can log 
into all of the systems involved without a password. This would also make it 
easier to enable AD authent

Re: [PATCH] net/nfp: improve readability NFP HWINFO header

2022-09-20 Thread Ferruh Yigit

On 9/20/2022 6:51 PM, Niklas Söderlund wrote:

Hi Ferruh,

Thanks for your feedback.

On 2022-09-20 18:33:02 +0100, Ferruh Yigit wrote:

On 8/26/2022 6:39 AM, Chaoyong He wrote:

From: James Hershaw 

Prepend `0x` to the NFP HWINFO header value that is printed to improve
the readability of the printed statement.

Signed-off-by: James Hershaw 
Reviewed-by: Chaoyong He 
Reviewed-by: Niklas Söderlund 
---
   drivers/net/nfp/nfpcore/nfp_hwinfo.c | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/nfp/nfpcore/nfp_hwinfo.c 
b/drivers/net/nfp/nfpcore/nfp_hwinfo.c
index c0516bf..9f848bd 100644
--- a/drivers/net/nfp/nfpcore/nfp_hwinfo.c
+++ b/drivers/net/nfp/nfpcore/nfp_hwinfo.c
@@ -108,7 +108,7 @@
goto exit_free;
header = (void *)db;
-   printf("NFP HWINFO header: %08x\n", *(uint32_t *)header);
+   printf("NFP HWINFO header: %#08x\n", *(uint32_t *)header);


Why is the driver using 'printf' directly instead of the rte_log APIs?

I can see there are already 'PMD_INIT_LOG' & 'PMD_DRV_LOG' macros for this.


We have a series ready to convert all printf style logging into rte_log
APIs as well as fix some other style issues.

We also have a few other things in our internal patch queue waiting to
be sent out. To reduce conflicts in patchwork we are sending them out in
order, as some of them depend on each other. And the one cleaning up
log messages is at the end of the pile, unfortunately.

Do you think it's acceptable to take this fix as-is and then a patch
that converts all printf in one go, or would you prefer that we touch
this line only once and create a v2 of this fix that also moves it to
the rte_log APIs?



Hi Niklas,

Good to hear that you have patch to convert them to rte_log.

Instead of changing the log content and the API in the same patch, it is
better to keep them separate.


I prefer to convert them to the proper log API first, and fix the
content of the log later (so as not to update a line that uses the wrong call).
But the patch-order preference is a soft one; if doing it the other way around
(first fix the log content, later the API) makes your life easier, I am
OK to go with that too (as long as both issues are fixed).


Re: [PATCH 2/2] net/nfp: use dpdk debug macro to control nfp Rx/Tx log print

2022-09-20 Thread Ferruh Yigit

On 8/26/2022 7:03 AM, Chaoyong He wrote:

From: Long Wu 

Nfp log print was controlled by nfp's own macro before. This
commit changes to use dpdk debug rx/tx macro to control it.

Signed-off-by: Long Wu 
Reviewed-by: Chaoyong He 
Reviewed-by: Niklas Söderlund 


Acked-by: Ferruh Yigit 



Re: [PATCH 0/2] upgrade the log system of nfp PMD

2022-09-20 Thread Ferruh Yigit

On 8/26/2022 7:03 AM, Chaoyong He wrote:

This patch series makes some upgrades to the log system of the nfp PMD:
Use DPDK debug macro to control the nfp Rx/Tx log.
Add the support of nfp cpp log macro.

Long Wu (2):
   net/nfp: add support for nfp cpp log print
   net/nfp: use dpdk debug macro to control nfp Rx/Tx log print



Series applied to dpdk-next-net/main, thanks.


Patch titles updated slightly, to remove duplicated 'nfp' etc, please 
double check them in the next-net git repo.




Re: [PATCH v2] net/nfp: support Corigine PCIe ID for the nfp PMD

2022-09-20 Thread Ferruh Yigit

On 9/20/2022 11:28 AM, Niklas Söderlund wrote:

From: James Hershaw

Previously the nfp driver has supported NFP chips with the Netronome PCIe
ID. This patch extends the PMD to also support NFP chips with the
Corigine PCIe vendor ID (0x1da8), which at this point are assumed to be
otherwise identical from a software perspective.

Signed-off-by: James Hershaw
Reviewed-by: Niklas Söderlund


Applied to dpdk-next-net/main, thanks.


Re: [PATCH v4] usertools: rewrite pmdinfo

2022-09-20 Thread Robin Jarry
Ferruh Yigit, Sep 20, 2022 at 19:48:
> > +* The ``dpdk-pmdinfo.py`` script was rewritten to produce valid JSON only.
> > +  PCI-IDs parsing has been removed.
> > +  To get a similar output to the (now removed) ``-r/--raw`` flag, you may 
> > use the following command::
> > +
> > + strings $dpdk_binary_or_driver | sed -n 's/^PMD_INFO_STRING= //p'
> > 
>
> Empty line is missing (in case there will be a new version for some 
> other reason).

What do you mean?



RE: [PATCH v4 4/9] dts: add ssh pexpect library

2022-09-20 Thread Tu, Lijuan


> >
> >  On Fri, Jul 29, 2022 at 10:55:45AM +, Juraj Linkeš wrote:
> >  
> >  > +self.session = pxssh.pxssh(encoding="utf-8")
> >  > +self.session.login(
> >  > +self.node,
> >  > +self.username,
> >  > +self.password,
> >  > +original_prompt="[$#>]",
> >  > +
> >  password_regex=r"(?i)(?:password:)|(?:passphrase for
> >  key)|(?i)(password for .+:)",
> >  > +)
> >  > +
> > [1]self.logger.info(f"Connection to {self.node}
> >  succeeded")
> >  > +self.send_expect("stty -echo", "#")
> >  > +self.send_expect("stty columns 1000", "#")
> >  First of all, thanks for those changes! Having DTS inside DPDK makes
> >  test synchronization a lot easier. I'm happy to say (unsurprisingly)
> >  that it works with my RISC-V HiFive Unmatched board like a charm.
> >
> >
> >  Though there is a small issue with the lines above. They assume "#"
> >  as
> >  the prompt sign, even though original_prompt was set to "[$#>]".
> >  This
> >  touches on two problems:
> >  1. # is usually a root prompt - is DTS assumed to be run with root
> > privileges? DPDK may (in theory) run without them with some
> >  permission
> > adjustment (hugetlb, VFIO container, etc.). If we assume DTS
> >  needs
> > root access, this has to be both documented and validated before
> > running the whole suite. Otherwise it'll be hard to debug.
> >
> >
> >Around a year ago there were some attempts to get DTS to not require
> >root. This ended up running into issues because DTS sets up drivers for
> >you, which requires root as far as I know, as well as setting up
> >hugepages, which I think also requires root. The current version of DTS
> >can probably run without root, but it will probably stop working as
> >soon as DTS starts interacting with PCI devices. Elevating privileges
> >using pkexec or sudo is less portable and would require supporting a
> >lot more forms of authentication (kerberos/ldap for enterprise
> >deployments, passwords, 2fa, etc). It is much easier to say that the
> >default SSH agent must provide root access to the SUT and Traffic
> >Generator either with a password or pre-configured passwordless
> >authentication (ssh keys, kerberos, etc).
> >
> >[Honnappa] One of the feedback we collected asks to deprecate the use
> >of clear text passwords in config files and root user. It suggests to
> >use keys and sudo. It is a ‘Must Have’ item.
> >
> >
> >I agree it should be documented. I honestly didn't consider that anyone
> >would try running DTS as a non-root user.
> >
> >[Honnappa] +1 for supporting root users for now and documenting.
> >
> >
> >  2. Different shells use different prompts on different distros.
> >  Hence
> > perhaps there should be a regex here (same as with
> >  original_prompt)
> > and there could be a conf.yaml option to modify it on a per-host
> > basis?
> >
> >
> >As far as customizing the prompts, I think that is doable via a
> >configuration option.
> >As far as different shells, I don't think we were planning to support
> >anything besides either bash or posix-compatible shells. At the moment
> >all of the community lab systems use bash, and for ease of test
> >development it will be easier to mandate that everyone uses one shell.
> >Otherwise DTS CI will need to run once for each shell to catch issues,
> >which in my opinion are resources better spent on more in-depth testing
> >of DTS and DPDK.
> >
> >[Honnappa] +1 for using just bash, we can document this as well.
> >
>
> I would agree overall. Just supporting one shell is fine - certainly for now. 
> Also
> completely agree that we need to remove hard-coded passwords and ideally
> non-root. However, I think for the initial versions the main thing should be
> removing the passwords so I would be ok for keeping the "root"
> login requirement, so long as we support using ssh keys for login rather than
> hard-coded passwords.
I would be for dropping support for the hard-coded passwords completely. 
Setting up the password-less SSH is straightforward (not sure if you meant the 
same).

>
> /Bruce

I think the question is whether there are any platforms/devices that should be 
tested by DTS that do not support passwordless SSH.  Right now, the community 
lab is using SSH keys for everything. If Intel also doesn't need passwords, 
then it's up to the community whether to support them at all. It does make it a 
lot easier on DTS if we can just require that the active OpenSSH agent can log 
into all of the systems involved without a password. This would also make it 
easier to enable AD authen

RE: [PATCH v9 07/12] net/nfp: add flower ctrl VNIC related logics

2022-09-20 Thread Chaoyong He
> On 9/15/2022 11:44 AM, Chaoyong He wrote:
> > Adds the setup/start logic for the ctrl vNIC. This vNIC is used by the
> > PMD and flower firmware application as a communication channel
> between
> > driver and firmware. In the case of OVS it is also used to communicate
> > flow statistics from hardware to the driver.
> >
> > A rte_eth device is not exposed to DPDK for this vNIC as it is
> > strictly used internally by flower logic.
> >
> 
> Hi Chaoyong,
> 
> Similar comment with previous versions, interface is created using regular
> 'rte_eth_dev_allocate()' API, I think interface will be visible to 
> application, I
> can't understand the need of creating an interface for control.
> 
> What is the communication method between driver and FW?
> Since one of the following patches (09/12) introduces Rx/Tx for ctrl 
> interface,
> is device interface is control packets (similar to network data packets)?
> 

Basically, the 'control message' exists in the form of normal data packets.

When we use the flower firmware application, there exist two types of packets 
for now,
and they are identified only from the prepend meta-data.

Bit    3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0
-----\ 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
Word  +---------------+---------------+---------------+---------------+
   0  |     type      |     type      |     type      |     type      |
      +---------------+---------------+---------------+---------------+
The 'control message' packets are processed by the ctrl vNIC.
The 'normal' packets are processed by the pf vNIC.

The communication method between driver and firmware is decided by the
design of hardware and firmware.

The kernel driver also has the same ctrl vNIC and pf vNIC ethdev and the usage 
is same.

> > Because of the add of ctrl vNIC, a new PCItoCPPBar is needed. Modify
> > the related logics.
> >
> > Signed-off-by: Chaoyong He 
> > Reviewed-by: Niklas Söderlund 
> 
> <...>



[PATCH v4 0/7] ethdev: separate metering and marking from policing

2022-09-20 Thread Alexander Kozyrev
Extend Metering and Marking support in the Flow API:
1. Add METER_COLOR item to match Color Marker set by a Meter.
2. Add the ability to set Color Marker via modify_field Flow API.
3. Add Meter API to get profile/policy objects.
4. Add METER_MARK action to perform Meter color metering and marking.
Provide greater flexibility in how Metering can be used.

RFC: 
https://patchwork.dpdk.org/project/dpdk/cover/20220502200439.4100965-1-akozy...@nvidia.com/

Traditional Meter Usage:

profile_id = rte_mtr_meter_profile_add(RFC_params);
policy_id = rte_mtr_meter_policy_add(actions[RED/YELLOW/GREEN]);
meter_id = rte_mtr_create(profile_id, policy_id);
rte_flow_create(pattern=5-tuple,actions=METER(meter_id));

The METER action effectively translates to the following:
1. Metering a packet stream.
2. Marking packets with an appropriate color.
3. Jump to a policy group.
4. Match on a color.
5. Execute assigned policy actions for the color.

New Meter Usage Model:
profile_id = rte_mtr_meter_profile_add(RFC_params);
*profile_obj_ptr = rte_mtr_meter_profile_get(profile_id);
rte_flow_create(pattern=5-tuple,
actions=METER(profile_obj_ptr),JUMP);
rte_flow_create(pattern=COLOR, actions=...);

The METER_MARK action effectively translates to the following:
1. Metering a packet stream.
2. Marking packets with an appropriate color.

A user is able to match the color later with the COLOR item.
In order to do this we add the JUMP action after the METER action.

3. Jump to a policy group.
4. Match on a color.
5. Execute actions for the color.

Here we decoupled the meter profile usage from the meter policy usage
for greater flexibility and got rid of any locks related to meter_id lookup.

Another example of the meter creation to mimic the old model entirely:
profile_id = rte_mtr_meter_profile_add(RFC_params);
*profile_obj_ptr = rte_mtr_meter_profile_get(profile_id);
policy_id = rte_mtr_meter_policy_add(actions[RED/YELLOW/GREEN]);
*policy_obj_ptr = rte_mtr_meter_policy_get(policy_id);
rte_flow_create(pattern=5-tuple,
actions=METER(profile_obj_ptr, policy_obj_ptr));

In this case, we define the policy actions right away.
The main advantage is not having to lookup for profile_id/policy_id.

To free the meter objects we need to do the following:
rte_flow_destroy(flow_handle);
rte_mtr_meter_policy_delete(policy_id);
rte_mtr_meter_profile_delete(profile_id);
profile_obj_ptr and policy_obj_ptr are no longer valid after that.

The meter profile configuration cannot be updated dynamically
with the current set of patches, but can be supported later on.
Now you have to destroy flows and profiles and recreate them.
But rte_mtr_meter_profile_update()/rte_mtr_meter_policy_update()
can have the corresponding siblings without mtr_id parameters.
In this case, we can update the config and all the flows using them.

The meter sharing is done via the indirect action Flow API:
profile_id = rte_mtr_meter_profile_add(RFC_params);
*profile_obj_ptr = rte_mtr_meter_profile_get(profile_id);
handle = rte_flow_action_handle_create(action=METER(profile_obj_ptr, NULL));
flow1 = rte_flow_create(pattern=5-tuple-1, actions=INDIRECT(handle));
flow2 = rte_flow_create(pattern=5-tuple-2, actions=INDIRECT(handle));

Once we are done with the flow rules we can free everything.
rte_flow_destroy(flow1);
rte_flow_destroy(flow2);
rte_flow_action_handle_destroy(handle);
rte_mtr_meter_profile_delete(profile_id);

Signed-off-by: Alexander Kozyrev 

Alexander Kozyrev (7):
  ethdev: add meter color flow matching item
  ethdev: allow meter color marker modification
  ethdev: get meter profile/policy objects
  ethdev: add meter color mark flow action
  app/test-pmd: add meter color flow matching item
  app/test-pmd: allow meter color marker modification
  app/testpmd: add meter color mark flow action

 app/test-pmd/cmdline_flow.c   | 212 +-
 app/test-pmd/config.c |  26 +++
 app/test-pmd/testpmd.h|   4 +
 doc/guides/prog_guide/rte_flow.rst|  32 +++
 .../traffic_metering_and_policing.rst |   7 +
 doc/guides/rel_notes/release_22_11.rst|   6 +
 doc/guides/testpmd_app_ug/testpmd_funcs.rst   |   9 +
 lib/ethdev/rte_flow.c |   2 +
 lib/ethdev/rte_flow.h |  60 +
 lib/ethdev/rte_mtr.c  |  41 
 lib/ethdev/rte_mtr.h  |  40 
 lib/ethdev/rte_mtr_driver.h   |  19 ++
 lib/ethdev/version.map|   4 +
 13 files changed, 461 insertions(+), 1 deletion(-)

-- 
2.18.2



[PATCH v4 1/7] ethdev: add meter color flow matching item

2022-09-20 Thread Alexander Kozyrev
Provide an ability to use a Color Marker set by a Meter
as a matching item in Flow API. The Color Marker reflects
the metering result by setting the metadata for a
packet to a particular codepoint: green, yellow or red.

Signed-off-by: Alexander Kozyrev 
---
 doc/guides/prog_guide/rte_flow.rst |  7 +++
 doc/guides/rel_notes/release_22_11.rst |  3 +++
 lib/ethdev/rte_flow.c  |  1 +
 lib/ethdev/rte_flow.h  | 24 
 4 files changed, 35 insertions(+)

diff --git a/doc/guides/prog_guide/rte_flow.rst 
b/doc/guides/prog_guide/rte_flow.rst
index 588914b231..018def1033 100644
--- a/doc/guides/prog_guide/rte_flow.rst
+++ b/doc/guides/prog_guide/rte_flow.rst
@@ -1651,6 +1651,13 @@ Matches a PPP header.
 - ``proto_id``: PPP protocol identifier.
 - Default ``mask`` matches addr, ctrl, proto_id.
 
+Item: ``METER_COLOR``
+^
+
+Matches Color Marker set by a Meter.
+
+- ``color``: Metering color marker.
+
 Actions
 ~~~
 
diff --git a/doc/guides/rel_notes/release_22_11.rst 
b/doc/guides/rel_notes/release_22_11.rst
index 8c021cf050..f6c02bb5e7 100644
--- a/doc/guides/rel_notes/release_22_11.rst
+++ b/doc/guides/rel_notes/release_22_11.rst
@@ -55,6 +55,9 @@ New Features
  Also, make sure to start the actual text at the margin.
  ===
 
+* **Extended Metering and Marking support in the Flow API.**
+
+  * Added METER_COLOR item to match Color Marker set by a Meter.
 
 Removed Items
 -
diff --git a/lib/ethdev/rte_flow.c b/lib/ethdev/rte_flow.c
index 501be9d602..99247b599d 100644
--- a/lib/ethdev/rte_flow.c
+++ b/lib/ethdev/rte_flow.c
@@ -159,6 +159,7 @@ static const struct rte_flow_desc_data rte_flow_desc_item[] 
= {
rte_flow_item_flex_conv),
MK_FLOW_ITEM(L2TPV2, sizeof(struct rte_flow_item_l2tpv2)),
MK_FLOW_ITEM(PPP, sizeof(struct rte_flow_item_ppp)),
+   MK_FLOW_ITEM(METER_COLOR, sizeof(struct rte_flow_item_meter_color)),
 };
 
 /** Generate flow_action[] entry. */
diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h
index a79f1e7ef0..d49f5fd1b7 100644
--- a/lib/ethdev/rte_flow.h
+++ b/lib/ethdev/rte_flow.h
@@ -668,6 +668,14 @@ enum rte_flow_item_type {
 * See struct rte_flow_item_gre_opt.
 */
RTE_FLOW_ITEM_TYPE_GRE_OPTION,
+
+   /**
+* Matches Meter Color Marker.
+*
+* See struct rte_flow_item_meter_color.
+*/
+
+   RTE_FLOW_ITEM_TYPE_METER_COLOR,
 };
 
 /**
@@ -2198,6 +2206,22 @@ struct rte_flow_item_flex_conf {
uint32_t nb_outputs;
 };
 
+/**
+ * RTE_FLOW_ITEM_TYPE_METER_COLOR.
+ *
+ * Matches Color Marker set by a Meter.
+ */
+struct rte_flow_item_meter_color {
+   enum rte_color color; /**< Meter color marker. */
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_METER_COLOR. */
+#ifndef __cplusplus
+static const struct rte_flow_item_meter_color rte_flow_item_meter_color_mask = 
{
+   .color = RTE_COLORS,
+};
+#endif
+
 /**
  * Action types.
  *
-- 
2.18.2



[PATCH v4 2/7] ethdev: allow meter color marker modification

2022-09-20 Thread Alexander Kozyrev
Extend modify_field Flow API with support of Meter Color Marker
modifications. It allows setting the packet's metadata to any
color marker: green, yellow or red. A user is able to specify
an initial packet color for Meter API or create simple Metering
and Marking flow rules based on his own coloring algorithm.

Signed-off-by: Alexander Kozyrev 
---
 doc/guides/rel_notes/release_22_11.rst | 1 +
 lib/ethdev/rte_flow.h  | 1 +
 2 files changed, 2 insertions(+)

diff --git a/doc/guides/rel_notes/release_22_11.rst 
b/doc/guides/rel_notes/release_22_11.rst
index f6c02bb5e7..a7651f69ba 100644
--- a/doc/guides/rel_notes/release_22_11.rst
+++ b/doc/guides/rel_notes/release_22_11.rst
@@ -58,6 +58,7 @@ New Features
 * **Extended Metering and Marking support in the Flow API.**
 
   * Added METER_COLOR item to match Color Marker set by a Meter.
+  * Added ability to set Color Marker via modify_field Flow API.
 
 Removed Items
 -
diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h
index d49f5fd1b7..fddd47e7b5 100644
--- a/lib/ethdev/rte_flow.h
+++ b/lib/ethdev/rte_flow.h
@@ -3713,6 +3713,7 @@ enum rte_flow_field_id {
RTE_FLOW_FIELD_VALUE,   /**< Immediate value. */
RTE_FLOW_FIELD_IPV4_ECN,/**< IPv4 ECN. */
RTE_FLOW_FIELD_IPV6_ECN,/**< IPv6 ECN. */
+   RTE_FLOW_FIELD_METER_COLOR, /**< Meter color marker. */
 };
 
 /**
-- 
2.18.2



[PATCH v4 3/7] ethdev: get meter profile/policy objects

2022-09-20 Thread Alexander Kozyrev
Introduce a new Meter API to retrieve a Meter profile and policy
objects using the profile/policy ID previously created with
meter_profile_add() and meter_policy_add() functions.
That allows to save the pointer and avoid any lookups in the
corresponding lists for quick access during a flow rule creation.
Also, it eliminates the need for CIR, CBS and EBS calculations
and conversion to a PMD-specific format when the profile is used.
Pointers are destroyed and cannot be used after the corresponding
meter_profile_delete() or meter_policy_delete() are called.

Signed-off-by: Alexander Kozyrev 
---
 .../traffic_metering_and_policing.rst |  7 
 doc/guides/rel_notes/release_22_11.rst|  1 +
 lib/ethdev/rte_flow.h |  7 
 lib/ethdev/rte_mtr.c  | 41 +++
 lib/ethdev/rte_mtr.h  | 40 ++
 lib/ethdev/rte_mtr_driver.h   | 19 +
 lib/ethdev/version.map|  4 ++
 7 files changed, 119 insertions(+)

diff --git a/doc/guides/prog_guide/traffic_metering_and_policing.rst 
b/doc/guides/prog_guide/traffic_metering_and_policing.rst
index d1958a023d..2ce3236ad8 100644
--- a/doc/guides/prog_guide/traffic_metering_and_policing.rst
+++ b/doc/guides/prog_guide/traffic_metering_and_policing.rst
@@ -107,6 +107,13 @@ traffic meter and policing library.
  to the list of meter actions (``struct 
rte_mtr_meter_policy_params::actions``)
  specified per color as show in :numref:`figure_rte_mtr_chaining`.
 
+#. The ``rte_mtr_meter_profile_get()`` and ``rte_mtr_meter_policy_get()``
+   API functions are available for getting the object pointers directly.
+   These pointers allow quick access to profile/policy objects and are
+   required by the ``RTE_FLOW_ACTION_TYPE_METER_MARK`` action.
+   This action may omit the policy definition to provide flexibility
+   to match a color later with the ``RTE_FLOW_ITEM_TYPE_METER_COLOR`` item.
+
 Protocol based input color selection
 
 
diff --git a/doc/guides/rel_notes/release_22_11.rst 
b/doc/guides/rel_notes/release_22_11.rst
index a7651f69ba..7969609788 100644
--- a/doc/guides/rel_notes/release_22_11.rst
+++ b/doc/guides/rel_notes/release_22_11.rst
@@ -59,6 +59,7 @@ New Features
 
   * Added METER_COLOR item to match Color Marker set by a Meter.
   * Added ability to set Color Marker via modify_field Flow API.
+  * Added Meter API to get a pointer to profile/policy by their ID.
 
 Removed Items
 -
diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h
index fddd47e7b5..edf69fc44f 100644
--- a/lib/ethdev/rte_flow.h
+++ b/lib/ethdev/rte_flow.h
@@ -3826,6 +3826,13 @@ struct rte_flow_action {
  */
 struct rte_flow;
 
+/**
+ * Opaque type for Meter profile object returned by MTR API.
+ *
+ * This handle can be used to create Meter actions instead of profile ID.
+ */
+struct rte_flow_meter_profile;
+
 /**
  * @warning
  * @b EXPERIMENTAL: this structure may change without prior notice
diff --git a/lib/ethdev/rte_mtr.c b/lib/ethdev/rte_mtr.c
index c460e4f4e0..9e79b744da 100644
--- a/lib/ethdev/rte_mtr.c
+++ b/lib/ethdev/rte_mtr.c
@@ -56,6 +56,25 @@ rte_mtr_ops_get(uint16_t port_id, struct rte_mtr_error 
*error)
ops->func;  \
 })
 
+#define RTE_MTR_HNDL_FUNC(port_id, func)   \
+({ \
+   const struct rte_mtr_ops *ops = \
+   rte_mtr_ops_get(port_id, error);\
+   if (ops == NULL)\
+   return NULL;\
+   \
+   if (ops->func == NULL) {\
+   rte_mtr_error_set(error,\
+   ENOSYS, \
+   RTE_MTR_ERROR_TYPE_UNSPECIFIED, \
+   NULL,   \
+   rte_strerror(ENOSYS));  \
+   return NULL;\
+   }   \
+   \
+   ops->func;  \
+})
+
 /* MTR capabilities get */
 int
 rte_mtr_capabilities_get(uint16_t port_id,
@@ -90,6 +109,17 @@ rte_mtr_meter_profile_delete(uint16_t port_id,
meter_profile_id, error);
 }
 
+/** MTR meter profile get */
+struct rte_flow_meter_profile *
+rte_mtr_meter_profile_get(uint16_t port_id,
+   uint32_t meter_profile_id,
+   struct rte_mtr_error *error)
+{
+   struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+   return RTE_MTR_HNDL_FUNC(port_id, meter_profile_get)(dev,
+   meter_profile_id, error);
+}
+
 /* MTR meter policy validate */
 int
 rte_mtr_meter_policy_validate(uint16_t port_id,
@@ -124,6 +

[PATCH v4 4/7] ethdev: add meter color mark flow action

2022-09-20 Thread Alexander Kozyrev
Create a new Flow API action: METER_MARK.
It Meters a packet stream and marks its packets with colors.
The marking is done on a metadata, not on a packet field.
Unlike the METER action, it performs no policing at all.
A user has the flexibility to create any policies with the help of
the METER_COLOR item later, only meter profile is mandatory here.

Signed-off-by: Alexander Kozyrev 
---
 doc/guides/prog_guide/rte_flow.rst | 25 +++
 doc/guides/rel_notes/release_22_11.rst |  1 +
 lib/ethdev/rte_flow.c  |  1 +
 lib/ethdev/rte_flow.h  | 28 ++
 4 files changed, 55 insertions(+)

diff --git a/doc/guides/prog_guide/rte_flow.rst 
b/doc/guides/prog_guide/rte_flow.rst
index 018def1033..5b87d9f61e 100644
--- a/doc/guides/prog_guide/rte_flow.rst
+++ b/doc/guides/prog_guide/rte_flow.rst
@@ -3411,6 +3411,31 @@ This action is meant to use the same structure as 
`Action: PORT_REPRESENTOR`_.
 
 See also `Item: REPRESENTED_PORT`_.
 
+Action: ``METER_MARK``
+^^
+
+Meters a packet stream and marks its packets with colors.
+
+Unlike the ``METER`` action, policing is optional and may be
+performed later with the help of the ``METER_COLOR`` item.
+The profile and/or policy objects have to be created
+using the rte_mtr_meter_profile_add()/rte_mtr_meter_policy_add() API.
+Pointers to these objects are used as action parameters
+and need to be retrieved using the rte_mtr_meter_profile_get() API
+and rte_mtr_meter_policy_get() API respectively.
+
+.. _table_rte_flow_action_meter_mark:
+
+.. table:: METER_MARK
+
+   +--+--+
+   | Field| Value|
+   +==+==+
+   | ``profile``  | Meter profile object |
+   +--+--+
+   | ``policy``   | Meter policy object  |
+   +--+--+
+
 Negative types
 ~~
 
diff --git a/doc/guides/rel_notes/release_22_11.rst 
b/doc/guides/rel_notes/release_22_11.rst
index 7969609788..401552ff84 100644
--- a/doc/guides/rel_notes/release_22_11.rst
+++ b/doc/guides/rel_notes/release_22_11.rst
@@ -60,6 +60,7 @@ New Features
   * Added METER_COLOR item to match Color Marker set by a Meter.
   * Added ability to set Color Marker via modify_field Flow API.
   * Added Meter API to get a pointer to profile/policy by their ID.
+  * Added METER_MARK action for Metering with lockless profile/policy access.
 
 Removed Items
 -
diff --git a/lib/ethdev/rte_flow.c b/lib/ethdev/rte_flow.c
index 99247b599d..7ff024f33e 100644
--- a/lib/ethdev/rte_flow.c
+++ b/lib/ethdev/rte_flow.c
@@ -260,6 +260,7 @@ static const struct rte_flow_desc_data 
rte_flow_desc_action[] = {
MK_FLOW_ACTION(CONNTRACK, sizeof(struct rte_flow_action_conntrack)),
MK_FLOW_ACTION(PORT_REPRESENTOR, sizeof(struct rte_flow_action_ethdev)),
MK_FLOW_ACTION(REPRESENTED_PORT, sizeof(struct rte_flow_action_ethdev)),
+   MK_FLOW_ACTION(METER_MARK, sizeof(struct rte_flow_action_meter_mark)),
 };
 
 int
diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h
index edf69fc44f..74e7ddf73a 100644
--- a/lib/ethdev/rte_flow.h
+++ b/lib/ethdev/rte_flow.h
@@ -2903,6 +2903,15 @@ enum rte_flow_action_type {
 * @see struct rte_flow_action_ethdev
 */
RTE_FLOW_ACTION_TYPE_REPRESENTED_PORT,
+
+   /**
+* Traffic metering and marking (MTR).
+* the entity represented by the given ethdev.
+*
+* @see struct rte_flow_action_meter_mark
+* See file rte_mtr.h for MTR profile object configuration.
+*/
+   RTE_FLOW_ACTION_TYPE_METER_MARK,
 };
 
 /**
@@ -3774,6 +3783,25 @@ struct rte_flow_action_modify_field {
uint32_t width; /**< Number of bits to use from a source field. */
 };
 
+/**
+ * RTE_FLOW_ACTION_TYPE_METER_MARK
+ *
+ * Traffic metering and marking (MTR).
+ *
+ * Meters a packet stream and marks its packets either
+ * green, yellow, or red according to the specified profile.
+ * The policy is optional and may be specified for defining
+ * subsequent actions based on a color assigned by MTR.
+ * Alternatively, the METER_COLOR item may be used for this.
+ */
+struct rte_flow_action_meter_mark {
+
+   /**< Profile config retrieved with rte_mtr_profile_get(). */
+   struct rte_flow_meter_profile *profile;
+   /**< Policy config retrieved with rte_mtr_policy_get(). */
+   struct rte_flow_meter_policy *policy;
+};
+
 /* Mbuf dynamic field offset for metadata. */
 extern int32_t rte_flow_dynf_metadata_offs;
 
-- 
2.18.2



[PATCH v4 6/7] app/test-pmd: allow meter color marker modification

2022-09-20 Thread Alexander Kozyrev
Extend the list of available modify_field IDs to include
recently added meter color marker item in testpmd CLI.

Signed-off-by: Alexander Kozyrev 
---
 app/test-pmd/cmdline_flow.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index b8c92efb1d..e6f4bdf3b2 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -799,7 +799,7 @@ static const char *const modify_field_ids[] = {
"udp_port_src", "udp_port_dst",
"vxlan_vni", "geneve_vni", "gtp_teid",
"tag", "mark", "meta", "pointer", "value",
-   "ipv4_ecn", "ipv6_ecn", NULL
+   "ipv4_ecn", "ipv6_ecn", "meter_color", NULL
 };
 
 static const char *const meter_colors[] = {
-- 
2.18.2



[PATCH v4 5/7] app/test-pmd: add meter color flow matching item

2022-09-20 Thread Alexander Kozyrev
Add testpmd command line to match on a meter color:
flow create 0 ingress group 0 pattern meter color is green / end

Signed-off-by: Alexander Kozyrev 
---
 app/test-pmd/cmdline_flow.c | 83 +
 doc/guides/testpmd_app_ug/testpmd_funcs.rst |  4 +
 2 files changed, 87 insertions(+)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 7f50028eb7..b8c92efb1d 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -458,6 +458,9 @@ enum index {
ITEM_PPP_ADDR,
ITEM_PPP_CTRL,
ITEM_PPP_PROTO_ID,
+   ITEM_METER,
+   ITEM_METER_COLOR,
+   ITEM_METER_COLOR_NAME,
 
/* Validate/create actions. */
ACTIONS,
@@ -799,6 +802,10 @@ static const char *const modify_field_ids[] = {
"ipv4_ecn", "ipv6_ecn", NULL
 };
 
+static const char *const meter_colors[] = {
+   "green", "yellow", "red", "all", NULL
+};
+
 /** Maximum number of subsequent tokens and arguments on the stack. */
 #define CTX_STACK_SIZE 16
 
@@ -1332,6 +1339,7 @@ static const enum index next_item[] = {
ITEM_FLEX,
ITEM_L2TPV2,
ITEM_PPP,
+   ITEM_METER,
END_SET,
ZERO,
 };
@@ -1804,6 +1812,12 @@ static const enum index item_ppp[] = {
ZERO,
 };
 
+static const enum index item_meter[] = {
+   ITEM_METER_COLOR,
+   ITEM_NEXT,
+   ZERO,
+};
+
 static const enum index next_action[] = {
ACTION_END,
ACTION_VOID,
@@ -2372,6 +2386,9 @@ static int parse_ia_id2ptr(struct context *ctx, const 
struct token *token,
 static int parse_mp(struct context *, const struct token *,
const char *, unsigned int,
void *, unsigned int);
+static int parse_meter_color(struct context *ctx, const struct token *token,
+const char *str, unsigned int len, void *buf,
+unsigned int size);
 static int comp_none(struct context *, const struct token *,
 unsigned int, char *, unsigned int);
 static int comp_boolean(struct context *, const struct token *,
@@ -2402,6 +2419,8 @@ static int comp_table_id(struct context *, const struct 
token *,
 unsigned int, char *, unsigned int);
 static int comp_queue_id(struct context *, const struct token *,
 unsigned int, char *, unsigned int);
+static int comp_meter_color(struct context *, const struct token *,
+   unsigned int, char *, unsigned int);
 
 /** Token definitions. */
 static const struct token token_list[] = {
@@ -5064,6 +5083,29 @@ static const struct token token_list[] = {
.args = ARGS(ARGS_ENTRY(struct rte_flow_item_ppp,
hdr.proto_id)),
},
+   [ITEM_METER] = {
+   .name = "meter",
+   .help = "match meter color",
+   .priv = PRIV_ITEM(METER_COLOR,
+ sizeof(struct rte_flow_item_meter_color)),
+   .next = NEXT(item_meter),
+   .call = parse_vc,
+   },
+   [ITEM_METER_COLOR] = {
+   .name = "color",
+   .help = "meter color",
+   .next = NEXT(item_meter,
+NEXT_ENTRY(ITEM_METER_COLOR_NAME),
+item_param),
+   .args = ARGS(ARGS_ENTRY(struct rte_flow_item_meter_color,
+   color)),
+   },
+   [ITEM_METER_COLOR_NAME] = {
+   .name = "color_name",
+   .help = "meter color name",
+   .call = parse_meter_color,
+   .comp = comp_meter_color,
+   },
/* Validate/create actions. */
[ACTIONS] = {
.name = "actions",
@@ -9867,6 +9909,30 @@ parse_flex_handle(struct context *ctx, const struct 
token *token,
return ret;
 }
 
+/** Parse Meter color name */
+static int
+parse_meter_color(struct context *ctx, const struct token *token,
+ const char *str, unsigned int len, void *buf,
+ unsigned int size)
+{
+   struct rte_flow_item_meter_color *meter_color;
+   unsigned int i;
+
+   (void)token;
+   (void)buf;
+   (void)size;
+   for (i = 0; meter_colors[i]; ++i)
+   if (!strcmp_partial(meter_colors[i], str, len))
+   break;
+   if (!meter_colors[i])
+   return -1;
+   if (!ctx->object)
+   return len;
+   meter_color = ctx->object;
+   meter_color->color = (enum rte_color)i;
+   return len;
+}
+
 /** No completion. */
 static int
 comp_none(struct context *ctx, const struct token *token,
@@ -10158,6 +10224,20 @@ comp_queue_id(struct context *ctx, const struct token 
*token,
return i;
 }
 
+/** Complete available Meter colors. */
+static int
+comp_meter_color(struct context *ctx, const struct token *token,
+unsig

[PATCH v4 7/7] app/testpmd: add meter color mark flow action

2022-09-20 Thread Alexander Kozyrev
Add testpmd command line to match for METER_MARK action:
flow create ... actions meter_mark mtr_profile 20 / end

Signed-off-by: Alexander Kozyrev 
---
 app/test-pmd/cmdline_flow.c | 127 
 app/test-pmd/config.c   |  26 
 app/test-pmd/testpmd.h  |   4 +
 doc/guides/testpmd_app_ug/testpmd_funcs.rst |   5 +
 4 files changed, 162 insertions(+)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index e6f4bdf3b2..dd837e27f7 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -53,6 +53,7 @@ enum index {
COMMON_GROUP_ID,
COMMON_PRIORITY_LEVEL,
COMMON_INDIRECT_ACTION_ID,
+   COMMON_PROFILE_ID,
COMMON_POLICY_ID,
COMMON_FLEX_HANDLE,
COMMON_FLEX_TOKEN,
@@ -508,6 +509,11 @@ enum index {
ACTION_METER_COLOR_YELLOW,
ACTION_METER_COLOR_RED,
ACTION_METER_ID,
+   ACTION_METER_MARK,
+   ACTION_METER_PROFILE,
+   ACTION_METER_PROFILE_ID2PTR,
+   ACTION_METER_POLICY,
+   ACTION_METER_POLICY_ID2PTR,
ACTION_OF_SET_MPLS_TTL,
ACTION_OF_SET_MPLS_TTL_MPLS_TTL,
ACTION_OF_DEC_MPLS_TTL,
@@ -1835,6 +1841,7 @@ static const enum index next_action[] = {
ACTION_PORT_ID,
ACTION_METER,
ACTION_METER_COLOR,
+   ACTION_METER_MARK,
ACTION_OF_SET_MPLS_TTL,
ACTION_OF_DEC_MPLS_TTL,
ACTION_OF_SET_NW_TTL,
@@ -1951,6 +1958,13 @@ static const enum index action_meter_color[] = {
ZERO,
 };
 
+static const enum index action_meter_mark[] = {
+   ACTION_METER_PROFILE,
+   ACTION_METER_POLICY,
+   ACTION_NEXT,
+   ZERO,
+};
+
 static const enum index action_of_set_mpls_ttl[] = {
ACTION_OF_SET_MPLS_TTL_MPLS_TTL,
ACTION_NEXT,
@@ -2389,6 +2403,14 @@ static int parse_mp(struct context *, const struct token 
*,
 static int parse_meter_color(struct context *ctx, const struct token *token,
 const char *str, unsigned int len, void *buf,
 unsigned int size);
+static int parse_meter_profile_id2ptr(struct context *ctx,
+ const struct token *token,
+ const char *str, unsigned int len,
+ void *buf, unsigned int size);
+static int parse_meter_policy_id2ptr(struct context *ctx,
+const struct token *token,
+const char *str, unsigned int len,
+void *buf, unsigned int size);
 static int comp_none(struct context *, const struct token *,
 unsigned int, char *, unsigned int);
 static int comp_boolean(struct context *, const struct token *,
@@ -2550,6 +2572,13 @@ static const struct token token_list[] = {
.call = parse_int,
.comp = comp_none,
},
+   [COMMON_PROFILE_ID] = {
+   .name = "{profile_id}",
+   .type = "PROFILE_ID",
+   .help = "profile id",
+   .call = parse_int,
+   .comp = comp_none,
+   },
[COMMON_POLICY_ID] = {
.name = "{policy_id}",
.type = "POLICY_ID",
@@ -5428,6 +5457,42 @@ static const struct token token_list[] = {
.args = ARGS(ARGS_ENTRY(struct rte_flow_action_meter, mtr_id)),
.call = parse_vc_conf,
},
+   [ACTION_METER_MARK] = {
+   .name = "meter_mark",
+   .help = "meter the directed packets using profile and policy",
+   .priv = PRIV_ACTION(METER_MARK,
+   sizeof(struct rte_flow_action_meter_mark)),
+   .next = NEXT(action_meter_mark),
+   .call = parse_vc,
+   },
+   [ACTION_METER_PROFILE] = {
+   .name = "mtr_profile",
+   .help = "meter profile id to use",
+   .next = NEXT(NEXT_ENTRY(ACTION_METER_PROFILE_ID2PTR)),
+   .args = ARGS(ARGS_ENTRY_ARB(0, sizeof(uint32_t))),
+   },
+   [ACTION_METER_PROFILE_ID2PTR] = {
+   .name = "{mtr_profile_id}",
+   .type = "PROFILE_ID",
+   .help = "meter profile id",
+   .next = NEXT(action_meter_mark),
+   .call = parse_meter_profile_id2ptr,
+   .comp = comp_none,
+   },
+   [ACTION_METER_POLICY] = {
+   .name = "mtr_policy",
+   .help = "meter policy id to use",
+   .next = NEXT(NEXT_ENTRY(ACTION_METER_POLICY_ID2PTR)),
+   ARGS(ARGS_ENTRY_ARB(0, sizeof(uint32_t))),
+   },
+   [ACTION_METER_POLICY_ID2PTR] = {
+   .name = "{mtr_policy_id}",
+   .type = "POLICY_ID",
+   .help = "meter policy id",
+   .next = NEXT(action_meter_mark),
+   .call = parse_meter_policy

RE: [PATCH v9 05/12] net/nfp: add flower PF setup logic

2022-09-20 Thread Chaoyong He
> On 9/15/2022 11:44 AM, Chaoyong He wrote:
> > Adds the vNIC initialization logic for the flower PF vNIC. The flower
> > firmware application exposes this vNIC for the purposes of fallback
> > traffic in the switchdev use-case.
> >
> > Adds minimal dev_ops for this PF vNIC device. Because the device is
> > being exposed externally to DPDK, it needs to implement a minimal set
> > of dev_ops.
> >
> > Signed-off-by: Chaoyong He 
> > Reviewed-by: Niklas Söderlund 
> 
> <...>
> 
> > +
> > +struct dp_packet {
> > +   struct rte_mbuf mbuf;
> > +   uint32_t source;
> > +};
> > +
> > +static void
> > +nfp_flower_pf_mp_init(__rte_unused struct rte_mempool *mp,
> > +   __rte_unused void *opaque_arg,
> > +   void *packet,
> > +   __rte_unused unsigned int i)
> > +{
> > +   struct dp_packet *pkt = packet;
> > +   /* Indicate that this pkt is from DPDK */
> > +   pkt->source = 3;
> > +}
> > +
> > +static struct rte_mempool *
> > +nfp_flower_pf_mp_create(void)
> > +{
> > +   uint32_t nb_mbufs;
> > +   unsigned int numa_node;
> > +   struct rte_mempool *pktmbuf_pool;
> > +   uint32_t n_rxd = PF_VNIC_NB_DESC;
> > +   uint32_t n_txd = PF_VNIC_NB_DESC;
> > +
> > +   nb_mbufs = RTE_MAX(n_rxd + n_txd + MAX_PKT_BURST +
> > +MEMPOOL_CACHE_SIZE, 81920U);
> > +
> > +   numa_node = rte_socket_id();
> > +   pktmbuf_pool = rte_pktmbuf_pool_create("flower_pf_mbuf_pool",
> nb_mbufs,
> > +   MEMPOOL_CACHE_SIZE, MBUF_PRIV_SIZE,
> > +   RTE_MBUF_DEFAULT_BUF_SIZE, numa_node);
> > +   if (pktmbuf_pool == NULL) {
> > +   PMD_INIT_LOG(ERR, "Cannot init pf vnic mbuf pool");
> > +   return NULL;
> > +   }
> > +
> > +   rte_mempool_obj_iter(pktmbuf_pool, nfp_flower_pf_mp_init,
> NULL);
> > +
> > +   return pktmbuf_pool;
> > +}
> > +
> 
> Hi Chaoyong,
> 
> Again, a similar comment to previous versions: what I understand is that this
> new flower FW supports HW flow filtering and the intended use case is OvS HW
> acceleration.
> But does the DPDK driver need to know OvS data structures, like "struct dp_packet"?
> Can't it be transparent to the application? I am sure there are other devices
> offloading some OvS tasks to HW.
> 
> @Ian, @David,
> 
> Can you please comment on above usage, do you guys see any way to
> escape from OvS specific code in the driver?

Firstly, I'll explain why we must include some OvS-specific code in the driver.
If we don't set `pkt->source = 3`, OvS will dump core like this:
```
(gdb) bt
#0  0x7fe1d48fd387 in raise () from /lib64/libc.so.6
#1  0x7fe1d48fea78 in abort () from /lib64/libc.so.6
#2  0x7fe1d493ff67 in __libc_message () from /lib64/libc.so.6
#3  0x7fe1d4948329 in _int_free () from /lib64/libc.so.6
#4  0x0049c006 in dp_packet_uninit (b=0x1f262db80) at 
lib/dp-packet.c:135
#5  0x0061440a in dp_packet_delete (b=0x1f262db80) at 
lib/dp-packet.h:261
#6  0x00619aa0 in dpdk_copy_batch_to_mbuf (netdev=0x1f0a04a80, 
batch=0x7fe1b40050c0) at lib/netdev-dpdk.c:274
#7  0x00619b46 in netdev_dpdk_common_send (netdev=0x1f0a04a80, 
batch=0x7fe1b40050c0, stats=0x7fe1be7321f0) at
#8  0x0061a0ba in netdev_dpdk_eth_send (netdev=0x1f0a04a80, qid=0, 
batch=0x7fe1b40050c0, concurrent_txq=true)
#9  0x004fbd10 in netdev_send (netdev=0x1f0a04a80, qid=0, 
batch=0x7fe1b40050c0, concurrent_txq=true) at lib/n
#10 0x004aa663 in dp_netdev_pmd_flush_output_on_port 
(pmd=0x7fe1be735010, p=0x7fe1b4005090) at lib/dpif-netde
#11 0x004aa85d in dp_netdev_pmd_flush_output_packets 
(pmd=0x7fe1be735010, force=false) at lib/dpif-netdev.c:5
#12 0x004aaaef in dp_netdev_process_rxq_port (pmd=0x7fe1be735010, 
rxq=0x16f3f80, port_no=3) at lib/dpif-netde
#13 0x004af17a in pmd_thread_main (f_=0x7fe1be735010) at 
lib/dpif-netdev.c:6958
#14 0x0057da80 in ovsthread_wrapper (aux_=0x1608b30) at 
lib/ovs-thread.c:422
#15 0x7fe1d51a6ea5 in start_thread () from /lib64/libpthread.so.0
#16 0x7fe1d49c5b0d in clone () from /lib64/libc.so.6
```
The logic in function `dp_packet_delete()` runs into the wrong branch.

Then, why does only our PMD need to do this, while other PMDs don't?
Generally, it depends greatly on the hardware.

Netronome's Network Flow Processor 4xxx (NFP-4xxx) card is the target card
of this patch series.
It has only one PF but 2 physical ports, and the NFP PMD can work with
up to 8 ports on the same PF device.
The hardware of other PMDs all seems to be 'one PF <--> one physical port'.

For the OvS use case, we should add the representor port of the 'physical port'
to the bridge, not the representor port of the PF as other PMDs do.

We use a two-layer poll mode architecture. (Other PMDs use a simple poll mode
architecture.)
In the RX direction:
1. When the physical port or VF receives pkts, the firmware will prepend
meta-data (indicating the input port) to the pkt.
2. We use the PF vNIC as a multiplexer, which keeps polling pkts from the 
firmware.
3. The PF vNIC will parse the meta-data, and

[PATCH v2 0/9] introduce memarea library

2022-09-20 Thread Chengwen Feng
The memarea library is an allocator of variable-size objects. It is a
collection of allocated objects that can be efficiently allocated or freed
all at once. The main features are as follows:
a) it facilitates alloc and free of memory with low overhead.

b) it provides refcnt feature which could be useful in some scenes.

c) it is MT-safe as long as that is specified at creation time.

d) its memory source could come from:
d.1) system API: malloc in C library.
d.2) user provided address: it can be from the rte_malloc API series
or extended memory as long as it is available.
d.3) user provided memarea: it can be from another memarea.

e) it provides a backup memory mechanism: the memarea object could use
another memarea object as a backup.

Note:
a) the memarea is oriented towards the application layer, which could
provide a 'region-based memory management' [1] function.
b) the eal library also provides memory zone/heap management, but these
are tied to huge pages management.

[1] https://en.wikipedia.org/wiki/Region-based_memory_management

Signed-off-by: Chengwen Feng 

Chengwen Feng (9):
  memarea: introduce memory area library
  test/memarea: support memarea test
  memarea: support alloc/free/update-refcnt API
  test/memarea: support alloc/free/update-refcnt test
  memarea: support dump API
  test/memarea: support dump test
  memarea: support backup memory mechanism
  test/memarea: support backup memory test
  test/memarea: support no MT-safe test

---
v2: 
* fix compile issues reported by dpdk-test-report
* address Dimitry and Jerin's comments
* add no MT-safe test

 MAINTAINERS|   6 +
 app/test/meson.build   |   2 +
 app/test/test_memarea.c| 379 +
 doc/api/doxy-api-index.md  |   3 +-
 doc/api/doxy-api.conf.in   |   1 +
 doc/guides/prog_guide/index.rst|   1 +
 doc/guides/prog_guide/memarea_lib.rst  |  57 
 doc/guides/rel_notes/release_22_11.rst |   6 +
 lib/eal/include/rte_log.h  |   1 +
 lib/memarea/memarea_private.h  |  35 +++
 lib/memarea/meson.build|  16 ++
 lib/memarea/rte_memarea.c  | 379 +
 lib/memarea/rte_memarea.h  | 210 ++
 lib/memarea/version.map|  16 ++
 lib/meson.build|   1 +
 15 files changed, 1112 insertions(+), 1 deletion(-)
 create mode 100644 app/test/test_memarea.c
 create mode 100644 doc/guides/prog_guide/memarea_lib.rst
 create mode 100644 lib/memarea/memarea_private.h
 create mode 100644 lib/memarea/meson.build
 create mode 100644 lib/memarea/rte_memarea.c
 create mode 100644 lib/memarea/rte_memarea.h
 create mode 100644 lib/memarea/version.map

-- 
2.17.1



[PATCH v2 9/9] test/memarea: support no MT-safe test

2022-09-20 Thread Chengwen Feng
MT-safe is enabled by default in the previous tests; this patch adds a
non-MT-safe test.

Signed-off-by: Chengwen Feng 
---
 app/test/test_memarea.c | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/app/test/test_memarea.c b/app/test/test_memarea.c
index 9609909d7c..2ab90fb5b5 100644
--- a/app/test/test_memarea.c
+++ b/app/test/test_memarea.c
@@ -331,6 +331,35 @@ test_memarea_backup(void)
return 0;
 }
 
+static int
+test_memarea_no_mt_safe(void)
+{
+   struct rte_memarea_param init;
+   struct rte_memarea *ma;
+   int ret;
+
+   /* prepare env */
+   test_memarea_init_def_param(&init);
+   init.source = RTE_MEMAREA_SOURCE_SYSTEM_API;
+   init.total_sz = MEMAREA_TEST_DEFAULT_SIZE;
+   init.mt_safe = false;
+   ma = rte_memarea_create(&init);
+   RTE_TEST_ASSERT(ma != NULL, "Expected Non-NULL");
+
+   /* test for all API */
+   (void)rte_memarea_alloc(ma, 1, 0);
+   (void)rte_memarea_alloc(ma, 1, 0);
+   rte_memarea_free(ma, rte_memarea_alloc(ma, 1, 0));
+   rte_memarea_update_refcnt(ma, rte_memarea_alloc(ma, 1, 0), 1);
+   rte_memarea_update_refcnt(ma, rte_memarea_alloc(ma, 1, 0), -1);
+   ret = rte_memarea_dump(ma, stderr, true);
+   RTE_TEST_ASSERT(ret == 0, "Expected ZERO");
+
+   rte_memarea_destroy(ma);
+
+   return 0;
+}
+
 static int
 test_memarea(void)
 {
@@ -341,6 +370,7 @@ test_memarea(void)
MEMAREA_TEST_API_RUN(test_memarea_alloc_free);
MEMAREA_TEST_API_RUN(test_memarea_dump);
MEMAREA_TEST_API_RUN(test_memarea_backup);
+   MEMAREA_TEST_API_RUN(test_memarea_no_mt_safe);
return 0;
 }
 
-- 
2.17.1



  1   2   >