Re: [dpdk-dev] [PATCH] ethdev: introduce generic copy rte flow action

2021-01-10 Thread Ori Kam
Hi Alexander,

I guess the test-pmd part will be available later, right?

> -Original Message-
> From: Alexander Kozyrev 
> Sent: Friday, January 8, 2021 8:33 AM
> Subject: [PATCH] ethdev: introduce generic copy rte flow action
> 
> Implement a generic copy flow API to allow copying of an arbitrary
> header field (as well as mark, metadata or tag) to another item.
> 
> This generic copy mechanism removes the necessity to implement a
> separate RTE Flow action every time we need to modify a new packet
> field in the future. A user-provided value can be used from a
> specified tag/metadata or directly copied from another packet field.
> 
> The number of bits to copy as well as the offset to start from can
> be specified to allow a partial copy or copy into an arbitrary
> place in a packet for greater flexibility.
> 

Since the question of why you are using an enum rather than just an offset
from the start of the packet was raised and discussed by a number of people,
it would be best if it appeared in the commit log, at least the advantages
of this implementation.

> RFC:
> http://patches.dpdk.org/patch/85384/
> 
> Signed-off-by: Alexander Kozyrev 
> ---
>  doc/guides/prog_guide/rte_flow.rst | 35 ++
>  lib/librte_ethdev/rte_flow.c   |  1 +
>  lib/librte_ethdev/rte_flow.h   | 59 ++
>  3 files changed, 95 insertions(+)
> 
> diff --git a/doc/guides/prog_guide/rte_flow.rst
> b/doc/guides/prog_guide/rte_flow.rst
> index 86b3444803..b737ff9dad 100644
> --- a/doc/guides/prog_guide/rte_flow.rst
> +++ b/doc/guides/prog_guide/rte_flow.rst
> @@ -2766,6 +2766,41 @@ The behaviour of the shared action defined by
> ``action`` argument of type
> | no properties |
> +---+
> 
> +Action: ``COPY_ITEM``
> +^^^^^^^^^^^^^^^^^^^^^
> +
> +Copy ``width`` bits from ``src`` item to ``dst`` item.
> +
> +An arbitrary header field (as well as mark, metadata or tag values)
> +can be used as both source and destination items as set by ``item``.
> +

For tag I think you should also use the index, right?

> +Inner packet header fields can be accessed using the ``index``, and
> +it is possible to start the copy at the ``offset`` bit of an item.

Please specify what index 0/1/... means. Is 0 the outermost or the innermost?
You can look at the RSS level for reference.
I think it will be best to use the same values.

What happens if we want to copy between different sizes?
For example, copying the IPv6 source address to a number of tags?
I assume we will be using the offset and splitting the copy command
into a number of actions, right?
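
For illustration, splitting a 128-bit IPv6 source address into four 32-bit
tag copies could look like the sketch below (field and enum names are
assumptions based on the doc tables further down, not the patch's exact
definitions; the IPv6 source address starts at bit offset 64 of the header):

    /* Hypothetical sketch: one wide copy split into four COPY_ITEM
     * actions of 32 bits each. All names here are illustrative. */
    struct rte_flow_action_copy_item copies[4];
    int i;

    for (i = 0; i < 4; i++) {
        copies[i].src.item = RTE_FLOW_ITEM_TYPE_IPV6; /* assumed ID */
        copies[i].src.index = 0;              /* outermost header */
        copies[i].src.offset = 64 + i * 32;   /* walk the 128-bit address */
        copies[i].dst.item = RTE_FLOW_ITEM_TYPE_TAG;  /* assumed ID */
        copies[i].dst.index = i;              /* tag array index */
        copies[i].dst.offset = 0;
        copies[i].width = 32;                 /* bits per partial copy */
    }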

> +
> +.. _table_rte_flow_action_copy_item:
> +
> +.. table:: COPY_ITEM
> +
> +   +-----------+-------------------------+
> +   | Field     | Value                   |
> +   +===========+=========================+
> +   | ``dst``   | destination item        |
> +   +-----------+-------------------------+
> +   | ``src``   | source item             |
> +   +-----------+-------------------------+
> +   | ``width`` | number of bits to copy  |
> +   +-----------+-------------------------+
> +
> +.. _table_rte_flow_action_copy_data:
> +
> +.. table:: destination/source item definition
> +
> +   +------------+-------------------------------------------+
> +   | Field      | Value                                     |
> +   +============+===========================================+
> +   | ``item``   | ID of a packet field/mark/metadata/tag    |
> +   +------------+-------------------------------------------+
> +   | ``index``  | index of outer/inner header or tag array  |
> +   +------------+-------------------------------------------+
> +   | ``offset`` | number of bits to skip during the copy    |
> +   +------------+-------------------------------------------+
> +
>  Negative types
>  ~~~~~~~~~~~~~~
> 
> diff --git a/lib/librte_ethdev/rte_flow.c b/lib/librte_ethdev/rte_flow.c
> index a06f64c271..fdbabefc47 100644
> --- a/lib/librte_ethdev/rte_flow.c
> +++ b/lib/librte_ethdev/rte_flow.c
> @@ -176,6 +176,7 @@ static const struct rte_flow_desc_data
> rte_flow_desc_action[] = {
>   MK_FLOW_ACTION(SET_IPV6_DSCP, sizeof(struct
> rte_flow_action_set_dscp)),
>   MK_FLOW_ACTION(AGE, sizeof(struct rte_flow_action_age)),
>   MK_FLOW_ACTION(SAMPLE, sizeof(struct rte_flow_action_sample)),
> + MK_FLOW_ACTION(COPY_ITEM, sizeof(struct
> rte_flow_action_copy_item)),
>   /**
>* Shared action represented as handle of type
>* (struct rte_flow_shared action *) stored in conf field (see
> diff --git a/lib/librte_ethdev/rte_flow.h b/lib/librte_ethdev/rte_flow.h
> index 0977a78270..0540c861fb 100644
> --- a/lib/librte_ethdev/rte_flow.h
> +++ b/lib/librte_ethdev/rte_flow.h
> @@ -2198,6 +2198,16 @@ enum rte_flow_action_type {
>* struct rte_flow_shared_action).
>  
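
For orientation, a plausible C rendering of the two doc tables above (the
exact layout in the rte_flow.h hunk is an assumption here, not the patch's
definition):

    /* Sketch inferred from the COPY_ITEM documentation tables. */
    struct rte_flow_action_copy_data {
        uint32_t item;   /* ID of a packet field/mark/metadata/tag */
        uint32_t index;  /* index of outer/inner header or tag array */
        uint32_t offset; /* number of bits to skip during the copy */
    };

    struct rte_flow_action_copy_item {
        struct rte_flow_action_copy_data dst;
        struct rte_flow_action_copy_data src;
        uint32_t width;  /* number of bits to copy */
    };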

[dpdk-dev] [PATCH] net/i40e: fix SFP I X722 with FW4.16

2021-01-10 Thread Weifeng Li
When the NVM API version is 1.7 or above, the adminq operation to set
the TPID is reported as supported, which causes the adminq to be used
instead of registers. For SFP_I_X722 with FW4.16, the reported NVM API
version is 1.8, so the adminq operation is marked as supported even
though FW4.16 does not actually support it.
Add an additional check for SFP_I_X722 to not enable the adminq operation.
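
For context, a hedged sketch of the gating involved (not the literal i40e
code; fields such as hw->aq.api_maj_ver come from the shared i40e HW
structures):

    /* Shared code marks the adminq TPID path as supported purely
     * from the firmware NVM API version (>= 1.7)... */
    if (hw->aq.api_maj_ver > 1 ||
        (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver >= 7))
        hw->flags |= I40E_HW_FLAG_802_1AD_CAPABLE;

    /* ...so the fix below clears it for both SFP X722 device IDs,
     * whose FW4.16 does not actually implement it: */
    if (hw->device_id == I40E_DEV_ID_SFP_X722 ||
        hw->device_id == I40E_DEV_ID_SFP_I_X722)
        hw->flags &= ~I40E_HW_FLAG_802_1AD_CAPABLE;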

Fixes: 9efa8d28b4da ("net/i40e: fix SFP X722 with FW4.16")
Cc: sta...@dpdk.org

Signed-off-by: Weifeng Li 
---
 drivers/net/i40e/i40e_ethdev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 1462248..a07879c 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -1550,7 +1550,8 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void 
*init_params __rte_unused)
return -EIO;
}
/* Firmware of SFP x722 does not support adminq option */
-   if (hw->device_id == I40E_DEV_ID_SFP_X722)
+   if (hw->device_id == I40E_DEV_ID_SFP_X722 ||
+   hw->device_id == I40E_DEV_ID_SFP_I_X722)
hw->flags &= ~I40E_HW_FLAG_802_1AD_CAPABLE;
 
PMD_INIT_LOG(INFO, "FW %d.%d API %d.%d NVM %02d.%02d.%02d eetrack %04x",
-- 
2.9.5



[dpdk-dev] [PATCH 2/3] net/mlx5: handle the RSS action in the sample

2021-01-10 Thread Jiawei Wang
The PMD validates the RSS action in the sample sub-actions list,
then translates it into an rdma-core action to be used as the sample
path destination.

If an RSS action appears both in the sample sub-actions list and in
the original flow, the RSS level and RSS types in the sample
sub-actions list must be consistent with the original flow list,
since the items expanded for RSS must be the same within one flow.
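
As an illustration (testpmd syntax as in patch 1/3 of this series; a
hypothetical rule, not taken from the patch), keeping the two RSS actions
consistent would look like:

    set sample_actions 0 rss queues 0 1 2 3 end / end
    flow create 0 ingress group 1 pattern eth / end actions
        sample ratio 1 index 0 / rss queues 0 1 2 3 end / end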

Signed-off-by: Jiawei Wang 
---
 drivers/net/mlx5/mlx5_flow.c|  24 -
 drivers/net/mlx5/mlx5_flow_dv.c | 226 +++-
 2 files changed, 175 insertions(+), 75 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 2a4073c..96a7e0d 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -3174,16 +3174,28 @@ struct mlx5_flow_tunnel_info {
 static const struct rte_flow_action_rss*
 flow_get_rss_action(const struct rte_flow_action actions[])
 {
+   const struct rte_flow_action_rss *rss = NULL;
+
for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
switch (actions->type) {
case RTE_FLOW_ACTION_TYPE_RSS:
-   return (const struct rte_flow_action_rss *)
-  actions->conf;
+   rss = actions->conf;
+   break;
+   case RTE_FLOW_ACTION_TYPE_SAMPLE:
+   {
+   const struct rte_flow_action_sample *sample =
+   actions->conf;
+   const struct rte_flow_action *act = sample->actions;
+   for (; act->type != RTE_FLOW_ACTION_TYPE_END; act++)
+   if (act->type == RTE_FLOW_ACTION_TYPE_RSS)
+   rss = act->conf;
+   break;
+   }
default:
break;
}
}
-   return NULL;
+   return rss;
 }
 
 /**
@@ -5278,7 +5290,7 @@ struct mlx5_hlist_entry *
struct mlx5_priv *priv = dev->data->dev_private;
struct rte_flow *flow = NULL;
struct mlx5_flow *dev_flow;
-   const struct rte_flow_action_rss *rss;
+   const struct rte_flow_action_rss *rss = NULL;
struct mlx5_translated_shared_action
shared_actions[MLX5_MAX_SHARED_ACTIONS];
int shared_actions_n = MLX5_MAX_SHARED_ACTIONS;
@@ -5356,7 +5368,9 @@ struct mlx5_hlist_entry *
MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
flow->drv_type < MLX5_FLOW_TYPE_MAX);
memset(rss_desc, 0, offsetof(struct mlx5_flow_rss_desc, queue));
-   rss = flow_get_rss_action(p_actions_rx);
+   /* RSS Action only works on NIC RX domain */
+   if (attr->ingress && !attr->transfer)
+   rss = flow_get_rss_action(p_actions_rx);
if (rss) {
if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num))
return 0;
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index e4736ee..3fa15cf 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -4344,6 +4344,10 @@ struct mlx5_hlist_entry *
  *   Attributes of flow that includes this action.
  * @param[in] item_flags
  *   Holds the items detected.
+ * @param[in] rss
+ *   Pointer to the RSS action.
+ * @param[out] sample_rss
+ *   Pointer to the RSS action in sample action list.
  * @param[out] error
  *   Pointer to error structure.
  *
@@ -4355,7 +4359,9 @@ struct mlx5_hlist_entry *
   const struct rte_flow_action *action,
   struct rte_eth_dev *dev,
   const struct rte_flow_attr *attr,
-  const uint64_t item_flags,
+  uint64_t item_flags,
+  const struct rte_flow_action_rss *rss,
+  const struct rte_flow_action_rss **sample_rss,
   struct rte_flow_error *error)
 {
struct mlx5_priv *priv = dev->data->dev_private;
@@ -4415,6 +4421,28 @@ struct mlx5_hlist_entry *
sub_action_flags |= MLX5_FLOW_ACTION_QUEUE;
++actions_n;
break;
+   case RTE_FLOW_ACTION_TYPE_RSS:
+   *sample_rss = act->conf;
+   ret = mlx5_flow_validate_action_rss(act,
+   sub_action_flags,
+   dev, attr,
+   item_flags,
+   error);
+   if (ret < 0)
+   return ret;
+   if (rss && *sample_rss &&
+   ((*sample_rss)->

[dpdk-dev] [PATCH 1/3] app/testpmd: add RSS support in sample action

2021-01-10 Thread Jiawei Wang
Support the RSS action in the sample sub-actions list.

An example of the sample flow use case and its result is shown below:
set sample_actions 0 mark id 0x12 / rss queues 0 1 2 3 end / end
flow create 0 ingress group 1 pattern eth / end actions
sample ratio 1 index 0 / jump group 2 / end

This flow results in all the matched ingress packets being jumped to the
next table, where each packet is marked and sent to the RSS queues of the
control application.

Signed-off-by: Jiawei Wang 
---
 app/test-pmd/cmdline_flow.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 585cab9..28c3a1b 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -560,6 +560,7 @@ struct raw_sample_conf {
 struct rte_flow_action_count sample_count[RAW_SAMPLE_CONFS_MAX_NUM];
 struct rte_flow_action_port_id sample_port_id[RAW_SAMPLE_CONFS_MAX_NUM];
 struct rte_flow_action_raw_encap sample_encap[RAW_SAMPLE_CONFS_MAX_NUM];
+struct action_rss_data sample_rss_data[RAW_SAMPLE_CONFS_MAX_NUM];
 
 /** Maximum number of subsequent tokens and arguments on the stack. */
 #define CTX_STACK_SIZE 16
@@ -1548,6 +1549,7 @@ struct parse_action_priv {
 
 static const enum index next_action_sample[] = {
ACTION_QUEUE,
+   ACTION_RSS,
ACTION_MARK,
ACTION_COUNT,
ACTION_PORT_ID,
@@ -7515,6 +7517,7 @@ static int comp_set_sample_index(struct context *, const 
struct token *,
uint32_t i = 0;
struct rte_flow_action *action = NULL;
struct rte_flow_action *data = NULL;
+   const struct rte_flow_action_rss *rss = NULL;
size_t size = 0;
uint16_t idx = in->port; /* We borrow port field as index */
uint32_t max_size = sizeof(struct rte_flow_action) *
@@ -7546,6 +7549,29 @@ static int comp_set_sample_index(struct context *, const 
struct token *,
(const void *)action->conf, size);
action->conf = &sample_queue[idx];
break;
+   case RTE_FLOW_ACTION_TYPE_RSS:
+   size = sizeof(struct rte_flow_action_rss);
+   rss = action->conf;
+   rte_memcpy(&sample_rss_data[idx].conf,
+  (const void *)rss, size);
+   if (rss->key_len) {
+   sample_rss_data[idx].conf.key =
+   sample_rss_data[idx].key;
+   rte_memcpy((void *)((uintptr_t)
+  sample_rss_data[idx].conf.key),
+  (const void *)rss->key,
+  sizeof(uint8_t) * rss->key_len);
+   }
+   if (rss->queue_num) {
+   sample_rss_data[idx].conf.queue =
+   sample_rss_data[idx].queue;
+   rte_memcpy((void *)((uintptr_t)
+  sample_rss_data[idx].conf.queue),
+  (const void *)rss->queue,
+  sizeof(uint16_t) * rss->queue_num);
+   }
+   action->conf = &sample_rss_data[idx].conf;
+   break;
case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
size = sizeof(struct rte_flow_action_raw_encap);
rte_memcpy(&sample_encap[idx],
-- 
1.8.3.1



[dpdk-dev] [PATCH 0/3] Add RSS action support in the sample sub-actions list

2021-01-10 Thread Jiawei Wang
Currently the sample flow only supports the Queue action in the NIC-Rx domain.
This patchset adds RSS action support to the sample sub-actions list.

An example of the sample flow with the RSS action and its result is shown below:
set sample_actions 0 mark id 0x12 / rss queues 0 1 2 3 end / end
flow create 0 ingress group 1 pattern eth / end actions sample ratio 1
index 0 / jump group 2 / end

This flow results in all the matched ingress packets being jumped to the
next table, where each packet is marked and sent to the RSS queues of the
control application.

Jiawei Wang (3):
  app/testpmd: add RSS support in sample action
  net/mlx5: handle the RSS action in the sample
  doc: update RSS support in sample action

 app/test-pmd/cmdline_flow.c|  26 
 doc/guides/nics/mlx5.rst   |   1 +
 doc/guides/rel_notes/release_21_02.rst |   5 +
 drivers/net/mlx5/mlx5_flow.c   |  24 +++-
 drivers/net/mlx5/mlx5_flow_dv.c| 226 +++--
 5 files changed, 207 insertions(+), 75 deletions(-)

-- 
1.8.3.1



[dpdk-dev] [PATCH 3/3] doc: update RSS support in sample action

2021-01-10 Thread Jiawei Wang
Add a description of the RSS action support in the sample
action of the MLX5 PMD.

Signed-off-by: Jiawei Wang 
---
 doc/guides/nics/mlx5.rst   | 1 +
 doc/guides/rel_notes/release_21_02.rst | 5 +
 2 files changed, 6 insertions(+)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 3bda0f8..c20c0ed 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -98,6 +98,7 @@ Features
 - Hardware LRO.
 - Hairpin.
 - Multiple-thread flow insertion.
+- RSS support in sample action.
 
 Limitations
 ---
diff --git a/doc/guides/rel_notes/release_21_02.rst 
b/doc/guides/rel_notes/release_21_02.rst
index 638f981..74ec91f 100644
--- a/doc/guides/rel_notes/release_21_02.rst
+++ b/doc/guides/rel_notes/release_21_02.rst
@@ -55,6 +55,11 @@ New Features
  Also, make sure to start the actual text at the margin.
  ===
 
+* **Updated Mellanox mlx5 driver.**
+
+  Updated the Mellanox mlx5 driver with new features and improvements, including:
+
+  * Added support for RSS action in the sample sub-actions list.
 
 Removed Items
 -
-- 
1.8.3.1



Re: [dpdk-dev] [PATCH v3] mbuf: fix reset on mbuf free

2021-01-10 Thread Ali Alnubani
Hi Olivier,

> -Original Message-
> From: Olivier Matz 
> Sent: Wednesday, January 6, 2021 3:34 PM
> To: dev@dpdk.org
> Cc: andrew.rybche...@oktetlabs.ru; konstantin.anan...@intel.com;
> m...@smartsharesystems.com; Ali Alnubani ;
> ajitkhapa...@gmail.com; sta...@dpdk.org; Ajit Khaparde
> 
> Subject: [PATCH v3] mbuf: fix reset on mbuf free
> 

Even though the performance tests on Mellanox NICs are passing, I see a 
performance drop of up to 0.5 mpps with 64B frames (tests only fail for 1 mpps 
drop or more):
https://mails.dpdk.org/archives/test-report/2021-January/172759.html

I'll verify this on local hardware and reply back.

Regards,
Ali 


Re: [dpdk-dev] [PATCH] ethdev: introduce generic copy rte flow action

2021-01-10 Thread Asaf Penso
Correct, Ori. We'll soon send the testpmd part and pmd draft.

Regards,
Asaf Penso

>-Original Message-
>From: dev  On Behalf Of Ori Kam
>Sent: Sunday, January 10, 2021 10:01 AM
>To: Alexander Kozyrev ; dev@dpdk.org
>Cc: Slava Ovsiienko ; NBU-Contact-Thomas
>Monjalon ; ferruh.yi...@intel.com;
>andrew.rybche...@oktetlabs.ru
>Subject: Re: [dpdk-dev] [PATCH] ethdev: introduce generic copy rte flow
>action
>
>Hi Alexander,
>
>I guess the test-pmd part will be available later, right?
>
>[snip]

Re: [dpdk-dev] [dpdk-dev v2 1/2] ethdev: add new tunnel type for ecpri

2021-01-10 Thread Ori Kam
Hi,

> -Original Message-
> From: Zhang, Qi Z 
> Sent: Saturday, January 9, 2021 3:01 AM
> Subject: RE: [dpdk-dev] [dpdk-dev v2 1/2] ethdev: add new tunnel type for 
> ecpri
> 
> 
> 
> > -Original Message-
> > From: Thomas Monjalon 
> > Subject: Re: [dpdk-dev] [dpdk-dev v2 1/2] ethdev: add new tunnel type for
> ecpri
> >
> > 08/01/2021 10:29, Andrew Rybchenko:
> > > On 1/8/21 11:57 AM, Ferruh Yigit wrote:
> > > > On 1/8/2021 1:41 AM, Zhang, Qi Z wrote:
> > > >> From: Thomas Monjalon 
> > > >>> 07/01/2021 16:24, Zhang, Qi Z:
> > >  From: Thomas Monjalon 
> > > > 07/01/2021 13:47, Zhang, Qi Z:
> > > >> From: Thomas Monjalon 
> > > >>> 07/01/2021 10:32, Guo, Jia:
> > >  From: Thomas Monjalon 
> > > > 24/12/2020 07:59, Jeff Guo:
> > > >> --- a/lib/librte_ethdev/rte_ethdev.h
> > > >> +++ b/lib/librte_ethdev/rte_ethdev.h
> > > >> @@ -1219,6 +1219,7 @@ enum rte_eth_tunnel_type {
> > > >>   RTE_TUNNEL_TYPE_IP_IN_GRE,
> > > >>   RTE_L2_TUNNEL_TYPE_E_TAG,
> > > >>   RTE_TUNNEL_TYPE_VXLAN_GPE,
> > > >> +RTE_TUNNEL_TYPE_ECPRI,
> > > >>   RTE_TUNNEL_TYPE_MAX,
> > > >>   };
> > > >
> > > > We tried to remove all these legacy API in DPDK 20.11.
> > > > Andrew decided to not remove this one because it is not yet
> > > > completely replaced by rte_flow in all drivers.
> > > > However, I am against continuing to update this API.
> > > > The opposite work should be done: migrate to rte_flow.
> > > 
> > >  Agree, but it seems that the legacy API and the legacy driver
> > >  implementation are still kept in this release, and there is no
> > >  general way to replace the legacy with rte_flow right now.
> > > >>>
> > > >>> I think rte_flow is a complete replacement with more features.
> > > >>
> > > >> Thomas, I may not agree with this.
> > > >>
> > > >> Actually the "enum rte_eth_tunnel_type" is used by
> > > >> rte_eth_dev_udp_tunnel_port_add. A packet with a specific dst udp
> > > >> port will be recognized as a specific tunnel packet type (e.g.
> > > >> vxlan, vxlan-gpe,
> > > > ecpri...). In an Intel NIC, the API actually changes the
> > > > configuration of the packet parser in HW but does not add a filter
> > > > rule, and I guess all other devices may enable it in a similar way.
> > > >> So naturally it should be a device (port) level configuration
> > > >> but not a rte_flow
> > > > rule for match, encap, decap...
> > > >
> > > > I don't understand how it helps to identify a UDP port if there
> > > > is no rule for this tunnel.
> > > > What is the usage?
> > > 
> > >  Yes, in general it is a rule: it matches a udp packet's dst port
> > >  and the action is
> > > >>> "now the packet is identified as a vxlan packet"; then all other
> > > >>> rte_flow rules that match vxlan as a pattern will take effect.
> > > >>> but somehow, I think they are not rules in the same domain, just
> > > >>> like we have dedicate API for mac/vlan filter, we'd better have a
> > > >>> dedicate API for this also. ( RFC for Vxlan explains why we need
> > > >>> this.
> > >  https://tools.ietf.org/html/rfc7348).
> > > 
> > >  "Destination Port: IANA has assigned the value 4789 for the VXLAN
> > >  UDP port, and this value SHOULD be used by default as the
> > >  destination UDP port.  Some early implementations of VXLAN have
> > >  used other values for the destination port.  To enable
> > >  interoperability with these implementations, the destination port
> > SHOULD be configurable."
> > > >>>
> > > >>> Yes the port number is free.
> > > >>> But isn't it more natural to specify this port number as part of
> > > >>> the rte_flow rule?
> > > >>
> > > >> I think if we have a rte_flow action type that can be used to set a
> > > >> packet's tunnel type xxx, like below #flow create eth/ipv4/udp port
> > > >> is 4789/... action set_tunnel_type VxLAN / end then we may replace
> > > >> it with rte_flow, but I'm not sure if it's necessary, please share
> > > >> if you have a better idea.
> >
> > Of course we can specify the UDP port in rte_flow rule.
> > Please check rte_flow_item_udp.
> > That's a basic of rte_flow.
> 
> It's not about the pattern match, it's about the action; what we need is a
> rte_flow action to "define a packet's tunnel type", but we don't have one.
> 
> #flow create eth/ipv4/udp port is 4789/... action set_tunnel_type VxLAN
> 
> I think rte_eth_dev_udp_tunnel_port_add 

Re: [dpdk-dev] [PATCH v3 1/8] lib/librte_ethdev: introduce GENEVE header TLV option item

2021-01-10 Thread Ori Kam
Hi,

> -Original Message-
> From: Shiri Kuzin 
> Sent: Thursday, January 7, 2021 10:39 AM
> Subject: [PATCH v3 1/8] lib/librte_ethdev: introduce GENEVE header TLV option
> item
> 
> The Geneve tunneling protocol is designed to allow the
> user to specify some data context on the packet.
> The GENEVE TLV (Type-Length-Value) option is the
> means intended to carry this user data.
> 
> In order to support the GENEVE TLV option, the new rte_flow
> item "rte_flow_item_geneve_opt" is added.
> The new item contains the values and masks for the
> following fields:
> -option class
> -option type
> -length
> -data
> 
> The new item will be added to testpmd to support matching and
> raw encap/decap actions.
> 
> Signed-off-by: Shiri Kuzin 
> ---
>  lib/librte_ethdev/rte_flow.c |  1 +
>  lib/librte_ethdev/rte_flow.h | 27 +++
>  2 files changed, 28 insertions(+)
> 
> diff --git a/lib/librte_ethdev/rte_flow.c b/lib/librte_ethdev/rte_flow.c
> index a06f64c271..2af7d965e1 100644
> --- a/lib/librte_ethdev/rte_flow.c
> +++ b/lib/librte_ethdev/rte_flow.c
> @@ -97,6 +97,7 @@ static const struct rte_flow_desc_data
> rte_flow_desc_item[] = {
>   MK_FLOW_ITEM(L2TPV3OIP, sizeof(struct rte_flow_item_l2tpv3oip)),
>   MK_FLOW_ITEM(PFCP, sizeof(struct rte_flow_item_pfcp)),
>   MK_FLOW_ITEM(ECPRI, sizeof(struct rte_flow_item_ecpri)),
> + MK_FLOW_ITEM(GENEVE_OPT, sizeof(struct
> rte_flow_item_geneve_opt)),
>  };
> 
>  /** Generate flow_action[] entry. */
> diff --git a/lib/librte_ethdev/rte_flow.h b/lib/librte_ethdev/rte_flow.h
> index 0977a78270..11a6494b8e 100644
> --- a/lib/librte_ethdev/rte_flow.h
> +++ b/lib/librte_ethdev/rte_flow.h
> @@ -543,6 +543,14 @@ enum rte_flow_item_type {
>* See struct rte_flow_item_ipv6_frag_ext.
>*/
>   RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT,
> +
> + /**
> +  * Matches Geneve Variable Length Option
> +  *
> +  * See struct rte_flow_item_geneve_opt
> +  */
> + RTE_FLOW_ITEM_TYPE_GENEVE_OPT,
> +
>  };
> 
>  /**
> @@ -1627,6 +1635,25 @@ static const struct rte_flow_item_ecpri
> rte_flow_item_ecpri_mask = {
>  };
>  #endif
> 
> +/**
> + * RTE_FLOW_ITEM_TYPE_GENEVE_OPT
> + *
> + * Matches a GENEVE Variable Length Option
> + */
> +struct rte_flow_item_geneve_opt {
> + rte_be16_t option_class;
> + uint8_t option_type;
> + uint8_t option_len;
> + uint32_t *data;
> +};
> +
> +/** Default mask for RTE_FLOW_ITEM_TYPE_GENEVE_OPT. */
> +#ifndef __cplusplus
> +static const struct rte_flow_item_geneve_opt
> +rte_flow_item_geneve_opt_mask = {
> + .option_type = 0xff,
> +};
> +#endif
>  /**
>   * Matching pattern item definition.
>   *
> --
> 2.21.0

Acked-by: Ori Kam 
Thanks,
Ori



[dpdk-dev] [PATCH 1/5] common/mlx5: query register c preserve capability via DevX

2021-01-10 Thread Jiawei Wang
Update function mlx5_devx_cmd_query_hca_attr() to add the
reg_c_preserve bit query.

The stored metadata in register C may be lost in NIC Tx and
FDB egress while doing one of the following operations:
 - packet encapsulation.
 - packet mirroring (multiple processing paths).
 - packet sampling (using Flow Sampler).

If the reg_c_preserve bit is set to 1, the above limitation is
lifted: all metadata registers Cx preserve their values even
through the operations mentioned above.
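
A minimal sketch of consuming the new bit (mlx5_devx_cmd_query_hca_attr()
and struct mlx5_hca_attr are from drivers/common/mlx5; the consumer
function is hypothetical):

    struct mlx5_hca_attr attr = { 0 };

    if (mlx5_devx_cmd_query_hca_attr(ctx, &attr) == 0 &&
        attr.reg_c_preserve)
        /* Registers Cx survive encap/mirror/sample, so metadata can
         * be relied on across those operations. */
        enable_metadata_preserve_path(); /* hypothetical */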

Signed-off-by: Jiawei Wang 
---
 drivers/common/mlx5/mlx5_devx_cmds.c | 2 ++
 drivers/common/mlx5/mlx5_devx_cmds.h | 1 +
 drivers/common/mlx5/mlx5_prm.h   | 4 +++-
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c 
b/drivers/common/mlx5/mlx5_devx_cmds.c
index 12f51a9..b2d8a9e 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -728,6 +728,8 @@ struct mlx5_devx_obj *
attr->log_max_pd = MLX5_GET(cmd_hca_cap, hcattr, log_max_pd);
attr->log_max_srq = MLX5_GET(cmd_hca_cap, hcattr, log_max_srq);
attr->log_max_srq_sz = MLX5_GET(cmd_hca_cap, hcattr, log_max_srq_sz);
+   attr->reg_c_preserve =
+   MLX5_GET(cmd_hca_cap, hcattr, reg_c_preserve);
if (attr->qos.sup) {
MLX5_SET(query_hca_cap_in, in, op_mod,
 MLX5_GET_HCA_CAP_OP_MOD_QOS_CAP |
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h 
b/drivers/common/mlx5/mlx5_devx_cmds.h
index b335b7c..3b243b6 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -114,6 +114,7 @@ struct mlx5_hca_attr {
uint32_t scatter_fcs_w_decap_disable:1;
uint32_t flow_hit_aso:1; /* General obj type FLOW_HIT_ASO supported. */
uint32_t regex:1;
+   uint32_t reg_c_preserve:1;
uint32_t regexp_num_of_engines;
uint32_t log_max_ft_sampler_num:8;
struct mlx5_hca_qos_attr qos;
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 8c9b53c..53dab91 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -1130,7 +1130,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 regexp[0x1];
u8 reserved_at_a1[0x3];
u8 regexp_num_of_engines[0x4];
-   u8 reserved_at_a8[0x3];
+   u8 reserved_at_a8[0x1];
+   u8 reg_c_preserve[0x1];
+   u8 reserved_at_aa[0x1];
u8 log_max_srq[0x5];
u8 reserved_at_b0[0x3];
u8 regexp_log_crspace_size[0x5];
-- 
1.8.3.1



[dpdk-dev] [PATCH 2/5] net/mlx5: support E-Switch mirroring and jump in one flow

2021-01-10 Thread Jiawei Wang
mlx5 E-Switch mirroring is implemented as a multiple destination array in
one steering table. The array currently supports only port ID as a
destination action.

This patch adds jump action support to the array as one of the
destinations.
Packets can be mirrored to the port and jump to the next table in the
same destination array, allowing handling to continue in the new table.

For example:
set sample_actions 0 port_id id 1 / end
flow create 0 ingress transfer pattern eth / end actions
sample ratio 1 index 0 / jump group 1 / end
flow create 1 ingress transfer group 1 pattern eth / end actions
set_mac_dst mac_addr 00:aa:bb:cc:dd:ee / port_id id 2 / end

The flow results in all the matched ingress packets being mirrored
to port id 1 and going to group 1. In group 1, packets are modified
with the destination mac and sent to port id 2.

Signed-off-by: Jiawei Wang 
---
 drivers/net/mlx5/mlx5_flow.c| 36 ++
 drivers/net/mlx5/mlx5_flow.h|  2 +
 drivers/net/mlx5/mlx5_flow_dv.c | 81 +
 3 files changed, 80 insertions(+), 39 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 2a4073c..217090a 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -4503,7 +4503,6 @@ struct mlx5_hlist_entry *
 {
const struct rte_flow_action_sample *sample;
int actions_n = 0;
-   int jump_flag = 0;
uint32_t ratio = 0;
int sub_type = 0;
int flag = 0;
@@ -4518,8 +4517,6 @@ struct mlx5_hlist_entry *
if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE ||
actions->type == RTE_FLOW_ACTION_TYPE_RSS)
*qrss_action_pos = actions_n;
-   if (actions->type == RTE_FLOW_ACTION_TYPE_JUMP)
-   jump_flag = 1;
if (actions->type == RTE_FLOW_ACTION_TYPE_SAMPLE) {
sample = actions->conf;
ratio = sample->ratio;
@@ -4530,10 +4527,10 @@ struct mlx5_hlist_entry *
}
if (flag && action == RTE_FLOW_ACTION_TYPE_SAMPLE && attr->transfer) {
if (ratio == 1) {
-   /* JUMP Action not support for Mirroring;
-* Mirroring support multi-destination;
+   /* FDB mirroring is implemented with the destination array
+* instead of the FLOW_SAMPLER object.
 */
-   if (!jump_flag && sub_type != RTE_FLOW_ACTION_TYPE_END)
+   if (sub_type != RTE_FLOW_ACTION_TYPE_END)
flag = 0;
}
}
@@ -4554,8 +4551,8 @@ struct mlx5_hlist_entry *
  *
  * @param dev
  *   Pointer to Ethernet device.
- * @param[in] fdb_tx
- *   FDB egress flow flag.
+ * @param[in] add_tag
+ *   Add extra tag action flag.
  * @param[out] sfx_items
  *   Suffix flow match items (list terminated by the END pattern item).
  * @param[in] actions
@@ -4579,7 +4576,7 @@ struct mlx5_hlist_entry *
  */
 static int
 flow_sample_split_prep(struct rte_eth_dev *dev,
-  uint32_t fdb_tx,
+  int add_tag,
   struct rte_flow_item sfx_items[],
   const struct rte_flow_action actions[],
   struct rte_flow_action actions_sfx[],
@@ -4602,7 +4599,11 @@ struct mlx5_hlist_entry *
  RTE_FLOW_ERROR_TYPE_ACTION,
  NULL, "invalid position of sample "
  "action in list");
-   if (!fdb_tx) {
+   /* For CX5, add an extra tag action for NIC-RX and E-Switch ingress.
+* For CX6DX and above, metadata registers Cx preserve their value,
+* add an extra tag action for NIC-RX and E-Switch ingress and egress.
+*/
+   if (add_tag) {
/* Prepare the prefix tag action. */
set_tag = (void *)(actions_pre + actions_n + 1);
ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error);
@@ -4653,8 +4654,7 @@ struct mlx5_hlist_entry *
memcpy(actions_pre, actions,
   sizeof(struct rte_flow_action) * index);
}
-   /* Add the extra tag action for NIC-RX and E-Switch ingress. */
-   if (!fdb_tx) {
+   if (add_tag) {
actions_pre[index++] =
(struct rte_flow_action){
.type = (enum rte_flow_action_type)
@@ -5067,6 +5067,7 @@ struct mlx5_hlist_entry *
int actions_n = 0;
int sample_action_pos;
int qrss_action_pos;
+   int add_tag = 0;
int ret = 0;
 
if (priv->sampler_en)
@@ -5088,16 +5089,21 @@ struct mlx5_hlist_entry *
  "sample flow");
/* The representor_id is -1 for uplink. */
   

[dpdk-dev] [PATCH 0/5] Add the E-Switch mirroring and jump supports

2021-01-10 Thread Jiawei Wang
MLX5 E-Switch mirroring is implemented as a multiple destination array in
one steering table. The array currently supports only port ID as a
destination action.

This patchset adds the following support to the MLX5 PMD:
  - Query of the metadata register Cx preserve capability.
  - Jump action support as one of the destinations of the array.
  - Modify actions that impact only one of the destinations.

An example of the E-Switch flow use case and its result is shown below:
set sample_actions 0 port_id id 1 / end
flow create 0 ingress transfer pattern eth / end actions sample ratio 1 
index 0 / jump group 1 / end
flow create 1 ingress transfer group 1 pattern eth / end actions 
set_mac_dst mac_addr 00:aa:bb:cc:dd:ee / port_id id 2 / end

The flow results in all the matched ingress packets being mirrored
to port id 1 and going to group 1. In group 1, packets are modified
with the destination mac and sent to port id 2.

Jiawei Wang (5):
  common/mlx5: query register c preserve capability via DevX
  net/mlx5: support E-Switch mirroring and jump in one flow
  net/mlx5: extend the skip scale flag
  net/mlx5: support modifying one port in E-Switch mirroring
  doc: update the advanced E-Switch mirroring support

 doc/guides/nics/mlx5.rst   |   2 +
 doc/guides/rel_notes/release_21_02.rst |   6 +
 drivers/common/mlx5/mlx5_devx_cmds.c   |   2 +
 drivers/common/mlx5/mlx5_devx_cmds.h   |   1 +
 drivers/common/mlx5/mlx5_prm.h |   4 +-
 drivers/net/mlx5/mlx5_flow.c   | 203 +++--
 drivers/net/mlx5/mlx5_flow.h   |  23 +++-
 drivers/net/mlx5/mlx5_flow_dv.c|  92 +--
 8 files changed, 234 insertions(+), 99 deletions(-)

-- 
1.8.3.1



[dpdk-dev] [PATCH 4/5] net/mlx5: support modifying one port in E-Switch mirroring

2021-01-10 Thread Jiawei Wang
When there are a modify action and a sample action with ratio=1
in the E-Switch flow, and the modify action comes after the sample
action, the modification should only take effect after the sample.
The MLX5 PMD detects the above case and splits the E-Switch flow
into two sub-flows, similar to what the sample flow did before:

 - the prefix sub-flow with all actions preceding the sample and the
   sample action itself, also appending a new jump action after the
   sample in the prefix sub-flow;
 - the suffix sub-flow with the modify action and the other actions
   following the sample action.

The flow is split as below:

Original flow: items / actions pre / sample / modify / actions sfx
prefix sub flow -
items / actions pre / set_tag action / sample / jump
suffix sub flow -
tag_item / modify / actions sfx
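
An illustrative flow that triggers this split (testpmd syntax as in the
cover letter; a hypothetical example, not taken from the patch):

    set sample_actions 0 port_id id 1 / end
    flow create 0 ingress transfer pattern eth / end actions
        sample ratio 1 index 0 / set_mac_dst mac_addr 00:aa:bb:cc:dd:ee /
        port_id id 2 / end

Here set_mac_dst follows the sample with ratio=1, so it must apply only
to the non-mirrored copy, and the PMD splits the flow as described above.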

Signed-off-by: Jiawei Wang 
---
 drivers/net/mlx5/mlx5_flow.c| 175 
 drivers/net/mlx5/mlx5_flow_dv.c |  10 ---
 2 files changed, 123 insertions(+), 62 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 055474b..df79318 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -4490,6 +4490,8 @@ struct mlx5_hlist_entry *
  *   Pointer to the position of the matched action if exists, otherwise is -1.
  * @param[out] qrss_action_pos
 *   Pointer to the position of the Queue/RSS action if exists, otherwise is -1.
+ * @param[out] modify_after_mirror
+ *   Pointer to the flag of modify action after FDB mirroring.
  *
  * @return
  *   > 0 the total number of actions.
@@ -4499,13 +4501,15 @@ struct mlx5_hlist_entry *
 flow_check_match_action(const struct rte_flow_action actions[],
const struct rte_flow_attr *attr,
enum rte_flow_action_type action,
-   int *match_action_pos, int *qrss_action_pos)
+   int *match_action_pos, int *qrss_action_pos,
+   int *modify_after_mirror)
 {
const struct rte_flow_action_sample *sample;
int actions_n = 0;
uint32_t ratio = 0;
int sub_type = 0;
int flag = 0;
+   int fdb_mirror = 0;
 
*match_action_pos = -1;
*qrss_action_pos = -1;
@@ -4514,25 +4518,53 @@ struct mlx5_hlist_entry *
flag = 1;
*match_action_pos = actions_n;
}
-   if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE ||
-   actions->type == RTE_FLOW_ACTION_TYPE_RSS)
+   switch (actions->type) {
+   case RTE_FLOW_ACTION_TYPE_QUEUE:
+   case RTE_FLOW_ACTION_TYPE_RSS:
*qrss_action_pos = actions_n;
-   if (actions->type == RTE_FLOW_ACTION_TYPE_SAMPLE) {
+   break;
+   case RTE_FLOW_ACTION_TYPE_SAMPLE:
sample = actions->conf;
ratio = sample->ratio;
sub_type = ((const struct rte_flow_action *)
(sample->actions))->type;
+   if (ratio == 1 && attr->transfer)
+   fdb_mirror = 1;
+   break;
+   case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
+   case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
+   case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+   case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+   case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+   case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+   case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+   case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+   case RTE_FLOW_ACTION_TYPE_DEC_TTL:
+   case RTE_FLOW_ACTION_TYPE_SET_TTL:
+   case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
+   case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
+   case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
+   case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
+   case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
+   case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
+   case RTE_FLOW_ACTION_TYPE_FLAG:
+   case RTE_FLOW_ACTION_TYPE_MARK:
+   case RTE_FLOW_ACTION_TYPE_SET_META:
+   case RTE_FLOW_ACTION_TYPE_SET_TAG:
+   if (fdb_mirror)
+   *modify_after_mirror = 1;
+   break;
+   default:
+   break;
}
actions_n++;
}
-   if (flag && action == RTE_FLOW_ACTION_TYPE_SAMPLE && attr->transfer) {
-   if (ratio == 1) {
-   /* FDB mirroring uses the destination array to implement
-* instead of FLOW_SAMPLER object.
-*/
-   if (sub_type != RTE_FLOW_ACTION_TYPE_END)
-   flag = 0;
-   }
+   if (flag && fdb_

[dpdk-dev] [PATCH 3/5] net/mlx5: extend the skip scale flag

2021-01-10 Thread Jiawei Wang
The sampling feature introduced a scale flow group with a factor;
the scaled table value can then be used for the normal path table,
since that table is created implicitly.

But if the input group value is already scaled, for example the
group value of a sampling suffix flow, the 'skip_scale' flag is
used to avoid scaling twice during the action translation.

Considering a flow with a jump action, where the jump action could
be created implicitly, the PMD may scale only the original flow
group value, only the jump group value, or both, so extend the
'skip_scale' flag to two bits:
If bit 0 of the 'skip_scale' flag is set to 1, skip scaling the
original flow group;
If bit 1 of the 'skip_scale' flag is set to 1, skip scaling the
jump flow group.
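
In terms of the macros this patch adds, a sketch of the intended usage:

    /* Suffix flow of a sampling split: its own group is already scaled,
     * but an implicitly created jump target still needs scaling. */
    dev_flow->skip_scale = 1 << MLX5_SCALE_FLOW_GROUP_BIT;

    /* Skip both scalings. */
    dev_flow->skip_scale = (1 << MLX5_SCALE_FLOW_GROUP_BIT) |
                           (1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT);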

Signed-off-by: Jiawei Wang 
---
 drivers/net/mlx5/mlx5_flow.c|  2 +-
 drivers/net/mlx5/mlx5_flow.h| 21 ++---
 drivers/net/mlx5/mlx5_flow_dv.c |  9 +++--
 3 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 217090a..055474b 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -5133,7 +5133,7 @@ struct mlx5_hlist_entry *
/* Suffix group level already be scaled with factor, set
 * skip_scale to 1 to avoid scale again in translation.
 */
-   flow_split_info->skip_scale = 1;
+   flow_split_info->skip_scale = 1 << MLX5_SCALE_FLOW_GROUP_BIT;
 #endif
}
/* Add the suffix subflow. */
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 18d5cfc..a054ff3 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -755,6 +755,9 @@ struct mlx5_flow_verbs_workspace {
 };
 #endif /* HAVE_INFINIBAND_VERBS_H */
 
+#define MLX5_SCALE_FLOW_GROUP_BIT 0
+#define MLX5_SCALE_JUMP_FLOW_GROUP_BIT 1
+
 /** Maximal number of device sub-flows supported. */
 #define MLX5_NUM_MAX_DEV_FLOWS 32
 
@@ -768,8 +771,20 @@ struct mlx5_flow {
/**< Bit-fields of detected actions, see MLX5_FLOW_ACTION_*. */
bool external; /**< true if the flow is created external to PMD. */
uint8_t ingress:1; /**< 1 if the flow is ingress. */
-   uint8_t skip_scale:1;
-   /**< 1 if skip the scale the table with factor. */
+   uint8_t skip_scale:2;
+   /**
+* Each bit is set to 1 to skip scaling the flow group with the factor.
+* If bit 0 is set to 1, skip scaling the original flow group;
+* if bit 1 is set to 1, skip scaling the jump flow group when there
+* is a jump action.
+* 00: enable scaling in a flow, default value.
+* 01: skip scaling the flow group with the factor, enable scaling
+* the group of the jump action.
+* 10: enable scaling the group with the factor, skip scaling the
+* group of the jump action.
+* 11: skip scaling with the factor both for the flow group and the
+* jump group.
+*/
union {
 #if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
struct mlx5_flow_dv_workspace dv;
@@ -1238,7 +1253,7 @@ struct flow_grp_info {
uint64_t fdb_def_rule:1;
/* force standard group translation */
uint64_t std_tbl_fix:1;
-   uint64_t skip_scale:1;
+   uint64_t skip_scale:2;
 };
 
 static inline bool
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index d82d22e..63569ad 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -9732,7 +9732,8 @@ struct mlx5_cache_entry *
.external = !!dev_flow->external,
.transfer = !!attr->transfer,
.fdb_def_rule = !!priv->fdb_def_rule,
-   .skip_scale = !!dev_flow->skip_scale,
+   .skip_scale = dev_flow->skip_scale &
+   (1 << MLX5_SCALE_FLOW_GROUP_BIT),
};
 
if (!wks)
@@ -10090,7 +10091,11 @@ struct mlx5_cache_entry *
jump_group = ((const struct rte_flow_action_jump *)
action->conf)->group;
grp_info.std_tbl_fix = 0;
-   grp_info.skip_scale = 0;
+   if (dev_flow->skip_scale &
+   (1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT))
+   grp_info.skip_scale = 1;
+   else
+   grp_info.skip_scale = 0;
ret = mlx5_flow_group_to_table(dev, tunnel,
   jump_group,
   &table,
-- 
1.8.3.1



[dpdk-dev] [PATCH 5/5] doc: update the advanced E-Switch mirroring support

2021-01-10 Thread Jiawei Wang
Update the description in the MLX5 PMD guide and the release notes.
Add the new support for E-Switch mirroring and jump in one flow,
and for header modify of one port in E-Switch mirroring.

Signed-off-by: Jiawei Wang 
---
 doc/guides/nics/mlx5.rst   | 2 ++
 doc/guides/rel_notes/release_21_02.rst | 6 ++
 2 files changed, 8 insertions(+)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 3bda0f8..65922ef 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -98,6 +98,8 @@ Features
 - Hardware LRO.
 - Hairpin.
 - Multiple-thread flow insertion.
+- E-Switch mirroring and jump.
+- E-Switch mirroring and modify.
 
 Limitations
 ---
diff --git a/doc/guides/rel_notes/release_21_02.rst 
b/doc/guides/rel_notes/release_21_02.rst
index 638f981..76025aa 100644
--- a/doc/guides/rel_notes/release_21_02.rst
+++ b/doc/guides/rel_notes/release_21_02.rst
@@ -55,6 +55,12 @@ New Features
  Also, make sure to start the actual text at the margin.
  ===
 
+* **Updated Mellanox mlx5 driver.**
+
+  Updated the Mellanox mlx5 driver with new features and improvements, including:
+
+  * Added support for E-Switch mirroring and jump in one flow.
+  * Added support for header modify for one port in an E-Switch mirroring flow.
 
 Removed Items
 -
-- 
1.8.3.1



[dpdk-dev] [PATCH v3 2/6] app/regex: support multi QPs

2021-01-10 Thread Ophir Munk
Up to this commit the regex application used one QP which was assigned a
number of jobs, each with a different segment of a file to parse.  This
commit adds support for multiple QPs assignments. All QPs will be
assigned the same number of jobs, with the same segments of file to
parse. It will enable comparing functionality with different numbers of
QPs. All queues are managed on one core with one thread. This commit
focuses on changing the routines' API to support multiple QPs; mainly,
QP scalar variables are replaced by per-QP struct instances. The
enqueue/dequeue operations are interleaved as follows:
 enqueue(QP #1)
 enqueue(QP #2)
 ...
 enqueue(QP #n)
 dequeue(QP #1)
 dequeue(QP #2)
 ...
 dequeue(QP #n)
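
A sketch of that pattern in code (single lcore; burst sizing, completion
tracking and error handling elided; qps/nb_jobs follow the app's naming):

    uint16_t qp_id;

    for (qp_id = 0; qp_id < nb_qps; qp_id++)
        rte_regexdev_enqueue_burst(dev_id, qp_id,
                                   qps[qp_id].ops, nb_jobs);
    for (qp_id = 0; qp_id < nb_qps; qp_id++)
        rte_regexdev_dequeue_burst(dev_id, qp_id,
                                   qps[qp_id].ops, nb_jobs);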

A new parameter 'nb_qps' was added to configure the number of QPs:
 --nb_qps N.
If not configured, nb_qps is set to 1 by default.

Signed-off-by: Ophir Munk 
Acked-by: Ori Kam 
---
 app/test-regex/main.c | 322 --
 1 file changed, 204 insertions(+), 118 deletions(-)

diff --git a/app/test-regex/main.c b/app/test-regex/main.c
index cb2a065..d225267 100644
--- a/app/test-regex/main.c
+++ b/app/test-regex/main.c
@@ -33,12 +33,22 @@ enum app_args {
ARG_NUM_OF_JOBS,
ARG_PERF_MODE,
ARG_NUM_OF_ITERATIONS,
+   ARG_NUM_OF_QPS,
 };
 
 struct job_ctx {
struct rte_mbuf *mbuf;
 };
 
+struct qp_params {
+   uint32_t total_enqueue;
+   uint32_t total_dequeue;
+   uint32_t total_matches;
+   struct rte_regex_ops **ops;
+   struct job_ctx *jobs_ctx;
+   char *buf;
+};
+
 static void
 usage(const char *prog_name)
 {
@@ -47,13 +57,15 @@ usage(const char *prog_name)
" --data NAME: data file to use\n"
" --nb_jobs: number of jobs to use\n"
" --perf N: only outputs the performance data\n"
-   " --nb_iter N: number of iteration to run\n",
+   " --nb_iter N: number of iteration to run\n"
+   " --nb_qps N: number of queues to use\n",
prog_name);
 }
 
 static void
 args_parse(int argc, char **argv, char *rules_file, char *data_file,
-  uint32_t *nb_jobs, bool *perf_mode, uint32_t *nb_iterations)
+  uint32_t *nb_jobs, bool *perf_mode, uint32_t *nb_iterations,
+  uint32_t *nb_qps)
 {
char **argvopt;
int opt;
@@ -71,6 +83,8 @@ args_parse(int argc, char **argv, char *rules_file, char 
*data_file,
{ "perf", 0, 0, ARG_PERF_MODE},
/* Number of iterations to run with perf test */
{ "nb_iter", 1, 0, ARG_NUM_OF_ITERATIONS},
+   /* Number of QPs. */
+   { "nb_qps", 1, 0, ARG_NUM_OF_QPS},
/* End of options */
{ 0, 0, 0, 0 }
};
@@ -104,6 +118,9 @@ args_parse(int argc, char **argv, char *rules_file, char 
*data_file,
case ARG_NUM_OF_ITERATIONS:
*nb_iterations = atoi(optarg);
break;
+   case ARG_NUM_OF_QPS:
+   *nb_qps = atoi(optarg);
+   break;
case ARG_HELP:
usage("RegEx test app");
break;
@@ -163,15 +180,17 @@ read_file(char *file, char **buf)
 }
 
 static int
-init_port(uint16_t *nb_max_payload, char *rules_file, uint8_t *nb_max_matches)
+init_port(uint16_t *nb_max_payload, char *rules_file, uint8_t *nb_max_matches,
+ uint32_t nb_qps)
 {
uint16_t id;
+   uint16_t qp_id;
uint16_t num_devs;
char *rules = NULL;
long rules_len;
struct rte_regexdev_info info;
struct rte_regexdev_config dev_conf = {
-   .nb_queue_pairs = 1,
+   .nb_queue_pairs = nb_qps,
.nb_groups = 1,
};
struct rte_regexdev_qp_conf qp_conf = {
@@ -203,7 +222,8 @@ init_port(uint16_t *nb_max_payload, char *rules_file, 
uint8_t *nb_max_matches)
*nb_max_matches = info.max_matches;
*nb_max_payload = info.max_payload_size;
if (info.regexdev_capa & RTE_REGEXDEV_SUPP_MATCH_AS_END_F)
-   dev_conf.dev_cfg_flags |= 
RTE_REGEXDEV_CFG_MATCH_AS_END_F;
+   dev_conf.dev_cfg_flags |=
+   RTE_REGEXDEV_CFG_MATCH_AS_END_F;
dev_conf.nb_max_matches = info.max_matches;
dev_conf.nb_rules_per_group = info.max_rules_per_group;
dev_conf.rule_db_len = rules_len;
@@ -214,12 +234,16 @@ init_port(uint16_t *nb_max_payload, char *rules_file, 
uint8_t *nb_max_matches)
goto error;
}
if (info.regexdev_capa & RTE_REGEXDEV_CAPA_QUEUE_PAIR_OOS_F)
-   qp_conf.qp_conf_flags |= RTE_REGEX_QUEUE_PAIR_CFG_OOS_F;
-   res = rte_regexdev_queue_pair_setup(id, 0, &qp_conf);
-   if (res < 0) {
-   printf("Error, can't setup

[dpdk-dev] [PATCH v3 0/6] regex multi Q with multi cores support

2021-01-10 Thread Ophir Munk
This patchset enhances the regex application to support multi Q with multi 
cores.

v1: Initial release.
v2: Update documentation (testregex.rst).
v3: fix an error in commit "app/regex: support performance measurements per QP"
The following line triggered an error with PPC compiler:
error: qp may be used uninitialized in this function [-Werror=maybe-uninitialized]
time = ((double)qp->end - qp->start) / CLOCKS_PER_SEC;
Adding Acked-By

Ophir Munk (6):
  app/regex: move mem pool creation to worker routine
  app/regex: support multi QPs
  app/regex: read data file once at startup
  app/regex: support multi cores
  app/regex: support performance measurements per QP
  app/regex: replace Linux clock() API with rdtsc

 app/test-regex/main.c  | 519 +
 doc/guides/tools/testregex.rst |  30 ++-
 2 files changed, 398 insertions(+), 151 deletions(-)

-- 
2.8.4



[dpdk-dev] [PATCH v3 1/6] app/regex: move mem pool creation to worker routine

2021-01-10 Thread Ophir Munk
Function rte_pktmbuf_pool_create() is moved from the init_port() routine
to the run_regex() routine. Looking forward to multi-core support,
init_port() will be called only once as part of application startup,
while mempool creation will be done multiple times (once per core).
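
Looking ahead to per-core pools: mempool names must be unique, so each
worker would create its own pool, e.g. (a sketch; MBUF_SIZE and nb_jobs
as used elsewhere in the app):

    char name[RTE_MEMPOOL_NAMESIZE];
    struct rte_mempool *mp;

    snprintf(name, sizeof(name), "mbuf_pool_%u", rte_lcore_id());
    mp = rte_pktmbuf_pool_create(name, nb_jobs, 0, 0, MBUF_SIZE,
                                 rte_socket_id());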

Signed-off-by: Ophir Munk 
Acked-by: Ori Kam 
---
 app/test-regex/main.c | 34 +++---
 1 file changed, 15 insertions(+), 19 deletions(-)

diff --git a/app/test-regex/main.c b/app/test-regex/main.c
index ac6152d..cb2a065 100644
--- a/app/test-regex/main.c
+++ b/app/test-regex/main.c
@@ -163,8 +163,7 @@ read_file(char *file, char **buf)
 }
 
 static int
-init_port(struct rte_mempool **mbuf_mp, uint32_t nb_jobs,
- uint16_t *nb_max_payload, char *rules_file, uint8_t *nb_max_matches)
+init_port(uint16_t *nb_max_payload, char *rules_file, uint8_t *nb_max_matches)
 {
uint16_t id;
uint16_t num_devs;
@@ -187,14 +186,6 @@ init_port(struct rte_mempool **mbuf_mp, uint32_t nb_jobs,
return -EINVAL;
}
 
-   *mbuf_mp = rte_pktmbuf_pool_create("mbuf_pool", nb_jobs, 0,
- 0, MBUF_SIZE, rte_socket_id());
-   if (*mbuf_mp == NULL) {
-   printf("Error, can't create memory pool\n");
-   res = -ENOMEM;
-   goto error;
-   }
-
rules_len = read_file(rules_file, &rules);
if (rules_len < 0) {
printf("Error, can't read rules files.\n");
@@ -237,8 +228,6 @@ init_port(struct rte_mempool **mbuf_mp, uint32_t nb_jobs,
 error:
if (rules)
rte_free(rules);
-   if (*mbuf_mp)
-   rte_mempool_free(*mbuf_mp);
return res;
 }
 
@@ -248,7 +237,7 @@ extbuf_free_cb(void *addr __rte_unused, void *fcb_opaque 
__rte_unused)
 }
 
 static int
-run_regex(struct rte_mempool *mbuf_mp, uint32_t nb_jobs,
+run_regex(uint32_t nb_jobs,
  uint16_t nb_max_payload, bool perf_mode, uint32_t nb_iterations,
  char *data_file, uint8_t nb_max_matches)
 {
@@ -273,9 +262,17 @@ run_regex(struct rte_mempool *mbuf_mp, uint32_t nb_jobs,
time_t end;
double time;
struct job_ctx *jobs_ctx;
+   struct rte_mempool *mbuf_mp;
 
shinfo.free_cb = extbuf_free_cb;
 
+   mbuf_mp = rte_pktmbuf_pool_create("mbuf_pool", nb_jobs, 0,
+   0, MBUF_SIZE, rte_socket_id());
+   if (mbuf_mp == NULL) {
+   printf("Error, can't create memory pool\n");
+   return -ENOMEM;
+   }
+
ops = rte_malloc(NULL, sizeof(*ops) * nb_jobs, 0);
if (!ops) {
printf("Error, can't allocate memory for ops.\n");
@@ -409,6 +406,9 @@ run_regex(struct rte_mempool *mbuf_mp, uint32_t nb_jobs,
rte_free(jobs_ctx);
if (buf)
rte_free(buf);
+   if (mbuf_mp)
+   rte_mempool_free(mbuf_mp);
+
return res;
 }
 
@@ -417,7 +417,6 @@ main(int argc, char **argv)
 {
char rules_file[MAX_FILE_NAME];
char data_file[MAX_FILE_NAME];
-   struct rte_mempool *mbuf_mp = NULL;
uint32_t nb_jobs = 0;
uint16_t nb_max_payload = 0;
bool perf_mode = 0;
@@ -434,16 +433,13 @@ main(int argc, char **argv)
args_parse(argc, argv, rules_file, data_file, &nb_jobs,
   &perf_mode, &nb_iterations);
 
-   ret = init_port(&mbuf_mp, nb_jobs, &nb_max_payload, rules_file,
-   &nb_max_matches);
+   ret = init_port(&nb_max_payload, rules_file, &nb_max_matches);
if (ret < 0)
rte_exit(EXIT_FAILURE, "init port failed\n");
-   ret = run_regex(mbuf_mp, nb_jobs, nb_max_payload, perf_mode,
+   ret = run_regex(nb_jobs, nb_max_payload, perf_mode,
nb_iterations, data_file, nb_max_matches);
if (ret < 0) {
-   rte_mempool_free(mbuf_mp);
rte_exit(EXIT_FAILURE, "RegEx function failed\n");
}
-   rte_mempool_free(mbuf_mp);
return EXIT_SUCCESS;
 }
-- 
2.8.4



[dpdk-dev] [PATCH v3 3/6] app/regex: read data file once at startup

2021-01-10 Thread Ophir Munk
Up to this commit the input data file was read from scratch for each QP,
which is redundant. Starting from this commit the data file is read only
once at startup. Each QP will clone the data.

Signed-off-by: Ophir Munk 
Acked-by: Ori Kam 
---
 app/test-regex/main.c | 63 +++
 1 file changed, 39 insertions(+), 24 deletions(-)

diff --git a/app/test-regex/main.c b/app/test-regex/main.c
index d225267..9bafd02 100644
--- a/app/test-regex/main.c
+++ b/app/test-regex/main.c
@@ -180,6 +180,19 @@ read_file(char *file, char **buf)
 }
 
 static int
+clone_buf(char *data_buf, char **buf, long data_len)
+{
+   char *dest_buf;
+   dest_buf =
+   rte_malloc(NULL, sizeof(char) * (data_len + 1), 4096);
+   if (!dest_buf)
+   return -ENOMEM;
+   memcpy(dest_buf, data_buf, data_len + 1);
+   *buf = dest_buf;
+   return 0;
+}
+
+static int
 init_port(uint16_t *nb_max_payload, char *rules_file, uint8_t *nb_max_matches,
  uint32_t nb_qps)
 {
@@ -262,12 +275,11 @@ extbuf_free_cb(void *addr __rte_unused, void *fcb_opaque 
__rte_unused)
 
 static int
 run_regex(uint32_t nb_jobs,
- uint16_t nb_max_payload, bool perf_mode, uint32_t nb_iterations,
- char *data_file, uint8_t nb_max_matches, uint32_t nb_qps)
+ bool perf_mode, uint32_t nb_iterations,
+ uint8_t nb_max_matches, uint32_t nb_qps,
+ char *data_buf, long data_len, long job_len)
 {
char *buf = NULL;
-   long buf_len = 0;
-   long job_len = 0;
uint32_t actual_jobs = 0;
uint32_t i;
uint16_t qp_id;
@@ -344,22 +356,8 @@ run_regex(uint32_t nb_jobs,
}
}
 
-   buf_len = read_file(data_file, &buf);
-   if (buf_len <= 0) {
-   printf("Error, can't read file, or file is empty.\n");
-   res = -EXIT_FAILURE;
-   goto end;
-   }
-
-   job_len = buf_len / nb_jobs;
-   if (job_len == 0) {
-   printf("Error, To many jobs, for the given input.\n");
-   res = -EXIT_FAILURE;
-   goto end;
-   }
-
-   if (job_len > nb_max_payload) {
-   printf("Error, not enough jobs to cover input.\n");
+   if (clone_buf(data_buf, &buf, data_len)) {
+   printf("Error, can't clone buf.\n");
res = -EXIT_FAILURE;
goto end;
}
@@ -367,8 +365,8 @@ run_regex(uint32_t nb_jobs,
/* Assign each mbuf with the data to handle. */
actual_jobs = 0;
pos = 0;
-   for (i = 0; (pos < buf_len) && (i < nb_jobs) ; i++) {
-   long act_job_len = RTE_MIN(job_len, buf_len - pos);
+   for (i = 0; (pos < data_len) && (i < nb_jobs) ; i++) {
+   long act_job_len = RTE_MIN(job_len, data_len - pos);
rte_pktmbuf_attach_extbuf(ops[i]->mbuf, &buf[pos], 0,
act_job_len, &shinfo);
jobs_ctx[i].mbuf = ops[i]->mbuf;
@@ -505,6 +503,9 @@ main(int argc, char **argv)
uint16_t nb_max_payload = 0;
uint8_t nb_max_matches = 0;
uint32_t nb_qps = 1;
+   char *data_buf;
+   long data_len;
+   long job_len;
 
/* Init EAL. */
ret = rte_eal_init(argc, argv);
@@ -522,10 +523,24 @@ main(int argc, char **argv)
&nb_max_matches, nb_qps);
if (ret < 0)
rte_exit(EXIT_FAILURE, "init port failed\n");
-   ret = run_regex(nb_jobs, nb_max_payload, perf_mode,
-   nb_iterations, data_file, nb_max_matches, nb_qps);
+
+   data_len = read_file(data_file, &data_buf);
+   if (data_len <= 0)
+   rte_exit(EXIT_FAILURE, "Error, can't read file, or file is 
empty.\n");
+
+   job_len = data_len / nb_jobs;
+   if (job_len == 0)
+   rte_exit(EXIT_FAILURE, "Error, To many jobs, for the given 
input.\n");
+
+   if (job_len > nb_max_payload)
+   rte_exit(EXIT_FAILURE, "Error, not enough jobs to cover 
input.\n");
+
+   ret = run_regex(nb_jobs, perf_mode,
+   nb_iterations, nb_max_matches, nb_qps,
+   data_buf, data_len, job_len);
if (ret < 0) {
rte_exit(EXIT_FAILURE, "RegEx function failed\n");
}
+   rte_free(data_buf);
return EXIT_SUCCESS;
 }
-- 
2.8.4
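The resulting flow, condensed into a minimal sketch built around the patch's
read_file() and clone_buf() helpers (the surrounding variable names are
illustrative, and error handling is abbreviated):

/* Sketch: read the input once at startup, then give each QP a private
 * clone; read_file() and clone_buf() come from the patch above. */
char *data_buf;
char *qp_buf;
uint32_t qp_id;
long data_len = read_file(data_file, &data_buf);   /* single read */

if (data_len <= 0)
	rte_exit(EXIT_FAILURE, "Error, can't read file, or file is empty.\n");

for (qp_id = 0; qp_id < nb_qps; qp_id++) {
	if (clone_buf(data_buf, &qp_buf, data_len))    /* per-QP copy */
		rte_exit(EXIT_FAILURE, "Error, can't clone buf.\n");
	/* qp_buf is then attached to this QP's mbufs with
	 * rte_pktmbuf_attach_extbuf(), as in run_regex() above */
}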



[dpdk-dev] [PATCH v3 5/6] app/regex: support performance measurements per QP

2021-01-10 Thread Ophir Munk
Up to this commit measuring the parsing elapsed time and Giga bits per
second performance was done on the aggregation of all QPs (per core).
This commit separates the time measurements per individual QP.

Signed-off-by: Ophir Munk 
Acked-by: Ori Kam 
---
 app/test-regex/main.c | 34 +++---
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/app/test-regex/main.c b/app/test-regex/main.c
index 2948d3e..2fce55d 100644
--- a/app/test-regex/main.c
+++ b/app/test-regex/main.c
@@ -48,6 +48,8 @@ struct qp_params {
struct rte_regex_ops **ops;
struct job_ctx *jobs_ctx;
char *buf;
+   time_t start;
+   time_t end;
 };
 
 struct qps_per_lcore {
@@ -324,8 +326,6 @@ run_regex(void *args)
unsigned long d_ind = 0;
struct rte_mbuf_ext_shared_info shinfo;
int res = 0;
-   time_t start;
-   time_t end;
double time;
struct rte_mempool *mbuf_mp;
struct qp_params *qp;
@@ -418,9 +418,10 @@ run_regex(void *args)
 
qp->buf = buf;
qp->total_matches = 0;
+   qp->start = 0;
+   qp->end = 0;
}
 
-   start = clock();
for (i = 0; i < nb_iterations; i++) {
for (qp_id = 0; qp_id < nb_qps; qp_id++) {
qp = &qps[qp_id];
@@ -431,6 +432,8 @@ run_regex(void *args)
update = false;
for (qp_id = 0; qp_id < nb_qps; qp_id++) {
qp = &qps[qp_id];
+   if (!qp->start)
+   qp->start = clock();
if (qp->total_dequeue < actual_jobs) {
struct rte_regex_ops **
cur_ops_to_enqueue = qp->ops +
@@ -461,22 +464,31 @@ run_regex(void *args)
qp->total_enqueue -
qp->total_dequeue);
update = true;
+   } else {
+   if (!qp->end)
+   qp->end = clock();
}
+
}
} while (update);
}
-   end = clock();
-   time = ((double)end - start) / CLOCKS_PER_SEC;
-   printf("Job len = %ld Bytes\n",  job_len);
-   printf("Time = %lf sec\n",  time);
-   printf("Perf = %lf Gbps\n",
-  (((double)actual_jobs * job_len * nb_iterations * 8) / time) /
-   10.0);
+   for (qp_id = 0; qp_id < nb_qps; qp_id++) {
+   qp = &qps[qp_id];
+   time = ((double)qp->end - qp->start) / CLOCKS_PER_SEC;
+   printf("Core=%u QP=%u\n", rte_lcore_id(), qp_id + qp_id_base);
+   printf("Job len = %ld Bytes\n",  job_len);
+   printf("Time = %lf sec\n",  time);
+   printf("Perf = %lf Gbps\n\n",
+   (((double)actual_jobs * job_len *
+   nb_iterations * 8) / time) /
+   10.0);
+   }
 
if (rgxc->perf_mode)
goto end;
for (qp_id = 0; qp_id < nb_qps; qp_id++) {
-   printf("\n QP id=%u \n", qp_id);
+   printf("\n Core=%u QP=%u \n",
+  rte_lcore_id(), qp_id + qp_id_base);
qp = &qps[qp_id];
/* Log results per job. */
for (d_ind = 0; d_ind < qp->total_dequeue; d_ind++) {
-- 
2.8.4
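The measurement pattern the diff introduces, condensed (a sketch; the qp
fields are from the patch, the loop details are elided):

/* Each QP latches its own start on first activity and its end once it has
 * drained, so per-QP numbers are no longer blurred by the other queues on
 * the same core. */
if (!qp->start)
	qp->start = clock();          /* first work seen on this QP */

if (qp->total_dequeue < actual_jobs) {
	/* ... enqueue/dequeue bursts for this QP ... */
} else if (!qp->end) {
	qp->end = clock();            /* this QP has finished all jobs */
}

/* reporting, per QP: */
double time = ((double)qp->end - qp->start) / CLOCKS_PER_SEC;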



[dpdk-dev] [PATCH v3 4/6] app/regex: support multi cores

2021-01-10 Thread Ophir Munk
Up to this commit, the regex application ran with multiple QPs on a
single core. This commit adds the option to specify the number of cores
on which multiple QPs will run.
A new parameter 'nb_lcores' was added to configure the number of cores:
--nb_lcores .
If not configured, the number of cores defaults to 1. On application
startup the main core performs a few initial steps: the numbers of QPs
and cores are parsed, the QPs are distributed as evenly as possible
across the cores, and the regex device and all QPs are initialized. The
data file is read and saved in a buffer. Then, for each core, the
application calls rte_eal_remote_launch() with the worker routine
(run_regex) as its parameter.

Signed-off-by: Ophir Munk 
Acked-by: Ori Kam 
---
 app/test-regex/main.c  | 155 -
 doc/guides/tools/testregex.rst |  30 ++--
 2 files changed, 164 insertions(+), 21 deletions(-)

diff --git a/app/test-regex/main.c b/app/test-regex/main.c
index 9bafd02..2948d3e 100644
--- a/app/test-regex/main.c
+++ b/app/test-regex/main.c
@@ -34,6 +34,7 @@ enum app_args {
ARG_PERF_MODE,
ARG_NUM_OF_ITERATIONS,
ARG_NUM_OF_QPS,
+   ARG_NUM_OF_LCORES,
 };
 
 struct job_ctx {
@@ -49,6 +50,26 @@ struct qp_params {
char *buf;
 };
 
+struct qps_per_lcore {
+   unsigned int lcore_id;
+   int socket;
+   uint16_t qp_id_base;
+   uint16_t nb_qps;
+};
+
+struct regex_conf {
+   uint32_t nb_jobs;
+   bool perf_mode;
+   uint32_t nb_iterations;
+   char *data_file;
+   uint8_t nb_max_matches;
+   uint32_t nb_qps;
+   uint16_t qp_id_base;
+   char *data_buf;
+   long data_len;
+   long job_len;
+};
+
 static void
 usage(const char *prog_name)
 {
@@ -58,14 +79,15 @@ usage(const char *prog_name)
" --nb_jobs: number of jobs to use\n"
" --perf N: only outputs the performance data\n"
" --nb_iter N: number of iteration to run\n"
-   " --nb_qps N: number of queues to use\n",
+   " --nb_qps N: number of queues to use\n"
+   " --nb_lcores N: number of lcores to use\n",
prog_name);
 }
 
 static void
 args_parse(int argc, char **argv, char *rules_file, char *data_file,
   uint32_t *nb_jobs, bool *perf_mode, uint32_t *nb_iterations,
-  uint32_t *nb_qps)
+  uint32_t *nb_qps, uint32_t *nb_lcores)
 {
char **argvopt;
int opt;
@@ -85,6 +107,8 @@ args_parse(int argc, char **argv, char *rules_file, char *data_file,
{ "nb_iter", 1, 0, ARG_NUM_OF_ITERATIONS},
/* Number of QPs. */
{ "nb_qps", 1, 0, ARG_NUM_OF_QPS},
+   /* Number of lcores. */
+   { "nb_lcores", 1, 0, ARG_NUM_OF_LCORES},
/* End of options */
{ 0, 0, 0, 0 }
};
@@ -121,6 +145,9 @@ args_parse(int argc, char **argv, char *rules_file, char *data_file,
case ARG_NUM_OF_QPS:
*nb_qps = atoi(optarg);
break;
+   case ARG_NUM_OF_LCORES:
+   *nb_lcores = atoi(optarg);
+   break;
case ARG_HELP:
usage("RegEx test app");
break;
@@ -274,11 +301,18 @@ extbuf_free_cb(void *addr __rte_unused, void *fcb_opaque __rte_unused)
 }
 
 static int
-run_regex(uint32_t nb_jobs,
- bool perf_mode, uint32_t nb_iterations,
- uint8_t nb_max_matches, uint32_t nb_qps,
- char *data_buf, long data_len, long job_len)
+run_regex(void *args)
 {
+   struct regex_conf *rgxc = args;
+   uint32_t nb_jobs = rgxc->nb_jobs;
+   uint32_t nb_iterations = rgxc->nb_iterations;
+   uint8_t nb_max_matches = rgxc->nb_max_matches;
+   uint32_t nb_qps = rgxc->nb_qps;
+   uint16_t qp_id_base  = rgxc->qp_id_base;
+   char *data_buf = rgxc->data_buf;
+   long data_len = rgxc->data_len;
+   long job_len = rgxc->job_len;
+
char *buf = NULL;
uint32_t actual_jobs = 0;
uint32_t i;
@@ -298,9 +332,13 @@ run_regex(uint32_t nb_jobs,
struct qp_params *qps = NULL;
bool update;
uint16_t qps_used = 0;
+   char mbuf_pool[16];
 
shinfo.free_cb = extbuf_free_cb;
-   mbuf_mp = rte_pktmbuf_pool_create("mbuf_pool", nb_jobs * nb_qps, 0,
+   snprintf(mbuf_pool,
+sizeof(mbuf_pool),
+"mbuf_pool_%2u", qp_id_base);
+   mbuf_mp = rte_pktmbuf_pool_create(mbuf_pool, nb_jobs * nb_qps, 0,
0, MBUF_SIZE, rte_socket_id());
if (mbuf_mp == NULL) {
printf("Error, can't create memory pool\n");
@@ -402,7 +440,7 @@ run_regex(uint32_t nb_jobs,
qp->total_enqueue +=
rte_regexdev_enqueue_burst
   

[dpdk-dev] [PATCH v3 6/6] app/regex: replace Linux clock() API with rdtsc

2021-01-10 Thread Ophir Munk
Performance measurements (elapsed time and Gbps) were based on the Linux
clock() API. The resolution is improved by replacing the clock() API with
the rte_rdtsc_precise() API.

Signed-off-by: Ophir Munk 
Acked-by: Ori Kam 
---
 app/test-regex/main.c | 31 +--
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/app/test-regex/main.c b/app/test-regex/main.c
index 2fce55d..aea4fa6 100644
--- a/app/test-regex/main.c
+++ b/app/test-regex/main.c
@@ -48,8 +48,8 @@ struct qp_params {
struct rte_regex_ops **ops;
struct job_ctx *jobs_ctx;
char *buf;
-   time_t start;
-   time_t end;
+   uint64_t start;
+   uint64_t cycles;
 };
 
 struct qps_per_lcore {
@@ -326,7 +326,7 @@ run_regex(void *args)
unsigned long d_ind = 0;
struct rte_mbuf_ext_shared_info shinfo;
int res = 0;
-   double time;
+   long double time;
struct rte_mempool *mbuf_mp;
struct qp_params *qp;
struct qp_params *qps = NULL;
@@ -419,7 +419,7 @@ run_regex(void *args)
qp->buf = buf;
qp->total_matches = 0;
qp->start = 0;
-   qp->end = 0;
+   qp->cycles = 0;
}
 
for (i = 0; i < nb_iterations; i++) {
@@ -432,9 +432,8 @@ run_regex(void *args)
update = false;
for (qp_id = 0; qp_id < nb_qps; qp_id++) {
qp = &qps[qp_id];
-   if (!qp->start)
-   qp->start = clock();
if (qp->total_dequeue < actual_jobs) {
+   qp->start = rte_rdtsc_precise();
struct rte_regex_ops **
cur_ops_to_enqueue = qp->ops +
qp->total_enqueue;
@@ -463,25 +462,21 @@ run_regex(void *args)
cur_ops_to_dequeue,
qp->total_enqueue -
qp->total_dequeue);
+   qp->cycles +=
+(rte_rdtsc_precise() - qp->start);
update = true;
-   } else {
-   if (!qp->end)
-   qp->end = clock();
}
-
}
} while (update);
}
for (qp_id = 0; qp_id < nb_qps; qp_id++) {
qp = &qps[qp_id];
-   time = ((double)qp->end - qp->start) / CLOCKS_PER_SEC;
-   printf("Core=%u QP=%u\n", rte_lcore_id(), qp_id + qp_id_base);
-   printf("Job len = %ld Bytes\n",  job_len);
-   printf("Time = %lf sec\n",  time);
-   printf("Perf = %lf Gbps\n\n",
-   (((double)actual_jobs * job_len *
-   nb_iterations * 8) / time) /
-   10.0);
+   time = (long double)qp->cycles / rte_get_timer_hz();
+   printf("Core=%u QP=%u Job=%ld Bytes Time=%Lf sec Perf=%Lf "
+  "Gbps\n", rte_lcore_id(), qp_id + qp_id_base,
+  job_len, time,
+  (((double)actual_jobs * job_len * nb_iterations * 8)
+  / time) / 10.0);
}
 
if (rgxc->perf_mode)
-- 
2.8.4
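In short, the change swaps wall-clock sampling for cycle accumulation around
the active enqueue/dequeue work (a sketch of the pattern in the diff above):

/* Count only active cycles, then convert once at report time.
 * rte_rdtsc_precise() issues a barrier before reading the TSC, which gives
 * far better resolution than clock(). */
uint64_t t = rte_rdtsc_precise();
/* ... enqueue/dequeue burst for this QP ... */
qp->cycles += rte_rdtsc_precise() - t;

/* report: cycles -> seconds using the TSC frequency */
long double time = (long double)qp->cycles / rte_get_timer_hz();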



Re: [dpdk-dev] [PATCH v2 5/6] app/regex: support performance measurements per QP

2021-01-10 Thread Ophir Munk


> -Original Message-
> From: Thomas Monjalon 
> Sent: Friday, January 8, 2021 11:09 AM
> To: Ophir Munk 
> Cc: dev@dpdk.org; Ori Kam 
> Subject: Re: [dpdk-dev] [PATCH v2 5/6] app/regex: support performance
> measurements per QP
> 
> 20/12/2020 11:41, Ophir Munk:
> > Up to this commit measuring the parsing elapsed time and Giga bits per
> > second performance was done on the aggregation of all QPs (per core).
> > This commit separates the time measurements per individual QP.
> >
> > Signed-off-by: Ophir Munk 
> > ---
> > --- a/app/test-regex/main.c
> > +++ b/app/test-regex/main.c
> > +   for (qp_id = 0; qp_id < nb_qps; qp_id++) {
> > +   time = ((double)qp->end - qp->start) / CLOCKS_PER_SEC;
> 
> This line triggers an error with PPC compiler:
> error: ‘qp’ may be used uninitialized in this function [-Werror=maybe-
> uninitialized]
>time = ((double)qp->end - qp->start) / CLOCKS_PER_SEC;
> 
Thanks for reporting.
I sent v3 with a fix.
Could I have known this error in advance? Is there a CI report?




Re: [dpdk-dev] [RFC] app/testpmd: support multi-process

2021-01-10 Thread Wisam Monther
Hi,

> -Original Message-
> From: dev  On Behalf Of Lijun Ou
> Sent: Friday, January 8, 2021 11:46 AM
> To: ferruh.yi...@intel.com; wenzhuo...@intel.com; beilei.x...@intel.com;
> bernard.iremon...@intel.com
> Cc: dev@dpdk.org
> Subject: [dpdk-dev] [RFC] app/testpmd: support multi-process
> 
> This patch adds multi-process support for testpmd.
> The test cmd example as follows:
> the primary cmd:
> ./testpmd -w xxx --file-prefix=xx -l 0-1 -n 2 -- -i\
> --rxq=16 --txq=16 --num-procs=2 --proc-id=0 the secondary cmd:
> ./testpmd -w xxx --file-prefix=xx -l 2-3 -n 2 -- -i\
> --rxq=16 --txq=16 --num-procs=2 --proc-id=1
> 
> Signed-off-by: Min Hu (Connor) 
> Signed-off-by: Lijun Ou 
> ---
>  app/test-pmd/cmdline.c|   6 ++-
>  app/test-pmd/config.c |   9 +++-
>  app/test-pmd/parameters.c |   9 
>  app/test-pmd/testpmd.c| 133 --
> 
>  app/test-pmd/testpmd.h|   7 +++
>  5 files changed, 121 insertions(+), 43 deletions(-)
> 

+1 for having this support for testpmd.

Some questions in my mind:
How are the queues distributed here? In the example I see 16 defined; are they for
one instance or for all?
Will all processes share the same memory region? If an RTE_FLOW is installed in one
instance, will it be active for all?
Same question for detaching a device in one instance: how will it reflect on the
others?
There are many other scenarios like this; how will it handle those?

BRs,
Wisam Jaddo


[dpdk-dev] [PATCH v2] net/mlx5: fix flow check hairpin split

2021-01-10 Thread Dekel Peled
Previously, hairpin queues were identified using the
mlx5_rxq_get_type() function.
A recent patch replaced it with mlx5_rxq_get_hairpin_conf(),
checking that the returned conf != NULL.
The case of a NULL return value (the queue is not a hairpin queue) was
not handled. As a result, non-hairpin flows were handled incorrectly.
This patch adds the required check for a NULL return value.

Fixes: 509f8470de55 ("net/mlx5: do not split hairpin flow in explicit mode")
Cc: sta...@dpdk.org

Signed-off-by: Dekel Peled 
Acked-by: Matan Azrad 

---
v2: update conditions according to coding standards.
---

---
 drivers/net/mlx5/mlx5_flow.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 2a4073c126..37502067d4 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -3548,7 +3548,7 @@ flow_check_hairpin_split(struct rte_eth_dev *dev,
if (queue == NULL)
return 0;
conf = mlx5_rxq_get_hairpin_conf(dev, queue->index);
-   if (conf != NULL && !!conf->tx_explicit)
+   if (conf == NULL || conf->tx_explicit != 0)
return 0;
queue_action = 1;
action_n++;
@@ -3558,7 +3558,7 @@ flow_check_hairpin_split(struct rte_eth_dev *dev,
if (rss == NULL || rss->queue_num == 0)
return 0;
conf = mlx5_rxq_get_hairpin_conf(dev, rss->queue[0]);
-   if (conf != NULL && !!conf->tx_explicit)
+   if (conf == NULL || conf->tx_explicit != 0)
return 0;
queue_action = 1;
action_n++;
-- 
2.25.1
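Spelled out, the corrected early-return covers the case the old condition
let fall through (a sketch of the same logic, annotated):

/* Corrected logic:
 *   conf == NULL           -> not a hairpin queue: no split, return 0
 *   conf->tx_explicit != 0 -> explicit-mode hairpin: no split, return 0
 *   otherwise              -> implicit-mode hairpin: count it for the split
 * The old `conf != NULL && !!conf->tx_explicit` let conf == NULL fall
 * through and wrongly counted non-hairpin queues. */
conf = mlx5_rxq_get_hairpin_conf(dev, queue->index);
if (conf == NULL || conf->tx_explicit != 0)
	return 0;
queue_action = 1;
action_n++;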



[dpdk-dev] [PATCH] doc: add release milestones definition

2021-01-10 Thread Asaf Penso
Signed-off-by: Asaf Penso 
---
 doc/guides/contributing/patches.rst | 52 +
 1 file changed, 52 insertions(+)

diff --git a/doc/guides/contributing/patches.rst b/doc/guides/contributing/patches.rst
index 4e9140b..849fe6d 100644
--- a/doc/guides/contributing/patches.rst
+++ b/doc/guides/contributing/patches.rst
@@ -657,3 +657,55 @@ patch accepted. The general cycle for patch review and acceptance is:
  than rework of the original.
 * Trivial patches may be merged sooner than described above at the tree committer's
   discretion.
+
+
+Milestones definition
+~~~~~~~~~~~~~~~~~~~~~
+
+Each DPDK release has a couple of milestones that help everyone to converge to the release date.
+The following is a list of these milestones together with concrete definitions and expectations.
+
+Roadmap
+* Content - new features in libs, drivers, apps, and examples
+* Timeframe - to be published by the first day of the release
+
+RFC
+* Content - public design review for the features, explaining the need, and the design
+* It should include the API changes, in libs and apps
+* RFCs are not mandatory but are strongly advised to start early discussions
+* Nice to have - driver implementation (full or draft), example code, and documentation
+
+Proposal Deadline
+* Content - either an RFC as stated above or a v1 patch
+* Nice to have - code complete for libs
+
+rc1
+* Content - new or updated API
+* New API should be defined and implemented in shared libraries
+* The API should include DoxyGen documentation and be part of the relevant .rst files (lib, release_notes)
+* Code and related documentation must be updated atomically in the same patch
+* It should be used in a test application (/apps) and also documented in the relevant .rst files
+* At least one pmd should implement this API. It can be in a draft mode but must be sent as a separate series
+* At least 2 weeks before -rc1 the above should be sent to the ML for review
+* Nice to have: example code (/examples)
+
+rc2
+* Content - drivers
+* All features should be implemented in drivers
+* The drivers should include documentation and be part of the relevant .rst files (driver, release_notes)
+* Code and related documentation must be updated atomically in the same patch
+* At least 2 weeks before -rc2 the above should be sent to the ML for review
+
+rc3
+* Content - test apps
+* New functionality that doesn’t depend on libs update can be integrated as part of -rc3
+* The app should include documentation and be part of the relevant .rst files (app, release_notes if significant)
+* Code and related documentation must be updated atomically in the same patch
+* Last opportunity for Misc changes
+* Small driver rework
+* libs and driver cleanup
+* Big bug fixes
+
+rc4
+* Content - documentation updates
+* Critical bug fixes
-- 
1.8.3.1



Re: [dpdk-dev] [PATCH v2 5/6] app/regex: support performance measurements per QP

2021-01-10 Thread Thomas Monjalon
10/01/2021 12:16, Ophir Munk:
> 
> > -Original Message-
> > From: Thomas Monjalon 
> > Sent: Friday, January 8, 2021 11:09 AM
> > To: Ophir Munk 
> > Cc: dev@dpdk.org; Ori Kam 
> > Subject: Re: [dpdk-dev] [PATCH v2 5/6] app/regex: support performance
> > measurements per QP
> > 
> > 20/12/2020 11:41, Ophir Munk:
> > > Up to this commit measuring the parsing elapsed time and Giga bits per
> > > second performance was done on the aggregation of all QPs (per core).
> > > This commit separates the time measurements per individual QP.
> > >
> > > Signed-off-by: Ophir Munk 
> > > ---
> > > --- a/app/test-regex/main.c
> > > +++ b/app/test-regex/main.c
> > > + for (qp_id = 0; qp_id < nb_qps; qp_id++) {
> > > + time = ((double)qp->end - qp->start) / CLOCKS_PER_SEC;
> > 
> > This line triggers an error with PPC compiler:
> > error: ‘qp’ may be used uninitialized in this function [-Werror=maybe-
> > uninitialized]
> >time = ((double)qp->end - qp->start) / CLOCKS_PER_SEC;
> > 
> Thanks for reporting.
> I sent v3 with a fix.
> Could I have known this error in advance? Is there a CI report?

No, I think there is no CI report for PPC.
Actually, PPC support in DPDK is not clear.




Re: [dpdk-dev] [v21.02 v3 06/10] net/bonding: remove local variable shadowing outer one

2021-01-10 Thread Min Hu (Connor)

Acked-by: Min Hu (Connor) 

On 2020/11/19 19:58, Ferruh Yigit wrote:

'retval' is already defined in the function scope, removing the 'retval'
in the block scope.

Fixes: 112891cd27e5 ("net/bonding: add dedicated HW queues for LACP control")
Cc: sta...@dpdk.org

Signed-off-by: Ferruh Yigit 
---
Cc: tomaszx.kula...@intel.com
---
  drivers/net/bonding/rte_eth_bond_8023ad.c | 6 ++
  1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/bonding/rte_eth_bond_8023ad.c b/drivers/net/bonding/rte_eth_bond_8023ad.c
index 67ca0730fa..5fe004e551 100644
--- a/drivers/net/bonding/rte_eth_bond_8023ad.c
+++ b/drivers/net/bonding/rte_eth_bond_8023ad.c
@@ -1334,8 +1334,7 @@ bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals,
rte_eth_macaddr_get(slave_id, &m_hdr->eth_hdr.s_addr);
  
  		if (internals->mode4.dedicated_queues.enabled == 0) {

-   int retval = rte_ring_enqueue(port->tx_ring, pkt);
-   if (retval != 0) {
+   if (rte_ring_enqueue(port->tx_ring, pkt) != 0) {
/* reset timer */
port->rx_marker_timer = 0;
wrn = WRN_TX_QUEUE_FULL;
@@ -1355,8 +1354,7 @@ bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals,
}
} else if (likely(subtype == SLOW_SUBTYPE_LACP)) {
if (internals->mode4.dedicated_queues.enabled == 0) {
-   int retval = rte_ring_enqueue(port->rx_ring, pkt);
-   if (retval != 0) {
+   if (rte_ring_enqueue(port->rx_ring, pkt) != 0) {
				/* If RX fing full free lacpdu message and drop packet */
wrn = WRN_RX_QUEUE_FULL;
goto free_out;



[dpdk-dev] Re: [RFC PATCH v1 4/6] app/eventdev: add release barriers for pipeline test

2021-01-10 Thread Feifei Wang


> -----Original Message-----
> From: Pavan Nikhilesh Bhagavatula 
> Sent: January 8, 2021 18:58
> To: Feifei Wang ; jer...@marvell.com; Harry
> van Haaren 
> Cc: dev@dpdk.org; nd ; Honnappa Nagarahalli
> ; sta...@dpdk.org; Ruifeng Wang
> ; nd ; nd ; nd
> 
> Subject: RE: [RFC PATCH v1 4/6] app/eventdev: add release barriers for pipeline
> test
> 
> Hi Feifei,
> 
> >Hi, Pavan
> >
> >> -----Original Message-----
> >> From: Pavan Nikhilesh Bhagavatula 
> >> Sent: January 8, 2021 17:13
> >> To: Feifei Wang ; jer...@marvell.com; Harry
> >> van Haaren 
> >> Cc: dev@dpdk.org; nd ; Honnappa Nagarahalli
> >> ; sta...@dpdk.org; Ruifeng Wang
> >> ; nd ; nd 
> >> Subject: RE: [RFC PATCH v1 4/6] app/eventdev: add release barriers for
> >> pipeline test
> >>
> >> Hi Feifei,
> >>
> >> >Hi, Pavan
> >> >
> >> >> -----Original Message-----
> >> >> From: Pavan Nikhilesh Bhagavatula 
> >> >> Sent: January 5, 2021 17:29
> >> >> To: Feifei Wang ; jer...@marvell.com; Harry
> >> >> van Haaren 
> >> >> Cc: dev@dpdk.org; nd ; Honnappa Nagarahalli
> >> >> ; sta...@dpdk.org; Ruifeng Wang
> >> >> ; nd 
> >> >> Subject: RE: [RFC PATCH v1 4/6] app/eventdev: add release barriers
> >> >> for pipeline test
> >> >>
> >> >> Hi Feifei,
> >> >>
> >> >> >Hi, Pavan
> >> >> >
> >> >> >Sorry for my late reply and thanks very much for your review.
> >> >> >
> >> >> >> -----Original Message-----
> >> >> >> From: Pavan Nikhilesh Bhagavatula 
> >> >> >> Sent: December 22, 2020 18:33
> >> >> >> To: Feifei Wang ; jer...@marvell.com; Harry
> >> >> >> van Haaren ; Pavan Nikhilesh 
> >> >> >> Cc: dev@dpdk.org; nd ; Honnappa Nagarahalli
> >> >> >> ; sta...@dpdk.org; Phil Yang 
> >> >> >> Subject: RE: [RFC PATCH v1 4/6] app/eventdev: add release
> >> >> >> barriers for pipeline test
> >> >> >>
> >> >> >> >Add release barriers before updating the processed packets for
> >> >> >> >worker lcores to ensure the worker lcore has really finished
> >> >> >> >data processing and then it can update the processed packets
> >> >> >> >number.
> >> >> >>
> >> >> >> I believe we can live with minor inaccuracies in stats being
> >> >> >> presented as atomics are pretty heavy when the scheduler is
> >> >> >> limited to a burst size of 1.
> >> >> >>
> >> >> >> One option is to move it before a pipeline operation
> >> >> >> (pipeline_event_tx, pipeline_fwd_event etc.) as they imply an
> >> >> >> implicit release barrier (as all the changes done to the event
> >> >> >> should be visible to the next core).
> >> >> >
> >> >> >If I understand correctly, your meaning is that we move the
> >> >> >release barriers before pipeline_event_tx or pipeline_fwd_event.
> >> >> >This can ensure the event has been processed before the next core
> >> >> >begins to tx/fwd. For example:
> >> >>
> >> >> What I meant was event APIs such as `rte_event_enqueue_burst`,
> >> >> `rte_event_eth_tx_adapter_enqueue` act as an implicit release
> >> >> barrier and the API `rte_event_dequeue_burst` acts as an implicit
> >> >> acquire barrier.
> >> >>
> >> >> Since the pipeline_* test starts with a dequeue() and ends with an
> >> >> enqueue(), I don't believe we need barriers in between.
> >> >
> >> >Sorry for my misunderstanding this. And I agree with you that no
> >> >barriers are needed between dequeue and enqueue.
> >> >
> >> >Now, let's go back to the beginning. Actually, with this patch, our
> >> >barrier is mainly for the synchronization variable "w->processed_pkts".
> >> >As we all know, the event is first dequeued and then enqueued; after
> >> >this, the event can be treated as the processed event and included in the s
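
The conclusion of the thread above can be condensed into a short sketch;
process_event() and the worker struct w are illustrative names, not the
actual app/eventdev code:

/* The event APIs already order the event data (dequeue ~ acquire,
 * enqueue ~ release), so the statistics counter only needs an atomic
 * relaxed update, not an extra release barrier. */
struct rte_event ev;
uint16_t n = rte_event_dequeue_burst(dev_id, port_id, &ev, 1, 0);

if (n) {
	process_event(&ev);                               /* hypothetical work */
	rte_event_enqueue_burst(dev_id, port_id, &ev, 1);
	__atomic_fetch_add(&w->processed_pkts, 1, __ATOMIC_RELAXED);
}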

Re: [dpdk-dev] [PATCH 1/3] net/virtio: remove reference to virtqueue in vrings

2021-01-10 Thread Xia, Chenbo
Hi Maxime,

> -Original Message-
> From: Maxime Coquelin 
> Sent: Tuesday, December 22, 2020 12:15 AM
> To: dev@dpdk.org; Xia, Chenbo ; amore...@redhat.com;
> david.march...@redhat.com; olivier.m...@6wind.com
> Cc: Maxime Coquelin 
> Subject: [PATCH 1/3] net/virtio: remove reference to virtqueue in vrings
> 
> Vrings are part of the virtqueues, so we don't need
> to have a pointer to it in Vrings descriptions.
> 
> Instead, let's just substract from its offset to

Subtract?

With above fixed:

Reviewed-by: Chenbo Xia 

> calculate virtqueue address.
> 
> Signed-off-by: Maxime Coquelin 
> ---
>  drivers/net/virtio/virtio_ethdev.c| 36 +--
>  drivers/net/virtio/virtio_rxtx.c  | 28 +++
>  drivers/net/virtio/virtio_rxtx.h  |  3 --
>  drivers/net/virtio/virtio_rxtx_packed_avx.c   | 14 
>  drivers/net/virtio/virtio_rxtx_simple.h   |  2 +-
>  .../net/virtio/virtio_rxtx_simple_altivec.c   |  2 +-
>  drivers/net/virtio/virtio_rxtx_simple_neon.c  |  2 +-
>  drivers/net/virtio/virtio_rxtx_simple_sse.c   |  2 +-
>  .../net/virtio/virtio_user/virtio_user_dev.c  |  4 +--
>  drivers/net/virtio/virtio_user_ethdev.c   |  2 +-
>  drivers/net/virtio/virtqueue.h|  6 +++-
>  11 files changed, 49 insertions(+), 52 deletions(-)
> 
> diff --git a/drivers/net/virtio/virtio_ethdev.c
> b/drivers/net/virtio/virtio_ethdev.c
> index 96871b7b70..297c01a70d 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -133,7 +133,7 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
>  struct virtio_pmd_ctrl *ctrl,
>  int *dlen, int pkt_num)
>  {
> - struct virtqueue *vq = cvq->vq;
> + struct virtqueue *vq = virtnet_cq_to_vq(cvq);
>   int head;
>   struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
>   struct virtio_pmd_ctrl *result;
> @@ -229,7 +229,7 @@ virtio_send_command_split(struct virtnet_ctl *cvq,
> int *dlen, int pkt_num)
>  {
>   struct virtio_pmd_ctrl *result;
> - struct virtqueue *vq = cvq->vq;
> + struct virtqueue *vq = virtnet_cq_to_vq(cvq);
>   uint32_t head, i;
>   int k, sum = 0;
> 
> @@ -316,13 +316,13 @@ virtio_send_command(struct virtnet_ctl *cvq, struct
> virtio_pmd_ctrl *ctrl,
> 
>   ctrl->status = status;
> 
> - if (!cvq || !cvq->vq) {
> + if (!cvq) {
>   PMD_INIT_LOG(ERR, "Control queue is not supported.");
>   return -1;
>   }
> 
>   rte_spinlock_lock(&cvq->lock);
> - vq = cvq->vq;
> + vq = virtnet_cq_to_vq(cvq);
> 
>   PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, "
>   "vq->hw->cvq = %p vq = %p",
> @@ -552,19 +552,16 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t
> queue_idx)
> 
>   vq->sw_ring = sw_ring;
>   rxvq = &vq->rxq;
> - rxvq->vq = vq;
>   rxvq->port_id = dev->data->port_id;
>   rxvq->mz = mz;
>   } else if (queue_type == VTNET_TQ) {
>   txvq = &vq->txq;
> - txvq->vq = vq;
>   txvq->port_id = dev->data->port_id;
>   txvq->mz = mz;
>   txvq->virtio_net_hdr_mz = hdr_mz;
>   txvq->virtio_net_hdr_mem = hdr_mz->iova;
>   } else if (queue_type == VTNET_CQ) {
>   cvq = &vq->cq;
> - cvq->vq = vq;
>   cvq->mz = mz;
>   cvq->virtio_net_hdr_mz = hdr_mz;
>   cvq->virtio_net_hdr_mem = hdr_mz->iova;
> @@ -851,7 +848,7 @@ virtio_dev_rx_queue_intr_enable(struct rte_eth_dev *dev,
> uint16_t queue_id)
>  {
>   struct virtio_hw *hw = dev->data->dev_private;
>   struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
> - struct virtqueue *vq = rxvq->vq;
> + struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
> 
>   virtqueue_enable_intr(vq);
>   virtio_mb(hw->weak_barriers);
> @@ -862,7 +859,7 @@ static int
>  virtio_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
>  {
>   struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
> - struct virtqueue *vq = rxvq->vq;
> + struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
> 
>   virtqueue_disable_intr(vq);
>   return 0;
> @@ -2159,8 +2156,7 @@ static int
>  virtio_dev_start(struct rte_eth_dev *dev)
>  {
>   uint16_t nb_queues, i;
> - struct virtnet_rx *rxvq;
> - struct virtnet_tx *txvq __rte_unused;
> + struct virtqueue *vq;
>   struct virtio_hw *hw = dev->data->dev_private;
>   int ret;
> 
> @@ -2217,27 +2213,27 @@ virtio_dev_start(struct rte_eth_dev *dev)
>   PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues);
> 
>   for (i = 0; i < dev->data->nb_rx_queues; i++) {
> - rxvq = dev->data->rx_queues[i];
> + vq = virtnet_rxq_to_vq(dev->data->rx_queues[i]);
>   /* Flush the old packets */
> - virtqueue
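
The virtqueue.h hunk with the new helpers is truncated in this archive; from
the visible call sites, virtnet_rxq_to_vq() plausibly recovers the enclosing
virtqueue by subtracting the member offset. A sketch of that shape, not the
exact implementation:

/* container_of idiom: the vring struct is embedded in the virtqueue, so
 * subtracting its offset yields the enclosing virtqueue. */
#include <stddef.h>

static inline struct virtqueue *
virtnet_rxq_to_vq(struct virtnet_rx *rxq)
{
	return (struct virtqueue *)((char *)rxq - offsetof(struct virtqueue, rxq));
}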

Re: [dpdk-dev] [PATCH v2 2/5] net/hns3: fix build with sve enabled

2021-01-10 Thread Ruifeng Wang

> -Original Message-
> From: oulijun 
> Sent: Saturday, January 9, 2021 10:16 AM
> To: Ruifeng Wang ; Wei Hu (Xavier)
> ; Min Hu (Connor) ;
> Yisen Zhuang ; Huisong Li
> ; Chengchang Tang
> ; Chengwen Feng
> 
> Cc: dev@dpdk.org; vladimir.medved...@intel.com; jer...@marvell.com;
> hemant.agra...@nxp.com; Honnappa Nagarahalli
> ; nd ; sta...@dpdk.org
> Subject: Re: [PATCH v2 2/5] net/hns3: fix build with sve enabled
> 
> 
> 
> 在 2021/1/8 16:25, Ruifeng Wang 写道:
> > Building with SVE extension enabled stopped with error:
> >
> >   error: ACLE function ‘svwhilelt_b64_s32’ requires ISA extension ‘sve’
> > 18 | #define PG64_256BIT  svwhilelt_b64(0, 4)
> >
> > This is caused by unintentional cflags reset.
> > Fixed the issue by appending required flag to cflags instead of
> > overriding it.
> >
> > Fixes: 952ebacce4f2 ("net/hns3: support SVE Rx")
> > Cc: xavier.hu...@huawei.com
> > Cc: sta...@dpdk.org
> >
> > Signed-off-by: Ruifeng Wang 
> > ---
> >   drivers/net/hns3/meson.build | 2 +-
> >   1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/drivers/net/hns3/meson.build
> > b/drivers/net/hns3/meson.build index 45cee34d9..798086357 100644
> > --- a/drivers/net/hns3/meson.build
> > +++ b/drivers/net/hns3/meson.build
> > @@ -32,7 +32,7 @@ deps += ['hash']
> >   if arch_subdir == 'arm' and dpdk_conf.get('RTE_ARCH_64')
> > sources += files('hns3_rxtx_vec.c')
> > if cc.get_define('__ARM_FEATURE_SVE', args: machine_args) != ''
> > -   cflags = ['-DCC_SVE_SUPPORT']
> > +   cflags += ['-DCC_SVE_SUPPORT']
> Hi
>I noticed this patch, but I checked that the hns3 driver did not use this
> function.How did you compile it?

Hi,
The hns3 driver has an SVE Rx/Tx implementation in hns3_rxtx_vec_sve.c. This
path is enabled when building with the SVE feature.

I compiled it using gcc 10.2 with the flag '-march=armv8.3-a+sve'.
You can try compiling for N2 with the cross file added in this series (5/5).

Thanks,
Ruifeng
> 
> Thanks
> Lijun Ou
> > sources += files('hns3_rxtx_vec_sve.c')
> > endif
> >   endif
> >


Re: [dpdk-dev] [PATCH v2 2/5] net/hns3: fix build with sve enabled

2021-01-10 Thread Ruifeng Wang

> -Original Message-
> From: oulijun 
> Sent: Saturday, January 9, 2021 10:12 AM
> To: Honnappa Nagarahalli ; Ruifeng Wang
> ; Wei Hu (Xavier) ;
> Min Hu (Connor) ; Yisen Zhuang
> ; Huisong Li ;
> Chengchang Tang ; Chengwen Feng
> 
> Cc: dev@dpdk.org; vladimir.medved...@intel.com; jer...@marvell.com;
> hemant.agra...@nxp.com; nd ; sta...@dpdk.org
> Subject: Re: [PATCH v2 2/5] net/hns3: fix build with sve enabled
> 
> 
On 2021/1/9 8:06, Honnappa Nagarahalli wrote:
> > 
> >
> >>
> >> Building with SVE extension enabled stopped with error:
> >>
> >>   error: ACLE function ‘svwhilelt_b64_s32’ requires ISA extension ‘sve’
> >> 18 | #define PG64_256BIT  svwhilelt_b64(0, 4)
> >>
> >> This is caused by unintentional cflags reset.
> >> Fixed the issue by appending required flag to cflags instead of overriding 
> >> it.
> >>
> >> Fixes: 952ebacce4f2 ("net/hns3: support SVE Rx")
> >> Cc: xavier.hu...@huawei.com
> >> Cc: sta...@dpdk.org
> >>
> >> Signed-off-by: Ruifeng Wang 
> >> ---
> >>   drivers/net/hns3/meson.build | 2 +-
> >>   1 file changed, 1 insertion(+), 1 deletion(-)
> >>
> >> diff --git a/drivers/net/hns3/meson.build
> >> b/drivers/net/hns3/meson.build index 45cee34d9..798086357 100644
> >> --- a/drivers/net/hns3/meson.build
> >> +++ b/drivers/net/hns3/meson.build
> >> @@ -32,7 +32,7 @@ deps += ['hash']
> >>   if arch_subdir == 'arm' and dpdk_conf.get('RTE_ARCH_64')
> >>sources += files('hns3_rxtx_vec.c')
> >>if cc.get_define('__ARM_FEATURE_SVE', args: machine_args) != ''
> >> -  cflags = ['-DCC_SVE_SUPPORT']
> >> +  cflags += ['-DCC_SVE_SUPPORT']
> > This comment is unrelated to this patch. We need to be consistent with the
> macro definitions. Is '__ARM_FEATURE_SVE' not enough? If we need to
> define an additional flag, I would name it something like
> 'RTE_ARM_FEATURE_SVE'.
> >
> I think __ARM_FEATURE_SVE is ok. If the GCC version used includes the SVE
> flag, it will be identified via __ARM_FEATURE_SVE; it is defined in the Arm
> SVE document.

Yes, we can rely on the flags defined by the compiler, and no extra flag is needed.
I can update in the next version to remove this section from the meson file and
replace CC_SVE_SUPPORT in the code.
> >>sources += files('hns3_rxtx_vec_sve.c')
> >>endif
> >>   endif
> >> --
> >> 2.25.1
> >


Re: [dpdk-dev] [PATCH 2/3] net/virtio: allocate fake mbuf in Rx queue

2021-01-10 Thread Xia, Chenbo
Hi Maxime,

> -Original Message-
> From: Maxime Coquelin 
> Sent: Tuesday, December 22, 2020 12:15 AM
> To: dev@dpdk.org; Xia, Chenbo ; amore...@redhat.com;
> david.march...@redhat.com; olivier.m...@6wind.com
> Cc: Maxime Coquelin 
> Subject: [PATCH 2/3] net/virtio: allocate fake mbuf in Rx queue
> 
> While it is worth clarifying whether the fake mbuf
> in virtnet_rx struct is really necessary, it is sure
> that it heavily impacts cache usage by being part of
> the struct. Indeed, it takes uses cachelines, and

Did you mean 'uses cachelines'?

> requires alignement on a cacheline.

Alignment?

With above fixed:

Reviewed-by: Chenbo Xia 

> 
> Before this series, it means it took 120 bytes in
> virtnet_rx struct:
> 
> struct virtnet_rx {
>   struct virtqueue * vq;   /* 0 8 */
> 
>   /* XXX 56 bytes hole, try to pack */
> 
>   /* --- cacheline 1 boundary (64 bytes) --- */
>   struct rte_mbuffake_mbuf __attribute__((__aligned__(64)));
> /*64   128 */
>   /* --- cacheline 3 boundary (192 bytes) --- */
> 
> This patch allocates it using malloc in order to optimize
> virtnet_rx cache usage and so virtqueue cache usage.
> 
> Signed-off-by: Maxime Coquelin 
> ---
>  drivers/net/virtio/virtio_ethdev.c | 10 ++
>  drivers/net/virtio/virtio_rxtx.c   |  8 +++-
>  drivers/net/virtio/virtio_rxtx.h   |  2 +-
>  3 files changed, 14 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/virtio/virtio_ethdev.c
> b/drivers/net/virtio/virtio_ethdev.c
> index 297c01a70d..a1351b36ca 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -539,6 +539,7 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t
> queue_idx)
>   }
> 
>   if (queue_type == VTNET_RQ) {
> + struct rte_mbuf *fake_mbuf;
>   size_t sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
>  sizeof(vq->sw_ring[0]);
> 
> @@ -550,10 +551,18 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t
> queue_idx)
>   goto fail_q_alloc;
>   }
> 
> + fake_mbuf = malloc(sizeof(*fake_mbuf));
> + if (!fake_mbuf) {
> + PMD_INIT_LOG(ERR, "can not allocate fake mbuf");
> + ret = -ENOMEM;
> + goto fail_q_alloc;
> + }
> +
>   vq->sw_ring = sw_ring;
>   rxvq = &vq->rxq;
>   rxvq->port_id = dev->data->port_id;
>   rxvq->mz = mz;
> + rxvq->fake_mbuf = fake_mbuf;
>   } else if (queue_type == VTNET_TQ) {
>   txvq = &vq->txq;
>   txvq->port_id = dev->data->port_id;
> @@ -636,6 +645,7 @@ virtio_free_queues(struct virtio_hw *hw)
> 
>   queue_type = virtio_get_queue_type(hw, i);
>   if (queue_type == VTNET_RQ) {
> + free(vq->rxq.fake_mbuf);
>   rte_free(vq->sw_ring);
>   rte_memzone_free(vq->rxq.mz);
>   } else if (queue_type == VTNET_TQ) {
> diff --git a/drivers/net/virtio/virtio_rxtx.c
> b/drivers/net/virtio/virtio_rxtx.c
> index 1fcce36cbd..d147d7300a 100644
> --- a/drivers/net/virtio/virtio_rxtx.c
> +++ b/drivers/net/virtio/virtio_rxtx.c
> @@ -703,11 +703,9 @@ virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev,
> uint16_t queue_idx)
>   virtio_rxq_vec_setup(rxvq);
>   }
> 
> - memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
> - for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
> -  desc_idx++) {
> - vq->sw_ring[vq->vq_nentries + desc_idx] =
> - &rxvq->fake_mbuf;
> + memset(rxvq->fake_mbuf, 0, sizeof(*rxvq->fake_mbuf));
> + for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST; desc_idx++) {
> + vq->sw_ring[vq->vq_nentries + desc_idx] = rxvq->fake_mbuf;
>   }
> 
>   if (hw->use_vec_rx && !virtio_with_packed_queue(hw)) {
> diff --git a/drivers/net/virtio/virtio_rxtx.h
> b/drivers/net/virtio/virtio_rxtx.h
> index 7f1036be6f..6ce5d67d15 100644
> --- a/drivers/net/virtio/virtio_rxtx.h
> +++ b/drivers/net/virtio/virtio_rxtx.h
> @@ -19,7 +19,7 @@ struct virtnet_stats {
> 
>  struct virtnet_rx {
>   /* dummy mbuf, for wraparound when processing RX ring. */
> - struct rte_mbuf fake_mbuf;
> + struct rte_mbuf *fake_mbuf;
>   uint64_t mbuf_initializer; /**< value to init mbufs. */
>   struct rte_mempool *mpool; /**< mempool for mbuf allocation */
> 
> --
> 2.29.2



Re: [dpdk-dev] [PATCH 3/3] net/virtio: pack virtuqueue struct

2021-01-10 Thread Xia, Chenbo
Hi Maxime,

> -Original Message-
> From: Maxime Coquelin 
> Sent: Tuesday, December 22, 2020 12:15 AM
> To: dev@dpdk.org; Xia, Chenbo ; amore...@redhat.com;
> david.march...@redhat.com; olivier.m...@6wind.com
> Cc: Maxime Coquelin 
> Subject: [PATCH 3/3] net/virtio: pack virtuqueue struct

s/virtuqueue/virtqueue

> 
> This patch optimizes packing of the virtuqueue

Ditto

With above fixed:

Reviewed-by: Chenbo Xia 

> struct by moving fields around to fill holes.
> 
> Offset field is not used and so can be removed.
> 
> Signed-off-by: Maxime Coquelin 
> ---
>  drivers/net/virtio/virtqueue.h | 18 +-
>  1 file changed, 9 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
> index c64e7dcbdf..3b08aac931 100644
> --- a/drivers/net/virtio/virtqueue.h
> +++ b/drivers/net/virtio/virtqueue.h
> @@ -245,6 +245,15 @@ struct virtqueue {
>   uint16_t vq_avail_idx; /**< sync until needed */
>   uint16_t vq_free_thresh; /**< free threshold */
> 
> + /**
> +  * Head of the free chain in the descriptor table. If
> +  * there are no free descriptors, this will be set to
> +  * VQ_RING_DESC_CHAIN_END.
> +  */
> + uint16_t  vq_desc_head_idx;
> + uint16_t  vq_desc_tail_idx;
> + uint16_t  vq_queue_index;   /**< PCI queue index */
> +
>   void *vq_ring_virt_mem;  /**< linear address of vring*/
>   unsigned int vq_ring_size;
> 
> @@ -257,15 +266,6 @@ struct virtqueue {
>   rte_iova_t vq_ring_mem; /**< physical address of vring,
>* or virtual address for virtio_user. */
> 
> - /**
> -  * Head of the free chain in the descriptor table. If
> -  * there are no free descriptors, this will be set to
> -  * VQ_RING_DESC_CHAIN_END.
> -  */
> - uint16_t  vq_desc_head_idx;
> - uint16_t  vq_desc_tail_idx;
> - uint16_t  vq_queue_index;   /**< PCI queue index */
> - uint16_t offset; /**< relative offset to obtain addr in mbuf */
>   uint16_t  *notify_addr;
>   struct rte_mbuf **sw_ring;  /**< RX software ring. */
>   struct vq_desc_extra vq_descx[0];
> --
> 2.29.2
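The principle behind the reordering, in miniature (illustrative structs,
not the driver's own layout):

/* Moving the 16-bit fields next to each other removes compiler-inserted
 * padding holes, which is what the reordering above achieves for
 * struct virtqueue. */
#include <stdint.h>

struct before {          /* 24 bytes on LP64 */
	uint16_t a;      /* 2 bytes + 6 bytes padding before the pointer */
	void *p;         /* 8 bytes */
	uint16_t b;      /* 2 bytes + 6 bytes tail padding */
};

struct after {           /* 16 bytes on LP64 */
	uint16_t a;
	uint16_t b;      /* packs into the same word as 'a' */
	void *p;
};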



Re: [dpdk-dev] [PATCH v2 5/5] config: add Arm Neoverse N2

2021-01-10 Thread Ruifeng Wang


> -Original Message-
> From: Honnappa Nagarahalli 
> Sent: Saturday, January 9, 2021 7:58 AM
> To: Ruifeng Wang ; jer...@marvell.com; Ruifeng
> Wang ; Jan Viktorin ;
> Bruce Richardson 
> Cc: dev@dpdk.org; vladimir.medved...@intel.com;
> hemant.agra...@nxp.com; nd ; Honnappa Nagarahalli
> ; nd 
> Subject: RE: [PATCH v2 5/5] config: add Arm Neoverse N2
> 
> + Juraj
> 
> Please note that this clashes with Juraj's patch for meson rework.

Yes. I didn't base it on the build options rework series.
I will rebase when that series gets merged.
> 
> 
> 
> >
> > Add Arm Neoverse N2 cpu support.
> >
> > Signed-off-by: Ruifeng Wang 
> > ---
> >  config/arm/arm64_n2_linux_gcc | 17 +
> >  config/arm/meson.build| 11 ++-
> >  2 files changed, 27 insertions(+), 1 deletion(-)  create mode 100644
> > config/arm/arm64_n2_linux_gcc
> >
> > diff --git a/config/arm/arm64_n2_linux_gcc
> > b/config/arm/arm64_n2_linux_gcc new file mode 100644 index
> > 0..78f6f3e2b
> > --- /dev/null
> > +++ b/config/arm/arm64_n2_linux_gcc
> > @@ -0,0 +1,17 @@
> > +[binaries]
> > +c = 'aarch64-linux-gnu-gcc'
> > +cpp = 'aarch64-linux-gnu-cpp'
> > +ar = 'aarch64-linux-gnu-gcc-ar'
> > +strip = 'aarch64-linux-gnu-strip'
> > +pkgconfig = 'aarch64-linux-gnu-pkg-config'
> > +pcap-config = ''
> > +
> > +[host_machine]
> > +system = 'linux'
> > +cpu_family = 'aarch64'
> > +cpu = 'armv8-a'
> > +endian = 'little'
> > +
> > +[properties]
> > +implementor_id = '0x41'
> > +implementor_pn = '0xd49'
> > diff --git a/config/arm/meson.build b/config/arm/meson.build index
> > 42b4e43c7..58e0ae643 100644
> > --- a/config/arm/meson.build
> > +++ b/config/arm/meson.build
> > @@ -89,6 +89,14 @@ flags_n1generic_extra = [
> > ['RTE_MAX_NUMA_NODES', 1],
> > ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false],
> > ['RTE_LIBRTE_VHOST_NUMA', false]]
> > +flags_n2generic_extra = [
> > +   ['RTE_MACHINE', '"neoverse-n2"'],
> > +   ['RTE_MAX_LCORE', 64],
> > +   ['RTE_CACHE_LINE_SIZE', 64],
> > +   ['RTE_ARM_FEATURE_ATOMICS', true],
> > +   ['RTE_USE_C11_MEM_MODEL', true],
> > +   ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false],
> > +   ['RTE_LIBRTE_VHOST_NUMA', false]]
> Do we need a flag RTE_ARM_FEATURE_SVE?

I don't think an extra flag is needed. We can rely on __ARM_FEATURE_SVE from
the compiler.
One scenario I can think of where RTE_ARM_FEATURE_SVE could be needed is when
we are writing inline assembly with SVE instructions and using a compiler that
has no SVE support.
I'm not sure we will have SVE inline assembly, as C intrinsics are available.
> 
> >
> >  machine_args_generic = [
> > ['default', ['-march=armv8-a+crc', '-moutline-atomics']], @@ -100,7
> > +108,8 @@ machine_args_generic = [
> > ['0xd09', ['-mcpu=cortex-a73']],
> > ['0xd0a', ['-mcpu=cortex-a75']],
> > ['0xd0b', ['-mcpu=cortex-a76']],
> > -   ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'],
> > flags_n1generic_extra]]
> > +   ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'],
> > flags_n1generic_extra],
> > +   ['0xd49', ['-march=armv8.5-a+crypto+sve'], flags_n2generic_extra]]
> Should this be 'sve2'? There should be a flag to indicate SVE2.

Yes. N2 supports SVE2, and SVE2 is a superset of SVE.
I will make the change in the next version.
> 
> >
> >  machine_args_cavium = [
> > ['default', ['-march=armv8-a+crc+crypto','-mcpu=thunderx']],
> > --
> > 2.25.1
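
Gating on the compiler-provided ACLE macro, as both sides suggest, looks like
this in practice (a sketch; svcntd() is a standard SVE intrinsic):

/* Rely on the macro the compiler defines when built with +sve (or +sve2);
 * no DPDK-specific flag is strictly required. */
#if defined(__ARM_FEATURE_SVE)
#include <arm_sve.h>

static inline uint64_t
vec_lanes_u64(void)
{
	return svcntd();   /* active 64-bit lanes per SVE vector */
}
#else
static inline uint64_t
vec_lanes_u64(void)
{
	return 1;          /* scalar fallback */
}
#endif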



Re: [dpdk-dev] [PATCH v2 5/5] config: add Arm Neoverse N2

2021-01-10 Thread Jerin Jacob
On Mon, Jan 11, 2021 at 8:31 AM Ruifeng Wang  wrote:
>
>
> > -Original Message-
> > From: Honnappa Nagarahalli 
> > Sent: Saturday, January 9, 2021 7:58 AM
> > To: Ruifeng Wang ; jer...@marvell.com; Ruifeng
> > Wang ; Jan Viktorin ;
> > Bruce Richardson 
> > Cc: dev@dpdk.org; vladimir.medved...@intel.com;
> > hemant.agra...@nxp.com; nd ; Honnappa Nagarahalli
> > ; nd 
> > Subject: RE: [PATCH v2 5/5] config: add Arm Neoverse N2
> >
> > + Juraj
> >
> > Please note that this clashes with Juraj's patch for meson rework.
>
> Yes. I didn't base it on the build options rework series.
> I will rebase when that series gets merged.
> >
> > 
> >
> > >
> > > Add Arm Neoverse N2 cpu support.
> > >
> > > Signed-off-by: Ruifeng Wang 
> > > ---
> > >  config/arm/arm64_n2_linux_gcc | 17 +
> > >  config/arm/meson.build| 11 ++-
> > >  2 files changed, 27 insertions(+), 1 deletion(-)  create mode 100644
> > > config/arm/arm64_n2_linux_gcc
> > >
> > > diff --git a/config/arm/arm64_n2_linux_gcc
> > > b/config/arm/arm64_n2_linux_gcc new file mode 100644 index
> > > 0..78f6f3e2b
> > > --- /dev/null
> > > +++ b/config/arm/arm64_n2_linux_gcc
> > > @@ -0,0 +1,17 @@
> > > +[binaries]
> > > +c = 'aarch64-linux-gnu-gcc'
> > > +cpp = 'aarch64-linux-gnu-cpp'
> > > +ar = 'aarch64-linux-gnu-gcc-ar'
> > > +strip = 'aarch64-linux-gnu-strip'
> > > +pkgconfig = 'aarch64-linux-gnu-pkg-config'
> > > +pcap-config = ''
> > > +
> > > +[host_machine]
> > > +system = 'linux'
> > > +cpu_family = 'aarch64'
> > > +cpu = 'armv8-a'
> > > +endian = 'little'
> > > +
> > > +[properties]
> > > +implementor_id = '0x41'
> > > +implementor_pn = '0xd49'
> > > diff --git a/config/arm/meson.build b/config/arm/meson.build index
> > > 42b4e43c7..58e0ae643 100644
> > > --- a/config/arm/meson.build
> > > +++ b/config/arm/meson.build
> > > @@ -89,6 +89,14 @@ flags_n1generic_extra = [
> > > ['RTE_MAX_NUMA_NODES', 1],
> > > ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false],
> > > ['RTE_LIBRTE_VHOST_NUMA', false]]
> > > +flags_n2generic_extra = [
> > > +   ['RTE_MACHINE', '"neoverse-n2"'],
> > > +   ['RTE_MAX_LCORE', 64],
> > > +   ['RTE_CACHE_LINE_SIZE', 64],
> > > +   ['RTE_ARM_FEATURE_ATOMICS', true],
> > > +   ['RTE_USE_C11_MEM_MODEL', true],
> > > +   ['RTE_EAL_NUMA_AWARE_HUGEPAGES', false],
> > > +   ['RTE_LIBRTE_VHOST_NUMA', false]]
> > Do we need a flag RTE_ARM_FEATURE_SVE?
>
> I don't think an extra flag is needed. We can rely on __ARM_FEATURE_SVE from
> the compiler.
> One scenario I can think of where RTE_ARM_FEATURE_SVE could be needed is when
> we are writing inline assembly with SVE instructions and using a compiler that
> has no SVE support.
> I'm not sure we will have SVE inline assembly, as C intrinsics are available.

It may be useful to introduce RTE_ARM_FEATURE_SVE to abstract any compiler
differences in the future (GCC vs Clang or another toolchain, etc.).


> >
> > >
> > >  machine_args_generic = [
> > > ['default', ['-march=armv8-a+crc', '-moutline-atomics']], @@ -100,7
> > > +108,8 @@ machine_args_generic = [
> > > ['0xd09', ['-mcpu=cortex-a73']],
> > > ['0xd0a', ['-mcpu=cortex-a75']],
> > > ['0xd0b', ['-mcpu=cortex-a76']],
> > > -   ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'],
> > > flags_n1generic_extra]]
> > > +   ['0xd0c', ['-march=armv8.2-a+crypto', '-mcpu=neoverse-n1'],
> > > flags_n1generic_extra],
> > > +   ['0xd49', ['-march=armv8.5-a+crypto+sve'], flags_n2generic_extra]]
> > Should this be 'sve2'? There should be a flag to indicate SVE2.
>
> Yes. N2 supports SVE2, and SVE2 is a superset of SVE.
> I will make the change in the next version.
> >
> > >
> > >  machine_args_cavium = [
> > > ['default', ['-march=armv8-a+crc+crypto','-mcpu=thunderx']],
> > > --
> > > 2.25.1
>


Re: [dpdk-dev] [PATCH] net/ice: refactor PF RSS

2021-01-10 Thread Zhang, Qi Z



> -Original Message-
> From: Ding, Xuan 
> Sent: Friday, January 8, 2021 4:39 PM
> To: Zhang, Qi Z ; Wu, Jingjing ;
> Xing, Beilei 
> Cc: dev@dpdk.org; Wang, Haiyue ; Guo, Jia
> ; Su, Simei ; Yan, Zhirun
> ; Ding, Xuan 
> Subject: [PATCH] net/ice: refactor PF RSS
> 
> This patch refactors the PF RSS code based on the below design:
> 1. ice_pattern_match_item->input_set_mask is the superset of
>ETH_RSS_xxx.
> 2. ice_pattern_match_item->meta is the ice_rss_hash_cfg template.
> 3. ice_hash_parse_pattern will generate pattern hint.
> 4. ice_hash_parse_action will refine the ice_rss_hash_cfg based on
>the pattern hint and rss_type.
> 5. The refine process includes:
>1)  refine protocol headers (VLAN/PPPOE/GTPU).
>2)  refine hash bit fields of l2, l3, l4.
>3)  refine hash bit fields for gtpu header.
> 
> Signed-off-by: Xuan Ding 

Acked-by: Qi Zhang 

Applied to dpdk-next-net-intel.

Thanks
Qi


Re: [dpdk-dev] [PATCH 2/3] net/virtio: allocate fake mbuf in Rx queue

2021-01-10 Thread Xia, Chenbo
Hi Maxime,

> -Original Message-
> From: Maxime Coquelin 
> Sent: Tuesday, December 22, 2020 12:15 AM
> To: dev@dpdk.org; Xia, Chenbo ; amore...@redhat.com;
> david.march...@redhat.com; olivier.m...@6wind.com
> Cc: Maxime Coquelin 
> Subject: [PATCH 2/3] net/virtio: allocate fake mbuf in Rx queue
> 
> While it is worth clarifying whether the fake mbuf
> in virtnet_rx struct is really necessary, it is sure
> that it heavily impacts cache usage by being part of
> the struct. Indeed, it takes uses cachelines, and
> requires alignement on a cacheline.
> 
> Before this series, it means it took 120 bytes in
> virtnet_rx struct:
> 
> struct virtnet_rx {
>   struct virtqueue * vq;   /* 0 8 */
> 
>   /* XXX 56 bytes hole, try to pack */
> 
>   /* --- cacheline 1 boundary (64 bytes) --- */
>   struct rte_mbuffake_mbuf __attribute__((__aligned__(64)));
> /*64   128 */
>   /* --- cacheline 3 boundary (192 bytes) --- */
> 
> This patch allocates it using malloc in order to optimize
> virtnet_rx cache usage and so virtqueue cache usage.
> 
> Signed-off-by: Maxime Coquelin 
> ---
>  drivers/net/virtio/virtio_ethdev.c | 10 ++
>  drivers/net/virtio/virtio_rxtx.c   |  8 +++-
>  drivers/net/virtio/virtio_rxtx.h   |  2 +-
>  3 files changed, 14 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/virtio/virtio_ethdev.c
> b/drivers/net/virtio/virtio_ethdev.c
> index 297c01a70d..a1351b36ca 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -539,6 +539,7 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t
> queue_idx)
>   }
> 
>   if (queue_type == VTNET_RQ) {
> + struct rte_mbuf *fake_mbuf;
>   size_t sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
>  sizeof(vq->sw_ring[0]);
> 
> @@ -550,10 +551,18 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t
> queue_idx)
>   goto fail_q_alloc;
>   }
> 
> + fake_mbuf = malloc(sizeof(*fake_mbuf));
> + if (!fake_mbuf) {
> + PMD_INIT_LOG(ERR, "can not allocate fake mbuf");
> + ret = -ENOMEM;
> + goto fail_q_alloc;
> + }
> +
>   vq->sw_ring = sw_ring;
>   rxvq = &vq->rxq;
>   rxvq->port_id = dev->data->port_id;
>   rxvq->mz = mz;
> + rxvq->fake_mbuf = fake_mbuf;
>   } else if (queue_type == VTNET_TQ) {
>   txvq = &vq->txq;
>   txvq->port_id = dev->data->port_id;
> @@ -636,6 +645,7 @@ virtio_free_queues(struct virtio_hw *hw)
> 
>   queue_type = virtio_get_queue_type(hw, i);
>   if (queue_type == VTNET_RQ) {
> + free(vq->rxq.fake_mbuf);

After thinking about this again: although you add the free of the fake mbuf
here, it's better to also free it in virtio_init_queue after fail_q_alloc.
And when setup_queue(hw, vq) fails, it's better to goto fail_q_alloc to
free the fake mbuf. Right now it will not leak memory, as we call
virtio_free_queues when virtio_alloc_queues fails, but inside
virtio_init_queue it's better to handle the errors properly. If you agree
with the above, it may also be good to rename 'fail_q_alloc', since now it
may also fail when setting up queues.

Sorry for an extra email about this...

Thanks,
Chenbo

>   rte_free(vq->sw_ring);
>   rte_memzone_free(vq->rxq.mz);
>   } else if (queue_type == VTNET_TQ) {
> diff --git a/drivers/net/virtio/virtio_rxtx.c
> b/drivers/net/virtio/virtio_rxtx.c
> index 1fcce36cbd..d147d7300a 100644
> --- a/drivers/net/virtio/virtio_rxtx.c
> +++ b/drivers/net/virtio/virtio_rxtx.c
> @@ -703,11 +703,9 @@ virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev,
> uint16_t queue_idx)
>   virtio_rxq_vec_setup(rxvq);
>   }
> 
> - memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
> - for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
> -  desc_idx++) {
> - vq->sw_ring[vq->vq_nentries + desc_idx] =
> - &rxvq->fake_mbuf;
> + memset(rxvq->fake_mbuf, 0, sizeof(*rxvq->fake_mbuf));
> + for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST; desc_idx++) {
> + vq->sw_ring[vq->vq_nentries + desc_idx] = rxvq->fake_mbuf;
>   }
> 
>   if (hw->use_vec_rx && !virtio_with_packed_queue(hw)) {
> diff --git a/drivers/net/virtio/virtio_rxtx.h
> b/drivers/net/virtio/virtio_rxtx.h
> index 7f1036be6f..6ce5d67d15 100644
> --- a/drivers/net/virtio/virtio_rxtx.h
> +++ b/drivers/net/virtio/virtio_rxtx.h
> @@ -19,7 +19,7 @@ struct virtnet_stats {
> 
>  struct virtnet_rx {
>   /* dummy mbuf, for wraparound when processing RX ring. */
> - struct rte_mbuf fake_mbuf;
> + struct rte_mbuf *fake_mbuf;
>   uint64_t mbuf_initializer; /**< value to init mbufs. */
>   st
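
A sketch of the unwind order Chenbo is asking for inside virtio_init_queue()
(label names are illustrative; fake_mbuf, sw_ring and setup_queue() come from
the patch and the existing driver code):

/* Each allocation gets its own cleanup step, so a failing setup_queue()
 * also releases the fake mbuf instead of leaking it. */
	fake_mbuf = malloc(sizeof(*fake_mbuf));
	if (!fake_mbuf) {
		ret = -ENOMEM;
		goto free_sw_ring;
	}
	/* ... */
	if (setup_queue(hw, vq) < 0) {
		ret = -EINVAL;
		goto free_fake_mbuf;
	}
	return 0;

free_fake_mbuf:
	free(fake_mbuf);
free_sw_ring:
	rte_free(sw_ring);
	/* then fall through to the existing memzone/vq cleanup */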

Re: [dpdk-dev] [PATCH v3 1/2] net/virtio: fix missing backend features negotiation

2021-01-10 Thread Xia, Chenbo
> -Original Message-
> From: Maxime Coquelin 
> Sent: Friday, January 8, 2021 5:42 PM
> To: dev@dpdk.org; sta...@dpdk.org; Xia, Chenbo ;
> amore...@redhat.com; jasow...@redhat.com; david.march...@redhat.com
> Cc: Maxime Coquelin 
> Subject: [PATCH v3 1/2] net/virtio: fix missing backend features negotiation
> 
> This patch adds missing backend features negotiation for
> in Vhost-vDPA. Without it, IOTLB messages v2 could be sent
> by Virtio-user PMD while not supported by the backend.
> 
> Fixes: 6b901437056e ("net/virtio: introduce vhost-vDPA backend")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Maxime Coquelin 
> ---
>  drivers/net/virtio/virtio_user/vhost.h   |  4 
>  drivers/net/virtio/virtio_user/vhost_vdpa.c  | 14 ++
>  drivers/net/virtio/virtio_user/virtio_user_dev.c | 14 ++
>  drivers/net/virtio/virtio_user/virtio_user_dev.h |  4 +---
>  4 files changed, 29 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/virtio/virtio_user/vhost.h
> b/drivers/net/virtio/virtio_user/vhost.h
> index 210a3704e7..c1dcc50b58 100644
> --- a/drivers/net/virtio/virtio_user/vhost.h
> +++ b/drivers/net/virtio/virtio_user/vhost.h
> @@ -86,6 +86,10 @@ enum vhost_user_request {
>   VHOST_USER_MAX
>  };
> 
> +#ifndef VHOST_BACKEND_F_IOTLB_MSG_V2
> +#define VHOST_BACKEND_F_IOTLB_MSG_V2 1
> +#endif
> +
>  extern const char * const vhost_msg_strings[VHOST_USER_MAX];
> 
>  struct vhost_memory_region {
> diff --git a/drivers/net/virtio/virtio_user/vhost_vdpa.c
> b/drivers/net/virtio/virtio_user/vhost_vdpa.c
> index c5b59e2f95..83c60ea660 100644
> --- a/drivers/net/virtio/virtio_user/vhost_vdpa.c
> +++ b/drivers/net/virtio/virtio_user/vhost_vdpa.c
> @@ -35,6 +35,8 @@
>  #define VHOST_VDPA_SET_STATUS _IOW(VHOST_VIRTIO, 0x72, __u8)
>  #define VHOST_VDPA_SET_VRING_ENABLE  _IOW(VHOST_VIRTIO, 0x75, \
>struct vhost_vring_state)
> +#define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
> +#define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64)
> 
>  static uint64_t vhost_req_user_to_vdpa[] = {
>   [VHOST_USER_SET_OWNER] = VHOST_SET_OWNER,
> @@ -51,6 +53,8 @@ static uint64_t vhost_req_user_to_vdpa[] = {
>   [VHOST_USER_SET_STATUS] = VHOST_VDPA_SET_STATUS,
>   [VHOST_USER_GET_STATUS] = VHOST_VDPA_GET_STATUS,
>   [VHOST_USER_SET_VRING_ENABLE] = VHOST_VDPA_SET_VRING_ENABLE,
> + [VHOST_USER_GET_PROTOCOL_FEATURES] = VHOST_GET_BACKEND_FEATURES,
> + [VHOST_USER_SET_PROTOCOL_FEATURES] = VHOST_SET_BACKEND_FEATURES,
>  };
> 
>  /* no alignment requirement */
> @@ -86,6 +90,11 @@ vhost_vdpa_dma_map(struct virtio_user_dev *dev, void *addr,
>  {
>   struct vhost_msg msg = {};
> 
> + if (!(dev->protocol_features & (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)))
> {
> + PMD_DRV_LOG(ERR, "IOTLB_MSG_V2 not supported by the backend.");
> + return -1;
> + }
> +
>   msg.type = VHOST_IOTLB_MSG_V2;
>   msg.iotlb.type = VHOST_IOTLB_UPDATE;
>   msg.iotlb.iova = iova;
> @@ -111,6 +120,11 @@ vhost_vdpa_dma_unmap(struct virtio_user_dev *dev,
> __rte_unused void *addr,
>  {
>   struct vhost_msg msg = {};
> 
> + if (!(dev->protocol_features & (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)))
> {
> + PMD_DRV_LOG(ERR, "IOTLB_MSG_V2 not supported by the backend.");
> + return -1;
> + }
> +
>   msg.type = VHOST_IOTLB_MSG_V2;
>   msg.iotlb.type = VHOST_IOTLB_INVALIDATE;
>   msg.iotlb.iova = iova;
> diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c
> b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> index e1cbad0d90..39c5dfc9e4 100644
> --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
> +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> @@ -440,11 +440,13 @@ virtio_user_dev_setup(struct virtio_user_dev *dev)
>1ULL << VIRTIO_F_RING_PACKED   |   \
>1ULL << VHOST_USER_F_PROTOCOL_FEATURES)
> 
> -#define VIRTIO_USER_SUPPORTED_PROTOCOL_FEATURES  \
> +#define VHOST_USER_SUPPORTED_PROTOCOL_FEATURES   \
>   (1ULL << VHOST_USER_PROTOCOL_F_MQ | \
>1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK |  \
>1ULL << VHOST_USER_PROTOCOL_F_STATUS)
> 
> +#define VHOST_VDPA_SUPPORTED_PROTOCOL_FEATURES   \
> + (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)
>  int
>  virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues,
>int cq, int queue_size, const char *mac, char **ifname,
> @@ -463,9 +465,13 @@ virtio_user_dev_init(struct virtio_user_dev *dev, char
> *path, int queues,
>   dev->mac_specified = 0;
>   dev->frontend_features = 0;
>   dev->unsupported_features = ~VIRTIO_USER_SUPPORTED_FEATURES;
> - dev->protocol_features = VIRTIO_USER_SUPPORTED_PROTOCOL_FEATURES;
>   dev->backend_type = backend_type;
> 
> + if (dev->backend_type == VIRTIO_USER_BACKEND_VHOST_USER)
> + dev->protocol_feat

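The vhost-vdpa hunks above add the backend-features ioctls and then gate
DMA map/unmap on the negotiated IOTLB_MSG_V2 bit. A minimal sketch of that
negotiate-then-gate pattern, assuming a Linux vhost-vdpa fd; the ioctl
numbers match the hunk above, but negotiate_backend_features() is a
hypothetical helper, not the driver's exact code:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/types.h>

#define VHOST_VIRTIO 0xAF
#define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
#define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64)
#define VHOST_BACKEND_F_IOTLB_MSG_V2 1

/* Fetch the backend's feature word, keep only the bits we support,
 * and acknowledge the result; the caller later tests the
 * IOTLB_MSG_V2 bit before sending any IOTLB update/invalidate. */
static int
negotiate_backend_features(int vhostfd, uint64_t supported, uint64_t *out)
{
	uint64_t features;

	if (ioctl(vhostfd, VHOST_GET_BACKEND_FEATURES, &features) < 0)
		return -1;
	features &= supported;
	if (ioctl(vhostfd, VHOST_SET_BACKEND_FEATURES, &features) < 0)
		return -1;
	*out = features;
	return 0;
}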
[dpdk-dev] [PATCH v4 0/2] Enhance Async Enqueue for Small Packets

2021-01-10 Thread Jiayu Hu
Async enqueue offloads large copies to DMA devices, while small copies
are still performed by the CPU. However, it requires users to retrieve
enqueue-completed packets with rte_vhost_poll_enqueue_completed(), even
for packets that the CPU has already completed by the time
rte_vhost_submit_enqueue_burst() returns. This design incurs the extra
overhead of tracking completed pktmbufs and of additional function
calls, degrading performance on small packets.

The first patch cleans up async enqueue code, and the second patch
enables rte_vhost_submit_enqueue_burst() to return completed packets.

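A hedged sketch of the caller pattern this enables, using the extended
signature from this series; enqueue_and_reap() is a made-up helper name
for illustration. Packets the CPU already copied come back in comp_pkts
and can be freed immediately; only the DMA-offloaded remainder needs a
later rte_vhost_poll_enqueue_completed() pass:

#include <rte_mbuf.h>
#include <rte_vhost_async.h>

/* Returns how many packets are still in flight on the DMA engine. */
static uint16_t
enqueue_and_reap(int vid, uint16_t queue_id,
		struct rte_mbuf **pkts, uint16_t count)
{
	struct rte_mbuf *comp_pkts[count];
	uint32_t nr_comp = 0, freed;
	uint16_t n_enq;

	n_enq = rte_vhost_submit_enqueue_burst(vid, queue_id, pkts, count,
			comp_pkts, &nr_comp);

	/* CPU-completed packets are finished now: free them on the spot. */
	for (freed = 0; freed < nr_comp; freed++)
		rte_pktmbuf_free(comp_pkts[freed]);

	return n_enq - nr_comp;
}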
Change log
==========
v4:
- support new API in vhost example
v3:
- fix incorrect ret value when DMA ring is full
- enhance description of API declaration and programmer guide
v2:
- fix typo
- rename API variables
- update programmer guide

Jiayu Hu (2):
  vhost: cleanup async enqueue
  vhost: enhance async enqueue for small packets

 doc/guides/prog_guide/vhost_lib.rst |   8 +-
 examples/vhost/main.c   |  18 ++-
 lib/librte_vhost/rte_vhost_async.h  |  34 +++--
 lib/librte_vhost/vhost.c|  14 +-
 lib/librte_vhost/vhost.h|   7 +-
 lib/librte_vhost/vhost_user.c   |   7 +-
 lib/librte_vhost/virtio_net.c   | 258 
 7 files changed, 200 insertions(+), 146 deletions(-)

-- 
2.7.4



[dpdk-dev] [PATCH v4 1/2] vhost: cleanup async enqueue

2021-01-10 Thread Jiayu Hu
This patch removes unnecessary checks and function calls, changes
internal variables to appropriate types, and fixes typos.

Signed-off-by: Jiayu Hu 
Tested-by: Yinan Wang 
---
 lib/librte_vhost/rte_vhost_async.h |  8 
 lib/librte_vhost/virtio_net.c  | 16 
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/lib/librte_vhost/rte_vhost_async.h b/lib/librte_vhost/rte_vhost_async.h
index c73bd7c..03bd558 100644
--- a/lib/librte_vhost/rte_vhost_async.h
+++ b/lib/librte_vhost/rte_vhost_async.h
@@ -112,7 +112,7 @@ struct rte_vhost_async_features {
 };
 
 /**
- * register a async channel for vhost
+ * register an async channel for vhost
  *
  * @param vid
  *  vhost device id async channel to be attached to
@@ -147,8 +147,8 @@ __rte_experimental
 int rte_vhost_async_channel_unregister(int vid, uint16_t queue_id);
 
 /**
- * This function submit enqueue data to async engine. This function has
- * no guranttee to the transfer completion upon return. Applications
+ * This function submits enqueue data to async engine. This function has
+ * no guarantee to the transfer completion upon return. Applications
  * should poll transfer status by rte_vhost_poll_enqueue_completed()
  *
  * @param vid
@@ -167,7 +167,7 @@ uint16_t rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id,
struct rte_mbuf **pkts, uint16_t count);
 
 /**
- * This function check async completion status for a specific vhost
+ * This function checks async completion status for a specific vhost
  * device queue. Packets which finish copying (enqueue) operation
  * will be returned in an array.
  *
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index fec08b2..0b63940 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -1130,8 +1130,11 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
}
 
 out:
-   async_fill_iter(src_it, tlen, src_iovec, tvec_idx);
-   async_fill_iter(dst_it, tlen, dst_iovec, tvec_idx);
+   if (tlen) {
+   async_fill_iter(src_it, tlen, src_iovec, tvec_idx);
+   async_fill_iter(dst_it, tlen, dst_iovec, tvec_idx);
+   } else
+   src_it->count = 0;
 
return error;
 }
@@ -1491,10 +1494,9 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
struct rte_vhost_iov_iter *src_it = it_pool;
struct rte_vhost_iov_iter *dst_it = it_pool + 1;
uint16_t n_free_slot, slot_idx = 0;
-   uint16_t pkt_err = 0;
uint16_t segs_await = 0;
struct async_inflight_info *pkts_info = vq->async_pkts_info;
-   int n_pkts = 0;
+   uint32_t n_pkts = 0, pkt_err = 0;
 
/*
 * The ordering between avail index and desc reads need to be enforced.
@@ -1549,11 +1551,9 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
/*
 * conditions to trigger async device transfer:
 * - buffered packet number reaches transfer threshold
-* - this is the last packet in the burst enqueue
 * - unused async iov number is less than max vhost vector
 */
if (pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD ||
-   (pkt_idx == count - 1 && pkt_burst_idx) ||
(VHOST_MAX_ASYNC_VEC / 2 - segs_await <
BUF_VECTOR_MAX)) {
n_pkts = vq->async_ops.transfer_data(dev->vid,
@@ -1565,7 +1565,7 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
segs_await = 0;
vq->async_pkts_inflight_n += pkt_burst_idx;
 
-   if (unlikely(n_pkts < (int)pkt_burst_idx)) {
+   if (unlikely(n_pkts < pkt_burst_idx)) {
/*
 * log error packets number here and do actual
 * error processing when applications poll
@@ -1585,7 +1585,7 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
queue_id, tdes, 0, pkt_burst_idx);
vq->async_pkts_inflight_n += pkt_burst_idx;
 
-   if (unlikely(n_pkts < (int)pkt_burst_idx))
+   if (unlikely(n_pkts < pkt_burst_idx))
pkt_err = pkt_burst_idx - n_pkts;
}
 
-- 
2.7.4

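The hunks above drop the "last packet in the burst" condition from the
in-loop transfer trigger; the leftover batch is instead flushed once
after the loop (visible in the second virtio_net.c hunk). A generic
sketch of that shape, with transfer() as a hypothetical stand-in for
the async channel's transfer_data callback:

#include <stdint.h>

#define BATCH_THRESHOLD 32

/* stand-in for the async channel's transfer_data callback */
static uint32_t
transfer(void **descs, uint32_t n)
{
	(void)descs;
	return n; /* pretend every descriptor was accepted */
}

static void
submit_burst(void **descs, uint32_t count)
{
	uint32_t i, batch = 0;

	for (i = 0; i < count; i++) {
		/* ... stage descs[i] into the current batch ... */
		batch++;
		/* in-loop trigger: threshold pressure only */
		if (batch >= BATCH_THRESHOLD) {
			transfer(descs, batch);
			batch = 0;
		}
	}
	/* post-loop flush replaces the old "last packet" condition */
	if (batch)
		transfer(descs, batch);
}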


[dpdk-dev] [PATCH v4 2/2] vhost: enhance async enqueue for small packets

2021-01-10 Thread Jiayu Hu
Async enqueue offloads large copies to DMA devices, while small copies
are still performed by the CPU. However, it requires users to retrieve
enqueue-completed packets with rte_vhost_poll_enqueue_completed(), even
for packets that the CPU has already completed by the time
rte_vhost_submit_enqueue_burst() returns. This design incurs the extra
overhead of tracking completed pktmbufs and of additional function
calls, degrading performance on small packets.

This patch enhances async enqueue for small packets by enabling
rte_vhost_submit_enqueue_burst() to return completed packets.

Signed-off-by: Jiayu Hu 
Tested-by: Yinan Wang 
---
 doc/guides/prog_guide/vhost_lib.rst |   8 +-
 examples/vhost/main.c   |  18 ++-
 lib/librte_vhost/rte_vhost_async.h  |  30 +++--
 lib/librte_vhost/vhost.c|  14 +--
 lib/librte_vhost/vhost.h|   7 +-
 lib/librte_vhost/vhost_user.c   |   7 +-
 lib/librte_vhost/virtio_net.c   | 242 
 7 files changed, 190 insertions(+), 136 deletions(-)

diff --git a/doc/guides/prog_guide/vhost_lib.rst b/doc/guides/prog_guide/vhost_lib.rst
index ba4c62a..dc29229 100644
--- a/doc/guides/prog_guide/vhost_lib.rst
+++ b/doc/guides/prog_guide/vhost_lib.rst
@@ -245,11 +245,13 @@ The following is an overview of some key Vhost API functions:
 
   Unregister the async copy device channel from a vhost queue.
 
-* ``rte_vhost_submit_enqueue_burst(vid, queue_id, pkts, count)``
+* ``rte_vhost_submit_enqueue_burst(vid, queue_id, pkts, count, comp_pkts, comp_count)``
 
   Submit an enqueue request to transmit ``count`` packets from host to guest
-  by async data path. Enqueue is not guaranteed to finish upon the return of
-  this API call.
+  by async data path. When this API call returns, successfully enqueued
+  packets may be transfer completed or still occupied by DMA engines;
+  transfer completed packets are returned in ``comp_pkts``, while the
+  others are not guaranteed to have finished.
 
   Applications must not free the packets submitted for enqueue until the
   packets are completed.
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 8d8c303..2230997 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -809,13 +809,16 @@ virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
struct rte_mbuf *m)
 {
uint16_t ret;
-   struct rte_mbuf *m_cpl[1];
+   struct rte_mbuf *m_cpl[1], *comp_pkt;
+   uint32_t nr_comp = 0;
 
if (builtin_net_driver) {
ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
} else if (async_vhost_driver) {
ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
-   &m, 1);
+   &m, 1, &comp_pkt, &nr_comp);
+   if (nr_comp == 1)
+   goto done;
 
if (likely(ret))
dst_vdev->nr_async_pkts++;
@@ -829,6 +832,7 @@ virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
}
 
+done:
if (enable_stats) {
rte_atomic64_inc(&dst_vdev->stats.rx_total_atomic);
rte_atomic64_add(&dst_vdev->stats.rx_atomic, ret);
@@ -1090,7 +1094,8 @@ static __rte_always_inline void
 drain_eth_rx(struct vhost_dev *vdev)
 {
uint16_t rx_count, enqueue_count;
-   struct rte_mbuf *pkts[MAX_PKT_BURST];
+   struct rte_mbuf *pkts[MAX_PKT_BURST], *comp_pkts[MAX_PKT_BURST];
+   uint32_t nr_comp = 0;
 
rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
pkts, MAX_PKT_BURST);
@@ -1124,7 +1129,12 @@ drain_eth_rx(struct vhost_dev *vdev)
pkts, rx_count);
} else if (async_vhost_driver) {
enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
-   VIRTIO_RXQ, pkts, rx_count);
+   VIRTIO_RXQ, pkts, rx_count, comp_pkts,
+   &nr_comp);
+   if (nr_comp > 0) {
+   free_pkts(comp_pkts, nr_comp);
+   enqueue_count -= nr_comp;
+   }
vdev->nr_async_pkts += enqueue_count;
} else {
enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
diff --git a/lib/librte_vhost/rte_vhost_async.h b/lib/librte_vhost/rte_vhost_async.h
index 03bd558..c855ff8 100644
--- a/lib/librte_vhost/rte_vhost_async.h
+++ b/lib/librte_vhost/rte_vhost_async.h
@@ -87,13 +87,8 @@ struct rte_vhost_async_channel_ops {
  * inflight async packet information
  */
 struct async_inflight_info {
-   union {
-   uint32_t info;
-   struct {
-   uint16_t descs; /* num of descs inflight */
-   uint16_t segs; /* iov segs inflig

[dpdk-dev] [PATCH v8 0/2] examples/vhost: sample code refactor

2021-01-10 Thread Cheng Jiang
Refactor the vhost sample code. Add an ioat ring space count and check
in the ioat callback, optimize the vhost data path for batch enqueue,
replace rte_atomicNN_xxx with atomic_XXX, and refactor the vhost async
data path.
---
v8:
 * rebased the code

v7:
 * fixed an rte_ioat_completed_ops() failure handler issue

v6:
 * adjusted the value of MAX_ENQUEUED_SIZE in ioat.h

v5:
 * added freeing of vhost enqueue buffers when destroying a vhost device
 * added an rte_ioat_completed_ops() failure handler
 * changed the behavior of the drain_vhost_table() function
 * changed some variable names
 * changed some variable definitions
 * added an rte_zmalloc() failure handler
 * added some comments
 * fixed some typos

v4:
 * improved code structure
 * improved vhost enqueue buffer memory allocation
 * cleaned up some code

v3:
 * added some variable initialization
 * cleaned up some code

v2:
 * optimized patch structure
 * optimized the git log
 * replaced rte_atomicNN_xxx with atomic_XXX

Cheng Jiang (2):
  examples/vhost: add ioat ring space count and check
  examples/vhost: refactor vhost data path

 examples/vhost/ioat.c |  24 +++--
 examples/vhost/ioat.h |   2 +-
 examples/vhost/main.c | 226 ++
 examples/vhost/main.h |   7 +-
 4 files changed, 181 insertions(+), 78 deletions(-)

--
2.29.2

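The rte_atomicNN_xxx to atomic_XXX replacement mentioned in this cover
letter, shown as a minimal standalone sketch; the struct and counter
names are illustrative, not the example app's exact fields:

#include <stdatomic.h>
#include <stdint.h>

struct stats {
	atomic_uint_least64_t rx_total_atomic;
	atomic_uint_least64_t rx_atomic;
};

static void
count_rx(struct stats *s, uint64_t n_done)
{
	/* was: rte_atomic64_inc(&s->rx_total_atomic); */
	atomic_fetch_add(&s->rx_total_atomic, 1);
	/* was: rte_atomic64_add(&s->rx_atomic, n_done); */
	atomic_fetch_add(&s->rx_atomic, n_done);
}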


[dpdk-dev] [PATCH v8 1/2] examples/vhost: add ioat ring space count and check

2021-01-10 Thread Cheng Jiang
Add an ioat ring space count and check: if the ioat ring space is not
enough for the next async vhost packet enqueue, just return to prevent
an enqueue failure. Also add an rte_ioat_completed_ops() failure
handler.

Signed-off-by: Cheng Jiang 
Reviewed-by: Jiayu Hu 
---
 examples/vhost/ioat.c | 24 +---
 1 file changed, 13 insertions(+), 11 deletions(-)

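A minimal sketch of the ring-space accounting this patch introduces,
with simplified types and hypothetical helper names: start at the ring
size, refuse enqueues that would overflow, and credit slots back as
completions are reaped.

#include <stdint.h>

#define IOAT_RING_SIZE 4096

struct tracker {
	unsigned short ioat_space; /* free descriptor slots left */
};

static int
reserve_segs(struct tracker *t, unsigned short nr_segs)
{
	if (t->ioat_space < nr_segs)
		return -1;	/* not enough ring space: stop enqueuing */
	t->ioat_space -= nr_segs;
	return 0;
}

static void
credit_completions(struct tracker *t, int n_done)
{
	if (n_done > 0)		/* rte_ioat_completed_ops() may fail (<0) */
		t->ioat_space += n_done;
}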
diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
index 71d8a1f1f..dbad28d43 100644
--- a/examples/vhost/ioat.c
+++ b/examples/vhost/ioat.c
@@ -17,6 +17,7 @@ struct packet_tracker {
unsigned short next_read;
unsigned short next_write;
unsigned short last_remain;
+   unsigned short ioat_space;
 };

 struct packet_tracker cb_tracker[MAX_VHOST_DEVICE];
@@ -113,7 +114,7 @@ open_ioat(const char *value)
goto out;
}
rte_rawdev_start(dev_id);
-
+   cb_tracker[dev_id].ioat_space = IOAT_RING_SIZE;
dma_info->nr++;
i++;
}
@@ -140,13 +141,9 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
src = descs[i_desc].src;
dst = descs[i_desc].dst;
i_seg = 0;
+   if (cb_tracker[dev_id].ioat_space < src->nr_segs)
+   break;
while (i_seg < src->nr_segs) {
-   /*
-* TODO: Assuming that the ring space of the
-* IOAT device is large enough, so there is no
-* error here, and the actual error handling
-* will be added later.
-*/
rte_ioat_enqueue_copy(dev_id,
(uintptr_t)(src->iov[i_seg].iov_base)
+ src->offset,
@@ -158,7 +155,8 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
i_seg++;
}
write &= mask;
-   cb_tracker[dev_id].size_track[write] = i_seg;
+   cb_tracker[dev_id].size_track[write] = src->nr_segs;
+   cb_tracker[dev_id].ioat_space -= src->nr_segs;
write++;
}
} else {
@@ -178,17 +176,21 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
 {
if (!opaque_data) {
uintptr_t dump[255];
-   unsigned short n_seg;
+   int n_seg;
unsigned short read, write;
unsigned short nb_packet = 0;
unsigned short mask = MAX_ENQUEUED_SIZE - 1;
unsigned short i;
+
int dev_id = dma_bind[vid].dmas[queue_id * 2
+ VIRTIO_RXQ].dev_id;
n_seg = rte_ioat_completed_ops(dev_id, 255, dump, dump);
-   n_seg += cb_tracker[dev_id].last_remain;
-   if (!n_seg)
+   if (n_seg <= 0)
return 0;
+
+   cb_tracker[dev_id].ioat_space += n_seg;
+   n_seg += cb_tracker[dev_id].last_remain;
+
read = cb_tracker[dev_id].next_read;
write = cb_tracker[dev_id].next_write;
for (i = 0; i < max_packets; i++) {
--
2.29.2



[dpdk-dev] [PATCH v8 2/2] examples/vhost: refactor vhost data path

2021-01-10 Thread Cheng Jiang
Change the vm2vm data path to batch enqueue for better performance.
Support the latest async vhost API, refactor the vhost async data path,
replace rte_atomicNN_xxx with atomic_XXX, and clean up some code.

Signed-off-by: Cheng Jiang 
Reviewed-by: Jiayu Hu 
---
 examples/vhost/ioat.h |   2 +-
 examples/vhost/main.c | 226 ++
 examples/vhost/main.h |   7 +-
 3 files changed, 168 insertions(+), 67 deletions(-)

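A sketch of the per-(lcore, device) TX buffer lookup this refactor
introduces; the type mirrors the diff below, allocation and teardown
are omitted, and tx_buffer() is a hypothetical accessor. Keeping one
buffer per core per device lets each data core build batches without
cross-core locking.

#include <stdint.h>
#include <rte_lcore.h>
#include <rte_mbuf.h>

#define MAX_VHOST_DEVICE 1024
#define MAX_PKT_BURST 32

struct vhost_bufftable {
	uint32_t len;			/* packets currently buffered */
	uint64_t pre_tsc;		/* timestamp of the last drain */
	struct rte_mbuf *m_table[MAX_PKT_BURST];
};

/* entries are allocated per (core, device) pair at setup (omitted) */
static struct vhost_bufftable *vhost_txbuff[RTE_MAX_LCORE * MAX_VHOST_DEVICE];

static inline struct vhost_bufftable *
tx_buffer(int vid)
{
	/* flat 2-D index: one slot per (core, device) pair */
	return vhost_txbuff[rte_lcore_id() * MAX_VHOST_DEVICE + vid];
}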
diff --git a/examples/vhost/ioat.h b/examples/vhost/ioat.h
index d6e1e2e07..0a1dbb811 100644
--- a/examples/vhost/ioat.h
+++ b/examples/vhost/ioat.h
@@ -11,7 +11,7 @@

 #define MAX_VHOST_DEVICE 1024
 #define IOAT_RING_SIZE 4096
-#define MAX_ENQUEUED_SIZE 256
+#define MAX_ENQUEUED_SIZE 512

 struct dma_info {
struct rte_pci_addr addr;
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 22309977c..45976c93c 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -179,9 +179,22 @@ struct mbuf_table {
struct rte_mbuf *m_table[MAX_PKT_BURST];
 };

+struct vhost_bufftable {
+   uint32_t len;
+   uint64_t pre_tsc;
+   struct rte_mbuf *m_table[MAX_PKT_BURST];
+};
+
 /* TX queue for each data core. */
 struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];

+/*
+ * Vhost TX buffer for each data core.
+ * Every data core maintains a TX buffer for every vhost device,
+ * which is used for batched packet enqueue for higher performance.
+ */
+struct vhost_bufftable *vhost_txbuff[RTE_MAX_LCORE * MAX_VHOST_DEVICE];
+
 #define MBUF_TABLE_DRAIN_TSC   ((rte_get_tsc_hz() + US_PER_S - 1) \
 / US_PER_S * BURST_TX_DRAIN_US)
 #define VLAN_HLEN   4
@@ -804,43 +817,112 @@ unlink_vmdq(struct vhost_dev *vdev)
}
 }

+static inline void
+free_pkts(struct rte_mbuf **pkts, uint16_t n)
+{
+   while (n--)
+   rte_pktmbuf_free(pkts[n]);
+}
+
 static __rte_always_inline void
-virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
+complete_async_pkts(struct vhost_dev *vdev)
+{
+   struct rte_mbuf *p_cpl[MAX_PKT_BURST];
+   uint16_t complete_count;
+
+   complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
+   VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
+   if (complete_count) {
+   atomic_fetch_sub(&vdev->nr_async_pkts, complete_count);
+   free_pkts(p_cpl, complete_count);
+   }
+}
+
+static __rte_always_inline void
+sync_virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
struct rte_mbuf *m)
 {
uint16_t ret;
-   struct rte_mbuf *m_cpl[1], *comp_pkt;
-   uint32_t nr_comp = 0;

if (builtin_net_driver) {
ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
-   } else if (async_vhost_driver) {
-   ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
-   &m, 1, &comp_pkt, &nr_comp);
-   if (nr_comp == 1)
-   goto done;
-
-   if (likely(ret))
-   dst_vdev->nr_async_pkts++;
-
-   while (likely(dst_vdev->nr_async_pkts)) {
-   if (rte_vhost_poll_enqueue_completed(dst_vdev->vid,
-   VIRTIO_RXQ, m_cpl, 1))
-   dst_vdev->nr_async_pkts--;
-   }
} else {
ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
}

-done:
if (enable_stats) {
-   rte_atomic64_inc(&dst_vdev->stats.rx_total_atomic);
-   rte_atomic64_add(&dst_vdev->stats.rx_atomic, ret);
+   atomic_fetch_add(&dst_vdev->stats.rx_total_atomic, 1);
+   atomic_fetch_add(&dst_vdev->stats.rx_atomic, ret);
src_vdev->stats.tx_total++;
src_vdev->stats.tx += ret;
}
 }

+static __rte_always_inline void
+drain_vhost(struct vhost_dev *vdev)
+{
+   uint16_t ret;
+   uint64_t buff_idx = rte_lcore_id() * MAX_VHOST_DEVICE + vdev->vid;
+   uint16_t nr_xmit = vhost_txbuff[buff_idx]->len;
+   struct rte_mbuf **m = vhost_txbuff[buff_idx]->m_table;
+
+   if (builtin_net_driver) {
+   ret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit);
+   } else if (async_vhost_driver) {
+   uint32_t cpu_cpl_nr = 0;
+   uint16_t enqueue_fail = 0;
+   struct rte_mbuf *m_cpu_cpl[nr_xmit];
+
+   complete_async_pkts(vdev);
+   ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
+   m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
+   atomic_fetch_add(&vdev->nr_async_pkts, ret - cpu_cpl_nr);
+
+   if (cpu_cpl_nr)
+   free_pkts(m_cpu_cpl, cpu_cpl_nr);
+
+   enqueue_fail = nr_xmit - ret;
+   if (enqueue_fail)
+   free_pkts(&m[ret], nr_xmit - ret);
+   } else {
+