Re: [PATCH v2 1/2] vhost: fix possible FDs leak

2023-01-29 Thread David Marchand
On Fri, Jan 27, 2023 at 5:55 PM Maxime Coquelin
 wrote:
>
> On failure, read_vhost_message() only closed the message
> FDs if the header size was unexpected, but there are other
> cases where it is required. For example, in the case where
> the payload size read from the header is greater than the
> expected maximum payload size.
>
> This patch fixes this by closing all message FDs in all
> error cases.
>
> Fixes: bf472259dde6 ("vhost: fix possible denial of service by leaking FDs")
> Cc: sta...@dpdk.org
>
> Signed-off-by: Maxime Coquelin 

Reviewed-by: David Marchand 

We mentioned offlist that the request type can be logged to help with debug.
Do you intend to add this as a follow-up patch?
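
For context, the pattern being fixed, releasing every FD attached to a
message on any error path, looks roughly like this (a minimal sketch with a
hypothetical helper, not the actual vhost code):

#include <unistd.h>

/* Hypothetical helper: close every FD carried by a vhost-user message. */
static void
close_all_msg_fds(int *fds, int fd_num)
{
	int i;

	for (i = 0; i < fd_num; i++) {
		if (fds[i] >= 0) {
			close(fds[i]);
			fds[i] = -1;
		}
	}
}

/* Every failure path in read_vhost_message() would then do, e.g.: */
if (msg_payload_size > MAX_EXPECTED_PAYLOAD) {	/* illustrative check */
	close_all_msg_fds(msg_fds, fd_num);
	return -1;
}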


-- 
David Marchand



Re: [PATCH v2 2/2] vhost: fix possible FD leaks on MSG_TRUNC and MSG_CTRUNC

2023-01-29 Thread David Marchand
On Fri, Jan 27, 2023 at 5:55 PM Maxime Coquelin
 wrote:
>
> This patch fixes possible FD leaks when truncation happens
> on either the message buffer or its control data. Indeed,
> by returning early, it gave no chance to retrieve the
> FDs passed as ancillary data, and so caused a potential FD
> leak.
>
> This patch fixes this by extracting the FDs from the
> ancillary data as long as the recvmsg() call succeeded. It
> also improves the logs to differentiate between MSG_TRUNC
> and MSG_CTRUNC.

As I mentioned offlist, I am not convinced the MSG_TRUNC flag can be
set on receipt of a message, since the socket is in stream mode.
I am okay to keep the check as is, but it is confusing.
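
For reference, the pattern being fixed, retrieving SCM_RIGHTS FDs even when
the message or its control data was truncated so they can be closed instead
of leaked, looks roughly like this (a minimal sketch, not the actual vhost
code; buffer sizes are illustrative):

#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

static int
recv_fds(int sockfd, int *fds, int max_fds)
{
	char buf[64];
	char control[CMSG_SPACE(8 * sizeof(int))];
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
	struct msghdr msgh;
	struct cmsghdr *cmsg;
	int i, nr_fds = 0;

	memset(&msgh, 0, sizeof(msgh));
	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;
	msgh.msg_control = control;
	msgh.msg_controllen = sizeof(control);

	if (recvmsg(sockfd, &msgh, 0) <= 0)
		return -1;

	/* Extract the FDs as long as recvmsg() succeeded... */
	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
	     cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
		if (cmsg->cmsg_level == SOL_SOCKET &&
		    cmsg->cmsg_type == SCM_RIGHTS) {
			nr_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
			if (nr_fds > max_fds)
				nr_fds = max_fds;
			memcpy(fds, CMSG_DATA(cmsg), nr_fds * sizeof(int));
		}
	}

	/* ...so that, on truncation, they can be closed rather than leaked. */
	if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
		for (i = 0; i < nr_fds; i++)
			close(fds[i]);
		return -1;
	}
	return nr_fds;
}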


>
> Fixes: bf472259dde6 ("vhost: fix possible denial of service by leaking FDs")
> Cc: sta...@dpdk.org
>
> Signed-off-by: Maxime Coquelin 

Reviewed-by: David Marchand 


-- 
David Marchand



[PATCH 02/10] net/hns3: extract common API to query device

2023-01-29 Thread Dongdong Liu
From: Huisong Li 

Extract a common function to query device specifications.

Fixes: 9c740336f024 ("net/hns3: get device specifications from firmware")
Cc: sta...@dpdk.org

Signed-off-by: Huisong Li 
Signed-off-by: Dongdong Liu 
---
 drivers/net/hns3/hns3_common.c| 75 +++
 drivers/net/hns3/hns3_common.h|  2 +
 drivers/net/hns3/hns3_ethdev.c| 63 --
 drivers/net/hns3/hns3_ethdev_vf.c | 65 +--
 4 files changed, 79 insertions(+), 126 deletions(-)

diff --git a/drivers/net/hns3/hns3_common.c b/drivers/net/hns3/hns3_common.c
index 7adc6a4972..b0c7f8d62c 100644
--- a/drivers/net/hns3/hns3_common.c
+++ b/drivers/net/hns3/hns3_common.c
@@ -10,6 +10,7 @@
 #include "hns3_logs.h"
 #include "hns3_regs.h"
 #include "hns3_rxtx.h"
+#include "hns3_dcb.h"
 #include "hns3_common.h"
 
 int
@@ -845,3 +846,77 @@ hns3_get_pci_revision_id(struct hns3_hw *hw, uint8_t *revision_id)
 
return 0;
 }
+
+void
+hns3_set_default_dev_specifications(struct hns3_hw *hw)
+{
+   struct hns3_adapter *hns = HNS3_DEV_HW_TO_ADAPTER(hw);
+
+   hw->max_non_tso_bd_num = HNS3_MAX_NON_TSO_BD_PER_PKT;
+   hw->rss_ind_tbl_size = HNS3_RSS_IND_TBL_SIZE;
+   hw->rss_key_size = HNS3_RSS_KEY_SIZE;
+   hw->intr.int_ql_max = HNS3_INTR_QL_NONE;
+
+   if (hns->is_vf)
+   return;
+
+   hw->max_tm_rate = HNS3_ETHER_MAX_RATE;
+}
+
+static void
+hns3_parse_dev_specifications(struct hns3_hw *hw, struct hns3_cmd_desc *desc)
+{
+   struct hns3_adapter *hns = HNS3_DEV_HW_TO_ADAPTER(hw);
+   struct hns3_dev_specs_0_cmd *req0;
+   struct hns3_dev_specs_1_cmd *req1;
+
+   req0 = (struct hns3_dev_specs_0_cmd *)desc[0].data;
+   req1 = (struct hns3_dev_specs_1_cmd *)desc[1].data;
+
+   hw->max_non_tso_bd_num = req0->max_non_tso_bd_num;
+   hw->rss_ind_tbl_size = rte_le_to_cpu_16(req0->rss_ind_tbl_size);
+   hw->rss_key_size = rte_le_to_cpu_16(req0->rss_key_size);
+   hw->intr.int_ql_max = rte_le_to_cpu_16(req0->intr_ql_max);
+   hw->min_tx_pkt_len = req1->min_tx_pkt_len;
+
+   if (hns->is_vf)
+   return;
+
+   hw->max_tm_rate = rte_le_to_cpu_32(req0->max_tm_rate);
+}
+
+static int
+hns3_check_dev_specifications(struct hns3_hw *hw)
+{
+   if (hw->rss_ind_tbl_size == 0 ||
+   hw->rss_ind_tbl_size > HNS3_RSS_IND_TBL_SIZE_MAX) {
+   hns3_err(hw, "the indirection table size obtained (%u) is invalid, and should not be zero or exceed the maximum(%u)",
+hw->rss_ind_tbl_size, HNS3_RSS_IND_TBL_SIZE_MAX);
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
+int
+hns3_query_dev_specifications(struct hns3_hw *hw)
+{
+   struct hns3_cmd_desc desc[HNS3_QUERY_DEV_SPECS_BD_NUM];
+   int ret;
+   int i;
+
+   for (i = 0; i < HNS3_QUERY_DEV_SPECS_BD_NUM - 1; i++) {
+   hns3_cmd_setup_basic_desc(&desc[i], HNS3_OPC_QUERY_DEV_SPECS,
+ true);
+   desc[i].flag |= rte_cpu_to_le_16(HNS3_CMD_FLAG_NEXT);
+   }
+   hns3_cmd_setup_basic_desc(&desc[i], HNS3_OPC_QUERY_DEV_SPECS, true);
+
+   ret = hns3_cmd_send(hw, desc, HNS3_QUERY_DEV_SPECS_BD_NUM);
+   if (ret)
+   return ret;
+
+   hns3_parse_dev_specifications(hw, desc);
+
+   return hns3_check_dev_specifications(hw);
+}
diff --git a/drivers/net/hns3/hns3_common.h b/drivers/net/hns3/hns3_common.h
index 5aa001f0cc..8eaeda26e7 100644
--- a/drivers/net/hns3/hns3_common.h
+++ b/drivers/net/hns3/hns3_common.h
@@ -60,5 +60,7 @@ void hns3_unmap_rx_interrupt(struct rte_eth_dev *dev);
 int hns3_restore_rx_interrupt(struct hns3_hw *hw);
 
 int hns3_get_pci_revision_id(struct hns3_hw *hw, uint8_t *revision_id);
+void hns3_set_default_dev_specifications(struct hns3_hw *hw);
+int hns3_query_dev_specifications(struct hns3_hw *hw);
 
 #endif /* HNS3_COMMON_H */
diff --git a/drivers/net/hns3/hns3_ethdev.c b/drivers/net/hns3/hns3_ethdev.c
index eb809cd8c9..ab565ce128 100644
--- a/drivers/net/hns3/hns3_ethdev.c
+++ b/drivers/net/hns3/hns3_ethdev.c
@@ -2647,69 +2647,6 @@ hns3_parse_speed(int speed_cmd, uint32_t *speed)
return 0;
 }
 
-static void
-hns3_set_default_dev_specifications(struct hns3_hw *hw)
-{
-   hw->max_non_tso_bd_num = HNS3_MAX_NON_TSO_BD_PER_PKT;
-   hw->rss_ind_tbl_size = HNS3_RSS_IND_TBL_SIZE;
-   hw->rss_key_size = HNS3_RSS_KEY_SIZE;
-   hw->max_tm_rate = HNS3_ETHER_MAX_RATE;
-   hw->intr.int_ql_max = HNS3_INTR_QL_NONE;
-}
-
-static void
-hns3_parse_dev_specifications(struct hns3_hw *hw, struct hns3_cmd_desc *desc)
-{
-   struct hns3_dev_specs_0_cmd *req0;
-   struct hns3_dev_specs_1_cmd *req1;
-
-   req0 = (struct hns3_dev_specs_0_cmd *)desc[0].data;
-   req1 = (struct hns3_dev_specs_1_cmd *)desc[1].data;
-
-   hw->max_non_tso_bd_num = req0->max_non_tso_bd_num;
-   hw->rss_ind_tbl_size = rte_le_to_cpu_16(req0->rss_ind_tbl_size);
-   

[PATCH 01/10] net/hns3: fix error log about indirection table size

2023-01-29 Thread Dongdong Liu
From: Huisong Li 

The error log about the indirection table size during the initialization
phase of PF and VF is unreasonable when the indirection table size obtained
from the firmware or the PF is zero. In addition, the VF driver should use
the error level to print this log.

Fixes: 0fce2c46dc16 ("net/hns3: fix RSS indirection table size")
Cc: sta...@dpdk.org

Signed-off-by: Huisong Li 
Signed-off-by: Dongdong Liu 
---
 drivers/net/hns3/hns3_ethdev.c| 2 +-
 drivers/net/hns3/hns3_ethdev_vf.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/hns3/hns3_ethdev.c b/drivers/net/hns3/hns3_ethdev.c
index d326f70129..eb809cd8c9 100644
--- a/drivers/net/hns3/hns3_ethdev.c
+++ b/drivers/net/hns3/hns3_ethdev.c
@@ -2679,7 +2679,7 @@ hns3_check_dev_specifications(struct hns3_hw *hw)
 {
if (hw->rss_ind_tbl_size == 0 ||
hw->rss_ind_tbl_size > HNS3_RSS_IND_TBL_SIZE_MAX) {
-   hns3_err(hw, "the size of hash lookup table configured (%u) exceeds the maximum(%u)",
+   hns3_err(hw, "the indirection table size obtained (%u) is invalid, and should not be zero or exceed the maximum(%u)",
 hw->rss_ind_tbl_size, HNS3_RSS_IND_TBL_SIZE_MAX);
return -EINVAL;
}
diff --git a/drivers/net/hns3/hns3_ethdev_vf.c b/drivers/net/hns3/hns3_ethdev_vf.c
index d220522c43..e43815607a 100644
--- a/drivers/net/hns3/hns3_ethdev_vf.c
+++ b/drivers/net/hns3/hns3_ethdev_vf.c
@@ -718,8 +718,8 @@ hns3vf_check_dev_specifications(struct hns3_hw *hw)
 {
if (hw->rss_ind_tbl_size == 0 ||
hw->rss_ind_tbl_size > HNS3_RSS_IND_TBL_SIZE_MAX) {
-   hns3_warn(hw, "the size of hash lookup table configured (%u) exceeds the maximum(%u)",
- hw->rss_ind_tbl_size, HNS3_RSS_IND_TBL_SIZE_MAX);
+   hns3_err(hw, "the indirection table size obtained (%u) is invalid, and should not be zero or exceed the maximum(%u)",
+hw->rss_ind_tbl_size, HNS3_RSS_IND_TBL_SIZE_MAX);
return -EINVAL;
}
 
-- 
2.22.0



[PATCH 00/10] net/hns3: some bugfixes for RSS

2023-01-29 Thread Dongdong Liu
This patchset provides some bugfixes for hns3 RSS.

Huisong Li (10):
  net/hns3: fix error log about indirection table size
  net/hns3: extract common API to query device
  net/hns3: refactor set RSS hash algorithm and key interface
  net/hns3: fix fixed RSS key size to be more compatible
  net/hns3: fix misclearing RSS configuration
  net/hns3: use RSS filter list to check duplicated rule
  net/hns3: remove useless code when destroying valid RSS rule
  net/hns3: fix useless warning when flushing or destroying rule
  net/hns3: fix bad memory structure conversion
  net/hns3: fix incorrect check for duplicate RSS rule

 drivers/net/hns3/hns3_common.c|  87 +++-
 drivers/net/hns3/hns3_common.h|   2 +
 drivers/net/hns3/hns3_ethdev.c|  63 -
 drivers/net/hns3/hns3_ethdev_vf.c |  65 +
 drivers/net/hns3/hns3_flow.c  | 217 ++
 drivers/net/hns3/hns3_rss.c   |  63 -
 drivers/net/hns3/hns3_rss.h   |   7 +-
 7 files changed, 227 insertions(+), 277 deletions(-)

--
2.22.0



[PATCH 03/10] net/hns3: refactor set RSS hash algorithm and key interface

2023-01-29 Thread Dongdong Liu
From: Huisong Li 

The hns3_rss_set_algo_key() is used to set the RSS hash algorithm and key to
hardware. The number of commands sent to firmware should depend on the
length of the key. However, this number is currently fixed, which isn't good
for key expansion. In addition, the hash algorithm comes from
rss_info::hash_algo maintained in the driver, which also isn't good for the
usability of this function. This patch makes the hash algorithm and key
length input parameters of this interface.

Fixes: c37ca66f2b27 ("net/hns3: support RSS")
Cc: sta...@dpdk.org

Signed-off-by: Huisong Li 
Signed-off-by: Dongdong Liu 
---
 drivers/net/hns3/hns3_flow.c |  3 ++-
 drivers/net/hns3/hns3_rss.c  | 48 
 drivers/net/hns3/hns3_rss.h  |  4 ++-
 3 files changed, 26 insertions(+), 29 deletions(-)

diff --git a/drivers/net/hns3/hns3_flow.c b/drivers/net/hns3/hns3_flow.c
index a2c1589c39..95609f8483 100644
--- a/drivers/net/hns3/hns3_flow.c
+++ b/drivers/net/hns3/hns3_flow.c
@@ -1494,7 +1494,8 @@ hns3_hw_rss_hash_set(struct hns3_hw *hw, struct rte_flow_action_rss *rss_config)
if (ret)
return ret;
 
-   ret = hns3_rss_set_algo_key(hw, rss_config->key);
+   ret = hns3_rss_set_algo_key(hw, hw->rss_info.hash_algo,
+   rss_config->key, HNS3_RSS_KEY_SIZE);
if (ret)
return ret;
 
diff --git a/drivers/net/hns3/hns3_rss.c b/drivers/net/hns3/hns3_rss.c
index ca5a129234..3db7bf0445 100644
--- a/drivers/net/hns3/hns3_rss.c
+++ b/drivers/net/hns3/hns3_rss.c
@@ -277,45 +277,37 @@ static const struct {
 
 /*
  * rss_generic_config command function, opcode:0x0D01.
- * Used to set algorithm, key_offset and hash key of rss.
+ * Used to set algorithm and hash key of RSS.
  */
 int
-hns3_rss_set_algo_key(struct hns3_hw *hw, const uint8_t *key)
+hns3_rss_set_algo_key(struct hns3_hw *hw, uint8_t hash_algo,
+ const uint8_t *key, uint8_t key_len)
 {
-#define HNS3_KEY_OFFSET_MAX		3
-#define HNS3_SET_HASH_KEY_BYTE_FOUR	2
-
struct hns3_rss_generic_config_cmd *req;
struct hns3_cmd_desc desc;
-   uint32_t key_offset, key_size;
-   const uint8_t *key_cur;
-   uint8_t cur_offset;
+   const uint8_t *cur_key;
+   uint16_t cur_key_size;
+   uint16_t max_bd_num;
+   uint16_t idx;
int ret;
 
req = (struct hns3_rss_generic_config_cmd *)desc.data;
 
-   /*
-* key_offset=0, hash key byte0~15 is set to hardware.
-* key_offset=1, hash key byte16~31 is set to hardware.
-* key_offset=2, hash key byte32~39 is set to hardware.
-*/
-   for (key_offset = 0; key_offset < HNS3_KEY_OFFSET_MAX; key_offset++) {
+   max_bd_num = DIV_ROUND_UP(key_len, HNS3_RSS_HASH_KEY_NUM);
+   for (idx = 0; idx < max_bd_num; idx++) {
hns3_cmd_setup_basic_desc(&desc, HNS3_OPC_RSS_GENERIC_CONFIG,
  false);
 
-   req->hash_config |=
-   (hw->rss_info.hash_algo & HNS3_RSS_HASH_ALGO_MASK);
-   req->hash_config |= (key_offset << HNS3_RSS_HASH_KEY_OFFSET_B);
+   req->hash_config |= (hash_algo & HNS3_RSS_HASH_ALGO_MASK);
+   req->hash_config |= (idx << HNS3_RSS_HASH_KEY_OFFSET_B);
 
-   if (key_offset == HNS3_SET_HASH_KEY_BYTE_FOUR)
-   key_size = HNS3_RSS_KEY_SIZE - HNS3_RSS_HASH_KEY_NUM *
-   HNS3_SET_HASH_KEY_BYTE_FOUR;
+   if (idx == max_bd_num - 1)
+   cur_key_size = key_len % HNS3_RSS_HASH_KEY_NUM;
else
-   key_size = HNS3_RSS_HASH_KEY_NUM;
+   cur_key_size = HNS3_RSS_HASH_KEY_NUM;
 
-   cur_offset = key_offset * HNS3_RSS_HASH_KEY_NUM;
-   key_cur = key + cur_offset;
-   memcpy(req->hash_key, key_cur, key_size);
+   cur_key = key + idx * HNS3_RSS_HASH_KEY_NUM;
+   memcpy(req->hash_key, cur_key, cur_key_size);
 
ret = hns3_cmd_send(hw, &desc, 1);
if (ret) {
@@ -518,7 +510,8 @@ hns3_dev_rss_hash_update(struct rte_eth_dev *dev,
goto set_tuple_fail;
 
if (key) {
-   ret = hns3_rss_set_algo_key(hw, key);
+   ret = hns3_rss_set_algo_key(hw, hw->rss_info.hash_algo,
+   key, HNS3_RSS_KEY_SIZE);
if (ret)
goto set_algo_key_fail;
}
@@ -795,8 +788,9 @@ hns3_config_rss(struct hns3_adapter *hns)
break;
}
 
-   /* Configure RSS hash algorithm and hash key offset */
-   ret = hns3_rss_set_algo_key(hw, hash_key);
+   /* Configure RSS hash algorithm and hash key */
+   ret = hns3_rss_set_algo_key(hw, hw->rss_info.hash_algo, hash_key,
+   HNS3_RSS_KEY_SIZE);
if (ret)
return ret;
 
diff --git a/drivers/n
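As an aside (not part of the patch): with the default 40-byte key, and
assuming HNS3_RSS_HASH_KEY_NUM is 16 as implied by the comment removed above,
the refactored loop splits the key across command descriptors like this:

/*
 * Illustrative arithmetic only, mirroring the loop above
 * (assumes HNS3_RSS_KEY_SIZE = 40, HNS3_RSS_HASH_KEY_NUM = 16).
 */
uint16_t key_len = 40;
uint16_t max_bd_num = DIV_ROUND_UP(key_len, 16);	/* = 3 descriptors */

/*
 * idx 0: key bytes  0..15, cur_key_size = 16
 * idx 1: key bytes 16..31, cur_key_size = 16
 * idx 2: key bytes 32..39, cur_key_size = 40 % 16 = 8 (last descriptor)
 */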

[PATCH 04/10] net/hns3: fix fixed RSS key size to be more compatible

2023-01-29 Thread Dongdong Liu
From: Huisong Li 

For better compatibility, the RSS key sizes of PF and VF are obtained from
firmware. However, many places still use the old macro HNS3_RSS_KEY_SIZE
as the key size.

Fixes: 9c740336f024 ("net/hns3: get device specifications from firmware")
Cc: sta...@dpdk.org

Signed-off-by: Huisong Li 
Signed-off-by: Dongdong Liu 
---
 drivers/net/hns3/hns3_common.c | 12 +++-
 drivers/net/hns3/hns3_flow.c   | 26 --
 drivers/net/hns3/hns3_rss.c| 23 +++
 drivers/net/hns3/hns3_rss.h|  3 ++-
 4 files changed, 36 insertions(+), 28 deletions(-)

diff --git a/drivers/net/hns3/hns3_common.c b/drivers/net/hns3/hns3_common.c
index b0c7f8d62c..2da0f30964 100644
--- a/drivers/net/hns3/hns3_common.c
+++ b/drivers/net/hns3/hns3_common.c
@@ -129,7 +129,7 @@ hns3_dev_infos_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *info)
};
 
info->reta_size = hw->rss_ind_tbl_size;
-   info->hash_key_size = HNS3_RSS_KEY_SIZE;
+   info->hash_key_size = hw->rss_key_size;
info->flow_type_rss_offloads = HNS3_ETH_RSS_SUPPORT;
 
info->default_rxportconf.burst_size = HNS3_DEFAULT_PORT_CONF_BURST_SIZE;
@@ -895,6 +895,16 @@ hns3_check_dev_specifications(struct hns3_hw *hw)
return -EINVAL;
}
 
+   if (hw->rss_key_size == 0 || hw->rss_key_size > HNS3_RSS_KEY_SIZE_MAX) {
+   hns3_err(hw, "the RSS key size obtained (%u) is invalid, and should not be zero or exceed the maximum(%u)",
+hw->rss_key_size, HNS3_RSS_KEY_SIZE_MAX);
+   return -EINVAL;
+   }
+
+   if (hw->rss_key_size > HNS3_RSS_KEY_SIZE)
+   hns3_warn(hw, "the RSS key size obtained (%u) is greater than the default key size (%u)",
+ hw->rss_key_size, HNS3_RSS_KEY_SIZE);
+
return 0;
 }
 
diff --git a/drivers/net/hns3/hns3_flow.c b/drivers/net/hns3/hns3_flow.c
index 95609f8483..a18ec7650d 100644
--- a/drivers/net/hns3/hns3_flow.c
+++ b/drivers/net/hns3/hns3_flow.c
@@ -1406,10 +1406,10 @@ hns3_parse_rss_filter(struct rte_eth_dev *dev,
return rte_flow_error_set(error, ENOTSUP,
  RTE_FLOW_ERROR_TYPE_ACTION_CONF, act,
  "a nonzero RSS encapsulation level is 
not supported");
-   if (rss->key_len && rss->key_len != RTE_DIM(rss_conf->key))
+   if (rss->key_len && rss->key_len != hw->rss_key_size)
return rte_flow_error_set(error, ENOTSUP,
  RTE_FLOW_ERROR_TYPE_ACTION_CONF, act,
- "RSS hash key must be exactly 40 
bytes");
+ "invalid RSS key length");
 
if (!hns3_rss_input_tuple_supported(hw, rss))
return rte_flow_error_set(error, EINVAL,
@@ -1443,16 +1443,6 @@ hns3_disable_rss(struct hns3_hw *hw)
return 0;
 }
 
-static void
-hns3_adjust_rss_key(struct hns3_hw *hw, struct rte_flow_action_rss *rss_conf)
-{
-   if (rss_conf->key == NULL || rss_conf->key_len < HNS3_RSS_KEY_SIZE) {
-   hns3_warn(hw, "Default RSS hash key to be set");
-   rss_conf->key = hns3_hash_key;
-   rss_conf->key_len = HNS3_RSS_KEY_SIZE;
-   }
-}
-
 static int
 hns3_parse_rss_algorithm(struct hns3_hw *hw, enum rte_eth_hash_function *func,
 uint8_t *hash_algo)
@@ -1485,9 +1475,16 @@ hns3_parse_rss_algorithm(struct hns3_hw *hw, enum rte_eth_hash_function *func,
 static int
 hns3_hw_rss_hash_set(struct hns3_hw *hw, struct rte_flow_action_rss *rss_config)
 {
+   uint8_t rss_key[HNS3_RSS_KEY_SIZE_MAX] = {0};
+   bool use_default_key = false;
int ret;
 
-   hns3_adjust_rss_key(hw, rss_config);
+   if (rss_config->key == NULL || rss_config->key_len != hw->rss_key_size) {
+   hns3_warn(hw, "Default RSS hash key to be set");
+   memcpy(rss_key, hns3_hash_key,
+   RTE_MIN(sizeof(hns3_hash_key), hw->rss_key_size));
+   use_default_key = true;
+   }
 
ret = hns3_parse_rss_algorithm(hw, &rss_config->func,
   &hw->rss_info.hash_algo);
@@ -1495,7 +1492,8 @@ hns3_hw_rss_hash_set(struct hns3_hw *hw, struct rte_flow_action_rss *rss_config)
return ret;
 
ret = hns3_rss_set_algo_key(hw, hw->rss_info.hash_algo,
-   rss_config->key, HNS3_RSS_KEY_SIZE);
+   use_default_key ? rss_key : rss_config->key,
+   hw->rss_key_size);
if (ret)
return ret;
 
diff --git a/drivers/net/hns3/hns3_rss.c b/drivers/net/hns3/hns3_rss.c
index 3db7bf0445..d6e0754273 100644
--- a/drivers/net/hns3/hns3_rss.c
+++ b/drivers/net/hns3/hns3_rss.c
@@ -316,7 +316,7 @@ hns3_rss_set_algo_key(struct hns3_hw *hw, uint8_t hash_algo,
   

[PATCH 05/10] net/hns3: fix misclearing RSS configuration

2023-01-29 Thread Dongdong Liu
From: Huisong Li 

The RSS configuration will be miscleared when the driver receives an RSS
rule which has more than one RSS action.

Fixes: c37ca66f2b27 ("net/hns3: support RSS")
Cc: sta...@dpdk.org

Signed-off-by: Huisong Li 
Signed-off-by: Dongdong Liu 
---
 drivers/net/hns3/hns3_flow.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/hns3/hns3_flow.c b/drivers/net/hns3/hns3_flow.c
index a18ec7650d..c338eab049 100644
--- a/drivers/net/hns3/hns3_flow.c
+++ b/drivers/net/hns3/hns3_flow.c
@@ -1421,12 +1421,10 @@ hns3_parse_rss_filter(struct rte_eth_dev *dev,
 
/* Check if the next not void action is END */
NEXT_ITEM_OF_ACTION(act, actions, act_index);
-   if (act->type != RTE_FLOW_ACTION_TYPE_END) {
-   memset(rss_conf, 0, sizeof(struct hns3_rss_conf));
+   if (act->type != RTE_FLOW_ACTION_TYPE_END)
return rte_flow_error_set(error, EINVAL,
  RTE_FLOW_ERROR_TYPE_ACTION,
  act, "Not supported action.");
-   }
 
return 0;
 }
-- 
2.22.0



[PATCH 06/10] net/hns3: use RSS filter list to check duplicated rule

2023-01-29 Thread Dongdong Liu
From: Huisong Li 

All rules from the user are saved in the RSS filter list, so use the RSS
filter list to check for duplicated rules.

Fixes: c37ca66f2b27 ("net/hns3: support RSS")
Cc: sta...@dpdk.org

Signed-off-by: Huisong Li 
Signed-off-by: Dongdong Liu 
---
 drivers/net/hns3/hns3_flow.c | 35 +--
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/drivers/net/hns3/hns3_flow.c b/drivers/net/hns3/hns3_flow.c
index c338eab049..303275ae93 100644
--- a/drivers/net/hns3/hns3_flow.c
+++ b/drivers/net/hns3/hns3_flow.c
@@ -1300,7 +1300,7 @@ hns3_action_rss_same(const struct rte_flow_action_rss *comp,
!memcmp(comp->key, with->key, with->key_len);
 
return (func_is_same && rss_key_is_same &&
-   comp->types == (with->types & HNS3_ETH_RSS_SUPPORT) &&
+   comp->types == with->types &&
comp->level == with->level &&
comp->queue_num == with->queue_num &&
!memcmp(comp->queue, with->queue,
@@ -1596,15 +1596,7 @@ hns3_config_rss_filter(struct hns3_hw *hw,
}
 
/* Set hash algorithm and flow types by the user's config */
-   ret = hns3_hw_rss_hash_set(hw, &rss_flow_conf);
-   if (ret)
-   return ret;
-
-   ret = hns3_rss_conf_copy(rss_info, &rss_flow_conf);
-   if (ret)
-   hns3_err(hw, "RSS config init fail(%d)", ret);
-
-   return ret;
+   return hns3_hw_rss_hash_set(hw, &rss_flow_conf);
 }
 
 static int
@@ -1676,17 +1668,32 @@ hns3_restore_filter(struct hns3_adapter *hns)
return hns3_restore_rss_filter(hw);
 }
 
+static bool
+hns3_rss_action_is_dup(struct hns3_hw *hw,
+  const struct rte_flow_action_rss *act)
+{
+   struct hns3_rss_conf_ele *filter;
+
+   TAILQ_FOREACH(filter, &hw->flow_rss_list, entries) {
+   if (!filter->filter_info.valid)
+   continue;
+
+   if (hns3_action_rss_same(&filter->filter_info.conf, act))
+   return true;
+   }
+
+   return false;
+}
+
 static int
 hns3_flow_parse_rss(struct rte_eth_dev *dev,
const struct hns3_rss_conf *conf, bool add)
 {
struct hns3_adapter *hns = dev->data->dev_private;
struct hns3_hw *hw = &hns->hw;
-   bool ret;
 
-   ret = hns3_action_rss_same(&hw->rss_info.conf, &conf->conf);
-   if (ret) {
-   hns3_err(hw, "Enter duplicate RSS configuration : %d", ret);
+   if (hns3_rss_action_is_dup(hw, &conf->conf)) {
+   hns3_err(hw, "duplicate RSS configuration");
return -EINVAL;
}
 
-- 
2.22.0



[PATCH 07/10] net/hns3: remove useless code when destroying valid RSS rule

2023-01-29 Thread Dongdong Liu
From: Huisong Li 

The hw::rss_info::conf::func was set to the macro RTE_ETH_HASH_FUNCTION_MAX
and hw::rss_info::conf::queue was set to NULL when all rules were flushed,
which indicated that no flow rules were issued. See
commit eb158fc756a5 ("net/hns3: fix config when creating RSS rule after
flush").
Actually, the way of determining whether there are rules has been changed
to walking the flow RSS list. See
commit 705a50800334 ("net/hns3: fix RSS filter restore").

In addition, the rte_flow_action_rss from the user isn't saved to 'conf' in
hw->rss_info now. So this code can be removed.

Fixes: eb158fc756a5 ("net/hns3: fix config when creating RSS rule after flush")
Fixes: 705a50800334 ("net/hns3: fix RSS filter restore")
Cc: sta...@dpdk.org

Signed-off-by: Huisong Li 
Signed-off-by: Dongdong Liu 
---
 drivers/net/hns3/hns3_flow.c | 26 ++
 1 file changed, 2 insertions(+), 24 deletions(-)

diff --git a/drivers/net/hns3/hns3_flow.c b/drivers/net/hns3/hns3_flow.c
index 303275ae93..7adde16cbc 100644
--- a/drivers/net/hns3/hns3_flow.c
+++ b/drivers/net/hns3/hns3_flow.c
@@ -1279,19 +1279,8 @@ hns3_action_rss_same(const struct rte_flow_action_rss *comp,
bool rss_key_is_same;
bool func_is_same;
 
-   /*
-* When user flush all RSS rule, RSS func is set invalid with
-* RTE_ETH_HASH_FUNCTION_MAX. Then the user create a flow after
-* flushed, any validate RSS func is different with it before
-* flushed. Others, when user create an action RSS with RSS func
-* specified RTE_ETH_HASH_FUNCTION_DEFAULT, the func is the same
-* between continuous RSS flow.
-*/
-   if (comp->func == RTE_ETH_HASH_FUNCTION_MAX)
-   func_is_same = false;
-   else
-   func_is_same = (with->func != RTE_ETH_HASH_FUNCTION_DEFAULT) ?
-   (comp->func == with->func) : true;
+   func_is_same = (with->func != RTE_ETH_HASH_FUNCTION_DEFAULT) ?
+   (comp->func == with->func) : true;
 
if (with->key_len == 0 || with->key == NULL)
rss_key_is_same = 1;
@@ -1533,7 +1522,6 @@ static int
 hns3_config_rss_filter(struct hns3_hw *hw,
   const struct hns3_rss_conf *conf, bool add)
 {
-   struct hns3_rss_conf *rss_info;
uint64_t flow_types;
uint16_t num;
int ret;
@@ -1560,7 +1548,6 @@ hns3_config_rss_filter(struct hns3_hw *hw,
/* Update the useful flow types */
rss_flow_conf.types = flow_types;
 
-   rss_info = &hw->rss_info;
if (!add) {
if (!conf->valid)
return 0;
@@ -1571,15 +1558,6 @@ hns3_config_rss_filter(struct hns3_hw *hw,
return ret;
}
 
-   if (rss_flow_conf.queue_num) {
-   /*
-* Due the content of queue pointer have been reset to
-* 0, the rss_info->conf.queue should be set to NULL
-*/
-   rss_info->conf.queue = NULL;
-   rss_info->conf.queue_num = 0;
-   }
-
return 0;
}
 
-- 
2.22.0



[PATCH 08/10] net/hns3: fix useless warning when flushing or destroying rule

2023-01-29 Thread Dongdong Liu
From: Huisong Li 

The types of the rule are no longer used when the user flushes all rules or
destroys a rule. But the user would still receive some RSS types warnings,
like: "modified RSS types based on hardware support, requested:0x137f83fffc
configured:0x3ffc".

Fixes: ec674cb742e5 ("net/hns3: fix flushing RSS rule")
Cc: sta...@dpdk.org

Signed-off-by: Huisong Li 
Signed-off-by: Dongdong Liu 
---
 drivers/net/hns3/hns3_flow.c | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/net/hns3/hns3_flow.c b/drivers/net/hns3/hns3_flow.c
index 7adde16cbc..fbc38dd3d4 100644
--- a/drivers/net/hns3/hns3_flow.c
+++ b/drivers/net/hns3/hns3_flow.c
@@ -1537,17 +1537,6 @@ hns3_config_rss_filter(struct hns3_hw *hw,
.queue = conf->conf.queue,
};
 
-   /* Filter the unsupported flow types */
-   flow_types = conf->conf.types ?
-rss_flow_conf.types & HNS3_ETH_RSS_SUPPORT :
-hw->rss_info.conf.types;
-   if (flow_types != rss_flow_conf.types)
-   hns3_warn(hw, "modified RSS types based on hardware support, "
- "requested:0x%" PRIx64 " configured:0x%" PRIx64,
- rss_flow_conf.types, flow_types);
-   /* Update the useful flow types */
-   rss_flow_conf.types = flow_types;
-
if (!add) {
if (!conf->valid)
return 0;
@@ -1573,6 +1562,17 @@ hns3_config_rss_filter(struct hns3_hw *hw,
return ret;
}
 
+   /* Filter the unsupported flow types */
+   flow_types = conf->conf.types ?
+rss_flow_conf.types & HNS3_ETH_RSS_SUPPORT :
+hw->rss_info.conf.types;
+   if (flow_types != rss_flow_conf.types)
+   hns3_warn(hw, "modified RSS types based on hardware support,"
+ " requested:0x%" PRIx64 " configured:0x%" PRIx64,
+ rss_flow_conf.types, flow_types);
+   /* Update the useful flow types */
+   rss_flow_conf.types = flow_types;
+
/* Set hash algorithm and flow types by the user's config */
return hns3_hw_rss_hash_set(hw, &rss_flow_conf);
 }
-- 
2.22.0



[PATCH 09/10] net/hns3: fix bad memory structure conversion

2023-01-29 Thread Dongdong Liu
From: Huisong Li 

When the type in 'struct rte_flow_action' is RTE_FLOW_ACTION_TYPE_RSS, the
'conf' pointer references a 'struct rte_flow_action_rss' instead of the
driver's 'struct hns3_rss_conf'. But the driver converts this 'conf' pointer
as a 'struct hns3_rss_conf' to get the RSS action configuration.

In addition, the RSS filter configuration is now directly cloned to the RSS
filter node instead of copying it after successfully setting it to hardware.

Fixes: c37ca66f2b27 ("net/hns3: support RSS")
Cc: sta...@dpdk.org

Signed-off-by: Huisong Li 
Signed-off-by: Dongdong Liu 
---
 drivers/net/hns3/hns3_flow.c | 57 +---
 1 file changed, 20 insertions(+), 37 deletions(-)

diff --git a/drivers/net/hns3/hns3_flow.c b/drivers/net/hns3/hns3_flow.c
index fbc38dd3d4..307aba75a7 100644
--- a/drivers/net/hns3/hns3_flow.c
+++ b/drivers/net/hns3/hns3_flow.c
@@ -95,8 +95,8 @@ static const struct rte_flow_action *
 hns3_find_rss_general_action(const struct rte_flow_item pattern[],
 const struct rte_flow_action actions[])
 {
+   const struct rte_flow_action_rss *rss_act;
const struct rte_flow_action *act = NULL;
-   const struct hns3_rss_conf *rss;
bool have_eth = false;
 
for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
@@ -115,8 +115,8 @@ hns3_find_rss_general_action(const struct rte_flow_item pattern[],
}
}
 
-   rss = act->conf;
-   if (have_eth && rss->conf.queue_num) {
+   rss_act = act->conf;
+   if (have_eth && rss_act->queue_num) {
/*
 * Pattern have ETH and action's queue_num > 0, indicate this is
 * queue region configuration.
@@ -1296,30 +1296,6 @@ hns3_action_rss_same(const struct rte_flow_action_rss *comp,
sizeof(*with->queue) * with->queue_num));
 }
 
-static int
-hns3_rss_conf_copy(struct hns3_rss_conf *out,
-  const struct rte_flow_action_rss *in)
-{
-   if (in->key_len > RTE_DIM(out->key) ||
-   in->queue_num > RTE_DIM(out->queue))
-   return -EINVAL;
-   if (in->key == NULL && in->key_len)
-   return -EINVAL;
-   out->conf = (struct rte_flow_action_rss) {
-   .func = in->func,
-   .level = in->level,
-   .types = in->types,
-   .key_len = in->key_len,
-   .queue_num = in->queue_num,
-   };
-   out->conf.queue = memcpy(out->queue, in->queue,
-   sizeof(*in->queue) * in->queue_num);
-   if (in->key)
-   out->conf.key = memcpy(out->key, in->key, in->key_len);
-
-   return 0;
-}
-
 static bool
 hns3_rss_input_tuple_supported(struct hns3_hw *hw,
   const struct rte_flow_action_rss *rss)
@@ -1733,9 +1709,10 @@ hns3_flow_create_rss_rule(struct rte_eth_dev *dev,
  struct rte_flow *flow)
 {
struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+   const struct rte_flow_action_rss *rss_act;
struct hns3_rss_conf_ele *rss_filter_ptr;
struct hns3_rss_conf_ele *filter_ptr;
-   const struct hns3_rss_conf *rss_conf;
+   struct hns3_rss_conf *new_conf;
int ret;
 
rss_filter_ptr = rte_zmalloc("hns3 rss filter",
@@ -1745,19 +1722,25 @@ hns3_flow_create_rss_rule(struct rte_eth_dev *dev,
return -ENOMEM;
}
 
-   /*
-* After all the preceding tasks are successfully configured, configure
-* rules to the hardware to simplify the rollback of rules in the
-* hardware.
-*/
-   rss_conf = (const struct hns3_rss_conf *)act->conf;
-   ret = hns3_flow_parse_rss(dev, rss_conf, true);
+   rss_act = (const struct rte_flow_action_rss *)act->conf;
+   new_conf = &rss_filter_ptr->filter_info;
+   memcpy(&new_conf->conf, rss_act, sizeof(*rss_act));
+   if (rss_act->queue_num > 0) {
+   memcpy(new_conf->queue, rss_act->queue,
+  rss_act->queue_num * sizeof(new_conf->queue[0]));
+   new_conf->conf.queue = new_conf->queue;
+   }
+   if (rss_act->key_len > 0) {
+   memcpy(new_conf->key, rss_act->key,
+  rss_act->key_len * sizeof(new_conf->key[0]));
+   new_conf->conf.key = new_conf->key;
+   }
+
+   ret = hns3_flow_parse_rss(dev, new_conf, true);
if (ret != 0) {
rte_free(rss_filter_ptr);
return ret;
}
-
-   hns3_rss_conf_copy(&rss_filter_ptr->filter_info, &rss_conf->conf);
rss_filter_ptr->filter_info.valid = true;
 
/*
-- 
2.22.0



[PATCH 10/10] net/hns3: fix incorrect check for duplicate RSS rule

2023-01-29 Thread Dongdong Liu
From: Huisong Li 

Currently, the interface for verifying duplicate RSS rules has
some problems:
1) If the value of 'func' in the configured RSS rule is the default
   value, the rule is mistakenly considered a duplicate rule.
2) If the key length is zero or 'key' is NULL in the configured RSS rule,
   the rule is also mistakenly considered a duplicate rule.
3) A NULL 'key' or 'queue' in struct rte_flow_action_rss may be used
   in memcpy, which may cause a segmentation fault.

Fixes: c37ca66f2b27 ("net/hns3: support RSS")
Cc: sta...@dpdk.org

Signed-off-by: Huisong Li 
Signed-off-by: Dongdong Liu 
---
 drivers/net/hns3/hns3_flow.c | 58 ++--
 1 file changed, 42 insertions(+), 16 deletions(-)

diff --git a/drivers/net/hns3/hns3_flow.c b/drivers/net/hns3/hns3_flow.c
index 307aba75a7..f76ceb18d1 100644
--- a/drivers/net/hns3/hns3_flow.c
+++ b/drivers/net/hns3/hns3_flow.c
@@ -1272,28 +1272,54 @@ hns3_filterlist_flush(struct rte_eth_dev *dev)
}
 }
 
+static bool
+hns3_flow_rule_key_same(const struct rte_flow_action_rss *comp,
+   const struct rte_flow_action_rss *with)
+{
+   if (comp->key_len != with->key_len)
+   return false;
+
+   if (with->key_len == 0)
+   return true;
+
+   if (comp->key == NULL && with->key == NULL)
+   return true;
+
+   if (!(comp->key != NULL && with->key != NULL))
+   return false;
+
+   return !memcmp(comp->key, with->key, with->key_len);
+}
+
+static bool
+hns3_flow_rule_queues_same(const struct rte_flow_action_rss *comp,
+  const struct rte_flow_action_rss *with)
+{
+   if (comp->queue_num != with->queue_num)
+   return false;
+
+   if (with->queue_num == 0)
+   return true;
+
+   if (comp->queue == NULL && with->queue == NULL)
+   return true;
+
+   if (!(comp->queue != NULL && with->queue != NULL))
+   return false;
+
+   return !memcmp(comp->queue, with->queue, with->queue_num);
+}
+
 static bool
 hns3_action_rss_same(const struct rte_flow_action_rss *comp,
 const struct rte_flow_action_rss *with)
 {
-   bool rss_key_is_same;
-   bool func_is_same;
+   bool same_func;
 
-   func_is_same = (with->func != RTE_ETH_HASH_FUNCTION_DEFAULT) ?
-   (comp->func == with->func) : true;
+   same_func = (comp->func == with->func);
 
-   if (with->key_len == 0 || with->key == NULL)
-   rss_key_is_same = 1;
-   else
-   rss_key_is_same = comp->key_len == with->key_len &&
-   !memcmp(comp->key, with->key, with->key_len);
-
-   return (func_is_same && rss_key_is_same &&
-   comp->types == with->types &&
-   comp->level == with->level &&
-   comp->queue_num == with->queue_num &&
-   !memcmp(comp->queue, with->queue,
-   sizeof(*with->queue) * with->queue_num));
+   return same_func && hns3_flow_rule_key_same(comp, with) &&
+   hns3_flow_rule_queues_same(comp, with);
 }
 
 static bool
-- 
2.22.0



RE: [PATCH v2] net: not build PMD AVX library when no IOVA as PA

2023-01-29 Thread Zhang, Qi Z



> -Original Message-
> From: Zhang, Qi Z 
> Sent: Monday, December 12, 2022 10:56 PM
> To: m...@smartsharesystems.com; Richardson, Bruce
> ; Lu, Wenzhuo 
> Cc: dev@dpdk.org; Wu, Wenjun1 ; Zhang, Qi Z
> ; sta...@dpdk.org
> Subject: [PATCH v2] net: not build PMD AVX library when no IOVA as PA
> 
> PMDs that do not announce pmd_supports_disable_iova_as_pa will not be built
> when RTE_IOVA_AS_PA is not defined, but some AVX libraries for the vector
> path are not skipped by the build system, which causes a compile error.
> 
> The patch modifies the i40e, iavf and ice meson files to skip the AVX
> library build when RTE_IOVA_AS_PA is not defined.
> 
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Qi Zhang 

Moving this for next-net review.

Acked-by: Qi Zhang 

Applied to dpdk-next-net-intel.

Thanks
Qi


RE: [PATCH v2] net/ice: support IOVA as VA mode

2023-01-29 Thread Zhang, Qi Z



> -Original Message-
> From: Zhang, Qi Z 
> Sent: Monday, December 12, 2022 9:28 PM
> To: m...@smartsharesystems.com; Richardson, Bruce
> ; Lu, Wenzhuo 
> Cc: dev@dpdk.org; Wu, Wenjun1 
> Subject: RE: [PATCH v2] net/ice: support IOVA as VA mode
> 
> 
> 
> > -Original Message-
> > From: Zhang, Qi Z 
> > Sent: Tuesday, December 13, 2022 5:36 AM
> > To: m...@smartsharesystems.com; Richardson, Bruce
> > ; Lu, Wenzhuo 
> > Cc: dev@dpdk.org; Wu, Wenjun1 ; Zhang, Qi Z
> > 
> > Subject: [PATCH v2] net/ice: support IOVA as VA mode
> >
> > Claim pmd_supports_disable_iova_as_pa. Remove buf_iova access when
> > RTE_IOVA_AS_PA is not defined.
> >
> > The patch simply replaces buf_iova with buf_addr in IOVA as VA mode.
> > Some SIMD instructions in the data path may be overused; further
> > optimization is expected.
> >
> > Signed-off-by: Qi Zhang 
> 
> Forgot to inherit the ack from v1
> Acked-by: Morten Brørup 

Applied to dpdk-next-net-intel.

Thanks
Qi



RE: [PATCH v2 0/3] net/igc: support PTP timesync

2023-01-29 Thread Zhang, Qi Z



> -Original Message-
> From: Su, Simei 
> Sent: Tuesday, January 17, 2023 9:26 PM
> To: Zhang, Qi Z ; Guo, Junfeng
> 
> Cc: dev@dpdk.org; Wu, Wenjun1 ; Su, Simei
> 
> Subject: [PATCH v2 0/3] net/igc: support PTP timesync
> 
> [PATCH v2 1/3] code refactoring.
> [PATCH v2 2/3] add related definitions for ptp timesync.
> [PATCH v2 3/3] add IEEE1588 API to support timesync.
> 
> v2:
> * Refine commit log.
> * Update the doc/guides/nics/features/igc.ini to add "Timesync" feature.
> * Add release notes.
> 
> Simei Su (3):
>   net/igc: code refactoring
>   net/igc/base: support PTP timesync
>   net/igc: support IEEE 1588 PTP
> 
>  doc/guides/nics/features/igc.ini   |   1 +
>  doc/guides/rel_notes/release_23_03.rst |   3 +
>  drivers/net/igc/base/igc_defines.h |  11 ++
>  drivers/net/igc/igc_ethdev.c   | 222
> +
>  drivers/net/igc/igc_ethdev.h   |   4 +-
>  drivers/net/igc/igc_txrx.c | 166 +++-
>  drivers/net/igc/igc_txrx.h | 116 +
>  7 files changed, 401 insertions(+), 122 deletions(-)
> 
> --
> 2.9.5

Acked-by: Qi Zhang 

Refined PATCH 2/3's title and commit log as below:

net/igc/base: expose timesync registers

Add definitions for timesync related registers.

Applied to dpdk-next-net-intel.

Thanks
Qi



[PATCH] gpudev: fix deadlocks when registering callback

2023-01-29 Thread eagostini
Agree with the patch.
Thanks!


RE: [EXT] Re: [dpdk-dev] [PATCH v1 00/12] mldev: introduce machine learning device library

2023-01-29 Thread Shivah Shankar Shankar Narayan Rao
25/01/2023 20:01, Jerin Jacob:
> On Wed, Jan 25, 2023 at 7:50 PM Thomas Monjalon  wrote:
> > 14/11/2022 13:02, jer...@marvell.com:
> > > ML Model: An ML model is an algorithm trained over a dataset. A 
> > > model consists of procedure/algorithm and data/pattern required to make 
> > > predictions on live data.
> > > Once the model is created and trained outside of the DPDK scope, 
> > > the model can be loaded via rte_ml_model_load() and then started using 
> > > the rte_ml_model_start() API.
> > > The rte_ml_model_params_update() can be used to update the model 
> > > parameters such as weight and bias without unloading the model using 
> > > rte_ml_model_unload().
> >
> > The fact that the model is prepared outside means the model format 
> > is free and probably different per mldev driver.
> > I think it is OK but it requires a lot of documentation effort to 
> > explain how to bind the model and its parameters with the DPDK API.
> > Also we may need to pass some metadata from the model builder to the 
> > inference engine in order to enable optimizations prepared in the model.
> > And the other way, we may need inference capabilities in order to 
> > generate an optimized model which can run in the inference engine.
> 
> The base API specification is kept to the absolute minimum. Currently, 
> weight and bias parameters are updated through 
> rte_ml_model_params_update(). It can be extended when there are drivers 
> supporting it or if you have any specific parameter you would like to 
> add to rte_ml_model_params_update().

This function is
int rte_ml_model_params_update(int16_t dev_id, int16_t model_id, void *buffer);

How are we supposed to provide separate parameters in this void* ?

Just to clarify what "parameters" means: they are the weights and biases of 
the model, which are the parameters of a model.
Also, the proposed APIs are for running inference on a pre-trained model. 
For running inference, the amount of parameter tuning needed/done is 
limited/none.
The only parameters that may get changed are the weights and biases, which 
the API rte_ml_model_params_update() caters to. 

While running inference on a model there won't be any random addition or 
removal of operators to/from the model, nor any changes in the actual flow 
of the model.
Since the only parameters that can be changed are the weights and biases, 
the above API should take care of it.

> Other metadata, like batch, shapes and formats, is queried using 
> rte_ml_io_info().

Copying:
+/** Input and output data information structure
+ *
+ * Specifies the type and shape of input and output data.
+ */
+struct rte_ml_io_info {
+   char name[RTE_ML_STR_MAX];
+   /**< Name of data */
+   struct rte_ml_io_shape shape;
+   /**< Shape of data */
+   enum rte_ml_io_type qtype;
+   /**< Type of quantized data */
+   enum rte_ml_io_type dtype;
+   /**< Type of de-quantized data */
+};

Is it the right place to notify the app that some model optimizations are 
supported? (example: merge some operations in the graph)

The inference is run on a pre-trained model, which means any merges/additions 
of operations to the graph are NOT done. 
If any such things are done, then the changed model needs to go through 
training and compilation once again, which is out of the scope of these APIs.

> > [...]
> > > Typical application utilisation of the ML API will follow the 
> > > following programming flow.
> > >
> > > - rte_ml_dev_configure()
> > > - rte_ml_dev_queue_pair_setup()
> > > - rte_ml_model_load()
> > > - rte_ml_model_start()
> > > - rte_ml_model_info()
> > > - rte_ml_dev_start()
> > > - rte_ml_enqueue_burst()
> > > - rte_ml_dequeue_burst()
> > > - rte_ml_model_stop()
> > > - rte_ml_model_unload()
> > > - rte_ml_dev_stop()
> > > - rte_ml_dev_close()
> >
> > Where is parameters update in this flow?
> 
> Added the mandatory APIs in the top level flow doc.
> rte_ml_model_params_update() used to update the parameters.

The question is "where" should it be done?
Before/after start?

The model image comes with the weights and biases, which are loaded and used 
as part of rte_ml_model_load() and rte_ml_model_start(). 
In rare scenarios where the user wants to update the weights and biases of an 
already loaded model, rte_ml_model_stop() can be called to stop the model, the 
parameters (weights and biases) can be updated using the 
rte_ml_model_params_update() API, and rte_ml_model_start() can then be called 
to start the model again with the new weights and biases.
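
A minimal sketch of that sequence, assuming rte_ml_model_stop() and
rte_ml_model_start() take the same (dev_id, model_id) pair as
rte_ml_model_params_update(); the header name follows the RFC series, and the
buffer holds whatever packed weights/biases format the model expects:

#include <rte_mldev.h>

/* Sketch only: not part of the proposed spec itself. */
static int
update_model_params(int16_t dev_id, int16_t model_id, void *new_params)
{
	int ret;

	ret = rte_ml_model_stop(dev_id, model_id);
	if (ret != 0)
		return ret;

	/* new_params: packed weights and biases for the loaded model. */
	ret = rte_ml_model_params_update(dev_id, model_id, new_params);
	if (ret != 0)
		return ret;

	/* Restart the model with the new weights and biases. */
	return rte_ml_model_start(dev_id, model_id);
}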

> > Should we update all parameters at once or can it be done more fine-grain?
> 
> Currently, rte_ml_model_params_update() can be used to update weights 
> and biases via a buffer when the device is in the stop state, without 
> unloading the model.

The question is "can we update a single parameter"?
And how?
As mentioned above for running inference the mode

Re: [PATCH v2] eal: cleanup alarm and multiprocess hotplug before memory detach

2023-01-29 Thread Fengnan Chang
friendly ping

Fengnan Chang  于2023年1月6日周五 11:00写道:
>
> Hi:
>   are there any comments?
>
> Fengnan Chang  于2022年12月21日周三 11:41写道:
> >
> > Alarm and multiprocess hotplug still need to access hugepage memory;
> > if an alarm event is processed after memory detach, it may cause a SEGV.
> > So clean up alarm and multiprocess hotplug before memory detach.
> >
> > Fixes: 90b13ab8d4f7 ("alarm: remove direct access to interrupt handle")
> > Fixes: a0cc7be20dd1 ("mem: cleanup multiprocess resources")
> >
> > Signed-off-by: Fengnan Chang 
> > ---
> >  lib/eal/freebsd/eal.c | 2 +-
> >  lib/eal/linux/eal.c   | 4 ++--
> >  2 files changed, 3 insertions(+), 3 deletions(-)
> >
> > diff --git a/lib/eal/freebsd/eal.c b/lib/eal/freebsd/eal.c
> > index 1b58cd3da6..83b99c601f 100644
> > --- a/lib/eal/freebsd/eal.c
> > +++ b/lib/eal/freebsd/eal.c
> > @@ -897,9 +897,9 @@ rte_eal_cleanup(void)
> > eal_bus_cleanup();
> > rte_trace_save();
> > eal_trace_fini();
> > +   rte_eal_alarm_cleanup();
> > /* after this point, any DPDK pointers will become dangling */
> > rte_eal_memory_detach();
> > -   rte_eal_alarm_cleanup();
> > eal_cleanup_config(internal_conf);
> > return 0;
> >  }
> > diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
> > index 8c118d0d9f..c76f026023 100644
> > --- a/lib/eal/linux/eal.c
> > +++ b/lib/eal/linux/eal.c
> > @@ -1372,11 +1372,11 @@ rte_eal_cleanup(void)
> > eal_bus_cleanup();
> > rte_trace_save();
> > eal_trace_fini();
> > +   eal_mp_dev_hotplug_cleanup();
> > +   rte_eal_alarm_cleanup();
> > /* after this point, any DPDK pointers will become dangling */
> > rte_eal_memory_detach();
> > -   eal_mp_dev_hotplug_cleanup();
> > rte_eal_malloc_heap_cleanup();
> > -   rte_eal_alarm_cleanup();
> > eal_cleanup_config(internal_conf);
> > rte_eal_log_cleanup();
> > return 0;
> > --
> > 2.37.0 (Apple Git-136)
> >


Re: [PATCH v7] ethdev: add special flags when creating async transfer table

2023-01-29 Thread Ivan Malov

Hi Rongwei,

Thanks for persevering. I have no strong opinion, but, at least, the
fact that the new flags are no longer meant for use in rte_flow_attr,
which is clearly not the right place for such, is an improvement.

However, let's take a closer look at the current patch, shall we?

But, before we get to that, I'd like to kindly request that you
provide a more concrete example of how this feature is supposed
to be used. Are there some real-life application examples?

Also, to me, it's still unclear how an application can obtain
the knowledge of this hint in the first instance. For example,
can Open vSwitch somehow tell ethdevs representing physical
ports from ones representing "vports" (host endpoints)?
How does it know which attribute to specify?

For the rest of my notes, PSB.

On Mon, 14 Nov 2022, Rongwei Liu wrote:


In case flow rules match only one kind of traffic in a flow table,
then optimization can be done via allocation of this table.


This wording might confuse readers. Consider rephrasing it, please:
If multiple flow rules share a common set of match masks, then
they might belong in a flow table which can be pre-allocated.


Such optimization is possible only if the application gives a hint
about its usage of the table during initial configuration.

The transfer domain rules may process traffic from wire or vport,
which may correspond to two kinds of underlayer resources.


Why name it a "vport"? Why not "host"?

host = packets generated by any of the host's "vport"s
wire = packets arriving at the NIC from the network


That's why the first two hints introduced in this patch are about
wire and vport traffic specialization.
Wire means traffic arrives from the uplink port while vport means
traffic initiated from VF/SF.


By the sound of it, the meaning is confined to just VFs/SFs.
What if the user wants to match packets coming from PFs?



There are two possible approaches for providing the hints.
Using IPv4 as an example:
1. Use pattern item in both template table and flow rules.

 pattern_template: pattern ANY_VPORT / eth / ipv4 is 1.1.1.1 / end
 async flow create: pattern ANY_VPORT / eth / ipv4 is 1.1.1.2 / end

 "ANY_VPORT" needs to be present in each flow rule even if it's
 just a hint. No value to match because matching is already done by
 IPv4 item.


Why no value to match on? How does it prevent rogue tenants
from spoofing network headers? If the application receives
a packet on a particular vport's representor, then it may
strictly specify item represented_port pointing to that
vport so that only packets from that vport match.

Why isn't security a consideration?



2. Add special flags into table_attr.

 template_table 0 create table_id 0 group 1 transfer vport_orig

Approach 1 needs to specify the pattern in each flow rule which wastes
memory and is not user friendly.


What if the user has to insert a group of rules which not only
have the same set of match masks but also share exactly the
same match spec values for a limited subset of network
items (for example, those of an encap. header)? This
way, a subset of network item specs can remain fixed
across many rules. Does that count as wasting memory?

If yes, then the problem does not concern just a single pair
of attributes, but rather deserves a more versatile solution
like some sort of indirect grouping of constant item specs.
Have you considered such options?


This patch takes the 2nd approach and introduces one new member
"specialize" into rte_flow_table_attr to indicate possible flow table
optimization.


The name "specialize" might have some drawbacks:
- spelling difference (specialise/specialize)
- in grep output, will mix with flows' "spec"
- quite long
- not a noun

Why not "scope"? Or something like that?



By default, there is no hint, so the behavior of the transfer domain
doesn't change.
There is no guarantee that the hint will be used by the PMD.

Signed-off-by: Rongwei Liu 
Acked-by: Ori Kam 

v2: Move the new field to template table attribute.
v4: Mark it as optional and clear the concept.
v5: Change specialize type to uint32_t.
v6: Change the flags to macros and re-construct the commit log.
v7: Fix build failure.
---
app/test-pmd/cmdline_flow.c | 26 +++
doc/guides/prog_guide/rte_flow.rst  | 15 +++
doc/guides/testpmd_app_ug/testpmd_funcs.rst |  3 ++-
lib/ethdev/rte_flow.h   | 28 +
4 files changed, 71 insertions(+), 1 deletion(-)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 88108498e0..62197f2618 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -184,6 +184,8 @@ enum index {
TABLE_INGRESS,
TABLE_EGRESS,
TABLE_TRANSFER,
+   TABLE_TRANSFER_WIRE_ORIG,
+   TABLE_TRANSFER_VPORT_ORIG,
TABLE_RULES_NUMBER,
TABLE_PATTERN_TEMPLATE,
TABLE_ACTIONS_TEMPLATE,
@@ -1158,6 +1160,8 @@ static const enum index next_table_attr[] = 

RE: [PATCH v7] ethdev: add special flags when creating async transfer table

2023-01-29 Thread Rongwei Liu
Hi Ivan,

BR
Rongwei

> -Original Message-
> From: Ivan Malov 
> Sent: Monday, January 30, 2023 08:00
> To: Rongwei Liu 
> Cc: Matan Azrad ; Slava Ovsiienko
> ; Ori Kam ; NBU-Contact-
> Thomas Monjalon (EXTERNAL) ; Aman Singh
> ; Yuying Zhang ;
> Ferruh Yigit ; Andrew Rybchenko
> ; dev@dpdk.org; Raslan Darawsheh
> 
> Subject: Re: [PATCH v7] ethdev: add special flags when creating async transfer
> table
> 
> Hi Rongwei,
> 
> Thanks for persevering. I have no strong opinion, but, at least, the fact 
> that the
> new flags are no longer meant for use in rte_flow_attr, which is clearly not
> the right place for such, is an improvement.
> 
Thanks for the suggestion; it has been moved to rte_flow_table_attr now and 
it's dedicated to the async API.
> However, let's take a closer look at the current patch, shall we?
> 
> But, before we get to that, I'd like to kindly request that you provide a more
> concrete example of how this feature is supposed to be used. Are there some
> real-life application examples?
> 
Sure.
> Also, to me, it's still unclear how an application can obtain the knowledge of
> this hint in the first instance. For example, can Open vSwitch somehow tell
> ethdevs representing physical ports from ones representing "vports" (host
> endpoints)?
> How does it know which attribute to specify?
> 
The hint should be initiated by the application, and the application knows its 
traffic pattern, which highly relates to the deployment.
Let' use VxLAN encap/decap as an example:
1. Traffic from the wire should match the VxLAN pattern, be decapped, and then 
be sent to different vports.
flow pattern_template 0 create transfer relaxed no pattern_template_id 4 
template represented_port ethdev_port_id is 0 / eth / ipv4 / udp / vxlan / tag 
index is 0 data is 0x33 / end
flow actions_template 0 create transfer actions_template_id 4 template 
raw_decap index 0 / represented_port ethdev_port_id 1 / end mask raw_decap 
index 0 / represented_port ethdev_port_id 1 / end
flow template_table 0 create group 1 priority 0 transfer wire_orig table_id 4 
rules_number 128 pattern_template 4 actions_template 4

2. Traffic from vports should be encapped with different VxLAN headers and 
sent to the wire.
flow actions_template 1 create transfer actions_template_id 5 template 
raw_encap index 0 / represented_port ethdev_port_id 0 / end mask raw_encap 
index 0 / represented_port ethdev_port_id 0 / end
flow template_table 0 create group 1 priority 0 transfer vport_orig table_id 5 
rules_number 128 pattern_template 4 actions_template 5

> For the rest of my notes, PSB.
> 
> On Mon, 14 Nov 2022, Rongwei Liu wrote:
> 
> > In case flow rules match only one kind of traffic in a flow table,
> > then optimization can be done via allocation of this table.
> 
> This wording might confuse readers. Consider rephrasing it, please:
> If multiple flow rules share a common set of match masks, then they might
> belong in a flow table which can be pre-allocated.
> 
> > Such optimization is possible only if the application gives a hint
> > about its usage of the table during initial configuration.
> >
> > The transfer domain rules may process traffic from wire or vport,
> > which may correspond to two kinds of underlayer resources.
> 
> Why name it a "vport"? Why not "host"?
> 
> host = packets generated by any of the host's "vport"s wire = packets arriving
> at the NIC from the network
Vport is short for "virtual port" and covers "VF/SF" for now. 
In my view, it's clearer and maps to DPDK port probing/management.
> 
> > That's why the first two hints introduced in this patch are about wire
> > and vport traffic specialization.
> > Wire means traffic arrives from the uplink port while vport means
> > traffic initiated from VF/SF.
> 
> By the sound of it, the meaning is confined to just VFs/SFs.
> What if the user wants to match packets coming from PFs?
> 
It should be "wire_orig".
> >
> > There are two possible approaches for providing the hints.
> > Using IPv4 as an example:
> > 1. Use pattern item in both template table and flow rules.
> >
> >  pattern_template: pattern ANY_VPORT / eth / ipv4 is 1.1.1.1 / end
> > async flow create: pattern ANY_VPORT / eth / ipv4 is 1.1.1.2 / end
> >
> >  "ANY_VPORT" needs to be present in each flow rule even if it's  just
> > a hint. No value to match because matching is already done by
> >  IPv4 item.
> 
> Why no value to match on? How does it prevent rogue tenants from spoofing
> network headers? If the application receives a packet on a particular vport's
> representor, then it may strictly specify item represented_port pointing to 
> that
> vport so that only packets from that vport match.
> 
> Why isn't security a consideration?
> 
There is some misunderstanding here. "ANY_VPORT" is the approach (a new 
matching item without a value) suggested by you. 
I was explaining that we need to apply it to each flow rule even if it's only 
a flag with no value to match.
> >
> > 2. Add special flags into table_

RE: [PATCH v4 3/3] ethdev: add standby flags for live migration

2023-01-29 Thread Rongwei Liu
Hi Jerin

BR
Rongwei

> -Original Message-
> From: Jerin Jacob 
> Sent: Monday, January 23, 2023 21:20
> To: Rongwei Liu 
> Cc: dev@dpdk.org; Matan Azrad ; Slava Ovsiienko
> ; Ori Kam ; NBU-Contact-
> Thomas Monjalon (EXTERNAL) ;
> step...@networkplumber.org; Raslan Darawsheh ;
> Ferruh Yigit ; Andrew Rybchenko
> 
> Subject: Re: [PATCH v4 3/3] ethdev: add standby flags for live migration
> 
> On Wed, Jan 18, 2023 at 9:15 PM Rongwei Liu  wrote:
> >
> > Some flags are added to the process state API for live migration in
> > order to change the behavior of the flow rules in a standby process.
> >
> > Signed-off-by: Rongwei Liu 
> > ---
> >  lib/ethdev/rte_ethdev.h | 21 +
> >  1 file changed, 21 insertions(+)
> >
> > diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h index
> > 1505396ced..9ae4f426a7 100644
> > --- a/lib/ethdev/rte_ethdev.h
> > +++ b/lib/ethdev/rte_ethdev.h
> > @@ -2260,6 +2260,27 @@ int rte_eth_dev_owner_get(const uint16_t
> > port_id,  __rte_experimental  int rte_eth_process_set_role(bool
> > standby, uint32_t flags);
> >
> > +/**@{@name Process role flags
> > + * used when migrating from an application to another one.
> > + * @see rte_eth_process_set_active
> > + */
> > +/**
> > + * When set on a standby process, ingress flow rules will be
> > +effective
> > + * in active and standby processes, so the ingress traffic may be 
> > duplicated.
> > + */
> > +#define RTE_ETH_PROCESS_FLAG_STANDBY_DUP_FLOW_INGRESS
> RTE_BIT32(0)
> 
> 
> How to duplicate if the action has stateful items? For example,
> rte_flow_action_security::security_session -> it stores the live pointer
> rte_flow_action_meter::mtr_id -> MTR object ID created with
> rte_mtr_create()
I agree with you, not all actions can be supported in the active/standby model.
That's why we have return value checking and rollback.
In the Nvidia driver doc, we suggested users start from 'rss/queue/jump' actions.
Meter is possible, at least in my view.
Assume: "meter g_action queue 0 / y_action drop / r_action drop"
Old application: create meter_id 'A' with a pre-defined limitation.
New application: create meter_id 'B' which has the same parameters as 'A'.
1. 1st possible approach:
Hardware duplicates the traffic; the old application uses meter 'A' and the 
new application uses meter 'B' to control traffic throughput.
Since traffic is duplicated, it can go to different meters. 
2. 2nd possible approach:
 Meters 'A' and 'B' point to the same hardware resource; traffic reaches 
this part first and, if green, duplication happens. 


RE: [PATCH v2 1/8] ethdev: add IPv6 routing extension header definition

2023-01-29 Thread Rongwei Liu
Hi Andrew

BR
Rongwei

> -Original Message-
> From: Andrew Rybchenko 
> Sent: Friday, January 20, 2023 17:21
> To: Rongwei Liu ; Matan Azrad ;
> Slava Ovsiienko ; Ori Kam ;
> NBU-Contact-Thomas Monjalon (EXTERNAL) ; Aman
> Singh ; Yuying Zhang
> ; Ferruh Yigit ; Olivier
> Matz 
> Cc: dev@dpdk.org; Raslan Darawsheh 
> Subject: Re: [PATCH v2 1/8] ethdev: add IPv6 routing extension header
> definition
> 
> On 1/19/23 06:11, Rongwei Liu wrote:
> > Add IPv6 routing extension header definition and no TLV support for
> > now.
> >
> > At rte_flow layer, there are new items defined for matching
> > type/nexthdr/segments_left field.
> >
> > Add command line support for IPv6 routing extension header
> > matching: type/nexthdr/segment_list.
> >
> > Signed-off-by: Rongwei Liu 
> > Acked-by: Ori Kam 
> 
> [snip]
> 
> > diff --git a/doc/guides/prog_guide/rte_flow.rst
> > b/doc/guides/prog_guide/rte_flow.rst
> > index 3e6242803d..ae99036be0 100644
> > --- a/doc/guides/prog_guide/rte_flow.rst
> > +++ b/doc/guides/prog_guide/rte_flow.rst
> > @@ -1544,6 +1544,15 @@ Matches Color Marker set by a Meter.
> >
> >   - ``color``: Metering color marker.
> >
> > +Item: ``IPV6_ROUTING_EXT``
> > +^^
> > +
> > +Matches ipv6 routing extension header.
> 
> ipv6 -> IPv6
Sure.
> 
> > +
> > +- ``next_hdr``: Next layer header type.
> > +- ``type``: IPv6 routing extension header type.
> > +- ``segments_left``: How many IPv6 destination addresses carries on
> 
> Why are only 3 fields mentioned above?
> 
This is the 1st phase, matching the 1st uint32 of the IPv6 routing extension.
No need to match hdr_len since the TLV is ignored.
> > +
> >   Actions
> >   ~~~
> >
> > diff --git a/doc/guides/rel_notes/release_23_03.rst
> > b/doc/guides/rel_notes/release_23_03.rst
> > index b8c5b68d6c..2a794d598e 100644
> > --- a/doc/guides/rel_notes/release_23_03.rst
> > +++ b/doc/guides/rel_notes/release_23_03.rst
> > @@ -55,6 +55,11 @@ New Features
> >Also, make sure to start the actual text at the margin.
> >===
> >
> > +* **Added rte_flow support for matching IPv6 routing extension header
> > +fields.**
> > +
> > +  Added ``ipv6_routing_ext`` items in rte_flow to match IPv6 routing
> > + extension  header
> 
> Missing full stop above.
> 
Sure
> > +
> >
> >   Removed Items
> >   -
> > @@ -84,6 +89,11 @@ API Changes
> >  Also, make sure to start the actual text at the margin.
> >  ===
> >
> > +* ethdev: added a new structure:
> > +
> > +- IPv6 routing extension header ``rte_flow_item_ipv6_routing_ext`` and
> > +  ``rte_ipv6_routing_ext``
> > +
> 
> If I'm not mistaken, additions should not be here. It is not an API change.
> 
Checked the existing release doc: "ihl" and "version" of the IPv4 header are
added there, but with a "net:" prefix.
Do you think it's good to follow?
> >
> >   ABI Changes
> >   ---
> > diff --git a/lib/ethdev/rte_flow.c b/lib/ethdev/rte_flow.c index
> > 7d0c24366c..4074b475c8 100644
> > --- a/lib/ethdev/rte_flow.c
> > +++ b/lib/ethdev/rte_flow.c
> > @@ -76,6 +76,20 @@ rte_flow_item_flex_conv(void *buf, const void *data)
> >   return src->length;
> >   }
> >
> > +static size_t
> > +rte_flow_item_ipv6_routing_ext_conv(void *buf, const void *data) {
> > + struct rte_flow_item_ipv6_routing_ext *dst = buf;
> > + const struct rte_flow_item_ipv6_routing_ext *src = data;
> > + size_t len;
> > +
> > + len = src->hdr.hdr_len ? src->hdr.hdr_len << 3 :
> > + src->hdr.segments_left << 4;
> 
> Compare hdr_len vs 0 explicitly.
> Also I'd add parenthesis around ternary operator values to make it simpler to
> understand.
Sure.
> 
> > + if (buf)
> 
> Please, compare vs NULL explicitly. May be 'dst' would be better here?
> 
> > + rte_memcpy((void *)((uintptr_t)(dst->hdr.segments)),
> > +src->hdr.segments, len);
> > + return len;
> > +}
> > +
Sure.
> >   /** Generate flow_item[] entry. */
> >   #define MK_FLOW_ITEM(t, s) \
> >   [RTE_FLOW_ITEM_TYPE_ ## t] = { \ @@ -157,6 +171,8 @@ static
> > const struct rte_flow_desc_data rte_flow_desc_item[] = {
> >   MK_FLOW_ITEM(L2TPV2, sizeof(struct rte_flow_item_l2tpv2)),
> >   MK_FLOW_ITEM(PPP, sizeof(struct rte_flow_item_ppp)),
> >   MK_FLOW_ITEM(METER_COLOR, sizeof(struct
> > rte_flow_item_meter_color)),
> > + MK_FLOW_ITEM_FN(IPV6_ROUTING_EXT, sizeof(struct
> rte_flow_item_ipv6_routing_ext),
> > + rte_flow_item_ipv6_routing_ext_conv),
> >   };
> >
> >   /** Generate flow_action[] entry. */ diff --git
> > a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h index
> > b60987db4b..0120d3e7d2 100644
> > --- a/lib/ethdev/rte_flow.h
> > +++ b/lib/ethdev/rte_flow.h
> > @@ -624,6 +624,13 @@ enum rte_flow_item_type {
> >* See struct rte_flow_item_meter_color.
> >*/
> >  

[PATCH v3 0/8] add IPv6 routing extension support

2023-01-29 Thread Rongwei Liu
Support IPv6 routing extension header matching with a new rte_flow item.
Add encapsulation support for the IPv6 routing extension header.

v3: enhance the format and use be32/be16 in the network header structure.
v2: remove redundant rte_flow items; include the commit from Gregory
to fix the compilation.

Gregory Etelson (1):
  net/mlx5/hws: Definer, add mlx5dr context to definer_conv_data

Rongwei Liu (7):
  ethdev: add IPv6 routing extension header definition
  net/mlx5: adopt IPv6 routing extension prm definition
  net/mlx5/hws: add IPv6 routing extension matching support
  app/testpmd: add IPv6 routing extension header in raw encap
  ethdev: add modify IPv6 protocol field
  net/mlx5/hws: add modify IPv6 protocol implementation
  doc/mlx5: add IPv6 routing extension matching docs

 app/test-pmd/cmdline_flow.c|  72 -
 doc/guides/nics/features/default.ini   |   1 +
 doc/guides/nics/features/mlx5.ini  |   1 +
 doc/guides/nics/mlx5.rst   |   2 +
 doc/guides/prog_guide/rte_flow.rst |   9 ++
 doc/guides/rel_notes/release_23_03.rst |   9 ++
 drivers/common/mlx5/mlx5_devx_cmds.c   |  17 +++-
 drivers/common/mlx5/mlx5_devx_cmds.h   |   7 +-
 drivers/common/mlx5/mlx5_prm.h |  29 +-
 drivers/net/mlx5/hws/mlx5dr_definer.c  | 133 +
 drivers/net/mlx5/hws/mlx5dr_definer.h  |  15 +++
 drivers/net/mlx5/mlx5.c| 103 ++-
 drivers/net/mlx5/mlx5.h|  19 +++-
 drivers/net/mlx5/mlx5_flow.h   |  28 ++
 drivers/net/mlx5/mlx5_flow_dv.c|  10 ++
 drivers/net/mlx5/mlx5_flow_flex.c  |  14 ++-
 drivers/net/mlx5/mlx5_flow_hw.c|  29 +-
 lib/ethdev/rte_flow.c  |  19 
 lib/ethdev/rte_flow.h  |  20 
 lib/net/rte_ip.h   |  21 
 20 files changed, 517 insertions(+), 41 deletions(-)

-- 
2.27.0



[PATCH v3 1/8] ethdev: add IPv6 routing extension header definition

2023-01-29 Thread Rongwei Liu
Add the IPv6 routing extension header definition; no
TLV support for now.

At the rte_flow layer, new items are defined for matching
the type/next_hdr/segments_left fields.

Add command line support for IPv6 routing extension header
matching: type/next_hdr/segments_left.
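
For illustration, a rule using the new tokens could be (hypothetical values:
SRH type 4, UDP next header, 2 segments left):

testpmd> flow create 0 ingress pattern eth / ipv6 /
         ipv6_routing_ext ext_type is 4 ext_next_hdr is 17 ext_seg_left is 2 /
         end actions queue index 0 / end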

Signed-off-by: Rongwei Liu 
Acked-by: Ori Kam 
---
 app/test-pmd/cmdline_flow.c| 46 ++
 doc/guides/prog_guide/rte_flow.rst |  9 +
 doc/guides/rel_notes/release_23_03.rst |  9 +
 lib/ethdev/rte_flow.c  | 19 +++
 lib/ethdev/rte_flow.h  | 19 +++
 lib/net/rte_ip.h   | 21 
 6 files changed, 123 insertions(+)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 88108498e0..7a8516829c 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -298,6 +298,10 @@ enum index {
ITEM_IPV6_SRC,
ITEM_IPV6_DST,
ITEM_IPV6_HAS_FRAG_EXT,
+   ITEM_IPV6_ROUTING_EXT,
+   ITEM_IPV6_ROUTING_EXT_TYPE,
+   ITEM_IPV6_ROUTING_EXT_NEXT_HDR,
+   ITEM_IPV6_ROUTING_EXT_SEG_LEFT,
ITEM_ICMP,
ITEM_ICMP_TYPE,
ITEM_ICMP_CODE,
@@ -1326,6 +1330,7 @@ static const enum index next_item[] = {
ITEM_ARP_ETH_IPV4,
ITEM_IPV6_EXT,
ITEM_IPV6_FRAG_EXT,
+   ITEM_IPV6_ROUTING_EXT,
ITEM_ICMP6,
ITEM_ICMP6_ND_NS,
ITEM_ICMP6_ND_NA,
@@ -1435,6 +1440,15 @@ static const enum index item_ipv6[] = {
ITEM_IPV6_SRC,
ITEM_IPV6_DST,
ITEM_IPV6_HAS_FRAG_EXT,
+   ITEM_IPV6_ROUTING_EXT,
+   ITEM_NEXT,
+   ZERO,
+};
+
+static const enum index item_ipv6_routing_ext[] = {
+   ITEM_IPV6_ROUTING_EXT_TYPE,
+   ITEM_IPV6_ROUTING_EXT_NEXT_HDR,
+   ITEM_IPV6_ROUTING_EXT_SEG_LEFT,
ITEM_NEXT,
ZERO,
 };
@@ -3844,6 +3858,38 @@ static const struct token token_list[] = {
.args = ARGS(ARGS_ENTRY_BF(struct rte_flow_item_ipv6,
   has_frag_ext, 1)),
},
+   [ITEM_IPV6_ROUTING_EXT] = {
+   .name = "ipv6_routing_ext",
+   .help = "match IPv6 routing extension header",
+   .priv = PRIV_ITEM(IPV6_ROUTING_EXT,
+ sizeof(struct 
rte_flow_item_ipv6_routing_ext)),
+   .next = NEXT(item_ipv6_routing_ext),
+   .call = parse_vc,
+   },
+   [ITEM_IPV6_ROUTING_EXT_TYPE] = {
+   .name = "ext_type",
+   .help = "match IPv6 routing extension header type",
+   .next = NEXT(item_ipv6_routing_ext, NEXT_ENTRY(COMMON_UNSIGNED),
+item_param),
+   .args = ARGS(ARGS_ENTRY_HTON(struct 
rte_flow_item_ipv6_routing_ext,
+hdr.type)),
+   },
+   [ITEM_IPV6_ROUTING_EXT_NEXT_HDR] = {
+   .name = "ext_next_hdr",
+   .help = "match IPv6 routing extension header next header type",
+   .next = NEXT(item_ipv6_routing_ext, NEXT_ENTRY(COMMON_UNSIGNED),
+item_param),
+   .args = ARGS(ARGS_ENTRY_HTON(struct 
rte_flow_item_ipv6_routing_ext,
+hdr.next_hdr)),
+   },
+   [ITEM_IPV6_ROUTING_EXT_SEG_LEFT] = {
+   .name = "ext_seg_left",
+   .help = "match IPv6 routing extension header segment left",
+   .next = NEXT(item_ipv6_routing_ext, NEXT_ENTRY(COMMON_UNSIGNED),
+item_param),
+   .args = ARGS(ARGS_ENTRY_HTON(struct 
rte_flow_item_ipv6_routing_ext,
+hdr.segments_left)),
+   },
[ITEM_ICMP] = {
.name = "icmp",
.help = "match ICMP header",
diff --git a/doc/guides/prog_guide/rte_flow.rst 
b/doc/guides/prog_guide/rte_flow.rst
index 3e6242803d..602fab29d3 100644
--- a/doc/guides/prog_guide/rte_flow.rst
+++ b/doc/guides/prog_guide/rte_flow.rst
@@ -1544,6 +1544,15 @@ Matches Color Marker set by a Meter.
 
 - ``color``: Metering color marker.
 
+Item: ``IPV6_ROUTING_EXT``
+^^
+
+Matches IPv6 routing extension header.
+
+- ``next_hdr``: Next layer header type.
+- ``type``: IPv6 routing extension header type.
+- ``segments_left``: How many IPv6 destination addresses remain.
+
 Actions
 ~~~
 
diff --git a/doc/guides/rel_notes/release_23_03.rst 
b/doc/guides/rel_notes/release_23_03.rst
index b8c5b68d6c..8f482301f7 100644
--- a/doc/guides/rel_notes/release_23_03.rst
+++ b/doc/guides/rel_notes/release_23_03.rst
@@ -55,6 +55,11 @@ New Features
  Also, make sure to start the actual text at the margin.
  ===
 
+* **Added rte_flow support for matching IPv6 routing extension header fields.**
+
+  Added ``ipv6_routing_ext`` items in rte_flow to match IPv6 routing extension
+  header.
+

[PATCH v3 2/8] net/mlx5: adopt IPv6 routing extension prm definition

2023-01-29 Thread Rongwei Liu
Per the newest PRM definition, sample_id stands for 3 parts
of information instead of a single uint32_t id: sample_id +
modify_field_id + format_select_dw.

Also, new FW capability bits have been introduced to identify
the new capability.

Signed-off-by: Rongwei Liu 
Acked-by: Viacheslav Ovsiienko 
---
 drivers/common/mlx5/mlx5_devx_cmds.c | 14 +++---
 drivers/common/mlx5/mlx5_devx_cmds.h |  7 ++-
 drivers/common/mlx5/mlx5_prm.h   | 28 ++--
 drivers/net/mlx5/mlx5.c  | 15 +++
 drivers/net/mlx5/mlx5.h  |  3 ++-
 drivers/net/mlx5/mlx5_flow_flex.c| 14 +++---
 6 files changed, 67 insertions(+), 14 deletions(-)

diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c 
b/drivers/common/mlx5/mlx5_devx_cmds.c
index e3a4927d0f..1f65ea7dcb 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -607,7 +607,8 @@ mlx5_devx_cmd_query_hca_vdpa_attr(void *ctx,
 
 int
 mlx5_devx_cmd_query_parse_samples(struct mlx5_devx_obj *flex_obj,
- uint32_t ids[], uint32_t num)
+ struct mlx5_ext_sample_id ids[],
+ uint32_t num, uint8_t *anchor)
 {
uint32_t in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {0};
uint32_t out[MLX5_ST_SZ_DW(create_flex_parser_out)] = {0};
@@ -636,6 +637,7 @@ mlx5_devx_cmd_query_parse_samples(struct mlx5_devx_obj 
*flex_obj,
(void *)flex_obj);
return -rte_errno;
}
+   *anchor = MLX5_GET(parse_graph_flex, flex, head_anchor_id);
for (i = 0; i < MLX5_GRAPH_NODE_SAMPLE_NUM; i++) {
void *s_off = (void *)((char *)sample + i *
  MLX5_ST_SZ_BYTES(parse_graph_flow_match_sample));
@@ -645,8 +647,8 @@ mlx5_devx_cmd_query_parse_samples(struct mlx5_devx_obj 
*flex_obj,
  flow_match_sample_en);
if (!en)
continue;
-   ids[idx++] = MLX5_GET(parse_graph_flow_match_sample, s_off,
- flow_match_sample_field_id);
+   ids[idx++].id = MLX5_GET(parse_graph_flow_match_sample, s_off,
+flow_match_sample_field_id);
}
if (num != idx) {
rte_errno = EINVAL;
@@ -794,6 +796,12 @@ mlx5_devx_cmd_query_hca_parse_graph_node_cap
 max_num_arc_out);
attr->max_num_sample = MLX5_GET(parse_graph_node_cap, hcattr,
max_num_sample);
+   attr->anchor_en = MLX5_GET(parse_graph_node_cap, hcattr, anchor_en);
+   attr->ext_sample_id = MLX5_GET(parse_graph_node_cap, hcattr, 
ext_sample_id);
+   attr->sample_tunnel_inner2 = MLX5_GET(parse_graph_node_cap, hcattr,
+ sample_tunnel_inner2);
+   attr->zero_size_supported = MLX5_GET(parse_graph_node_cap, hcattr,
+zero_size_supported);
attr->sample_id_in_out = MLX5_GET(parse_graph_node_cap, hcattr,
  sample_id_in_out);
attr->max_base_header_length = MLX5_GET(parse_graph_node_cap, hcattr,
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h 
b/drivers/common/mlx5/mlx5_devx_cmds.h
index c94b9eac06..5b33010155 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -114,6 +114,10 @@ struct mlx5_hca_flex_attr {
uint8_t  max_num_arc_out;
uint8_t  max_num_sample;
uint8_t  max_num_prog_sample:5; /* From HCA CAP 2 */
+   uint8_t  anchor_en:1;
+   uint8_t  ext_sample_id:1;
+   uint8_t  sample_tunnel_inner2:1;
+   uint8_t  zero_size_supported:1;
uint8_t  sample_id_in_out:1;
uint16_t max_base_header_length;
uint8_t  max_sample_base_offset;
@@ -706,7 +710,8 @@ int mlx5_devx_cmd_modify_tir(struct mlx5_devx_obj *tir,
 struct mlx5_devx_modify_tir_attr *tir_attr);
 __rte_internal
 int mlx5_devx_cmd_query_parse_samples(struct mlx5_devx_obj *flex_obj,
- uint32_t ids[], uint32_t num);
+ struct mlx5_ext_sample_id ids[],
+ uint32_t num, uint8_t *anchor);
 
 __rte_internal
 struct mlx5_devx_obj *
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 3790dc84b8..ce6cd98fd7 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -1893,7 +1893,11 @@ struct mlx5_ifc_parse_graph_node_cap_bits {
u8 max_num_arc_in[0x08];
u8 max_num_arc_out[0x08];
u8 max_num_sample[0x08];
-   u8 reserved_at_78[0x07];
+   u8 reserved_at_78[0x03];
+   u8 anchor_en[0x1];
+   u8 ext_sample_id[0x1];
+   u8 sample_tunnel_inner2[0x1];
+   u8 zero_size_supported[0x1];
u8 s

[PATCH v3 3/8] net/mlx5/hws: Definer, add mlx5dr context to definer_conv_data

2023-01-29 Thread Rongwei Liu
From: Gregory Etelson 

A new mlx5dr_context member replaces mlx5dr_cmd_query_caps.
The capabilities structure is a member of mlx5dr_context.

Signed-off-by: Gregory Etelson 
Signed-off-by: Rongwei Liu 
Acked-by: Viacheslav Ovsiienko 
---
 drivers/net/mlx5/hws/mlx5dr_definer.c | 42 ++-
 1 file changed, 22 insertions(+), 20 deletions(-)

diff --git a/drivers/net/mlx5/hws/mlx5dr_definer.c 
b/drivers/net/mlx5/hws/mlx5dr_definer.c
index 6b98eb8c96..0f1cab7e07 100644
--- a/drivers/net/mlx5/hws/mlx5dr_definer.c
+++ b/drivers/net/mlx5/hws/mlx5dr_definer.c
@@ -100,7 +100,7 @@ struct mlx5dr_definer_sel_ctrl {
 };
 
 struct mlx5dr_definer_conv_data {
-   struct mlx5dr_cmd_query_caps *caps;
+   struct mlx5dr_context *ctx;
struct mlx5dr_definer_fc *fc;
uint8_t relaxed;
uint8_t tunnel;
@@ -815,6 +815,7 @@ mlx5dr_definer_conv_item_gtp(struct 
mlx5dr_definer_conv_data *cd,
 struct rte_flow_item *item,
 int item_idx)
 {
+   struct mlx5dr_cmd_query_caps *caps = cd->ctx->caps;
const struct rte_flow_item_gtp *m = item->mask;
struct mlx5dr_definer_fc *fc;
 
@@ -836,7 +837,7 @@ mlx5dr_definer_conv_item_gtp(struct 
mlx5dr_definer_conv_data *cd,
}
 
if (m->teid) {
-   if (!(cd->caps->flex_protocols & 
MLX5_HCA_FLEX_GTPU_TEID_ENABLED)) {
+   if (!(caps->flex_protocols & MLX5_HCA_FLEX_GTPU_TEID_ENABLED)) {
rte_errno = ENOTSUP;
return rte_errno;
}
@@ -844,11 +845,11 @@ mlx5dr_definer_conv_item_gtp(struct 
mlx5dr_definer_conv_data *cd,
fc->item_idx = item_idx;
fc->tag_set = &mlx5dr_definer_gtp_teid_set;
fc->bit_mask = __mlx5_mask(header_gtp, teid);
-   fc->byte_off = cd->caps->format_select_gtpu_dw_1 * DW_SIZE;
+   fc->byte_off = caps->format_select_gtpu_dw_1 * DW_SIZE;
}
 
if (m->v_pt_rsv_flags) {
-   if (!(cd->caps->flex_protocols & 
MLX5_HCA_FLEX_GTPU_DW_0_ENABLED)) {
+   if (!(caps->flex_protocols & MLX5_HCA_FLEX_GTPU_DW_0_ENABLED)) {
rte_errno = ENOTSUP;
return rte_errno;
}
@@ -857,12 +858,12 @@ mlx5dr_definer_conv_item_gtp(struct 
mlx5dr_definer_conv_data *cd,
fc->tag_set = &mlx5dr_definer_gtp_ext_flag_set;
fc->bit_mask = __mlx5_mask(header_gtp, ext_hdr_flag);
fc->bit_off = __mlx5_dw_bit_off(header_gtp, ext_hdr_flag);
-   fc->byte_off = cd->caps->format_select_gtpu_dw_0 * DW_SIZE;
+   fc->byte_off = caps->format_select_gtpu_dw_0 * DW_SIZE;
}
 
 
if (m->msg_type) {
-   if (!(cd->caps->flex_protocols & 
MLX5_HCA_FLEX_GTPU_DW_0_ENABLED)) {
+   if (!(caps->flex_protocols & MLX5_HCA_FLEX_GTPU_DW_0_ENABLED)) {
rte_errno = ENOTSUP;
return rte_errno;
}
@@ -871,7 +872,7 @@ mlx5dr_definer_conv_item_gtp(struct 
mlx5dr_definer_conv_data *cd,
fc->tag_set = &mlx5dr_definer_gtp_msg_type_set;
fc->bit_mask = __mlx5_mask(header_gtp, msg_type);
fc->bit_off = __mlx5_dw_bit_off(header_gtp, msg_type);
-   fc->byte_off = cd->caps->format_select_gtpu_dw_0 * DW_SIZE;
+   fc->byte_off = caps->format_select_gtpu_dw_0 * DW_SIZE;
}
 
return 0;
@@ -882,12 +883,13 @@ mlx5dr_definer_conv_item_gtp_psc(struct 
mlx5dr_definer_conv_data *cd,
 struct rte_flow_item *item,
 int item_idx)
 {
+   struct mlx5dr_cmd_query_caps *caps = cd->ctx->caps;
const struct rte_flow_item_gtp_psc *m = item->mask;
struct mlx5dr_definer_fc *fc;
 
/* Overwrite GTP extension flag to be 1 */
if (!cd->relaxed) {
-   if (!(cd->caps->flex_protocols & 
MLX5_HCA_FLEX_GTPU_DW_0_ENABLED)) {
+   if (!(caps->flex_protocols & MLX5_HCA_FLEX_GTPU_DW_0_ENABLED)) {
rte_errno = ENOTSUP;
return rte_errno;
}
@@ -896,12 +898,12 @@ mlx5dr_definer_conv_item_gtp_psc(struct 
mlx5dr_definer_conv_data *cd,
fc->tag_set = &mlx5dr_definer_ones_set;
fc->bit_mask = __mlx5_mask(header_gtp, ext_hdr_flag);
fc->bit_off = __mlx5_dw_bit_off(header_gtp, ext_hdr_flag);
-   fc->byte_off = cd->caps->format_select_gtpu_dw_0 * DW_SIZE;
+   fc->byte_off = caps->format_select_gtpu_dw_0 * DW_SIZE;
}
 
/* Overwrite next extension header type */
if (!cd->relaxed) {
-   if (!(cd->caps->flex_protocols & 
MLX5_HCA_FLEX_GTPU_DW_2_ENABLED)) {
+   if (!(caps->flex_protocols & MLX5_HCA_FLEX_GTPU_DW_2_ENABLED)) {
rte_errno = ENOTSUP;
   

[PATCH v3 4/8] net/mlx5/hws: add IPv6 routing extension matching support

2023-01-29 Thread Rongwei Liu
Add mlx5 HWS logic to match the IPv6 routing extension header.

Once IPv6 routing extension matching items are detected in the pattern
template create callback, the PMD allocates a flex parser to sample the
first dword of the SRv6 header.

Only next_hdr/segments_left/type are supported for now.
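
For illustration, a matching template could look as below (hypothetical
values; async pattern template syntax assumed per testpmd):

testpmd> flow pattern_template 0 create pattern_template_id 1 ingress
         template eth / ipv6 / ipv6_routing_ext ext_seg_left mask 0xff / end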

Signed-off-by: Rongwei Liu 
Reviewed-by: Alex Vesker 
Acked-by: Viacheslav Ovsiienko 
---
 drivers/common/mlx5/mlx5_devx_cmds.c  |  7 +-
 drivers/net/mlx5/hws/mlx5dr_definer.c | 91 ++
 drivers/net/mlx5/hws/mlx5dr_definer.h | 15 +
 drivers/net/mlx5/mlx5.c   | 92 ++-
 drivers/net/mlx5/mlx5.h   | 16 +
 drivers/net/mlx5/mlx5_flow.h  | 28 
 drivers/net/mlx5/mlx5_flow_hw.c   | 29 +++--
 7 files changed, 268 insertions(+), 10 deletions(-)

diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c 
b/drivers/common/mlx5/mlx5_devx_cmds.c
index 1f65ea7dcb..22a94c1e1a 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -607,7 +607,7 @@ mlx5_devx_cmd_query_hca_vdpa_attr(void *ctx,
 
 int
 mlx5_devx_cmd_query_parse_samples(struct mlx5_devx_obj *flex_obj,
- struct mlx5_ext_sample_id ids[],
+ struct mlx5_ext_sample_id *ids,
  uint32_t num, uint8_t *anchor)
 {
uint32_t in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {0};
@@ -637,8 +637,9 @@ mlx5_devx_cmd_query_parse_samples(struct mlx5_devx_obj 
*flex_obj,
(void *)flex_obj);
return -rte_errno;
}
-   *anchor = MLX5_GET(parse_graph_flex, flex, head_anchor_id);
-   for (i = 0; i < MLX5_GRAPH_NODE_SAMPLE_NUM; i++) {
+   if (anchor)
+   *anchor = MLX5_GET(parse_graph_flex, flex, head_anchor_id);
+   for (i = 0; i < MLX5_GRAPH_NODE_SAMPLE_NUM && idx <= num; i++) {
void *s_off = (void *)((char *)sample + i *
  MLX5_ST_SZ_BYTES(parse_graph_flow_match_sample));
uint32_t en;
diff --git a/drivers/net/mlx5/hws/mlx5dr_definer.c 
b/drivers/net/mlx5/hws/mlx5dr_definer.c
index 0f1cab7e07..142fc545eb 100644
--- a/drivers/net/mlx5/hws/mlx5dr_definer.c
+++ b/drivers/net/mlx5/hws/mlx5dr_definer.c
@@ -125,6 +125,7 @@ struct mlx5dr_definer_conv_data {
X(SET_BE16, ipv4_frag,  v->fragment_offset, 
rte_ipv4_hdr) \
X(SET_BE16, ipv6_payload_len,   v->hdr.payload_len, 
rte_flow_item_ipv6) \
X(SET,  ipv6_proto, v->hdr.proto,   
rte_flow_item_ipv6) \
+   X(SET,  ipv6_routing_hdr,   IPPROTO_ROUTING,
rte_flow_item_ipv6) \
X(SET,  ipv6_hop_limits,v->hdr.hop_limits,  
rte_flow_item_ipv6) \
X(SET_BE32P,ipv6_src_addr_127_96,   &v->hdr.src_addr[0],
rte_flow_item_ipv6) \
X(SET_BE32P,ipv6_src_addr_95_64,&v->hdr.src_addr[4],
rte_flow_item_ipv6) \
@@ -293,6 +294,21 @@ mlx5dr_definer_integrity_set(struct mlx5dr_definer_fc *fc,
DR_SET(tag, ok1_bits, fc->byte_off, fc->bit_off, fc->bit_mask);
 }
 
+static void
+mlx5dr_definer_ipv6_routing_ext_set(struct mlx5dr_definer_fc *fc,
+   const void *item,
+   uint8_t *tag)
+{
+   const struct rte_flow_item_ipv6_routing_ext *v = item;
+   uint32_t val;
+
+   val = v->hdr.next_hdr << __mlx5_dw_bit_off(header_ipv6_routing_ext, 
next_hdr);
+   val |= v->hdr.type << __mlx5_dw_bit_off(header_ipv6_routing_ext, type);
+   val |= v->hdr.segments_left <<
+   __mlx5_dw_bit_off(header_ipv6_routing_ext, segments_left);
+   DR_SET(tag, val, fc->byte_off, 0, fc->bit_mask);
+}
+
 static void
 mlx5dr_definer_gre_key_set(struct mlx5dr_definer_fc *fc,
   const void *item_spec,
@@ -1468,6 +1484,76 @@ mlx5dr_definer_conv_item_meter_color(struct 
mlx5dr_definer_conv_data *cd,
return 0;
 }
 
+static struct mlx5dr_definer_fc *
+mlx5dr_definer_get_flex_parser_fc(struct mlx5dr_definer_conv_data *cd, 
uint32_t byte_off)
+{
+   uint32_t byte_off_fp7 = MLX5_BYTE_OFF(definer_hl, 
flex_parser.flex_parser_7);
+   uint32_t byte_off_fp0 = MLX5_BYTE_OFF(definer_hl, 
flex_parser.flex_parser_0);
+   enum mlx5dr_definer_fname fname = MLX5DR_DEFINER_FNAME_FLEX_PARSER_0;
+   struct mlx5dr_definer_fc *fc;
+   uint32_t idx;
+
+   if (byte_off < byte_off_fp7 || byte_off > byte_off_fp0) {
+   rte_errno = EINVAL;
+   return NULL;
+   }
+   idx = (byte_off_fp0 - byte_off) / (sizeof(uint32_t));
+   fname += (enum mlx5dr_definer_fname)idx;
+   fc = &cd->fc[fname];
+   fc->byte_off = byte_off;
+   fc->bit_mask = UINT32_MAX;
+   return fc;
+}
+
+static int
+mlx5dr_definer_conv_item_ipv6_routing_ext(struct mlx5dr_definer_conv_data *cd,
+ struct rte_flow_item *item,
+  

[PATCH v3 5/8] app/testpmd: add IPv6 routing extension header in raw encap

2023-01-29 Thread Rongwei Liu
Add IPv6 routing extension header support in the raw_encap command.
1. No TLV support for now.
2. Assume the header length is derived from the current segments_left.
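
For illustration (hypothetical values; note assumption 2 above, the computed
size grows by 16 bytes per segments_left address):

testpmd> set raw_encap 1 eth / ipv6 /
         ipv6_routing_ext ext_type is 4 ext_next_hdr is 17 ext_seg_left is 2 /
         udp / end_set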

Signed-off-by: Rongwei Liu 
Acked-by: Ori Kam 
---
 app/test-pmd/cmdline_flow.c | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 7a8516829c..4bdb46e89a 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -10925,6 +10925,13 @@ flow_item_default_mask(const struct rte_flow_item 
*item)
 {
const void *mask = NULL;
static rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX);
+   static struct rte_flow_item_ipv6_routing_ext 
ipv6_routing_ext_default_mask = {
+   .hdr = {
+   .next_hdr = 0xff,
+   .type = 0xff,
+   .segments_left = 0xff,
+   },
+   };
 
switch (item->type) {
case RTE_FLOW_ITEM_TYPE_ANY:
@@ -11027,6 +11034,9 @@ flow_item_default_mask(const struct rte_flow_item *item)
case RTE_FLOW_ITEM_TYPE_METER_COLOR:
mask = &rte_flow_item_meter_color_mask;
break;
+   case RTE_FLOW_ITEM_TYPE_IPV6_ROUTING_EXT:
+   mask = &ipv6_routing_ext_default_mask;
+   break;
default:
break;
}
@@ -11181,6 +11191,7 @@ cmd_set_raw_parsed(const struct buffer *in)
for (i = n - 1 ; i >= 0; --i) {
const struct rte_flow_item_gtp *gtp;
const struct rte_flow_item_geneve_opt *opt;
+   struct rte_flow_item_ipv6_routing_ext *ext;
 
item = in->args.vc.pattern + i;
if (item->spec == NULL)
@@ -11201,6 +11212,18 @@ cmd_set_raw_parsed(const struct buffer *in)
size = sizeof(struct rte_ipv6_hdr);
proto = RTE_ETHER_TYPE_IPV6;
break;
+   case RTE_FLOW_ITEM_TYPE_IPV6_ROUTING_EXT:
+   ext = (struct rte_flow_item_ipv6_routing_ext 
*)(uintptr_t)item->spec;
+   if (!ext->hdr.hdr_len) {
+   size = sizeof(struct rte_ipv6_routing_ext) +
+   (ext->hdr.segments_left << 4);
+   ext->hdr.hdr_len = ext->hdr.segments_left << 1;
+   } else {
+   size = sizeof(struct rte_ipv6_routing_ext) +
+   (ext->hdr.hdr_len << 3);
+   }
+   proto = IPPROTO_ROUTING;
+   break;
case RTE_FLOW_ITEM_TYPE_UDP:
size = sizeof(struct rte_udp_hdr);
proto = 0x11;
-- 
2.27.0



[PATCH v3 7/8] net/mlx5: add modify IPv6 protocol implementation

2023-01-29 Thread Rongwei Liu
Add HWS modify IPv6 protocol implementation.

Signed-off-by: Rongwei Liu 
Acked-by: Viacheslav Ovsiienko 
---
 drivers/common/mlx5/mlx5_prm.h  |  1 +
 drivers/net/mlx5/mlx5_flow_dv.c | 10 ++
 2 files changed, 11 insertions(+)

diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index ce6cd98fd7..497f2622b2 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -759,6 +759,7 @@ enum mlx5_modification_field {
MLX5_MODI_OUT_IP_ECN = 0x73,
MLX5_MODI_TUNNEL_HDR_DW_1 = 0x75,
MLX5_MODI_GTPU_FIRST_EXT_DW_0 = 0x76,
+   MLX5_MODI_OUT_IPV6_NEXT_HDR = 0x4A,
 };
 
 /* Total number of metadata reg_c's. */
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 7ca90b..e972a2dc5a 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -1357,6 +1357,7 @@ mlx5_flow_item_field_width(struct rte_eth_dev *dev,
case RTE_FLOW_FIELD_IPV6_DSCP:
return 6;
case RTE_FLOW_FIELD_IPV6_HOPLIMIT:
+   case RTE_FLOW_FIELD_IPV6_PROTO:
return 8;
case RTE_FLOW_FIELD_IPV6_SRC:
case RTE_FLOW_FIELD_IPV6_DST:
@@ -1883,6 +1884,15 @@ mlx5_flow_field_id_to_modify_info
info[idx].offset = data->offset;
}
break;
+   case RTE_FLOW_FIELD_IPV6_PROTO:
+   MLX5_ASSERT(data->offset + width <= 8);
+   off_be = 8 - (data->offset + width);
+   info[idx] = (struct field_modify_info){1, 0, 
MLX5_MODI_OUT_IPV6_NEXT_HDR};
+   if (mask)
+   mask[idx] = flow_modify_info_mask_8(width, off_be);
+   else
+   info[idx].offset = off_be;
+   break;
case RTE_FLOW_FIELD_POINTER:
case RTE_FLOW_FIELD_VALUE:
default:
-- 
2.27.0



[PATCH v3 6/8] ethdev: add modify IPv6 protocol field

2023-01-29 Thread Rongwei Liu
Add IPv6 protocol modify field definition.

Add new modify field destination type string: "ipv6_proto".
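
For illustration (hypothetical values), rewriting the IPv6 next header to
UDP (0x11) from testpmd could look like:

testpmd> flow create 0 ingress pattern eth / ipv6 / end actions
         modify_field op set dst_type ipv6_proto
         src_type value src_value 0x11 width 8 / queue index 0 / end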

Signed-off-by: Rongwei Liu 
Acked-by: Ori Kam 
---
 app/test-pmd/cmdline_flow.c | 3 ++-
 lib/ethdev/rte_flow.h   | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 4bdb46e89a..1340cf3a9b 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -811,7 +811,8 @@ static const char *const modify_field_ids[] = {
"udp_port_src", "udp_port_dst",
"vxlan_vni", "geneve_vni", "gtp_teid",
"tag", "mark", "meta", "pointer", "value",
-   "ipv4_ecn", "ipv6_ecn", "gtp_psc_qfi", "meter_color", NULL
+   "ipv4_ecn", "ipv6_ecn", "gtp_psc_qfi", "meter_color",
+   "ipv6_proto", NULL
 };
 
 static const char *const meter_colors[] = {
diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h
index 9b9018cba2..f4797682b7 100644
--- a/lib/ethdev/rte_flow.h
+++ b/lib/ethdev/rte_flow.h
@@ -3547,6 +3547,7 @@ enum rte_flow_field_id {
RTE_FLOW_FIELD_IPV6_ECN,/**< IPv6 ECN. */
RTE_FLOW_FIELD_GTP_PSC_QFI, /**< GTP QFI. */
RTE_FLOW_FIELD_METER_COLOR, /**< Meter color marker. */
+   RTE_FLOW_FIELD_IPV6_PROTO,  /**< IPv6 next header. */
 };
 
 /**
-- 
2.27.0



[PATCH v3 8/8] doc/mlx5: add IPv6 routing extension matching docs

2023-01-29 Thread Rongwei Liu
Update the mlx5 related documentation on IPv6 routing extension header
matching.

Signed-off-by: Rongwei Liu 
Acked-by: Viacheslav Ovsiienko 
---
 doc/guides/nics/features/default.ini | 1 +
 doc/guides/nics/features/mlx5.ini| 1 +
 doc/guides/nics/mlx5.rst | 2 ++
 3 files changed, 4 insertions(+)

diff --git a/doc/guides/nics/features/default.ini 
b/doc/guides/nics/features/default.ini
index 510cc6679d..3d0744a243 100644
--- a/doc/guides/nics/features/default.ini
+++ b/doc/guides/nics/features/default.ini
@@ -141,6 +141,7 @@ udp  =
 vlan =
 vxlan=
 vxlan_gpe=
+ipv6_routing_ext =
 
 [rte_flow actions]
 age  =
diff --git a/doc/guides/nics/features/mlx5.ini 
b/doc/guides/nics/features/mlx5.ini
index 62fd330e2b..bd911a467b 100644
--- a/doc/guides/nics/features/mlx5.ini
+++ b/doc/guides/nics/features/mlx5.ini
@@ -87,6 +87,7 @@ vlan = Y
 vxlan= Y
 vxlan_gpe= Y
 represented_port = Y
+ipv6_routing_ext = Y
 
 [rte_flow actions]
 age  = I
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index b23ca35b8f..fb8001faef 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -106,6 +106,7 @@ Features
 - Sub-Function representors.
 - Sub-Function.
 - Matching on represented port.
+- Matching on IPv6 routing extension header.
 
 
 Limitations
@@ -174,6 +175,7 @@ Limitations
 
 - ``-EAGAIN`` for ``rte_eth_dev_start()``.
 - ``-EBUSY`` for ``rte_eth_dev_stop()``.
+  - Matching on ICMP6 following an IPv6 routing extension header should match
ipv6_routing_ext_next_hdr instead of ICMP6.
 
 - When using Verbs flow engine (``dv_flow_en`` = 0), flow pattern without any
   specific VLAN will match for VLAN packets as well:
-- 
2.27.0



RE: [PATCH v2 01/11] ethdev: add flex item modify field support

2023-01-29 Thread Rongwei Liu
Hi Andrew

BR
Rongwei

> -Original Message-
> From: Andrew Rybchenko 
> Sent: Friday, January 20, 2023 17:08
> To: Rongwei Liu ; Matan Azrad ;
> Slava Ovsiienko ; Ori Kam ;
> NBU-Contact-Thomas Monjalon (EXTERNAL) ; Aman
> Singh ; Yuying Zhang
> ; Ferruh Yigit 
> Cc: dev@dpdk.org; Raslan Darawsheh 
> Subject: Re: [PATCH v2 01/11] ethdev: add flex item modify field support
> 
> External email: Use caution opening links or attachments
> 
> 
> On 1/19/23 07:58, Rongwei Liu wrote:
> > Add flex item as modify field destination.
> > Add "struct rte_flow_item_flex_handle *flex_handle" into "struct
> > rte_flow_action_modify_data" as union with existed "level" member.
> > This new member is dedicated for modifying flex item.
> >
> > Add flex item modify field cmdline support. Now user can use testpmd
> > cli to specify which flex item to be modified, either source or
> > destination.
> >
> > Syntax is as below:
> > modify_field op set dst_type flex_item dst_level 0 dst_offset 16
> > src_type value src_value 0x123456781020 width 8
> >
> > Signed-off-by: Rongwei Liu 
> > Acked-by: Ori Kam 
> 
> [snip]
> 
> > diff --git a/doc/guides/rel_notes/release_23_03.rst
> > b/doc/guides/rel_notes/release_23_03.rst
> > index b8c5b68d6c..c673205e5e 100644
> > --- a/doc/guides/rel_notes/release_23_03.rst
> > +++ b/doc/guides/rel_notes/release_23_03.rst
> > @@ -56,6 +56,10 @@ New Features
> >===
> >
> >
> 
> It should be just one empty line here
> 
Sure.
> > +* ethdev: added a new field:
> 
> "added a new field' is too generic.
> 
> > +
> > +  - modify flex item: ``rte_flow_action_modify_data.flex_handle``
> > +
> 
> And two empty lines here.
> 
Sure.
> >   Removed Items
> >   -
> >
> > diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h index
> > b60987db4b..c66a65351d 100644
> > --- a/lib/ethdev/rte_flow.h
> > +++ b/lib/ethdev/rte_flow.h
> > @@ -3528,6 +3528,7 @@ enum rte_flow_field_id {
> >   RTE_FLOW_FIELD_IPV6_ECN,/**< IPv6 ECN. */
> >   RTE_FLOW_FIELD_GTP_PSC_QFI, /**< GTP QFI. */
> >   RTE_FLOW_FIELD_METER_COLOR, /**< Meter color marker. */
> > + RTE_FLOW_FIELD_FLEX_ITEM,   /**< Flex item. */
> >   };
> >
> >   /**
> > @@ -3541,8 +3542,11 @@ struct rte_flow_action_modify_data {
> >   RTE_STD_C11
> >   union {
> >   struct {
> > - /** Encapsulation level or tag index. */
> > - uint32_t level;
> > + /**< Encapsulation level or tag index or flex
> > + item handle. */
> 
> Have you tried to generate documentation? If it is a union documentation it
> should be /**, not /**<.
Sure. Sorry, I followed the wrong existing examples.
> In general, it is better to document union from overall point of view. What 
> is it
> logically? Do not define union as just a union of its fields.
Currently, 'flex_handle' is documented in the rte_flow.rst file, in the
"table:: destination/source field definition", as a new row.
From the API aspect, when modifying a flex item, the user should specify the
pointer of the flex item instead of an ID.
That's why it was added as a union.
> 
> > + union {
> > + uint32_t level;
> > + struct rte_flow_item_flex_handle
> > + *flex_handle;
> 
> Union items documentation missing.
See above. Do we need another place to document the union again?
> 
> > + };
> >   /** Number of bits to skip from a field. */
> >   uint32_t offset;
> >   };
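
A minimal C sketch of the union usage discussed above (assumed names;
'flex_handle' comes from a prior rte_flow_flex_item_create() call):

  struct rte_flow_action_modify_field mf = {
          .operation = RTE_FLOW_MODIFY_SET,
          .dst = {
                  .field = RTE_FLOW_FIELD_FLEX_ITEM,
                  .flex_handle = flex_handle, /* pointer, not an ID */
                  .offset = 16,               /* bits to skip in the item */
          },
          .src = {
                  .field = RTE_FLOW_FIELD_VALUE,
                  .value = { 0x12 },          /* immediate source value */
          },
          .width = 8,                         /* bits to modify */
  };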



RE: [PATCH v2 01/11] ethdev: add flex item modify field support

2023-01-29 Thread Rongwei Liu
Hi Andrew

BR
Rongwei

> -Original Message-
> From: Andrew Rybchenko 
> Sent: Friday, January 20, 2023 17:08
> To: Rongwei Liu ; Matan Azrad ;
> Slava Ovsiienko ; Ori Kam ;
> NBU-Contact-Thomas Monjalon (EXTERNAL) ; Aman
> Singh ; Yuying Zhang
> ; Ferruh Yigit 
> Cc: dev@dpdk.org; Raslan Darawsheh 
> Subject: Re: [PATCH v2 01/11] ethdev: add flex item modify field support
> 
> External email: Use caution opening links or attachments
> 
> 
> On 1/19/23 07:58, Rongwei Liu wrote:
> > Add flex item as modify field destination.
> > Add "struct rte_flow_item_flex_handle *flex_handle" into "struct
> > rte_flow_action_modify_data" as union with existed "level" member.
> > This new member is dedicated for modifying flex item.
> >
> > Add flex item modify field cmdline support. Now user can use testpmd
> > cli to specify which flex item to be modified, either source or
> > destination.
> >
> > Syntax is as below:
> > modify_field op set dst_type flex_item dst_level 0 dst_offset 16
> > src_type value src_value 0x123456781020 width 8
> >
> > Signed-off-by: Rongwei Liu 
> > Acked-by: Ori Kam 
> 
> [snip]
> 
> > diff --git a/doc/guides/rel_notes/release_23_03.rst
> > b/doc/guides/rel_notes/release_23_03.rst
> > index b8c5b68d6c..c673205e5e 100644
> > --- a/doc/guides/rel_notes/release_23_03.rst
> > +++ b/doc/guides/rel_notes/release_23_03.rst
> > @@ -56,6 +56,10 @@ New Features
> >===
> >
> >
> 
> It should be just one empty line here
> 
> > +* ethdev: added a new field:
> 
> "added a new field' is too generic.
> 
> > +
> > +  - modify flex item: ``rte_flow_action_modify_data.flex_handle``
> > +
> 
> And two empty lines here.
> 
> >   Removed Items
> >   -
> >
> > diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h index
> > b60987db4b..c66a65351d 100644
> > --- a/lib/ethdev/rte_flow.h
> > +++ b/lib/ethdev/rte_flow.h
> > @@ -3528,6 +3528,7 @@ enum rte_flow_field_id {
> >   RTE_FLOW_FIELD_IPV6_ECN,/**< IPv6 ECN. */
> >   RTE_FLOW_FIELD_GTP_PSC_QFI, /**< GTP QFI. */
> >   RTE_FLOW_FIELD_METER_COLOR, /**< Meter color marker. */
> > + RTE_FLOW_FIELD_FLEX_ITEM,   /**< Flex item. */
> >   };
> >
> >   /**
> > @@ -3541,8 +3542,11 @@ struct rte_flow_action_modify_data {
> >   RTE_STD_C11
> >   union {
> >   struct {
> > - /** Encapsulation level or tag index. */
> > - uint32_t level;
> > + /**< Encapsulation level or tag index or flex
> > + item handle. */
> 
> Have you tried to generate documentation? If it is a union documentation it
> should be /**, not /**<.
> In general, it is better to document union from overall point of view. What 
> is it
> logically? Do not define union as just a union of its fields.
> 
> > + union {
> > + uint32_t level;
> > + struct rte_flow_item_flex_handle
> > + *flex_handle;
> 
> Union items documentation missing.
Added it in "rte_flow.rst", following the ``level`` segment.
> 
> > + };
> >   /** Number of bits to skip from a field. */
> >   uint32_t offset;
> >   };



[PATCH v3 00/11] add flex item support

2023-01-29 Thread Rongwei Liu
Support flex item matching and modify field in async flows.
The syntax follows sync flows exactly.

v3: enhance the format; add flex_handle documentation.

Rongwei Liu (11):
  ethdev: add flex item modify field support
  app/testpmd: pass flex handle into matching mask
  net/mlx5: enable hws flex item create
  net/mlx5: add IPv6 protocol as flex item input
  net/mlx5: adopt new flex item prm definition
  net/mlx5/hws: add hws flex item matching support
  net/mlx5: add flex item modify field implementation
  net/mlx5: return error for sws modify field
  app/testpmd: raw encap with flex item support
  doc/mlx5: update mlx5 doc
  app/testpmd: adjust cleanup sequence when quitting

 app/test-pmd/cmdline_flow.c| 123 +---
 app/test-pmd/testpmd.c |   2 +-
 doc/guides/nics/mlx5.rst   |   2 +-
 doc/guides/prog_guide/rte_flow.rst |  41 +++---
 doc/guides/rel_notes/release_23_03.rst |   4 +
 drivers/common/mlx5/mlx5_devx_cmds.c   |  14 +-
 drivers/common/mlx5/mlx5_devx_cmds.h   |   7 +-
 drivers/common/mlx5/mlx5_prm.h |  29 +++-
 drivers/net/mlx5/hws/mlx5dr_definer.c  |  83 +++
 drivers/net/mlx5/linux/mlx5_os.c   |  27 ++--
 drivers/net/mlx5/mlx5.c|  17 ++-
 drivers/net/mlx5/mlx5.h|   9 +-
 drivers/net/mlx5/mlx5_flow.h   |   4 +
 drivers/net/mlx5/mlx5_flow_dv.c| 186 ++---
 drivers/net/mlx5/mlx5_flow_flex.c  | 149 +---
 drivers/net/mlx5/mlx5_flow_hw.c|  64 -
 lib/ethdev/rte_flow.h  |   8 +-
 17 files changed, 664 insertions(+), 105 deletions(-)

-- 
2.27.0



[PATCH v3 01/11] ethdev: add flex item modify field support

2023-01-29 Thread Rongwei Liu
Add flex item as a modify field destination.
Add "struct rte_flow_item_flex_handle *flex_handle" into
"struct rte_flow_action_modify_data" as a union with the existing
"level" member. This new member is dedicated to modifying
flex items.

Add flex item modify field cmdline support. Now users can use
the testpmd CLI to specify which flex item is to be modified,
either source or destination.

Syntax is as below:
modify_field op set dst_type flex_item dst_level 0
dst_offset 16 src_type value src_value 0x123456781020 width 8

Signed-off-by: Rongwei Liu 
Acked-by: Ori Kam 
---
 app/test-pmd/cmdline_flow.c| 89 --
 doc/guides/prog_guide/rte_flow.rst | 41 +++-
 doc/guides/rel_notes/release_23_03.rst |  4 ++
 lib/ethdev/rte_flow.h  |  8 ++-
 4 files changed, 116 insertions(+), 26 deletions(-)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 88108498e0..323c07253d 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -601,10 +601,12 @@ enum index {
ACTION_MODIFY_FIELD_DST_TYPE,
ACTION_MODIFY_FIELD_DST_TYPE_VALUE,
ACTION_MODIFY_FIELD_DST_LEVEL,
+   ACTION_MODIFY_FIELD_DST_LEVEL_VALUE,
ACTION_MODIFY_FIELD_DST_OFFSET,
ACTION_MODIFY_FIELD_SRC_TYPE,
ACTION_MODIFY_FIELD_SRC_TYPE_VALUE,
ACTION_MODIFY_FIELD_SRC_LEVEL,
+   ACTION_MODIFY_FIELD_SRC_LEVEL_VALUE,
ACTION_MODIFY_FIELD_SRC_OFFSET,
ACTION_MODIFY_FIELD_SRC_VALUE,
ACTION_MODIFY_FIELD_SRC_POINTER,
@@ -807,7 +809,8 @@ static const char *const modify_field_ids[] = {
"udp_port_src", "udp_port_dst",
"vxlan_vni", "geneve_vni", "gtp_teid",
"tag", "mark", "meta", "pointer", "value",
-   "ipv4_ecn", "ipv6_ecn", "gtp_psc_qfi", "meter_color", NULL
+   "ipv4_ecn", "ipv6_ecn", "gtp_psc_qfi", "meter_color",
+   "hash_result", "flex_item", NULL
 };
 
 static const char *const meter_colors[] = {
@@ -2282,6 +2285,10 @@ parse_vc_modify_field_id(struct context *ctx, const 
struct token *token,
const char *str, unsigned int len, void *buf,
unsigned int size);
 static int
+parse_vc_modify_field_level(struct context *ctx, const struct token *token,
+   const char *str, unsigned int len, void *buf,
+   unsigned int size);
+static int
 parse_vc_action_conntrack_update(struct context *ctx, const struct token 
*token,
 const char *str, unsigned int len, void *buf,
 unsigned int size);
@@ -5976,11 +5983,15 @@ static const struct token token_list[] = {
.name = "dst_level",
.help = "destination field level",
.next = NEXT(action_modify_field_dst,
-NEXT_ENTRY(COMMON_UNSIGNED)),
-   .args = ARGS(ARGS_ENTRY(struct rte_flow_action_modify_field,
-   dst.level)),
+NEXT_ENTRY(ACTION_MODIFY_FIELD_DST_LEVEL_VALUE)),
.call = parse_vc_conf,
},
+   [ACTION_MODIFY_FIELD_DST_LEVEL_VALUE] = {
+   .name = "{dst_level}",
+   .help = "destination field level value",
+   .call = parse_vc_modify_field_level,
+   .comp = comp_none,
+   },
[ACTION_MODIFY_FIELD_DST_OFFSET] = {
.name = "dst_offset",
.help = "destination field bit offset",
@@ -6007,11 +6018,15 @@ static const struct token token_list[] = {
.name = "src_level",
.help = "source field level",
.next = NEXT(action_modify_field_src,
-NEXT_ENTRY(COMMON_UNSIGNED)),
-   .args = ARGS(ARGS_ENTRY(struct rte_flow_action_modify_field,
-   src.level)),
+NEXT_ENTRY(ACTION_MODIFY_FIELD_SRC_LEVEL_VALUE)),
.call = parse_vc_conf,
},
+   [ACTION_MODIFY_FIELD_SRC_LEVEL_VALUE] = {
+   .name = "{src_level}",
+   .help = "source field level value",
+   .call = parse_vc_modify_field_level,
+   .comp = comp_none,
+   },
[ACTION_MODIFY_FIELD_SRC_OFFSET] = {
.name = "src_offset",
.help = "source field bit offset",
@@ -8477,6 +8492,66 @@ parse_vc_modify_field_id(struct context *ctx, const 
struct token *token,
return len;
 }
 
+/** Parse level for modify_field command. */
+static int
+parse_vc_modify_field_level(struct context *ctx, const struct token *token,
+const char *str, unsigned int len, void *buf,
+unsigned int size)
+{
+   struct rte_flow_action_modify_field *action;
+   struct flex_item *fp;
+   uint32_t val;
+   struct buffer *out = buf;
+   char *end;
+
+   (void)token;
+  

[PATCH v3 02/11] app/testpmd: pass flex handle into matching mask

2023-01-29 Thread Rongwei Liu
In the async flow create API, only mask information is available when
creating a flow table, but the flex item handle is required to parse
the HW sample information.

Pass the flex item handle to the mask instead of UINT64/32_MAX.

Signed-off-by: Rongwei Liu 
Acked-by: Ori Kam 
---
 app/test-pmd/cmdline_flow.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 323c07253d..f5d7a67def 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -10068,8 +10068,8 @@ parse_flex_handle(struct context *ctx, const struct 
token *token,
}
if (offset == offsetof(struct rte_flow_item_flex, handle)) {
const struct flex_item *fp;
-   struct rte_flow_item_flex *item_flex = ctx->object;
-   handle = (uint16_t)(uintptr_t)item_flex->handle;
+   spec = ctx->object;
+   handle = (uint16_t)(uintptr_t)spec->handle;
if (handle >= FLEX_MAX_PARSERS_NUM) {
printf("Bad flex item handle\n");
return -1;
@@ -10079,7 +10079,9 @@ parse_flex_handle(struct context *ctx, const struct 
token *token,
printf("Bad flex item handle\n");
return -1;
}
-   item_flex->handle = fp->flex_handle;
+   spec->handle = fp->flex_handle;
+   mask = spec + 2; /* spec, last, mask */
+   mask->handle = fp->flex_handle;
} else if (offset == offsetof(struct rte_flow_item_flex, pattern)) {
handle = (uint16_t)(uintptr_t)
((struct rte_flow_item_flex *)ctx->object)->pattern;
-- 
2.27.0



[PATCH v3 03/11] net/mlx5: enable hws flex item create

2023-01-29 Thread Rongwei Liu
Enable flex item create and destroy with dv_flow_en=2
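
For reference, the HWS engine is selected with the existing mlx5 devarg,
e.g. (placeholder PCI address):

  dpdk-testpmd -a <pci_addr>,dv_flow_en=2 -- -i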

Signed-off-by: Rongwei Liu 
Acked-by: Viacheslav Ovsiienko 
---
 drivers/net/mlx5/linux/mlx5_os.c | 27 +++
 drivers/net/mlx5/mlx5_flow_hw.c  |  2 ++
 2 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index a71474c90a..f5b3edea99 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -474,10 +474,20 @@ mlx5_alloc_shared_dr(struct mlx5_priv *priv)
err = mlx5_alloc_table_hash_list(priv);
if (err)
goto error;
-   if (priv->sh->config.dv_flow_en == 2)
-   return 0;
/* The resources below are only valid with DV support. */
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
+   /* Init shared flex parsers list, no need lcore_share */
+   snprintf(s, sizeof(s), "%s_flex_parsers_list", sh->ibdev_name);
+   sh->flex_parsers_dv = mlx5_list_create(s, sh, false,
+  mlx5_flex_parser_create_cb,
+  mlx5_flex_parser_match_cb,
+  mlx5_flex_parser_remove_cb,
+  mlx5_flex_parser_clone_cb,
+  mlx5_flex_parser_clone_free_cb);
+   if (!sh->flex_parsers_dv)
+   goto error;
+   if (priv->sh->config.dv_flow_en == 2)
+   return 0;
/* Init port id action list. */
snprintf(s, sizeof(s), "%s_port_id_action_list", sh->ibdev_name);
sh->port_id_action_list = mlx5_list_create(s, sh, true,
@@ -518,16 +528,9 @@ mlx5_alloc_shared_dr(struct mlx5_priv *priv)
  flow_dv_dest_array_clone_free_cb);
if (!sh->dest_array_list)
goto error;
-   /* Init shared flex parsers list, no need lcore_share */
-   snprintf(s, sizeof(s), "%s_flex_parsers_list", sh->ibdev_name);
-   sh->flex_parsers_dv = mlx5_list_create(s, sh, false,
-  mlx5_flex_parser_create_cb,
-  mlx5_flex_parser_match_cb,
-  mlx5_flex_parser_remove_cb,
-  mlx5_flex_parser_clone_cb,
-  mlx5_flex_parser_clone_free_cb);
-   if (!sh->flex_parsers_dv)
-   goto error;
+#else
+   if (priv->sh->config.dv_flow_en == 2)
+   return 0;
 #endif
 #ifdef HAVE_MLX5DV_DR
void *domain;
diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c
index 20c71ff7f0..44953451d5 100644
--- a/drivers/net/mlx5/mlx5_flow_hw.c
+++ b/drivers/net/mlx5/mlx5_flow_hw.c
@@ -8336,6 +8336,8 @@ const struct mlx5_flow_driver_ops mlx5_flow_hw_drv_ops = {
.query = flow_hw_query,
.get_aged_flows = flow_hw_get_aged_flows,
.get_q_aged_flows = flow_hw_get_q_aged_flows,
+   .item_create = flow_dv_item_create,
+   .item_release = flow_dv_item_release,
 };
 
 /**
-- 
2.27.0



[PATCH v3 04/11] net/mlx5: add IPv6 protocol as flex item input

2023-01-29 Thread Rongwei Liu
Support the IPv6 protocol as a new flex item input link.

Signed-off-by: Rongwei Liu 
Acked-by: Viacheslav Ovsiienko 
---
 drivers/net/mlx5/mlx5_flow_flex.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_flow_flex.c 
b/drivers/net/mlx5/mlx5_flow_flex.c
index fb08910ddb..bec07b13c1 100644
--- a/drivers/net/mlx5/mlx5_flow_flex.c
+++ b/drivers/net/mlx5/mlx5_flow_flex.c
@@ -1043,6 +1043,22 @@ mlx5_flex_arc_in_udp(const struct rte_flow_item *item,
return rte_be_to_cpu_16(spec->hdr.dst_port);
 }
 
+static int
+mlx5_flex_arc_in_ipv6(const struct rte_flow_item *item,
+ struct rte_flow_error *error)
+{
+   const struct rte_flow_item_ipv6 *spec = item->spec;
+   const struct rte_flow_item_ipv6 *mask = item->mask;
+   struct rte_flow_item_ipv6 ip = { .hdr.proto = 0xff };
+
+   if (memcmp(mask, &ip, sizeof(struct rte_flow_item_ipv6))) {
+   return rte_flow_error_set
+   (error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item,
+"invalid ipv6 item mask, full mask is desired");
+   }
+   return spec->hdr.proto;
+}
+
 static int
 mlx5_flex_translate_arc_in(struct mlx5_hca_flex_attr *attr,
   const struct rte_flow_item_flex_conf *conf,
@@ -1089,6 +1105,9 @@ mlx5_flex_translate_arc_in(struct mlx5_hca_flex_attr 
*attr,
case RTE_FLOW_ITEM_TYPE_UDP:
ret = mlx5_flex_arc_in_udp(rte_item, error);
break;
+   case RTE_FLOW_ITEM_TYPE_IPV6:
+   ret = mlx5_flex_arc_in_ipv6(rte_item, error);
+   break;
default:
MLX5_ASSERT(false);
return rte_flow_error_set
-- 
2.27.0



[PATCH v3 07/11] net/mlx5: add flex item modify field implementation

2023-01-29 Thread Rongwei Liu
Add flex item modify field HWS implementation.
The minimum modify boundary is one byte.

Signed-off-by: Rongwei Liu 
Acked-by: Viacheslav Ovsiienko 
---
 drivers/common/mlx5/mlx5_prm.h  |   1 +
 drivers/net/mlx5/mlx5_flow.h|   3 +
 drivers/net/mlx5/mlx5_flow_dv.c | 165 +---
 drivers/net/mlx5/mlx5_flow_hw.c |  14 ++-
 4 files changed, 170 insertions(+), 13 deletions(-)

diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index ce6cd98fd7..0c2a516e9d 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -759,6 +759,7 @@ enum mlx5_modification_field {
MLX5_MODI_OUT_IP_ECN = 0x73,
MLX5_MODI_TUNNEL_HDR_DW_1 = 0x75,
MLX5_MODI_GTPU_FIRST_EXT_DW_0 = 0x76,
+   MLX5_MODI_INVALID = INT_MAX,
 };
 
 /* Total number of metadata reg_c's. */
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index c8761c4e5a..c71fa1c0ad 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -1080,6 +1080,8 @@ struct field_modify_info {
uint32_t size; /* Size of field in protocol header, in bytes. */
uint32_t offset; /* Offset of field in protocol header, in bytes. */
enum mlx5_modification_field id;
+   uint32_t shift;
+   uint8_t is_flex; /* Temporary indicator for flex item modify field WA. 
*/
 };
 
 /* HW steering flow attributes. */
@@ -1244,6 +1246,7 @@ struct rte_flow_actions_template {
uint16_t mhdr_off; /* Offset of DR modify header action. */
uint32_t refcnt; /* Reference counter. */
uint16_t rx_cpy_pos; /* Action position of Rx metadata to be copied. */
+   uint8_t flex_item; /* flex item index. */
 };
 
 /* Jump action struct. */
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 284f18da11..92a5914d4b 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -414,10 +414,15 @@ flow_dv_convert_modify_action(struct rte_flow_item *item,
++field;
continue;
}
-   /* Deduce actual data width in bits from mask value. */
-   off_b = rte_bsf32(mask) + carry_b;
-   size_b = sizeof(uint32_t) * CHAR_BIT -
-off_b - __builtin_clz(mask);
+   if (type == MLX5_MODIFICATION_TYPE_COPY && field->is_flex) {
+   off_b = 32 - field->shift + carry_b - field->size * 
CHAR_BIT;
+   size_b = field->size * CHAR_BIT - carry_b;
+   } else {
+   /* Deduce actual data width in bits from mask value. */
+   off_b = rte_bsf32(mask) + carry_b;
+   size_b = sizeof(uint32_t) * CHAR_BIT -
+off_b - __builtin_clz(mask);
+   }
MLX5_ASSERT(size_b);
actions[i] = (struct mlx5_modification_cmd) {
.action_type = type,
@@ -437,40 +442,46 @@ flow_dv_convert_modify_action(struct rte_flow_item *item,
 * Destination field overflow. Copy leftovers of
 * a source field to the next destination field.
 */
-   carry_b = 0;
if ((size_b > dcopy->size * CHAR_BIT - dcopy->offset) &&
dcopy->size != 0) {
actions[i].length =
dcopy->size * CHAR_BIT - dcopy->offset;
-   carry_b = actions[i].length;
+   carry_b += actions[i].length;
next_field = false;
+   } else {
+   carry_b = 0;
}
/*
 * Not enough bits in a source filed to fill a
 * destination field. Switch to the next source.
 */
if ((size_b < dcopy->size * CHAR_BIT - dcopy->offset) &&
-   (size_b == field->size * CHAR_BIT - off_b)) {
-   actions[i].length =
-   field->size * CHAR_BIT - off_b;
+   ((size_b == field->size * CHAR_BIT - off_b) ||
+field->is_flex)) {
+   actions[i].length = size_b;
dcopy->offset += actions[i].length;
next_dcopy = false;
}
-   if (next_dcopy)
-   ++dcopy;
} else {
MLX5_ASSERT(item->spec);
data = flow_dv_fetch_field((const uint8_t *)item->spec +
   field->offset, field->size);
  

[PATCH v3 08/11] net/mlx5: return error for sws modify field

2023-01-29 Thread Rongwei Liu
Return an unsupported error message when an application tries to
modify a flex item field.

Validation of packet modification actions for SW Steering checked
whether either the source or the destination field of a MODIFY_FIELD
action was a flex item.
When the DEC_TTL action is used, it has no action configuration, so
dereferencing the source or destination field is invalid. Validation
of the source and destination field types is therefore moved to the
MODIFY_FIELD-specific validation function, so that field types are
validated if and only if the action type is MODIFY_FIELD.

Signed-off-by: Dariusz Sosnowski 
Signed-off-by: Rongwei Liu 
Acked-by: Viacheslav Ovsiienko 
---
 drivers/net/mlx5/mlx5_flow_dv.c | 19 ---
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 92a5914d4b..a7c0d5bf17 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -4828,6 +4828,7 @@ flow_dv_validate_action_modify_hdr(const uint64_t 
action_flags,
return rte_flow_error_set(error, EINVAL,
  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
  NULL, "action configuration not set");
+
if (action_flags & MLX5_FLOW_ACTION_ENCAP)
return rte_flow_error_set(error, EINVAL,
  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
@@ -5153,17 +5154,21 @@ flow_dv_validate_action_modify_field(struct rte_eth_dev 
*dev,
struct mlx5_hca_attr *hca_attr = &priv->sh->cdev->config.hca_attr;
const struct rte_flow_action_modify_field *action_modify_field =
action->conf;
-   uint32_t dst_width = mlx5_flow_item_field_width(dev,
-   action_modify_field->dst.field,
-   -1, attr, error);
-   uint32_t src_width = mlx5_flow_item_field_width(dev,
-   action_modify_field->src.field,
-   dst_width, attr, error);
+   uint32_t dst_width, src_width;
 
ret = flow_dv_validate_action_modify_hdr(action_flags, action, error);
if (ret)
return ret;
-
+   if (action_modify_field->src.field == RTE_FLOW_FIELD_FLEX_ITEM ||
+   action_modify_field->dst.field == RTE_FLOW_FIELD_FLEX_ITEM)
+   return rte_flow_error_set(error, ENOTSUP,
+   RTE_FLOW_ERROR_TYPE_ACTION, action,
+   "flex item fields modification"
+   " is not supported");
+   dst_width = mlx5_flow_item_field_width(dev, 
action_modify_field->dst.field,
+  -1, attr, error);
+   src_width = mlx5_flow_item_field_width(dev, 
action_modify_field->src.field,
+  dst_width, attr, error);
if (action_modify_field->width == 0)
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ACTION, action,
-- 
2.27.0



[PATCH v3 06/11] net/mlx5/hws: add hws flex item matching support

2023-01-29 Thread Rongwei Liu
Support flex item matching in HWS; the syntax follows
SWS exactly.

The flex item should be created in advance and follow the current
JSON mapping logic.
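
For illustration, the flex item is created from a JSON profile before any
template references it (hypothetical file name; syntax per the testpmd flex
item command):

testpmd> flow flex_item create 0 0 srv6_profile.json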

Signed-off-by: Rongwei Liu 
Reviewed-by: Alex Vesker 
Acked-by: Viacheslav Ovsiienko 
---
 drivers/net/mlx5/hws/mlx5dr_definer.c |  83 ++
 drivers/net/mlx5/mlx5.c   |   2 +-
 drivers/net/mlx5/mlx5.h   |   6 ++
 drivers/net/mlx5/mlx5_flow.h  |   1 +
 drivers/net/mlx5/mlx5_flow_dv.c   |   2 +-
 drivers/net/mlx5/mlx5_flow_flex.c | 116 ++
 drivers/net/mlx5/mlx5_flow_hw.c   |  48 ++-
 7 files changed, 239 insertions(+), 19 deletions(-)

diff --git a/drivers/net/mlx5/hws/mlx5dr_definer.c 
b/drivers/net/mlx5/hws/mlx5dr_definer.c
index 6b98eb8c96..a6378afb10 100644
--- a/drivers/net/mlx5/hws/mlx5dr_definer.c
+++ b/drivers/net/mlx5/hws/mlx5dr_definer.c
@@ -293,6 +293,43 @@ mlx5dr_definer_integrity_set(struct mlx5dr_definer_fc *fc,
DR_SET(tag, ok1_bits, fc->byte_off, fc->bit_off, fc->bit_mask);
 }
 
+static void
+mlx5dr_definer_flex_parser_set(struct mlx5dr_definer_fc *fc,
+  const void *item,
+  uint8_t *tag, bool is_inner)
+{
+   const struct rte_flow_item_flex *flex = item;
+   uint32_t byte_off, val, idx;
+   int ret;
+
+   val = 0;
+   byte_off = MLX5_BYTE_OFF(definer_hl, flex_parser.flex_parser_0);
+   idx = fc->fname - MLX5DR_DEFINER_FNAME_FLEX_PARSER_0;
+   byte_off -= idx * sizeof(uint32_t);
+   ret = mlx5_flex_get_parser_value_per_byte_off(flex, flex->handle, 
byte_off,
+ false, is_inner, &val);
+   if (ret == -1 || !val)
+   return;
+
+   DR_SET(tag, val, fc->byte_off, 0, fc->bit_mask);
+}
+
+static void
+mlx5dr_definer_flex_parser_inner_set(struct mlx5dr_definer_fc *fc,
+const void *item,
+uint8_t *tag)
+{
+   mlx5dr_definer_flex_parser_set(fc, item, tag, true);
+}
+
+static void
+mlx5dr_definer_flex_parser_outer_set(struct mlx5dr_definer_fc *fc,
+const void *item,
+uint8_t *tag)
+{
+   mlx5dr_definer_flex_parser_set(fc, item, tag, false);
+}
+
 static void
 mlx5dr_definer_gre_key_set(struct mlx5dr_definer_fc *fc,
   const void *item_spec,
@@ -1465,6 +1502,47 @@ mlx5dr_definer_conv_item_meter_color(struct 
mlx5dr_definer_conv_data *cd,
return 0;
 }
 
+static int
+mlx5dr_definer_conv_item_flex_parser(struct mlx5dr_definer_conv_data *cd,
+struct rte_flow_item *item,
+int item_idx)
+{
+   uint32_t base_off = MLX5_BYTE_OFF(definer_hl, 
flex_parser.flex_parser_0);
+   const struct rte_flow_item_flex *v, *m;
+   enum mlx5dr_definer_fname fname;
+   struct mlx5dr_definer_fc *fc;
+   uint32_t i, mask, byte_off;
+   bool is_inner = cd->tunnel;
+   int ret;
+
+   m = item->mask;
+   v = item->spec;
+   mask = 0;
+   for (i = 0; i < MLX5_GRAPH_NODE_SAMPLE_NUM; i++) {
+   byte_off = base_off - i * sizeof(uint32_t);
+   ret = mlx5_flex_get_parser_value_per_byte_off(m, v->handle, 
byte_off,
+ true, is_inner, 
&mask);
+   if (ret == -1) {
+   rte_errno = EINVAL;
+   return rte_errno;
+   }
+
+   if (!mask)
+   continue;
+
+   fname = MLX5DR_DEFINER_FNAME_FLEX_PARSER_0;
+   fname += (enum mlx5dr_definer_fname)i;
+   fc = &cd->fc[fname];
+   fc->byte_off = byte_off;
+   fc->item_idx = item_idx;
+   fc->tag_set = cd->tunnel ? 
&mlx5dr_definer_flex_parser_inner_set :
+  
&mlx5dr_definer_flex_parser_outer_set;
+   fc->tag_mask_set = &mlx5dr_definer_ones_set;
+   fc->bit_mask = mask;
+   }
+   return 0;
+}
+
 static int
 mlx5dr_definer_conv_items_to_hl(struct mlx5dr_context *ctx,
struct mlx5dr_match_template *mt,
@@ -1581,6 +1659,11 @@ mlx5dr_definer_conv_items_to_hl(struct mlx5dr_context *ctx,
ret = mlx5dr_definer_conv_item_meter_color(&cd, items, i);
item_flags |= MLX5_FLOW_ITEM_METER_COLOR;
break;
+   case RTE_FLOW_ITEM_TYPE_FLEX:
+   ret = mlx5dr_definer_conv_item_flex_parser(&cd, items, i);
+   item_flags |= cd.tunnel ? MLX5_FLOW_ITEM_INNER_FLEX :
+ MLX5_FLOW_ITEM_OUTER_FLEX;
+   break;
default:
DR_LOG(ERR, "Unsupported item type %d", items->type);

[PATCH v3 09/11] app/testpmd: raw encap with flex item support

2023-01-29 Thread Rongwei Liu
The application should retrieve the raw_encap buffer from
spec->pattern if the item is a flex item.
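
A minimal sketch of that selection logic (the helper name is made up for
illustration; flow_item_size() stands in for the existing per-type size
lookup):

#include <rte_flow.h>

static const void *
raw_encap_item_bytes(const struct rte_flow_item *item, size_t *size)
{
	const struct rte_flow_item_flex *flex;

	if (item->type == RTE_FLOW_ITEM_TYPE_FLEX) {
		flex = item->spec;
		/* The raw bytes live behind spec->pattern, not in the spec. */
		*size = flex != NULL ? flex->length : 0;
		return flex != NULL ? flex->pattern : NULL;
	}
	*size = flow_item_size(item->type); /* hypothetical per-type lookup */
	return item->spec;
}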

Signed-off-by: Rongwei Liu 
Acked-by: Ori Kam 
---
 app/test-pmd/cmdline_flow.c | 26 +-
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index f5d7a67def..50c8ec5594 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -11193,6 +11193,7 @@ cmd_set_raw_parsed(const struct buffer *in)
uint16_t proto = 0;
uint16_t idx = in->port; /* We borrow port field as index */
int gtp_psc = -1; /* GTP PSC option index. */
+   const void *src_spec;
 
if (in->command == SET_SAMPLE_ACTIONS)
return cmd_set_raw_parsed_sample(in);
@@ -11216,6 +11217,7 @@ cmd_set_raw_parsed(const struct buffer *in)
item = in->args.vc.pattern + i;
if (item->spec == NULL)
item->spec = flow_item_default_mask(item);
+   src_spec = item->spec;
switch (item->type) {
case RTE_FLOW_ITEM_TYPE_ETH:
size = sizeof(struct rte_ether_hdr);
@@ -11343,9 +11345,13 @@ cmd_set_raw_parsed(const struct buffer *in)
size = sizeof(struct rte_flow_item_pfcp);
break;
case RTE_FLOW_ITEM_TYPE_FLEX:
-   size = item->spec ?
-   ((const struct rte_flow_item_flex *)
-   item->spec)->length : 0;
+   if (item->spec != NULL) {
+				size = ((const struct rte_flow_item_flex *)item->spec)->length;
+				src_spec = ((const struct rte_flow_item_flex *)item->spec)->pattern;
+   } else {
+   size = 0;
+   src_spec = NULL;
+   }
break;
case RTE_FLOW_ITEM_TYPE_GRE_OPTION:
size = 0;
@@ -11378,12 +11384,14 @@ cmd_set_raw_parsed(const struct buffer *in)
fprintf(stderr, "Error - Not supported item\n");
goto error;
}
-   *total_size += size;
-   rte_memcpy(data_tail - (*total_size), item->spec, size);
-   /* update some fields which cannot be set by cmdline */
-   update_fields((data_tail - (*total_size)), item,
- upper_layer);
-   upper_layer = proto;
+   if (size) {
+   *total_size += size;
+   rte_memcpy(data_tail - (*total_size), src_spec, size);
+   /* update some fields which cannot be set by cmdline */
+   update_fields((data_tail - (*total_size)), item,
+ upper_layer);
+   upper_layer = proto;
+   }
}
if (verbose_level & 0x1)
printf("total data size is %zu\n", (*total_size));
-- 
2.27.0



[PATCH v3 05/11] net/mlx5: adopt new flex item prm definition

2023-01-29 Thread Rongwei Liu
Per the newest PRM definition, sample_id stands for 3 parts
of information instead of a single uint32_t id: sample_id +
modify_field_id + format_select_dw.

Also, new FW capability bits have been introduced to identify
the new capability.
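
A sketch of the extended sample id layout implied above; the exact bit
widths here are an assumption for illustration only:

struct mlx5_ext_sample_id {
	union {
		struct {
			uint32_t format_select_dw:8;
			uint32_t modify_field_id:12;
			uint32_t sample_id:12;
		};
		uint32_t id; /* legacy single-id view of the same dword */
	};
};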

Signed-off-by: Rongwei Liu 
Acked-by: Viacheslav Ovsiienko 
---
 drivers/common/mlx5/mlx5_devx_cmds.c | 14 +++---
 drivers/common/mlx5/mlx5_devx_cmds.h |  7 ++-
 drivers/common/mlx5/mlx5_prm.h   | 28 ++--
 drivers/net/mlx5/mlx5.c  | 15 +++
 drivers/net/mlx5/mlx5.h  |  3 ++-
 drivers/net/mlx5/mlx5_flow_flex.c| 14 +++---
 6 files changed, 67 insertions(+), 14 deletions(-)

diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index e3a4927d0f..1f65ea7dcb 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -607,7 +607,8 @@ mlx5_devx_cmd_query_hca_vdpa_attr(void *ctx,
 
 int
 mlx5_devx_cmd_query_parse_samples(struct mlx5_devx_obj *flex_obj,
- uint32_t ids[], uint32_t num)
+ struct mlx5_ext_sample_id ids[],
+ uint32_t num, uint8_t *anchor)
 {
uint32_t in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {0};
uint32_t out[MLX5_ST_SZ_DW(create_flex_parser_out)] = {0};
@@ -636,6 +637,7 @@ mlx5_devx_cmd_query_parse_samples(struct mlx5_devx_obj *flex_obj,
(void *)flex_obj);
return -rte_errno;
}
+   *anchor = MLX5_GET(parse_graph_flex, flex, head_anchor_id);
for (i = 0; i < MLX5_GRAPH_NODE_SAMPLE_NUM; i++) {
void *s_off = (void *)((char *)sample + i *
  MLX5_ST_SZ_BYTES(parse_graph_flow_match_sample));
@@ -645,8 +647,8 @@ mlx5_devx_cmd_query_parse_samples(struct mlx5_devx_obj *flex_obj,
  flow_match_sample_en);
if (!en)
continue;
-   ids[idx++] = MLX5_GET(parse_graph_flow_match_sample, s_off,
- flow_match_sample_field_id);
+   ids[idx++].id = MLX5_GET(parse_graph_flow_match_sample, s_off,
+flow_match_sample_field_id);
}
if (num != idx) {
rte_errno = EINVAL;
@@ -794,6 +796,12 @@ mlx5_devx_cmd_query_hca_parse_graph_node_cap
 max_num_arc_out);
attr->max_num_sample = MLX5_GET(parse_graph_node_cap, hcattr,
max_num_sample);
+   attr->anchor_en = MLX5_GET(parse_graph_node_cap, hcattr, anchor_en);
+   attr->ext_sample_id = MLX5_GET(parse_graph_node_cap, hcattr, ext_sample_id);
+   attr->sample_tunnel_inner2 = MLX5_GET(parse_graph_node_cap, hcattr,
+ sample_tunnel_inner2);
+   attr->zero_size_supported = MLX5_GET(parse_graph_node_cap, hcattr,
+zero_size_supported);
attr->sample_id_in_out = MLX5_GET(parse_graph_node_cap, hcattr,
  sample_id_in_out);
attr->max_base_header_length = MLX5_GET(parse_graph_node_cap, hcattr,
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index c94b9eac06..5b33010155 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -114,6 +114,10 @@ struct mlx5_hca_flex_attr {
uint8_t  max_num_arc_out;
uint8_t  max_num_sample;
uint8_t  max_num_prog_sample:5; /* From HCA CAP 2 */
+   uint8_t  anchor_en:1;
+   uint8_t  ext_sample_id:1;
+   uint8_t  sample_tunnel_inner2:1;
+   uint8_t  zero_size_supported:1;
uint8_t  sample_id_in_out:1;
uint16_t max_base_header_length;
uint8_t  max_sample_base_offset;
@@ -706,7 +710,8 @@ int mlx5_devx_cmd_modify_tir(struct mlx5_devx_obj *tir,
 struct mlx5_devx_modify_tir_attr *tir_attr);
 __rte_internal
 int mlx5_devx_cmd_query_parse_samples(struct mlx5_devx_obj *flex_obj,
- uint32_t ids[], uint32_t num);
+ struct mlx5_ext_sample_id ids[],
+ uint32_t num, uint8_t *anchor);
 
 __rte_internal
 struct mlx5_devx_obj *
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 3790dc84b8..ce6cd98fd7 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -1893,7 +1893,11 @@ struct mlx5_ifc_parse_graph_node_cap_bits {
u8 max_num_arc_in[0x08];
u8 max_num_arc_out[0x08];
u8 max_num_sample[0x08];
-   u8 reserved_at_78[0x07];
+   u8 reserved_at_78[0x03];
+   u8 anchor_en[0x1];
+   u8 ext_sample_id[0x1];
+   u8 sample_tunnel_inner2[0x1];
+   u8 zero_size_supported[0x1];
u8 s

[PATCH v3 10/11] doc/mlx5: update mlx5 doc

2023-01-29 Thread Rongwei Liu
Add the flex item matching and modify field features to the
mlx5 documentation.

Signed-off-by: Rongwei Liu 
Acked-by: Ori Kam 
---
 doc/guides/nics/mlx5.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index b23ca35b8f..a2634c378f 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -106,7 +106,7 @@ Features
 - Sub-Function representors.
 - Sub-Function.
 - Matching on represented port.
-
+- Modify flex item field.
 
 Limitations
 ---
-- 
2.27.0



[PATCH v3 11/11] app/testpmd: adjust cleanup sequence when quitting

2023-01-29 Thread Rongwei Liu
If a flex item is referenced by an async flow, either via a
pattern template or an actions template, testpmd currently
complains "flex item has flow references". Flex items should
be flushed only after the async flow resources are cleaned up.

Signed-off-by: Rongwei Liu 
Acked-by: Ori Kam 
---
 app/test-pmd/testpmd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 134d79a555..e35f7a0e7a 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -3298,10 +3298,10 @@ flush_port_owned_resources(portid_t pi)
 {
mcast_addr_pool_destroy(pi);
port_flow_flush(pi);
-   port_flex_item_flush(pi);
port_flow_template_table_flush(pi);
port_flow_pattern_template_flush(pi);
port_flow_actions_template_flush(pi);
+   port_flex_item_flush(pi);
port_action_handle_flush(pi);
 }
 
-- 
2.27.0



RE: [PATCH v1 00/21] Add control queue & MQ support to Virtio-user vDPA

2023-01-29 Thread Xia, Chenbo
Hi Maxime,

> -Original Message-
> From: Maxime Coquelin 
> Sent: Wednesday, November 30, 2022 11:56 PM
> To: dev@dpdk.org; Xia, Chenbo ;
> david.march...@redhat.com; epere...@redhat.com
> Cc: Maxime Coquelin 
> Subject: [PATCH v1 00/21] Add control queue & MQ support to Virtio-user
> vDPA
> 
> --
> 2.38.1

I see one virtio test failed on patchwork; could you check
whether it's related?

Thanks,
Chenbo



[RFC v2 0/9] gve PMD enhancement

2023-01-29 Thread Junfeng Guo
This patch set includes three main enhancements for the gve PMD:
 - support basic data path for DQO queue format
 - support jumbo frame for GQI queue format
 - add new AdminQ cmd to verify driver compatibility

v2:
 - add one missing commit into this patch set.
 - add new contributors into the mailmap.

Junfeng Guo (9):
  net/gve: add Tx queue setup for DQO
  net/gve: add Rx queue setup for DQO
  net/gve: support device start and close for DQO
  net/gve: support queue release and stop for DQO
  net/gve: support basic Tx data path for DQO
  net/gve: support basic Rx data path for DQO
  net/gve: support basic stats for DQO
  net/gve: support jumbo frame for GQI
  net/gve: add AdminQ command to verify driver compatibility

 .mailmap|   3 +
 MAINTAINERS |   3 +
 drivers/net/gve/base/gve.h  |   1 +
 drivers/net/gve/base/gve_adminq.c   |  29 ++-
 drivers/net/gve/base/gve_adminq.h   |  48 
 drivers/net/gve/base/gve_desc_dqo.h |   4 -
 drivers/net/gve/base/gve_osdep.h|  12 +
 drivers/net/gve/gve_ethdev.c| 200 ++-
 drivers/net/gve/gve_ethdev.h|  86 ++-
 drivers/net/gve/gve_rx.c| 131 +++---
 drivers/net/gve/gve_rx_dqo.c| 343 +
 drivers/net/gve/gve_tx.c|   3 +
 drivers/net/gve/gve_tx_dqo.c| 380 
 drivers/net/gve/meson.build |   2 +
 14 files changed, 1196 insertions(+), 49 deletions(-)
 create mode 100644 drivers/net/gve/gve_rx_dqo.c
 create mode 100644 drivers/net/gve/gve_tx_dqo.c

-- 
2.34.1



[RFC v2 1/9] net/gve: add Tx queue setup for DQO

2023-01-29 Thread Junfeng Guo
Add support for the tx_queue_setup_dqo ops.

The DQO format has a submission and completion queue pair for each
Tx/Rx queue. Note that with the DQO format all descriptors, doorbells
and counters are written in little-endian.
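
For example, descriptor writes go through the rte_byteorder.h helpers so
the stored values are little-endian on any host (a generic sketch with
assumed field roles, not code from this patch):

#include <rte_byteorder.h>

static inline void
write_le_desc_fields(volatile uint64_t *addr_field,
		     volatile uint16_t *tag_field,
		     uint64_t iova, uint16_t tag)
{
	*addr_field = rte_cpu_to_le_64(iova); /* e.g. a __le64 buf_addr */
	*tag_field = rte_cpu_to_le_16(tag);   /* e.g. a __le16 descriptor field */
}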

Signed-off-by: Junfeng Guo 
Signed-off-by: Rushil Gupta 
Signed-off-by: Jordan Kimbrough 
Signed-off-by: Jeroen de Borst 
---
 .mailmap|   3 +
 MAINTAINERS |   3 +
 drivers/net/gve/base/gve.h  |   1 +
 drivers/net/gve/base/gve_desc_dqo.h |   4 -
 drivers/net/gve/base/gve_osdep.h|   4 +
 drivers/net/gve/gve_ethdev.c|  16 ++-
 drivers/net/gve/gve_ethdev.h|  33 +-
 drivers/net/gve/gve_tx_dqo.c| 178 
 drivers/net/gve/meson.build |   1 +
 9 files changed, 235 insertions(+), 8 deletions(-)
 create mode 100644 drivers/net/gve/gve_tx_dqo.c

diff --git a/.mailmap b/.mailmap
index 452267a567..553b9ce3ca 100644
--- a/.mailmap
+++ b/.mailmap
@@ -578,6 +578,7 @@ Jens Freimann  
 Jeremy Plsek 
 Jeremy Spewock 
 Jerin Jacob   

+Jeroen de Borst 
 Jerome Jutteau 
 Jerry Hao OS 
 Jerry Lilijun 
@@ -642,6 +643,7 @@ Jonathan Erb 
 Jon DeVree 
 Jon Loeliger 
 Joongi Kim 
+Jordan Kimbrough 
 Jørgen Østergaard Sloth 
 Jörg Thalheim 
 Joseph Richard 
@@ -1145,6 +1147,7 @@ Roy Franz 
 Roy Pledge 
 Roy Shterman 
 Ruifeng Wang 
+Rushil Gupta 
 Ryan E Hall 
 Sabyasachi Sengupta 
 Sachin Saxena  
diff --git a/MAINTAINERS b/MAINTAINERS
index 9a0f416d2e..7ffa709b3b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -703,6 +703,9 @@ F: doc/guides/nics/features/enic.ini
 
 Google Virtual Ethernet
 M: Junfeng Guo 
+M: Jeroen de Borst 
+M: Rushil Gupta 
+M: Jordan Kimbrough 
 F: drivers/net/gve/
 F: doc/guides/nics/gve.rst
 F: doc/guides/nics/features/gve.ini
diff --git a/drivers/net/gve/base/gve.h b/drivers/net/gve/base/gve.h
index 2dc4507acb..2b7cf7d99b 100644
--- a/drivers/net/gve/base/gve.h
+++ b/drivers/net/gve/base/gve.h
@@ -7,6 +7,7 @@
 #define _GVE_H_
 
 #include "gve_desc.h"
+#include "gve_desc_dqo.h"
 
 #define GVE_VERSION"1.3.0"
 #define GVE_VERSION_PREFIX "GVE-"
diff --git a/drivers/net/gve/base/gve_desc_dqo.h b/drivers/net/gve/base/gve_desc_dqo.h
index ee1afdecb8..bb4a18d4d1 100644
--- a/drivers/net/gve/base/gve_desc_dqo.h
+++ b/drivers/net/gve/base/gve_desc_dqo.h
@@ -13,10 +13,6 @@
 #define GVE_TX_MAX_HDR_SIZE_DQO 255
 #define GVE_TX_MIN_TSO_MSS_DQO 88
 
-#ifndef __LITTLE_ENDIAN_BITFIELD
-#error "Only little endian supported"
-#endif
-
 /* Basic TX descriptor (DTYPE 0x0C) */
 struct gve_tx_pkt_desc_dqo {
__le64 buf_addr;
diff --git a/drivers/net/gve/base/gve_osdep.h b/drivers/net/gve/base/gve_osdep.h
index 7cb73002f4..abf3d379ae 100644
--- a/drivers/net/gve/base/gve_osdep.h
+++ b/drivers/net/gve/base/gve_osdep.h
@@ -35,6 +35,10 @@ typedef rte_be16_t __be16;
 typedef rte_be32_t __be32;
 typedef rte_be64_t __be64;
 
+typedef rte_le16_t __le16;
+typedef rte_le32_t __le32;
+typedef rte_le64_t __le64;
+
 typedef rte_iova_t dma_addr_t;
 
 #define ETH_MIN_MTURTE_ETHER_MIN_MTU
diff --git a/drivers/net/gve/gve_ethdev.c b/drivers/net/gve/gve_ethdev.c
index 97781f0ed3..d03f2fba92 100644
--- a/drivers/net/gve/gve_ethdev.c
+++ b/drivers/net/gve/gve_ethdev.c
@@ -299,6 +299,7 @@ gve_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 
dev_info->default_txconf = (struct rte_eth_txconf) {
.tx_free_thresh = GVE_DEFAULT_TX_FREE_THRESH,
+   .tx_rs_thresh = GVE_DEFAULT_TX_RS_THRESH,
.offloads = 0,
};
 
@@ -360,6 +361,13 @@ static const struct eth_dev_ops gve_eth_dev_ops = {
.mtu_set  = gve_dev_mtu_set,
 };
 
+static void
+gve_eth_dev_ops_override(struct eth_dev_ops *local_eth_dev_ops)
+{
+   /* override eth_dev ops for DQO */
+   local_eth_dev_ops->tx_queue_setup = gve_tx_queue_setup_dqo;
+}
+
 static void
 gve_free_counter_array(struct gve_priv *priv)
 {
@@ -595,6 +603,7 @@ gve_teardown_priv_resources(struct gve_priv *priv)
 static int
 gve_dev_init(struct rte_eth_dev *eth_dev)
 {
+   static struct eth_dev_ops gve_local_eth_dev_ops = gve_eth_dev_ops;
struct gve_priv *priv = eth_dev->data->dev_private;
int max_tx_queues, max_rx_queues;
struct rte_pci_device *pci_dev;
@@ -602,8 +611,6 @@ gve_dev_init(struct rte_eth_dev *eth_dev)
rte_be32_t *db_bar;
int err;
 
-   eth_dev->dev_ops = &gve_eth_dev_ops;
-
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
return 0;
 
@@ -642,9 +649,12 @@ gve_dev_init(struct rte_eth_dev *eth_dev)
eth_dev->rx_pkt_burst = gve_rx_burst;
eth_dev->tx_pkt_burst = gve_tx_burst;
} else {
-   PMD_DRV_LOG(ERR, "DQO_RDA is not implemented and will be added 
in the future");
+   /* override Tx/Rx setup/release eth_dev ops */
+   gve_eth_dev_ops_override(&gve_local_eth_dev_ops);
}
 
+   eth_dev->dev_o

[RFC v2 2/9] net/gve: add Rx queue setup for DQO

2023-01-29 Thread Junfeng Guo
Add support for the rx_queue_setup_dqo ops.

Signed-off-by: Junfeng Guo 
Signed-off-by: Rushil Gupta 
Signed-off-by: Jordan Kimbrough 
Signed-off-by: Jeroen de Borst 
---
 drivers/net/gve/gve_ethdev.c |   1 +
 drivers/net/gve/gve_ethdev.h |  14 
 drivers/net/gve/gve_rx_dqo.c | 148 +++
 drivers/net/gve/meson.build  |   1 +
 4 files changed, 164 insertions(+)
 create mode 100644 drivers/net/gve/gve_rx_dqo.c

diff --git a/drivers/net/gve/gve_ethdev.c b/drivers/net/gve/gve_ethdev.c
index d03f2fba92..26182b0422 100644
--- a/drivers/net/gve/gve_ethdev.c
+++ b/drivers/net/gve/gve_ethdev.c
@@ -366,6 +366,7 @@ gve_eth_dev_ops_override(struct eth_dev_ops *local_eth_dev_ops)
 {
/* override eth_dev ops for DQO */
local_eth_dev_ops->tx_queue_setup = gve_tx_queue_setup_dqo;
+   local_eth_dev_ops->rx_queue_setup = gve_rx_queue_setup_dqo;
 }
 
 static void
diff --git a/drivers/net/gve/gve_ethdev.h b/drivers/net/gve/gve_ethdev.h
index 2dfcef6893..0adfc90554 100644
--- a/drivers/net/gve/gve_ethdev.h
+++ b/drivers/net/gve/gve_ethdev.h
@@ -145,6 +145,7 @@ struct gve_rx_queue {
uint16_t nb_rx_desc;
uint16_t expected_seqno; /* the next expected seqno */
uint16_t free_thresh;
+   uint16_t nb_rx_hold;
uint32_t next_avail;
uint32_t nb_avail;
 
@@ -163,6 +164,14 @@ struct gve_rx_queue {
uint16_t ntfy_id;
uint16_t rx_buf_len;
 
+   /* newly added for DQO*/
+   volatile struct gve_rx_desc_dqo *rx_ring;
+   struct gve_rx_compl_desc_dqo *compl_ring;
+   const struct rte_memzone *compl_ring_mz;
+   uint64_t compl_ring_phys_addr;
+   uint8_t cur_gen_bit;
+   uint16_t bufq_tail;
+
/* Only valid for DQO_RDA queue format */
struct gve_rx_queue *bufq;
 
@@ -334,6 +343,11 @@ gve_tx_burst(void *txq, struct rte_mbuf **tx_pkts, 
uint16_t nb_pkts);
 
 /* Below functions are used for DQO */
 
+int
+gve_rx_queue_setup_dqo(struct rte_eth_dev *dev, uint16_t queue_id,
+  uint16_t nb_desc, unsigned int socket_id,
+  const struct rte_eth_rxconf *conf,
+  struct rte_mempool *pool);
 int
 gve_tx_queue_setup_dqo(struct rte_eth_dev *dev, uint16_t queue_id,
   uint16_t nb_desc, unsigned int socket_id,
diff --git a/drivers/net/gve/gve_rx_dqo.c b/drivers/net/gve/gve_rx_dqo.c
new file mode 100644
index 00..e8a6d575fc
--- /dev/null
+++ b/drivers/net/gve/gve_rx_dqo.c
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Intel Corporation
+ */
+
+#include "gve_ethdev.h"
+#include "base/gve_adminq.h"
+
+static void
+gve_reset_rxq_dqo(struct gve_rx_queue *rxq)
+{
+   struct rte_mbuf **sw_ring;
+   uint32_t size, i;
+
+   if (rxq == NULL) {
+   PMD_DRV_LOG(ERR, "pointer to rxq is NULL");
+   return;
+   }
+
+   size = rxq->nb_rx_desc * sizeof(struct gve_rx_desc_dqo);
+   for (i = 0; i < size; i++)
+   ((volatile char *)rxq->rx_ring)[i] = 0;
+
+   size = rxq->nb_rx_desc * sizeof(struct gve_rx_compl_desc_dqo);
+   for (i = 0; i < size; i++)
+   ((volatile char *)rxq->compl_ring)[i] = 0;
+
+   sw_ring = rxq->sw_ring;
+   for (i = 0; i < rxq->nb_rx_desc; i++)
+   sw_ring[i] = NULL;
+
+   rxq->bufq_tail = 0;
+   rxq->next_avail = 0;
+   rxq->nb_rx_hold = rxq->nb_rx_desc - 1;
+
+   rxq->rx_tail = 0;
+   rxq->cur_gen_bit = 1;
+}
+
+int
+gve_rx_queue_setup_dqo(struct rte_eth_dev *dev, uint16_t queue_id,
+  uint16_t nb_desc, unsigned int socket_id,
+  const struct rte_eth_rxconf *conf,
+  struct rte_mempool *pool)
+{
+   struct gve_priv *hw = dev->data->dev_private;
+   const struct rte_memzone *mz;
+   struct gve_rx_queue *rxq;
+   uint16_t free_thresh;
+   int err = 0;
+
+   if (nb_desc != hw->rx_desc_cnt) {
+   PMD_DRV_LOG(WARNING, "gve doesn't support nb_desc config, use 
hw nb_desc %u.",
+   hw->rx_desc_cnt);
+   }
+   nb_desc = hw->rx_desc_cnt;
+
+   /* Allocate the RX queue data structure. */
+   rxq = rte_zmalloc_socket("gve rxq",
+sizeof(struct gve_rx_queue),
+RTE_CACHE_LINE_SIZE,
+socket_id);
+   if (rxq == NULL) {
+   PMD_DRV_LOG(ERR, "Failed to allocate memory for rx queue 
structure");
+   return -ENOMEM;
+   }
+
+   /* check free_thresh here */
+   free_thresh = conf->rx_free_thresh ?
+   conf->rx_free_thresh : GVE_DEFAULT_RX_FREE_THRESH;
+   if (free_thresh >= nb_desc) {
+   PMD_DRV_LOG(ERR, "rx_free_thresh (%u) must be less than nb_desc 
(%u).",
+   free_thresh, rxq->nb_rx_desc);
+   err = -EINVAL;
+   goto err_rxq;
+   }

[RFC v2 3/9] net/gve: support device start and close for DQO

2023-01-29 Thread Junfeng Guo
Add device start and close support for DQO.

Signed-off-by: Junfeng Guo 
Signed-off-by: Rushil Gupta 
Signed-off-by: Jordan Kimbrough 
Signed-off-by: Jeroen de Borst 
---
 drivers/net/gve/base/gve_adminq.c | 10 +++
 drivers/net/gve/gve_ethdev.c  | 43 ++-
 2 files changed, 47 insertions(+), 6 deletions(-)

diff --git a/drivers/net/gve/base/gve_adminq.c b/drivers/net/gve/base/gve_adminq.c
index e745b709b2..e963f910a0 100644
--- a/drivers/net/gve/base/gve_adminq.c
+++ b/drivers/net/gve/base/gve_adminq.c
@@ -497,11 +497,11 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index)
cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
} else {
cmd.create_tx_queue.tx_ring_size =
-   cpu_to_be16(txq->nb_tx_desc);
+   cpu_to_be16(priv->tx_desc_cnt);
cmd.create_tx_queue.tx_comp_ring_addr =
-   cpu_to_be64(txq->complq->tx_ring_phys_addr);
+   cpu_to_be64(txq->compl_ring_phys_addr);
cmd.create_tx_queue.tx_comp_ring_size =
-   cpu_to_be16(priv->tx_compq_size);
+   cpu_to_be16(priv->tx_compq_size * DQO_TX_MULTIPLIER);
}
 
return gve_adminq_issue_cmd(priv, &cmd);
@@ -549,9 +549,9 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
cmd.create_rx_queue.rx_ring_size =
cpu_to_be16(priv->rx_desc_cnt);
cmd.create_rx_queue.rx_desc_ring_addr =
-   cpu_to_be64(rxq->rx_ring_phys_addr);
+   cpu_to_be64(rxq->compl_ring_phys_addr);
cmd.create_rx_queue.rx_data_ring_addr =
-   cpu_to_be64(rxq->bufq->rx_ring_phys_addr);
+   cpu_to_be64(rxq->rx_ring_phys_addr);
cmd.create_rx_queue.packet_buffer_size =
cpu_to_be16(rxq->rx_buf_len);
cmd.create_rx_queue.rx_buff_ring_size =
diff --git a/drivers/net/gve/gve_ethdev.c b/drivers/net/gve/gve_ethdev.c
index 26182b0422..3543378978 100644
--- a/drivers/net/gve/gve_ethdev.c
+++ b/drivers/net/gve/gve_ethdev.c
@@ -78,6 +78,9 @@ gve_free_qpls(struct gve_priv *priv)
uint16_t nb_rxqs = priv->max_nb_rxq;
uint32_t i;
 
+   if (priv->queue_format != GVE_GQI_QPL_FORMAT)
+   return;
+
for (i = 0; i < nb_txqs + nb_rxqs; i++) {
if (priv->qpl[i].mz != NULL)
rte_memzone_free(priv->qpl[i].mz);
@@ -138,6 +141,41 @@ gve_refill_pages(struct gve_rx_queue *rxq)
return 0;
 }
 
+static int
+gve_refill_dqo(struct gve_rx_queue *rxq)
+{
+   struct rte_mbuf *nmb;
+   uint16_t i;
+   int diag;
+
+   diag = rte_pktmbuf_alloc_bulk(rxq->mpool, &rxq->sw_ring[0], rxq->nb_rx_desc);
+   if (diag < 0) {
+   for (i = 0; i < rxq->nb_rx_desc - 1; i++) {
+   nmb = rte_pktmbuf_alloc(rxq->mpool);
+   if (!nmb)
+   break;
+   rxq->sw_ring[i] = nmb;
+   }
+   if (i < rxq->nb_rx_desc - 1)
+   return -ENOMEM;
+   }
+
+   for (i = 0; i < rxq->nb_rx_desc; i++) {
+   if (i == rxq->nb_rx_desc - 1)
+   break;
+   nmb = rxq->sw_ring[i];
+   rxq->rx_ring[i].buf_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
+   rxq->rx_ring[i].buf_id = rte_cpu_to_le_16(i);
+   }
+
+   rxq->nb_rx_hold = 0;
+   rxq->bufq_tail = rxq->nb_rx_desc - 1;
+
+   rte_write32(rxq->bufq_tail, rxq->qrx_tail);
+
+   return 0;
+}
+
 static int
 gve_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
 {
@@ -206,7 +244,10 @@ gve_dev_start(struct rte_eth_dev *dev)
 
rte_write32(rte_cpu_to_be_32(GVE_IRQ_MASK), rxq->ntfy_addr);
 
-   err = gve_refill_pages(rxq);
+   if (gve_is_gqi(priv))
+   err = gve_refill_pages(rxq);
+   else
+   err = gve_refill_dqo(rxq);
if (err) {
PMD_DRV_LOG(ERR, "Failed to refill for RX");
goto err_rx;
-- 
2.34.1



[RFC v2 4/9] net/gve: support queue release and stop for DQO

2023-01-29 Thread Junfeng Guo
Add support for queue operations:
 - gve_tx_queue_release_dqo
 - gve_rx_queue_release_dqo
 - gve_stop_tx_queues_dqo
 - gve_stop_rx_queues_dqo

Signed-off-by: Junfeng Guo 
Signed-off-by: Rushil Gupta 
Signed-off-by: Jordan Kimbrough 
Signed-off-by: Jeroen de Borst 
---
 drivers/net/gve/gve_ethdev.c | 18 +---
 drivers/net/gve/gve_ethdev.h | 12 
 drivers/net/gve/gve_rx.c |  3 ++
 drivers/net/gve/gve_rx_dqo.c | 57 
 drivers/net/gve/gve_tx.c |  3 ++
 drivers/net/gve/gve_tx_dqo.c | 55 ++
 6 files changed, 144 insertions(+), 4 deletions(-)

diff --git a/drivers/net/gve/gve_ethdev.c b/drivers/net/gve/gve_ethdev.c
index 3543378978..7c4be3a1cb 100644
--- a/drivers/net/gve/gve_ethdev.c
+++ b/drivers/net/gve/gve_ethdev.c
@@ -292,11 +292,19 @@ gve_dev_close(struct rte_eth_dev *dev)
PMD_DRV_LOG(ERR, "Failed to stop dev.");
}
 
-   for (i = 0; i < dev->data->nb_tx_queues; i++)
-   gve_tx_queue_release(dev, i);
+   if (gve_is_gqi(priv)) {
+   for (i = 0; i < dev->data->nb_tx_queues; i++)
+   gve_tx_queue_release(dev, i);
+
+   for (i = 0; i < dev->data->nb_rx_queues; i++)
+   gve_rx_queue_release(dev, i);
+   } else {
+   for (i = 0; i < dev->data->nb_tx_queues; i++)
+   gve_tx_queue_release_dqo(dev, i);
 
-   for (i = 0; i < dev->data->nb_rx_queues; i++)
-   gve_rx_queue_release(dev, i);
+   for (i = 0; i < dev->data->nb_rx_queues; i++)
+   gve_rx_queue_release_dqo(dev, i);
+   }
 
gve_free_qpls(priv);
rte_free(priv->adminq);
@@ -408,6 +416,8 @@ gve_eth_dev_ops_override(struct eth_dev_ops *local_eth_dev_ops)
/* override eth_dev ops for DQO */
local_eth_dev_ops->tx_queue_setup = gve_tx_queue_setup_dqo;
local_eth_dev_ops->rx_queue_setup = gve_rx_queue_setup_dqo;
+   local_eth_dev_ops->tx_queue_release = gve_tx_queue_release_dqo;
+   local_eth_dev_ops->rx_queue_release = gve_rx_queue_release_dqo;
 }
 
 static void
diff --git a/drivers/net/gve/gve_ethdev.h b/drivers/net/gve/gve_ethdev.h
index 0adfc90554..93314f2db3 100644
--- a/drivers/net/gve/gve_ethdev.h
+++ b/drivers/net/gve/gve_ethdev.h
@@ -353,4 +353,16 @@ gve_tx_queue_setup_dqo(struct rte_eth_dev *dev, uint16_t queue_id,
   uint16_t nb_desc, unsigned int socket_id,
   const struct rte_eth_txconf *conf);
 
+void
+gve_tx_queue_release_dqo(struct rte_eth_dev *dev, uint16_t qid);
+
+void
+gve_rx_queue_release_dqo(struct rte_eth_dev *dev, uint16_t qid);
+
+void
+gve_stop_tx_queues_dqo(struct rte_eth_dev *dev);
+
+void
+gve_stop_rx_queues_dqo(struct rte_eth_dev *dev);
+
 #endif /* _GVE_ETHDEV_H_ */
diff --git a/drivers/net/gve/gve_rx.c b/drivers/net/gve/gve_rx.c
index 518c9d109c..9ba975c9b4 100644
--- a/drivers/net/gve/gve_rx.c
+++ b/drivers/net/gve/gve_rx.c
@@ -343,6 +343,9 @@ gve_stop_rx_queues(struct rte_eth_dev *dev)
uint16_t i;
int err;
 
+   if (!gve_is_gqi(hw))
+   return gve_stop_rx_queues_dqo(dev);
+
err = gve_adminq_destroy_rx_queues(hw, dev->data->nb_rx_queues);
if (err != 0)
PMD_DRV_LOG(WARNING, "failed to destroy rxqs");
diff --git a/drivers/net/gve/gve_rx_dqo.c b/drivers/net/gve/gve_rx_dqo.c
index e8a6d575fc..aca6f8ea2d 100644
--- a/drivers/net/gve/gve_rx_dqo.c
+++ b/drivers/net/gve/gve_rx_dqo.c
@@ -5,6 +5,38 @@
 #include "gve_ethdev.h"
 #include "base/gve_adminq.h"
 
+static inline void
+gve_release_rxq_mbufs_dqo(struct gve_rx_queue *rxq)
+{
+   uint16_t i;
+
+   for (i = 0; i < rxq->nb_rx_desc; i++) {
+   if (rxq->sw_ring[i]) {
+   rte_pktmbuf_free_seg(rxq->sw_ring[i]);
+   rxq->sw_ring[i] = NULL;
+   }
+   }
+
+   rxq->nb_avail = rxq->nb_rx_desc;
+}
+
+void
+gve_rx_queue_release_dqo(struct rte_eth_dev *dev, uint16_t qid)
+{
+   struct gve_rx_queue *q = dev->data->rx_queues[qid];
+
+   if (q == NULL)
+   return;
+
+   gve_release_rxq_mbufs_dqo(q);
+   rte_free(q->sw_ring);
+   rte_memzone_free(q->compl_ring_mz);
+   rte_memzone_free(q->mz);
+   rte_memzone_free(q->qres_mz);
+   q->qres = NULL;
+   rte_free(q);
+}
+
 static void
 gve_reset_rxq_dqo(struct gve_rx_queue *rxq)
 {
@@ -54,6 +86,12 @@ gve_rx_queue_setup_dqo(struct rte_eth_dev *dev, uint16_t queue_id,
}
nb_desc = hw->rx_desc_cnt;
 
+   /* Free memory if needed */
+   if (dev->data->rx_queues[queue_id]) {
+   gve_rx_queue_release_dqo(dev, queue_id);
+   dev->data->rx_queues[queue_id] = NULL;
+   }
+
/* Allocate the RX queue data structure. */
rxq = rte_zmalloc_socket("gve rxq",
 sizeof(struct gve_rx_queue),
@@ -146,3 +184,22 @@ gve_rx_q

[RFC v2 5/9] net/gve: support basic Tx data path for DQO

2023-01-29 Thread Junfeng Guo
Add basic Tx data path support for DQO.
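
The completion ring added below is polled with a generation bit: the
device toggles the bit each time it wraps the ring, so an entry whose
generation differs from the driver's expectation has not been written
yet. A simplified sketch of the idea (names reduced from the patch):

struct compl_desc {
	uint8_t generation;
	/* ... completion type, tag, etc. ... */
};

static bool
poll_one_completion(struct compl_desc *ring, uint16_t ring_size,
		    uint16_t *tail, uint8_t *cur_gen)
{
	struct compl_desc *desc = &ring[*tail];

	if (desc->generation != *cur_gen)
		return false; /* entry not produced yet */
	/* ... process the completion entry ... */
	if (++(*tail) == ring_size) {
		*tail = 0;
		*cur_gen ^= 1; /* expected generation flips on wrap */
	}
	return true;
}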

Signed-off-by: Junfeng Guo 
Signed-off-by: Rushil Gupta 
Signed-off-by: Jordan Kimbrough 
Signed-off-by: Jeroen de Borst 
---
 drivers/net/gve/gve_ethdev.c |   1 +
 drivers/net/gve/gve_ethdev.h |   4 +
 drivers/net/gve/gve_tx_dqo.c | 141 +++
 3 files changed, 146 insertions(+)

diff --git a/drivers/net/gve/gve_ethdev.c b/drivers/net/gve/gve_ethdev.c
index 7c4be3a1cb..512a038968 100644
--- a/drivers/net/gve/gve_ethdev.c
+++ b/drivers/net/gve/gve_ethdev.c
@@ -703,6 +703,7 @@ gve_dev_init(struct rte_eth_dev *eth_dev)
} else {
/* override Tx/Rx setup/release eth_dev ops */
gve_eth_dev_ops_override(&gve_local_eth_dev_ops);
+   eth_dev->tx_pkt_burst = gve_tx_burst_dqo;
}
 
eth_dev->dev_ops = &gve_local_eth_dev_ops;
diff --git a/drivers/net/gve/gve_ethdev.h b/drivers/net/gve/gve_ethdev.h
index 93314f2db3..ba657dd6c1 100644
--- a/drivers/net/gve/gve_ethdev.h
+++ b/drivers/net/gve/gve_ethdev.h
@@ -125,6 +125,7 @@ struct gve_tx_queue {
uint8_t cur_gen_bit;
uint32_t last_desc_cleaned;
void **txqs;
+   uint16_t re_cnt;
 
/* Only valid for DQO_RDA queue format */
struct gve_tx_queue *complq;
@@ -365,4 +366,7 @@ gve_stop_tx_queues_dqo(struct rte_eth_dev *dev);
 void
 gve_stop_rx_queues_dqo(struct rte_eth_dev *dev);
 
+uint16_t
+gve_tx_burst_dqo(void *txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+
 #endif /* _GVE_ETHDEV_H_ */
diff --git a/drivers/net/gve/gve_tx_dqo.c b/drivers/net/gve/gve_tx_dqo.c
index e2e4153f27..3583c82246 100644
--- a/drivers/net/gve/gve_tx_dqo.c
+++ b/drivers/net/gve/gve_tx_dqo.c
@@ -5,6 +5,147 @@
 #include "gve_ethdev.h"
 #include "base/gve_adminq.h"
 
+static inline void
+gve_tx_clean_dqo(struct gve_tx_queue *txq)
+{
+   struct gve_tx_compl_desc *compl_ring;
+   struct gve_tx_compl_desc *compl_desc;
+   struct gve_tx_queue *aim_txq;
+   uint16_t nb_desc_clean;
+   struct rte_mbuf *txe;
+   uint16_t compl_tag;
+   uint16_t next;
+
+   next = txq->complq_tail;
+   compl_ring = txq->compl_ring;
+   compl_desc = &compl_ring[next];
+
+   if (compl_desc->generation != txq->cur_gen_bit)
+   return;
+
+   compl_tag = rte_le_to_cpu_16(compl_desc->completion_tag);
+
+   aim_txq = txq->txqs[compl_desc->id];
+
+   switch (compl_desc->type) {
+   case GVE_COMPL_TYPE_DQO_DESC:
+   /* need to clean Descs from last_cleaned to compl_tag */
+   if (aim_txq->last_desc_cleaned > compl_tag)
+   nb_desc_clean = aim_txq->nb_tx_desc - aim_txq->last_desc_cleaned +
+   compl_tag;
+   else
+   nb_desc_clean = compl_tag - aim_txq->last_desc_cleaned;
+   aim_txq->nb_free += nb_desc_clean;
+   aim_txq->last_desc_cleaned = compl_tag;
+   break;
+   case GVE_COMPL_TYPE_DQO_REINJECTION:
+   PMD_DRV_LOG(DEBUG, "GVE_COMPL_TYPE_DQO_REINJECTION !!!");
+   /* FALLTHROUGH */
+   case GVE_COMPL_TYPE_DQO_PKT:
+   txe = aim_txq->sw_ring[compl_tag];
+   if (txe != NULL) {
+   rte_pktmbuf_free_seg(txe);
+   txe = NULL;
+   }
+   break;
+   case GVE_COMPL_TYPE_DQO_MISS:
+   rte_delay_us_sleep(1);
+   PMD_DRV_LOG(DEBUG, "GVE_COMPL_TYPE_DQO_MISS ignored !!!");
+   break;
+   default:
+   PMD_DRV_LOG(ERR, "unknown completion type.");
+   return;
+   }
+
+   next++;
+   if (next == txq->nb_tx_desc * DQO_TX_MULTIPLIER) {
+   next = 0;
+   txq->cur_gen_bit ^= 1;
+   }
+
+   txq->complq_tail = next;
+}
+
+uint16_t
+gve_tx_burst_dqo(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+   struct gve_tx_queue *txq = tx_queue;
+   volatile union gve_tx_desc_dqo *txr;
+   volatile union gve_tx_desc_dqo *txd;
+   struct rte_mbuf **sw_ring;
+   struct rte_mbuf *tx_pkt;
+   uint16_t mask, sw_mask;
+   uint16_t nb_to_clean;
+   uint16_t nb_tx = 0;
+   uint16_t nb_used;
+   uint16_t tx_id;
+   uint16_t sw_id;
+
+   sw_ring = txq->sw_ring;
+   txr = txq->tx_ring;
+
+   mask = txq->nb_tx_desc - 1;
+   sw_mask = txq->sw_size - 1;
+   tx_id = txq->tx_tail;
+   sw_id = txq->sw_tail;
+
+   for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
+   tx_pkt = tx_pkts[nb_tx];
+
+   if (txq->nb_free <= txq->free_thresh) {
+   nb_to_clean = DQO_TX_MULTIPLIER * txq->rs_thresh;
+   while (nb_to_clean--)
+   gve_tx_clean_dqo(txq);
+   }
+
+   if (txq->nb_free < tx_pkt->nb_segs)
+   break;
+
+   nb_used = tx_pkt->nb_segs;
+
+

[RFC v2 6/9] net/gve: support basic Rx data path for DQO

2023-01-29 Thread Junfeng Guo
Add basic Rx data path support for DQO.

Signed-off-by: Junfeng Guo 
Signed-off-by: Rushil Gupta 
Signed-off-by: Jordan Kimbrough 
Signed-off-by: Jeroen de Borst 
---
 drivers/net/gve/gve_ethdev.c |   1 +
 drivers/net/gve/gve_ethdev.h |   3 +
 drivers/net/gve/gve_rx_dqo.c | 128 +++
 3 files changed, 132 insertions(+)

diff --git a/drivers/net/gve/gve_ethdev.c b/drivers/net/gve/gve_ethdev.c
index 512a038968..89e3f09c37 100644
--- a/drivers/net/gve/gve_ethdev.c
+++ b/drivers/net/gve/gve_ethdev.c
@@ -703,6 +703,7 @@ gve_dev_init(struct rte_eth_dev *eth_dev)
} else {
/* override Tx/Rx setup/release eth_dev ops */
gve_eth_dev_ops_override(&gve_local_eth_dev_ops);
+   eth_dev->rx_pkt_burst = gve_rx_burst_dqo;
eth_dev->tx_pkt_burst = gve_tx_burst_dqo;
}
 
diff --git a/drivers/net/gve/gve_ethdev.h b/drivers/net/gve/gve_ethdev.h
index ba657dd6c1..d434f9babe 100644
--- a/drivers/net/gve/gve_ethdev.h
+++ b/drivers/net/gve/gve_ethdev.h
@@ -366,6 +366,9 @@ gve_stop_tx_queues_dqo(struct rte_eth_dev *dev);
 void
 gve_stop_rx_queues_dqo(struct rte_eth_dev *dev);
 
+uint16_t
+gve_rx_burst_dqo(void *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+
 uint16_t
 gve_tx_burst_dqo(void *txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
 
diff --git a/drivers/net/gve/gve_rx_dqo.c b/drivers/net/gve/gve_rx_dqo.c
index aca6f8ea2d..244517ce5d 100644
--- a/drivers/net/gve/gve_rx_dqo.c
+++ b/drivers/net/gve/gve_rx_dqo.c
@@ -5,6 +5,134 @@
 #include "gve_ethdev.h"
 #include "base/gve_adminq.h"
 
+static inline void
+gve_rx_refill_dqo(struct gve_rx_queue *rxq)
+{
+   volatile struct gve_rx_desc_dqo *rx_buf_ring;
+   volatile struct gve_rx_desc_dqo *rx_buf_desc;
+   struct rte_mbuf *nmb[rxq->free_thresh];
+   uint16_t nb_refill = rxq->free_thresh;
+   uint16_t nb_desc = rxq->nb_rx_desc;
+   uint16_t next_avail = rxq->bufq_tail;
+   struct rte_eth_dev *dev;
+   uint64_t dma_addr;
+   uint16_t delta;
+   int i;
+
+   if (rxq->nb_rx_hold < rxq->free_thresh)
+   return;
+
+   rx_buf_ring = rxq->rx_ring;
+   delta = nb_desc - next_avail;
+   if (unlikely(delta < nb_refill)) {
+   if (likely(rte_pktmbuf_alloc_bulk(rxq->mpool, nmb, delta) == 0)) {
+   for (i = 0; i < delta; i++) {
+   rx_buf_desc = &rx_buf_ring[next_avail + i];
+   rxq->sw_ring[next_avail + i] = nmb[i];
+   dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb[i]));
+   rx_buf_desc->header_buf_addr = 0;
+   rx_buf_desc->buf_addr = dma_addr;
+   }
+   nb_refill -= delta;
+   next_avail = 0;
+   rxq->nb_rx_hold -= delta;
+   } else {
+   dev = &rte_eth_devices[rxq->port_id];
+   dev->data->rx_mbuf_alloc_failed += nb_desc - next_avail;
+   PMD_DRV_LOG(DEBUG, "RX mbuf alloc failed port_id=%u 
queue_id=%u",
+   rxq->port_id, rxq->queue_id);
+   return;
+   }
+   }
+
+   if (nb_desc - next_avail >= nb_refill) {
+   if (likely(rte_pktmbuf_alloc_bulk(rxq->mpool, nmb, nb_refill) == 0)) {
+   for (i = 0; i < nb_refill; i++) {
+   rx_buf_desc = &rx_buf_ring[next_avail + i];
+   rxq->sw_ring[next_avail + i] = nmb[i];
+   dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb[i]));
+   rx_buf_desc->header_buf_addr = 0;
+   rx_buf_desc->buf_addr = dma_addr;
+   }
+   next_avail += nb_refill;
+   rxq->nb_rx_hold -= nb_refill;
+   } else {
+   dev = &rte_eth_devices[rxq->port_id];
+   dev->data->rx_mbuf_alloc_failed += nb_desc - next_avail;
+   PMD_DRV_LOG(DEBUG, "RX mbuf alloc failed port_id=%u 
queue_id=%u",
+   rxq->port_id, rxq->queue_id);
+   }
+   }
+
+   rte_write32(next_avail, rxq->qrx_tail);
+
+   rxq->bufq_tail = next_avail;
+}
+
+uint16_t
+gve_rx_burst_dqo(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+   volatile struct gve_rx_compl_desc_dqo *rx_compl_ring;
+   volatile struct gve_rx_compl_desc_dqo *rx_desc;
+   struct gve_rx_queue *rxq;
+   struct rte_mbuf *rxm;
+   uint16_t rx_id_bufq;
+   uint16_t pkt_len;
+   uint16_t rx_id;
+   uint16_t nb_rx;
+
+   nb_rx = 0;
+   rxq = rx_queue;
+   rx_id = rxq->rx_tail;
+   rx_id_bufq = rxq->next_avail;
+   rx_compl_ring = rxq->compl_ring;
+
+ 

[RFC v2 7/9] net/gve: support basic stats for DQO

2023-01-29 Thread Junfeng Guo
Add basic stats support for DQO.

Signed-off-by: Junfeng Guo 
Signed-off-by: Rushil Gupta 
Signed-off-by: Jordan Kimbrough 
Signed-off-by: Jeroen de Borst 
---
 drivers/net/gve/gve_ethdev.c | 60 
 drivers/net/gve/gve_ethdev.h | 11 +++
 drivers/net/gve/gve_rx_dqo.c | 12 +++-
 drivers/net/gve/gve_tx_dqo.c |  6 
 4 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/drivers/net/gve/gve_ethdev.c b/drivers/net/gve/gve_ethdev.c
index 89e3f09c37..fae00305f9 100644
--- a/drivers/net/gve/gve_ethdev.c
+++ b/drivers/net/gve/gve_ethdev.c
@@ -369,6 +369,64 @@ gve_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
return 0;
 }
 
+static int
+gve_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
+{
+   uint16_t i;
+
+   for (i = 0; i < dev->data->nb_tx_queues; i++) {
+   struct gve_tx_queue *txq = dev->data->tx_queues[i];
+   if (txq == NULL)
+   continue;
+
+   stats->opackets += txq->packets;
+   stats->obytes += txq->bytes;
+   stats->oerrors += txq->errors;
+   }
+
+   for (i = 0; i < dev->data->nb_rx_queues; i++) {
+   struct gve_rx_queue *rxq = dev->data->rx_queues[i];
+   if (rxq == NULL)
+   continue;
+
+   stats->ipackets += rxq->packets;
+   stats->ibytes += rxq->bytes;
+   stats->ierrors += rxq->errors;
+   stats->rx_nombuf += rxq->no_mbufs;
+   }
+
+   return 0;
+}
+
+static int
+gve_dev_stats_reset(struct rte_eth_dev *dev)
+{
+   uint16_t i;
+
+   for (i = 0; i < dev->data->nb_tx_queues; i++) {
+   struct gve_tx_queue *txq = dev->data->tx_queues[i];
+   if (txq == NULL)
+   continue;
+
+   txq->packets  = 0;
+   txq->bytes = 0;
+   txq->errors = 0;
+   }
+
+   for (i = 0; i < dev->data->nb_rx_queues; i++) {
+   struct gve_rx_queue *rxq = dev->data->rx_queues[i];
+   if (rxq == NULL)
+   continue;
+
+   rxq->packets  = 0;
+   rxq->bytes = 0;
+   rxq->errors = 0;
+   rxq->no_mbufs = 0;
+   }
+
+   return 0;
+}
+
 static int
 gve_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
 {
@@ -407,6 +465,8 @@ static const struct eth_dev_ops gve_eth_dev_ops = {
.rx_queue_release = gve_rx_queue_release,
.tx_queue_release = gve_tx_queue_release,
.link_update  = gve_link_update,
+   .stats_get= gve_dev_stats_get,
+   .stats_reset  = gve_dev_stats_reset,
.mtu_set  = gve_dev_mtu_set,
 };
 
diff --git a/drivers/net/gve/gve_ethdev.h b/drivers/net/gve/gve_ethdev.h
index d434f9babe..2e0f96499d 100644
--- a/drivers/net/gve/gve_ethdev.h
+++ b/drivers/net/gve/gve_ethdev.h
@@ -105,6 +105,11 @@ struct gve_tx_queue {
struct gve_queue_page_list *qpl;
struct gve_tx_iovec *iov_ring;
 
+   /* stats items */
+   uint64_t packets;
+   uint64_t bytes;
+   uint64_t errors;
+
uint16_t port_id;
uint16_t queue_id;
 
@@ -156,6 +161,12 @@ struct gve_rx_queue {
/* only valid for GQI_QPL queue format */
struct gve_queue_page_list *qpl;
 
+   /* stats items */
+   uint64_t packets;
+   uint64_t bytes;
+   uint64_t errors;
+   uint64_t no_mbufs;
+
struct gve_priv *hw;
const struct rte_memzone *qres_mz;
struct gve_queue_resources *qres;
diff --git a/drivers/net/gve/gve_rx_dqo.c b/drivers/net/gve/gve_rx_dqo.c
index 244517ce5d..41ead5bd98 100644
--- a/drivers/net/gve/gve_rx_dqo.c
+++ b/drivers/net/gve/gve_rx_dqo.c
@@ -37,6 +37,7 @@ gve_rx_refill_dqo(struct gve_rx_queue *rxq)
next_avail = 0;
rxq->nb_rx_hold -= delta;
} else {
+   rxq->no_mbufs += nb_desc - next_avail;
dev = &rte_eth_devices[rxq->port_id];
dev->data->rx_mbuf_alloc_failed += nb_desc - next_avail;
PMD_DRV_LOG(DEBUG, "RX mbuf alloc failed port_id=%u 
queue_id=%u",
@@ -57,6 +58,7 @@ gve_rx_refill_dqo(struct gve_rx_queue *rxq)
next_avail += nb_refill;
rxq->nb_rx_hold -= nb_refill;
} else {
+   rxq->no_mbufs += nb_desc - next_avail;
dev = &rte_eth_devices[rxq->port_id];
dev->data->rx_mbuf_alloc_failed += nb_desc - next_avail;
PMD_DRV_LOG(DEBUG, "RX mbuf alloc failed port_id=%u 
queue_id=%u",
@@ -80,7 +82,9 @@ gve_rx_burst_dqo(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
uint16_t pkt_len;
uint16_t rx_id;
uint16_t nb_rx;
+   uint64_t bytes;
 
+   bytes = 0;

[RFC v2 8/9] net/gve: support jumbo frame for GQI

2023-01-29 Thread Junfeng Guo
Add multi-segment support to enable GQI Rx jumbo frames.
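
Multi-segment Rx chains mbufs: the head mbuf carries pkt_len and nb_segs
for the whole packet, while each segment carries only its own data_len.
A generic sketch of the accounting that gve_rx_mbuf() below performs:

#include <rte_mbuf.h>

static void
chain_rx_seg(struct rte_mbuf **head, struct rte_mbuf **tail,
	     struct rte_mbuf *seg, uint16_t len)
{
	seg->data_len = len;
	if (*head == NULL) {
		/* First fragment starts the chain. */
		*head = seg;
		*tail = seg;
		seg->nb_segs = 1;
		seg->pkt_len = len;
	} else {
		/* Subsequent fragments extend it. */
		(*head)->pkt_len += len;
		(*head)->nb_segs += 1;
		(*tail)->next = seg;
		*tail = seg;
	}
}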

Signed-off-by: Jordan Kimbrough 
Signed-off-by: Rushil Gupta 
Signed-off-by: Junfeng Guo 
Signed-off-by: Jeroen de Borst 
---
 drivers/net/gve/gve_ethdev.h |   8 +++
 drivers/net/gve/gve_rx.c | 128 ++-
 2 files changed, 105 insertions(+), 31 deletions(-)

diff --git a/drivers/net/gve/gve_ethdev.h b/drivers/net/gve/gve_ethdev.h
index 2e0f96499d..608a2f2fb4 100644
--- a/drivers/net/gve/gve_ethdev.h
+++ b/drivers/net/gve/gve_ethdev.h
@@ -138,6 +138,13 @@ struct gve_tx_queue {
uint8_t is_gqi_qpl;
 };
 
+struct gve_rx_ctx {
+   struct rte_mbuf *mbuf_head;
+   struct rte_mbuf *mbuf_tail;
+   uint16_t total_frags;
+   bool drop_pkt;
+};
+
 struct gve_rx_queue {
volatile struct gve_rx_desc *rx_desc_ring;
volatile union gve_rx_data_slot *rx_data_ring;
@@ -146,6 +153,7 @@ struct gve_rx_queue {
uint64_t rx_ring_phys_addr;
struct rte_mbuf **sw_ring;
struct rte_mempool *mpool;
+   struct gve_rx_ctx ctx;
 
uint16_t rx_tail;
uint16_t nb_rx_desc;
diff --git a/drivers/net/gve/gve_rx.c b/drivers/net/gve/gve_rx.c
index 9ba975c9b4..2468fc70ee 100644
--- a/drivers/net/gve/gve_rx.c
+++ b/drivers/net/gve/gve_rx.c
@@ -5,6 +5,8 @@
 #include "gve_ethdev.h"
 #include "base/gve_adminq.h"
 
+#define GVE_PKT_CONT_BIT_IS_SET(x) (GVE_RXF_PKT_CONT & (x))
+
 static inline void
 gve_rx_refill(struct gve_rx_queue *rxq)
 {
@@ -80,40 +82,70 @@ gve_rx_refill(struct gve_rx_queue *rxq)
}
 }
 
-uint16_t
-gve_rx_burst(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+/*
+ * This method processes a single rte_mbuf and handles packet segmentation
+ * In QPL mode it copies data from the mbuf to the gve_rx_queue.
+ */
+static void
+gve_rx_mbuf(struct gve_rx_queue *rxq, struct rte_mbuf *rxe, uint16_t len,
+   uint16_t rx_id)
 {
-   volatile struct gve_rx_desc *rxr, *rxd;
-   struct gve_rx_queue *rxq = rx_queue;
-   uint16_t rx_id = rxq->rx_tail;
-   struct rte_mbuf *rxe;
-   uint16_t nb_rx, len;
+   uint16_t padding = 0;
uint64_t addr;
-   uint16_t i;
-
-   rxr = rxq->rx_desc_ring;
-   nb_rx = 0;
-
-   for (i = 0; i < nb_pkts; i++) {
-   rxd = &rxr[rx_id];
-   if (GVE_SEQNO(rxd->flags_seq) != rxq->expected_seqno)
-   break;
 
-   if (rxd->flags_seq & GVE_RXF_ERR)
-   continue;
-
-   len = rte_be_to_cpu_16(rxd->len) - GVE_RX_PAD;
-   rxe = rxq->sw_ring[rx_id];
-   if (rxq->is_gqi_qpl) {
-   addr = (uint64_t)(rxq->qpl->mz->addr) + rx_id * PAGE_SIZE + GVE_RX_PAD;
-   rte_memcpy((void *)((size_t)rxe->buf_addr + rxe->data_off),
-  (void *)(size_t)addr, len);
-   }
+   rxe->data_len = len;
+   if (!rxq->ctx.mbuf_head) {
+   rxq->ctx.mbuf_head = rxe;
+   rxq->ctx.mbuf_tail = rxe;
+   rxe->nb_segs = 1;
rxe->pkt_len = len;
rxe->data_len = len;
rxe->port = rxq->port_id;
rxe->ol_flags = 0;
+   padding = GVE_RX_PAD;
+   } else {
+   rxq->ctx.mbuf_head->pkt_len += len;
+   rxq->ctx.mbuf_head->nb_segs += 1;
+   rxq->ctx.mbuf_tail->next = rxe;
+   rxq->ctx.mbuf_tail = rxe;
+   }
+   if (rxq->is_gqi_qpl) {
+   addr = (uint64_t)(rxq->qpl->mz->addr) + rx_id * PAGE_SIZE + padding;
+   rte_memcpy((void *)((size_t)rxe->buf_addr + rxe->data_off),
+   (void *)(size_t)addr, len);
+   }
+}
+
+/*
+ * This method processes a single packet fragment associated with the
+ * passed packet descriptor.
+ * This methods returns whether the fragment is the last fragment
+ * of a packet.
+ */
+static bool
gve_rx(struct gve_rx_queue *rxq, volatile struct gve_rx_desc *rxd, uint16_t rx_id)
+{
+   bool is_last_frag = !GVE_PKT_CONT_BIT_IS_SET(rxd->flags_seq);
+   uint16_t frag_size = rte_be_to_cpu_16(rxd->len);
+   struct gve_rx_ctx *ctx = &rxq->ctx;
+   bool is_first_frag = ctx->total_frags == 0;
+   struct rte_mbuf *rxe;
+
+   if (ctx->drop_pkt)
+   goto finish_frag;
 
+   if (rxd->flags_seq & GVE_RXF_ERR) {
+   ctx->drop_pkt = true;
+   goto finish_frag;
+   }
+
+   if (is_first_frag)
+   frag_size -= GVE_RX_PAD;
+
+   rxe = rxq->sw_ring[rx_id];
+   gve_rx_mbuf(rxq, rxe, frag_size, rx_id);
+
+   if (is_first_frag) {
if (rxd->flags_seq & GVE_RXF_TCP)
rxe->packet_type |= RTE_PTYPE_L4_TCP;
if (rxd->flags_seq & GVE_RXF_UDP)
@@ -127,18 +159,52 @@ gve_rx_burst(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
rxe->ol_flags |= RTE_M

[RFC v2 9/9] net/gve: add AdminQ command to verify driver compatibility

2023-01-29 Thread Junfeng Guo
Check whether the driver is compatible with the device presented.

Signed-off-by: Rushil Gupta 
Signed-off-by: Jordan Kimbrough 
Signed-off-by: Junfeng Guo 
Signed-off-by: Jeroen de Borst 
---
 drivers/net/gve/base/gve_adminq.c | 19 ++
 drivers/net/gve/base/gve_adminq.h | 48 +
 drivers/net/gve/base/gve_osdep.h  |  8 +
 drivers/net/gve/gve_ethdev.c  | 60 +++
 drivers/net/gve/gve_ethdev.h  |  1 +
 5 files changed, 136 insertions(+)

diff --git a/drivers/net/gve/base/gve_adminq.c b/drivers/net/gve/base/gve_adminq.c
index e963f910a0..5576990cb1 100644
--- a/drivers/net/gve/base/gve_adminq.c
+++ b/drivers/net/gve/base/gve_adminq.c
@@ -401,6 +401,9 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv,
case GVE_ADMINQ_GET_PTYPE_MAP:
priv->adminq_get_ptype_map_cnt++;
break;
+   case GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY:
+   priv->adminq_verify_driver_compatibility_cnt++;
+   break;
default:
PMD_DRV_LOG(ERR, "unknown AQ command opcode %d", opcode);
}
@@ -859,6 +862,22 @@ int gve_adminq_report_stats(struct gve_priv *priv, u64 
stats_report_len,
return gve_adminq_execute_cmd(priv, &cmd);
 }
 
+int gve_adminq_verify_driver_compatibility(struct gve_priv *priv,
+  u64 driver_info_len,
+  dma_addr_t driver_info_addr)
+{
+   union gve_adminq_command cmd;
+
+   memset(&cmd, 0, sizeof(cmd));
+   cmd.opcode = cpu_to_be32(GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY);
+   cmd.verify_driver_compatibility = (struct gve_adminq_verify_driver_compatibility) {
+   .driver_info_len = cpu_to_be64(driver_info_len),
+   .driver_info_addr = cpu_to_be64(driver_info_addr),
+   };
+
+   return gve_adminq_execute_cmd(priv, &cmd);
+}
+
 int gve_adminq_report_link_speed(struct gve_priv *priv)
 {
struct gve_dma_mem link_speed_region_dma_mem;
diff --git a/drivers/net/gve/base/gve_adminq.h b/drivers/net/gve/base/gve_adminq.h
index 05550119de..c82e02405c 100644
--- a/drivers/net/gve/base/gve_adminq.h
+++ b/drivers/net/gve/base/gve_adminq.h
@@ -23,6 +23,7 @@ enum gve_adminq_opcodes {
GVE_ADMINQ_REPORT_STATS = 0xC,
GVE_ADMINQ_REPORT_LINK_SPEED= 0xD,
GVE_ADMINQ_GET_PTYPE_MAP= 0xE,
+   GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY  = 0xF,
 };
 
 /* Admin queue status codes */
@@ -145,6 +146,48 @@ enum gve_sup_feature_mask {
 };
 
 #define GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING 0x0
+#define GVE_VERSION_STR_LEN 128
+
+enum gve_driver_capability {
+   gve_driver_capability_gqi_qpl = 0,
+   gve_driver_capability_gqi_rda = 1,
+   gve_driver_capability_dqo_qpl = 2, /* reserved for future use */
+   gve_driver_capability_dqo_rda = 3,
+};
+
+#define GVE_CAP1(a) BIT((int)a)
+#define GVE_CAP2(a) BIT(((int)a) - 64)
+#define GVE_CAP3(a) BIT(((int)a) - 128)
+#define GVE_CAP4(a) BIT(((int)a) - 192)
+
+#define GVE_DRIVER_CAPABILITY_FLAGS1 \
+   (GVE_CAP1(gve_driver_capability_gqi_qpl) | \
+GVE_CAP1(gve_driver_capability_gqi_rda) | \
+GVE_CAP1(gve_driver_capability_dqo_rda))
+
+#define GVE_DRIVER_CAPABILITY_FLAGS2 0x0
+#define GVE_DRIVER_CAPABILITY_FLAGS3 0x0
+#define GVE_DRIVER_CAPABILITY_FLAGS4 0x0
+
+struct gve_driver_info {
+   u8 os_type; /* 0x01 = Linux */
+   u8 driver_major;
+   u8 driver_minor;
+   u8 driver_sub;
+   __be32 os_version_major;
+   __be32 os_version_minor;
+   __be32 os_version_sub;
+   __be64 driver_capability_flags[4];
+   u8 os_version_str1[GVE_VERSION_STR_LEN];
+   u8 os_version_str2[GVE_VERSION_STR_LEN];
+};
+
+struct gve_adminq_verify_driver_compatibility {
+   __be64 driver_info_len;
+   __be64 driver_info_addr;
+};
+
+GVE_CHECK_STRUCT_LEN(16,  gve_adminq_verify_driver_compatibility);
 
 struct gve_adminq_configure_device_resources {
__be64 counter_array;
@@ -345,6 +388,8 @@ union gve_adminq_command {
struct gve_adminq_report_stats report_stats;
struct gve_adminq_report_link_speed report_link_speed;
struct gve_adminq_get_ptype_map get_ptype_map;
+   struct gve_adminq_verify_driver_compatibility
+   verify_driver_compatibility;
};
};
u8 reserved[64];
@@ -377,5 +422,8 @@ int gve_adminq_report_link_speed(struct gve_priv *priv);
 struct gve_ptype_lut;
 int gve_adminq_get_ptype_map_dqo(struct gve_priv *priv,
 struct gve_ptype_lut *ptype_lut);
+int gve_adminq_verify_driver_compatibility(struct gve_priv *priv,
+  u64 driver_info_len,
+  dma_addr_t driver_info_addr);
 
 #endif /* _GVE_ADMINQ_H */
diff --

[[PATCH] ] linux/igb_uio: fix build with Linux 5.18

2023-01-29 Thread jiangheng (G)
Since commit 7968778914 (PCI: Remove the deprecated "pci-dma-compat.h" API)
in 5.18, pci_set_dma_mask() and pci_set_consistent_dma_mask() no longer
exist. Switch those APIs to dma_set_mask_and_coherent().
---
 linux/igb_uio/igb_uio.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/linux/igb_uio/igb_uio.c b/linux/igb_uio/igb_uio.c
index 33e0e02..3672314 100644
--- a/linux/igb_uio/igb_uio.c
+++ b/linux/igb_uio/igb_uio.c
@@ -512,13 +512,21 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
goto fail_release_iomem;
 
/* set 64-bit DMA mask */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 18, 0)
err = pci_set_dma_mask(dev,  DMA_BIT_MASK(64));
+#else
+   err = dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(64));
+#endif
if (err != 0) {
dev_err(&dev->dev, "Cannot set DMA mask\n");
goto fail_release_iomem;
}
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 18, 0)
err = pci_set_consistent_dma_mask(dev, DMA_BIT_MASK(64));
+#else
+   err = dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(64));
+#endif
if (err != 0) {
dev_err(&dev->dev, "Cannot set consistent DMA mask\n");
goto fail_release_iomem;
-- 
2.27.0



RE: [PATCH v7 1/6] eal: trace: add trace point emit for blob

2023-01-29 Thread Sunil Kumar Kori
> -Original Message-
> From: Ankur Dwivedi 
> Sent: Monday, January 23, 2023 2:32 PM
> To: dev@dpdk.org
> Cc: tho...@monjalon.net; david.march...@redhat.com; m...@ashroe.eu;
> or...@nvidia.com; ferruh.yi...@amd.com; ch...@att.com;
> humi...@huawei.com; linvi...@tuxdriver.com; ciara.lof...@intel.com;
> qi.z.zh...@intel.com; m...@semihalf.com; m...@semihalf.com;
> shaib...@amazon.com; evge...@amazon.com; igo...@amazon.com;
> cha...@amd.com; Igor Russkikh ;
> shepard.sie...@atomicrules.com; ed.cz...@atomicrules.com;
> john.mil...@atomicrules.com; ajit.khapa...@broadcom.com;
> somnath.ko...@broadcom.com; Jerin Jacob Kollanukkaran
> ; Maciej Czekaj [C] ; Shijith
> Thotton ; Srisivasubramanian Srinivasan
> ; Harman Kalra ;
> rahul.lakkire...@chelsio.com; johnd...@cisco.com; hyon...@cisco.com;
> liudongdo...@huawei.com; yisen.zhu...@huawei.com;
> xuanziya...@huawei.com; cloud.wangxiao...@huawei.com;
> zhouguoy...@huawei.com; simei...@intel.com; wenjun1...@intel.com;
> qiming.y...@intel.com; yuying.zh...@intel.com; beilei.x...@intel.com;
> xiao.w.w...@intel.com; jingjing...@intel.com; junfeng@intel.com;
> rosen...@intel.com; Nithin Kumar Dabilpuram
> ; Kiran Kumar Kokkilagadda
> ; Sunil Kumar Kori ; Satha
> Koteswara Rao Kottidi ; Liron Himi
> ; z...@semihalf.com; Radha Chintakuntla
> ; Veerasenareddy Burru ;
> Sathesh B Edara ; ma...@nvidia.com;
> viachesl...@nvidia.com; lon...@microsoft.com; spin...@cesnet.cz;
> chaoyong...@corigine.com; niklas.soderl...@corigine.com;
> hemant.agra...@nxp.com; sachin.sax...@oss.nxp.com; g.si...@nxp.com;
> apeksha.gu...@nxp.com; sachin.sax...@nxp.com; abo...@pensando.io;
> Rasesh Mody ; Shahed Shaikh
> ; Devendra Singh Rawat
> ; andrew.rybche...@oktetlabs.ru;
> jiawe...@trustnetic.com; jianw...@trustnetic.com;
> jbehr...@vmware.com; maxime.coque...@redhat.com;
> chenbo@intel.com; steven.webs...@windriver.com;
> matt.pet...@windriver.com; bruce.richard...@intel.com;
> mtetsu...@gmail.com; gr...@u256.net; jasvinder.si...@intel.com;
> cristian.dumitre...@intel.com; jgraj...@cisco.com;
> m...@smartsharesystems.com; Ankur Dwivedi 
> Subject: [PATCH v7 1/6] eal: trace: add trace point emit for blob
> 
> Adds a trace point emit function for capturing a blob. The blob captures the
> length passed by the application followed by the array.
> 
> The maximum blob bytes which can be captured is bounded by
> RTE_TRACE_BLOB_LEN_MAX macro. The value for max blob length macro is
> 64 bytes. If the length is less than 64 the remaining trailing bytes are set 
> to
> zero.
> 
> This patch also adds test case for emit blob tracepoint function.
> 
> Signed-off-by: Ankur Dwivedi 
> ---
>  app/test/test_trace.c  | 11 
>  doc/guides/prog_guide/trace_lib.rst| 12 +
>  lib/eal/common/eal_common_trace_points.c   |  2 ++
>  lib/eal/include/rte_eal_trace.h|  6 +
>  lib/eal/include/rte_trace_point.h  | 31 ++
>  lib/eal/include/rte_trace_point_register.h |  9 +++
>  lib/eal/version.map|  3 +++
>  7 files changed, 74 insertions(+)
> 
Acked-by: Sunil Kumar Kori 

> diff --git a/app/test/test_trace.c b/app/test/test_trace.c
> index 6bedf14024..ad4a394a29 100644
> --- a/app/test/test_trace.c
> +++ b/app/test/test_trace.c
> @@ -4,6 +4,7 @@
> 
>  #include 
>  #include 
> +#include 
>  #include 
> 
>  #include "test.h"
> @@ -177,7 +178,12 @@ test_fp_trace_points(void)  static int
>  test_generic_trace_points(void)
>  {
> + uint8_t arr[RTE_TRACE_BLOB_LEN_MAX];
>   int tmp;
> + int i;
> +
> + for (i = 0; i < RTE_TRACE_BLOB_LEN_MAX; i++)
> + arr[i] = i;
> 
>   rte_eal_trace_generic_void();
>   rte_eal_trace_generic_u64(0x10);
> @@ -195,6 +201,11 @@ test_generic_trace_points(void)
>   rte_eal_trace_generic_ptr(&tmp);
>   rte_eal_trace_generic_str("my string");
>   rte_eal_trace_generic_size_t(sizeof(void *));
> + rte_eal_trace_generic_blob(arr, 0);
> + rte_eal_trace_generic_blob(arr, 17);
> + rte_eal_trace_generic_blob(arr, RTE_TRACE_BLOB_LEN_MAX);
> + rte_eal_trace_generic_blob(arr, rte_rand() %
> + RTE_TRACE_BLOB_LEN_MAX);
>   RTE_EAL_TRACE_GENERIC_FUNC;
> 
>   return TEST_SUCCESS;
> diff --git a/doc/guides/prog_guide/trace_lib.rst b/doc/guides/prog_guide/trace_lib.rst
> index 9a8f38073d..3e0ea5835c 100644
> --- a/doc/guides/prog_guide/trace_lib.rst
> +++ b/doc/guides/prog_guide/trace_lib.rst
> @@ -352,3 +352,15 @@ event ID.
>  The ``packet.header`` and ``packet.context`` will be written in the slow path
> at the time of trace memory creation. The ``trace.header`` and trace payload
> will be emitted when the tracepoint function is invoked.
> +
> +Limitations
> +---
> +
> +- The ``rte_trace_point_emit_blob()`` function can capture a maximum blob of
> +  length ``RTE_TRACE_BLOB_LEN_MAX`` bytes. The application can call
> +  ``rte_trace_point_emit_blob()`` multipl

RE: [PATCH v7] ethdev: add special flags when creating async transfer table

2023-01-29 Thread Ivan Malov

Hi Rongwei,

For my responses, PSB.

By the way, now that you mention things like wasting memory and insertion
optimisations, are there any comparative figures to see the effect
of this hint on insertion performance / memory footprint?
Some "before" / "after" examples would really be helpful.

After all, I'm not objecting to this patch. But I believe that other
reviewers' concerns should nevertheless be addressed anyway.

On Mon, 30 Jan 2023, Rongwei Liu wrote:


Hi Ivan,

BR
Rongwei


-Original Message-
From: Ivan Malov 
Sent: Monday, January 30, 2023 08:00
To: Rongwei Liu 
Cc: Matan Azrad ; Slava Ovsiienko
; Ori Kam ; NBU-Contact-
Thomas Monjalon (EXTERNAL) ; Aman Singh
; Yuying Zhang ;
Ferruh Yigit ; Andrew Rybchenko
; dev@dpdk.org; Raslan Darawsheh

Subject: Re: [PATCH v7] ethdev: add special flags when creating async transfer
table

External email: Use caution opening links or attachments


Hi Rongwei,

Thanks for persevering. I have no strong opinion, but, at least, the fact that 
the
new flags are no longer meant for use in rte_flow_attr, which is clearly not
the right place for such, is an improvement.


Thanks for the suggestion; it has moved to rte_flow_table_attr now and it's
dedicated to the async API.

However, let's take a closer look at the current patch, shall we?

But, before we get to that, I'd like to kindly request that you provide a more
concrete example of how this feature is supposed to be used. Are there some
real-life application examples?


Sure.

Also, to me, it's still unclear how an application can obtain the knowledge of
this hint in the first instance. For example, can Open vSwitch somehow tell
ethdevs representing physical ports from ones representing "vports" (host
endpoints)?
How does it know which attribute to specify?


The hint should be initiated by the application, and the application knows its
traffic pattern, which highly relates to the deployment.
Let's use VxLAN encap/decap as an example:
1. Traffic from the wire should match the VxLAN pattern and be decapped, then
sent to different vports.
flow pattern_template 0 create transfer relaxed no pattern_template_id 4 template represented_port ethdev_port_id is 0 / eth / ipv4 / udp / vxlan / tag index is 0 data is 0x33 / end
flow actions_template 0 create transfer actions_template_id 4 template raw_decap index 0 / represented_port ethdev_port_id 1 / end mask raw_decap index 0 / represented_port ethdev_port_id 1 / end
flow template_table 0 create group 1 priority 0 transfer wire_orig table_id 4 
rules_number 128 pattern_template 4 actions_template 4

2. Traffic from vports is encapped with a different VxLAN header and sent to
the wire.
flow actions_template 1 create transfer actions_template_id 5 template 
raw_encap index 0 / represented_port ethdev_port_id 0 / end mask raw_encap 
index 0 / represented_port ethdev_port_id 0 / end
flow template_table 0 create group 1 priority 0 transfer vport_orig table_id 5 
rules_number 128 pattern_template 4 actions_template 5
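
In API terms, the hint is passed once at table creation time. A minimal
sketch of case 1 (attribute/flag names follow this proposal and may still
change):

#include <rte_flow.h>

/* port_id and the pattern/actions templates are assumed to be the ones
 * created by the testpmd lines above. */
static struct rte_flow_template_table *
create_wire_orig_table(uint16_t port_id,
		       struct rte_flow_pattern_template *pt,
		       struct rte_flow_actions_template *at,
		       struct rte_flow_error *error)
{
	const struct rte_flow_template_table_attr attr = {
		.flow_attr = { .group = 1, .transfer = 1 },
		.nb_flows = 128,
		/* Hint: all rules inserted here match wire-originated
		 * traffic, so the PMD can skip allocating vport-side
		 * resources for this table. */
		.specialize = RTE_FLOW_TABLE_SPECIALIZE_TRANSFER_WIRE_ORIG,
	};

	return rte_flow_template_table_create(port_id, &attr,
					      &pt, 1, &at, 1, error);
}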


For the rest of my notes, PSB.

On Mon, 14 Nov 2022, Rongwei Liu wrote:


In case flow rules match only one kind of traffic in a flow table,
then optimization can be done via allocation of this table.


This wording might confuse readers. Consider rephrasing it, please:
If multiple flow rules share a common set of match masks, then they might
belong in a flow table which can be pre-allocated.


Such optimization is possible only if the application gives a hint
about its usage of the table during initial configuration.

The transfer domain rules may process traffic from wire or vport,
which may correspond to two kinds of underlayer resources.


Why name it a "vport"? Why not "host"?

host = packets generated by any of the host's "vport"s
wire = packets arriving at the NIC from the network

Vport is "virtual port" for short and contains "VF/SF" for now.
Per my thoughts, it' clearer and maps to DPDK port probing/management.


I understand that "host" might not be a brilliant name.

If "vport" stands for every port of the NIC that is not a network port,
then this name might be OK to me, but why doesn't it cover PFs? A PF is
clearly not a network / physical port. Why just VF/SF then? Where does
that "for now" decision come from? Just wondering.




That's why the first two hints introduced in this patch are about wire
and vport traffic specialization.
Wire means traffic arrives from the uplink port while vport means
traffic initiated from VF/SF.


By the sound of it, the meaning is confined to just VFs/SFs.
What if the user wants to match packets coming from PFs?


It should be "wire_orig".


Forgive me, but that does not sound correct. Say, there's an application
and it has a PF plugged into it: ethdev index 0. And the application
transmits packets using rte_eth_tx_burst() from that port.
You say that these packets can be matched via "wire_orig".
But they do not come from the wire. They come from PF...



There are two possible approaches for providing the hints.
Using IPv4 as an example:
1. Use

RE: [PATCH v1 10/21] net/virtio: alloc Rx SW ring only if vectorized path

2023-01-29 Thread Xia, Chenbo
Hi Maxime,

> -Original Message-
> From: Maxime Coquelin 
> Sent: Wednesday, November 30, 2022 11:56 PM
> To: dev@dpdk.org; Xia, Chenbo ;
> david.march...@redhat.com; epere...@redhat.com
> Cc: Maxime Coquelin 
> Subject: [PATCH v1 10/21] net/virtio: alloc Rx SW ring only if vectorized
> path
> 
> This patch only allocates the SW ring when the vectorized
> datapath is used. It also moves the SW ring and fake mbuf
> into the virtnet_rx struct since they are Rx-only.
> 
> Signed-off-by: Maxime Coquelin 
> ---
>  drivers/net/virtio/virtio_ethdev.c| 88 ---
>  drivers/net/virtio/virtio_rxtx.c  |  8 +-
>  drivers/net/virtio/virtio_rxtx.h  |  4 +-
>  drivers/net/virtio/virtio_rxtx_simple.h   |  2 +-
>  .../net/virtio/virtio_rxtx_simple_altivec.c   |  4 +-
>  drivers/net/virtio/virtio_rxtx_simple_neon.c  |  4 +-
>  drivers/net/virtio/virtio_rxtx_simple_sse.c   |  4 +-
>  drivers/net/virtio/virtqueue.c|  6 +-
>  drivers/net/virtio/virtqueue.h|  1 -
>  9 files changed, 72 insertions(+), 49 deletions(-)
> 
> diff --git a/drivers/net/virtio/virtio_ethdev.c
> b/drivers/net/virtio/virtio_ethdev.c
> index 8b17b450ec..46dd5606f6 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -339,6 +339,47 @@ virtio_free_queue_headers(struct virtqueue *vq)
>   *hdr_mem = 0;
>  }
> 
> +static int
> +virtio_rxq_sw_ring_alloc(struct virtqueue *vq, int numa_node)
> +{
> + void *sw_ring;
> + struct rte_mbuf *mbuf;
> + size_t size;
> +
> + /* SW ring is only used with vectorized datapath */
> + if (!vq->hw->use_vec_rx)
> + return 0;
> +
> + size = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq->vq_nentries) * sizeof(vq->rxq.sw_ring[0]);
> +
> + sw_ring = rte_zmalloc_socket("sw_ring", size, RTE_CACHE_LINE_SIZE, numa_node);
> + if (!sw_ring) {
> + PMD_INIT_LOG(ERR, "can not allocate RX soft ring");
> + return -ENOMEM;
> + }
> +
> + mbuf = rte_zmalloc_socket("sw_ring", sizeof(*mbuf), RTE_CACHE_LINE_SIZE, numa_node);
> + if (!mbuf) {
> + PMD_INIT_LOG(ERR, "can not allocate fake mbuf");
> + rte_free(sw_ring);
> + return -ENOMEM;
> + }
> +
> + vq->rxq.sw_ring = sw_ring;
> + vq->rxq.fake_mbuf = mbuf;
> +
> + return 0;
> +}
> +
> +static void
> +virtio_rxq_sw_ring_free(struct virtqueue *vq)
> +{
> + rte_free(vq->rxq.fake_mbuf);
> + vq->rxq.fake_mbuf = NULL;
> + rte_free(vq->rxq.sw_ring);
> + vq->rxq.sw_ring = NULL;
> +}
> +
>  static int
>  virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx)
>  {
> @@ -346,14 +387,11 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t
> queue_idx)
>   const struct rte_memzone *mz = NULL;
>   unsigned int vq_size, size;
>   struct virtio_hw *hw = dev->data->dev_private;
> - struct virtnet_rx *rxvq = NULL;
>   struct virtnet_ctl *cvq = NULL;
>   struct virtqueue *vq;
> - void *sw_ring = NULL;
>   int queue_type = virtio_get_queue_type(hw, queue_idx);
>   int ret;
>   int numa_node = dev->device->numa_node;
> - struct rte_mbuf *fake_mbuf = NULL;
> 
>   PMD_INIT_LOG(INFO, "setting up queue: %u on NUMA node %d",
>   queue_idx, numa_node);
> @@ -441,28 +479,9 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t
> queue_idx)
>   }
> 
>   if (queue_type == VTNET_RQ) {
> - size_t sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
> -sizeof(vq->sw_ring[0]);
> -
> - sw_ring = rte_zmalloc_socket("sw_ring", sz_sw,
> - RTE_CACHE_LINE_SIZE, numa_node);
> - if (!sw_ring) {
> - PMD_INIT_LOG(ERR, "can not allocate RX soft ring");
> - ret = -ENOMEM;
> + ret = virtio_rxq_sw_ring_alloc(vq, numa_node);
> + if (ret)
>   goto free_hdr_mz;
> - }
> -
> - fake_mbuf = rte_zmalloc_socket("sw_ring", sizeof(*fake_mbuf),
> - RTE_CACHE_LINE_SIZE, numa_node);
> - if (!fake_mbuf) {
> - PMD_INIT_LOG(ERR, "can not allocate fake mbuf");
> - ret = -ENOMEM;
> - goto free_sw_ring;
> - }
> -
> - vq->sw_ring = sw_ring;
> - rxvq = &vq->rxq;
> - rxvq->fake_mbuf = fake_mbuf;
>   } else if (queue_type == VTNET_TQ) {
>   virtqueue_txq_indirect_headers_init(vq);
>   } else if (queue_type == VTNET_CQ) {
> @@ -486,9 +505,8 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t
> queue_idx)
> 
>  clean_vq:
>   hw->cvq = NULL;
> - rte_free(fake_mbuf);
> -free_sw_ring:
> - rte_free(sw_ring);
> + if (queue_type == VTNET_RQ)
> + virtio_rxq_sw_ring_free(vq);
>  free_hdr_mz:
>   virtio_free_queue_headers(vq);
>  fr

RE: [PATCH v1 01/21] net/virtio: move CVQ code into a dedicated file

2023-01-29 Thread Xia, Chenbo
> -Original Message-
> From: Maxime Coquelin 
> Sent: Wednesday, November 30, 2022 11:56 PM
> To: dev@dpdk.org; Xia, Chenbo ;
> david.march...@redhat.com; epere...@redhat.com
> Cc: Maxime Coquelin 
> Subject: [PATCH v1 01/21] net/virtio: move CVQ code into a dedicated file
> 
> This patch moves Virtio control queue code into a dedicated
> file, as preliminary rework to support shadow control queue
> in Virtio-user.
> 
> Signed-off-by: Maxime Coquelin 
> ---
>  drivers/net/virtio/meson.build |   1 +
>  drivers/net/virtio/virtio_cvq.c| 230 +
>  drivers/net/virtio/virtio_cvq.h| 126 
>  drivers/net/virtio/virtio_ethdev.c | 218 +--
>  drivers/net/virtio/virtio_rxtx.h   |   9 --
>  drivers/net/virtio/virtqueue.h | 105 +
>  6 files changed, 359 insertions(+), 330 deletions(-)
>  create mode 100644 drivers/net/virtio/virtio_cvq.c
>  create mode 100644 drivers/net/virtio/virtio_cvq.h
> 
> --
> 2.38.1

Reviewed-by: Chenbo Xia 


RE: [PATCH v1 02/21] net/virtio: introduce notify callback for control queue

2023-01-29 Thread Xia, Chenbo
> -Original Message-
> From: Maxime Coquelin 
> Sent: Wednesday, November 30, 2022 11:56 PM
> To: dev@dpdk.org; Xia, Chenbo ;
> david.march...@redhat.com; epere...@redhat.com
> Cc: Maxime Coquelin 
> Subject: [PATCH v1 02/21] net/virtio: introduce notify callback for
> control queue
> 
> This patch introduces a notification callback for the control
> virtqueue as preliminary work to add shadow control virtqueue
> support.
> 
> This new callback is required so that the shadow control queue
> implemented in Virtio-user does not call the notification op
> implemented for the driver layer.
> 
> Signed-off-by: Maxime Coquelin 
> ---
>  drivers/net/virtio/virtio_cvq.c| 4 ++--
>  drivers/net/virtio/virtio_cvq.h| 4 
>  drivers/net/virtio/virtio_ethdev.c | 7 +++
>  3 files changed, 13 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/virtio/virtio_cvq.c
> b/drivers/net/virtio/virtio_cvq.c
> index de4299a2a7..cd25614df8 100644
> --- a/drivers/net/virtio/virtio_cvq.c
> +++ b/drivers/net/virtio/virtio_cvq.c
> @@ -76,7 +76,7 @@ virtio_send_command_packed(struct virtnet_ctl *cvq,
>   vq->hw->weak_barriers);
> 
>   virtio_wmb(vq->hw->weak_barriers);
> - virtqueue_notify(vq);
> + cvq->notify_queue(vq, cvq->notify_cookie);
> 
>   /* wait for used desc in virtqueue
>* desc_is_used has a load-acquire or rte_io_rmb inside
> @@ -155,7 +155,7 @@ virtio_send_command_split(struct virtnet_ctl *cvq,
> 
>   PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index);
> 
> - virtqueue_notify(vq);
> + cvq->notify_queue(vq, cvq->notify_cookie);
> 
>   while (virtqueue_nused(vq) == 0)
>   usleep(100);
> diff --git a/drivers/net/virtio/virtio_cvq.h
> b/drivers/net/virtio/virtio_cvq.h
> index 139e813ffb..224dc81422 100644
> --- a/drivers/net/virtio/virtio_cvq.h
> +++ b/drivers/net/virtio/virtio_cvq.h
> @@ -7,6 +7,8 @@
> 
>  #include 
> 
> +struct virtqueue;
> +
>  /**
>   * Control the RX mode, ie. promiscuous, allmulti, etc...
>   * All commands require an "out" sg entry containing a 1 byte
> @@ -110,6 +112,8 @@ struct virtnet_ctl {
>   uint16_t port_id;   /**< Device port identifier. */
>   const struct rte_memzone *mz;   /**< mem zone to populate CTL ring.
> */
>   rte_spinlock_t lock;  /**< spinlock for control queue.
> */
> + void (*notify_queue)(struct virtqueue *vq, void *cookie); /**<
> notify ops. */
> + void *notify_cookie;  /**< cookie for notify ops */
>  };
> 
>  #define VIRTIO_MAX_CTRL_DATA 2048
> diff --git a/drivers/net/virtio/virtio_ethdev.c
> b/drivers/net/virtio/virtio_ethdev.c
> index d553f89a0d..8db8771f4d 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -253,6 +253,12 @@ virtio_init_vring(struct virtqueue *vq)
>   virtqueue_disable_intr(vq);
>  }
> 
> +static void
> +virtio_control_queue_notify(struct virtqueue *vq, __rte_unused void
> *cookie)
> +{
> + virtqueue_notify(vq);
> +}
> +
>  static int
>  virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx)
>  {
> @@ -421,6 +427,7 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t
> queue_idx)
>   memset(cvq->virtio_net_hdr_mz->addr, 0, rte_mem_page_size());
> 
>   hw->cvq = cvq;
> + vq->cq.notify_queue = &virtio_control_queue_notify;
>   }
> 
>   if (hw->use_va)
> --
> 2.38.1
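
Just to illustrate where this is heading (names below are hypothetical
until the Virtio-user patches land): the shadow control queue can plug in
its own handler instead of kicking the device notify op, e.g.

static void
virtio_user_control_queue_notify(struct virtqueue *vq, void *cookie)
{
	struct virtio_user_dev *dev = cookie;

	/* Handle the request in the shadow control queue instead of
	 * notifying the device. */
	virtio_user_handle_cq(dev, vq->vq_queue_index);
}

...
vq->cq.notify_queue = &virtio_user_control_queue_notify;
vq->cq.notify_cookie = dev;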

Reviewed-by: Chenbo Xia 


RE: [PATCH v1 03/21] net/virtio: virtqueue headers alloc refactoring

2023-01-29 Thread Xia, Chenbo
> -Original Message-
> From: Maxime Coquelin 
> Sent: Wednesday, November 30, 2022 11:56 PM
> To: dev@dpdk.org; Xia, Chenbo ;
> david.march...@redhat.com; epere...@redhat.com
> Cc: Maxime Coquelin 
> Subject: [PATCH v1 03/21] net/virtio: virtqueue headers alloc refactoring
> 
> This patch refactors virtqueue initialization by moving
> its header allocation and deallocation into dedicated
> functions.
> 
> While at it, it renames the memzone metadata and address
> pointers in the virtnet_tx and virtnet_ctl structures to
> remove redundant virtio_net_ prefix.
> 
> Signed-off-by: Maxime Coquelin 
> ---
>  drivers/net/virtio/virtio_cvq.c|  19 ++--
>  drivers/net/virtio/virtio_cvq.h|   9 +-
>  drivers/net/virtio/virtio_ethdev.c | 149 ++---
>  drivers/net/virtio/virtio_rxtx.c   |  12 +--
>  drivers/net/virtio/virtio_rxtx.h   |  12 +--
>  drivers/net/virtio/virtqueue.c |   8 +-
>  drivers/net/virtio/virtqueue.h |  13 +--
>  7 files changed, 126 insertions(+), 96 deletions(-)
> --
> 2.38.1

Reviewed-by: Chenbo Xia 


RE: [PATCH v1 04/21] net/virtio: remove port ID info from Rx queue

2023-01-29 Thread Xia, Chenbo
> -Original Message-
> From: Maxime Coquelin 
> Sent: Wednesday, November 30, 2022 11:56 PM
> To: dev@dpdk.org; Xia, Chenbo ;
> david.march...@redhat.com; epere...@redhat.com
> Cc: Maxime Coquelin 
> Subject: [PATCH v1 04/21] net/virtio: remove port ID info from Rx queue
> 
> The port ID information is duplicated in several places.
> This patch removes it from the virtnet_rx struct as it can
> be found in the virtio_hw struct.
> 
> Signed-off-by: Maxime Coquelin 
> ---
>  drivers/net/virtio/virtio_ethdev.c  |  1 -
>  drivers/net/virtio/virtio_rxtx.c| 25 ++---
>  drivers/net/virtio/virtio_rxtx.h|  1 -
>  drivers/net/virtio/virtio_rxtx_packed.c |  3 +--
>  drivers/net/virtio/virtio_rxtx_simple.c |  3 ++-
>  drivers/net/virtio/virtio_rxtx_simple.h |  5 +++--
>  6 files changed, 16 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/net/virtio/virtio_ethdev.c
> b/drivers/net/virtio/virtio_ethdev.c
> index cead5f0884..1c68e5a283 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -462,7 +462,6 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t
> queue_idx)
> 
>   vq->sw_ring = sw_ring;
>   rxvq = &vq->rxq;
> - rxvq->port_id = dev->data->port_id;
>   rxvq->mz = mz;
>   rxvq->fake_mbuf = fake_mbuf;
>   } else if (queue_type == VTNET_TQ) {
> diff --git a/drivers/net/virtio/virtio_rxtx.c
> b/drivers/net/virtio/virtio_rxtx.c
> index bd95e8ceb5..45c04aa3f8 100644
> --- a/drivers/net/virtio/virtio_rxtx.c
> +++ b/drivers/net/virtio/virtio_rxtx.c
> @@ -1024,7 +1024,7 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf
> **rx_pkts, uint16_t nb_pkts)
>   continue;
>   }
> 
> - rxm->port = rxvq->port_id;
> + rxm->port = hw->port_id;
>   rxm->data_off = RTE_PKTMBUF_HEADROOM;
>   rxm->ol_flags = 0;
>   rxm->vlan_tci = 0;
> @@ -1066,8 +1066,7 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf
> **rx_pkts, uint16_t nb_pkts)
>   }
>   nb_enqueued += free_cnt;
>   } else {
> - struct rte_eth_dev *dev =
> - &rte_eth_devices[rxvq->port_id];
> + struct rte_eth_dev *dev = &rte_eth_devices[hw->port_id];
>   dev->data->rx_mbuf_alloc_failed += free_cnt;
>   }
>   }
> @@ -1127,7 +1126,7 @@ virtio_recv_pkts_packed(void *rx_queue, struct
> rte_mbuf **rx_pkts,
>   continue;
>   }
> 
> - rxm->port = rxvq->port_id;
> + rxm->port = hw->port_id;
>   rxm->data_off = RTE_PKTMBUF_HEADROOM;
>   rxm->ol_flags = 0;
>   rxm->vlan_tci = 0;
> @@ -1169,8 +1168,7 @@ virtio_recv_pkts_packed(void *rx_queue, struct
> rte_mbuf **rx_pkts,
>   }
>   nb_enqueued += free_cnt;
>   } else {
> - struct rte_eth_dev *dev =
> - &rte_eth_devices[rxvq->port_id];
> + struct rte_eth_dev *dev = &rte_eth_devices[hw->port_id];
>   dev->data->rx_mbuf_alloc_failed += free_cnt;
>   }
>   }
> @@ -1258,7 +1256,7 @@ virtio_recv_pkts_inorder(void *rx_queue,
>   rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
>   rxm->data_len = (uint16_t)(len[i] - hdr_size);
> 
> - rxm->port = rxvq->port_id;
> + rxm->port = hw->port_id;
> 
>   rx_pkts[nb_rx] = rxm;
>   prev = rxm;
> @@ -1352,8 +1350,7 @@ virtio_recv_pkts_inorder(void *rx_queue,
>   }
>   nb_enqueued += free_cnt;
>   } else {
> - struct rte_eth_dev *dev =
> - &rte_eth_devices[rxvq->port_id];
> + struct rte_eth_dev *dev = &rte_eth_devices[hw->port_id];
>   dev->data->rx_mbuf_alloc_failed += free_cnt;
>   }
>   }
> @@ -1437,7 +1434,7 @@ virtio_recv_mergeable_pkts(void *rx_queue,
>   rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
>   rxm->data_len = (uint16_t)(len[i] - hdr_size);
> 
> - rxm->port = rxvq->port_id;
> + rxm->port = hw->port_id;
> 
>   rx_pkts[nb_rx] = rxm;
>   prev = rxm;
> @@ -1530,8 +1527,7 @@ virtio_recv_mergeable_pkts(void *rx_queue,
>   }
>   nb_enqueued += free_cnt;
>   } else {
> - struct rte_eth_dev *dev =
> - &rte_eth_devices[rxvq->port_id];
> + struct rte_eth_dev *dev = &rte_eth_devices[hw->port_id];
>   dev->data->rx_mbuf_alloc_failed += free_cnt;
>   }
>   }
> @@ -1610,7 +1606,7 @@ virtio_recv_mergeable_pkts_packed(voi

RE: [PATCH v1 05/21] net/virtio: remove unused fields in Tx queue struct

2023-01-29 Thread Xia, Chenbo
> -Original Message-
> From: Maxime Coquelin 
> Sent: Wednesday, November 30, 2022 11:56 PM
> To: dev@dpdk.org; Xia, Chenbo ;
> david.march...@redhat.com; epere...@redhat.com
> Cc: Maxime Coquelin 
> Subject: [PATCH v1 05/21] net/virtio: remove unused fields in Tx queue
> struct
> 
> The port and queue IDs are not used in the virtnet_tx struct,
> so this patch removes them.
> 
> Signed-off-by: Maxime Coquelin 
> ---
>  drivers/net/virtio/virtio_ethdev.c | 1 -
>  drivers/net/virtio/virtio_rxtx.c   | 1 -
>  drivers/net/virtio/virtio_rxtx.h   | 3 ---
>  3 files changed, 5 deletions(-)
> 
> diff --git a/drivers/net/virtio/virtio_ethdev.c
> b/drivers/net/virtio/virtio_ethdev.c
> index 1c68e5a283..a581fae408 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -466,7 +466,6 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t
> queue_idx)
>   rxvq->fake_mbuf = fake_mbuf;
>   } else if (queue_type == VTNET_TQ) {
>   txvq = &vq->txq;
> - txvq->port_id = dev->data->port_id;
>   txvq->mz = mz;
>   } else if (queue_type == VTNET_CQ) {
>   cvq = &vq->cq;
> diff --git a/drivers/net/virtio/virtio_rxtx.c
> b/drivers/net/virtio/virtio_rxtx.c
> index 45c04aa3f8..304403d46c 100644
> --- a/drivers/net/virtio/virtio_rxtx.c
> +++ b/drivers/net/virtio/virtio_rxtx.c
> @@ -831,7 +831,6 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
>   vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
> 
>   txvq = &vq->txq;
> - txvq->queue_id = queue_idx;
> 
>   tx_free_thresh = tx_conf->tx_free_thresh;
>   if (tx_free_thresh == 0)
> diff --git a/drivers/net/virtio/virtio_rxtx.h
> b/drivers/net/virtio/virtio_rxtx.h
> index 97de9eb0a3..9bbcf32f66 100644
> --- a/drivers/net/virtio/virtio_rxtx.h
> +++ b/drivers/net/virtio/virtio_rxtx.h
> @@ -35,9 +35,6 @@ struct virtnet_tx {
>   const struct rte_memzone *hdr_mz; /**< memzone to populate hdr. */
>   rte_iova_t hdr_mem;   /**< hdr for each xmit packet */
> 
> - uint16_tqueue_id; /**< DPDK queue index. */
> - uint16_tport_id;  /**< Device port identifier. */
> -
>   struct virtnet_stats stats;   /* Statistics */
> 
>   const struct rte_memzone *mz;/**< mem zone to populate TX ring.
> */
> --
> 2.38.1

Reviewed-by: Chenbo Xia 


RE: [PATCH v1 06/21] net/virtio: remove unused queue ID field in Rx queue

2023-01-29 Thread Xia, Chenbo
> -Original Message-
> From: Maxime Coquelin 
> Sent: Wednesday, November 30, 2022 11:56 PM
> To: dev@dpdk.org; Xia, Chenbo ;
> david.march...@redhat.com; epere...@redhat.com
> Cc: Maxime Coquelin 
> Subject: [PATCH v1 06/21] net/virtio: remove unused queue ID field in Rx
> queue
> 
> This patch removes the queue ID field from the virtnet_rx struct.
> 
> Signed-off-by: Maxime Coquelin 
> ---
>  drivers/net/virtio/virtio_rxtx.c | 1 -
>  drivers/net/virtio/virtio_rxtx.h | 2 --
>  2 files changed, 3 deletions(-)
> 
> diff --git a/drivers/net/virtio/virtio_rxtx.c
> b/drivers/net/virtio/virtio_rxtx.c
> index 304403d46c..4f69b97f41 100644
> --- a/drivers/net/virtio/virtio_rxtx.c
> +++ b/drivers/net/virtio/virtio_rxtx.c
> @@ -703,7 +703,6 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
>   vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
> 
>   rxvq = &vq->rxq;
> - rxvq->queue_id = queue_idx;
>   rxvq->mpool = mp;
>   dev->data->rx_queues[queue_idx] = rxvq;
> 
> diff --git a/drivers/net/virtio/virtio_rxtx.h
> b/drivers/net/virtio/virtio_rxtx.h
> index 9bbcf32f66..a5fe3ea95c 100644
> --- a/drivers/net/virtio/virtio_rxtx.h
> +++ b/drivers/net/virtio/virtio_rxtx.h
> @@ -23,8 +23,6 @@ struct virtnet_rx {
>   uint64_t mbuf_initializer; /**< value to init mbufs. */
>   struct rte_mempool *mpool; /**< mempool for mbuf allocation */
> 
> - uint16_t queue_id;   /**< DPDK queue index. */
> -
>   /* Statistics */
>   struct virtnet_stats stats;
> 
> --
> 2.38.1

Reviewed-by: Chenbo Xia 


RE: [PATCH v1 07/21] net/virtio: remove unused Port ID in control queue

2023-01-29 Thread Xia, Chenbo
> -Original Message-
> From: Maxime Coquelin 
> Sent: Wednesday, November 30, 2022 11:56 PM
> To: dev@dpdk.org; Xia, Chenbo ;
> david.march...@redhat.com; epere...@redhat.com
> Cc: Maxime Coquelin 
> Subject: [PATCH v1 07/21] net/virtio: remove unused Port ID in control
> queue
> 
> This patch removes the unused port ID information from
> the virtnet_ctl struct.
> 
> Signed-off-by: Maxime Coquelin 
> ---
>  drivers/net/virtio/virtio_cvq.h | 1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/drivers/net/virtio/virtio_cvq.h
> b/drivers/net/virtio/virtio_cvq.h
> index 226561e6b8..0ff326b063 100644
> --- a/drivers/net/virtio/virtio_cvq.h
> +++ b/drivers/net/virtio/virtio_cvq.h
> @@ -108,7 +108,6 @@ typedef uint8_t virtio_net_ctrl_ack;
>  struct virtnet_ctl {
>   const struct rte_memzone *hdr_mz; /**< memzone to populate hdr. */
>   rte_iova_t hdr_mem;   /**< hdr for each xmit packet */
> - uint16_t port_id; /**< Device port identifier. */
>   const struct rte_memzone *mz; /**< mem zone to populate CTL ring.
> */
>   rte_spinlock_t lock;  /**< spinlock for control queue.
> */
>   void (*notify_queue)(struct virtqueue *vq, void *cookie); /**<
> notify ops. */
> --
> 2.38.1

Reviewed-by: Chenbo Xia 


RE: [PATCH v1 08/21] net/virtio: move vring memzone to virtqueue struct

2023-01-29 Thread Xia, Chenbo
> -Original Message-
> From: Maxime Coquelin 
> Sent: Wednesday, November 30, 2022 11:56 PM
> To: dev@dpdk.org; Xia, Chenbo ;
> david.march...@redhat.com; epere...@redhat.com
> Cc: Maxime Coquelin 
> Subject: [PATCH v1 08/21] net/virtio: move vring memzone to virtqueue
> struct
> 
> Whatever its type (Rx, Tx or Ctl), every virtqueue
> requires a memzone for the vrings. This patch moves the memzone
> pointer into the virtqueue struct, simplifying the code.
> 
> Signed-off-by: Maxime Coquelin 
> ---
>  drivers/net/virtio/virtio_cvq.h|  1 -
>  drivers/net/virtio/virtio_ethdev.c | 11 ++-
>  drivers/net/virtio/virtio_rxtx.h   |  4 
>  drivers/net/virtio/virtqueue.c |  6 ++
>  drivers/net/virtio/virtqueue.h |  1 +
>  5 files changed, 5 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/net/virtio/virtio_cvq.h
> b/drivers/net/virtio/virtio_cvq.h
> index 0ff326b063..70739ae04b 100644
> --- a/drivers/net/virtio/virtio_cvq.h
> +++ b/drivers/net/virtio/virtio_cvq.h
> @@ -108,7 +108,6 @@ typedef uint8_t virtio_net_ctrl_ack;
>  struct virtnet_ctl {
>   const struct rte_memzone *hdr_mz; /**< memzone to populate hdr. */
>   rte_iova_t hdr_mem;   /**< hdr for each xmit packet */
> - const struct rte_memzone *mz; /**< mem zone to populate CTL ring.
> */
>   rte_spinlock_t lock;  /**< spinlock for control queue.
> */
>   void (*notify_queue)(struct virtqueue *vq, void *cookie); /**<
> notify ops. */
>   void *notify_cookie;  /**< cookie for notify ops */
> diff --git a/drivers/net/virtio/virtio_ethdev.c
> b/drivers/net/virtio/virtio_ethdev.c
> index a581fae408..b546916a9f 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -423,6 +423,7 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t
> queue_idx)
> 
>   memset(mz->addr, 0, mz->len);
> 
> + vq->mz = mz;
>   if (hw->use_va)
>   vq->vq_ring_mem = (uintptr_t)mz->addr;
>   else
> @@ -462,14 +463,11 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t
> queue_idx)
> 
>   vq->sw_ring = sw_ring;
>   rxvq = &vq->rxq;
> - rxvq->mz = mz;
>   rxvq->fake_mbuf = fake_mbuf;
>   } else if (queue_type == VTNET_TQ) {
>   txvq = &vq->txq;
> - txvq->mz = mz;
>   } else if (queue_type == VTNET_CQ) {
>   cvq = &vq->cq;
> - cvq->mz = mz;
>   hw->cvq = cvq;
>   vq->cq.notify_queue = &virtio_control_queue_notify;
>   }
> @@ -550,15 +548,10 @@ virtio_free_queues(struct virtio_hw *hw)
>   if (queue_type == VTNET_RQ) {
>   rte_free(vq->rxq.fake_mbuf);
>   rte_free(vq->sw_ring);
> - rte_memzone_free(vq->rxq.mz);
> - } else if (queue_type == VTNET_TQ) {
> - rte_memzone_free(vq->txq.mz);
> - } else {
> - rte_memzone_free(vq->cq.mz);
>   }
> 
>   virtio_free_queue_headers(vq);
> -
> + rte_memzone_free(vq->mz);
>   rte_free(vq);
>   hw->vqs[i] = NULL;
>   }
> diff --git a/drivers/net/virtio/virtio_rxtx.h
> b/drivers/net/virtio/virtio_rxtx.h
> index a5fe3ea95c..57af630110 100644
> --- a/drivers/net/virtio/virtio_rxtx.h
> +++ b/drivers/net/virtio/virtio_rxtx.h
> @@ -25,8 +25,6 @@ struct virtnet_rx {
> 
>   /* Statistics */
>   struct virtnet_stats stats;
> -
> - const struct rte_memzone *mz; /**< mem zone to populate RX ring. */
>  };
> 
>  struct virtnet_tx {
> @@ -34,8 +32,6 @@ struct virtnet_tx {
>   rte_iova_t hdr_mem;   /**< hdr for each xmit packet */
> 
>   struct virtnet_stats stats;   /* Statistics */
> -
> - const struct rte_memzone *mz;/**< mem zone to populate TX ring.
> */
>  };
> 
>  int virtio_rxq_vec_setup(struct virtnet_rx *rxvq);
> diff --git a/drivers/net/virtio/virtqueue.c
> b/drivers/net/virtio/virtqueue.c
> index 3b174a5923..41e3529546 100644
> --- a/drivers/net/virtio/virtqueue.c
> +++ b/drivers/net/virtio/virtqueue.c
> @@ -148,7 +148,6 @@ virtqueue_rxvq_reset_packed(struct virtqueue *vq)
>  {
>   int size = vq->vq_nentries;
>   struct vq_desc_extra *dxp;
> - struct virtnet_rx *rxvq;
>   uint16_t desc_idx;
> 
>   vq->vq_used_cons_idx = 0;
> @@ -162,8 +161,7 @@ virtqueue_rxvq_reset_packed(struct virtqueue *vq)
>   vq->vq_packed.event_flags_shadow = 0;
>   vq->vq_packed.cached_flags |= VRING_DESC_F_WRITE;
> 
> - rxvq = &vq->rxq;
> - memset(rxvq->mz->addr, 0, rxvq->mz->len);
> + memset(vq->mz->addr, 0, vq->mz->len);
> 
>   for (desc_idx = 0; desc_idx < vq->vq_nentries; desc_idx++) {
>   dxp = &vq->vq_descx[desc_idx];
> @@ -201,7 +199,7 @@ virtqueue_txvq_reset_packed(struct virtqueue *vq)
> 
>   txvq = &vq->txq;
>   txr = txvq->hdr_mz->addr;
> - memset(txvq->mz->addr, 0, txvq->

RE: [PATCH v1 09/21] net/virtio: refactor indirect desc headers init

2023-01-29 Thread Xia, Chenbo
> -Original Message-
> From: Maxime Coquelin 
> Sent: Wednesday, November 30, 2022 11:56 PM
> To: dev@dpdk.org; Xia, Chenbo ;
> david.march...@redhat.com; epere...@redhat.com
> Cc: Maxime Coquelin 
> Subject: [PATCH v1 09/21] net/virtio: refactor indirect desc headers init
> 
> This patch refactors the indirect descriptor headers
> initialization into a dedicated function, used
> by both the queue init and reset functions.
> 
> Signed-off-by: Maxime Coquelin 
> ---
>  drivers/net/virtio/virtio_ethdev.c | 30 +
>  drivers/net/virtio/virtqueue.c | 68 ++
>  drivers/net/virtio/virtqueue.h |  2 +
>  3 files changed, 54 insertions(+), 46 deletions(-)
> 
> diff --git a/drivers/net/virtio/virtio_ethdev.c
> b/drivers/net/virtio/virtio_ethdev.c
> index b546916a9f..8b17b450ec 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -347,7 +347,6 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t
> queue_idx)
>   unsigned int vq_size, size;
>   struct virtio_hw *hw = dev->data->dev_private;
>   struct virtnet_rx *rxvq = NULL;
> - struct virtnet_tx *txvq = NULL;
>   struct virtnet_ctl *cvq = NULL;
>   struct virtqueue *vq;
>   void *sw_ring = NULL;
> @@ -465,7 +464,7 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t
> queue_idx)
>   rxvq = &vq->rxq;
>   rxvq->fake_mbuf = fake_mbuf;
>   } else if (queue_type == VTNET_TQ) {
> - txvq = &vq->txq;
> + virtqueue_txq_indirect_headers_init(vq);
>   } else if (queue_type == VTNET_CQ) {
>   cvq = &vq->cq;
>   hw->cvq = cvq;
> @@ -477,33 +476,6 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t
> queue_idx)
>   else
>   vq->mbuf_addr_offset = offsetof(struct rte_mbuf, buf_iova);
> 
> - if (queue_type == VTNET_TQ) {
> - struct virtio_tx_region *txr;
> - unsigned int i;
> -
> - txr = txvq->hdr_mz->addr;
> - for (i = 0; i < vq_size; i++) {
> - /* first indirect descriptor is always the tx header */
> - if (!virtio_with_packed_queue(hw)) {
> - struct vring_desc *start_dp = txr[i].tx_indir;
> - vring_desc_init_split(start_dp,
> -   RTE_DIM(txr[i].tx_indir));
> - start_dp->addr = txvq->hdr_mem + i * sizeof(*txr)
> - + offsetof(struct virtio_tx_region, tx_hdr);
> - start_dp->len = hw->vtnet_hdr_size;
> - start_dp->flags = VRING_DESC_F_NEXT;
> - } else {
> - struct vring_packed_desc *start_dp =
> - txr[i].tx_packed_indir;
> - vring_desc_init_indirect_packed(start_dp,
> -   RTE_DIM(txr[i].tx_packed_indir));
> - start_dp->addr = txvq->hdr_mem + i * sizeof(*txr)
> - + offsetof(struct virtio_tx_region, tx_hdr);
> - start_dp->len = hw->vtnet_hdr_size;
> - }
> - }
> - }
> -
>   if (VIRTIO_OPS(hw)->setup_queue(hw, vq) < 0) {
>   PMD_INIT_LOG(ERR, "setup_queue failed");
>   ret = -EINVAL;
> diff --git a/drivers/net/virtio/virtqueue.c
> b/drivers/net/virtio/virtqueue.c
> index 41e3529546..fb651a4ca3 100644
> --- a/drivers/net/virtio/virtqueue.c
> +++ b/drivers/net/virtio/virtqueue.c
> @@ -143,6 +143,54 @@ virtqueue_rxvq_flush(struct virtqueue *vq)
>   virtqueue_rxvq_flush_split(vq);
>  }
> 
> +static void
> +virtqueue_txq_indirect_header_init_packed(struct virtqueue *vq, uint32_t
> idx)
> +{
> + struct virtio_tx_region *txr;
> + struct vring_packed_desc *desc;
> + rte_iova_t hdr_mem;
> +
> + txr = vq->txq.hdr_mz->addr;
> + hdr_mem = vq->txq.hdr_mem;
> + desc = txr[idx].tx_packed_indir;
> +
> + vring_desc_init_indirect_packed(desc, RTE_DIM(txr[idx].tx_packed_indir));
> + desc->addr = hdr_mem + idx * sizeof(*txr) + offsetof(struct virtio_tx_region, tx_hdr);
> + desc->len = vq->hw->vtnet_hdr_size;
> +}
> +
> +static void
> +virtqueue_txq_indirect_header_init_split(struct virtqueue *vq, uint32_t
> idx)
> +{
> + struct virtio_tx_region *txr;
> + struct vring_desc *desc;
> + rte_iova_t hdr_mem;
> +
> + txr = vq->txq.hdr_mz->addr;
> + hdr_mem = vq->txq.hdr_mem;
> + desc = txr[idx].tx_indir;
> +
> + vring_desc_init_split(desc, RTE_DIM(txr[idx].tx_indir));
> + desc->addr = hdr_mem + idx * sizeof(*txr) + offsetof(struct virtio_tx_region, tx_hdr);
> + desc->len = vq->hw->vtnet_hdr_size;
> + desc->flags = VRING_DESC_F_NEXT;
> +}
> +
> +void
> +virtqueue_txq_indire

RE: [PATCH v1 11/21] net/virtio: extract virtqueue init from virtio queue init

2023-01-29 Thread Xia, Chenbo
> -Original Message-
> From: Maxime Coquelin 
> Sent: Wednesday, November 30, 2022 11:56 PM
> To: dev@dpdk.org; Xia, Chenbo ;
> david.march...@redhat.com; epere...@redhat.com
> Cc: Maxime Coquelin 
> Subject: [PATCH v1 11/21] net/virtio: extract virtqueue init from virtio
> queue init
> 
> This patch extracts the virtqueue initialization out of
> the Virtio ethdev queue initialization, as preliminary
> work to provide a way for Virtio-user to allocate its
> shadow control virtqueue.
> 
> Signed-off-by: Maxime Coquelin 
> ---
>  drivers/net/virtio/virtio_ethdev.c | 261 ++--
>  drivers/net/virtio/virtqueue.c | 266 +
>  drivers/net/virtio/virtqueue.h |   5 +
>  3 files changed, 282 insertions(+), 250 deletions(-)
> 
> --
> 2.38.1

Reviewed-by: Chenbo Xia