Re: [PATCH v2 1/2] vhost: fix possible FDs leak
On Fri, Jan 27, 2023 at 5:55 PM Maxime Coquelin wrote: > > On failure, read_vhost_message() only closed the message > FDs if the header size was unexpected, but there are other > cases where it is required. For example, in the case where the > payload size read from the header is greater than the > expected maximum payload size. > > This patch fixes this by closing all message FDs in all > error cases. > > Fixes: bf472259dde6 ("vhost: fix possible denial of service by leaking FDs") > Cc: sta...@dpdk.org > > Signed-off-by: Maxime Coquelin Reviewed-by: David Marchand We mentioned offlist that the request type can be logged to help with debugging. Do you intend to add this as a follow-up patch? -- David Marchand
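For context, a hedged sketch of the error-handling shape this fix gives read_vhost_message(): one cleanup point reached by every failure path. All names and sizes here are illustrative, not the exact vhost code.

```c
#include <unistd.h>

#define MSG_HDR_SIZE     12   /* illustrative sizes, not the vhost ones */
#define MAX_PAYLOAD_SIZE 64
#define MAX_MSG_FDS       8

struct msg_ctx {
	int fds[MAX_MSG_FDS];
	int fd_num;
	unsigned int payload_size;
};

/* Assumed helper: closes every FD attached to the message. */
static void
close_msg_fds(struct msg_ctx *ctx)
{
	int i;

	for (i = 0; i < ctx->fd_num; i++)
		if (ctx->fds[i] >= 0)
			close(ctx->fds[i]);
	ctx->fd_num = 0;
}

/* Assumed to fill ctx->fds[]/fd_num and return header bytes read. */
int read_fd_message(int sockfd, struct msg_ctx *ctx);

static int
read_vhost_message_sketch(int sockfd, struct msg_ctx *ctx)
{
	int ret = read_fd_message(sockfd, ctx);

	if (ret <= 0)
		goto out;
	/* Both the unexpected-header case and the oversized-payload case
	 * (which previously leaked FDs) now fall through to the cleanup. */
	if (ret != MSG_HDR_SIZE || ctx->payload_size > MAX_PAYLOAD_SIZE)
		ret = -1;
out:
	if (ret <= 0)
		close_msg_fds(ctx); /* single cleanup point: no FD leak */
	return ret;
}
```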
Re: [PATCH v2 2/2] vhost: fix possible FD leaks on MSG_TRUNC and MSG_CTRUNC
On Fri, Jan 27, 2023 at 5:55 PM Maxime Coquelin wrote: > > This patch fixes possible FDs leaks when truncation happens > on either the message buffer or its control data. Indeed, > by returning early, it did not give a chance to retrieve the > FDs passed as ancillary data, and so caused a potential FDs > leak. > > This patch fixes this by extracting the FDs from the > ancillary data as long as the recvmsg() call succeeded. It also > improves the logs to differentiate between MSG_TRUNC and > MSG_CTRUNC. As I mentioned offlist, I am not convinced the MSG_TRUNC flag can be set on receipt of a message, since the socket is in stream mode. I am okay to keep the check as is, but it is confusing. > > Fixes: bf472259dde6 ("vhost: fix possible denial of service by leaking FDs") > Cc: sta...@dpdk.org > > Signed-off-by: Maxime Coquelin Reviewed-by: David Marchand -- David Marchand
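A self-contained sketch of the pattern being reviewed, using plain POSIX recvmsg(): extract the SCM_RIGHTS descriptors whenever the call succeeded, and only then report truncation (names are illustrative, not the vhost code).

```c
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

#define MAX_FDS 8

/* Returns recvmsg()'s byte count; *num_fds is filled even on truncation
 * so the caller can close the FDs instead of leaking them. */
static ssize_t
recv_with_fds(int sockfd, void *buf, size_t buflen, int fds[MAX_FDS],
	      int *num_fds)
{
	char control[CMSG_SPACE(MAX_FDS * sizeof(int))];
	struct iovec iov = { .iov_base = buf, .iov_len = buflen };
	struct msghdr msgh;
	struct cmsghdr *cmsg;
	ssize_t ret;

	memset(&msgh, 0, sizeof(msgh));
	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;
	msgh.msg_control = control;
	msgh.msg_controllen = sizeof(control);

	*num_fds = 0;
	ret = recvmsg(sockfd, &msgh, 0);
	if (ret <= 0)
		return ret;

	/* Extract FDs first, before any truncation handling. */
	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
	     cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
		if (cmsg->cmsg_level == SOL_SOCKET &&
		    cmsg->cmsg_type == SCM_RIGHTS) {
			int n = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);

			if (n > MAX_FDS)
				n = MAX_FDS;
			memcpy(fds, CMSG_DATA(cmsg), n * sizeof(int));
			*num_fds = n;
		}
	}

	/* Now it is safe to report truncation: the FDs are accounted for,
	 * and the caller can close them on this error path. */
	if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC))
		return -1;

	return ret;
}
```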
[PATCH 02/10] net/hns3: extract common API to query device
From: Huisong Li Extract common function to query device specifications. Fixes: 9c740336f024 ("net/hns3: get device specifications from firmware") Cc: sta...@dpdk.org Signed-off-by: Huisong Li Signed-off-by: Dongdong Liu --- drivers/net/hns3/hns3_common.c| 75 +++ drivers/net/hns3/hns3_common.h| 2 + drivers/net/hns3/hns3_ethdev.c| 63 -- drivers/net/hns3/hns3_ethdev_vf.c | 65 +-- 4 files changed, 79 insertions(+), 126 deletions(-) diff --git a/drivers/net/hns3/hns3_common.c b/drivers/net/hns3/hns3_common.c index 7adc6a4972..b0c7f8d62c 100644 --- a/drivers/net/hns3/hns3_common.c +++ b/drivers/net/hns3/hns3_common.c @@ -10,6 +10,7 @@ #include "hns3_logs.h" #include "hns3_regs.h" #include "hns3_rxtx.h" +#include "hns3_dcb.h" #include "hns3_common.h" int @@ -845,3 +846,77 @@ hns3_get_pci_revision_id(struct hns3_hw *hw, uint8_t *revision_id) return 0; } + +void +hns3_set_default_dev_specifications(struct hns3_hw *hw) +{ + struct hns3_adapter *hns = HNS3_DEV_HW_TO_ADAPTER(hw); + + hw->max_non_tso_bd_num = HNS3_MAX_NON_TSO_BD_PER_PKT; + hw->rss_ind_tbl_size = HNS3_RSS_IND_TBL_SIZE; + hw->rss_key_size = HNS3_RSS_KEY_SIZE; + hw->intr.int_ql_max = HNS3_INTR_QL_NONE; + + if (hns->is_vf) + return; + + hw->max_tm_rate = HNS3_ETHER_MAX_RATE; +} + +static void +hns3_parse_dev_specifications(struct hns3_hw *hw, struct hns3_cmd_desc *desc) +{ + struct hns3_adapter *hns = HNS3_DEV_HW_TO_ADAPTER(hw); + struct hns3_dev_specs_0_cmd *req0; + struct hns3_dev_specs_1_cmd *req1; + + req0 = (struct hns3_dev_specs_0_cmd *)desc[0].data; + req1 = (struct hns3_dev_specs_1_cmd *)desc[1].data; + + hw->max_non_tso_bd_num = req0->max_non_tso_bd_num; + hw->rss_ind_tbl_size = rte_le_to_cpu_16(req0->rss_ind_tbl_size); + hw->rss_key_size = rte_le_to_cpu_16(req0->rss_key_size); + hw->intr.int_ql_max = rte_le_to_cpu_16(req0->intr_ql_max); + hw->min_tx_pkt_len = req1->min_tx_pkt_len; + + if (hns->is_vf) + return; + + hw->max_tm_rate = rte_le_to_cpu_32(req0->max_tm_rate); +} + +static int +hns3_check_dev_specifications(struct hns3_hw *hw) +{ + if (hw->rss_ind_tbl_size == 0 || + hw->rss_ind_tbl_size > HNS3_RSS_IND_TBL_SIZE_MAX) { + hns3_err(hw, "the indirection table size obtained (%u) is invalid, and should not be zero or exceed the maximum(%u)", +hw->rss_ind_tbl_size, HNS3_RSS_IND_TBL_SIZE_MAX); + return -EINVAL; + } + + return 0; +} + +int +hns3_query_dev_specifications(struct hns3_hw *hw) +{ + struct hns3_cmd_desc desc[HNS3_QUERY_DEV_SPECS_BD_NUM]; + int ret; + int i; + + for (i = 0; i < HNS3_QUERY_DEV_SPECS_BD_NUM - 1; i++) { + hns3_cmd_setup_basic_desc(&desc[i], HNS3_OPC_QUERY_DEV_SPECS, + true); + desc[i].flag |= rte_cpu_to_le_16(HNS3_CMD_FLAG_NEXT); + } + hns3_cmd_setup_basic_desc(&desc[i], HNS3_OPC_QUERY_DEV_SPECS, true); + + ret = hns3_cmd_send(hw, desc, HNS3_QUERY_DEV_SPECS_BD_NUM); + if (ret) + return ret; + + hns3_parse_dev_specifications(hw, desc); + + return hns3_check_dev_specifications(hw); +} diff --git a/drivers/net/hns3/hns3_common.h b/drivers/net/hns3/hns3_common.h index 5aa001f0cc..8eaeda26e7 100644 --- a/drivers/net/hns3/hns3_common.h +++ b/drivers/net/hns3/hns3_common.h @@ -60,5 +60,7 @@ void hns3_unmap_rx_interrupt(struct rte_eth_dev *dev); int hns3_restore_rx_interrupt(struct hns3_hw *hw); int hns3_get_pci_revision_id(struct hns3_hw *hw, uint8_t *revision_id); +void hns3_set_default_dev_specifications(struct hns3_hw *hw); +int hns3_query_dev_specifications(struct hns3_hw *hw); #endif /* HNS3_COMMON_H */ diff --git a/drivers/net/hns3/hns3_ethdev.c b/drivers/net/hns3/hns3_ethdev.c index eb809cd8c9..ab565ce128 
100644 --- a/drivers/net/hns3/hns3_ethdev.c +++ b/drivers/net/hns3/hns3_ethdev.c @@ -2647,69 +2647,6 @@ hns3_parse_speed(int speed_cmd, uint32_t *speed) return 0; } -static void -hns3_set_default_dev_specifications(struct hns3_hw *hw) -{ - hw->max_non_tso_bd_num = HNS3_MAX_NON_TSO_BD_PER_PKT; - hw->rss_ind_tbl_size = HNS3_RSS_IND_TBL_SIZE; - hw->rss_key_size = HNS3_RSS_KEY_SIZE; - hw->max_tm_rate = HNS3_ETHER_MAX_RATE; - hw->intr.int_ql_max = HNS3_INTR_QL_NONE; -} - -static void -hns3_parse_dev_specifications(struct hns3_hw *hw, struct hns3_cmd_desc *desc) -{ - struct hns3_dev_specs_0_cmd *req0; - struct hns3_dev_specs_1_cmd *req1; - - req0 = (struct hns3_dev_specs_0_cmd *)desc[0].data; - req1 = (struct hns3_dev_specs_1_cmd *)desc[1].data; - - hw->max_non_tso_bd_num = req0->max_non_tso_bd_num; - hw->rss_ind_tbl_size = rte_le_to_cpu_16(req0->rss_ind_tbl_size); -
[PATCH 01/10] net/hns3: fix error log about indirection table size
From: Huisong Li The error log about the indirection table size printed during the initialization phase of PF and VF is misleading when the size obtained from firmware or from the PF happens to be zero. In addition, the VF driver should use the error level to print this log. Fixes: 0fce2c46dc16 ("net/hns3: fix RSS indirection table size") Cc: sta...@dpdk.org Signed-off-by: Huisong Li Signed-off-by: Dongdong Liu --- drivers/net/hns3/hns3_ethdev.c| 2 +- drivers/net/hns3/hns3_ethdev_vf.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/hns3/hns3_ethdev.c b/drivers/net/hns3/hns3_ethdev.c index d326f70129..eb809cd8c9 100644 --- a/drivers/net/hns3/hns3_ethdev.c +++ b/drivers/net/hns3/hns3_ethdev.c @@ -2679,7 +2679,7 @@ hns3_check_dev_specifications(struct hns3_hw *hw) { if (hw->rss_ind_tbl_size == 0 || hw->rss_ind_tbl_size > HNS3_RSS_IND_TBL_SIZE_MAX) { - hns3_err(hw, "the size of hash lookup table configured (%u) exceeds the maximum(%u)", + hns3_err(hw, "the indirection table size obtained (%u) is invalid, and should not be zero or exceed the maximum(%u)", hw->rss_ind_tbl_size, HNS3_RSS_IND_TBL_SIZE_MAX); return -EINVAL; } diff --git a/drivers/net/hns3/hns3_ethdev_vf.c b/drivers/net/hns3/hns3_ethdev_vf.c index d220522c43..e43815607a 100644 --- a/drivers/net/hns3/hns3_ethdev_vf.c +++ b/drivers/net/hns3/hns3_ethdev_vf.c @@ -718,8 +718,8 @@ hns3vf_check_dev_specifications(struct hns3_hw *hw) { if (hw->rss_ind_tbl_size == 0 || hw->rss_ind_tbl_size > HNS3_RSS_IND_TBL_SIZE_MAX) { - hns3_warn(hw, "the size of hash lookup table configured (%u) exceeds the maximum(%u)", - hw->rss_ind_tbl_size, HNS3_RSS_IND_TBL_SIZE_MAX); + hns3_err(hw, "the indirection table size obtained (%u) is invalid, and should not be zero or exceed the maximum(%u)", +hw->rss_ind_tbl_size, HNS3_RSS_IND_TBL_SIZE_MAX); return -EINVAL; } -- 2.22.0
[PATCH 00/10] net/hns3: some bugfixes for RSS
This patchset is to do some bugfixes for hns3 RSS. Huisong Li (10): net/hns3: fix error log about indirection table size net/hns3: extract common API to query device net/hns3: refactor set RSS hash algorithm and key interface net/hns3: fix fixed RSS key size for better compatibility net/hns3: fix misclearing RSS configuration net/hns3: use RSS filter list to check duplicated rule net/hns3: remove useless code when destroying valid RSS rule net/hns3: fix useless warning when flushing or destroying rule net/hns3: fix bad memory structure conversion net/hns3: fix incorrect check for duplicate RSS rule drivers/net/hns3/hns3_common.c| 87 +++- drivers/net/hns3/hns3_common.h| 2 + drivers/net/hns3/hns3_ethdev.c| 63 - drivers/net/hns3/hns3_ethdev_vf.c | 65 + drivers/net/hns3/hns3_flow.c | 217 ++ drivers/net/hns3/hns3_rss.c | 63 - drivers/net/hns3/hns3_rss.h | 7 +- 7 files changed, 227 insertions(+), 277 deletions(-) -- 2.22.0
[PATCH 03/10] net/hns3: refactor set RSS hash algorithm and key interface
From: Huisong Li The hns3_rss_set_algo_key() is used to set the RSS hash algorithm and key to hardware. The maximum number of commands sent to firmware depends on the length of the key. However, this number is currently fixed, which isn't good for key expansion. In addition, the hash algorithm comes from rss_info::hash_algo maintained in the driver, which also isn't good for the usability of this function. This patch makes the hash algorithm and key length input parameters of this interface. Fixes: c37ca66f2b27 ("net/hns3: support RSS") Cc: sta...@dpdk.org Signed-off-by: Huisong Li Signed-off-by: Dongdong Liu --- drivers/net/hns3/hns3_flow.c | 3 ++- drivers/net/hns3/hns3_rss.c | 48 drivers/net/hns3/hns3_rss.h | 4 ++- 3 files changed, 26 insertions(+), 29 deletions(-) diff --git a/drivers/net/hns3/hns3_flow.c b/drivers/net/hns3/hns3_flow.c index a2c1589c39..95609f8483 100644 --- a/drivers/net/hns3/hns3_flow.c +++ b/drivers/net/hns3/hns3_flow.c @@ -1494,7 +1494,8 @@ hns3_hw_rss_hash_set(struct hns3_hw *hw, struct rte_flow_action_rss *rss_config) if (ret) return ret; - ret = hns3_rss_set_algo_key(hw, rss_config->key); + ret = hns3_rss_set_algo_key(hw, hw->rss_info.hash_algo, + rss_config->key, HNS3_RSS_KEY_SIZE); if (ret) return ret; diff --git a/drivers/net/hns3/hns3_rss.c b/drivers/net/hns3/hns3_rss.c index ca5a129234..3db7bf0445 100644 --- a/drivers/net/hns3/hns3_rss.c +++ b/drivers/net/hns3/hns3_rss.c @@ -277,45 +277,37 @@ static const struct { /* * rss_generic_config command function, opcode:0x0D01. - * Used to set algorithm, key_offset and hash key of rss. + * Used to set algorithm and hash key of RSS. */ int -hns3_rss_set_algo_key(struct hns3_hw *hw, const uint8_t *key) +hns3_rss_set_algo_key(struct hns3_hw *hw, uint8_t hash_algo, + const uint8_t *key, uint8_t key_len) { -#define HNS3_KEY_OFFSET_MAX3 -#define HNS3_SET_HASH_KEY_BYTE_FOUR2 - struct hns3_rss_generic_config_cmd *req; struct hns3_cmd_desc desc; - uint32_t key_offset, key_size; - const uint8_t *key_cur; - uint8_t cur_offset; + const uint8_t *cur_key; + uint16_t cur_key_size; + uint16_t max_bd_num; + uint16_t idx; int ret; req = (struct hns3_rss_generic_config_cmd *)desc.data; - /* -* key_offset=0, hash key byte0~15 is set to hardware. -* key_offset=1, hash key byte16~31 is set to hardware. -* key_offset=2, hash key byte32~39 is set to hardware. 
-*/ - for (key_offset = 0; key_offset < HNS3_KEY_OFFSET_MAX; key_offset++) { + max_bd_num = DIV_ROUND_UP(key_len, HNS3_RSS_HASH_KEY_NUM); + for (idx = 0; idx < max_bd_num; idx++) { hns3_cmd_setup_basic_desc(&desc, HNS3_OPC_RSS_GENERIC_CONFIG, false); - req->hash_config |= - (hw->rss_info.hash_algo & HNS3_RSS_HASH_ALGO_MASK); - req->hash_config |= (key_offset << HNS3_RSS_HASH_KEY_OFFSET_B); + req->hash_config |= (hash_algo & HNS3_RSS_HASH_ALGO_MASK); + req->hash_config |= (idx << HNS3_RSS_HASH_KEY_OFFSET_B); - if (key_offset == HNS3_SET_HASH_KEY_BYTE_FOUR) - key_size = HNS3_RSS_KEY_SIZE - HNS3_RSS_HASH_KEY_NUM * - HNS3_SET_HASH_KEY_BYTE_FOUR; + if (idx == max_bd_num - 1) + cur_key_size = key_len % HNS3_RSS_HASH_KEY_NUM; else - key_size = HNS3_RSS_HASH_KEY_NUM; + cur_key_size = HNS3_RSS_HASH_KEY_NUM; - cur_offset = key_offset * HNS3_RSS_HASH_KEY_NUM; - key_cur = key + cur_offset; - memcpy(req->hash_key, key_cur, key_size); + cur_key = key + idx * HNS3_RSS_HASH_KEY_NUM; + memcpy(req->hash_key, cur_key, cur_key_size); ret = hns3_cmd_send(hw, &desc, 1); if (ret) { @@ -518,7 +510,8 @@ hns3_dev_rss_hash_update(struct rte_eth_dev *dev, goto set_tuple_fail; if (key) { - ret = hns3_rss_set_algo_key(hw, key); + ret = hns3_rss_set_algo_key(hw, hw->rss_info.hash_algo, + key, HNS3_RSS_KEY_SIZE); if (ret) goto set_algo_key_fail; } @@ -795,8 +788,9 @@ hns3_config_rss(struct hns3_adapter *hns) break; } - /* Configure RSS hash algorithm and hash key offset */ - ret = hns3_rss_set_algo_key(hw, hash_key); + /* Configure RSS hash algorithm and hash key */ + ret = hns3_rss_set_algo_key(hw, hw->rss_info.hash_algo, hash_key, + HNS3_RSS_KEY_SIZE); if (ret) return ret; diff --git a/drivers/n
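The loop above spreads an arbitrary-length key over several firmware descriptors. Below is a standalone sketch of that chunking logic; the 16-byte chunk size is an assumption mirroring HNS3_RSS_HASH_KEY_NUM, and the last-chunk size is computed by subtraction here, which matches the patch's modulo for the default 40-byte key.

```c
#include <stdint.h>
#include <stdio.h>

#define RSS_KEY_BYTES_PER_BD 16 /* assumed bytes of key per command descriptor */

/* Print how a key of 'key_len' bytes is split across descriptors:
 * DIV_ROUND_UP(key_len, chunk) descriptors, last one carries the rest. */
static void
show_key_chunking(uint16_t key_len)
{
	uint16_t max_bd_num = (key_len + RSS_KEY_BYTES_PER_BD - 1) /
			      RSS_KEY_BYTES_PER_BD;
	uint16_t idx;

	for (idx = 0; idx < max_bd_num; idx++) {
		uint16_t offset = idx * RSS_KEY_BYTES_PER_BD;
		uint16_t cur = (idx == max_bd_num - 1) ?
			       key_len - offset : RSS_KEY_BYTES_PER_BD;

		printf("BD %u: key bytes [%u, %u)\n", idx, offset, offset + cur);
	}
}

int
main(void)
{
	show_key_chunking(40); /* default 40-byte key -> 3 descriptors */
	return 0;
}
```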
[PATCH 04/10] net/hns3: fix fixed RSS key size for better compatibility
From: Huisong Li For better compatibility, the RSS key size of PF and VF are obtained from firmware. However, many places still used the old macro HNS3_RSS_KEY_SIZE as the key size. Fixes: 9c740336f024 ("net/hns3: get device specifications from firmware") Cc: sta...@dpdk.org Signed-off-by: Huisong Li Signed-off-by: Dongdong Liu --- drivers/net/hns3/hns3_common.c | 12 +++- drivers/net/hns3/hns3_flow.c | 26 -- drivers/net/hns3/hns3_rss.c| 23 +++ drivers/net/hns3/hns3_rss.h| 3 ++- 4 files changed, 36 insertions(+), 28 deletions(-) diff --git a/drivers/net/hns3/hns3_common.c b/drivers/net/hns3/hns3_common.c index b0c7f8d62c..2da0f30964 100644 --- a/drivers/net/hns3/hns3_common.c +++ b/drivers/net/hns3/hns3_common.c @@ -129,7 +129,7 @@ hns3_dev_infos_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *info) }; info->reta_size = hw->rss_ind_tbl_size; - info->hash_key_size = HNS3_RSS_KEY_SIZE; + info->hash_key_size = hw->rss_key_size; info->flow_type_rss_offloads = HNS3_ETH_RSS_SUPPORT; info->default_rxportconf.burst_size = HNS3_DEFAULT_PORT_CONF_BURST_SIZE; @@ -895,6 +895,16 @@ hns3_check_dev_specifications(struct hns3_hw *hw) return -EINVAL; } + if (hw->rss_key_size == 0 || hw->rss_key_size > HNS3_RSS_KEY_SIZE_MAX) { + hns3_err(hw, "the RSS key size obtained (%u) is invalid, and should not be zero or exceed the maximum(%u)", +hw->rss_key_size, HNS3_RSS_KEY_SIZE_MAX); + return -EINVAL; + } + + if (hw->rss_key_size > HNS3_RSS_KEY_SIZE) + hns3_warn(hw, "the RSS key size obtained (%u) is greater than the default key size (%u)", + hw->rss_key_size, HNS3_RSS_KEY_SIZE); + return 0; } diff --git a/drivers/net/hns3/hns3_flow.c b/drivers/net/hns3/hns3_flow.c index 95609f8483..a18ec7650d 100644 --- a/drivers/net/hns3/hns3_flow.c +++ b/drivers/net/hns3/hns3_flow.c @@ -1406,10 +1406,10 @@ hns3_parse_rss_filter(struct rte_eth_dev *dev, return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION_CONF, act, "a nonzero RSS encapsulation level is not supported"); - if (rss->key_len && rss->key_len != RTE_DIM(rss_conf->key)) + if (rss->key_len && rss->key_len != hw->rss_key_size) return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION_CONF, act, - "RSS hash key must be exactly 40 bytes"); + "invalid RSS key length"); if (!hns3_rss_input_tuple_supported(hw, rss)) return rte_flow_error_set(error, EINVAL, @@ -1443,16 +1443,6 @@ hns3_disable_rss(struct hns3_hw *hw) return 0; } -static void -hns3_adjust_rss_key(struct hns3_hw *hw, struct rte_flow_action_rss *rss_conf) -{ - if (rss_conf->key == NULL || rss_conf->key_len < HNS3_RSS_KEY_SIZE) { - hns3_warn(hw, "Default RSS hash key to be set"); - rss_conf->key = hns3_hash_key; - rss_conf->key_len = HNS3_RSS_KEY_SIZE; - } -} - static int hns3_parse_rss_algorithm(struct hns3_hw *hw, enum rte_eth_hash_function *func, uint8_t *hash_algo) @@ -1485,9 +1475,16 @@ hns3_parse_rss_algorithm(struct hns3_hw *hw, enum rte_eth_hash_function *func, static int hns3_hw_rss_hash_set(struct hns3_hw *hw, struct rte_flow_action_rss *rss_config) { + uint8_t rss_key[HNS3_RSS_KEY_SIZE_MAX] = {0}; + bool use_default_key = false; int ret; - hns3_adjust_rss_key(hw, rss_config); + if (rss_config->key == NULL || rss_config->key_len != hw->rss_key_size) { + hns3_warn(hw, "Default RSS hash key to be set"); + memcpy(rss_key, hns3_hash_key, + RTE_MIN(sizeof(hns3_hash_key), hw->rss_key_size)); + use_default_key = true; + } ret = hns3_parse_rss_algorithm(hw, &rss_config->func, &hw->rss_info.hash_algo); @@ -1495,7 +1492,8 @@ hns3_hw_rss_hash_set(struct hns3_hw *hw, struct 
rte_flow_action_rss *rss_config) return ret; ret = hns3_rss_set_algo_key(hw, hw->rss_info.hash_algo, - rss_config->key, HNS3_RSS_KEY_SIZE); + use_default_key ? rss_key : rss_config->key, + hw->rss_key_size); if (ret) return ret; diff --git a/drivers/net/hns3/hns3_rss.c b/drivers/net/hns3/hns3_rss.c index 3db7bf0445..d6e0754273 100644 --- a/drivers/net/hns3/hns3_rss.c +++ b/drivers/net/hns3/hns3_rss.c @@ -316,7 +316,7 @@ hns3_rss_set_algo_key(struct hns3_hw *hw, uint8_t hash_algo,
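On the application side, the takeaway of this patch is to query the key size instead of assuming 40 bytes. A minimal sketch using the generic ethdev API (the RTE_ETH_RSS_IP tuple selection is an arbitrary example, not part of this patch):

```c
#include <errno.h>
#include <stdint.h>
#include <rte_ethdev.h>

/* Program an RSS key whose length matches what the device (firmware)
 * reports, rather than a hard-coded historical value. */
static int
set_rss_key(uint16_t port_id, uint8_t *key, uint8_t key_len)
{
	struct rte_eth_dev_info info;
	struct rte_eth_rss_conf conf = { 0 };
	int ret;

	ret = rte_eth_dev_info_get(port_id, &info);
	if (ret != 0)
		return ret;
	if (key_len != info.hash_key_size)
		return -EINVAL; /* must match what the firmware reported */

	conf.rss_key = key;
	conf.rss_key_len = key_len;
	conf.rss_hf = RTE_ETH_RSS_IP; /* example tuple selection */
	return rte_eth_dev_rss_hash_update(port_id, &conf);
}
```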
[PATCH 05/10] net/hns3: fix misclearing RSS configuration
From: Huisong Li The RSS configuration will be mistakenly cleared when the driver receives an RSS rule which has more than one RSS action. Fixes: c37ca66f2b27 ("net/hns3: support RSS") Cc: sta...@dpdk.org Signed-off-by: Huisong Li Signed-off-by: Dongdong Liu --- drivers/net/hns3/hns3_flow.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/hns3/hns3_flow.c b/drivers/net/hns3/hns3_flow.c index a18ec7650d..c338eab049 100644 --- a/drivers/net/hns3/hns3_flow.c +++ b/drivers/net/hns3/hns3_flow.c @@ -1421,12 +1421,10 @@ hns3_parse_rss_filter(struct rte_eth_dev *dev, /* Check if the next not void action is END */ NEXT_ITEM_OF_ACTION(act, actions, act_index); - if (act->type != RTE_FLOW_ACTION_TYPE_END) { - memset(rss_conf, 0, sizeof(struct hns3_rss_conf)); + if (act->type != RTE_FLOW_ACTION_TYPE_END) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, act, "Not supported action."); - } return 0; } -- 2.22.0
[PATCH 06/10] net/hns3: use RSS filter list to check duplicated rule
From: Huisong Li All rules from the user are saved in the RSS filter list, so use the RSS filter list to check for duplicated rules. Fixes: c37ca66f2b27 ("net/hns3: support RSS") Cc: sta...@dpdk.org Signed-off-by: Huisong Li Signed-off-by: Dongdong Liu --- drivers/net/hns3/hns3_flow.c | 35 +-- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/net/hns3/hns3_flow.c b/drivers/net/hns3/hns3_flow.c index c338eab049..303275ae93 100644 --- a/drivers/net/hns3/hns3_flow.c +++ b/drivers/net/hns3/hns3_flow.c @@ -1300,7 +1300,7 @@ hns3_action_rss_same(const struct rte_flow_action_rss *comp, !memcmp(comp->key, with->key, with->key_len); return (func_is_same && rss_key_is_same && - comp->types == (with->types & HNS3_ETH_RSS_SUPPORT) && + comp->types == with->types && comp->level == with->level && comp->queue_num == with->queue_num && !memcmp(comp->queue, with->queue, @@ -1596,15 +1596,7 @@ hns3_config_rss_filter(struct hns3_hw *hw, } /* Set hash algorithm and flow types by the user's config */ - ret = hns3_hw_rss_hash_set(hw, &rss_flow_conf); - if (ret) - return ret; - - ret = hns3_rss_conf_copy(rss_info, &rss_flow_conf); - if (ret) - hns3_err(hw, "RSS config init fail(%d)", ret); - - return ret; + return hns3_hw_rss_hash_set(hw, &rss_flow_conf); } static int @@ -1676,17 +1668,32 @@ hns3_restore_filter(struct hns3_adapter *hns) return hns3_restore_rss_filter(hw); } +static bool +hns3_rss_action_is_dup(struct hns3_hw *hw, + const struct rte_flow_action_rss *act) +{ + struct hns3_rss_conf_ele *filter; + + TAILQ_FOREACH(filter, &hw->flow_rss_list, entries) { + if (!filter->filter_info.valid) + continue; + + if (hns3_action_rss_same(&filter->filter_info.conf, act)) + return true; + } + + return false; +} + static int hns3_flow_parse_rss(struct rte_eth_dev *dev, const struct hns3_rss_conf *conf, bool add) { struct hns3_adapter *hns = dev->data->dev_private; struct hns3_hw *hw = &hns->hw; - bool ret; - ret = hns3_action_rss_same(&hw->rss_info.conf, &conf->conf); - if (ret) { - hns3_err(hw, "Enter duplicate RSS configuration : %d", ret); + if (hns3_rss_action_is_dup(hw, &conf->conf)) { + hns3_err(hw, "duplicate RSS configuration"); return -EINVAL; } -- 2.22.0
[PATCH 07/10] net/hns3: remove useless code when destroying valid RSS rule
From: Huisong Li The hw::rss_info::conf::func was set to the macro RTE_ETH_HASH_FUNCTION_MAX and hw::rss_info::conf::queue was set to NULL when all rules were flushed, which indicates that no flow rule is issued. See commit eb158fc756a5 ("net/hns3: fix config when creating RSS rule after flush"). Actually, the way of determining whether there are rules has been changed to walking the flow RSS list. See commit 705a50800334 ("net/hns3: fix RSS filter restore"). In addition, the rte_flow_action_rss from the user isn't saved to 'conf' in hw->rss_info now, so this code can be removed. Fixes: eb158fc756a5 ("net/hns3: fix config when creating RSS rule after flush") Fixes: 705a50800334 ("net/hns3: fix RSS filter restore") Cc: sta...@dpdk.org Signed-off-by: Huisong Li Signed-off-by: Dongdong Liu --- drivers/net/hns3/hns3_flow.c | 26 ++ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/drivers/net/hns3/hns3_flow.c b/drivers/net/hns3/hns3_flow.c index 303275ae93..7adde16cbc 100644 --- a/drivers/net/hns3/hns3_flow.c +++ b/drivers/net/hns3/hns3_flow.c @@ -1279,19 +1279,8 @@ hns3_action_rss_same(const struct rte_flow_action_rss *comp, bool rss_key_is_same; bool func_is_same; - /* -* When user flush all RSS rule, RSS func is set invalid with -* RTE_ETH_HASH_FUNCTION_MAX. Then the user create a flow after -* flushed, any validate RSS func is different with it before -* flushed. Others, when user create an action RSS with RSS func -* specified RTE_ETH_HASH_FUNCTION_DEFAULT, the func is the same -* between continuous RSS flow. -*/ - if (comp->func == RTE_ETH_HASH_FUNCTION_MAX) - func_is_same = false; - else - func_is_same = (with->func != RTE_ETH_HASH_FUNCTION_DEFAULT) ? - (comp->func == with->func) : true; + func_is_same = (with->func != RTE_ETH_HASH_FUNCTION_DEFAULT) ? + (comp->func == with->func) : true; if (with->key_len == 0 || with->key == NULL) rss_key_is_same = 1; @@ -1533,7 +1522,6 @@ static int hns3_config_rss_filter(struct hns3_hw *hw, const struct hns3_rss_conf *conf, bool add) { - struct hns3_rss_conf *rss_info; uint64_t flow_types; uint16_t num; int ret; @@ -1560,7 +1548,6 @@ hns3_config_rss_filter(struct hns3_hw *hw, /* Update the useful flow types */ rss_flow_conf.types = flow_types; - rss_info = &hw->rss_info; if (!add) { if (!conf->valid) return 0; @@ -1571,15 +1558,6 @@ hns3_config_rss_filter(struct hns3_hw *hw, return ret; } - if (rss_flow_conf.queue_num) { - /* -* Due the content of queue pointer have been reset to -* 0, the rss_info->conf.queue should be set to NULL -*/ - rss_info->conf.queue = NULL; - rss_info->conf.queue_num = 0; - } - return 0; } -- 2.22.0
[PATCH 08/10] net/hns3: fix useless warning when flushing or destroying rule
From: Huisong Li The types of a rule are no longer used when the user flushes all rules or destroys a rule. But the user would receive RSS types warnings like: "modified RSS types based on hardware support, requested:0x137f83fffc configured:0x3ffc". Fixes: ec674cb742e5 ("net/hns3: fix flushing RSS rule") Cc: sta...@dpdk.org Signed-off-by: Huisong Li Signed-off-by: Dongdong Liu --- drivers/net/hns3/hns3_flow.c | 22 +++--- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/net/hns3/hns3_flow.c b/drivers/net/hns3/hns3_flow.c index 7adde16cbc..fbc38dd3d4 100644 --- a/drivers/net/hns3/hns3_flow.c +++ b/drivers/net/hns3/hns3_flow.c @@ -1537,17 +1537,6 @@ hns3_config_rss_filter(struct hns3_hw *hw, .queue = conf->conf.queue, }; - /* Filter the unsupported flow types */ - flow_types = conf->conf.types ? -rss_flow_conf.types & HNS3_ETH_RSS_SUPPORT : -hw->rss_info.conf.types; - if (flow_types != rss_flow_conf.types) - hns3_warn(hw, "modified RSS types based on hardware support, " - "requested:0x%" PRIx64 " configured:0x%" PRIx64, - rss_flow_conf.types, flow_types); - /* Update the useful flow types */ - rss_flow_conf.types = flow_types; - if (!add) { if (!conf->valid) return 0; @@ -1573,6 +1562,17 @@ hns3_config_rss_filter(struct hns3_hw *hw, return ret; } + /* Filter the unsupported flow types */ + flow_types = conf->conf.types ? +rss_flow_conf.types & HNS3_ETH_RSS_SUPPORT : +hw->rss_info.conf.types; + if (flow_types != rss_flow_conf.types) + hns3_warn(hw, "modified RSS types based on hardware support," + " requested:0x%" PRIx64 " configured:0x%" PRIx64, + rss_flow_conf.types, flow_types); + /* Update the useful flow types */ + rss_flow_conf.types = flow_types; + /* Set hash algorithm and flow types by the user's config */ return hns3_hw_rss_hash_set(hw, &rss_flow_conf); } -- 2.22.0
[PATCH 09/10] net/hns3: fix bad memory structure conversion
From: Huisong Li When the type in 'struct rte_flow_action' is RTE_FLOW_ACTION_TYPE_RSS, the 'conf' pointer references the 'struct rte_flow_action_rss' instead of the 'struct hns3_rss_conf' in the driver. But the driver casts this 'conf' pointer to 'struct hns3_rss_conf' to get the RSS action configuration. In addition, the RSS filter configuration is directly cloned to the RSS filter node instead of copying it after successfully setting it to hardware. Fixes: c37ca66f2b27 ("net/hns3: support RSS") Cc: sta...@dpdk.org Signed-off-by: Huisong Li Signed-off-by: Dongdong Liu --- drivers/net/hns3/hns3_flow.c | 57 +--- 1 file changed, 20 insertions(+), 37 deletions(-) diff --git a/drivers/net/hns3/hns3_flow.c b/drivers/net/hns3/hns3_flow.c index fbc38dd3d4..307aba75a7 100644 --- a/drivers/net/hns3/hns3_flow.c +++ b/drivers/net/hns3/hns3_flow.c @@ -95,8 +95,8 @@ static const struct rte_flow_action * hns3_find_rss_general_action(const struct rte_flow_item pattern[], const struct rte_flow_action actions[]) { + const struct rte_flow_action_rss *rss_act; const struct rte_flow_action *act = NULL; - const struct hns3_rss_conf *rss; bool have_eth = false; for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { @@ -115,8 +115,8 @@ hns3_find_rss_general_action(const struct rte_flow_item pattern[], } } - rss = act->conf; - if (have_eth && rss->conf.queue_num) { + rss_act = act->conf; + if (have_eth && rss_act->queue_num) { /* * Pattern have ETH and action's queue_num > 0, indicate this is * queue region configuration. */ @@ -1296,30 +1296,6 @@ hns3_action_rss_same(const struct rte_flow_action_rss *comp, sizeof(*with->queue) * with->queue_num)); } -static int -hns3_rss_conf_copy(struct hns3_rss_conf *out, - const struct rte_flow_action_rss *in) -{ - if (in->key_len > RTE_DIM(out->key) || - in->queue_num > RTE_DIM(out->queue)) - return -EINVAL; - if (in->key == NULL && in->key_len) - return -EINVAL; - out->conf = (struct rte_flow_action_rss) { - .func = in->func, - .level = in->level, - .types = in->types, - .key_len = in->key_len, - .queue_num = in->queue_num, - }; - out->conf.queue = memcpy(out->queue, in->queue, - sizeof(*in->queue) * in->queue_num); - if (in->key) - out->conf.key = memcpy(out->key, in->key, in->key_len); - - return 0; -} - static bool hns3_rss_input_tuple_supported(struct hns3_hw *hw, const struct rte_flow_action_rss *rss) @@ -1733,9 +1709,10 @@ hns3_flow_create_rss_rule(struct rte_eth_dev *dev, struct rte_flow *flow) { struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); + const struct rte_flow_action_rss *rss_act; struct hns3_rss_conf_ele *rss_filter_ptr; struct hns3_rss_conf_ele *filter_ptr; - const struct hns3_rss_conf *rss_conf; + struct hns3_rss_conf *new_conf; int ret; rss_filter_ptr = rte_zmalloc("hns3 rss filter", @@ -1745,19 +1722,25 @@ hns3_flow_create_rss_rule(struct rte_eth_dev *dev, return -ENOMEM; } - /* -* After all the preceding tasks are successfully configured, configure -* rules to the hardware to simplify the rollback of rules in the -* hardware. 
-*/ - rss_conf = (const struct hns3_rss_conf *)act->conf; - ret = hns3_flow_parse_rss(dev, rss_conf, true); + rss_act = (const struct rte_flow_action_rss *)act->conf; + new_conf = &rss_filter_ptr->filter_info; + memcpy(&new_conf->conf, rss_act, sizeof(*rss_act)); + if (rss_act->queue_num > 0) { + memcpy(new_conf->queue, rss_act->queue, + rss_act->queue_num * sizeof(new_conf->queue[0])); + new_conf->conf.queue = new_conf->queue; + } + if (rss_act->key_len > 0) { + memcpy(new_conf->key, rss_act->key, + rss_act->key_len * sizeof(new_conf->key[0])); + new_conf->conf.key = new_conf->key; + } + + ret = hns3_flow_parse_rss(dev, new_conf, true); if (ret != 0) { rte_free(rss_filter_ptr); return ret; } - - hns3_rss_conf_copy(&rss_filter_ptr->filter_info, &rss_conf->conf); rss_filter_ptr->filter_info.valid = true; /* -- 2.22.0
[PATCH 10/10] net/hns3: fix incorrect check for duplicate RSS rule
From: Huisong Li Currently, the interface for verifying duplicate RSS rules has some problems: 1) If the value of 'func' in the RSS rule being configured is the default value, this rule is mistakenly considered a duplicate rule. 2) If the key length is zero or 'key' is NULL in the RSS rule being configured, this rule is also mistakenly considered a duplicate rule. 3) If a NULL 'key' or 'queue' in struct rte_flow_action_rss is used in the memory comparison, it may cause a segmentation fault. Fixes: c37ca66f2b27 ("net/hns3: support RSS") Cc: sta...@dpdk.org Signed-off-by: Huisong Li Signed-off-by: Dongdong Liu --- drivers/net/hns3/hns3_flow.c | 58 ++-- 1 file changed, 42 insertions(+), 16 deletions(-) diff --git a/drivers/net/hns3/hns3_flow.c b/drivers/net/hns3/hns3_flow.c index 307aba75a7..f76ceb18d1 100644 --- a/drivers/net/hns3/hns3_flow.c +++ b/drivers/net/hns3/hns3_flow.c @@ -1272,28 +1272,54 @@ hns3_filterlist_flush(struct rte_eth_dev *dev) } } +static bool +hns3_flow_rule_key_same(const struct rte_flow_action_rss *comp, + const struct rte_flow_action_rss *with) +{ + if (comp->key_len != with->key_len) + return false; + + if (with->key_len == 0) + return true; + + if (comp->key == NULL && with->key == NULL) + return true; + + if (!(comp->key != NULL && with->key != NULL)) + return false; + + return !memcmp(comp->key, with->key, with->key_len); +} + +static bool +hns3_flow_rule_queues_same(const struct rte_flow_action_rss *comp, + const struct rte_flow_action_rss *with) +{ + if (comp->queue_num != with->queue_num) + return false; + + if (with->queue_num == 0) + return true; + + if (comp->queue == NULL && with->queue == NULL) + return true; + + if (!(comp->queue != NULL && with->queue != NULL)) + return false; + + return !memcmp(comp->queue, with->queue, with->queue_num); +} + static bool hns3_action_rss_same(const struct rte_flow_action_rss *comp, const struct rte_flow_action_rss *with) { - bool rss_key_is_same; - bool func_is_same; + bool same_func; - func_is_same = (with->func != RTE_ETH_HASH_FUNCTION_DEFAULT) ? - (comp->func == with->func) : true; + same_func = (comp->func == with->func); - if (with->key_len == 0 || with->key == NULL) - rss_key_is_same = 1; - else - rss_key_is_same = comp->key_len == with->key_len && - !memcmp(comp->key, with->key, with->key_len); - - return (func_is_same && rss_key_is_same && - comp->types == with->types && - comp->level == with->level && - comp->queue_num == with->queue_num && - !memcmp(comp->queue, with->queue, - sizeof(*with->queue) * with->queue_num)); + return same_func && hns3_flow_rule_key_same(comp, with) && + hns3_flow_rule_queues_same(comp, with); } static bool -- 2.22.0
RE: [PATCH v2] net: not build PMD AVX library when no IOVA as PA
> -Original Message- > From: Zhang, Qi Z > Sent: Monday, December 12, 2022 10:56 PM > To: m...@smartsharesystems.com; Richardson, Bruce > ; Lu, Wenzhuo > Cc: dev@dpdk.org; Wu, Wenjun1 ; Zhang, Qi Z > ; sta...@dpdk.org > Subject: [PATCH v2] net: not build PMD AVX library when no IOVA as PA > > PMDs that do not announce pmd_supports_disable_iova_as_pa will not be built > when RTE_IOVA_AS_PA is not defined, but some AVX libraries for the vector path > are not skipped by the build system, which causes compile errors. > > The patch modifies the i40e, iavf and ice meson files to skip the AVX library build when > RTE_IOVA_AS_PA is not defined. > > Cc: sta...@dpdk.org > > Signed-off-by: Qi Zhang Move this for next-net review. Acked-by: Qi Zhang Applied to dpdk-next-net-intel. Thanks Qi
RE: [PATCH v2] net/ice: support IOVA as VA mode
> -Original Message- > From: Zhang, Qi Z > Sent: Monday, December 12, 2022 9:28 PM > To: m...@smartsharesystems.com; Richardson, Bruce > ; Lu, Wenzhuo > Cc: dev@dpdk.org; Wu, Wenjun1 > Subject: RE: [PATCH v2] net/ice: support IOVA as VA mode > > > > > -Original Message- > > From: Zhang, Qi Z > > Sent: Tuesday, December 13, 2022 5:36 AM > > To: m...@smartsharesystems.com; Richardson, Bruce > > ; Lu, Wenzhuo > > Cc: dev@dpdk.org; Wu, Wenjun1 ; Zhang, Qi Z > > > > Subject: [PATCH v2] net/ice: support IOVA as VA mode > > > > Claim pmd_supports_disable_iova_as_pa. Remove buf_iova access when > > RTE_IOVA_AS_PA is not defined. > > > > The patch simply replaces buf_iova with buf_addr in IOVA as VA mode. > > Some SIMD instructions in the data path may be overused; further > > optimization is expected. > > > > Signed-off-by: Qi Zhang > > Forgot to inherit the ack from v1 > Acked-by: Morten Brørup Applied to dpdk-next-net-intel. Thanks Qi
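For illustration, a hedged sketch of the buf_iova/buf_addr substitution the commit message describes (the macro name is taken from this thread; the helper name is ours, not a DPDK API):

```c
#include <stdint.h>
#include <rte_mbuf.h>

/* With IOVA-as-PA compiled out, a segment's DMA address is simply its
 * virtual data address; otherwise it comes from the buf_iova field. */
static inline uint64_t
seg_dma_addr(const struct rte_mbuf *m)
{
#ifdef RTE_IOVA_AS_PA
	return rte_mbuf_data_iova(m); /* uses m->buf_iova */
#else
	return (uint64_t)(uintptr_t)((const char *)m->buf_addr + m->data_off);
#endif
}
```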
RE: [PATCH v2 0/3] net/igc: support PTP timesync
> -Original Message- > From: Su, Simei > Sent: Tuesday, January 17, 2023 9:26 PM > To: Zhang, Qi Z ; Guo, Junfeng > > Cc: dev@dpdk.org; Wu, Wenjun1 ; Su, Simei > > Subject: [PATCH v2 0/3] net/igc: support PTP timesync > > [PATCH v2 1/3] code refactoring. > [PATCH v2 2/3] add related definitions for ptp timesync. > [PATCH v2 3/3] add IEEE1588 API to support timesync. > > v2: > * Refine commit log. > * Update the doc/guides/nics/features/igc.ini to add "Timesync" feature. > * Add release notes. > > Simei Su (3): > net/igc: code refactoring > net/igc/base: support PTP timesync > net/igc: support IEEE 1588 PTP > > doc/guides/nics/features/igc.ini | 1 + > doc/guides/rel_notes/release_23_03.rst | 3 + > drivers/net/igc/base/igc_defines.h | 11 ++ > drivers/net/igc/igc_ethdev.c | 222 > + > drivers/net/igc/igc_ethdev.h | 4 +- > drivers/net/igc/igc_txrx.c | 166 +++- > drivers/net/igc/igc_txrx.h | 116 + > 7 files changed, 401 insertions(+), 122 deletions(-) > > -- > 2.9.5 Acked-by: Qi Zhang Refined PATCH 2/3's title and commit log as below: net/igc/base: expose timesync registers Add definitions for timesync related registers. Applied to dpdk-next-net-intel. Thanks Qi
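For context, a minimal application-side sketch of the generic ethdev timesync flow that this igc support plugs into (simplified; assumes the port is started and an IEEE 1588 PTP frame has been received):

```c
#include <time.h>
#include <rte_ethdev.h>

/* Enable timesync on a port and read the RX timestamp of a received
 * PTP frame (one flagged with RTE_MBUF_F_RX_IEEE1588_TMST). */
static int
ptp_read_rx_timestamp(uint16_t port_id, struct timespec *ts)
{
	int ret;

	ret = rte_eth_timesync_enable(port_id);
	if (ret != 0)
		return ret;

	/* The last argument is the device-specific timesync flag/index;
	 * 0 is used here as a simple example. */
	return rte_eth_timesync_read_rx_timestamp(port_id, ts, 0);
}
```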
[PATCH] gpudev: fix deadlocks when registering callback
Agree with the patch. Thanks!
RE: [EXT] Re: [dpdk-dev] [PATCH v1 00/12] mldev: introduce machine learning device library
External Email -- 25/01/2023 20:01, Jerin Jacob: > On Wed, Jan 25, 2023 at 7:50 PM Thomas Monjalon wrote: > > 14/11/2022 13:02, jer...@marvell.com: > > > ML Model: An ML model is an algorithm trained over a dataset. A > > > model consists of procedure/algorithm and data/pattern required to make > > > predictions on live data. > > > Once the model is created and trained outside of the DPDK scope, > > > the model can be loaded via rte_ml_model_load() and then started using > > > the rte_ml_model_start() API. > > > The rte_ml_model_params_update() can be used to update the model > > > parameters such as weight and bias without unloading the model using > > > rte_ml_model_unload(). > > > > The fact that the model is prepared outside means the model format > > is free and probably different per mldev driver. > > I think it is OK but it requires a lot of documentation effort to > > explain how to bind the model and its parameters with the DPDK API. > > Also we may need to pass some metadata from the model builder to the > > inference engine in order to enable optimizations prepared in the model. > > And the other way, we may need inference capabilities in order to > > generate an optimized model which can run in the inference engine. > > The base API specification is kept to an absolute minimum. Currently, weight > and bias parameters are updated through rte_ml_model_params_update(). It > can be extended when a driver supports it or if you have any > specific parameter you would like to add in > rte_ml_model_params_update(). This function is int rte_ml_model_params_update(int16_t dev_id, int16_t model_id, void *buffer); How are we supposed to provide separate parameters in this void* ? Just to clarify what "parameters" means: they are the weights and biases of the model. Also, the proposed APIs are for running inference on a pre-trained model. For running inference, the amount of parameter tuning needed is limited or none. The only parameters that may get changed are the weights and biases, which the rte_ml_model_params_update() API caters to. While running inference on a model, there won't be any addition or removal of operators to/from the model, nor any changes in the actual flow of the model. Since the only parameters that can be changed are the weights and biases, the above API should suffice. > Other metadata like batch, shapes and formats are queried using > rte_ml_io_info(). Copying: +/** Input and output data information structure + * + * Specifies the type and shape of input and output data. + */ +struct rte_ml_io_info { + char name[RTE_ML_STR_MAX]; + /**< Name of data */ + struct rte_ml_io_shape shape; + /**< Shape of data */ + enum rte_ml_io_type qtype; + /**< Type of quantized data */ + enum rte_ml_io_type dtype; + /**< Type of de-quantized data */ }; Is it the right place to notify the app that some model optimizations are supported? (example: merge some operations in the graph) The inference is run on a pre-trained model, which means any merges/additions of operations to the graph are NOT done. If any such things are done, then the changed model needs to go through training and compilation once again, which is out of scope of these APIs. > > [...] > > > Typical application utilisation of the ML API will follow the > > > following programming flow. 
> > > - rte_ml_dev_configure() > > > - rte_ml_dev_queue_pair_setup() > > > - rte_ml_model_load() > > > - rte_ml_model_start() > > > - rte_ml_model_info() > > > - rte_ml_dev_start() > > > - rte_ml_enqueue_burst() > > > - rte_ml_dequeue_burst() > > > - rte_ml_model_stop() > > > - rte_ml_model_unload() > > > - rte_ml_dev_stop() > > > - rte_ml_dev_close() > > > > Where is parameters update in this flow? > > Added the mandatory APIs in the top level flow doc. > rte_ml_model_params_update() is used to update the parameters. The question is "where" should it be done? Before/after start? The model image comes with the weights and biases and will be loaded and used as part of rte_ml_model_load() and rte_ml_model_start(). In the rare scenarios where the user wants to update the weights and biases of an already loaded model, rte_ml_model_stop() can be called to stop the model, the parameters (weights and biases) can be updated with the rte_ml_model_params_update() API, followed by rte_ml_model_start() to start the model with the new weights and biases. > > Should we update all parameters at once or can it be done more fine-grain? > > Currently, rte_ml_model_params_update() can be used to update weights > and biases via a buffer when the device is in the stop state, without unloading > the model. The question is "can we update a single parameter"? And how? As mentioned above, for running inference the mode
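To summarize the sequence above as code, a sketch based on the signatures quoted in this thread (the mldev API was experimental at this point, so names and types may differ in later releases; the layout of 'new_params' is model-specific and outside DPDK's scope):

```c
#include <rte_mldev.h>

/* Stop the model, swap in new weights/biases, then restart it. */
static int
update_model_params(int16_t dev_id, int16_t model_id, void *new_params)
{
	int ret;

	ret = rte_ml_model_stop(dev_id, model_id);
	if (ret != 0)
		return ret;

	ret = rte_ml_model_params_update(dev_id, model_id, new_params);
	if (ret != 0)
		return ret;

	return rte_ml_model_start(dev_id, model_id);
}
```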
Re: [PATCH v2] eal: cleanup alarm and multiprocess hotplug before memory detach
friendly ping Fengnan Chang wrote on Fri, Jan 6, 2023 at 11:00: > > Hi: > are there any comments? > > Fengnan Chang wrote on Wed, Dec 21, 2022 at 11:41: > > > > Alarm and multiprocess hotplug still need to access hugepage memory; > > if an alarm event is processed after memory detach, it may cause a SEGV. > > So clean up alarm and multiprocess hotplug before memory detach. > > > > Fixes: 90b13ab8d4f7 ("alarm: remove direct access to interrupt handle") > > Fixes: a0cc7be20dd1 ("mem: cleanup multiprocess resources") > > > > Signed-off-by: Fengnan Chang > > --- > > lib/eal/freebsd/eal.c | 2 +- > > lib/eal/linux/eal.c | 4 ++-- > > 2 files changed, 3 insertions(+), 3 deletions(-) > > > > diff --git a/lib/eal/freebsd/eal.c b/lib/eal/freebsd/eal.c > > index 1b58cd3da6..83b99c601f 100644 > > --- a/lib/eal/freebsd/eal.c > > +++ b/lib/eal/freebsd/eal.c > > @@ -897,9 +897,9 @@ rte_eal_cleanup(void) > > eal_bus_cleanup(); > > rte_trace_save(); > > eal_trace_fini(); > > + rte_eal_alarm_cleanup(); > > /* after this point, any DPDK pointers will become dangling */ > > rte_eal_memory_detach(); > > - rte_eal_alarm_cleanup(); > > eal_cleanup_config(internal_conf); > > return 0; > > } > > diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c > > index 8c118d0d9f..c76f026023 100644 > > --- a/lib/eal/linux/eal.c > > +++ b/lib/eal/linux/eal.c > > @@ -1372,11 +1372,11 @@ rte_eal_cleanup(void) > > eal_bus_cleanup(); > > rte_trace_save(); > > eal_trace_fini(); > > + eal_mp_dev_hotplug_cleanup(); > > + rte_eal_alarm_cleanup(); > > /* after this point, any DPDK pointers will become dangling */ > > rte_eal_memory_detach(); > > - eal_mp_dev_hotplug_cleanup(); > > rte_eal_malloc_heap_cleanup(); > > - rte_eal_alarm_cleanup(); > > eal_cleanup_config(internal_conf); > > rte_eal_log_cleanup(); > > return 0; > > -- > > 2.37.0 (Apple Git-136) > >
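A sketch of the kind of callback that triggers the crash this patch prevents (illustrative, not from the patch): if such a callback fires after rte_eal_memory_detach() but before rte_eal_alarm_cleanup(), the dereference hits unmapped hugepage memory, hence the reordering in the diff above.

```c
#include <stdint.h>
#include <rte_alarm.h>

/* Periodic alarm callback that touches memory on the DPDK heap.
 * 'arg' is assumed to point at a counter allocated with rte_malloc(). */
static void
stats_alarm_cb(void *arg)
{
	uint64_t *counter = arg;

	(*counter)++; /* SEGVs if the heap was already detached */
	/* Re-arm: fire again in one second. */
	rte_eal_alarm_set(1000000 /* us */, stats_alarm_cb, arg);
}
```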
Re: [PATCH v7] ethdev: add special flags when creating async transfer table
Hi Rongwei, Thanks for persevering. I have no strong opinion, but, at least, the fact that the new flags are no longer meant for use in rte_flow_attr, which is clearly not the right place for such, is an improvement. However, let's take a closer look at the current patch, shall we? But, before we get to that, I'd like to kindly request that you provide a more concrete example of how this feature is supposed to be used. Are there some real-life application examples? Also, to me, it's still unclear how an application can obtain the knowledge of this hint in the first instance. For example, can Open vSwitch somehow tell ethdevs representing physical ports from ones representing "vports" (host endpoints)? How does it know which attribute to specify? For the rest of my notes, PSB. On Mon, 14 Nov 2022, Rongwei Liu wrote: In case flow rules match only one kind of traffic in a flow table, then optimization can be done via allocation of this table. This wording might confuse readers. Consider rephrasing it, please: If multiple flow rules share a common set of match masks, then they might belong in a flow table which can be pre-allocated. Such optimization is possible only if the application gives a hint about its usage of the table during initial configuration. The transfer domain rules may process traffic from wire or vport, which may correspond to two kinds of underlayer resources. Why name it a "vport"? Why not "host"? host = packets generated by any of the host's "vport"s wire = packets arriving at the NIC from the network That's why the first two hints introduced in this patch are about wire and vport traffic specialization. Wire means traffic arrives from the uplink port while vport means traffic initiated from VF/SF. By the sound of it, the meaning is confined to just VFs/SFs. What if the user wants to match packets coming from PFs? There are two possible approaches for providing the hints. Using IPv4 as an example: 1. Use pattern item in both template table and flow rules. pattern_template: pattern ANY_VPORT / eth / ipv4 is 1.1.1.1 / end async flow create: pattern ANY_VPORT / eth / ipv4 is 1.1.1.2 / end "ANY_VPORT" needs to be present in each flow rule even if it's just a hint. No value to match because matching is already done by IPv4 item. Why no value to match on? How does it prevent rogue tenants from spoofing network headers? If the application receives a packet on a particular vport's representor, then it may strictly specify item represented_port pointing to that vport so that only packets from that vport match. Why isn't security a consideration? 2. Add special flags into table_attr. template_table 0 create table_id 0 group 1 transfer vport_orig Approach 1 needs to specify the pattern in each flow rule which wastes memory and is not user friendly. What if the user has to insert a group of rules which not only have the same set of match masks but also share exactly the same match spec values for a limited subset of network items (for example, those of an encap. header)? This way, a subset of network item specs can remain fixed across many rules. Does that count as wasting memory? If yes, then the problem does not concern just a single pair of attributes, but rather deserves a more versatile solution like some sort of indirect grouping of constant item specs. Have you considered such options? This patch takes the 2nd approach and introduces one new member "specialize" into rte_flow_table_attr to indicate possible flow table optimization. 
The name "specialize" might have some drawbacks: - spelling difference (specialise/specialize) - in grep output, will mix with flows' "spec" - quite long - not a noun Why not "scope"? Or something like that? By default, there is no hint, so the behavior of the transfer domain doesn't change. There is no guarantee that the hint will be used by the PMD. Signed-off-by: Rongwei Liu Acked-by: Ori Kam v2: Move the new field to template table attribute. v4: Mark it as optional and clear the concept. v5: Change specialize type to uint32_t. v6: Change the flags to macros and re-construct the commit log. v7: Fix build failure. --- app/test-pmd/cmdline_flow.c | 26 +++ doc/guides/prog_guide/rte_flow.rst | 15 +++ doc/guides/testpmd_app_ug/testpmd_funcs.rst | 3 ++- lib/ethdev/rte_flow.h | 28 + 4 files changed, 71 insertions(+), 1 deletion(-) diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c index 88108498e0..62197f2618 100644 --- a/app/test-pmd/cmdline_flow.c +++ b/app/test-pmd/cmdline_flow.c @@ -184,6 +184,8 @@ enum index { TABLE_INGRESS, TABLE_EGRESS, TABLE_TRANSFER, + TABLE_TRANSFER_WIRE_ORIG, + TABLE_TRANSFER_VPORT_ORIG, TABLE_RULES_NUMBER, TABLE_PATTERN_TEMPLATE, TABLE_ACTIONS_TEMPLATE, @@ -1158,6 +1160,8 @@ static const enum index next_table_attr[] =
RE: [PATCH v7] ethdev: add special flags when creating async transfer table
Hi Ivan, BR Rongwei > -Original Message- > From: Ivan Malov > Sent: Monday, January 30, 2023 08:00 > To: Rongwei Liu > Cc: Matan Azrad ; Slava Ovsiienko > ; Ori Kam ; NBU-Contact- > Thomas Monjalon (EXTERNAL) ; Aman Singh > ; Yuying Zhang ; > Ferruh Yigit ; Andrew Rybchenko > ; dev@dpdk.org; Raslan Darawsheh > > Subject: Re: [PATCH v7] ethdev: add special flags when creating async transfer > table > > External email: Use caution opening links or attachments > > > Hi Rongwei, > > Thanks for persevering. I have no strong opinion, but, at least, the fact that the > new flags are no longer meant for use in rte_flow_attr, which is clearly not > the right place for such, is an improvement. > Thanks for the suggestion; it has moved to rte_flow_table_attr now and it's dedicated to the async API. > However, let's take a closer look at the current patch, shall we? > > But, before we get to that, I'd like to kindly request that you provide a more > concrete example of how this feature is supposed to be used. Are there some > real-life application examples? > Sure. > Also, to me, it's still unclear how an application can obtain the knowledge of > this hint in the first instance. For example, can Open vSwitch somehow tell > ethdevs representing physical ports from ones representing "vports" (host > endpoints)? > How does it know which attribute to specify? > The hint should be initiated by the application, and the application knows its traffic pattern, which highly relates to the deployment. Let's use VxLAN encap/decap as an example: 1. Traffic from the wire should match the VxLAN pattern and do the decap, then be sent to different vports. flow pattern_template 0 create transfer relaxed no pattern_template_id 4 template represented_port ethdev_port_id is 0 / eth / ipv4 / udp / vxlan / tag index is 0 data is 0x33 / end flow actions_template 0 create transfer actions_template_id 4 template raw_decap index 0 / represented_port ethdev_port_id 1 / end mask raw_decap index 0 / represented_port ethdev_port_id 1 / end flow template_table 0 create group 1 priority 0 transfer wire_orig table_id 4 rules_number 128 pattern_template 4 actions_template 4 2. Traffic from vports should be encapped with different VxLAN headers and sent to the wire. flow actions_template 1 create transfer actions_template_id 5 template raw_encap index 0 / represented_port ethdev_port_id 0 / end mask raw_encap index 0 / represented_port ethdev_port_id 0 / end flow template_table 0 create group 1 priority 0 transfer vport_orig table_id 5 rules_number 128 pattern_template 4 actions_template 5 > For the rest of my notes, PSB. > > On Mon, 14 Nov 2022, Rongwei Liu wrote: > > > In case flow rules match only one kind of traffic in a flow table, > > then optimization can be done via allocation of this table. > > This wording might confuse readers. Consider rephrasing it, please: > If multiple flow rules share a common set of match masks, then they might > belong in a flow table which can be pre-allocated. > > > Such optimization is possible only if the application gives a hint > > about its usage of the table during initial configuration. > > > > The transfer domain rules may process traffic from wire or vport, > > which may correspond to two kinds of underlayer resources. > > Why name it a "vport"? Why not "host"? > > host = packets generated by any of the host's "vport"s wire = packets arriving > at the NIC from the network Vport is short for "virtual port" and contains "VF/SF" for now. Per my thoughts, it's clearer and maps to DPDK port probing/management. 
> > > That's why the first two hints introduced in this patch are about wire > > and vport traffic specialization. > > Wire means traffic arrives from the uplink port while vport means > > traffic initiated from VF/SF. > > By the sound of it, the meaning is confined to just VFs/SFs. > What if the user wants to match packets coming from PFs? > It should be "wire_orig". > > > > There are two possible approaches for providing the hints. > > Using IPv4 as an example: > > 1. Use pattern item in both template table and flow rules. > > > > pattern_template: pattern ANY_VPORT / eth / ipv4 is 1.1.1.1 / end > > async flow create: pattern ANY_VPORT / eth / ipv4 is 1.1.1.2 / end > > > > "ANY_VPORT" needs to be present in each flow rule even if it's just > > a hint. No value to match because matching is already done by > > IPv4 item. > > Why no value to match on? How does it prevent rogue tenants from spoofing > network headers? If the application receives a packet on a particular vport's > representor, then it may strictly specify item represented_port pointing to > that > vport so that only packets from that vport match. > > Why isn't security a consideration? > There is some misunderstanding here. "ANY_VPORT" is the approach (new matching item without value) suggested by you. I was explaining we need to apply it to each flow rule even if it's only a flag and no value. > > > > 2. Add special flags into table_
RE: [PATCH v4 3/3] ethdev: add standby flags for live migration
Hi Jerin BR Rongwei > -Original Message- > From: Jerin Jacob > Sent: Monday, January 23, 2023 21:20 > To: Rongwei Liu > Cc: dev@dpdk.org; Matan Azrad ; Slava Ovsiienko > ; Ori Kam ; NBU-Contact- > Thomas Monjalon (EXTERNAL) ; > step...@networkplumber.org; Raslan Darawsheh ; > Ferruh Yigit ; Andrew Rybchenko > > Subject: Re: [PATCH v4 3/3] ethdev: add standby flags for live migration > > External email: Use caution opening links or attachments > > > On Wed, Jan 18, 2023 at 9:15 PM Rongwei Liu wrote: > > > > Some flags are added to the process state API for live migration in > > order to change the behavior of the flow rules in a standby process. > > > > Signed-off-by: Rongwei Liu > > --- > > lib/ethdev/rte_ethdev.h | 21 + > > 1 file changed, 21 insertions(+) > > > > diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h index > > 1505396ced..9ae4f426a7 100644 > > --- a/lib/ethdev/rte_ethdev.h > > +++ b/lib/ethdev/rte_ethdev.h > > @@ -2260,6 +2260,27 @@ int rte_eth_dev_owner_get(const uint16_t > > port_id, __rte_experimental int rte_eth_process_set_role(bool > > standby, uint32_t flags); > > > > +/**@{@name Process role flags > > + * used when migrating from an application to another one. > > + * @see rte_eth_process_set_active > > + */ > > +/** > > + * When set on a standby process, ingress flow rules will be > > +effective > > + * in active and standby processes, so the ingress traffic may be > > duplicated. > > + */ > > +#define RTE_ETH_PROCESS_FLAG_STANDBY_DUP_FLOW_INGRESS > RTE_BIT32(0) > > > How to duplicate if the action has stateful items? For example, > rte_flow_action_security::security_session -> it stores a live pointer > rte_flow_action_meter::mtr_id; -> MTR object ID created with > rte_mtr_create() I agree with you, not all actions can be supported in the active/standby model. That's why we have return value checking and rollback. In the NVIDIA driver doc, we suggested users start with 'rss/queue/jump' actions. Meter is possible, at least per my view. Assume: "meter g_action queue 0 / y_action drop / r_action drop" Old application: create meter_id 'A' with a pre-defined limitation. New application: create meter_id 'B' which has the same parameters as 'A'. 1. 1st possible approach: Hardware duplicates the traffic; the old application uses meter 'A' and the new application uses meter 'B' to control traffic throughput. Since the traffic is duplicated, it can go to different meters. 2. 2nd possible approach: Meters 'A' and 'B' point to the same hardware resource; traffic reaches this part first, and if green, duplication happens.
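A hedged sketch of the first approach, using the generic rte_mtr API (function and variable names are ours; how the two applications share the meter parameters is assumed to happen out of band):

```c
#include <stdint.h>
#include <rte_mtr.h>

/* The standby application creates its meter 'B' with the same
 * parameters the active application used for meter 'A', so duplicated
 * traffic is policed identically on both sides. */
static int
clone_meter(uint16_t port_id, uint32_t mtr_id_b,
	    const struct rte_mtr_params *params_of_a)
{
	struct rte_mtr_params params = *params_of_a;
	struct rte_mtr_error error;

	return rte_mtr_create(port_id, mtr_id_b, &params,
			      0 /* not shared */, &error);
}
```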
RE: [PATCH v2 1/8] ethdev: add IPv6 routing extension header definition
Hi Andrew BR Rongwei > -Original Message- > From: Andrew Rybchenko > Sent: Friday, January 20, 2023 17:21 > To: Rongwei Liu ; Matan Azrad ; > Slava Ovsiienko ; Ori Kam ; > NBU-Contact-Thomas Monjalon (EXTERNAL) ; Aman > Singh ; Yuying Zhang > ; Ferruh Yigit ; Olivier > Matz > Cc: dev@dpdk.org; Raslan Darawsheh > Subject: Re: [PATCH v2 1/8] ethdev: add IPv6 routing extension header > definition > > External email: Use caution opening links or attachments > > > On 1/19/23 06:11, Rongwei Liu wrote: > > Add IPv6 routing extension header definition and no TLV support for > > now. > > > > At rte_flow layer, there are new items defined for matching > > type/nexthdr/segments_left field. > > > > Add command line support for IPv6 routing extension header > > matching: type/nexthdr/segment_list. > > > > Signed-off-by: Rongwei Liu > > Acked-by: Ori Kam > > [snip] > > > diff --git a/doc/guides/prog_guide/rte_flow.rst > > b/doc/guides/prog_guide/rte_flow.rst > > index 3e6242803d..ae99036be0 100644 > > --- a/doc/guides/prog_guide/rte_flow.rst > > +++ b/doc/guides/prog_guide/rte_flow.rst > > @@ -1544,6 +1544,15 @@ Matches Color Marker set by a Meter. > > > > - ``color``: Metering color marker. > > > > +Item: ``IPV6_ROUTING_EXT`` > > +^^ > > + > > +Matches ipv6 routing extension header. > > ipv6 -> IPv6 Sure. > > > + > > +- ``next_hdr``: Next layer header type. > > +- ``type``: IPv6 routing extension header type. > > +- ``segments_left``: How many IPv6 destination addresses carries on > > Why are only 3 fields mentioned above? > This is the 1st phase, matching the first uint32 of the IPv6 routing extension. No need to match hdr_len since the TLV is ignored. > > + > > Actions > > ~~~ > > > > diff --git a/doc/guides/rel_notes/release_23_03.rst > > b/doc/guides/rel_notes/release_23_03.rst > > index b8c5b68d6c..2a794d598e 100644 > > --- a/doc/guides/rel_notes/release_23_03.rst > > +++ b/doc/guides/rel_notes/release_23_03.rst > > @@ -55,6 +55,11 @@ New Features > >Also, make sure to start the actual text at the margin. > >=== > > > > +* **Added rte_flow support for matching IPv6 routing extension header > > +fields.** > > + > > + Added ``ipv6_routing_ext`` items in rte_flow to match IPv6 routing > > + extension header > > Missing full stop above. > Sure > > + > > > > Removed Items > > - > > @@ -84,6 +89,11 @@ API Changes > > Also, make sure to start the actual text at the margin. > > === > > > > +* ethdev: added a new structure: > > + > > +- IPv6 routing extension header ``rte_flow_item_ipv6_routing_ext`` and > > + ``rte_ipv6_routing_ext`` > > + > > If I'm not mistaken, additions should not be here. It is not an API change. > Checked the existing release doc: "ihl" and "version" of the IPv4 header are added here but with the "net:" prefix. Do you think it's good to follow? > > > > ABI Changes > > --- > > diff --git a/lib/ethdev/rte_flow.c b/lib/ethdev/rte_flow.c index > > 7d0c24366c..4074b475c8 100644 > > --- a/lib/ethdev/rte_flow.c > > +++ b/lib/ethdev/rte_flow.c > > @@ -76,6 +76,20 @@ rte_flow_item_flex_conv(void *buf, const void *data) > > return src->length; > > } > > > > +static size_t > > +rte_flow_item_ipv6_routing_ext_conv(void *buf, const void *data) { > > + struct rte_flow_item_ipv6_routing_ext *dst = buf; > > + const struct rte_flow_item_ipv6_routing_ext *src = data; > > + size_t len; > > + > > + len = src->hdr.hdr_len ? src->hdr.hdr_len << 3 : > > + src->hdr.segments_left << 4; > > Compare hdr_len vs 0 explicitly. > Also I'd add parenthesis around ternary operator values to make it simpler to > understand. Sure. 
> > > + if (buf) > > Please, compare vs NULL explicitly. May be 'dst' would be better here? > > > + rte_memcpy((void *)((uintptr_t)(dst->hdr.segments)), > > +src->hdr.segments, len); > > + return len; > > +} > > + Sure. > > /** Generate flow_item[] entry. */ > > #define MK_FLOW_ITEM(t, s) \ > > [RTE_FLOW_ITEM_TYPE_ ## t] = { \ @@ -157,6 +171,8 @@ static > > const struct rte_flow_desc_data rte_flow_desc_item[] = { > > MK_FLOW_ITEM(L2TPV2, sizeof(struct rte_flow_item_l2tpv2)), > > MK_FLOW_ITEM(PPP, sizeof(struct rte_flow_item_ppp)), > > MK_FLOW_ITEM(METER_COLOR, sizeof(struct > > rte_flow_item_meter_color)), > > + MK_FLOW_ITEM_FN(IPV6_ROUTING_EXT, sizeof(struct > rte_flow_item_ipv6_routing_ext), > > + rte_flow_item_ipv6_routing_ext_conv), > > }; > > > > /** Generate flow_action[] entry. */ diff --git > > a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h index > > b60987db4b..0120d3e7d2 100644 > > --- a/lib/ethdev/rte_flow.h > > +++ b/lib/ethdev/rte_flow.h > > @@ -624,6 +624,13 @@ enum rte_flow_item_type { > >* See struct rte_flow_item_meter_color. > >*/ > >
[PATCH v3 0/8] add IPv6 routing extension support
Support IPv6 routing extension header matching with new rte_flow item. Add encapsulation support for IPv6 routing extension header. v3: enhance the format and use be32/be16 in network header structure. v2: remove redundant rte_flow items. include the commit from Gregory to pass the compilation. Gregory Etelson (1): net/mlx5/hws: Definer, add mlx5dr context to definer_conv_data Rongwei Liu (7): ethdev: add IPv6 routing extension header definition net/mlx5: adopt IPv6 routing extension prm definition net/mlx5/hws: add IPv6 routing extension matching support app/testpmd: add IPv6 routing extension header in raw encap ethdev: add modify IPv6 protocol field net/mlx5/hws: add modify IPv6 protocol implementation doc/mlx5: add IPv6 routing extension matching docs app/test-pmd/cmdline_flow.c| 72 - doc/guides/nics/features/default.ini | 1 + doc/guides/nics/features/mlx5.ini | 1 + doc/guides/nics/mlx5.rst | 2 + doc/guides/prog_guide/rte_flow.rst | 9 ++ doc/guides/rel_notes/release_23_03.rst | 9 ++ drivers/common/mlx5/mlx5_devx_cmds.c | 17 +++- drivers/common/mlx5/mlx5_devx_cmds.h | 7 +- drivers/common/mlx5/mlx5_prm.h | 29 +- drivers/net/mlx5/hws/mlx5dr_definer.c | 133 + drivers/net/mlx5/hws/mlx5dr_definer.h | 15 +++ drivers/net/mlx5/mlx5.c| 103 ++- drivers/net/mlx5/mlx5.h| 19 +++- drivers/net/mlx5/mlx5_flow.h | 28 ++ drivers/net/mlx5/mlx5_flow_dv.c| 10 ++ drivers/net/mlx5/mlx5_flow_flex.c | 14 ++- drivers/net/mlx5/mlx5_flow_hw.c| 29 +- lib/ethdev/rte_flow.c | 19 lib/ethdev/rte_flow.h | 20 lib/net/rte_ip.h | 21 20 files changed, 517 insertions(+), 41 deletions(-) -- 2.27.0
[PATCH v3 1/8] ethdev: add IPv6 routing extension header definition
Add the IPv6 routing extension header definition, without TLV support for now. At the rte_flow layer, new items are defined for matching the type/nexthdr/segments_left fields. Add command line support for IPv6 routing extension header matching: type/nexthdr/segments_left. Signed-off-by: Rongwei Liu Acked-by: Ori Kam --- app/test-pmd/cmdline_flow.c| 46 ++ doc/guides/prog_guide/rte_flow.rst | 9 + doc/guides/rel_notes/release_23_03.rst | 9 + lib/ethdev/rte_flow.c | 19 +++ lib/ethdev/rte_flow.h | 19 +++ lib/net/rte_ip.h | 21 6 files changed, 123 insertions(+) diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c index 88108498e0..7a8516829c 100644 --- a/app/test-pmd/cmdline_flow.c +++ b/app/test-pmd/cmdline_flow.c @@ -298,6 +298,10 @@ enum index { ITEM_IPV6_SRC, ITEM_IPV6_DST, ITEM_IPV6_HAS_FRAG_EXT, + ITEM_IPV6_ROUTING_EXT, + ITEM_IPV6_ROUTING_EXT_TYPE, + ITEM_IPV6_ROUTING_EXT_NEXT_HDR, + ITEM_IPV6_ROUTING_EXT_SEG_LEFT, ITEM_ICMP, ITEM_ICMP_TYPE, ITEM_ICMP_CODE, @@ -1326,6 +1330,7 @@ static const enum index next_item[] = { ITEM_ARP_ETH_IPV4, ITEM_IPV6_EXT, ITEM_IPV6_FRAG_EXT, + ITEM_IPV6_ROUTING_EXT, ITEM_ICMP6, ITEM_ICMP6_ND_NS, ITEM_ICMP6_ND_NA, @@ -1435,6 +1440,15 @@ static const enum index item_ipv6[] = { ITEM_IPV6_SRC, ITEM_IPV6_DST, ITEM_IPV6_HAS_FRAG_EXT, + ITEM_IPV6_ROUTING_EXT, + ITEM_NEXT, + ZERO, +}; + +static const enum index item_ipv6_routing_ext[] = { + ITEM_IPV6_ROUTING_EXT_TYPE, + ITEM_IPV6_ROUTING_EXT_NEXT_HDR, + ITEM_IPV6_ROUTING_EXT_SEG_LEFT, ITEM_NEXT, ZERO, }; @@ -3844,6 +3858,38 @@ static const struct token token_list[] = { .args = ARGS(ARGS_ENTRY_BF(struct rte_flow_item_ipv6, has_frag_ext, 1)), }, + [ITEM_IPV6_ROUTING_EXT] = { + .name = "ipv6_routing_ext", + .help = "match IPv6 routing extension header", + .priv = PRIV_ITEM(IPV6_ROUTING_EXT, + sizeof(struct rte_flow_item_ipv6_routing_ext)), + .next = NEXT(item_ipv6_routing_ext), + .call = parse_vc, + }, + [ITEM_IPV6_ROUTING_EXT_TYPE] = { + .name = "ext_type", + .help = "match IPv6 routing extension header type", + .next = NEXT(item_ipv6_routing_ext, NEXT_ENTRY(COMMON_UNSIGNED), +item_param), + .args = ARGS(ARGS_ENTRY_HTON(struct rte_flow_item_ipv6_routing_ext, +hdr.type)), + }, + [ITEM_IPV6_ROUTING_EXT_NEXT_HDR] = { + .name = "ext_next_hdr", + .help = "match IPv6 routing extension header next header type", + .next = NEXT(item_ipv6_routing_ext, NEXT_ENTRY(COMMON_UNSIGNED), +item_param), + .args = ARGS(ARGS_ENTRY_HTON(struct rte_flow_item_ipv6_routing_ext, +hdr.next_hdr)), + }, + [ITEM_IPV6_ROUTING_EXT_SEG_LEFT] = { + .name = "ext_seg_left", + .help = "match IPv6 routing extension header segment left", + .next = NEXT(item_ipv6_routing_ext, NEXT_ENTRY(COMMON_UNSIGNED), +item_param), + .args = ARGS(ARGS_ENTRY_HTON(struct rte_flow_item_ipv6_routing_ext, +hdr.segments_left)), + }, [ITEM_ICMP] = { .name = "icmp", .help = "match ICMP header", diff --git a/doc/guides/prog_guide/rte_flow.rst b/doc/guides/prog_guide/rte_flow.rst index 3e6242803d..602fab29d3 100644 --- a/doc/guides/prog_guide/rte_flow.rst +++ b/doc/guides/prog_guide/rte_flow.rst @@ -1544,6 +1544,15 @@ Matches Color Marker set by a Meter. - ``color``: Metering color marker. +Item: ``IPV6_ROUTING_EXT`` +^^ + +Matches IPv6 routing extension header. + +- ``next_hdr``: Next layer header type. +- ``type``: IPv6 routing extension header type. +- ``segments_left``: Number of route segments (IPv6 destination addresses) remaining. 
+ Actions ~~~ diff --git a/doc/guides/rel_notes/release_23_03.rst b/doc/guides/rel_notes/release_23_03.rst index b8c5b68d6c..8f482301f7 100644 --- a/doc/guides/rel_notes/release_23_03.rst +++ b/doc/guides/rel_notes/release_23_03.rst @@ -55,6 +55,11 @@ New Features Also, make sure to start the actual text at the margin. === +* **Added rte_flow support for matching IPv6 routing extension header fields.** + + Added ``ipv6_routing_ext`` items in rte_flow to match IPv6 routing extension + header. +
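For illustration, a hypothetical testpmd rule built from the tokens added above could match an SRv6 header (routing type 4) with two segments left; the port ID, queue index, and match values here are placeholders, not part of the patch:

  flow create 0 ingress pattern eth / ipv6 / ipv6_routing_ext ext_type is 4 ext_next_hdr is 6 ext_seg_left is 2 / end actions queue index 0 / end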
[PATCH v3 2/8] net/mlx5: adopt IPv6 routing extension prm definition
Per the newest PRM definition, sample_id carries three parts of information instead of a single uint32_t ID: sample_id + modify_field_id + format_select_dw. New FW capability bits have also been introduced to identify the new capability. Signed-off-by: Rongwei Liu Acked-by: Viacheslav Ovsiienko --- drivers/common/mlx5/mlx5_devx_cmds.c | 14 +++--- drivers/common/mlx5/mlx5_devx_cmds.h | 7 ++- drivers/common/mlx5/mlx5_prm.h | 28 ++-- drivers/net/mlx5/mlx5.c | 15 +++ drivers/net/mlx5/mlx5.h | 3 ++- drivers/net/mlx5/mlx5_flow_flex.c| 14 +++--- 6 files changed, 67 insertions(+), 14 deletions(-) diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c index e3a4927d0f..1f65ea7dcb 100644 --- a/drivers/common/mlx5/mlx5_devx_cmds.c +++ b/drivers/common/mlx5/mlx5_devx_cmds.c @@ -607,7 +607,8 @@ mlx5_devx_cmd_query_hca_vdpa_attr(void *ctx, int mlx5_devx_cmd_query_parse_samples(struct mlx5_devx_obj *flex_obj, - uint32_t ids[], uint32_t num) + struct mlx5_ext_sample_id ids[], + uint32_t num, uint8_t *anchor) { uint32_t in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {0}; uint32_t out[MLX5_ST_SZ_DW(create_flex_parser_out)] = {0}; @@ -636,6 +637,7 @@ mlx5_devx_cmd_query_parse_samples(struct mlx5_devx_obj *flex_obj, (void *)flex_obj); return -rte_errno; } + *anchor = MLX5_GET(parse_graph_flex, flex, head_anchor_id); for (i = 0; i < MLX5_GRAPH_NODE_SAMPLE_NUM; i++) { void *s_off = (void *)((char *)sample + i * MLX5_ST_SZ_BYTES(parse_graph_flow_match_sample)); @@ -645,8 +647,8 @@ mlx5_devx_cmd_query_parse_samples(struct mlx5_devx_obj *flex_obj, flow_match_sample_en); if (!en) continue; - ids[idx++] = MLX5_GET(parse_graph_flow_match_sample, s_off, - flow_match_sample_field_id); + ids[idx++].id = MLX5_GET(parse_graph_flow_match_sample, s_off, +flow_match_sample_field_id); } if (num != idx) { rte_errno = EINVAL; @@ -794,6 +796,12 @@ mlx5_devx_cmd_query_hca_parse_graph_node_cap max_num_arc_out); attr->max_num_sample = MLX5_GET(parse_graph_node_cap, hcattr, max_num_sample); + attr->anchor_en = MLX5_GET(parse_graph_node_cap, hcattr, anchor_en); + attr->ext_sample_id = MLX5_GET(parse_graph_node_cap, hcattr, ext_sample_id); + attr->sample_tunnel_inner2 = MLX5_GET(parse_graph_node_cap, hcattr, + sample_tunnel_inner2); + attr->zero_size_supported = MLX5_GET(parse_graph_node_cap, hcattr, +zero_size_supported); attr->sample_id_in_out = MLX5_GET(parse_graph_node_cap, hcattr, sample_id_in_out); attr->max_base_header_length = MLX5_GET(parse_graph_node_cap, hcattr, diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h index c94b9eac06..5b33010155 100644 --- a/drivers/common/mlx5/mlx5_devx_cmds.h +++ b/drivers/common/mlx5/mlx5_devx_cmds.h @@ -114,6 +114,10 @@ struct mlx5_hca_flex_attr { uint8_t max_num_arc_out; uint8_t max_num_sample; uint8_t max_num_prog_sample:5; /* From HCA CAP 2 */ + uint8_t anchor_en:1; + uint8_t ext_sample_id:1; + uint8_t sample_tunnel_inner2:1; + uint8_t zero_size_supported:1; uint8_t sample_id_in_out:1; uint16_t max_base_header_length; uint8_t max_sample_base_offset; @@ -706,7 +710,8 @@ int mlx5_devx_cmd_modify_tir(struct mlx5_devx_obj *tir, struct mlx5_devx_modify_tir_attr *tir_attr); __rte_internal int mlx5_devx_cmd_query_parse_samples(struct mlx5_devx_obj *flex_obj, - uint32_t ids[], uint32_t num); + struct mlx5_ext_sample_id ids[], + uint32_t num, uint8_t *anchor); __rte_internal struct mlx5_devx_obj * diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h index 3790dc84b8..ce6cd98fd7 100644 --- 
a/drivers/common/mlx5/mlx5_prm.h +++ b/drivers/common/mlx5/mlx5_prm.h @@ -1893,7 +1893,11 @@ struct mlx5_ifc_parse_graph_node_cap_bits { u8 max_num_arc_in[0x08]; u8 max_num_arc_out[0x08]; u8 max_num_sample[0x08]; - u8 reserved_at_78[0x07]; + u8 reserved_at_78[0x03]; + u8 anchor_en[0x1]; + u8 ext_sample_id[0x1]; + u8 sample_tunnel_inner2[0x1]; + u8 zero_size_supported[0x1]; u8 s
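For readers following the prototype change above, here is a minimal C sketch of what the three-part sample ID could look like as a union; the field order and bit widths are assumptions drawn from the commit message, not the authoritative mlx5_devx_cmds.h layout:

  struct mlx5_ext_sample_id {
      union {
          struct {
              uint32_t format_select_dw:8;  /* assumed width: DW format selector */
              uint32_t modify_field_id:12;  /* assumed width: ID for modify-field actions */
              uint32_t sample_id:12;        /* assumed width: the sample ID proper */
          };
          uint32_t id; /* legacy single-uint32_t view for existing callers */
      };
  };

The union keeps mlx5_devx_cmd_query_parse_samples() callers that only need the plain ID working through the .id view, as the ids[idx++].id assignment in the diff suggests.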
[PATCH v3 3/8] net/mlx5/hws: Definer, add mlx5dr context to definer_conv_data
From: Gregory Etelson New mlx5dr_context member replaces mlx5dr_cmd_query_caps. Capabilities structure is a member of mlx5dr_context. Signed-off-by: Gregory Etelson Signed-off-by: Rongwei Liu Acked-by: Viacheslav Ovsiienko --- drivers/net/mlx5/hws/mlx5dr_definer.c | 42 ++- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/drivers/net/mlx5/hws/mlx5dr_definer.c b/drivers/net/mlx5/hws/mlx5dr_definer.c index 6b98eb8c96..0f1cab7e07 100644 --- a/drivers/net/mlx5/hws/mlx5dr_definer.c +++ b/drivers/net/mlx5/hws/mlx5dr_definer.c @@ -100,7 +100,7 @@ struct mlx5dr_definer_sel_ctrl { }; struct mlx5dr_definer_conv_data { - struct mlx5dr_cmd_query_caps *caps; + struct mlx5dr_context *ctx; struct mlx5dr_definer_fc *fc; uint8_t relaxed; uint8_t tunnel; @@ -815,6 +815,7 @@ mlx5dr_definer_conv_item_gtp(struct mlx5dr_definer_conv_data *cd, struct rte_flow_item *item, int item_idx) { + struct mlx5dr_cmd_query_caps *caps = cd->ctx->caps; const struct rte_flow_item_gtp *m = item->mask; struct mlx5dr_definer_fc *fc; @@ -836,7 +837,7 @@ mlx5dr_definer_conv_item_gtp(struct mlx5dr_definer_conv_data *cd, } if (m->teid) { - if (!(cd->caps->flex_protocols & MLX5_HCA_FLEX_GTPU_TEID_ENABLED)) { + if (!(caps->flex_protocols & MLX5_HCA_FLEX_GTPU_TEID_ENABLED)) { rte_errno = ENOTSUP; return rte_errno; } @@ -844,11 +845,11 @@ mlx5dr_definer_conv_item_gtp(struct mlx5dr_definer_conv_data *cd, fc->item_idx = item_idx; fc->tag_set = &mlx5dr_definer_gtp_teid_set; fc->bit_mask = __mlx5_mask(header_gtp, teid); - fc->byte_off = cd->caps->format_select_gtpu_dw_1 * DW_SIZE; + fc->byte_off = caps->format_select_gtpu_dw_1 * DW_SIZE; } if (m->v_pt_rsv_flags) { - if (!(cd->caps->flex_protocols & MLX5_HCA_FLEX_GTPU_DW_0_ENABLED)) { + if (!(caps->flex_protocols & MLX5_HCA_FLEX_GTPU_DW_0_ENABLED)) { rte_errno = ENOTSUP; return rte_errno; } @@ -857,12 +858,12 @@ mlx5dr_definer_conv_item_gtp(struct mlx5dr_definer_conv_data *cd, fc->tag_set = &mlx5dr_definer_gtp_ext_flag_set; fc->bit_mask = __mlx5_mask(header_gtp, ext_hdr_flag); fc->bit_off = __mlx5_dw_bit_off(header_gtp, ext_hdr_flag); - fc->byte_off = cd->caps->format_select_gtpu_dw_0 * DW_SIZE; + fc->byte_off = caps->format_select_gtpu_dw_0 * DW_SIZE; } if (m->msg_type) { - if (!(cd->caps->flex_protocols & MLX5_HCA_FLEX_GTPU_DW_0_ENABLED)) { + if (!(caps->flex_protocols & MLX5_HCA_FLEX_GTPU_DW_0_ENABLED)) { rte_errno = ENOTSUP; return rte_errno; } @@ -871,7 +872,7 @@ mlx5dr_definer_conv_item_gtp(struct mlx5dr_definer_conv_data *cd, fc->tag_set = &mlx5dr_definer_gtp_msg_type_set; fc->bit_mask = __mlx5_mask(header_gtp, msg_type); fc->bit_off = __mlx5_dw_bit_off(header_gtp, msg_type); - fc->byte_off = cd->caps->format_select_gtpu_dw_0 * DW_SIZE; + fc->byte_off = caps->format_select_gtpu_dw_0 * DW_SIZE; } return 0; @@ -882,12 +883,13 @@ mlx5dr_definer_conv_item_gtp_psc(struct mlx5dr_definer_conv_data *cd, struct rte_flow_item *item, int item_idx) { + struct mlx5dr_cmd_query_caps *caps = cd->ctx->caps; const struct rte_flow_item_gtp_psc *m = item->mask; struct mlx5dr_definer_fc *fc; /* Overwrite GTP extension flag to be 1 */ if (!cd->relaxed) { - if (!(cd->caps->flex_protocols & MLX5_HCA_FLEX_GTPU_DW_0_ENABLED)) { + if (!(caps->flex_protocols & MLX5_HCA_FLEX_GTPU_DW_0_ENABLED)) { rte_errno = ENOTSUP; return rte_errno; } @@ -896,12 +898,12 @@ mlx5dr_definer_conv_item_gtp_psc(struct mlx5dr_definer_conv_data *cd, fc->tag_set = &mlx5dr_definer_ones_set; fc->bit_mask = __mlx5_mask(header_gtp, ext_hdr_flag); fc->bit_off = __mlx5_dw_bit_off(header_gtp, ext_hdr_flag); - fc->byte_off = 
cd->caps->format_select_gtpu_dw_0 * DW_SIZE; + fc->byte_off = caps->format_select_gtpu_dw_0 * DW_SIZE; } /* Overwrite next extension header type */ if (!cd->relaxed) { - if (!(cd->caps->flex_protocols & MLX5_HCA_FLEX_GTPU_DW_2_ENABLED)) { + if (!(caps->flex_protocols & MLX5_HCA_FLEX_GTPU_DW_2_ENABLED)) { rte_errno = ENOTSUP;
[PATCH v3 4/8] net/mlx5/hws: add IPv6 routing extension matching support
Add mlx5 HWS logic to match IPv6 routing extension header. Once detecting IPv6 matching extension items in pattern template create callback, PMD allocates a flex parser to sample the first dword of srv6 header. Only support next_hdr/segments_left/type for now. Signed-off-by: Rongwei Liu Reviewed-by: Alex Vesker Acked-by: Viacheslav Ovsiienko --- drivers/common/mlx5/mlx5_devx_cmds.c | 7 +- drivers/net/mlx5/hws/mlx5dr_definer.c | 91 ++ drivers/net/mlx5/hws/mlx5dr_definer.h | 15 + drivers/net/mlx5/mlx5.c | 92 ++- drivers/net/mlx5/mlx5.h | 16 + drivers/net/mlx5/mlx5_flow.h | 28 drivers/net/mlx5/mlx5_flow_hw.c | 29 +++-- 7 files changed, 268 insertions(+), 10 deletions(-) diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c index 1f65ea7dcb..22a94c1e1a 100644 --- a/drivers/common/mlx5/mlx5_devx_cmds.c +++ b/drivers/common/mlx5/mlx5_devx_cmds.c @@ -607,7 +607,7 @@ mlx5_devx_cmd_query_hca_vdpa_attr(void *ctx, int mlx5_devx_cmd_query_parse_samples(struct mlx5_devx_obj *flex_obj, - struct mlx5_ext_sample_id ids[], + struct mlx5_ext_sample_id *ids, uint32_t num, uint8_t *anchor) { uint32_t in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {0}; @@ -637,8 +637,9 @@ mlx5_devx_cmd_query_parse_samples(struct mlx5_devx_obj *flex_obj, (void *)flex_obj); return -rte_errno; } - *anchor = MLX5_GET(parse_graph_flex, flex, head_anchor_id); - for (i = 0; i < MLX5_GRAPH_NODE_SAMPLE_NUM; i++) { + if (anchor) + *anchor = MLX5_GET(parse_graph_flex, flex, head_anchor_id); + for (i = 0; i < MLX5_GRAPH_NODE_SAMPLE_NUM && idx <= num; i++) { void *s_off = (void *)((char *)sample + i * MLX5_ST_SZ_BYTES(parse_graph_flow_match_sample)); uint32_t en; diff --git a/drivers/net/mlx5/hws/mlx5dr_definer.c b/drivers/net/mlx5/hws/mlx5dr_definer.c index 0f1cab7e07..142fc545eb 100644 --- a/drivers/net/mlx5/hws/mlx5dr_definer.c +++ b/drivers/net/mlx5/hws/mlx5dr_definer.c @@ -125,6 +125,7 @@ struct mlx5dr_definer_conv_data { X(SET_BE16, ipv4_frag, v->fragment_offset, rte_ipv4_hdr) \ X(SET_BE16, ipv6_payload_len, v->hdr.payload_len, rte_flow_item_ipv6) \ X(SET, ipv6_proto, v->hdr.proto, rte_flow_item_ipv6) \ + X(SET, ipv6_routing_hdr, IPPROTO_ROUTING, rte_flow_item_ipv6) \ X(SET, ipv6_hop_limits,v->hdr.hop_limits, rte_flow_item_ipv6) \ X(SET_BE32P,ipv6_src_addr_127_96, &v->hdr.src_addr[0], rte_flow_item_ipv6) \ X(SET_BE32P,ipv6_src_addr_95_64,&v->hdr.src_addr[4], rte_flow_item_ipv6) \ @@ -293,6 +294,21 @@ mlx5dr_definer_integrity_set(struct mlx5dr_definer_fc *fc, DR_SET(tag, ok1_bits, fc->byte_off, fc->bit_off, fc->bit_mask); } +static void +mlx5dr_definer_ipv6_routing_ext_set(struct mlx5dr_definer_fc *fc, + const void *item, + uint8_t *tag) +{ + const struct rte_flow_item_ipv6_routing_ext *v = item; + uint32_t val; + + val = v->hdr.next_hdr << __mlx5_dw_bit_off(header_ipv6_routing_ext, next_hdr); + val |= v->hdr.type << __mlx5_dw_bit_off(header_ipv6_routing_ext, type); + val |= v->hdr.segments_left << + __mlx5_dw_bit_off(header_ipv6_routing_ext, segments_left); + DR_SET(tag, val, fc->byte_off, 0, fc->bit_mask); +} + static void mlx5dr_definer_gre_key_set(struct mlx5dr_definer_fc *fc, const void *item_spec, @@ -1468,6 +1484,76 @@ mlx5dr_definer_conv_item_meter_color(struct mlx5dr_definer_conv_data *cd, return 0; } +static struct mlx5dr_definer_fc * +mlx5dr_definer_get_flex_parser_fc(struct mlx5dr_definer_conv_data *cd, uint32_t byte_off) +{ + uint32_t byte_off_fp7 = MLX5_BYTE_OFF(definer_hl, flex_parser.flex_parser_7); + uint32_t byte_off_fp0 = MLX5_BYTE_OFF(definer_hl, flex_parser.flex_parser_0); + enum 
mlx5dr_definer_fname fname = MLX5DR_DEFINER_FNAME_FLEX_PARSER_0; + struct mlx5dr_definer_fc *fc; + uint32_t idx; + + if (byte_off < byte_off_fp7 || byte_off > byte_off_fp0) { + rte_errno = EINVAL; + return NULL; + } + idx = (byte_off_fp0 - byte_off) / (sizeof(uint32_t)); + fname += (enum mlx5dr_definer_fname)idx; + fc = &cd->fc[fname]; + fc->byte_off = byte_off; + fc->bit_mask = UINT32_MAX; + return fc; +} + +static int +mlx5dr_definer_conv_item_ipv6_routing_ext(struct mlx5dr_definer_conv_data *cd, + struct rte_flow_item *item, +
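As background for the sampling above: per RFC 8200, the first dword of any IPv6 routing extension header carries Next Header, Hdr Ext Len, Routing Type and Segments Left, one byte each. A small self-contained C sketch of that on-wire layout follows; it uses host-order packing purely for illustration, whereas mlx5dr_definer_ipv6_routing_ext_set() above uses the PRM bit offsets from mlx5_prm.h:

  #include <stdint.h>

  /* Pack the first 32 bits of an IPv6 routing extension header
   * (RFC 8200 layout: next_hdr | hdr_ext_len | type | segments_left). */
  static uint32_t
  srh_first_dword(uint8_t next_hdr, uint8_t hdr_ext_len,
                  uint8_t type, uint8_t segments_left)
  {
      return ((uint32_t)next_hdr << 24) | ((uint32_t)hdr_ext_len << 16) |
             ((uint32_t)type << 8) | (uint32_t)segments_left;
  }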
[PATCH v3 5/8] app/testpmd: add IPv6 routing extension header in raw encap
Add IPv6 routing extension header support to the raw_encap command. 1. No TLV support for now. 2. When hdr_len is not set, assume the header carries segments_left full IPv6 addresses and derive the header length from that. Signed-off-by: Rongwei Liu Acked-by: Ori Kam --- app/test-pmd/cmdline_flow.c | 23 +++ 1 file changed, 23 insertions(+) diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c index 7a8516829c..4bdb46e89a 100644 --- a/app/test-pmd/cmdline_flow.c +++ b/app/test-pmd/cmdline_flow.c @@ -10925,6 +10925,13 @@ flow_item_default_mask(const struct rte_flow_item *item) { const void *mask = NULL; static rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX); + static struct rte_flow_item_ipv6_routing_ext ipv6_routing_ext_default_mask = { + .hdr = { + .next_hdr = 0xff, + .type = 0xff, + .segments_left = 0xff, + }, + }; switch (item->type) { case RTE_FLOW_ITEM_TYPE_ANY: @@ -11027,6 +11034,9 @@ flow_item_default_mask(const struct rte_flow_item *item) case RTE_FLOW_ITEM_TYPE_METER_COLOR: mask = &rte_flow_item_meter_color_mask; break; + case RTE_FLOW_ITEM_TYPE_IPV6_ROUTING_EXT: + mask = &ipv6_routing_ext_default_mask; + break; default: break; } @@ -11181,6 +11191,7 @@ cmd_set_raw_parsed(const struct buffer *in) for (i = n - 1 ; i >= 0; --i) { const struct rte_flow_item_gtp *gtp; const struct rte_flow_item_geneve_opt *opt; + struct rte_flow_item_ipv6_routing_ext *ext; item = in->args.vc.pattern + i; if (item->spec == NULL) item->spec = flow_item_default_mask(item); switch (item->type) { @@ -11201,6 +11212,18 @@ cmd_set_raw_parsed(const struct buffer *in) size = sizeof(struct rte_ipv6_hdr); proto = RTE_ETHER_TYPE_IPV6; break; + case RTE_FLOW_ITEM_TYPE_IPV6_ROUTING_EXT: + ext = (struct rte_flow_item_ipv6_routing_ext *)(uintptr_t)item->spec; + if (!ext->hdr.hdr_len) { + size = sizeof(struct rte_ipv6_routing_ext) + + (ext->hdr.segments_left << 4); + ext->hdr.hdr_len = ext->hdr.segments_left << 1; + } else { + size = sizeof(struct rte_ipv6_routing_ext) + + (ext->hdr.hdr_len << 3); + } + proto = IPPROTO_ROUTING; + break; case RTE_FLOW_ITEM_TYPE_UDP: size = sizeof(struct rte_udp_hdr); proto = 0x11; -- 2.27.0
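As a worked example of the sizing logic above, take segments_left = 2 with hdr_len left at zero: the reserved size is sizeof(struct rte_ipv6_routing_ext) + (2 << 4) = 8 + 32 = 40 bytes (two 16-byte IPv6 segment addresses), and hdr_len is set to 2 << 1 = 4, i.e. four 8-byte units beyond the first 8 bytes, consistent with the 40-byte total. A hypothetical testpmd invocation exercising this path (the index and match values are placeholders):

  set raw_encap 1 eth / ipv6 / ipv6_routing_ext ext_type is 4 ext_seg_left is 2 / udp / end_set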
[PATCH v3 7/8] net/mlx5: add modify IPv6 protocol implementation
Add HWS modify IPv6 protocol implementation. Signed-off-by: Rongwei Liu Acked-by: Viacheslav Ovsiienko --- drivers/common/mlx5/mlx5_prm.h | 1 + drivers/net/mlx5/mlx5_flow_dv.c | 10 ++ 2 files changed, 11 insertions(+) diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h index ce6cd98fd7..497f2622b2 100644 --- a/drivers/common/mlx5/mlx5_prm.h +++ b/drivers/common/mlx5/mlx5_prm.h @@ -759,6 +759,7 @@ enum mlx5_modification_field { MLX5_MODI_OUT_IP_ECN = 0x73, MLX5_MODI_TUNNEL_HDR_DW_1 = 0x75, MLX5_MODI_GTPU_FIRST_EXT_DW_0 = 0x76, + MLX5_MODI_OUT_IPV6_NEXT_HDR = 0x4A, }; /* Total number of metadata reg_c's. */ diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index 7ca90b..e972a2dc5a 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -1357,6 +1357,7 @@ mlx5_flow_item_field_width(struct rte_eth_dev *dev, case RTE_FLOW_FIELD_IPV6_DSCP: return 6; case RTE_FLOW_FIELD_IPV6_HOPLIMIT: + case RTE_FLOW_FIELD_IPV6_PROTO: return 8; case RTE_FLOW_FIELD_IPV6_SRC: case RTE_FLOW_FIELD_IPV6_DST: @@ -1883,6 +1884,15 @@ mlx5_flow_field_id_to_modify_info info[idx].offset = data->offset; } break; + case RTE_FLOW_FIELD_IPV6_PROTO: + MLX5_ASSERT(data->offset + width <= 8); + off_be = 8 - (data->offset + width); + info[idx] = (struct field_modify_info){1, 0, MLX5_MODI_OUT_IPV6_NEXT_HDR}; + if (mask) + mask[idx] = flow_modify_info_mask_8(width, off_be); + else + info[idx].offset = off_be; + break; case RTE_FLOW_FIELD_POINTER: case RTE_FLOW_FIELD_VALUE: default: -- 2.27.0
[PATCH v3 6/8] ethdev: add modify IPv6 protocol field
Add IPv6 protocol modify field definition. Add new modify field destination type string: "ipv6_proto". Signed-off-by: Rongwei Liu Acked-by: Ori Kam --- app/test-pmd/cmdline_flow.c | 3 ++- lib/ethdev/rte_flow.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c index 4bdb46e89a..1340cf3a9b 100644 --- a/app/test-pmd/cmdline_flow.c +++ b/app/test-pmd/cmdline_flow.c @@ -811,7 +811,8 @@ static const char *const modify_field_ids[] = { "udp_port_src", "udp_port_dst", "vxlan_vni", "geneve_vni", "gtp_teid", "tag", "mark", "meta", "pointer", "value", - "ipv4_ecn", "ipv6_ecn", "gtp_psc_qfi", "meter_color", NULL + "ipv4_ecn", "ipv6_ecn", "gtp_psc_qfi", "meter_color", + "ipv6_proto", NULL }; static const char *const meter_colors[] = { diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h index 9b9018cba2..f4797682b7 100644 --- a/lib/ethdev/rte_flow.h +++ b/lib/ethdev/rte_flow.h @@ -3547,6 +3547,7 @@ enum rte_flow_field_id { RTE_FLOW_FIELD_IPV6_ECN,/**< IPv6 ECN. */ RTE_FLOW_FIELD_GTP_PSC_QFI, /**< GTP QFI. */ RTE_FLOW_FIELD_METER_COLOR, /**< Meter color marker. */ + RTE_FLOW_FIELD_IPV6_PROTO, /**< IPv6 next header. */ }; /** -- 2.27.0
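For illustration, a hypothetical testpmd rule using the new field ID to rewrite the IPv6 next header of matching packets to 0x11 (UDP); the port ID and queue index are placeholders, and the 8-bit width covers the whole field:

  flow create 0 ingress pattern eth / ipv6 / end actions modify_field op set dst_type ipv6_proto src_type value src_value 0x11 width 8 / queue index 0 / end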
[PATCH v3 8/8] doc/mlx5: add IPv6 routing extension matching docs
Update the mlx5 documentation for IPv6 routing extension header matching. Signed-off-by: Rongwei Liu Acked-by: Viacheslav Ovsiienko --- doc/guides/nics/features/default.ini | 1 + doc/guides/nics/features/mlx5.ini| 1 + doc/guides/nics/mlx5.rst | 2 ++ 3 files changed, 4 insertions(+) diff --git a/doc/guides/nics/features/default.ini b/doc/guides/nics/features/default.ini index 510cc6679d..3d0744a243 100644 --- a/doc/guides/nics/features/default.ini +++ b/doc/guides/nics/features/default.ini @@ -141,6 +141,7 @@ udp = vlan = vxlan= vxlan_gpe= +ipv6_routing_ext = [rte_flow actions] age = diff --git a/doc/guides/nics/features/mlx5.ini b/doc/guides/nics/features/mlx5.ini index 62fd330e2b..bd911a467b 100644 --- a/doc/guides/nics/features/mlx5.ini +++ b/doc/guides/nics/features/mlx5.ini @@ -87,6 +87,7 @@ vlan = Y vxlan= Y vxlan_gpe= Y represented_port = Y +ipv6_routing_ext = Y [rte_flow actions] age = I diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst index b23ca35b8f..fb8001faef 100644 --- a/doc/guides/nics/mlx5.rst +++ b/doc/guides/nics/mlx5.rst @@ -106,6 +106,7 @@ Features - Sub-Function representors. - Sub-Function. - Matching on represented port. +- Matching on IPv6 routing extension header. Limitations @@ -174,6 +175,7 @@ Limitations - ``-EAGAIN`` for ``rte_eth_dev_start()``. - ``-EBUSY`` for ``rte_eth_dev_stop()``. + - When matching on ICMP6 following an IPv6 routing extension header, match ipv6_routing_ext_next_hdr instead of ICMP6. - When using Verbs flow engine (``dv_flow_en`` = 0), flow pattern without any specific VLAN will match for VLAN packets as well: -- 2.27.0
RE: [PATCH v2 01/11] ethdev: add flex item modify field support
Hi Andrew BR Rongwei > -Original Message- > From: Andrew Rybchenko > Sent: Friday, January 20, 2023 17:08 > To: Rongwei Liu ; Matan Azrad ; > Slava Ovsiienko ; Ori Kam ; > NBU-Contact-Thomas Monjalon (EXTERNAL) ; Aman > Singh ; Yuying Zhang > ; Ferruh Yigit > Cc: dev@dpdk.org; Raslan Darawsheh > Subject: Re: [PATCH v2 01/11] ethdev: add flex item modify field support > > External email: Use caution opening links or attachments > > > On 1/19/23 07:58, Rongwei Liu wrote: > > Add flex item as modify field destination. > > Add "struct rte_flow_item_flex_handle *flex_handle" into "struct > > rte_flow_action_modify_data" as union with existed "level" member. > > This new member is dedicated for modifying flex item. > > > > Add flex item modify field cmdline support. Now user can use testpmd > > cli to specify which flex item to be modified, either source or > > destination. > > > > Syntax is as below: > > modify_field op set dst_type flex_item dst_level 0 dst_offset 16 > > src_type value src_value 0x123456781020 width 8 > > > > Signed-off-by: Rongwei Liu > > Acked-by: Ori Kam > > [snip] > > > diff --git a/doc/guides/rel_notes/release_23_03.rst > > b/doc/guides/rel_notes/release_23_03.rst > > index b8c5b68d6c..c673205e5e 100644 > > --- a/doc/guides/rel_notes/release_23_03.rst > > +++ b/doc/guides/rel_notes/release_23_03.rst > > @@ -56,6 +56,10 @@ New Features > >=== > > > > > > It should be just one empty line here > Sure. > > +* ethdev: added a new field: > > "added a new field' is too generic. > > > + > > + - modify flex item: ``rte_flow_action_modify_data.flex_handle`` > > + > > And two empty lines here. > Sure. > > Removed Items > > - > > > > diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h index > > b60987db4b..c66a65351d 100644 > > --- a/lib/ethdev/rte_flow.h > > +++ b/lib/ethdev/rte_flow.h > > @@ -3528,6 +3528,7 @@ enum rte_flow_field_id { > > RTE_FLOW_FIELD_IPV6_ECN,/**< IPv6 ECN. */ > > RTE_FLOW_FIELD_GTP_PSC_QFI, /**< GTP QFI. */ > > RTE_FLOW_FIELD_METER_COLOR, /**< Meter color marker. */ > > + RTE_FLOW_FIELD_FLEX_ITEM, /**< Flex item. */ > > }; > > > > /** > > @@ -3541,8 +3542,11 @@ struct rte_flow_action_modify_data { > > RTE_STD_C11 > > union { > > struct { > > - /** Encapsulation level or tag index. */ > > - uint32_t level; > > + /**< Encapsulation level or tag index or flex > > + item handle. */ > > Have you tried to generate documentation? If it is a union documentation it > should be /**, not /**<. Sure. Sorry, I followed the wrong existing examples. > In general, it is better to document union from overall point of view. What is it > logically? Do not define union as just a union of its fields. Currently, 'flex_handle' is documented in the rte_flow.rst file's " table:: destination/source field definition" as a new row. From an API perspective, when modifying a flex item, the user should specify the pointer to the flex item instead of an ID. That's why it was added as a union. > > > + union { > > + uint32_t level; > > + struct rte_flow_item_flex_handle > > + *flex_handle; > > Union items documentation missing. See above. Do we need another place to document the union again? > > > + }; > > /** Number of bits to skip from a field. */ > > uint32_t offset; > > };
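To make the review point above concrete, here is one possible shape of the documented union; this is only a sketch of the Doxygen convention being requested (an overall /** comment on the union and a /** comment per member), not the final upstream wording:

  /**
   * Destination/source field selector.
   * Holds the encapsulation level or tag index, or the flex item
   * to modify when the field ID is RTE_FLOW_FIELD_FLEX_ITEM.
   */
  union {
      /** Encapsulation level or tag index. */
      uint32_t level;
      /** Flex item handle, valid only for RTE_FLOW_FIELD_FLEX_ITEM. */
      struct rte_flow_item_flex_handle *flex_handle;
  };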
RE: [PATCH v2 01/11] ethdev: add flex item modify field support
Hi Andrew BR Rongwei > -Original Message- > From: Andrew Rybchenko > Sent: Friday, January 20, 2023 17:08 > To: Rongwei Liu ; Matan Azrad ; > Slava Ovsiienko ; Ori Kam ; > NBU-Contact-Thomas Monjalon (EXTERNAL) ; Aman > Singh ; Yuying Zhang > ; Ferruh Yigit > Cc: dev@dpdk.org; Raslan Darawsheh > Subject: Re: [PATCH v2 01/11] ethdev: add flex item modify field support > > External email: Use caution opening links or attachments > > > On 1/19/23 07:58, Rongwei Liu wrote: > > Add flex item as modify field destination. > > Add "struct rte_flow_item_flex_handle *flex_handle" into "struct > > rte_flow_action_modify_data" as union with existed "level" member. > > This new member is dedicated for modifying flex item. > > > > Add flex item modify field cmdline support. Now user can use testpmd > > cli to specify which flex item to be modified, either source or > > destination. > > > > Syntax is as below: > > modify_field op set dst_type flex_item dst_level 0 dst_offset 16 > > src_type value src_value 0x123456781020 width 8 > > > > Signed-off-by: Rongwei Liu > > Acked-by: Ori Kam > > [snip] > > > diff --git a/doc/guides/rel_notes/release_23_03.rst > > b/doc/guides/rel_notes/release_23_03.rst > > index b8c5b68d6c..c673205e5e 100644 > > --- a/doc/guides/rel_notes/release_23_03.rst > > +++ b/doc/guides/rel_notes/release_23_03.rst > > @@ -56,6 +56,10 @@ New Features > >=== > > > > > > It should be just one empty line here > > > +* ethdev: added a new field: > > "added a new field' is too generic. > > > + > > + - modify flex item: ``rte_flow_action_modify_data.flex_handle`` > > + > > And two empty lines here. > > > Removed Items > > - > > > > diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h index > > b60987db4b..c66a65351d 100644 > > --- a/lib/ethdev/rte_flow.h > > +++ b/lib/ethdev/rte_flow.h > > @@ -3528,6 +3528,7 @@ enum rte_flow_field_id { > > RTE_FLOW_FIELD_IPV6_ECN,/**< IPv6 ECN. */ > > RTE_FLOW_FIELD_GTP_PSC_QFI, /**< GTP QFI. */ > > RTE_FLOW_FIELD_METER_COLOR, /**< Meter color marker. */ > > + RTE_FLOW_FIELD_FLEX_ITEM, /**< Flex item. */ > > }; > > > > /** > > @@ -3541,8 +3542,11 @@ struct rte_flow_action_modify_data { > > RTE_STD_C11 > > union { > > struct { > > - /** Encapsulation level or tag index. */ > > - uint32_t level; > > + /**< Encapsulation level or tag index or flex > > + item handle. */ > > Have you tried to generate documentation? If it is a union documentation it > should be /**, not /**<. > In general, it is better to document union from overall point of view. What > is it > logically? Do not define union as just a union of its fields. > > > + union { > > + uint32_t level; > > + struct rte_flow_item_flex_handle > > + *flex_handle; > > Union items documentation missing. Added it in the "rte_flow.rst" following ``level`` segment. > > > + }; > > /** Number of bits to skip from a field. */ > > uint32_t offset; > > };
[PATCH v3 00/11] add flex item support
Support flex item matching and modify field in async flow. Syntax follows sync flow exactly. v3: enhance format, add flex_handle document. Rongwei Liu (11): ethdev: add flex item modify field support app/testpmd: pass flex handle into matching mask net/mlx5: enable hws flex item create net/mlx5: add IPv6 protocol as flex item input net/mlx5: adopt new flex item prm definition net/mlx5/hws: add hws flex item matching support net/mlx5: add flex item modify field implementation net/mlx5: return error for sws modify field app/testpmd: raw encap with flex item support doc/mlx5: update mlx5 doc app/testpmd: adjust cleanup sequence when quitting app/test-pmd/cmdline_flow.c| 123 +--- app/test-pmd/testpmd.c | 2 +- doc/guides/nics/mlx5.rst | 2 +- doc/guides/prog_guide/rte_flow.rst | 41 +++--- doc/guides/rel_notes/release_23_03.rst | 4 + drivers/common/mlx5/mlx5_devx_cmds.c | 14 +- drivers/common/mlx5/mlx5_devx_cmds.h | 7 +- drivers/common/mlx5/mlx5_prm.h | 29 +++- drivers/net/mlx5/hws/mlx5dr_definer.c | 83 +++ drivers/net/mlx5/linux/mlx5_os.c | 27 ++-- drivers/net/mlx5/mlx5.c| 17 ++- drivers/net/mlx5/mlx5.h| 9 +- drivers/net/mlx5/mlx5_flow.h | 4 + drivers/net/mlx5/mlx5_flow_dv.c| 186 ++--- drivers/net/mlx5/mlx5_flow_flex.c | 149 +--- drivers/net/mlx5/mlx5_flow_hw.c| 64 - lib/ethdev/rte_flow.h | 8 +- 17 files changed, 664 insertions(+), 105 deletions(-) -- 2.27.0
[PATCH v3 01/11] ethdev: add flex item modify field support
Add flex item as modify field destination. Add "struct rte_flow_item_flex_handle *flex_handle" into "struct rte_flow_action_modify_data" as union with existed "level" member. This new member is dedicated for modifying flex item. Add flex item modify field cmdline support. Now user can use testpmd cli to specify which flex item to be modified, either source or destination. Syntax is as below: modify_field op set dst_type flex_item dst_level 0 dst_offset 16 src_type value src_value 0x123456781020 width 8 Signed-off-by: Rongwei Liu Acked-by: Ori Kam --- app/test-pmd/cmdline_flow.c| 89 -- doc/guides/prog_guide/rte_flow.rst | 41 +++- doc/guides/rel_notes/release_23_03.rst | 4 ++ lib/ethdev/rte_flow.h | 8 ++- 4 files changed, 116 insertions(+), 26 deletions(-) diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c index 88108498e0..323c07253d 100644 --- a/app/test-pmd/cmdline_flow.c +++ b/app/test-pmd/cmdline_flow.c @@ -601,10 +601,12 @@ enum index { ACTION_MODIFY_FIELD_DST_TYPE, ACTION_MODIFY_FIELD_DST_TYPE_VALUE, ACTION_MODIFY_FIELD_DST_LEVEL, + ACTION_MODIFY_FIELD_DST_LEVEL_VALUE, ACTION_MODIFY_FIELD_DST_OFFSET, ACTION_MODIFY_FIELD_SRC_TYPE, ACTION_MODIFY_FIELD_SRC_TYPE_VALUE, ACTION_MODIFY_FIELD_SRC_LEVEL, + ACTION_MODIFY_FIELD_SRC_LEVEL_VALUE, ACTION_MODIFY_FIELD_SRC_OFFSET, ACTION_MODIFY_FIELD_SRC_VALUE, ACTION_MODIFY_FIELD_SRC_POINTER, @@ -807,7 +809,8 @@ static const char *const modify_field_ids[] = { "udp_port_src", "udp_port_dst", "vxlan_vni", "geneve_vni", "gtp_teid", "tag", "mark", "meta", "pointer", "value", - "ipv4_ecn", "ipv6_ecn", "gtp_psc_qfi", "meter_color", NULL + "ipv4_ecn", "ipv6_ecn", "gtp_psc_qfi", "meter_color", + "hash_result", "flex_item", NULL }; static const char *const meter_colors[] = { @@ -2282,6 +2285,10 @@ parse_vc_modify_field_id(struct context *ctx, const struct token *token, const char *str, unsigned int len, void *buf, unsigned int size); static int +parse_vc_modify_field_level(struct context *ctx, const struct token *token, + const char *str, unsigned int len, void *buf, + unsigned int size); +static int parse_vc_action_conntrack_update(struct context *ctx, const struct token *token, const char *str, unsigned int len, void *buf, unsigned int size); @@ -5976,11 +5983,15 @@ static const struct token token_list[] = { .name = "dst_level", .help = "destination field level", .next = NEXT(action_modify_field_dst, -NEXT_ENTRY(COMMON_UNSIGNED)), - .args = ARGS(ARGS_ENTRY(struct rte_flow_action_modify_field, - dst.level)), +NEXT_ENTRY(ACTION_MODIFY_FIELD_DST_LEVEL_VALUE)), .call = parse_vc_conf, }, + [ACTION_MODIFY_FIELD_DST_LEVEL_VALUE] = { + .name = "{dst_level}", + .help = "destination field level value", + .call = parse_vc_modify_field_level, + .comp = comp_none, + }, [ACTION_MODIFY_FIELD_DST_OFFSET] = { .name = "dst_offset", .help = "destination field bit offset", @@ -6007,11 +6018,15 @@ static const struct token token_list[] = { .name = "src_level", .help = "source field level", .next = NEXT(action_modify_field_src, -NEXT_ENTRY(COMMON_UNSIGNED)), - .args = ARGS(ARGS_ENTRY(struct rte_flow_action_modify_field, - src.level)), +NEXT_ENTRY(ACTION_MODIFY_FIELD_SRC_LEVEL_VALUE)), .call = parse_vc_conf, }, + [ACTION_MODIFY_FIELD_SRC_LEVEL_VALUE] = { + .name = "{src_level}", + .help = "source field level value", + .call = parse_vc_modify_field_level, + .comp = comp_none, + }, [ACTION_MODIFY_FIELD_SRC_OFFSET] = { .name = "src_offset", .help = "source field bit offset", @@ -8477,6 +8492,66 @@ parse_vc_modify_field_id(struct context *ctx, const struct token 
*token, return len; } +/** Parse level for modify_field command. */ +static int +parse_vc_modify_field_level(struct context *ctx, const struct token *token, +const char *str, unsigned int len, void *buf, +unsigned int size) +{ + struct rte_flow_action_modify_field *action; + struct flex_item *fp; + uint32_t val; + struct buffer *out = buf; + char *end; + + (void)token; +
[PATCH v3 02/11] app/testpmd: pass flex handle into matching mask
In async flow create API, there is only mask information when creating flow table but flex item handle is required to parse the HW sample information. Pass the flex item handle instead of UINT64/32_MAX to mask. Signed-off-by: Rongwei Liu Acked-by: Ori Kam --- app/test-pmd/cmdline_flow.c | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c index 323c07253d..f5d7a67def 100644 --- a/app/test-pmd/cmdline_flow.c +++ b/app/test-pmd/cmdline_flow.c @@ -10068,8 +10068,8 @@ parse_flex_handle(struct context *ctx, const struct token *token, } if (offset == offsetof(struct rte_flow_item_flex, handle)) { const struct flex_item *fp; - struct rte_flow_item_flex *item_flex = ctx->object; - handle = (uint16_t)(uintptr_t)item_flex->handle; + spec = ctx->object; + handle = (uint16_t)(uintptr_t)spec->handle; if (handle >= FLEX_MAX_PARSERS_NUM) { printf("Bad flex item handle\n"); return -1; @@ -10079,7 +10079,9 @@ parse_flex_handle(struct context *ctx, const struct token *token, printf("Bad flex item handle\n"); return -1; } - item_flex->handle = fp->flex_handle; + spec->handle = fp->flex_handle; + mask = spec + 2; /* spec, last, mask */ + mask->handle = fp->flex_handle; } else if (offset == offsetof(struct rte_flow_item_flex, pattern)) { handle = (uint16_t)(uintptr_t) ((struct rte_flow_item_flex *)ctx->object)->pattern; -- 2.27.0
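The `mask = spec + 2` arithmetic above relies on how testpmd lays out the three per-item buffers: the flow parser allocates spec, last and mask back to back, one item-sized buffer each. A minimal sketch of that assumption, with a hypothetical base pointer and assuming each buffer is exactly sizeof(struct rte_flow_item_flex):

  /* ctx->object points at [ spec | last | mask ], three equal-sized
   * buffers allocated contiguously by testpmd's flow parser. */
  struct rte_flow_item_flex *spec = ctx_object; /* hypothetical base */
  struct rte_flow_item_flex *last = spec + 1;   /* skip spec */
  struct rte_flow_item_flex *mask = spec + 2;   /* skip spec and last */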
[PATCH v3 03/11] net/mlx5: enable hws flex item create
Enable flex item create and destroy with dv_flow_en=2 Signed-off-by: Rongwei Liu Acked-by: Viacheslav Ovsiienko --- drivers/net/mlx5/linux/mlx5_os.c | 27 +++ drivers/net/mlx5/mlx5_flow_hw.c | 2 ++ 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index a71474c90a..f5b3edea99 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -474,10 +474,20 @@ mlx5_alloc_shared_dr(struct mlx5_priv *priv) err = mlx5_alloc_table_hash_list(priv); if (err) goto error; - if (priv->sh->config.dv_flow_en == 2) - return 0; /* The resources below are only valid with DV support. */ #ifdef HAVE_IBV_FLOW_DV_SUPPORT + /* Init shared flex parsers list, no need lcore_share */ + snprintf(s, sizeof(s), "%s_flex_parsers_list", sh->ibdev_name); + sh->flex_parsers_dv = mlx5_list_create(s, sh, false, + mlx5_flex_parser_create_cb, + mlx5_flex_parser_match_cb, + mlx5_flex_parser_remove_cb, + mlx5_flex_parser_clone_cb, + mlx5_flex_parser_clone_free_cb); + if (!sh->flex_parsers_dv) + goto error; + if (priv->sh->config.dv_flow_en == 2) + return 0; /* Init port id action list. */ snprintf(s, sizeof(s), "%s_port_id_action_list", sh->ibdev_name); sh->port_id_action_list = mlx5_list_create(s, sh, true, @@ -518,16 +528,9 @@ mlx5_alloc_shared_dr(struct mlx5_priv *priv) flow_dv_dest_array_clone_free_cb); if (!sh->dest_array_list) goto error; - /* Init shared flex parsers list, no need lcore_share */ - snprintf(s, sizeof(s), "%s_flex_parsers_list", sh->ibdev_name); - sh->flex_parsers_dv = mlx5_list_create(s, sh, false, - mlx5_flex_parser_create_cb, - mlx5_flex_parser_match_cb, - mlx5_flex_parser_remove_cb, - mlx5_flex_parser_clone_cb, - mlx5_flex_parser_clone_free_cb); - if (!sh->flex_parsers_dv) - goto error; +#else + if (priv->sh->config.dv_flow_en == 2) + return 0; #endif #ifdef HAVE_MLX5DV_DR void *domain; diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c index 20c71ff7f0..44953451d5 100644 --- a/drivers/net/mlx5/mlx5_flow_hw.c +++ b/drivers/net/mlx5/mlx5_flow_hw.c @@ -8336,6 +8336,8 @@ const struct mlx5_flow_driver_ops mlx5_flow_hw_drv_ops = { .query = flow_hw_query, .get_aged_flows = flow_hw_get_aged_flows, .get_q_aged_flows = flow_hw_get_q_aged_flows, + .item_create = flow_dv_item_create, + .item_release = flow_dv_item_release, }; /** -- 2.27.0
[PATCH v3 04/11] net/mlx5: add IPv6 protocol as flex item input
Support IPv6 protocol as new flex item input link. Signed-off-by: Rongwei Liu Acked-by: Viacheslav Ovsiienko --- drivers/net/mlx5/mlx5_flow_flex.c | 19 +++ 1 file changed, 19 insertions(+) diff --git a/drivers/net/mlx5/mlx5_flow_flex.c b/drivers/net/mlx5/mlx5_flow_flex.c index fb08910ddb..bec07b13c1 100644 --- a/drivers/net/mlx5/mlx5_flow_flex.c +++ b/drivers/net/mlx5/mlx5_flow_flex.c @@ -1043,6 +1043,22 @@ mlx5_flex_arc_in_udp(const struct rte_flow_item *item, return rte_be_to_cpu_16(spec->hdr.dst_port); } +static int +mlx5_flex_arc_in_ipv6(const struct rte_flow_item *item, + struct rte_flow_error *error) +{ + const struct rte_flow_item_ipv6 *spec = item->spec; + const struct rte_flow_item_ipv6 *mask = item->mask; + struct rte_flow_item_ipv6 ip = { .hdr.proto = 0xff }; + + if (memcmp(mask, &ip, sizeof(struct rte_flow_item_ipv6))) { + return rte_flow_error_set + (error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item, +"invalid ipv6 item mask, full mask is desired"); + } + return spec->hdr.proto; +} + static int mlx5_flex_translate_arc_in(struct mlx5_hca_flex_attr *attr, const struct rte_flow_item_flex_conf *conf, @@ -1089,6 +1105,9 @@ mlx5_flex_translate_arc_in(struct mlx5_hca_flex_attr *attr, case RTE_FLOW_ITEM_TYPE_UDP: ret = mlx5_flex_arc_in_udp(rte_item, error); break; + case RTE_FLOW_ITEM_TYPE_IPV6: + ret = mlx5_flex_arc_in_ipv6(rte_item, error); + break; default: MLX5_ASSERT(false); return rte_flow_error_set -- 2.27.0
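The memcmp() in the patch insists that the IPv6 input-link item masks exactly the proto field and nothing else; any other non-zero mask byte makes the link invalid. A minimal C sketch of a conforming input link (protocol value 253, an experimental number, is a placeholder):

  struct rte_flow_item_ipv6 link_spec = { .hdr.proto = 253 };
  struct rte_flow_item_ipv6 link_mask = { .hdr.proto = 0xff }; /* proto only, full mask */
  struct rte_flow_item link = {
      .type = RTE_FLOW_ITEM_TYPE_IPV6,
      .spec = &link_spec,
      .mask = &link_mask,
  };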
[PATCH v3 07/11] net/mlx5: add flex item modify field implementation
Add flex item modify field HWS implementation. The minimum modify boundary is one byte. Signed-off-by: Rongwei Liu Acked-by: Viacheslav Ovsiienko --- drivers/common/mlx5/mlx5_prm.h | 1 + drivers/net/mlx5/mlx5_flow.h| 3 + drivers/net/mlx5/mlx5_flow_dv.c | 165 +--- drivers/net/mlx5/mlx5_flow_hw.c | 14 ++- 4 files changed, 170 insertions(+), 13 deletions(-) diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h index ce6cd98fd7..0c2a516e9d 100644 --- a/drivers/common/mlx5/mlx5_prm.h +++ b/drivers/common/mlx5/mlx5_prm.h @@ -759,6 +759,7 @@ enum mlx5_modification_field { MLX5_MODI_OUT_IP_ECN = 0x73, MLX5_MODI_TUNNEL_HDR_DW_1 = 0x75, MLX5_MODI_GTPU_FIRST_EXT_DW_0 = 0x76, + MLX5_MODI_INVALID = INT_MAX, }; /* Total number of metadata reg_c's. */ diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index c8761c4e5a..c71fa1c0ad 100644 --- a/drivers/net/mlx5/mlx5_flow.h +++ b/drivers/net/mlx5/mlx5_flow.h @@ -1080,6 +1080,8 @@ struct field_modify_info { uint32_t size; /* Size of field in protocol header, in bytes. */ uint32_t offset; /* Offset of field in protocol header, in bytes. */ enum mlx5_modification_field id; + uint32_t shift; + uint8_t is_flex; /* Temporary indicator for flex item modify filed WA. */ }; /* HW steering flow attributes. */ @@ -1244,6 +1246,7 @@ struct rte_flow_actions_template { uint16_t mhdr_off; /* Offset of DR modify header action. */ uint32_t refcnt; /* Reference counter. */ uint16_t rx_cpy_pos; /* Action position of Rx metadata to be copied. */ + uint8_t flex_item; /* flex item index. */ }; /* Jump action struct. */ diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index 284f18da11..92a5914d4b 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -414,10 +414,15 @@ flow_dv_convert_modify_action(struct rte_flow_item *item, ++field; continue; } - /* Deduce actual data width in bits from mask value. */ - off_b = rte_bsf32(mask) + carry_b; - size_b = sizeof(uint32_t) * CHAR_BIT - -off_b - __builtin_clz(mask); + if (type == MLX5_MODIFICATION_TYPE_COPY && field->is_flex) { + off_b = 32 - field->shift + carry_b - field->size * CHAR_BIT; + size_b = field->size * CHAR_BIT - carry_b; + } else { + /* Deduce actual data width in bits from mask value. */ + off_b = rte_bsf32(mask) + carry_b; + size_b = sizeof(uint32_t) * CHAR_BIT - +off_b - __builtin_clz(mask); + } MLX5_ASSERT(size_b); actions[i] = (struct mlx5_modification_cmd) { .action_type = type, @@ -437,40 +442,46 @@ flow_dv_convert_modify_action(struct rte_flow_item *item, * Destination field overflow. Copy leftovers of * a source field to the next destination field. */ - carry_b = 0; if ((size_b > dcopy->size * CHAR_BIT - dcopy->offset) && dcopy->size != 0) { actions[i].length = dcopy->size * CHAR_BIT - dcopy->offset; - carry_b = actions[i].length; + carry_b += actions[i].length; next_field = false; + } else { + carry_b = 0; } /* * Not enough bits in a source filed to fill a * destination field. Switch to the next source. */ if ((size_b < dcopy->size * CHAR_BIT - dcopy->offset) && - (size_b == field->size * CHAR_BIT - off_b)) { - actions[i].length = - field->size * CHAR_BIT - off_b; + ((size_b == field->size * CHAR_BIT - off_b) || +field->is_flex)) { + actions[i].length = size_b; dcopy->offset += actions[i].length; next_dcopy = false; } - if (next_dcopy) - ++dcopy; } else { MLX5_ASSERT(item->spec); data = flow_dv_fetch_field((const uint8_t *)item->spec + field->offset, field->size);
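For illustration, a hypothetical testpmd action built on this support, copying the first byte sampled from a previously created flex item (index 0) into the META field; per the one-byte minimum boundary above, offset and width are kept byte-aligned, and the port ID and queue index are placeholders:

  flow create 0 ingress pattern eth / end actions modify_field op set dst_type meta src_type flex_item src_level 0 src_offset 0 width 8 / queue index 0 / end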
[PATCH v3 08/11] net/mlx5: return error for sws modify field
Return unsupported error message when application tries to modify flex item field. Validation of packet modifications actions for SW Steering checked if either source or destination field of MODIFY_FIELD action was a flex item. When DEC_TTL action is used, DEC_TTL action does not have any action configuration and dereferencing source or destination field is invalid, so validation of source and destination field types should be moved to MODIFY_FIELD specific validation function, then field types are validated if and only if action type is MODIFY_FIELD. Signed-off-by: Dariusz Sosnowski Signed-off-by: Rongwei Liu Acked-by: Viacheslav Ovsiienko --- drivers/net/mlx5/mlx5_flow_dv.c | 19 --- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index 92a5914d4b..a7c0d5bf17 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -4828,6 +4828,7 @@ flow_dv_validate_action_modify_hdr(const uint64_t action_flags, return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, "action configuration not set"); + if (action_flags & MLX5_FLOW_ACTION_ENCAP) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, NULL, @@ -5153,17 +5154,21 @@ flow_dv_validate_action_modify_field(struct rte_eth_dev *dev, struct mlx5_hca_attr *hca_attr = &priv->sh->cdev->config.hca_attr; const struct rte_flow_action_modify_field *action_modify_field = action->conf; - uint32_t dst_width = mlx5_flow_item_field_width(dev, - action_modify_field->dst.field, - -1, attr, error); - uint32_t src_width = mlx5_flow_item_field_width(dev, - action_modify_field->src.field, - dst_width, attr, error); + uint32_t dst_width, src_width; ret = flow_dv_validate_action_modify_hdr(action_flags, action, error); if (ret) return ret; - + if (action_modify_field->src.field == RTE_FLOW_FIELD_FLEX_ITEM || + action_modify_field->dst.field == RTE_FLOW_FIELD_FLEX_ITEM) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION, action, + "flex item fields modification" + " is not supported"); + dst_width = mlx5_flow_item_field_width(dev, action_modify_field->dst.field, + -1, attr, error); + src_width = mlx5_flow_item_field_width(dev, action_modify_field->src.field, + dst_width, attr, error); if (action_modify_field->width == 0) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, action, -- 2.27.0
[PATCH v3 06/11] net/mlx5/hws: add hws flex item matching support
Support flex item matching in hws and syntax follows sws exactly. Flex item should be created in advance and follow current json mapping logic. Signed-off-by: Rongwei Liu Reviewed-by: Alex Vesker Acked-by: Viacheslav Ovsiienko --- drivers/net/mlx5/hws/mlx5dr_definer.c | 83 ++ drivers/net/mlx5/mlx5.c | 2 +- drivers/net/mlx5/mlx5.h | 6 ++ drivers/net/mlx5/mlx5_flow.h | 1 + drivers/net/mlx5/mlx5_flow_dv.c | 2 +- drivers/net/mlx5/mlx5_flow_flex.c | 116 ++ drivers/net/mlx5/mlx5_flow_hw.c | 48 ++- 7 files changed, 239 insertions(+), 19 deletions(-) diff --git a/drivers/net/mlx5/hws/mlx5dr_definer.c b/drivers/net/mlx5/hws/mlx5dr_definer.c index 6b98eb8c96..a6378afb10 100644 --- a/drivers/net/mlx5/hws/mlx5dr_definer.c +++ b/drivers/net/mlx5/hws/mlx5dr_definer.c @@ -293,6 +293,43 @@ mlx5dr_definer_integrity_set(struct mlx5dr_definer_fc *fc, DR_SET(tag, ok1_bits, fc->byte_off, fc->bit_off, fc->bit_mask); } +static void +mlx5dr_definer_flex_parser_set(struct mlx5dr_definer_fc *fc, + const void *item, + uint8_t *tag, bool is_inner) +{ + const struct rte_flow_item_flex *flex = item; + uint32_t byte_off, val, idx; + int ret; + + val = 0; + byte_off = MLX5_BYTE_OFF(definer_hl, flex_parser.flex_parser_0); + idx = fc->fname - MLX5DR_DEFINER_FNAME_FLEX_PARSER_0; + byte_off -= idx * sizeof(uint32_t); + ret = mlx5_flex_get_parser_value_per_byte_off(flex, flex->handle, byte_off, + false, is_inner, &val); + if (ret == -1 || !val) + return; + + DR_SET(tag, val, fc->byte_off, 0, fc->bit_mask); +} + +static void +mlx5dr_definer_flex_parser_inner_set(struct mlx5dr_definer_fc *fc, +const void *item, +uint8_t *tag) +{ + mlx5dr_definer_flex_parser_set(fc, item, tag, true); +} + +static void +mlx5dr_definer_flex_parser_outer_set(struct mlx5dr_definer_fc *fc, +const void *item, +uint8_t *tag) +{ + mlx5dr_definer_flex_parser_set(fc, item, tag, false); +} + static void mlx5dr_definer_gre_key_set(struct mlx5dr_definer_fc *fc, const void *item_spec, @@ -1465,6 +1502,47 @@ mlx5dr_definer_conv_item_meter_color(struct mlx5dr_definer_conv_data *cd, return 0; } +static int +mlx5dr_definer_conv_item_flex_parser(struct mlx5dr_definer_conv_data *cd, +struct rte_flow_item *item, +int item_idx) +{ + uint32_t base_off = MLX5_BYTE_OFF(definer_hl, flex_parser.flex_parser_0); + const struct rte_flow_item_flex *v, *m; + enum mlx5dr_definer_fname fname; + struct mlx5dr_definer_fc *fc; + uint32_t i, mask, byte_off; + bool is_inner = cd->tunnel; + int ret; + + m = item->mask; + v = item->spec; + mask = 0; + for (i = 0; i < MLX5_GRAPH_NODE_SAMPLE_NUM; i++) { + byte_off = base_off - i * sizeof(uint32_t); + ret = mlx5_flex_get_parser_value_per_byte_off(m, v->handle, byte_off, + true, is_inner, &mask); + if (ret == -1) { + rte_errno = EINVAL; + return rte_errno; + } + + if (!mask) + continue; + + fname = MLX5DR_DEFINER_FNAME_FLEX_PARSER_0; + fname += (enum mlx5dr_definer_fname)i; + fc = &cd->fc[fname]; + fc->byte_off = byte_off; + fc->item_idx = item_idx; + fc->tag_set = cd->tunnel ? 
&mlx5dr_definer_flex_parser_inner_set : + &mlx5dr_definer_flex_parser_outer_set; + fc->tag_mask_set = &mlx5dr_definer_ones_set; + fc->bit_mask = mask; + } + return 0; +} + static int mlx5dr_definer_conv_items_to_hl(struct mlx5dr_context *ctx, struct mlx5dr_match_template *mt, @@ -1581,6 +1659,11 @@ mlx5dr_definer_conv_items_to_hl(struct mlx5dr_context *ctx, ret = mlx5dr_definer_conv_item_meter_color(&cd, items, i); item_flags |= MLX5_FLOW_ITEM_METER_COLOR; break; + case RTE_FLOW_ITEM_TYPE_FLEX: + ret = mlx5dr_definer_conv_item_flex_parser(&cd, items, i); + item_flags |= cd.tunnel ? MLX5_FLOW_ITEM_INNER_FLEX : + MLX5_FLOW_ITEM_OUTER_FLEX; + break; default: DR_LOG(ERR, "Unsupported item type %d", items->type);
[PATCH v3 09/11] app/testpmd: raw encap with flex item support
Application should retrieve raw_encap buffer from spec->pattern if it is flex item. Signed-off-by: Rongwei Liu Acked-by: Ori Kam --- app/test-pmd/cmdline_flow.c | 26 +- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c index f5d7a67def..50c8ec5594 100644 --- a/app/test-pmd/cmdline_flow.c +++ b/app/test-pmd/cmdline_flow.c @@ -11193,6 +11193,7 @@ cmd_set_raw_parsed(const struct buffer *in) uint16_t proto = 0; uint16_t idx = in->port; /* We borrow port field as index */ int gtp_psc = -1; /* GTP PSC option index. */ + const void *src_spec; if (in->command == SET_SAMPLE_ACTIONS) return cmd_set_raw_parsed_sample(in); @@ -11216,6 +11217,7 @@ cmd_set_raw_parsed(const struct buffer *in) item = in->args.vc.pattern + i; if (item->spec == NULL) item->spec = flow_item_default_mask(item); + src_spec = item->spec; switch (item->type) { case RTE_FLOW_ITEM_TYPE_ETH: size = sizeof(struct rte_ether_hdr); @@ -11343,9 +11345,13 @@ cmd_set_raw_parsed(const struct buffer *in) size = sizeof(struct rte_flow_item_pfcp); break; case RTE_FLOW_ITEM_TYPE_FLEX: - size = item->spec ? - ((const struct rte_flow_item_flex *) - item->spec)->length : 0; + if (item->spec != NULL) { + size = ((const struct rte_flow_item_flex *)item->spec)->length; + src_spec = ((const struct rte_flow_item_flex *)item->spec)->pattern; + } else { + size = 0; + src_spec = NULL; + } break; case RTE_FLOW_ITEM_TYPE_GRE_OPTION: size = 0; @@ -11378,12 +11384,14 @@ cmd_set_raw_parsed(const struct buffer *in) fprintf(stderr, "Error - Not supported item\n"); goto error; } - *total_size += size; - rte_memcpy(data_tail - (*total_size), item->spec, size); - /* update some fields which cannot be set by cmdline */ - update_fields((data_tail - (*total_size)), item, - upper_layer); - upper_layer = proto; + if (size) { + *total_size += size; + rte_memcpy(data_tail - (*total_size), src_spec, size); + /* update some fields which cannot be set by cmdline */ + update_fields((data_tail - (*total_size)), item, + upper_layer); + upper_layer = proto; + } } if (verbose_level & 0x1) printf("total data size is %zu\n", (*total_size)); -- 2.27.0
[PATCH v3 05/11] net/mlx5: adopt new flex item prm definition
Per the newest PRM definition, sample_id carries three parts of information instead of a single uint32_t ID: sample_id + modify_field_id + format_select_dw. New FW capability bits have also been introduced to identify the new capability. Signed-off-by: Rongwei Liu Acked-by: Viacheslav Ovsiienko --- drivers/common/mlx5/mlx5_devx_cmds.c | 14 +++--- drivers/common/mlx5/mlx5_devx_cmds.h | 7 ++- drivers/common/mlx5/mlx5_prm.h | 28 ++-- drivers/net/mlx5/mlx5.c | 15 +++ drivers/net/mlx5/mlx5.h | 3 ++- drivers/net/mlx5/mlx5_flow_flex.c| 14 +++--- 6 files changed, 67 insertions(+), 14 deletions(-) diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c index e3a4927d0f..1f65ea7dcb 100644 --- a/drivers/common/mlx5/mlx5_devx_cmds.c +++ b/drivers/common/mlx5/mlx5_devx_cmds.c @@ -607,7 +607,8 @@ mlx5_devx_cmd_query_hca_vdpa_attr(void *ctx, int mlx5_devx_cmd_query_parse_samples(struct mlx5_devx_obj *flex_obj, - uint32_t ids[], uint32_t num) + struct mlx5_ext_sample_id ids[], + uint32_t num, uint8_t *anchor) { uint32_t in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {0}; uint32_t out[MLX5_ST_SZ_DW(create_flex_parser_out)] = {0}; @@ -636,6 +637,7 @@ mlx5_devx_cmd_query_parse_samples(struct mlx5_devx_obj *flex_obj, (void *)flex_obj); return -rte_errno; } + *anchor = MLX5_GET(parse_graph_flex, flex, head_anchor_id); for (i = 0; i < MLX5_GRAPH_NODE_SAMPLE_NUM; i++) { void *s_off = (void *)((char *)sample + i * MLX5_ST_SZ_BYTES(parse_graph_flow_match_sample)); @@ -645,8 +647,8 @@ mlx5_devx_cmd_query_parse_samples(struct mlx5_devx_obj *flex_obj, flow_match_sample_en); if (!en) continue; - ids[idx++] = MLX5_GET(parse_graph_flow_match_sample, s_off, - flow_match_sample_field_id); + ids[idx++].id = MLX5_GET(parse_graph_flow_match_sample, s_off, +flow_match_sample_field_id); } if (num != idx) { rte_errno = EINVAL; @@ -794,6 +796,12 @@ mlx5_devx_cmd_query_hca_parse_graph_node_cap max_num_arc_out); attr->max_num_sample = MLX5_GET(parse_graph_node_cap, hcattr, max_num_sample); + attr->anchor_en = MLX5_GET(parse_graph_node_cap, hcattr, anchor_en); + attr->ext_sample_id = MLX5_GET(parse_graph_node_cap, hcattr, ext_sample_id); + attr->sample_tunnel_inner2 = MLX5_GET(parse_graph_node_cap, hcattr, + sample_tunnel_inner2); + attr->zero_size_supported = MLX5_GET(parse_graph_node_cap, hcattr, +zero_size_supported); attr->sample_id_in_out = MLX5_GET(parse_graph_node_cap, hcattr, sample_id_in_out); attr->max_base_header_length = MLX5_GET(parse_graph_node_cap, hcattr, diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h index c94b9eac06..5b33010155 100644 --- a/drivers/common/mlx5/mlx5_devx_cmds.h +++ b/drivers/common/mlx5/mlx5_devx_cmds.h @@ -114,6 +114,10 @@ struct mlx5_hca_flex_attr { uint8_t max_num_arc_out; uint8_t max_num_sample; uint8_t max_num_prog_sample:5; /* From HCA CAP 2 */ + uint8_t anchor_en:1; + uint8_t ext_sample_id:1; + uint8_t sample_tunnel_inner2:1; + uint8_t zero_size_supported:1; uint8_t sample_id_in_out:1; uint16_t max_base_header_length; uint8_t max_sample_base_offset; @@ -706,7 +710,8 @@ int mlx5_devx_cmd_modify_tir(struct mlx5_devx_obj *tir, struct mlx5_devx_modify_tir_attr *tir_attr); __rte_internal int mlx5_devx_cmd_query_parse_samples(struct mlx5_devx_obj *flex_obj, - uint32_t ids[], uint32_t num); + struct mlx5_ext_sample_id ids[], + uint32_t num, uint8_t *anchor); __rte_internal struct mlx5_devx_obj * diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h index 3790dc84b8..ce6cd98fd7 100644 --- 
a/drivers/common/mlx5/mlx5_prm.h +++ b/drivers/common/mlx5/mlx5_prm.h @@ -1893,7 +1893,11 @@ struct mlx5_ifc_parse_graph_node_cap_bits { u8 max_num_arc_in[0x08]; u8 max_num_arc_out[0x08]; u8 max_num_sample[0x08]; - u8 reserved_at_78[0x07]; + u8 reserved_at_78[0x03]; + u8 anchor_en[0x1]; + u8 ext_sample_id[0x1]; + u8 sample_tunnel_inner2[0x1]; + u8 zero_size_supported[0x1]; u8 s
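The patch above turns the flat uint32_t sample ID into a structured value. A minimal sketch of the new mlx5_ext_sample_id layout, assuming the 8/12/12 bit split implied by the commit message (the authoritative definition lives in the PRM and mlx5_devx_cmds.h):

struct mlx5_ext_sample_id {
	union {
		struct {
			uint32_t format_select_dw:8;  /* DW format selector */
			uint32_t modify_field_id:12;  /* ID consumed by modify-field actions */
			uint32_t sample_id:12;        /* original sample identifier */
		};
		uint32_t id;  /* legacy flat view */
	};
};

Callers that only need the flat ID keep working through the .id member, which is why mlx5_devx_cmd_query_parse_samples() above now assigns ids[idx++].id rather than ids[idx++].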
[PATCH v3 10/11] doc/mlx5: update mlx5 doc
Add the flex item matching and modify field features to the mlx5 documentation. Signed-off-by: Rongwei Liu Acked-by: Ori Kam --- doc/guides/nics/mlx5.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst index b23ca35b8f..a2634c378f 100644 --- a/doc/guides/nics/mlx5.rst +++ b/doc/guides/nics/mlx5.rst @@ -106,7 +106,7 @@ Features - Sub-Function representors. - Sub-Function. - Matching on represented port. - +- Modify flex item field. Limitations --- -- 2.27.0
[PATCH v3 11/11] app/testpmd: adjust cleanup sequence when quitting
If a flex item is referenced in an async flow, either by a pattern template or an actions template, testpmd currently complains "flex item has flow references" when quitting. Flex items should be flushed only after the async flow resources have been cleaned up. Signed-off-by: Rongwei Liu Acked-by: Ori Kam --- app/test-pmd/testpmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index 134d79a555..e35f7a0e7a 100644 --- a/app/test-pmd/testpmd.c +++ b/app/test-pmd/testpmd.c @@ -3298,10 +3298,10 @@ flush_port_owned_resources(portid_t pi) { mcast_addr_pool_destroy(pi); port_flow_flush(pi); - port_flex_item_flush(pi); port_flow_template_table_flush(pi); port_flow_pattern_template_flush(pi); port_flow_actions_template_flush(pi); + port_flex_item_flush(pi); port_action_handle_flush(pi); } -- 2.27.0
RE: [PATCH v1 00/21] Add control queue & MQ support to Virtio-user vDPA
Hi Maxime, > -Original Message- > From: Maxime Coquelin > Sent: Wednesday, November 30, 2022 11:56 PM > To: dev@dpdk.org; Xia, Chenbo ; > david.march...@redhat.com; epere...@redhat.com > Cc: Maxime Coquelin > Subject: [PATCH v1 00/21] Add control queue & MQ support to Virtio-user > vDPA > > -- > 2.38.1 I see one virtio test failed on patchwork; could you check if it's related? Thanks, Chenbo
[RFC v2 0/9] gve PMD enhancement
This patch set includes three main enhancements for the gve PMD: - support basic data path for DQO queue format - support jumbo frame for GQI queue format - add new AdminQ cmd to verify driver compatibility v2: - add one missing commit to this patch set. - add new contributors to the mailmap. Junfeng Guo (9): net/gve: add Tx queue setup for DQO net/gve: add Rx queue setup for DQO net/gve: support device start and close for DQO net/gve: support queue release and stop for DQO net/gve: support basic Tx data path for DQO net/gve: support basic Rx data path for DQO net/gve: support basic stats for DQO net/gve: support jumbo frame for GQI net/gve: add AdminQ command to verify driver compatibility .mailmap| 3 + MAINTAINERS | 3 + drivers/net/gve/base/gve.h | 1 + drivers/net/gve/base/gve_adminq.c | 29 ++- drivers/net/gve/base/gve_adminq.h | 48 drivers/net/gve/base/gve_desc_dqo.h | 4 - drivers/net/gve/base/gve_osdep.h| 12 + drivers/net/gve/gve_ethdev.c| 200 ++- drivers/net/gve/gve_ethdev.h| 86 ++- drivers/net/gve/gve_rx.c| 131 +++--- drivers/net/gve/gve_rx_dqo.c| 343 + drivers/net/gve/gve_tx.c| 3 + drivers/net/gve/gve_tx_dqo.c| 380 drivers/net/gve/meson.build | 2 + 14 files changed, 1196 insertions(+), 49 deletions(-) create mode 100644 drivers/net/gve/gve_rx_dqo.c create mode 100644 drivers/net/gve/gve_tx_dqo.c -- 2.34.1
[RFC v2 1/9] net/gve: add Tx queue setup for DQO
Add support for tx_queue_setup_dqo ops. DQO format has submission and completion queue pair for each Tx/Rx queue. Note that with DQO format all descriptors and doorbells, as well as counters are written in little-endian. Signed-off-by: Junfeng Guo Signed-off-by: Rushil Gupta Signed-off-by: Jordan Kimbrough Signed-off-by: Jeroen de Borst --- .mailmap| 3 + MAINTAINERS | 3 + drivers/net/gve/base/gve.h | 1 + drivers/net/gve/base/gve_desc_dqo.h | 4 - drivers/net/gve/base/gve_osdep.h| 4 + drivers/net/gve/gve_ethdev.c| 16 ++- drivers/net/gve/gve_ethdev.h| 33 +- drivers/net/gve/gve_tx_dqo.c| 178 drivers/net/gve/meson.build | 1 + 9 files changed, 235 insertions(+), 8 deletions(-) create mode 100644 drivers/net/gve/gve_tx_dqo.c diff --git a/.mailmap b/.mailmap index 452267a567..553b9ce3ca 100644 --- a/.mailmap +++ b/.mailmap @@ -578,6 +578,7 @@ Jens Freimann Jeremy Plsek Jeremy Spewock Jerin Jacob +Jeroen de Borst Jerome Jutteau Jerry Hao OS Jerry Lilijun @@ -642,6 +643,7 @@ Jonathan Erb Jon DeVree Jon Loeliger Joongi Kim +Jordan Kimbrough Jørgen Østergaard Sloth Jörg Thalheim Joseph Richard @@ -1145,6 +1147,7 @@ Roy Franz Roy Pledge Roy Shterman Ruifeng Wang +Rushil Gupta Ryan E Hall Sabyasachi Sengupta Sachin Saxena diff --git a/MAINTAINERS b/MAINTAINERS index 9a0f416d2e..7ffa709b3b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -703,6 +703,9 @@ F: doc/guides/nics/features/enic.ini Google Virtual Ethernet M: Junfeng Guo +M: Jeroen de Borst +M: Rushil Gupta +M: Jordan Kimbrough F: drivers/net/gve/ F: doc/guides/nics/gve.rst F: doc/guides/nics/features/gve.ini diff --git a/drivers/net/gve/base/gve.h b/drivers/net/gve/base/gve.h index 2dc4507acb..2b7cf7d99b 100644 --- a/drivers/net/gve/base/gve.h +++ b/drivers/net/gve/base/gve.h @@ -7,6 +7,7 @@ #define _GVE_H_ #include "gve_desc.h" +#include "gve_desc_dqo.h" #define GVE_VERSION"1.3.0" #define GVE_VERSION_PREFIX "GVE-" diff --git a/drivers/net/gve/base/gve_desc_dqo.h b/drivers/net/gve/base/gve_desc_dqo.h index ee1afdecb8..bb4a18d4d1 100644 --- a/drivers/net/gve/base/gve_desc_dqo.h +++ b/drivers/net/gve/base/gve_desc_dqo.h @@ -13,10 +13,6 @@ #define GVE_TX_MAX_HDR_SIZE_DQO 255 #define GVE_TX_MIN_TSO_MSS_DQO 88 -#ifndef __LITTLE_ENDIAN_BITFIELD -#error "Only little endian supported" -#endif - /* Basic TX descriptor (DTYPE 0x0C) */ struct gve_tx_pkt_desc_dqo { __le64 buf_addr; diff --git a/drivers/net/gve/base/gve_osdep.h b/drivers/net/gve/base/gve_osdep.h index 7cb73002f4..abf3d379ae 100644 --- a/drivers/net/gve/base/gve_osdep.h +++ b/drivers/net/gve/base/gve_osdep.h @@ -35,6 +35,10 @@ typedef rte_be16_t __be16; typedef rte_be32_t __be32; typedef rte_be64_t __be64; +typedef rte_le16_t __le16; +typedef rte_le32_t __le32; +typedef rte_le64_t __le64; + typedef rte_iova_t dma_addr_t; #define ETH_MIN_MTURTE_ETHER_MIN_MTU diff --git a/drivers/net/gve/gve_ethdev.c b/drivers/net/gve/gve_ethdev.c index 97781f0ed3..d03f2fba92 100644 --- a/drivers/net/gve/gve_ethdev.c +++ b/drivers/net/gve/gve_ethdev.c @@ -299,6 +299,7 @@ gve_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) dev_info->default_txconf = (struct rte_eth_txconf) { .tx_free_thresh = GVE_DEFAULT_TX_FREE_THRESH, + .tx_rs_thresh = GVE_DEFAULT_TX_RS_THRESH, .offloads = 0, }; @@ -360,6 +361,13 @@ static const struct eth_dev_ops gve_eth_dev_ops = { .mtu_set = gve_dev_mtu_set, }; +static void +gve_eth_dev_ops_override(struct eth_dev_ops *local_eth_dev_ops) +{ + /* override eth_dev ops for DQO */ + local_eth_dev_ops->tx_queue_setup = gve_tx_queue_setup_dqo; +} + static void 
gve_free_counter_array(struct gve_priv *priv) { @@ -595,6 +603,7 @@ gve_teardown_priv_resources(struct gve_priv *priv) static int gve_dev_init(struct rte_eth_dev *eth_dev) { + static struct eth_dev_ops gve_local_eth_dev_ops = gve_eth_dev_ops; struct gve_priv *priv = eth_dev->data->dev_private; int max_tx_queues, max_rx_queues; struct rte_pci_device *pci_dev; @@ -602,8 +611,6 @@ gve_dev_init(struct rte_eth_dev *eth_dev) rte_be32_t *db_bar; int err; - eth_dev->dev_ops = &gve_eth_dev_ops; - if (rte_eal_process_type() != RTE_PROC_PRIMARY) return 0; @@ -642,9 +649,12 @@ gve_dev_init(struct rte_eth_dev *eth_dev) eth_dev->rx_pkt_burst = gve_rx_burst; eth_dev->tx_pkt_burst = gve_tx_burst; } else { - PMD_DRV_LOG(ERR, "DQO_RDA is not implemented and will be added in the future"); + /* override Tx/Rx setup/release eth_dev ops */ + gve_eth_dev_ops_override(&gve_local_eth_dev_ops); } + eth_dev->dev_o
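Because the DQO rings, doorbells and counters are little-endian regardless of host byte order, every multi-byte field the PMD writes must go through the rte_cpu_to_le_*() helpers. A minimal sketch of posting one Tx buffer; the txd/pkt field names here are illustrative, the real layout lives in gve_desc_dqo.h:

/* Sketch only: convert values to little-endian before writing the ring. */
volatile union gve_tx_desc_dqo *txd = &txq->tx_ring[tx_id];

txd->pkt.buf_addr  = rte_cpu_to_le_64(rte_mbuf_data_iova(tx_pkt));
txd->pkt.compl_tag = rte_cpu_to_le_16(sw_id);

/* On big-endian hosts the helpers byte-swap; on little-endian hosts
 * they compile to nothing, so the conversion is free on x86. */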
[RFC v2 2/9] net/gve: add Rx queue setup for DQO
Add support for rx_queue_setup_dqo ops. Signed-off-by: Junfeng Guo Signed-off-by: Rushil Gupta Signed-off-by: Jordan Kimbrough Signed-off-by: Jeroen de Borst --- drivers/net/gve/gve_ethdev.c | 1 + drivers/net/gve/gve_ethdev.h | 14 drivers/net/gve/gve_rx_dqo.c | 148 +++ drivers/net/gve/meson.build | 1 + 4 files changed, 164 insertions(+) create mode 100644 drivers/net/gve/gve_rx_dqo.c diff --git a/drivers/net/gve/gve_ethdev.c b/drivers/net/gve/gve_ethdev.c index d03f2fba92..26182b0422 100644 --- a/drivers/net/gve/gve_ethdev.c +++ b/drivers/net/gve/gve_ethdev.c @@ -366,6 +366,7 @@ gve_eth_dev_ops_override(struct eth_dev_ops *local_eth_dev_ops) { /* override eth_dev ops for DQO */ local_eth_dev_ops->tx_queue_setup = gve_tx_queue_setup_dqo; + local_eth_dev_ops->rx_queue_setup = gve_rx_queue_setup_dqo; } static void diff --git a/drivers/net/gve/gve_ethdev.h b/drivers/net/gve/gve_ethdev.h index 2dfcef6893..0adfc90554 100644 --- a/drivers/net/gve/gve_ethdev.h +++ b/drivers/net/gve/gve_ethdev.h @@ -145,6 +145,7 @@ struct gve_rx_queue { uint16_t nb_rx_desc; uint16_t expected_seqno; /* the next expected seqno */ uint16_t free_thresh; + uint16_t nb_rx_hold; uint32_t next_avail; uint32_t nb_avail; @@ -163,6 +164,14 @@ struct gve_rx_queue { uint16_t ntfy_id; uint16_t rx_buf_len; + /* newly added for DQO*/ + volatile struct gve_rx_desc_dqo *rx_ring; + struct gve_rx_compl_desc_dqo *compl_ring; + const struct rte_memzone *compl_ring_mz; + uint64_t compl_ring_phys_addr; + uint8_t cur_gen_bit; + uint16_t bufq_tail; + /* Only valid for DQO_RDA queue format */ struct gve_rx_queue *bufq; @@ -334,6 +343,11 @@ gve_tx_burst(void *txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); /* Below functions are used for DQO */ +int +gve_rx_queue_setup_dqo(struct rte_eth_dev *dev, uint16_t queue_id, + uint16_t nb_desc, unsigned int socket_id, + const struct rte_eth_rxconf *conf, + struct rte_mempool *pool); int gve_tx_queue_setup_dqo(struct rte_eth_dev *dev, uint16_t queue_id, uint16_t nb_desc, unsigned int socket_id, diff --git a/drivers/net/gve/gve_rx_dqo.c b/drivers/net/gve/gve_rx_dqo.c new file mode 100644 index 00..e8a6d575fc --- /dev/null +++ b/drivers/net/gve/gve_rx_dqo.c @@ -0,0 +1,148 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2022 Intel Corporation + */ + +#include "gve_ethdev.h" +#include "base/gve_adminq.h" + +static void +gve_reset_rxq_dqo(struct gve_rx_queue *rxq) +{ + struct rte_mbuf **sw_ring; + uint32_t size, i; + + if (rxq == NULL) { + PMD_DRV_LOG(ERR, "pointer to rxq is NULL"); + return; + } + + size = rxq->nb_rx_desc * sizeof(struct gve_rx_desc_dqo); + for (i = 0; i < size; i++) + ((volatile char *)rxq->rx_ring)[i] = 0; + + size = rxq->nb_rx_desc * sizeof(struct gve_rx_compl_desc_dqo); + for (i = 0; i < size; i++) + ((volatile char *)rxq->compl_ring)[i] = 0; + + sw_ring = rxq->sw_ring; + for (i = 0; i < rxq->nb_rx_desc; i++) + sw_ring[i] = NULL; + + rxq->bufq_tail = 0; + rxq->next_avail = 0; + rxq->nb_rx_hold = rxq->nb_rx_desc - 1; + + rxq->rx_tail = 0; + rxq->cur_gen_bit = 1; +} + +int +gve_rx_queue_setup_dqo(struct rte_eth_dev *dev, uint16_t queue_id, + uint16_t nb_desc, unsigned int socket_id, + const struct rte_eth_rxconf *conf, + struct rte_mempool *pool) +{ + struct gve_priv *hw = dev->data->dev_private; + const struct rte_memzone *mz; + struct gve_rx_queue *rxq; + uint16_t free_thresh; + int err = 0; + + if (nb_desc != hw->rx_desc_cnt) { + PMD_DRV_LOG(WARNING, "gve doesn't support nb_desc config, use hw nb_desc %u.", + hw->rx_desc_cnt); + } + nb_desc = hw->rx_desc_cnt; + + /* 
Allocate the RX queue data structure. */ + rxq = rte_zmalloc_socket("gve rxq", +sizeof(struct gve_rx_queue), +RTE_CACHE_LINE_SIZE, +socket_id); + if (rxq == NULL) { + PMD_DRV_LOG(ERR, "Failed to allocate memory for rx queue structure"); + return -ENOMEM; + } + + /* check free_thresh here */ + free_thresh = conf->rx_free_thresh ? + conf->rx_free_thresh : GVE_DEFAULT_RX_FREE_THRESH; + if (free_thresh >= nb_desc) { + PMD_DRV_LOG(ERR, "rx_free_thresh (%u) must be less than nb_desc (%u).", + free_thresh, rxq->nb_rx_desc); + err = -EINVAL; + goto err_rxq; + }
[RFC v2 3/9] net/gve: support device start and close for DQO
Add device start and close support for DQO. Signed-off-by: Junfeng Guo Signed-off-by: Rushil Gupta Signed-off-by: Jordan Kimbrough Signed-off-by: Jeroen de Borst --- drivers/net/gve/base/gve_adminq.c | 10 +++ drivers/net/gve/gve_ethdev.c | 43 ++- 2 files changed, 47 insertions(+), 6 deletions(-) diff --git a/drivers/net/gve/base/gve_adminq.c b/drivers/net/gve/base/gve_adminq.c index e745b709b2..e963f910a0 100644 --- a/drivers/net/gve/base/gve_adminq.c +++ b/drivers/net/gve/base/gve_adminq.c @@ -497,11 +497,11 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index) cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id); } else { cmd.create_tx_queue.tx_ring_size = - cpu_to_be16(txq->nb_tx_desc); + cpu_to_be16(priv->tx_desc_cnt); cmd.create_tx_queue.tx_comp_ring_addr = - cpu_to_be64(txq->complq->tx_ring_phys_addr); + cpu_to_be64(txq->compl_ring_phys_addr); cmd.create_tx_queue.tx_comp_ring_size = - cpu_to_be16(priv->tx_compq_size); + cpu_to_be16(priv->tx_compq_size * DQO_TX_MULTIPLIER); } return gve_adminq_issue_cmd(priv, &cmd); @@ -549,9 +549,9 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index) cmd.create_rx_queue.rx_ring_size = cpu_to_be16(priv->rx_desc_cnt); cmd.create_rx_queue.rx_desc_ring_addr = - cpu_to_be64(rxq->rx_ring_phys_addr); + cpu_to_be64(rxq->compl_ring_phys_addr); cmd.create_rx_queue.rx_data_ring_addr = - cpu_to_be64(rxq->bufq->rx_ring_phys_addr); + cpu_to_be64(rxq->rx_ring_phys_addr); cmd.create_rx_queue.packet_buffer_size = cpu_to_be16(rxq->rx_buf_len); cmd.create_rx_queue.rx_buff_ring_size = diff --git a/drivers/net/gve/gve_ethdev.c b/drivers/net/gve/gve_ethdev.c index 26182b0422..3543378978 100644 --- a/drivers/net/gve/gve_ethdev.c +++ b/drivers/net/gve/gve_ethdev.c @@ -78,6 +78,9 @@ gve_free_qpls(struct gve_priv *priv) uint16_t nb_rxqs = priv->max_nb_rxq; uint32_t i; + if (priv->queue_format != GVE_GQI_QPL_FORMAT) + return; + for (i = 0; i < nb_txqs + nb_rxqs; i++) { if (priv->qpl[i].mz != NULL) rte_memzone_free(priv->qpl[i].mz); @@ -138,6 +141,41 @@ gve_refill_pages(struct gve_rx_queue *rxq) return 0; } +static int +gve_refill_dqo(struct gve_rx_queue *rxq) +{ + struct rte_mbuf *nmb; + uint16_t i; + int diag; + + diag = rte_pktmbuf_alloc_bulk(rxq->mpool, &rxq->sw_ring[0], rxq->nb_rx_desc); + if (diag < 0) { + for (i = 0; i < rxq->nb_rx_desc - 1; i++) { + nmb = rte_pktmbuf_alloc(rxq->mpool); + if (!nmb) + break; + rxq->sw_ring[i] = nmb; + } + if (i < rxq->nb_rx_desc - 1) + return -ENOMEM; + } + + for (i = 0; i < rxq->nb_rx_desc; i++) { + if (i == rxq->nb_rx_desc - 1) + break; + nmb = rxq->sw_ring[i]; + rxq->rx_ring[i].buf_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb)); + rxq->rx_ring[i].buf_id = rte_cpu_to_le_16(i); + } + + rxq->nb_rx_hold = 0; + rxq->bufq_tail = rxq->nb_rx_desc - 1; + + rte_write32(rxq->bufq_tail, rxq->qrx_tail); + + return 0; +} + static int gve_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete) { @@ -206,7 +244,10 @@ gve_dev_start(struct rte_eth_dev *dev) rte_write32(rte_cpu_to_be_32(GVE_IRQ_MASK), rxq->ntfy_addr); - err = gve_refill_pages(rxq); + if (gve_is_gqi(priv)) + err = gve_refill_pages(rxq); + else + err = gve_refill_dqo(rxq); if (err) { PMD_DRV_LOG(ERR, "Failed to refill for RX"); goto err_rx; -- 2.34.1
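As a worked example of gve_refill_dqo() above: with nb_rx_desc = 512, the bulk allocation grabs 512 mbufs, the loop programs buffer descriptors 0..510 (the early break leaves the last slot unwritten), and the doorbell write publishes bufq_tail = 511. Keeping one slot in reserve this way is presumably what lets the ring distinguish the full state from the empty one.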
[RFC v2 4/9] net/gve: support queue release and stop for DQO
Add support for queue operations: - gve_tx_queue_release_dqo - gve_rx_queue_release_dqo - gve_stop_tx_queues_dqo - gve_stop_rx_queues_dqo Signed-off-by: Junfeng Guo Signed-off-by: Rushil Gupta Signed-off-by: Jordan Kimbrough Signed-off-by: Jeroen de Borst --- drivers/net/gve/gve_ethdev.c | 18 +--- drivers/net/gve/gve_ethdev.h | 12 drivers/net/gve/gve_rx.c | 3 ++ drivers/net/gve/gve_rx_dqo.c | 57 drivers/net/gve/gve_tx.c | 3 ++ drivers/net/gve/gve_tx_dqo.c | 55 ++ 6 files changed, 144 insertions(+), 4 deletions(-) diff --git a/drivers/net/gve/gve_ethdev.c b/drivers/net/gve/gve_ethdev.c index 3543378978..7c4be3a1cb 100644 --- a/drivers/net/gve/gve_ethdev.c +++ b/drivers/net/gve/gve_ethdev.c @@ -292,11 +292,19 @@ gve_dev_close(struct rte_eth_dev *dev) PMD_DRV_LOG(ERR, "Failed to stop dev."); } - for (i = 0; i < dev->data->nb_tx_queues; i++) - gve_tx_queue_release(dev, i); + if (gve_is_gqi(priv)) { + for (i = 0; i < dev->data->nb_tx_queues; i++) + gve_tx_queue_release(dev, i); + + for (i = 0; i < dev->data->nb_rx_queues; i++) + gve_rx_queue_release(dev, i); + } else { + for (i = 0; i < dev->data->nb_tx_queues; i++) + gve_tx_queue_release_dqo(dev, i); - for (i = 0; i < dev->data->nb_rx_queues; i++) - gve_rx_queue_release(dev, i); + for (i = 0; i < dev->data->nb_rx_queues; i++) + gve_rx_queue_release_dqo(dev, i); + } gve_free_qpls(priv); rte_free(priv->adminq); @@ -408,6 +416,8 @@ gve_eth_dev_ops_override(struct eth_dev_ops *local_eth_dev_ops) /* override eth_dev ops for DQO */ local_eth_dev_ops->tx_queue_setup = gve_tx_queue_setup_dqo; local_eth_dev_ops->rx_queue_setup = gve_rx_queue_setup_dqo; + local_eth_dev_ops->tx_queue_release = gve_tx_queue_release_dqo; + local_eth_dev_ops->rx_queue_release = gve_rx_queue_release_dqo; } static void diff --git a/drivers/net/gve/gve_ethdev.h b/drivers/net/gve/gve_ethdev.h index 0adfc90554..93314f2db3 100644 --- a/drivers/net/gve/gve_ethdev.h +++ b/drivers/net/gve/gve_ethdev.h @@ -353,4 +353,16 @@ gve_tx_queue_setup_dqo(struct rte_eth_dev *dev, uint16_t queue_id, uint16_t nb_desc, unsigned int socket_id, const struct rte_eth_txconf *conf); +void +gve_tx_queue_release_dqo(struct rte_eth_dev *dev, uint16_t qid); + +void +gve_rx_queue_release_dqo(struct rte_eth_dev *dev, uint16_t qid); + +void +gve_stop_tx_queues_dqo(struct rte_eth_dev *dev); + +void +gve_stop_rx_queues_dqo(struct rte_eth_dev *dev); + #endif /* _GVE_ETHDEV_H_ */ diff --git a/drivers/net/gve/gve_rx.c b/drivers/net/gve/gve_rx.c index 518c9d109c..9ba975c9b4 100644 --- a/drivers/net/gve/gve_rx.c +++ b/drivers/net/gve/gve_rx.c @@ -343,6 +343,9 @@ gve_stop_rx_queues(struct rte_eth_dev *dev) uint16_t i; int err; + if (!gve_is_gqi(hw)) + return gve_stop_rx_queues_dqo(dev); + err = gve_adminq_destroy_rx_queues(hw, dev->data->nb_rx_queues); if (err != 0) PMD_DRV_LOG(WARNING, "failed to destroy rxqs"); diff --git a/drivers/net/gve/gve_rx_dqo.c b/drivers/net/gve/gve_rx_dqo.c index e8a6d575fc..aca6f8ea2d 100644 --- a/drivers/net/gve/gve_rx_dqo.c +++ b/drivers/net/gve/gve_rx_dqo.c @@ -5,6 +5,38 @@ #include "gve_ethdev.h" #include "base/gve_adminq.h" +static inline void +gve_release_rxq_mbufs_dqo(struct gve_rx_queue *rxq) +{ + uint16_t i; + + for (i = 0; i < rxq->nb_rx_desc; i++) { + if (rxq->sw_ring[i]) { + rte_pktmbuf_free_seg(rxq->sw_ring[i]); + rxq->sw_ring[i] = NULL; + } + } + + rxq->nb_avail = rxq->nb_rx_desc; +} + +void +gve_rx_queue_release_dqo(struct rte_eth_dev *dev, uint16_t qid) +{ + struct gve_rx_queue *q = dev->data->rx_queues[qid]; + + if (q == NULL) + return; + + gve_release_rxq_mbufs_dqo(q); 
+ rte_free(q->sw_ring); + rte_memzone_free(q->compl_ring_mz); + rte_memzone_free(q->mz); + rte_memzone_free(q->qres_mz); + q->qres = NULL; + rte_free(q); +} + static void gve_reset_rxq_dqo(struct gve_rx_queue *rxq) { @@ -54,6 +86,12 @@ gve_rx_queue_setup_dqo(struct rte_eth_dev *dev, uint16_t queue_id, } nb_desc = hw->rx_desc_cnt; + /* Free memory if needed */ + if (dev->data->rx_queues[queue_id]) { + gve_rx_queue_release_dqo(dev, queue_id); + dev->data->rx_queues[queue_id] = NULL; + } + /* Allocate the RX queue data structure. */ rxq = rte_zmalloc_socket("gve rxq", sizeof(struct gve_rx_queue), @@ -146,3 +184,22 @@ gve_rx_q
[RFC v2 5/9] net/gve: support basic Tx data path for DQO
Add basic Tx data path support for DQO. Signed-off-by: Junfeng Guo Signed-off-by: Rushil Gupta Signed-off-by: Jordan Kimbrough Signed-off-by: Jeroen de Borst --- drivers/net/gve/gve_ethdev.c | 1 + drivers/net/gve/gve_ethdev.h | 4 + drivers/net/gve/gve_tx_dqo.c | 141 +++ 3 files changed, 146 insertions(+) diff --git a/drivers/net/gve/gve_ethdev.c b/drivers/net/gve/gve_ethdev.c index 7c4be3a1cb..512a038968 100644 --- a/drivers/net/gve/gve_ethdev.c +++ b/drivers/net/gve/gve_ethdev.c @@ -703,6 +703,7 @@ gve_dev_init(struct rte_eth_dev *eth_dev) } else { /* override Tx/Rx setup/release eth_dev ops */ gve_eth_dev_ops_override(&gve_local_eth_dev_ops); + eth_dev->tx_pkt_burst = gve_tx_burst_dqo; } eth_dev->dev_ops = &gve_local_eth_dev_ops; diff --git a/drivers/net/gve/gve_ethdev.h b/drivers/net/gve/gve_ethdev.h index 93314f2db3..ba657dd6c1 100644 --- a/drivers/net/gve/gve_ethdev.h +++ b/drivers/net/gve/gve_ethdev.h @@ -125,6 +125,7 @@ struct gve_tx_queue { uint8_t cur_gen_bit; uint32_t last_desc_cleaned; void **txqs; + uint16_t re_cnt; /* Only valid for DQO_RDA queue format */ struct gve_tx_queue *complq; @@ -365,4 +366,7 @@ gve_stop_tx_queues_dqo(struct rte_eth_dev *dev); void gve_stop_rx_queues_dqo(struct rte_eth_dev *dev); +uint16_t +gve_tx_burst_dqo(void *txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); + #endif /* _GVE_ETHDEV_H_ */ diff --git a/drivers/net/gve/gve_tx_dqo.c b/drivers/net/gve/gve_tx_dqo.c index e2e4153f27..3583c82246 100644 --- a/drivers/net/gve/gve_tx_dqo.c +++ b/drivers/net/gve/gve_tx_dqo.c @@ -5,6 +5,147 @@ #include "gve_ethdev.h" #include "base/gve_adminq.h" +static inline void +gve_tx_clean_dqo(struct gve_tx_queue *txq) +{ + struct gve_tx_compl_desc *compl_ring; + struct gve_tx_compl_desc *compl_desc; + struct gve_tx_queue *aim_txq; + uint16_t nb_desc_clean; + struct rte_mbuf *txe; + uint16_t compl_tag; + uint16_t next; + + next = txq->complq_tail; + compl_ring = txq->compl_ring; + compl_desc = &compl_ring[next]; + + if (compl_desc->generation != txq->cur_gen_bit) + return; + + compl_tag = rte_le_to_cpu_16(compl_desc->completion_tag); + + aim_txq = txq->txqs[compl_desc->id]; + + switch (compl_desc->type) { + case GVE_COMPL_TYPE_DQO_DESC: + /* need to clean Descs from last_cleaned to compl_tag */ + if (aim_txq->last_desc_cleaned > compl_tag) + nb_desc_clean = aim_txq->nb_tx_desc - aim_txq->last_desc_cleaned + + compl_tag; + else + nb_desc_clean = compl_tag - aim_txq->last_desc_cleaned; + aim_txq->nb_free += nb_desc_clean; + aim_txq->last_desc_cleaned = compl_tag; + break; + case GVE_COMPL_TYPE_DQO_REINJECTION: + PMD_DRV_LOG(DEBUG, "GVE_COMPL_TYPE_DQO_REINJECTION !!!"); + /* FALLTHROUGH */ + case GVE_COMPL_TYPE_DQO_PKT: + txe = aim_txq->sw_ring[compl_tag]; + if (txe != NULL) { + rte_pktmbuf_free_seg(txe); + txe = NULL; + } + break; + case GVE_COMPL_TYPE_DQO_MISS: + rte_delay_us_sleep(1); + PMD_DRV_LOG(DEBUG, "GVE_COMPL_TYPE_DQO_MISS ignored !!!"); + break; + default: + PMD_DRV_LOG(ERR, "unknown completion type."); + return; + } + + next++; + if (next == txq->nb_tx_desc * DQO_TX_MULTIPLIER) { + next = 0; + txq->cur_gen_bit ^= 1; + } + + txq->complq_tail = next; +} + +uint16_t +gve_tx_burst_dqo(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + struct gve_tx_queue *txq = tx_queue; + volatile union gve_tx_desc_dqo *txr; + volatile union gve_tx_desc_dqo *txd; + struct rte_mbuf **sw_ring; + struct rte_mbuf *tx_pkt; + uint16_t mask, sw_mask; + uint16_t nb_to_clean; + uint16_t nb_tx = 0; + uint16_t nb_used; + uint16_t tx_id; + uint16_t sw_id; + + sw_ring = 
txq->sw_ring; + txr = txq->tx_ring; + + mask = txq->nb_tx_desc - 1; + sw_mask = txq->sw_size - 1; + tx_id = txq->tx_tail; + sw_id = txq->sw_tail; + + for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) { + tx_pkt = tx_pkts[nb_tx]; + + if (txq->nb_free <= txq->free_thresh) { + nb_to_clean = DQO_TX_MULTIPLIER * txq->rs_thresh; + while (nb_to_clean--) + gve_tx_clean_dqo(txq); + } + + if (txq->nb_free < tx_pkt->nb_segs) + break; + + nb_used = tx_pkt->nb_segs; + +
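A worked example of the completion accounting in gve_tx_clean_dqo() above: with nb_tx_desc = 512, last_desc_cleaned = 500 and a completion tag of 10, the completion wrapped past the end of the ring, so nb_desc_clean = 512 - 500 + 10 = 22 descriptors are returned to nb_free; without a wrap (say compl_tag = 505) it is simply 505 - 500 = 5. Note also that the completion ring is DQO_TX_MULTIPLIER times larger than the descriptor ring, which is why the generation bit only flips once the tail reaches nb_tx_desc * DQO_TX_MULTIPLIER.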
[RFC v2 6/9] net/gve: support basic Rx data path for DQO
Add basic Rx data path support for DQO. Signed-off-by: Junfeng Guo Signed-off-by: Rushil Gupta Signed-off-by: Jordan Kimbrough Signed-off-by: Jeroen de Borst --- drivers/net/gve/gve_ethdev.c | 1 + drivers/net/gve/gve_ethdev.h | 3 + drivers/net/gve/gve_rx_dqo.c | 128 +++ 3 files changed, 132 insertions(+) diff --git a/drivers/net/gve/gve_ethdev.c b/drivers/net/gve/gve_ethdev.c index 512a038968..89e3f09c37 100644 --- a/drivers/net/gve/gve_ethdev.c +++ b/drivers/net/gve/gve_ethdev.c @@ -703,6 +703,7 @@ gve_dev_init(struct rte_eth_dev *eth_dev) } else { /* override Tx/Rx setup/release eth_dev ops */ gve_eth_dev_ops_override(&gve_local_eth_dev_ops); + eth_dev->rx_pkt_burst = gve_rx_burst_dqo; eth_dev->tx_pkt_burst = gve_tx_burst_dqo; } diff --git a/drivers/net/gve/gve_ethdev.h b/drivers/net/gve/gve_ethdev.h index ba657dd6c1..d434f9babe 100644 --- a/drivers/net/gve/gve_ethdev.h +++ b/drivers/net/gve/gve_ethdev.h @@ -366,6 +366,9 @@ gve_stop_tx_queues_dqo(struct rte_eth_dev *dev); void gve_stop_rx_queues_dqo(struct rte_eth_dev *dev); +uint16_t +gve_rx_burst_dqo(void *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); + uint16_t gve_tx_burst_dqo(void *txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); diff --git a/drivers/net/gve/gve_rx_dqo.c b/drivers/net/gve/gve_rx_dqo.c index aca6f8ea2d..244517ce5d 100644 --- a/drivers/net/gve/gve_rx_dqo.c +++ b/drivers/net/gve/gve_rx_dqo.c @@ -5,6 +5,134 @@ #include "gve_ethdev.h" #include "base/gve_adminq.h" +static inline void +gve_rx_refill_dqo(struct gve_rx_queue *rxq) +{ + volatile struct gve_rx_desc_dqo *rx_buf_ring; + volatile struct gve_rx_desc_dqo *rx_buf_desc; + struct rte_mbuf *nmb[rxq->free_thresh]; + uint16_t nb_refill = rxq->free_thresh; + uint16_t nb_desc = rxq->nb_rx_desc; + uint16_t next_avail = rxq->bufq_tail; + struct rte_eth_dev *dev; + uint64_t dma_addr; + uint16_t delta; + int i; + + if (rxq->nb_rx_hold < rxq->free_thresh) + return; + + rx_buf_ring = rxq->rx_ring; + delta = nb_desc - next_avail; + if (unlikely(delta < nb_refill)) { + if (likely(rte_pktmbuf_alloc_bulk(rxq->mpool, nmb, delta) == 0)) { + for (i = 0; i < delta; i++) { + rx_buf_desc = &rx_buf_ring[next_avail + i]; + rxq->sw_ring[next_avail + i] = nmb[i]; + dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb[i])); + rx_buf_desc->header_buf_addr = 0; + rx_buf_desc->buf_addr = dma_addr; + } + nb_refill -= delta; + next_avail = 0; + rxq->nb_rx_hold -= delta; + } else { + dev = &rte_eth_devices[rxq->port_id]; + dev->data->rx_mbuf_alloc_failed += nb_desc - next_avail; + PMD_DRV_LOG(DEBUG, "RX mbuf alloc failed port_id=%u queue_id=%u", + rxq->port_id, rxq->queue_id); + return; + } + } + + if (nb_desc - next_avail >= nb_refill) { + if (likely(rte_pktmbuf_alloc_bulk(rxq->mpool, nmb, nb_refill) == 0)) { + for (i = 0; i < nb_refill; i++) { + rx_buf_desc = &rx_buf_ring[next_avail + i]; + rxq->sw_ring[next_avail + i] = nmb[i]; + dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb[i])); + rx_buf_desc->header_buf_addr = 0; + rx_buf_desc->buf_addr = dma_addr; + } + next_avail += nb_refill; + rxq->nb_rx_hold -= nb_refill; + } else { + dev = &rte_eth_devices[rxq->port_id]; + dev->data->rx_mbuf_alloc_failed += nb_desc - next_avail; + PMD_DRV_LOG(DEBUG, "RX mbuf alloc failed port_id=%u queue_id=%u", + rxq->port_id, rxq->queue_id); + } + } + + rte_write32(next_avail, rxq->qrx_tail); + + rxq->bufq_tail = next_avail; +} + +uint16_t +gve_rx_burst_dqo(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) +{ + volatile struct gve_rx_compl_desc_dqo *rx_compl_ring; + volatile struct 
gve_rx_compl_desc_dqo *rx_desc; + struct gve_rx_queue *rxq; + struct rte_mbuf *rxm; + uint16_t rx_id_bufq; + uint16_t pkt_len; + uint16_t rx_id; + uint16_t nb_rx; + + nb_rx = 0; + rxq = rx_queue; + rx_id = rxq->rx_tail; + rx_id_bufq = rxq->next_avail; + rx_compl_ring = rxq->compl_ring; + +
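A worked example of the two-step refill in gve_rx_refill_dqo() above: with nb_desc = 512, bufq_tail = 500 and free_thresh = 64, delta = 12 < 64, so the first bulk allocation fills slots 500..511 and next_avail wraps to 0 with nb_refill reduced to 52; the second branch then fills slots 0..51, and the doorbell is written with next_avail = 52.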
[RFC v2 7/9] net/gve: support basic stats for DQO
Add basic stats support for DQO. Signed-off-by: Junfeng Guo Signed-off-by: Rushil Gupta Signed-off-by: Jordan Kimbrough Signed-off-by: Jeroen de Borst --- drivers/net/gve/gve_ethdev.c | 60 drivers/net/gve/gve_ethdev.h | 11 +++ drivers/net/gve/gve_rx_dqo.c | 12 +++- drivers/net/gve/gve_tx_dqo.c | 6 4 files changed, 88 insertions(+), 1 deletion(-) diff --git a/drivers/net/gve/gve_ethdev.c b/drivers/net/gve/gve_ethdev.c index 89e3f09c37..fae00305f9 100644 --- a/drivers/net/gve/gve_ethdev.c +++ b/drivers/net/gve/gve_ethdev.c @@ -369,6 +369,64 @@ gve_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) return 0; } +static int +gve_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) +{ + uint16_t i; + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + struct gve_tx_queue *txq = dev->data->tx_queues[i]; + if (txq == NULL) + continue; + + stats->opackets += txq->packets; + stats->obytes += txq->bytes; + stats->oerrors += txq->errors; + } + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + struct gve_rx_queue *rxq = dev->data->rx_queues[i]; + if (rxq == NULL) + continue; + + stats->ipackets += rxq->packets; + stats->ibytes += rxq->bytes; + stats->ierrors += rxq->errors; + stats->rx_nombuf += rxq->no_mbufs; + } + + return 0; +} + +static int +gve_dev_stats_reset(struct rte_eth_dev *dev) +{ + uint16_t i; + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + struct gve_tx_queue *txq = dev->data->tx_queues[i]; + if (txq == NULL) + continue; + + txq->packets = 0; + txq->bytes = 0; + txq->errors = 0; + } + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + struct gve_rx_queue *rxq = dev->data->rx_queues[i]; + if (rxq == NULL) + continue; + + rxq->packets = 0; + rxq->bytes = 0; + rxq->errors = 0; + rxq->no_mbufs = 0; + } + + return 0; +} + static int gve_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) { @@ -407,6 +465,8 @@ static const struct eth_dev_ops gve_eth_dev_ops = { .rx_queue_release = gve_rx_queue_release, .tx_queue_release = gve_tx_queue_release, .link_update = gve_link_update, + .stats_get= gve_dev_stats_get, + .stats_reset = gve_dev_stats_reset, .mtu_set = gve_dev_mtu_set, }; diff --git a/drivers/net/gve/gve_ethdev.h b/drivers/net/gve/gve_ethdev.h index d434f9babe..2e0f96499d 100644 --- a/drivers/net/gve/gve_ethdev.h +++ b/drivers/net/gve/gve_ethdev.h @@ -105,6 +105,11 @@ struct gve_tx_queue { struct gve_queue_page_list *qpl; struct gve_tx_iovec *iov_ring; + /* stats items */ + uint64_t packets; + uint64_t bytes; + uint64_t errors; + uint16_t port_id; uint16_t queue_id; @@ -156,6 +161,12 @@ struct gve_rx_queue { /* only valid for GQI_QPL queue format */ struct gve_queue_page_list *qpl; + /* stats items */ + uint64_t packets; + uint64_t bytes; + uint64_t errors; + uint64_t no_mbufs; + struct gve_priv *hw; const struct rte_memzone *qres_mz; struct gve_queue_resources *qres; diff --git a/drivers/net/gve/gve_rx_dqo.c b/drivers/net/gve/gve_rx_dqo.c index 244517ce5d..41ead5bd98 100644 --- a/drivers/net/gve/gve_rx_dqo.c +++ b/drivers/net/gve/gve_rx_dqo.c @@ -37,6 +37,7 @@ gve_rx_refill_dqo(struct gve_rx_queue *rxq) next_avail = 0; rxq->nb_rx_hold -= delta; } else { + rxq->no_mbufs += nb_desc - next_avail; dev = &rte_eth_devices[rxq->port_id]; dev->data->rx_mbuf_alloc_failed += nb_desc - next_avail; PMD_DRV_LOG(DEBUG, "RX mbuf alloc failed port_id=%u queue_id=%u", @@ -57,6 +58,7 @@ gve_rx_refill_dqo(struct gve_rx_queue *rxq) next_avail += nb_refill; rxq->nb_rx_hold -= nb_refill; } else { + rxq->no_mbufs += nb_desc - next_avail; dev = 
&rte_eth_devices[rxq->port_id]; dev->data->rx_mbuf_alloc_failed += nb_desc - next_avail; PMD_DRV_LOG(DEBUG, "RX mbuf alloc failed port_id=%u queue_id=%u", @@ -80,7 +82,9 @@ gve_rx_burst_dqo(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) uint16_t pkt_len; uint16_t rx_id; uint16_t nb_rx; + uint64_t bytes; + bytes = 0;
[RFC v2 8/9] net/gve: support jumbo frame for GQI
Add multi-segment support to enable GQI Rx Jumbo Frame. Signed-off-by: Jordan Kimbrough Signed-off-by: Rushil Gupta Signed-off-by: Junfeng Guo Signed-off-by: Jeroen de Borst --- drivers/net/gve/gve_ethdev.h | 8 +++ drivers/net/gve/gve_rx.c | 128 ++- 2 files changed, 105 insertions(+), 31 deletions(-) diff --git a/drivers/net/gve/gve_ethdev.h b/drivers/net/gve/gve_ethdev.h index 2e0f96499d..608a2f2fb4 100644 --- a/drivers/net/gve/gve_ethdev.h +++ b/drivers/net/gve/gve_ethdev.h @@ -138,6 +138,13 @@ struct gve_tx_queue { uint8_t is_gqi_qpl; }; +struct gve_rx_ctx { + struct rte_mbuf *mbuf_head; + struct rte_mbuf *mbuf_tail; + uint16_t total_frags; + bool drop_pkt; +}; + struct gve_rx_queue { volatile struct gve_rx_desc *rx_desc_ring; volatile union gve_rx_data_slot *rx_data_ring; @@ -146,6 +153,7 @@ struct gve_rx_queue { uint64_t rx_ring_phys_addr; struct rte_mbuf **sw_ring; struct rte_mempool *mpool; + struct gve_rx_ctx ctx; uint16_t rx_tail; uint16_t nb_rx_desc; diff --git a/drivers/net/gve/gve_rx.c b/drivers/net/gve/gve_rx.c index 9ba975c9b4..2468fc70ee 100644 --- a/drivers/net/gve/gve_rx.c +++ b/drivers/net/gve/gve_rx.c @@ -5,6 +5,8 @@ #include "gve_ethdev.h" #include "base/gve_adminq.h" +#define GVE_PKT_CONT_BIT_IS_SET(x) (GVE_RXF_PKT_CONT & (x)) + static inline void gve_rx_refill(struct gve_rx_queue *rxq) { @@ -80,40 +82,70 @@ gve_rx_refill(struct gve_rx_queue *rxq) } } -uint16_t -gve_rx_burst(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) +/* + * This method processes a single rte_mbuf and handles packet segmentation + * In QPL mode it copies data from the mbuf to the gve_rx_queue. + */ +static void +gve_rx_mbuf(struct gve_rx_queue *rxq, struct rte_mbuf *rxe, uint16_t len, + uint16_t rx_id) { - volatile struct gve_rx_desc *rxr, *rxd; - struct gve_rx_queue *rxq = rx_queue; - uint16_t rx_id = rxq->rx_tail; - struct rte_mbuf *rxe; - uint16_t nb_rx, len; + uint16_t padding = 0; uint64_t addr; - uint16_t i; - - rxr = rxq->rx_desc_ring; - nb_rx = 0; - - for (i = 0; i < nb_pkts; i++) { - rxd = &rxr[rx_id]; - if (GVE_SEQNO(rxd->flags_seq) != rxq->expected_seqno) - break; - if (rxd->flags_seq & GVE_RXF_ERR) - continue; - - len = rte_be_to_cpu_16(rxd->len) - GVE_RX_PAD; - rxe = rxq->sw_ring[rx_id]; - if (rxq->is_gqi_qpl) { - addr = (uint64_t)(rxq->qpl->mz->addr) + rx_id * PAGE_SIZE + GVE_RX_PAD; - rte_memcpy((void *)((size_t)rxe->buf_addr + rxe->data_off), - (void *)(size_t)addr, len); - } + rxe->data_len = len; + if (!rxq->ctx.mbuf_head) { + rxq->ctx.mbuf_head = rxe; + rxq->ctx.mbuf_tail = rxe; + rxe->nb_segs = 1; rxe->pkt_len = len; rxe->data_len = len; rxe->port = rxq->port_id; rxe->ol_flags = 0; + padding = GVE_RX_PAD; + } else { + rxq->ctx.mbuf_head->pkt_len += len; + rxq->ctx.mbuf_head->nb_segs += 1; + rxq->ctx.mbuf_tail->next = rxe; + rxq->ctx.mbuf_tail = rxe; + } + if (rxq->is_gqi_qpl) { + addr = (uint64_t)(rxq->qpl->mz->addr) + rx_id * PAGE_SIZE + padding; + rte_memcpy((void *)((size_t)rxe->buf_addr + rxe->data_off), + (void *)(size_t)addr, len); + } +} + +/* + * This method processes a single packet fragment associated with the + * passed packet descriptor. + * This methods returns whether the fragment is the last fragment + * of a packet. 
+ */ +static bool +gve_rx(struct gve_rx_queue *rxq, volatile struct gve_rx_desc *rxd, uint16_t rx_id) +{ + bool is_last_frag = !GVE_PKT_CONT_BIT_IS_SET(rxd->flags_seq); + uint16_t frag_size = rte_be_to_cpu_16(rxd->len); + struct gve_rx_ctx *ctx = &rxq->ctx; + bool is_first_frag = ctx->total_frags == 0; + struct rte_mbuf *rxe; + + if (ctx->drop_pkt) + goto finish_frag; + if (rxd->flags_seq & GVE_RXF_ERR) { + ctx->drop_pkt = true; + goto finish_frag; + } + + if (is_first_frag) + frag_size -= GVE_RX_PAD; + + rxe = rxq->sw_ring[rx_id]; + gve_rx_mbuf(rxq, rxe, frag_size, rx_id); + + if (is_first_frag) { if (rxd->flags_seq & GVE_RXF_TCP) rxe->packet_type |= RTE_PTYPE_L4_TCP; if (rxd->flags_seq & GVE_RXF_UDP) @@ -127,18 +159,52 @@ gve_rx_burst(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) rxe->ol_flags |= RTE_M
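A short consumer-side sketch of what the chaining above produces (a generic DPDK mbuf idiom, not code from the patch): only the head mbuf returned in rx_pkts[] carries the packet totals, and the remaining fragments hang off mbuf->next.

#include <rte_mbuf.h>

/* Walk one chained jumbo frame returned by gve_rx_burst(). */
struct rte_mbuf *seg = rx_pkts[0];
uint32_t bytes = 0;

while (seg != NULL) {
	bytes += seg->data_len;  /* each fragment's own payload size */
	seg = seg->next;
}
/* For a well-formed chain, bytes == rx_pkts[0]->pkt_len and the loop
 * iterates rx_pkts[0]->nb_segs times. */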
[RFC v2 9/9] net/gve: add AdminQ command to verify driver compatibility
Check whether the driver is compatible with the device presented. Signed-off-by: Rushil Gupta Signed-off-by: Jordan Kimbrough Signed-off-by: Junfeng Guo Signed-off-by: Jeroen de Borst --- drivers/net/gve/base/gve_adminq.c | 19 ++ drivers/net/gve/base/gve_adminq.h | 48 + drivers/net/gve/base/gve_osdep.h | 8 + drivers/net/gve/gve_ethdev.c | 60 +++ drivers/net/gve/gve_ethdev.h | 1 + 5 files changed, 136 insertions(+) diff --git a/drivers/net/gve/base/gve_adminq.c b/drivers/net/gve/base/gve_adminq.c index e963f910a0..5576990cb1 100644 --- a/drivers/net/gve/base/gve_adminq.c +++ b/drivers/net/gve/base/gve_adminq.c @@ -401,6 +401,9 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv, case GVE_ADMINQ_GET_PTYPE_MAP: priv->adminq_get_ptype_map_cnt++; break; + case GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY: + priv->adminq_verify_driver_compatibility_cnt++; + break; default: PMD_DRV_LOG(ERR, "unknown AQ command opcode %d", opcode); } @@ -859,6 +862,22 @@ int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len, return gve_adminq_execute_cmd(priv, &cmd); } +int gve_adminq_verify_driver_compatibility(struct gve_priv *priv, + u64 driver_info_len, + dma_addr_t driver_info_addr) +{ + union gve_adminq_command cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY); + cmd.verify_driver_compatibility = (struct gve_adminq_verify_driver_compatibility) { + .driver_info_len = cpu_to_be64(driver_info_len), + .driver_info_addr = cpu_to_be64(driver_info_addr), + }; + + return gve_adminq_execute_cmd(priv, &cmd); +} + int gve_adminq_report_link_speed(struct gve_priv *priv) { struct gve_dma_mem link_speed_region_dma_mem; diff --git a/drivers/net/gve/base/gve_adminq.h b/drivers/net/gve/base/gve_adminq.h index 05550119de..c82e02405c 100644 --- a/drivers/net/gve/base/gve_adminq.h +++ b/drivers/net/gve/base/gve_adminq.h @@ -23,6 +23,7 @@ enum gve_adminq_opcodes { GVE_ADMINQ_REPORT_STATS = 0xC, GVE_ADMINQ_REPORT_LINK_SPEED= 0xD, GVE_ADMINQ_GET_PTYPE_MAP= 0xE, + GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY = 0xF, }; /* Admin queue status codes */ @@ -145,6 +146,48 @@ enum gve_sup_feature_mask { }; #define GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING 0x0 +#define GVE_VERSION_STR_LEN 128 + +enum gve_driver_capbility { + gve_driver_capability_gqi_qpl = 0, + gve_driver_capability_gqi_rda = 1, + gve_driver_capability_dqo_qpl = 2, /* reserved for future use */ + gve_driver_capability_dqo_rda = 3, +}; + +#define GVE_CAP1(a) BIT((int)a) +#define GVE_CAP2(a) BIT(((int)a) - 64) +#define GVE_CAP3(a) BIT(((int)a) - 128) +#define GVE_CAP4(a) BIT(((int)a) - 192) + +#define GVE_DRIVER_CAPABILITY_FLAGS1 \ + (GVE_CAP1(gve_driver_capability_gqi_qpl) | \ +GVE_CAP1(gve_driver_capability_gqi_rda) | \ +GVE_CAP1(gve_driver_capability_dqo_rda)) + +#define GVE_DRIVER_CAPABILITY_FLAGS2 0x0 +#define GVE_DRIVER_CAPABILITY_FLAGS3 0x0 +#define GVE_DRIVER_CAPABILITY_FLAGS4 0x0 + +struct gve_driver_info { + u8 os_type; /* 0x01 = Linux */ + u8 driver_major; + u8 driver_minor; + u8 driver_sub; + __be32 os_version_major; + __be32 os_version_minor; + __be32 os_version_sub; + __be64 driver_capability_flags[4]; + u8 os_version_str1[GVE_VERSION_STR_LEN]; + u8 os_version_str2[GVE_VERSION_STR_LEN]; +}; + +struct gve_adminq_verify_driver_compatibility { + __be64 driver_info_len; + __be64 driver_info_addr; +}; + +GVE_CHECK_STRUCT_LEN(16, gve_adminq_verify_driver_compatibility); struct gve_adminq_configure_device_resources { __be64 counter_array; @@ -345,6 +388,8 @@ union gve_adminq_command { struct 
gve_adminq_report_stats report_stats; struct gve_adminq_report_link_speed report_link_speed; struct gve_adminq_get_ptype_map get_ptype_map; + struct gve_adminq_verify_driver_compatibility + verify_driver_compatibility; }; }; u8 reserved[64]; @@ -377,5 +422,8 @@ int gve_adminq_report_link_speed(struct gve_priv *priv); struct gve_ptype_lut; int gve_adminq_get_ptype_map_dqo(struct gve_priv *priv, struct gve_ptype_lut *ptype_lut); +int gve_adminq_verify_driver_compatibility(struct gve_priv *priv, + u64 driver_info_len, + dma_addr_t driver_info_addr); #endif /* _GVE_ADMINQ_H */ diff --
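As a worked example of the capability encoding above: GVE_DRIVER_CAPABILITY_FLAGS1 sets bits 0 (gqi_qpl), 1 (gqi_rda) and 3 (dqo_rda), giving the value 0x0B, while bit 2 (dqo_qpl) stays clear because it is reserved for future use; the other three 64-bit capability words are zero. Since the flags are declared __be64 in gve_driver_info, they are expected to be written big-endian, i.e. via cpu_to_be64(), before the block is handed to the device.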
[PATCH] linux/igb_uio: fix build with Linux 5.18
Since commit 7968778914 (PCI: Remove the deprecated "pci-dma-compat.h" API) in 5.18, pci_set_dma_mask() and pci_set_consistent_dma_mask() no longer exist. Switch those APIs to dma_set_mask_and_coherent(). --- linux/igb_uio/igb_uio.c | 8 1 file changed, 8 insertions(+) diff --git a/linux/igb_uio/igb_uio.c b/linux/igb_uio/igb_uio.c index 33e0e02..3672314 100644 --- a/linux/igb_uio/igb_uio.c +++ b/linux/igb_uio/igb_uio.c @@ -512,13 +512,21 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) goto fail_release_iomem; /* set 64-bit DMA mask */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 18, 0) err = pci_set_dma_mask(dev, DMA_BIT_MASK(64)); +#else + err = dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(64)); +#endif if (err != 0) { dev_err(&dev->dev, "Cannot set DMA mask\n"); goto fail_release_iomem; } +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 18, 0) err = pci_set_consistent_dma_mask(dev, DMA_BIT_MASK(64)); +#else + err = dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(64)); +#endif if (err != 0) { dev_err(&dev->dev, "Cannot set consistent DMA mask\n"); goto fail_release_iomem; -- 2.27.0
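Since dma_set_mask_and_coherent() sets both the streaming and the coherent DMA mask in one call, the two #else branches above end up doing the same work twice on v5.18+, which is redundant but harmless. An equivalent consolidated form would be (a sketch, not what the patch does):

#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 18, 0)
	/* Covers both the former pci_set_dma_mask() and
	 * pci_set_consistent_dma_mask() in a single call. */
	err = dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(64));
	if (err != 0) {
		dev_err(&dev->dev, "Cannot set DMA mask\n");
		goto fail_release_iomem;
	}
#endif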
RE: [PATCH v7 1/6] eal: trace: add trace point emit for blob
> -Original Message- > From: Ankur Dwivedi > Sent: Monday, January 23, 2023 2:32 PM > To: dev@dpdk.org > Cc: tho...@monjalon.net; david.march...@redhat.com; m...@ashroe.eu; > or...@nvidia.com; ferruh.yi...@amd.com; ch...@att.com; > humi...@huawei.com; linvi...@tuxdriver.com; ciara.lof...@intel.com; > qi.z.zh...@intel.com; m...@semihalf.com; m...@semihalf.com; > shaib...@amazon.com; evge...@amazon.com; igo...@amazon.com; > cha...@amd.com; Igor Russkikh ; > shepard.sie...@atomicrules.com; ed.cz...@atomicrules.com; > john.mil...@atomicrules.com; ajit.khapa...@broadcom.com; > somnath.ko...@broadcom.com; Jerin Jacob Kollanukkaran > ; Maciej Czekaj [C] ; Shijith > Thotton ; Srisivasubramanian Srinivasan > ; Harman Kalra ; > rahul.lakkire...@chelsio.com; johnd...@cisco.com; hyon...@cisco.com; > liudongdo...@huawei.com; yisen.zhu...@huawei.com; > xuanziya...@huawei.com; cloud.wangxiao...@huawei.com; > zhouguoy...@huawei.com; simei...@intel.com; wenjun1...@intel.com; > qiming.y...@intel.com; yuying.zh...@intel.com; beilei.x...@intel.com; > xiao.w.w...@intel.com; jingjing...@intel.com; junfeng@intel.com; > rosen...@intel.com; Nithin Kumar Dabilpuram > ; Kiran Kumar Kokkilagadda > ; Sunil Kumar Kori ; Satha > Koteswara Rao Kottidi ; Liron Himi > ; z...@semihalf.com; Radha Chintakuntla > ; Veerasenareddy Burru ; > Sathesh B Edara ; ma...@nvidia.com; > viachesl...@nvidia.com; lon...@microsoft.com; spin...@cesnet.cz; > chaoyong...@corigine.com; niklas.soderl...@corigine.com; > hemant.agra...@nxp.com; sachin.sax...@oss.nxp.com; g.si...@nxp.com; > apeksha.gu...@nxp.com; sachin.sax...@nxp.com; abo...@pensando.io; > Rasesh Mody ; Shahed Shaikh > ; Devendra Singh Rawat > ; andrew.rybche...@oktetlabs.ru; > jiawe...@trustnetic.com; jianw...@trustnetic.com; > jbehr...@vmware.com; maxime.coque...@redhat.com; > chenbo@intel.com; steven.webs...@windriver.com; > matt.pet...@windriver.com; bruce.richard...@intel.com; > mtetsu...@gmail.com; gr...@u256.net; jasvinder.si...@intel.com; > cristian.dumitre...@intel.com; jgraj...@cisco.com; > m...@smartsharesystems.com; Ankur Dwivedi > Subject: [PATCH v7 1/6] eal: trace: add trace point emit for blob > > Adds a trace point emit function for capturing a blob. The blob captures the > length passed by the application followed by the array. > > The maximum blob bytes which can be captured is bounded by > RTE_TRACE_BLOB_LEN_MAX macro. The value for max blob length macro is > 64 bytes. If the length is less than 64 the remaining trailing bytes are set > to > zero. > > This patch also adds test case for emit blob tracepoint function. 
> > Signed-off-by: Ankur Dwivedi > --- > app/test/test_trace.c | 11 > doc/guides/prog_guide/trace_lib.rst| 12 + > lib/eal/common/eal_common_trace_points.c | 2 ++ > lib/eal/include/rte_eal_trace.h| 6 + > lib/eal/include/rte_trace_point.h | 31 ++ > lib/eal/include/rte_trace_point_register.h | 9 +++ > lib/eal/version.map| 3 +++ > 7 files changed, 74 insertions(+) > Acked-by: Sunil Kumar Kori > diff --git a/app/test/test_trace.c b/app/test/test_trace.c index > 6bedf14024..ad4a394a29 100644 > --- a/app/test/test_trace.c > +++ b/app/test/test_trace.c > @@ -4,6 +4,7 @@ > > #include > #include > +#include > #include > > #include "test.h" > @@ -177,7 +178,12 @@ test_fp_trace_points(void) static int > test_generic_trace_points(void) > { > + uint8_t arr[RTE_TRACE_BLOB_LEN_MAX]; > int tmp; > + int i; > + > + for (i = 0; i < RTE_TRACE_BLOB_LEN_MAX; i++) > + arr[i] = i; > > rte_eal_trace_generic_void(); > rte_eal_trace_generic_u64(0x10); > @@ -195,6 +201,11 @@ test_generic_trace_points(void) > rte_eal_trace_generic_ptr(&tmp); > rte_eal_trace_generic_str("my string"); > rte_eal_trace_generic_size_t(sizeof(void *)); > + rte_eal_trace_generic_blob(arr, 0); > + rte_eal_trace_generic_blob(arr, 17); > + rte_eal_trace_generic_blob(arr, RTE_TRACE_BLOB_LEN_MAX); > + rte_eal_trace_generic_blob(arr, rte_rand() % > + RTE_TRACE_BLOB_LEN_MAX); > RTE_EAL_TRACE_GENERIC_FUNC; > > return TEST_SUCCESS; > diff --git a/doc/guides/prog_guide/trace_lib.rst > b/doc/guides/prog_guide/trace_lib.rst > index 9a8f38073d..3e0ea5835c 100644 > --- a/doc/guides/prog_guide/trace_lib.rst > +++ b/doc/guides/prog_guide/trace_lib.rst > @@ -352,3 +352,15 @@ event ID. > The ``packet.header`` and ``packet.context`` will be written in the slow path > at the time of trace memory creation. The ``trace.header`` and trace payload > will be emitted when the tracepoint function is invoked. > + > +Limitations > +--- > + > +- The ``rte_trace_point_emit_blob()`` function can capture a maximum > +blob of > + length ``RTE_TRACE_BLOB_LEN_MAX`` bytes. The application can call > + ``rte_trace_point_emit_blob()`` multipl
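A minimal usage sketch of the new tracepoint (buffer name and length chosen for illustration): for lengths below RTE_TRACE_BLOB_LEN_MAX, the record holds the length passed by the application followed by the data, with the trailing bytes zero-filled up to the 64-byte maximum.

#include <string.h>
#include <rte_eal_trace.h>

uint8_t key[12];

memset(key, 0xab, sizeof(key));
/* Emits length 12, then the 12 data bytes, then 52 zero bytes. */
rte_eal_trace_generic_blob(key, sizeof(key));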
RE: [PATCH v7] ethdev: add special flags when creating async transfer table
Hi Rongwei, For my responses, PSB. By the way, now you mention things like wasting memory and insertion optimisations, are there any comparative figures to see the effect of this hint on insertion performance / memory footprint? Some "before" / "after" examples would really be helpful. After all, I'm not objecting to this patch. But I believe that other reviewers' concerns should nevertheless be addressed anyway. On Mon, 30 Jan 2023, Rongwei Liu wrote: Hi Ivan, BR Rongwei -Original Message- From: Ivan Malov Sent: Monday, January 30, 2023 08:00 To: Rongwei Liu Cc: Matan Azrad ; Slava Ovsiienko ; Ori Kam ; NBU-Contact- Thomas Monjalon (EXTERNAL) ; Aman Singh ; Yuying Zhang ; Ferruh Yigit ; Andrew Rybchenko ; dev@dpdk.org; Raslan Darawsheh Subject: Re: [PATCH v7] ethdev: add special flags when creating async transfer table External email: Use caution opening links or attachments Hi Rongwei, Thanks for persevering. I have no strong opinion, but, at least, the fact that the new flags are no longer meant for use in rte_flow_attr, which is clearly not the right place for such, is an improvement. Thanks for the suggestion, we moved it to rte_flow_table_attr now and it's dedicated to the async API. However, let's take a closer look at the current patch, shall we? But, before we get to that, I'd like to kindly request that you provide a more concrete example of how this feature is supposed to be used. Are there some real-life application examples? Sure. Also, to me, it's still unclear how an application can obtain the knowledge of this hint in the first instance. For example, can Open vSwitch somehow tell ethdevs representing physical ports from ones representing "vports" (host endpoints)? How does it know which attribute to specify? The hint should be initiated by the application, and the application knows its traffic pattern, which highly relates to the deployment. Let's use VxLAN encap/decap as an example: 1. Traffic from the wire should match the VxLAN pattern and be decapped, then sent to different vports. flow pattern_template 0 create transfer relaxed no pattern_template_id 4 template represented_port ethdev_port_id is 0 / eth / ipv4 / udp / vxlan / tag index is 0 data is 0x33 / end flow actions_template 0 create transfer actions_template_id 4 template raw_decap index 0 / represented_port ethdev_port_id 1 / end mask raw_decap index 0 / represented_port ethdev_port_id 1 / end flow template_table 0 create group 1 priority 0 transfer wire_orig table_id 4 rules_number 128 pattern_template 4 actions_template 4 2. Traffic from vports should be encapped with a different VxLAN header and sent to the wire. flow actions_template 1 create transfer actions_template_id 5 template raw_encap index 0 / represented_port ethdev_port_id 0 / end mask raw_encap index 0 / represented_port ethdev_port_id 0 / end flow template_table 0 create group 1 priority 0 transfer vport_orig table_id 5 rules_number 128 pattern_template 4 actions_template 5 For the rest of my notes, PSB. On Mon, 14 Nov 2022, Rongwei Liu wrote: In case flow rules match only one kind of traffic in a flow table, then optimization can be done via allocation of this table. This wording might confuse readers. Consider rephrasing it, please: If multiple flow rules share a common set of match masks, then they might belong in a flow table which can be pre-allocated. Such optimization is possible only if the application gives a hint about its usage of the table during initial configuration.
The transfer domain rules may process traffic from wire or vport, which may correspond to two kinds of underlayer resources. Why name it a "vport"? Why not "host"? host = packets generated by any of the host's "vport"s wire = packets arriving at the NIC from the network Vport is "virtual port" for short and contains "VF/SF" for now. Per my thoughts, it's clearer and maps to DPDK port probing/management. I understand that "host" might not be a brilliant name. If "vport" stands for every port of the NIC that is not a network port, then this name might be OK to me, but why doesn't it cover PFs? A PF is clearly not a network / physical port. Why just VF/SF then? Where does that "for now" decision come from? Just wondering. That's why the first two hints introduced in this patch are about wire and vport traffic specialization. Wire means traffic arrives from the uplink port while vport means traffic initiated from VF/SF. By the sound of it, the meaning is confined to just VFs/SFs. What if the user wants to match packets coming from PFs? It should be "wire_orig". Forgive me, but that does not sound correct. Say, there's an application and it has a PF plugged into it: ethdev index 0. And the application transmits packets using rte_eth_tx_burst() from that port. You say that these packets can be matched via "wire_orig". But they do not come from the wire. They come from PF... There are two possible approaches for providing the hints. Using IPv4 as an example: 1. Use
RE: [PATCH v1 10/21] net/virtio: alloc Rx SW ring only if vectorized path
Hi Maxime, > -Original Message- > From: Maxime Coquelin > Sent: Wednesday, November 30, 2022 11:56 PM > To: dev@dpdk.org; Xia, Chenbo ; > david.march...@redhat.com; epere...@redhat.com > Cc: Maxime Coquelin > Subject: [PATCH v1 10/21] net/virtio: alloc Rx SW ring only if vectorized > path > > This patch only allocates the SW ring when vectorized > datapath is used. It also moves the SW ring and fake mbuf > in the virtnet_rx struct since this is Rx-only. > > Signed-off-by: Maxime Coquelin > --- > drivers/net/virtio/virtio_ethdev.c| 88 --- > drivers/net/virtio/virtio_rxtx.c | 8 +- > drivers/net/virtio/virtio_rxtx.h | 4 +- > drivers/net/virtio/virtio_rxtx_simple.h | 2 +- > .../net/virtio/virtio_rxtx_simple_altivec.c | 4 +- > drivers/net/virtio/virtio_rxtx_simple_neon.c | 4 +- > drivers/net/virtio/virtio_rxtx_simple_sse.c | 4 +- > drivers/net/virtio/virtqueue.c| 6 +- > drivers/net/virtio/virtqueue.h| 1 - > 9 files changed, 72 insertions(+), 49 deletions(-) > > diff --git a/drivers/net/virtio/virtio_ethdev.c > b/drivers/net/virtio/virtio_ethdev.c > index 8b17b450ec..46dd5606f6 100644 > --- a/drivers/net/virtio/virtio_ethdev.c > +++ b/drivers/net/virtio/virtio_ethdev.c > @@ -339,6 +339,47 @@ virtio_free_queue_headers(struct virtqueue *vq) > *hdr_mem = 0; > } > > +static int > +virtio_rxq_sw_ring_alloc(struct virtqueue *vq, int numa_node) > +{ > + void *sw_ring; > + struct rte_mbuf *mbuf; > + size_t size; > + > + /* SW ring is only used with vectorized datapath */ > + if (!vq->hw->use_vec_rx) > + return 0; > + > + size = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq->vq_nentries) * sizeof(vq- > >rxq.sw_ring[0]); > + > + sw_ring = rte_zmalloc_socket("sw_ring", size, RTE_CACHE_LINE_SIZE, > numa_node); > + if (!sw_ring) { > + PMD_INIT_LOG(ERR, "can not allocate RX soft ring"); > + return -ENOMEM; > + } > + > + mbuf = rte_zmalloc_socket("sw_ring", sizeof(*mbuf), > RTE_CACHE_LINE_SIZE, numa_node); > + if (!mbuf) { > + PMD_INIT_LOG(ERR, "can not allocate fake mbuf"); > + rte_free(sw_ring); > + return -ENOMEM; > + } > + > + vq->rxq.sw_ring = sw_ring; > + vq->rxq.fake_mbuf = mbuf; > + > + return 0; > +} > + > +static void > +virtio_rxq_sw_ring_free(struct virtqueue *vq) > +{ > + rte_free(vq->rxq.fake_mbuf); > + vq->rxq.fake_mbuf = NULL; > + rte_free(vq->rxq.sw_ring); > + vq->rxq.sw_ring = NULL; > +} > + > static int > virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx) > { > @@ -346,14 +387,11 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t > queue_idx) > const struct rte_memzone *mz = NULL; > unsigned int vq_size, size; > struct virtio_hw *hw = dev->data->dev_private; > - struct virtnet_rx *rxvq = NULL; > struct virtnet_ctl *cvq = NULL; > struct virtqueue *vq; > - void *sw_ring = NULL; > int queue_type = virtio_get_queue_type(hw, queue_idx); > int ret; > int numa_node = dev->device->numa_node; > - struct rte_mbuf *fake_mbuf = NULL; > > PMD_INIT_LOG(INFO, "setting up queue: %u on NUMA node %d", > queue_idx, numa_node); > @@ -441,28 +479,9 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t > queue_idx) > } > > if (queue_type == VTNET_RQ) { > - size_t sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) * > -sizeof(vq->sw_ring[0]); > - > - sw_ring = rte_zmalloc_socket("sw_ring", sz_sw, > - RTE_CACHE_LINE_SIZE, numa_node); > - if (!sw_ring) { > - PMD_INIT_LOG(ERR, "can not allocate RX soft ring"); > - ret = -ENOMEM; > + ret = virtio_rxq_sw_ring_alloc(vq, numa_node); > + if (ret) > goto free_hdr_mz; > - } > - > - fake_mbuf = rte_zmalloc_socket("sw_ring", sizeof(*fake_mbuf), > - RTE_CACHE_LINE_SIZE, 
numa_node); > - if (!fake_mbuf) { > - PMD_INIT_LOG(ERR, "can not allocate fake mbuf"); > - ret = -ENOMEM; > - goto free_sw_ring; > - } > - > - vq->sw_ring = sw_ring; > - rxvq = &vq->rxq; > - rxvq->fake_mbuf = fake_mbuf; > } else if (queue_type == VTNET_TQ) { > virtqueue_txq_indirect_headers_init(vq); > } else if (queue_type == VTNET_CQ) { > @@ -486,9 +505,8 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t > queue_idx) > > clean_vq: > hw->cvq = NULL; > - rte_free(fake_mbuf); > -free_sw_ring: > - rte_free(sw_ring); > + if (queue_type == VTNET_RQ) > + virtio_rxq_sw_ring_free(vq); > free_hdr_mz: > virtio_free_queue_headers(vq); > fr
RE: [PATCH v1 01/21] net/virtio: move CVQ code into a dedicated file
> -Original Message- > From: Maxime Coquelin > Sent: Wednesday, November 30, 2022 11:56 PM > To: dev@dpdk.org; Xia, Chenbo ; > david.march...@redhat.com; epere...@redhat.com > Cc: Maxime Coquelin > Subject: [PATCH v1 01/21] net/virtio: move CVQ code into a dedicated file > > This patch moves Virtio control queue code into a dedicated > file, as preliminary rework to support shadow control queue > in Virtio-user. > > Signed-off-by: Maxime Coquelin > --- > drivers/net/virtio/meson.build | 1 + > drivers/net/virtio/virtio_cvq.c| 230 + > drivers/net/virtio/virtio_cvq.h| 126 > drivers/net/virtio/virtio_ethdev.c | 218 +-- > drivers/net/virtio/virtio_rxtx.h | 9 -- > drivers/net/virtio/virtqueue.h | 105 + > 6 files changed, 359 insertions(+), 330 deletions(-) > create mode 100644 drivers/net/virtio/virtio_cvq.c > create mode 100644 drivers/net/virtio/virtio_cvq.h > > -- > 2.38.1 Reviewed-by: Chenbo Xia
RE: [PATCH v1 02/21] net/virtio: introduce notify callback for control queue
> -Original Message- > From: Maxime Coquelin > Sent: Wednesday, November 30, 2022 11:56 PM > To: dev@dpdk.org; Xia, Chenbo ; > david.march...@redhat.com; epere...@redhat.com > Cc: Maxime Coquelin > Subject: [PATCH v1 02/21] net/virtio: introduce notify callback for > control queue > > This patch introduces a notification callback for the control > virtqueue as preliminary work to add shadow control virtqueue > support. > > This new callback is required so that the shadow control queue > implemented in Virtio-user does not call the notification op > implemented for the driver layer. > > Signed-off-by: Maxime Coquelin > --- > drivers/net/virtio/virtio_cvq.c| 4 ++-- > drivers/net/virtio/virtio_cvq.h| 4 > drivers/net/virtio/virtio_ethdev.c | 7 +++ > 3 files changed, 13 insertions(+), 2 deletions(-) > > diff --git a/drivers/net/virtio/virtio_cvq.c > b/drivers/net/virtio/virtio_cvq.c > index de4299a2a7..cd25614df8 100644 > --- a/drivers/net/virtio/virtio_cvq.c > +++ b/drivers/net/virtio/virtio_cvq.c > @@ -76,7 +76,7 @@ virtio_send_command_packed(struct virtnet_ctl *cvq, > vq->hw->weak_barriers); > > virtio_wmb(vq->hw->weak_barriers); > - virtqueue_notify(vq); > + cvq->notify_queue(vq, cvq->notify_cookie); > > /* wait for used desc in virtqueue >* desc_is_used has a load-acquire or rte_io_rmb inside > @@ -155,7 +155,7 @@ virtio_send_command_split(struct virtnet_ctl *cvq, > > PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index); > > - virtqueue_notify(vq); > + cvq->notify_queue(vq, cvq->notify_cookie); > > while (virtqueue_nused(vq) == 0) > usleep(100); > diff --git a/drivers/net/virtio/virtio_cvq.h > b/drivers/net/virtio/virtio_cvq.h > index 139e813ffb..224dc81422 100644 > --- a/drivers/net/virtio/virtio_cvq.h > +++ b/drivers/net/virtio/virtio_cvq.h > @@ -7,6 +7,8 @@ > > #include > > +struct virtqueue; > + > /** > * Control the RX mode, ie. promiscuous, allmulti, etc... > * All commands require an "out" sg entry containing a 1 byte > @@ -110,6 +112,8 @@ struct virtnet_ctl { > uint16_t port_id; /**< Device port identifier. */ > const struct rte_memzone *mz; /**< mem zone to populate CTL ring. > */ > rte_spinlock_t lock; /**< spinlock for control queue. > */ > + void (*notify_queue)(struct virtqueue *vq, void *cookie); /**< > notify ops. */ > + void *notify_cookie; /**< cookie for notify ops */ > }; > > #define VIRTIO_MAX_CTRL_DATA 2048 > diff --git a/drivers/net/virtio/virtio_ethdev.c > b/drivers/net/virtio/virtio_ethdev.c > index d553f89a0d..8db8771f4d 100644 > --- a/drivers/net/virtio/virtio_ethdev.c > +++ b/drivers/net/virtio/virtio_ethdev.c > @@ -253,6 +253,12 @@ virtio_init_vring(struct virtqueue *vq) > virtqueue_disable_intr(vq); > } > > +static void > +virtio_control_queue_notify(struct virtqueue *vq, __rte_unused void > *cookie) > +{ > + virtqueue_notify(vq); > +} > + > static int > virtio_init_queue(struct rte_eth_dev *dev, uint16_t queue_idx) > { > @@ -421,6 +427,7 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t > queue_idx) > memset(cvq->virtio_net_hdr_mz->addr, 0, rte_mem_page_size()); > > hw->cvq = cvq; > + vq->cq.notify_queue = &virtio_control_queue_notify; > } > > if (hw->use_va) > -- > 2.38.1 Reviewed-by: Chenbo Xia
RE: [PATCH v1 03/21] net/virtio: virtqueue headers alloc refactoring
> -Original Message- > From: Maxime Coquelin > Sent: Wednesday, November 30, 2022 11:56 PM > To: dev@dpdk.org; Xia, Chenbo ; > david.march...@redhat.com; epere...@redhat.com > Cc: Maxime Coquelin > Subject: [PATCH v1 03/21] net/virtio: virtqueue headers alloc refactoring > > This patch refactors virtqueue initialization by moving > its headers allocation and deallocation into a dedicated > function. > > While at it, it renames the memzone metadata and address > pointers in the virtnet_tx and virtnet_ctl structures to > remove the redundant virtio_net_ prefix. > > Signed-off-by: Maxime Coquelin > --- > drivers/net/virtio/virtio_cvq.c| 19 ++-- > drivers/net/virtio/virtio_cvq.h| 9 +- > drivers/net/virtio/virtio_ethdev.c | 149 ++--- > drivers/net/virtio/virtio_rxtx.c | 12 +-- > drivers/net/virtio/virtio_rxtx.h | 12 +-- > drivers/net/virtio/virtqueue.c | 8 +- > drivers/net/virtio/virtqueue.h | 13 +-- > 7 files changed, 126 insertions(+), 96 deletions(-) > -- > 2.38.1 Reviewed-by: Chenbo Xia
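The dedicated function gathers the per-queue-type header sizing that used to be spread across virtio_init_queue(). A condensed sketch of its likely shape; the alloc-side name is an assumption inferred from the virtio_free_queue_headers() counterpart visible in patch 08/21:

    /* Condensed sketch of a dedicated header allocator; the name and the
     * exact error handling are assumptions inferred from the series. */
    static int
    virtio_alloc_queue_headers(struct virtqueue *vq, int numa_node, const char *name)
    {
        char hdr_name[VIRTQUEUE_MAX_NAME_SZ];
        const struct rte_memzone **hdr_mz;
        rte_iova_t *hdr_mem;
        ssize_t size;

        switch (virtio_get_queue_type(vq->hw, vq->vq_queue_index)) {
        case VTNET_TQ:
            /* One virtio_net_hdr plus indirect ring per Tx descriptor. */
            size = vq->vq_nentries * sizeof(struct virtio_tx_region);
            hdr_mz = &vq->txq.hdr_mz;
            hdr_mem = &vq->txq.hdr_mem;
            break;
        case VTNET_CQ:
            /* One page for control command, data and status. */
            size = rte_mem_page_size();
            hdr_mz = &vq->cq.hdr_mz;
            hdr_mem = &vq->cq.hdr_mem;
            break;
        default:
            /* Rx queues carry their headers in the mbufs. */
            return 0;
        }

        snprintf(hdr_name, sizeof(hdr_name), "%s_hdr", name);
        *hdr_mz = rte_memzone_reserve_aligned(hdr_name, size, numa_node,
                RTE_MEMZONE_IOVA_CONTIG, RTE_CACHE_LINE_SIZE);
        if (*hdr_mz == NULL)
            return -ENOMEM;

        memset((*hdr_mz)->addr, 0, size);
        *hdr_mem = vq->hw->use_va ? (uintptr_t)(*hdr_mz)->addr : (*hdr_mz)->iova;

        return 0;
    }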
RE: [PATCH v1 04/21] net/virtio: remove port ID info from Rx queue
> -Original Message- > From: Maxime Coquelin > Sent: Wednesday, November 30, 2022 11:56 PM > To: dev@dpdk.org; Xia, Chenbo ; > david.march...@redhat.com; epere...@redhat.com > Cc: Maxime Coquelin > Subject: [PATCH v1 04/21] net/virtio: remove port ID info from Rx queue > > The port ID information is duplicated in several places. > This patch removes it from the virtnet_rx struct as it can > be found in virtio_hw struct. > > Signed-off-by: Maxime Coquelin > --- > drivers/net/virtio/virtio_ethdev.c | 1 - > drivers/net/virtio/virtio_rxtx.c| 25 ++--- > drivers/net/virtio/virtio_rxtx.h| 1 - > drivers/net/virtio/virtio_rxtx_packed.c | 3 +-- > drivers/net/virtio/virtio_rxtx_simple.c | 3 ++- > drivers/net/virtio/virtio_rxtx_simple.h | 5 +++-- > 6 files changed, 16 insertions(+), 22 deletions(-) > > diff --git a/drivers/net/virtio/virtio_ethdev.c > b/drivers/net/virtio/virtio_ethdev.c > index cead5f0884..1c68e5a283 100644 > --- a/drivers/net/virtio/virtio_ethdev.c > +++ b/drivers/net/virtio/virtio_ethdev.c > @@ -462,7 +462,6 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t > queue_idx) > > vq->sw_ring = sw_ring; > rxvq = &vq->rxq; > - rxvq->port_id = dev->data->port_id; > rxvq->mz = mz; > rxvq->fake_mbuf = fake_mbuf; > } else if (queue_type == VTNET_TQ) { > diff --git a/drivers/net/virtio/virtio_rxtx.c > b/drivers/net/virtio/virtio_rxtx.c > index bd95e8ceb5..45c04aa3f8 100644 > --- a/drivers/net/virtio/virtio_rxtx.c > +++ b/drivers/net/virtio/virtio_rxtx.c > @@ -1024,7 +1024,7 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf > **rx_pkts, uint16_t nb_pkts) > continue; > } > > - rxm->port = rxvq->port_id; > + rxm->port = hw->port_id; > rxm->data_off = RTE_PKTMBUF_HEADROOM; > rxm->ol_flags = 0; > rxm->vlan_tci = 0; > @@ -1066,8 +1066,7 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf > **rx_pkts, uint16_t nb_pkts) > } > nb_enqueued += free_cnt; > } else { > - struct rte_eth_dev *dev = > - &rte_eth_devices[rxvq->port_id]; > + struct rte_eth_dev *dev = &rte_eth_devices[hw->port_id]; > dev->data->rx_mbuf_alloc_failed += free_cnt; > } > } > @@ -1127,7 +1126,7 @@ virtio_recv_pkts_packed(void *rx_queue, struct > rte_mbuf **rx_pkts, > continue; > } > > - rxm->port = rxvq->port_id; > + rxm->port = hw->port_id; > rxm->data_off = RTE_PKTMBUF_HEADROOM; > rxm->ol_flags = 0; > rxm->vlan_tci = 0; > @@ -1169,8 +1168,7 @@ virtio_recv_pkts_packed(void *rx_queue, struct > rte_mbuf **rx_pkts, > } > nb_enqueued += free_cnt; > } else { > - struct rte_eth_dev *dev = > - &rte_eth_devices[rxvq->port_id]; > + struct rte_eth_dev *dev = &rte_eth_devices[hw->port_id]; > dev->data->rx_mbuf_alloc_failed += free_cnt; > } > } > @@ -1258,7 +1256,7 @@ virtio_recv_pkts_inorder(void *rx_queue, > rxm->pkt_len = (uint32_t)(len[i] - hdr_size); > rxm->data_len = (uint16_t)(len[i] - hdr_size); > > - rxm->port = rxvq->port_id; > + rxm->port = hw->port_id; > > rx_pkts[nb_rx] = rxm; > prev = rxm; > @@ -1352,8 +1350,7 @@ virtio_recv_pkts_inorder(void *rx_queue, > } > nb_enqueued += free_cnt; > } else { > - struct rte_eth_dev *dev = > - &rte_eth_devices[rxvq->port_id]; > + struct rte_eth_dev *dev = &rte_eth_devices[hw->port_id]; > dev->data->rx_mbuf_alloc_failed += free_cnt; > } > } > @@ -1437,7 +1434,7 @@ virtio_recv_mergeable_pkts(void *rx_queue, > rxm->pkt_len = (uint32_t)(len[i] - hdr_size); > rxm->data_len = (uint16_t)(len[i] - hdr_size); > > - rxm->port = rxvq->port_id; > + rxm->port = hw->port_id; > > rx_pkts[nb_rx] = rxm; > prev = rxm; > @@ -1530,8 +1527,7 @@ virtio_recv_mergeable_pkts(void *rx_queue, > } > nb_enqueued += 
free_cnt; > } else { > - struct rte_eth_dev *dev = > - &rte_eth_devices[rxvq->port_id]; > + struct rte_eth_dev *dev = &rte_eth_devices[hw->port_id]; > dev->data->rx_mbuf_alloc_failed += free_cnt; > } > } > @@ -1610,7 +1606,7 @@ virtio_recv_mergeable_pkts_packed(voi
RE: [PATCH v1 05/21] net/virtio: remove unused fields in Tx queue struct
> -Original Message- > From: Maxime Coquelin > Sent: Wednesday, November 30, 2022 11:56 PM > To: dev@dpdk.org; Xia, Chenbo ; > david.march...@redhat.com; epere...@redhat.com > Cc: Maxime Coquelin > Subject: [PATCH v1 05/21] net/virtio: remove unused fields in Tx queue > struct > > The port and queue IDs are not used in the virtnet_tx struct, > so this patch removes them. > > Signed-off-by: Maxime Coquelin > --- > drivers/net/virtio/virtio_ethdev.c | 1 - > drivers/net/virtio/virtio_rxtx.c | 1 - > drivers/net/virtio/virtio_rxtx.h | 3 --- > 3 files changed, 5 deletions(-) > > diff --git a/drivers/net/virtio/virtio_ethdev.c > b/drivers/net/virtio/virtio_ethdev.c > index 1c68e5a283..a581fae408 100644 > --- a/drivers/net/virtio/virtio_ethdev.c > +++ b/drivers/net/virtio/virtio_ethdev.c > @@ -466,7 +466,6 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t > queue_idx) > rxvq->fake_mbuf = fake_mbuf; > } else if (queue_type == VTNET_TQ) { > txvq = &vq->txq; > - txvq->port_id = dev->data->port_id; > txvq->mz = mz; > } else if (queue_type == VTNET_CQ) { > cvq = &vq->cq; > diff --git a/drivers/net/virtio/virtio_rxtx.c > b/drivers/net/virtio/virtio_rxtx.c > index 45c04aa3f8..304403d46c 100644 > --- a/drivers/net/virtio/virtio_rxtx.c > +++ b/drivers/net/virtio/virtio_rxtx.c > @@ -831,7 +831,6 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, > vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc); > > txvq = &vq->txq; > - txvq->queue_id = queue_idx; > > tx_free_thresh = tx_conf->tx_free_thresh; > if (tx_free_thresh == 0) > diff --git a/drivers/net/virtio/virtio_rxtx.h > b/drivers/net/virtio/virtio_rxtx.h > index 97de9eb0a3..9bbcf32f66 100644 > --- a/drivers/net/virtio/virtio_rxtx.h > +++ b/drivers/net/virtio/virtio_rxtx.h > @@ -35,9 +35,6 @@ struct virtnet_tx { > const struct rte_memzone *hdr_mz; /**< memzone to populate hdr. */ > rte_iova_t hdr_mem; /**< hdr for each xmit packet */ > > - uint16_tqueue_id; /**< DPDK queue index. */ > - uint16_tport_id; /**< Device port identifier. */ > - > struct virtnet_stats stats; /* Statistics */ > > const struct rte_memzone *mz;/**< mem zone to populate TX ring. > */ > -- > 2.38.1 Reviewed-by: Chenbo Xia
RE: [PATCH v1 06/21] net/virtio: remove unused queue ID field in Rx queue
> -Original Message- > From: Maxime Coquelin > Sent: Wednesday, November 30, 2022 11:56 PM > To: dev@dpdk.org; Xia, Chenbo ; > david.march...@redhat.com; epere...@redhat.com > Cc: Maxime Coquelin > Subject: [PATCH v1 06/21] net/virtio: remove unused queue ID field in Rx > queue > > This patch removes the queue ID field in virtnet_rx struct. > > Signed-off-by: Maxime Coquelin > --- > drivers/net/virtio/virtio_rxtx.c | 1 - > drivers/net/virtio/virtio_rxtx.h | 2 -- > 2 files changed, 3 deletions(-) > > diff --git a/drivers/net/virtio/virtio_rxtx.c > b/drivers/net/virtio/virtio_rxtx.c > index 304403d46c..4f69b97f41 100644 > --- a/drivers/net/virtio/virtio_rxtx.c > +++ b/drivers/net/virtio/virtio_rxtx.c > @@ -703,7 +703,6 @@ virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, > vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc); > > rxvq = &vq->rxq; > - rxvq->queue_id = queue_idx; > rxvq->mpool = mp; > dev->data->rx_queues[queue_idx] = rxvq; > > diff --git a/drivers/net/virtio/virtio_rxtx.h > b/drivers/net/virtio/virtio_rxtx.h > index 9bbcf32f66..a5fe3ea95c 100644 > --- a/drivers/net/virtio/virtio_rxtx.h > +++ b/drivers/net/virtio/virtio_rxtx.h > @@ -23,8 +23,6 @@ struct virtnet_rx { > uint64_t mbuf_initializer; /**< value to init mbufs. */ > struct rte_mempool *mpool; /**< mempool for mbuf allocation */ > > - uint16_t queue_id; /**< DPDK queue index. */ > - > /* Statistics */ > struct virtnet_stats stats; > > -- > 2.38.1 Reviewed-by: Chenbo Xia
RE: [PATCH v1 07/21] net/virtio: remove unused Port ID in control queue
> -Original Message- > From: Maxime Coquelin > Sent: Wednesday, November 30, 2022 11:56 PM > To: dev@dpdk.org; Xia, Chenbo ; > david.march...@redhat.com; epere...@redhat.com > Cc: Maxime Coquelin > Subject: [PATCH v1 07/21] net/virtio: remove unused Port ID in control > queue > > This patch removes the unused port ID information from > virtnet_ctl struct. > > Signed-off-by: Maxime Coquelin > --- > drivers/net/virtio/virtio_cvq.h | 1 - > 1 file changed, 1 deletion(-) > > diff --git a/drivers/net/virtio/virtio_cvq.h > b/drivers/net/virtio/virtio_cvq.h > index 226561e6b8..0ff326b063 100644 > --- a/drivers/net/virtio/virtio_cvq.h > +++ b/drivers/net/virtio/virtio_cvq.h > @@ -108,7 +108,6 @@ typedef uint8_t virtio_net_ctrl_ack; > struct virtnet_ctl { > const struct rte_memzone *hdr_mz; /**< memzone to populate hdr. */ > rte_iova_t hdr_mem; /**< hdr for each xmit packet */ > - uint16_t port_id; /**< Device port identifier. */ > const struct rte_memzone *mz; /**< mem zone to populate CTL ring. > */ > rte_spinlock_t lock; /**< spinlock for control queue. > */ > void (*notify_queue)(struct virtqueue *vq, void *cookie); /**< > notify ops. */ > -- > 2.38.1 Reviewed-by: Chenbo Xia
RE: [PATCH v1 08/21] net/virtio: move vring memzone to virtqueue struct
> -Original Message- > From: Maxime Coquelin > Sent: Wednesday, November 30, 2022 11:56 PM > To: dev@dpdk.org; Xia, Chenbo ; > david.march...@redhat.com; epere...@redhat.com > Cc: Maxime Coquelin > Subject: [PATCH v1 08/21] net/virtio: move vring memzone to virtqueue > struct > > Whatever its type (Rx, Tx or Ctl), all the virtqueues > require a memzone for the vrings. This patch moves the > memzone pointer to the virtqueue struct, simplifying the code. > > Signed-off-by: Maxime Coquelin > --- > drivers/net/virtio/virtio_cvq.h| 1 - > drivers/net/virtio/virtio_ethdev.c | 11 ++- > drivers/net/virtio/virtio_rxtx.h | 4 > drivers/net/virtio/virtqueue.c | 6 ++ > drivers/net/virtio/virtqueue.h | 1 + > 5 files changed, 5 insertions(+), 18 deletions(-) > > diff --git a/drivers/net/virtio/virtio_cvq.h > b/drivers/net/virtio/virtio_cvq.h > index 0ff326b063..70739ae04b 100644 > --- a/drivers/net/virtio/virtio_cvq.h > +++ b/drivers/net/virtio/virtio_cvq.h > @@ -108,7 +108,6 @@ typedef uint8_t virtio_net_ctrl_ack; > struct virtnet_ctl { > const struct rte_memzone *hdr_mz; /**< memzone to populate hdr. */ > rte_iova_t hdr_mem; /**< hdr for each xmit packet */ > - const struct rte_memzone *mz; /**< mem zone to populate CTL ring. > */ > rte_spinlock_t lock; /**< spinlock for control queue. > */ > void (*notify_queue)(struct virtqueue *vq, void *cookie); /**< > notify ops. */ > void *notify_cookie; /**< cookie for notify ops */ > diff --git a/drivers/net/virtio/virtio_ethdev.c > b/drivers/net/virtio/virtio_ethdev.c > index a581fae408..b546916a9f 100644 > --- a/drivers/net/virtio/virtio_ethdev.c > +++ b/drivers/net/virtio/virtio_ethdev.c > @@ -423,6 +423,7 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t > queue_idx) > > memset(mz->addr, 0, mz->len); > > + vq->mz = mz; > if (hw->use_va) > vq->vq_ring_mem = (uintptr_t)mz->addr; > else > @@ -462,14 +463,11 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t > queue_idx) > > vq->sw_ring = sw_ring; > rxvq = &vq->rxq; > - rxvq->mz = mz; > rxvq->fake_mbuf = fake_mbuf; > } else if (queue_type == VTNET_TQ) { > txvq = &vq->txq; > - txvq->mz = mz; > } else if (queue_type == VTNET_CQ) { > cvq = &vq->cq; > - cvq->mz = mz; > hw->cvq = cvq; > vq->cq.notify_queue = &virtio_control_queue_notify; > } > @@ -550,15 +548,10 @@ virtio_free_queues(struct virtio_hw *hw) > if (queue_type == VTNET_RQ) { > rte_free(vq->rxq.fake_mbuf); > rte_free(vq->sw_ring); > - rte_memzone_free(vq->rxq.mz); > - } else if (queue_type == VTNET_TQ) { > - rte_memzone_free(vq->txq.mz); > - } else { > - rte_memzone_free(vq->cq.mz); > } > > virtio_free_queue_headers(vq); > - > + rte_memzone_free(vq->mz); > rte_free(vq); > hw->vqs[i] = NULL; > } > diff --git a/drivers/net/virtio/virtio_rxtx.h > b/drivers/net/virtio/virtio_rxtx.h > index a5fe3ea95c..57af630110 100644 > --- a/drivers/net/virtio/virtio_rxtx.h > +++ b/drivers/net/virtio/virtio_rxtx.h > @@ -25,8 +25,6 @@ struct virtnet_rx { > > /* Statistics */ > struct virtnet_stats stats; > - > - const struct rte_memzone *mz; /**< mem zone to populate RX ring. */ > }; > > struct virtnet_tx { > @@ -34,8 +32,6 @@ struct virtnet_tx { > rte_iova_t hdr_mem; /**< hdr for each xmit packet */ > > struct virtnet_stats stats; /* Statistics */ > - > - const struct rte_memzone *mz;/**< mem zone to populate TX ring. 
> */ > }; > > int virtio_rxq_vec_setup(struct virtnet_rx *rxvq); > diff --git a/drivers/net/virtio/virtqueue.c > b/drivers/net/virtio/virtqueue.c > index 3b174a5923..41e3529546 100644 > --- a/drivers/net/virtio/virtqueue.c > +++ b/drivers/net/virtio/virtqueue.c > @@ -148,7 +148,6 @@ virtqueue_rxvq_reset_packed(struct virtqueue *vq) > { > int size = vq->vq_nentries; > struct vq_desc_extra *dxp; > - struct virtnet_rx *rxvq; > uint16_t desc_idx; > > vq->vq_used_cons_idx = 0; > @@ -162,8 +161,7 @@ virtqueue_rxvq_reset_packed(struct virtqueue *vq) > vq->vq_packed.event_flags_shadow = 0; > vq->vq_packed.cached_flags |= VRING_DESC_F_WRITE; > > - rxvq = &vq->rxq; > - memset(rxvq->mz->addr, 0, rxvq->mz->len); > + memset(vq->mz->addr, 0, vq->mz->len); > > for (desc_idx = 0; desc_idx < vq->vq_nentries; desc_idx++) { > dxp = &vq->vq_descx[desc_idx]; > @@ -201,7 +199,7 @@ virtqueue_txvq_reset_packed(struct virtqueue *vq) > > txvq = &vq->txq; > txr = txvq->hdr_mz->addr; > - memset(txvq->mz->addr, 0, txvq->
RE: [PATCH v1 09/21] net/virtio: refactor indirect desc headers init
> -Original Message- > From: Maxime Coquelin > Sent: Wednesday, November 30, 2022 11:56 PM > To: dev@dpdk.org; Xia, Chenbo ; > david.march...@redhat.com; epere...@redhat.com > Cc: Maxime Coquelin > Subject: [PATCH v1 09/21] net/virtio: refactor indirect desc headers init > > This patch refactors the indirect descriptor headers > initialization into a dedicated function, and uses it in > both the queue init and reset functions. > > Signed-off-by: Maxime Coquelin > --- > drivers/net/virtio/virtio_ethdev.c | 30 + > drivers/net/virtio/virtqueue.c | 68 ++ > drivers/net/virtio/virtqueue.h | 2 + > 3 files changed, 54 insertions(+), 46 deletions(-) > > diff --git a/drivers/net/virtio/virtio_ethdev.c > b/drivers/net/virtio/virtio_ethdev.c > index b546916a9f..8b17b450ec 100644 > --- a/drivers/net/virtio/virtio_ethdev.c > +++ b/drivers/net/virtio/virtio_ethdev.c > @@ -347,7 +347,6 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t > queue_idx) > unsigned int vq_size, size; > struct virtio_hw *hw = dev->data->dev_private; > struct virtnet_rx *rxvq = NULL; > - struct virtnet_tx *txvq = NULL; > struct virtnet_ctl *cvq = NULL; > struct virtqueue *vq; > void *sw_ring = NULL; > @@ -465,7 +464,7 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t > queue_idx) > rxvq = &vq->rxq; > rxvq->fake_mbuf = fake_mbuf; > } else if (queue_type == VTNET_TQ) { > - txvq = &vq->txq; > + virtqueue_txq_indirect_headers_init(vq); > } else if (queue_type == VTNET_CQ) { > cvq = &vq->cq; > hw->cvq = cvq; > @@ -477,33 +476,6 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t > queue_idx) > else > vq->mbuf_addr_offset = offsetof(struct rte_mbuf, buf_iova); > > - if (queue_type == VTNET_TQ) { > - struct virtio_tx_region *txr; > - unsigned int i; > - > - txr = txvq->hdr_mz->addr; > - for (i = 0; i < vq_size; i++) { > - /* first indirect descriptor is always the tx header */ > - if (!virtio_with_packed_queue(hw)) { > - struct vring_desc *start_dp = txr[i].tx_indir; > - vring_desc_init_split(start_dp, > - RTE_DIM(txr[i].tx_indir)); > - start_dp->addr = txvq->hdr_mem + i * > sizeof(*txr) > - + offsetof(struct virtio_tx_region, > tx_hdr); > - start_dp->len = hw->vtnet_hdr_size; > - start_dp->flags = VRING_DESC_F_NEXT; > - } else { > - struct vring_packed_desc *start_dp = > - txr[i].tx_packed_indir; > - vring_desc_init_indirect_packed(start_dp, > - RTE_DIM(txr[i].tx_packed_indir)); > - start_dp->addr = txvq->hdr_mem + i * > sizeof(*txr) > - + offsetof(struct virtio_tx_region, > tx_hdr); > - start_dp->len = hw->vtnet_hdr_size; > - } > - } > - } > - > if (VIRTIO_OPS(hw)->setup_queue(hw, vq) < 0) { > PMD_INIT_LOG(ERR, "setup_queue failed"); > ret = -EINVAL; > diff --git a/drivers/net/virtio/virtqueue.c > b/drivers/net/virtio/virtqueue.c > index 41e3529546..fb651a4ca3 100644 > --- a/drivers/net/virtio/virtqueue.c > +++ b/drivers/net/virtio/virtqueue.c > @@ -143,6 +143,54 @@ virtqueue_rxvq_flush(struct virtqueue *vq) > virtqueue_rxvq_flush_split(vq); > } > > +static void > +virtqueue_txq_indirect_header_init_packed(struct virtqueue *vq, uint32_t > idx) > +{ > + struct virtio_tx_region *txr; > + struct vring_packed_desc *desc; > + rte_iova_t hdr_mem; > + > + txr = vq->txq.hdr_mz->addr; > + hdr_mem = vq->txq.hdr_mem; > + desc = txr[idx].tx_packed_indir; > + > + vring_desc_init_indirect_packed(desc, > RTE_DIM(txr[idx].tx_packed_indir)); > + desc->addr = hdr_mem + idx * sizeof(*txr) + offsetof(struct > virtio_tx_region, tx_hdr); > + desc->len = vq->hw->vtnet_hdr_size; > +} > + > +static void > 
+virtqueue_txq_indirect_header_init_split(struct virtqueue *vq, uint32_t > idx) > +{ > + struct virtio_tx_region *txr; > + struct vring_desc *desc; > + rte_iova_t hdr_mem; > + > + txr = vq->txq.hdr_mz->addr; > + hdr_mem = vq->txq.hdr_mem; > + desc = txr[idx].tx_indir; > + > + vring_desc_init_split(desc, RTE_DIM(txr[idx].tx_indir)); > + desc->addr = hdr_mem + idx * sizeof(*txr) + offsetof(struct > virtio_tx_region, tx_hdr); > + desc->len = vq->hw->vtnet_hdr_size; > + desc->flags = VRING_DESC_F_NEXT; > +} > + > +void > +virtqueue_txq_indire
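The quoted hunk is cut short by the archive right where the new entry point begins. Judging from the inline loop it replaces and the call site added in virtio_init_queue(), the wrapper presumably dispatches per descriptor between the split and packed helpers; a sketch, not the verbatim patch content:

    /* Sketch of virtqueue_txq_indirect_headers_init(), reconstructed
     * from the removed inline loop above; not the verbatim patch. */
    void
    virtqueue_txq_indirect_headers_init(struct virtqueue *vq)
    {
        uint32_t i;

        for (i = 0; i < vq->vq_nentries; i++) {
            if (virtio_with_packed_queue(vq->hw))
                virtqueue_txq_indirect_header_init_packed(vq, i);
            else
                virtqueue_txq_indirect_header_init_split(vq, i);
        }
    }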
RE: [PATCH v1 11/21] net/virtio: extract virtqueue init from virtio queue init
> -Original Message- > From: Maxime Coquelin > Sent: Wednesday, November 30, 2022 11:56 PM > To: dev@dpdk.org; Xia, Chenbo ; > david.march...@redhat.com; epere...@redhat.com > Cc: Maxime Coquelin > Subject: [PATCH v1 11/21] net/virtio: extract virtqueue init from virtio > queue init > > This patch extracts the virtqueue initialization out of > the Virtio ethdev queue initialization, as preliminary > work to provide a way for Virtio-user to allocate its > shadow control virtqueue. > > Signed-off-by: Maxime Coquelin > --- > drivers/net/virtio/virtio_ethdev.c | 261 ++-- > drivers/net/virtio/virtqueue.c | 266 + > drivers/net/virtio/virtqueue.h | 5 + > 3 files changed, 282 insertions(+), 250 deletions(-) > > -- > 2.38.1 Reviewed-by: Chenbo Xia
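With the allocation logic extracted into virtqueue.c, Virtio-user gains a path to create a shadow control queue without going through the ethdev layer. A rough usage sketch; the virtqueue_alloc() prototype, SHADOW_CVQ_SIZE and the surrounding names are assumptions about the extracted interface, not quotes from the patch:

    /* Hypothetical shadow-CVQ allocation on the Virtio-user side; the
     * virtqueue_alloc() prototype and SHADOW_CVQ_SIZE are assumptions. */
    struct virtqueue *scvq;

    scvq = virtqueue_alloc(hw, hw->max_queue_pairs * 2, SHADOW_CVQ_SIZE,
                           VTNET_CQ, SOCKET_ID_ANY, "shadow_cvq");
    if (scvq == NULL)
        return -ENOMEM;

    /* Hook in a shadow notify callback as introduced in patch 02/21. */
    scvq->cq.notify_queue = shadow_cvq_notify;
    scvq->cq.notify_cookie = dev;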