[v1 0/4] Add support for modifying ECN in IPv4/IPv6 header

2022-04-02 Thread Sean Zhang
This patch set adds support for modifying the ECN field in the IPv4/IPv6
header, and also adds support for the modify_field action in meter.

Jiawei Wang (1):
  ethdev: add IPv4/IPv6 ECN header rewrite action

Sean Zhang (3):
  common/mlx5: add modify ECN capability check
  net/mlx5: add support to modify ECN field
  net/mlx5: add modify field support in meter

 app/test-pmd/cmdline_flow.c  |  3 +-
 doc/guides/nics/mlx5.rst |  4 +-
 drivers/common/mlx5/mlx5_devx_cmds.c |  3 ++
 drivers/common/mlx5/mlx5_devx_cmds.h |  1 +
 drivers/common/mlx5/mlx5_prm.h   | 62 -
 drivers/net/mlx5/mlx5_flow.c |  5 +-
 drivers/net/mlx5/mlx5_flow.h |  2 +
 drivers/net/mlx5/mlx5_flow_dv.c  | 69 ++--
 drivers/net/mlx5/mlx5_flow_meter.c   |  2 +-
 lib/ethdev/rte_flow.h|  2 +
 10 files changed, 143 insertions(+), 10 deletions(-)

-- 
2.31.1



[v1 2/4] common/mlx5: add modify ECN capability check

2022-04-02 Thread Sean Zhang
The outer_ip_ecn flag is added to the header-modify capabilities
properties layout in order to check whether the firmware supports
modification of the ECN field.
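
For illustration only, a minimal sketch of how a PMD can consume this
capability bit during action validation; it mirrors the check added in
patch 3/4 of this series, so the variable names (priv, attr, action,
error) are assumed to be those of the surrounding validation function:

	struct mlx5_hca_attr *hca_attr = &priv->sh->cdev->config.hca_attr;

	/* Reject the ECN rewrite for non-transfer rules in the default
	 * group when the firmware does not advertise the capability
	 * queried below. */
	if (!hca_attr->modify_outer_ip_ecn && !attr->transfer && !attr->group)
		return rte_flow_error_set(error, ENOTSUP,
				RTE_FLOW_ERROR_TYPE_ACTION, action,
				"ECN modification not supported by this firmware");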

Signed-off-by: Sean Zhang 
---
 drivers/common/mlx5/mlx5_devx_cmds.c |  3 ++
 drivers/common/mlx5/mlx5_devx_cmds.h |  1 +
 drivers/common/mlx5/mlx5_prm.h   | 62 +++-
 3 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c 
b/drivers/common/mlx5/mlx5_devx_cmds.c
index d02ac2a678..72296c1ca3 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -1047,6 +1047,9 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
attr->flow.tunnel_header_2_3 = MLX5_GET
(flow_table_nic_cap, hcattr,
 ft_field_support_2_nic_receive.tunnel_header_2_3);
+   attr->modify_outer_ip_ecn = MLX5_GET
+   (flow_table_nic_cap, hcattr,
+ft_header_modify_nic_receive.outer_ip_ecn);
attr->pkt_integrity_match = mlx5_devx_query_pkt_integrity_match(hcattr);
attr->inner_ipv4_ihl = MLX5_GET
(flow_table_nic_cap, hcattr,
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h 
b/drivers/common/mlx5/mlx5_devx_cmds.h
index 1bac18c59d..6e9e23f593 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -256,6 +256,7 @@ struct mlx5_hca_attr {
uint32_t esw_mgr_vport_id_valid:1; /* E-Switch Mgr vport ID is valid. */
uint16_t esw_mgr_vport_id; /* E-Switch Mgr vport ID . */
uint16_t max_wqe_sz_sq;
+   uint32_t modify_outer_ip_ecn:1;
 };
 
 /* LAG Context. */
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 44b18225f6..caaa4c7fb9 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -744,6 +744,7 @@ enum mlx5_modification_field {
MLX5_MODI_OUT_TCP_ACK_NUM,
MLX5_MODI_IN_TCP_ACK_NUM = 0x5C,
MLX5_MODI_GTP_TEID = 0x6E,
+   MLX5_MODI_OUT_IP_ECN = 0x73,
 };
 
 /* Total number of metadata reg_c's. */
@@ -1884,6 +1885,62 @@ struct mlx5_ifc_roce_caps_bits {
u8 reserved_at_20[0x7e0];
 };
 
+struct mlx5_ifc_ft_fields_support_bits {
+   u8 outer_dmac[0x1];
+   u8 outer_smac[0x1];
+   u8 outer_ether_type[0x1];
+   u8 reserved_at_3[0x1];
+   u8 outer_first_prio[0x1];
+   u8 outer_first_cfi[0x1];
+   u8 outer_first_vid[0x1];
+   u8 reserved_at_7[0x1];
+   u8 outer_second_prio[0x1];
+   u8 outer_second_cfi[0x1];
+   u8 outer_second_vid[0x1];
+   u8 reserved_at_b[0x1];
+   u8 outer_sip[0x1];
+   u8 outer_dip[0x1];
+   u8 outer_frag[0x1];
+   u8 outer_ip_protocol[0x1];
+   u8 outer_ip_ecn[0x1];
+   u8 outer_ip_dscp[0x1];
+   u8 outer_udp_sport[0x1];
+   u8 outer_udp_dport[0x1];
+   u8 outer_tcp_sport[0x1];
+   u8 outer_tcp_dport[0x1];
+   u8 outer_tcp_flags[0x1];
+   u8 outer_gre_protocol[0x1];
+   u8 outer_gre_key[0x1];
+   u8 outer_vxlan_vni[0x1];
+   u8 reserved_at_1a[0x5];
+   u8 source_eswitch_port[0x1];
+   u8 inner_dmac[0x1];
+   u8 inner_smac[0x1];
+   u8 inner_ether_type[0x1];
+   u8 reserved_at_23[0x1];
+   u8 inner_first_prio[0x1];
+   u8 inner_first_cfi[0x1];
+   u8 inner_first_vid[0x1];
+   u8 reserved_at_27[0x1];
+   u8 inner_second_prio[0x1];
+   u8 inner_second_cfi[0x1];
+   u8 inner_second_vid[0x1];
+   u8 reserved_at_2b[0x1];
+   u8 inner_sip[0x1];
+   u8 inner_dip[0x1];
+   u8 inner_frag[0x1];
+   u8 inner_ip_protocol[0x1];
+   u8 inner_ip_ecn[0x1];
+   u8 inner_ip_dscp[0x1];
+   u8 inner_udp_sport[0x1];
+   u8 inner_udp_dport[0x1];
+   u8 inner_tcp_sport[0x1];
+   u8 inner_tcp_dport[0x1];
+   u8 inner_tcp_flags[0x1];
+   u8 reserved_at_37[0x9];
+   u8 reserved_at_40[0x40];
+};
+
 /*
  * Table 1872 - Flow Table Fields Supported 2 Format
  */
@@ -1923,7 +1980,10 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
flow_table_properties_nic_transmit_rdma;
struct mlx5_ifc_flow_table_prop_layout_bits
flow_table_properties_nic_transmit_sniffer;
-   u8 reserved_at_e00[0x600];
+   u8 reserved_at_e00[0x200];
+   struct mlx5_ifc_ft_fields_support_bits
+   ft_header_modify_nic_receive;
+   u8 reserved_at_1080[0x380];
struct mlx5_ifc_ft_fields_support_2_bits
ft_field_support_2_nic_receive;
 };
-- 
2.31.1



[v1 3/4] net/mlx5: add support to modify ECN field

2022-04-02 Thread Sean Zhang
This patch adds support for modifying the ECN field in the IPv4/IPv6 header.

Signed-off-by: Sean Zhang 
---
 drivers/net/mlx5/mlx5_flow_dv.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 1e9bd63635..e416eb5701 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -1449,6 +1449,9 @@ mlx5_flow_item_field_width(struct rte_eth_dev *dev,
case RTE_FLOW_FIELD_POINTER:
case RTE_FLOW_FIELD_VALUE:
return inherit < 0 ? 0 : inherit;
+   case RTE_FLOW_FIELD_IPV4_ECN:
+   case RTE_FLOW_FIELD_IPV6_ECN:
+   return 2;
default:
MLX5_ASSERT(false);
}
@@ -1826,6 +1829,13 @@ mlx5_flow_field_id_to_modify_info
(meta_count - width)) & meta_mask);
}
break;
+   case RTE_FLOW_FIELD_IPV4_ECN:
+   case RTE_FLOW_FIELD_IPV6_ECN:
+   info[idx] = (struct field_modify_info){1, 0,
+   MLX5_MODI_OUT_IP_ECN};
+   if (mask)
+   mask[idx] = 0x3 >> (2 - width);
+   break;
case RTE_FLOW_FIELD_POINTER:
case RTE_FLOW_FIELD_VALUE:
default:
@@ -4825,6 +4835,7 @@ flow_dv_validate_action_modify_field(struct rte_eth_dev 
*dev,
int ret = 0;
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_sh_config *config = &priv->sh->config;
+   struct mlx5_hca_attr *hca_attr = &priv->sh->cdev->config.hca_attr;
const struct rte_flow_action_modify_field *action_modify_field =
action->conf;
uint32_t dst_width = mlx5_flow_item_field_width(dev,
@@ -4952,6 +4963,15 @@ flow_dv_validate_action_modify_field(struct rte_eth_dev 
*dev,
RTE_FLOW_ERROR_TYPE_ACTION, action,
"add and sub operations"
" are not supported");
+   if (action_modify_field->dst.field == RTE_FLOW_FIELD_IPV4_ECN ||
+   action_modify_field->src.field == RTE_FLOW_FIELD_IPV4_ECN ||
+   action_modify_field->dst.field == RTE_FLOW_FIELD_IPV6_ECN ||
+   action_modify_field->src.field == RTE_FLOW_FIELD_IPV6_ECN)
+   if (!hca_attr->modify_outer_ip_ecn &&
+   !attr->transfer && !attr->group)
+   return rte_flow_error_set(error, ENOTSUP,
+   RTE_FLOW_ERROR_TYPE_ACTION, action,
+   "modifications of the ECN for current firmware 
is not supported");
return (action_modify_field->width / 32) +
   !!(action_modify_field->width % 32);
 }
-- 
2.31.1



[v1 4/4] net/mlx5: add modify field support in meter

2022-04-02 Thread Sean Zhang
This patch introduces MODIFY_FIELD action support in meter. Users can
create a meter policy with the MODIFY_FIELD action in the green/yellow
actions.

For example:

testpmd> add port meter policy 0 21 g_actions modify_field op set
dst_type ipv4_ecn src_type value src_value 3 width 2 / ...
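
For reference, a minimal C sketch of the same policy built through the
rte_mtr API (illustrative only: error handling and port setup are
omitted, policy id 21 follows the testpmd example above, the red action
is set to DROP per the mlx5 limitation documented below, and the byte
placement of the immediate ECN value is an assumption):

struct rte_flow_action_modify_field set_ecn = {
	.operation = RTE_FLOW_MODIFY_SET,
	.dst = { .field = RTE_FLOW_FIELD_IPV4_ECN },
	.src = { .field = RTE_FLOW_FIELD_VALUE, .value = { 3 } },
	.width = 2,
};
struct rte_flow_action green_actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_MODIFY_FIELD, .conf = &set_ecn },
	{ .type = RTE_FLOW_ACTION_TYPE_END },
};
struct rte_flow_action red_actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_DROP },
	{ .type = RTE_FLOW_ACTION_TYPE_END },
};
struct rte_mtr_meter_policy_params policy = {
	.actions = {
		[RTE_COLOR_GREEN] = green_actions,
		[RTE_COLOR_YELLOW] = NULL,
		[RTE_COLOR_RED] = red_actions,
	},
};
struct rte_mtr_error mtr_error;
int ret = rte_mtr_meter_policy_add(port_id, 21, &policy, &mtr_error);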

Signed-off-by: Sean Zhang 
---
 doc/guides/nics/mlx5.rst   |  4 +--
 drivers/net/mlx5/mlx5_flow.c   |  5 ++-
 drivers/net/mlx5/mlx5_flow.h   |  2 ++
 drivers/net/mlx5/mlx5_flow_dv.c| 49 +++---
 drivers/net/mlx5/mlx5_flow_meter.c |  2 +-
 5 files changed, 54 insertions(+), 8 deletions(-)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 4805d08a76..b6d51dc7c9 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -443,8 +443,8 @@ Limitations
  - yellow: NULL or END.
  - RED: DROP / END.
   - The only supported meter policy actions:
- - green: QUEUE, RSS, PORT_ID, REPRESENTED_PORT, JUMP, DROP, MARK and 
SET_TAG.
- - yellow: QUEUE, RSS, PORT_ID, REPRESENTED_PORT, JUMP, DROP, MARK and 
SET_TAG.
+ - green: QUEUE, RSS, PORT_ID, REPRESENTED_PORT, JUMP, DROP, MODIFY_FIELD, 
MARK and SET_TAG.
+ - yellow: QUEUE, RSS, PORT_ID, REPRESENTED_PORT, JUMP, DROP, 
MODIFY_FIELD, MARK and SET_TAG.
  - RED: must be DROP.
   - Policy actions of RSS for green and yellow should have the same 
configuration except queues.
   - Policy with RSS/queue action is not supported when ``dv_xmeta_en`` enabled.
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 78cb38d42b..52b5463648 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -7867,6 +7867,8 @@ mlx5_flow_destroy_mtr_acts(struct rte_eth_dev *dev,
  *   Meter policy struct.
  * @param[in] action
  *   Action specification used to create meter actions.
+ * @param[in] attr
+ *   Flow rule attributes.
  * @param[out] error
  *   Perform verbose error reporting if not NULL. Initialized in case of
  *   error only.
@@ -7878,12 +7880,13 @@ int
 mlx5_flow_create_mtr_acts(struct rte_eth_dev *dev,
  struct mlx5_flow_meter_policy *mtr_policy,
  const struct rte_flow_action *actions[RTE_COLORS],
+ struct rte_flow_attr *attr,
  struct rte_mtr_error *error)
 {
const struct mlx5_flow_driver_ops *fops;
 
fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
-   return fops->create_mtr_acts(dev, mtr_policy, actions, error);
+   return fops->create_mtr_acts(dev, mtr_policy, actions, attr, error);
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index f56115dd11..ed9b9e4876 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -1359,6 +1359,7 @@ typedef int (*mlx5_flow_create_mtr_acts_t)
(struct rte_eth_dev *dev,
  struct mlx5_flow_meter_policy *mtr_policy,
  const struct rte_flow_action *actions[RTE_COLORS],
+ struct rte_flow_attr *attr,
  struct rte_mtr_error *error);
 typedef void (*mlx5_flow_destroy_mtr_acts_t)
(struct rte_eth_dev *dev,
@@ -2017,6 +2018,7 @@ void mlx5_flow_destroy_mtr_acts(struct rte_eth_dev *dev,
 int mlx5_flow_create_mtr_acts(struct rte_eth_dev *dev,
  struct mlx5_flow_meter_policy *mtr_policy,
  const struct rte_flow_action *actions[RTE_COLORS],
+ struct rte_flow_attr *attr,
  struct rte_mtr_error *error);
 int mlx5_flow_create_policy_rules(struct rte_eth_dev *dev,
 struct mlx5_flow_meter_policy *mtr_policy);
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index e416eb5701..a01ba04c3b 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -15579,6 +15579,8 @@ flow_dv_destroy_mtr_policy_acts(struct rte_eth_dev *dev,
  *   Meter policy struct.
  * @param[in] action
  *   Action specification used to create meter actions.
+ * @param[in] attr
+ *   Pointer to the flow attributes.
  * @param[out] error
  *   Perform verbose error reporting if not NULL. Initialized in case of
  *   error only.
@@ -15590,6 +15592,7 @@ static int
 __flow_dv_create_domain_policy_acts(struct rte_eth_dev *dev,
struct mlx5_flow_meter_policy *mtr_policy,
const struct rte_flow_action *actions[RTE_COLORS],
+   struct rte_flow_attr *attr,
enum mlx5_meter_domain domain,
struct rte_mtr_error *error)
 {
@@ -15886,6 +15889,28 @@ __flow_dv_create_domain_policy_acts(struct rte_eth_dev 
*dev,
action_flags |= MLX5_FLOW_ACTION_JUMP;
break;
}
+   case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD:
+   {

[v1 1/4] ethdev: add IPv4/IPv6 ECN header rewrite action

2022-04-02 Thread Sean Zhang
From: Jiawei Wang 

This patch introduces IPv4/IPv6 ECN modify field support and adds the
corresponding testpmd CLI command support.

Usage:
modify_field op set dst_type ipv4_ecn src_type ...

For example:

flow create 0 ingress group 1 pattern eth / ipv4 /  end actions
modify_field op set dst_type ipv4_ecn src_type value src_value
0x03 width 2 / queue index 0 / end
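
For reference, a minimal C sketch of the same rule built through the
rte_flow API (illustrative only: port setup and error handling are
omitted, port_id is assumed to be a configured port, and the byte
placement of the immediate value inside src.value is an assumption):

struct rte_flow_attr attr = { .group = 1, .ingress = 1 };
struct rte_flow_item pattern[] = {
	{ .type = RTE_FLOW_ITEM_TYPE_ETH },
	{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
	{ .type = RTE_FLOW_ITEM_TYPE_END },
};
struct rte_flow_action_modify_field set_ecn = {
	.operation = RTE_FLOW_MODIFY_SET,
	.dst = { .field = RTE_FLOW_FIELD_IPV4_ECN },
	.src = { .field = RTE_FLOW_FIELD_VALUE, .value = { 0x03 } },
	.width = 2,
};
struct rte_flow_action_queue queue = { .index = 0 };
struct rte_flow_action actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_MODIFY_FIELD, .conf = &set_ecn },
	{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
	{ .type = RTE_FLOW_ACTION_TYPE_END },
};
struct rte_flow_error flow_error;
struct rte_flow *flow = rte_flow_create(port_id, &attr, pattern,
					actions, &flow_error);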

Signed-off-by: Jiawei Wang 
---
 app/test-pmd/cmdline_flow.c | 3 ++-
 lib/ethdev/rte_flow.h   | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index fc4a6d9cca..3250add834 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -795,7 +795,8 @@ static const char *const modify_field_ids[] = {
"tcp_seq_num", "tcp_ack_num", "tcp_flags",
"udp_port_src", "udp_port_dst",
"vxlan_vni", "geneve_vni", "gtp_teid",
-   "tag", "mark", "meta", "pointer", "value", NULL
+   "tag", "mark", "meta", "pointer", "value",
+   "ipv4_ecn", "ipv6_ecn", NULL
 };
 
 /** Maximum number of subsequent tokens and arguments on the stack. */
diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h
index d8827dd184..1b56f23cba 100644
--- a/lib/ethdev/rte_flow.h
+++ b/lib/ethdev/rte_flow.h
@@ -3690,6 +3690,8 @@ enum rte_flow_field_id {
RTE_FLOW_FIELD_META,/**< Metadata value. */
RTE_FLOW_FIELD_POINTER, /**< Memory pointer. */
RTE_FLOW_FIELD_VALUE,   /**< Immediate value. */
+   RTE_FLOW_FIELD_IPV4_ECN,/**< IPv4 ECN. */
+   RTE_FLOW_FIELD_IPV6_ECN,/**< IPv6 ECN. */
 };
 
 /**
-- 
2.31.1



Re: [PATCH v2] ci: add Fedora 35 container in GHA

2022-04-02 Thread David Marchand
On Fri, Apr 1, 2022 at 8:03 PM David Marchand  wrote:
>
> Build DPDK with Fedora 35 containers.
>
> GHA container support does not allow caching images and docker hub
> seems to limit image pulls.
> On the other hand, the Fedora project hub does not seem to limit them,
> so prefer this hub.
> Nevertheless, let's try to be good citizens and cache (once a day) a
> prepared image for subsequent builds.

We could move to a pipeline approach, with a first stage job preparing
the image and pushing to the cache, and the second stage pulling the
image from the cache and starting compilations.
Food for thought...

>
> Differences with the Ubuntu GHA vm images:
> - tasks are run as root in containers, no need for sudo,
> - compiler must be explicitly installed,
> - GHA artifacts can't contain a ':' in their name, and must be filtered,
>


-- 
David Marchand



[PATCH] net/iavf: fix iavf crashed on dev_stop when running in multi-process mode

2022-04-02 Thread Ke Zhang
In a multi-process environment, the secondary process operates on the
shared memory and changes the function pointer of the primary process,
so the function address cannot be found when the primary process
releases the queues, which results in a crash.

Similar to commit <20b631efe785819eb77aabbf500b3352e5731bdb>.
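
In other words (an illustrative snippet, not part of the patch): the
queue structures live in memory shared between the primary and secondary
processes, so a function pointer stored there by one process is not a
valid address in the other:

	/* Secondary process reconfigures the queue and stores its own
	 * address of def_rxq_ops into the shared queue structure. */
	rxq->ops = &def_rxq_ops;

	/* Primary process later stops or releases the queue and calls
	 * through that pointer, which does not map to def_rxq_ops in
	 * its address space, so it may crash. */
	rxq->ops->release_mbufs(rxq);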

Signed-off-by: Ke Zhang 
---
 drivers/net/iavf/iavf_rxtx.c| 27 ++---
 drivers/net/iavf/iavf_rxtx.h|  6 +++---
 drivers/net/iavf/iavf_rxtx_vec_avx512.c |  4 ++--
 drivers/net/iavf/iavf_rxtx_vec_sse.c|  8 
 4 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 16e8d021f9..197c03cd31 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -362,6 +362,9 @@ release_txq_mbufs(struct iavf_tx_queue *txq)
}
 }
 
+const struct iavf_rxq_ops *iavf_rxq_release_mbufs_ops;
+const struct iavf_txq_ops *iavf_txq_release_mbufs_ops;
+
 static const struct iavf_rxq_ops def_rxq_ops = {
.release_mbufs = release_rxq_mbufs,
 };
@@ -674,7 +677,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t 
queue_idx,
rxq->q_set = true;
dev->data->rx_queues[queue_idx] = rxq;
rxq->qrx_tail = hw->hw_addr + IAVF_QRX_TAIL1(rxq->queue_id);
-   rxq->ops = &def_rxq_ops;
+   iavf_rxq_release_mbufs_ops = &def_rxq_ops;
 
if (check_rx_bulk_allow(rxq) == true) {
PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
@@ -811,7 +814,7 @@ iavf_dev_tx_queue_setup(struct rte_eth_dev *dev,
txq->q_set = true;
dev->data->tx_queues[queue_idx] = txq;
txq->qtx_tail = hw->hw_addr + IAVF_QTX_TAIL1(queue_idx);
-   txq->ops = &def_txq_ops;
+   iavf_txq_release_mbufs_ops = &def_txq_ops;
 
if (check_tx_vec_allow(txq) == false) {
struct iavf_adapter *ad =
@@ -943,7 +946,7 @@ iavf_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t 
rx_queue_id)
}
 
rxq = dev->data->rx_queues[rx_queue_id];
-   rxq->ops->release_mbufs(rxq);
+   iavf_rxq_release_mbufs_ops->release_mbufs(rxq);
reset_rx_queue(rxq);
dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
 
@@ -971,7 +974,7 @@ iavf_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t 
tx_queue_id)
}
 
txq = dev->data->tx_queues[tx_queue_id];
-   txq->ops->release_mbufs(txq);
+   iavf_txq_release_mbufs_ops->release_mbufs(txq);
reset_tx_queue(txq);
dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
 
@@ -986,7 +989,7 @@ iavf_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t 
qid)
if (!q)
return;
 
-   q->ops->release_mbufs(q);
+   iavf_rxq_release_mbufs_ops->release_mbufs(q);
rte_free(q->sw_ring);
rte_memzone_free(q->mz);
rte_free(q);
@@ -1000,7 +1003,7 @@ iavf_dev_tx_queue_release(struct rte_eth_dev *dev, 
uint16_t qid)
if (!q)
return;
 
-   q->ops->release_mbufs(q);
+   iavf_txq_release_mbufs_ops->release_mbufs(q);
rte_free(q->sw_ring);
rte_memzone_free(q->mz);
rte_free(q);
@@ -1034,7 +1037,7 @@ iavf_stop_queues(struct rte_eth_dev *dev)
txq = dev->data->tx_queues[i];
if (!txq)
continue;
-   txq->ops->release_mbufs(txq);
+   iavf_txq_release_mbufs_ops->release_mbufs(txq);
reset_tx_queue(txq);
dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
}
@@ -1042,7 +1045,7 @@ iavf_stop_queues(struct rte_eth_dev *dev)
rxq = dev->data->rx_queues[i];
if (!rxq)
continue;
-   rxq->ops->release_mbufs(rxq);
+   iavf_rxq_release_mbufs_ops->release_mbufs(rxq);
reset_rx_queue(rxq);
dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
}
@@ -2825,7 +2828,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev)
 
for (i = 0; i < dev->data->nb_rx_queues; i++) {
rxq = dev->data->rx_queues[i];
-   (void)iavf_rxq_vec_setup(rxq);
+   (void)iavf_rxq_vec_setup(rxq, 
&iavf_rxq_release_mbufs_ops);
}
 
if (dev->data->scattered_rx) {
@@ -3008,11 +3011,11 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
continue;
 #ifdef CC_AVX512_SUPPORT
if (use_avx512)
-   iavf_txq_vec_setup_avx512(txq);
+   
iavf_txq_vec_setup_avx512(&iavf_txq_release_mbufs_ops);
else
-   iavf_txq_vec_setup(txq);
+   iavf_txq_vec_setup(&iavf_txq_release_mbufs_ops);
 #else
-   iavf_txq_vec_setup(txq);
+   


RE: [PATCH v2] eal: add seqlock

2022-04-02 Thread Morten Brørup
> From: Stephen Hemminger [mailto:step...@networkplumber.org]
> Sent: Saturday, 2 April 2022 02.54
> 
> Semantics and naming should be the same as Linux kernel or you risk
> having to reeducate too many people.

Although I do see significant value in that point, I don't consider the Linux 
kernel API the definitive golden standard in all regards. If DPDK can do 
better, it should.

However, if different naming/semantics does a better job for DPDK, then we 
should take care to avoid similar function names with different behavior than 
Linux, to reduce the risk of incorrect use by seasoned Linux kernel developers.



RE: [PATCH v3] eal: add seqlock

2022-04-02 Thread Morten Brørup
> From: Honnappa Nagarahalli [mailto:honnappa.nagaraha...@arm.com]
> Sent: Saturday, 2 April 2022 02.22
> 
> > From: Mattias Rönnblom 
> > Sent: Friday, April 1, 2022 10:08 AM
> >
> > diff --git a/lib/eal/include/rte_seqlock.h
> > b/lib/eal/include/rte_seqlock.h new file
> Other lock implementations are in lib/eal/include/generic.

I'm not sure why what goes where... e.g. rte_branch_prediction.h and 
rte_bitops.h are not in include/generic.

But I agree that keeping lock implementations in the same location makes sense.

Also, other lock implementations have their init() function in their header 
file, so you could consider getting rid of the C file. I don't care, just 
mentioning it.

> > +/**
> > + * The RTE seqlock type.
> > + */
> > +typedef struct {
> > +   uint32_t sn; /**< A sequence number for the protected data. */
> > +   rte_spinlock_t lock; /**< Spinlock used to serialize writers.  */
> }
> Suggest using ticket lock for the writer side. It should have low
> overhead when there is a single writer, but provides better
> functionality when there are multiple writers.

A spinlock and a ticket lock have the same size, so there is no memory cost 
either.

Unless using a ticket lock stands in the way of future extensions to the 
seqlock library, it seems like a good idea.

> > +__rte_experimental
> > +static inline bool
> > +rte_seqlock_read_tryunlock(const rte_seqlock_t *seqlock, uint32_t
> > +*begin_sn) {

Did anyone object to adding the __attribute__((warn_unused_result))?

Otherwise, I think you should add it.



[v4 1/3] ethdev: introduce protocol type based header split

2022-04-02 Thread wenxuanx . wu
From: Xuan Ding 

Header split consists of splitting a received packet into two separate
regions based on the packet content. The split happens after the
packet header and before the packet payload. Splitting is usually between
the packet header that can be posted to a dedicated buffer and the packet
payload that can be posted to a different buffer.

Currently, Rx buffer split supports length and offset based packet split.
Although header split is a subset of buffer split, configuring buffer
split based on length is not suitable for NICs that split based on
header protocol types, because tunneling makes the conversion from
length to protocol type impossible.

This patch extends the current buffer split to support protocol type and
offset based header split. A new proto field is introduced in the
rte_eth_rxseg_split structure (taking space from its reserved field) to
specify the header protocol type. With the Rx offload flag
RTE_ETH_RX_OFFLOAD_HEADER_SPLIT enabled and a protocol type configured,
the PMD will split the ingress packets into two separate regions.
Currently, both inner and outer L2/L3/L4 level header split are supported.

For example, let's suppose we configured the Rx queue with the
following segments:
seg0 - pool0, off0=2B
seg1 - pool1, off1=128B

With the header split type configured to RTE_ETH_RX_HEADER_SPLIT_UDP,
a packet consisting of MAC_IP_UDP_PAYLOAD will be split as follows:
seg0 - udp header @ RTE_PKTMBUF_HEADROOM + 2 in mbuf from pool0
seg1 - payload @ 128 in mbuf from pool1

The memory attributes of the split parts may also differ - for example,
mempool0 and mempool1 may belong to DPDK memory and external memory,
respectively.
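
A minimal sketch of how an application could request the configuration
above (illustrative only: RTE_ETH_RX_OFFLOAD_HEADER_SPLIT, the proto
field and RTE_ETH_RX_HEADER_SPLIT_UDP come from this series, mempool and
port setup are omitted, and attaching proto to the header segment is an
assumption):

union rte_eth_rxseg rx_seg[2];
struct rte_eth_rxconf rx_conf;

memset(rx_seg, 0, sizeof(rx_seg));
rx_seg[0].split.mp = pool0;	/* header segment */
rx_seg[0].split.offset = 2;
rx_seg[0].split.length = 0;	/* length must be zero for header split */
rx_seg[0].split.proto = RTE_ETH_RX_HEADER_SPLIT_UDP;
rx_seg[1].split.mp = pool1;	/* payload segment */
rx_seg[1].split.offset = 128;

memset(&rx_conf, 0, sizeof(rx_conf));
rx_conf.offloads = RTE_ETH_RX_OFFLOAD_HEADER_SPLIT;
rx_conf.rx_seg = rx_seg;
rx_conf.rx_nseg = 2;

/* The mempool argument is NULL: the pools come from rx_seg[]. */
ret = rte_eth_rx_queue_setup(port_id, queue_id, nb_rx_desc, socket_id,
			     &rx_conf, NULL);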

Signed-off-by: Xuan Ding 
Signed-off-by: Yuan Wang 
Signed-off-by: Wenxuan Wu 
Reviewed-by: Qi Zhang 
---
 lib/ethdev/rte_ethdev.c | 34 ++---
 lib/ethdev/rte_ethdev.h | 48 +++--
 2 files changed, 72 insertions(+), 10 deletions(-)

diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 29a3d80466..29adcdc2f0 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -1661,6 +1661,7 @@ rte_eth_rx_queue_check_split(const struct 
rte_eth_rxseg_split *rx_seg,
struct rte_mempool *mpl = rx_seg[seg_idx].mp;
uint32_t length = rx_seg[seg_idx].length;
uint32_t offset = rx_seg[seg_idx].offset;
+   uint16_t proto = rx_seg[seg_idx].proto;
 
if (mpl == NULL) {
RTE_ETHDEV_LOG(ERR, "null mempool pointer\n");
@@ -1694,13 +1695,29 @@ rte_eth_rx_queue_check_split(const struct 
rte_eth_rxseg_split *rx_seg,
}
offset += seg_idx != 0 ? 0 : RTE_PKTMBUF_HEADROOM;
*mbp_buf_size = rte_pktmbuf_data_room_size(mpl);
-   length = length != 0 ? length : *mbp_buf_size;
-   if (*mbp_buf_size < length + offset) {
-   RTE_ETHDEV_LOG(ERR,
-  "%s mbuf_data_room_size %u < %u (segment 
length=%u + segment offset=%u)\n",
-  mpl->name, *mbp_buf_size,
-  length + offset, length, offset);
-   return -EINVAL;
+   if (proto == RTE_ETH_RX_HEADER_SPLIT_NONE) {
+   /* Check buffer split. */
+   length = length != 0 ? length : *mbp_buf_size;
+   if (*mbp_buf_size < length + offset) {
+   RTE_ETHDEV_LOG(ERR,
+   "%s mbuf_data_room_size %u < %u 
(segment length=%u + segment offset=%u)\n",
+   mpl->name, *mbp_buf_size,
+   length + offset, length, offset);
+   return -EINVAL;
+   }
+   } else {
+   /* Check header split. */
+   if (length != 0) {
+   RTE_ETHDEV_LOG(ERR, "segment length should be 
set to zero in header split\n");
+   return -EINVAL;
+   }
+   if (*mbp_buf_size < offset) {
+   RTE_ETHDEV_LOG(ERR,
+   "%s mbuf_data_room_size %u < %u segment 
offset)\n",
+   mpl->name, *mbp_buf_size,
+   offset);
+   return -EINVAL;
+   }
}
}
return 0;
@@ -1778,7 +1795,8 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t 
rx_queue_id,
rx_seg = (const struct rte_eth_rxseg_split *)rx_conf->rx_seg;
n_seg = rx_conf->rx_nseg;
 
-   if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT) {
+   if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT ||
+ 

[v4 0/3] ethdev: introduce protocol type based header split

2022-04-02 Thread wenxuanx . wu
From: Xuan Ding 

Header split consists of splitting a received packet into two separate
regions based on the packet content. It is useful in some scenarios,
such as GPU acceleration. The splitting will help to enable true zero
copy and hence improve the performance significantly.

This patchset extends the current buffer split to support protocol based
header split. When an Rx queue is configured with the header split
feature, received packets will be split directly into two different
mempools.

v3->v4:
* Use RTE_ETH_RX_HEADER_SPLIT_NONE instead of 0.

v2->v3:
* Fix a PMD bug.
* Add rx queue header split check.
* Revise the log and doc.

v1->v2:
* Add support for all header split protocol types.

Xuan Ding (3):
  ethdev: introduce protocol type based header split
  app/testpmd: add header split configuration
  net/ice: support header split in Rx data path

 app/test-pmd/cmdline.c| 117 ++
 app/test-pmd/testpmd.c|   6 +-
 app/test-pmd/testpmd.h|   2 +
 drivers/net/ice/ice_ethdev.c  |  10 +-
 drivers/net/ice/ice_rxtx.c| 223 ++
 drivers/net/ice/ice_rxtx.h|  16 ++
 drivers/net/ice/ice_rxtx_vec_common.h |   3 +
 lib/ethdev/rte_ethdev.c   |  34 +++-
 lib/ethdev/rte_ethdev.h   |  48 +-
 9 files changed, 417 insertions(+), 42 deletions(-)

-- 
2.17.1



[v4 2/3] app/testpmd: add header split configuration

2022-04-02 Thread wenxuanx . wu
From: Xuan Ding 

This patch adds header split configuration in testpmd. The header split
feature is off by default. To enable header split, you need to:
1. Configure the Rx queue with the header split Rx offload enabled.
2. Set the header split protocol type.

Command to set the header split protocol type:
testpmd> port config <port_id> header_split mac|ipv4|ipv6|l3|tcp|udp|sctp|
l4|inner_mac|inner_ipv4|inner_ipv6|inner_l3|inner_tcp|
inner_udp|inner_sctp|inner_l4

Signed-off-by: Xuan Ding 
Signed-off-by: Yuan Wang 
Signed-off-by: Wenxuan Wu 
---
 app/test-pmd/cmdline.c | 117 +
 app/test-pmd/testpmd.c |   6 ++-
 app/test-pmd/testpmd.h |   2 +
 3 files changed, 124 insertions(+), 1 deletion(-)

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index 6ffea8e21a..abda81b4bc 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -866,6 +866,12 @@ static void cmd_help_long_parsed(void *parsed_result,
" Enable or disable a per port Rx offloading"
" on all Rx queues of a port\n\n"
 
+   "port config  header_split 
mac|ipv4|ipv6|l3|tcp|udp|sctp|l4|"
+   "inner_mac|inner_ipv4|inner_ipv6|inner_l3|inner_tcp|"
+   "inner_udp|inner_sctp|inner_l4\n"
+   " Configure protocol for header split"
+   " on all Rx queues of a port\n\n"
+
"port (port_id) rxq (queue_id) rx_offload vlan_strip|"
"ipv4_cksum|udp_cksum|tcp_cksum|tcp_lro|qinq_strip|"
"outer_ipv4_cksum|macsec_strip|header_split|"
@@ -16353,6 +16359,116 @@ cmdline_parse_inst_t cmd_config_per_port_rx_offload = 
{
}
 };
 
+/* config a per port header split protocol */
+struct cmd_config_per_port_headersplit_protocol_result {
+   cmdline_fixed_string_t port;
+   cmdline_fixed_string_t config;
+   uint16_t port_id;
+   cmdline_fixed_string_t headersplit;
+   cmdline_fixed_string_t protocol;
+};
+
+cmdline_parse_token_string_t 
cmd_config_per_port_headersplit_protocol_result_port =
+   TOKEN_STRING_INITIALIZER
+   (struct cmd_config_per_port_headersplit_protocol_result,
+port, "port");
+cmdline_parse_token_string_t 
cmd_config_per_port_headersplit_protocol_result_config =
+   TOKEN_STRING_INITIALIZER
+   (struct cmd_config_per_port_headersplit_protocol_result,
+config, "config");
+cmdline_parse_token_num_t 
cmd_config_per_port_headersplit_protocol_result_port_id =
+   TOKEN_NUM_INITIALIZER
+   (struct cmd_config_per_port_headersplit_protocol_result,
+port_id, RTE_UINT16);
+cmdline_parse_token_string_t 
cmd_config_per_port_headersplit_protocol_result_headersplit =
+   TOKEN_STRING_INITIALIZER
+   (struct cmd_config_per_port_headersplit_protocol_result,
+headersplit, "header_split");
+cmdline_parse_token_string_t 
cmd_config_per_port_headersplit_protocol_result_protocol =
+   TOKEN_STRING_INITIALIZER
+   (struct cmd_config_per_port_headersplit_protocol_result,
+protocol, "mac#ipv4#ipv6#l3#tcp#udp#sctp#l4#"
+  "inner_mac#inner_ipv4#inner_ipv6#inner_l3#inner_tcp#"
+  "inner_udp#inner_sctp#inner_l4");
+
+static void
+cmd_config_per_port_headersplit_protocol_parsed(void *parsed_result,
+   __rte_unused struct cmdline *cl,
+   __rte_unused void *data)
+{
+   struct cmd_config_per_port_headersplit_protocol_result *res = 
parsed_result;
+   portid_t port_id = res->port_id;
+   struct rte_port *port = &ports[port_id];
+   uint16_t protocol;
+
+   if (port_id_is_invalid(port_id, ENABLED_WARN))
+   return;
+
+   if (port->port_status != RTE_PORT_STOPPED) {
+   fprintf(stderr,
+   "Error: Can't config offload when Port %d is not 
stopped\n",
+   port_id);
+   return;
+   }
+
+   if (!strcmp(res->protocol, "mac"))
+   protocol = RTE_ETH_RX_HEADER_SPLIT_MAC;
+   else if (!strcmp(res->protocol, "ipv4"))
+   protocol = RTE_ETH_RX_HEADER_SPLIT_IPV4;
+   else if (!strcmp(res->protocol, "ipv6"))
+   protocol = RTE_ETH_RX_HEADER_SPLIT_IPV6;
+   else if (!strcmp(res->protocol, "l3"))
+   protocol = RTE_ETH_RX_HEADER_SPLIT_L3;
+   else if (!strcmp(res->protocol, "tcp"))
+   protocol = RTE_ETH_RX_HEADER_SPLIT_TCP;
+   else if (!strcmp(res->protocol, "udp"))
+   protocol = RTE_ETH_RX_HEADER_SPLIT_UDP;
+   else if (!strcmp(res->protocol, "sctp"))
+   protocol = RTE_ETH_RX_HEADER_SPLIT_SCTP;
+   else if (!strcmp(res->protocol, "l4"))
+   protocol = RTE_ETH_RX_HEADER_SPLIT_L4;
+   e

[v4 3/3] net/ice: support header split in Rx data path

2022-04-02 Thread wenxuanx . wu
From: Xuan Ding 

This patch adds support for header split in normal Rx data paths.
When the Rx queue is configured with header split for a specific
protocol type, received packets will be split directly into header
and payload parts, and the two parts will be put into different
mempools.

Currently, header split is not supported in vectorized paths.

Signed-off-by: Xuan Ding 
Signed-off-by: Yuan Wang 
Signed-off-by: Wenxuan Wu 
Reviewed-by: Qi Zhang 
---
 drivers/net/ice/ice_ethdev.c  |  10 +-
 drivers/net/ice/ice_rxtx.c| 223 ++
 drivers/net/ice/ice_rxtx.h|  16 ++
 drivers/net/ice/ice_rxtx_vec_common.h |   3 +
 4 files changed, 221 insertions(+), 31 deletions(-)

diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
index 13adcf90ed..cb32265dbe 100644
--- a/drivers/net/ice/ice_ethdev.c
+++ b/drivers/net/ice/ice_ethdev.c
@@ -3713,7 +3713,8 @@ ice_dev_info_get(struct rte_eth_dev *dev, struct 
rte_eth_dev_info *dev_info)
RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM |
RTE_ETH_RX_OFFLOAD_VLAN_EXTEND |
RTE_ETH_RX_OFFLOAD_RSS_HASH |
-   RTE_ETH_RX_OFFLOAD_TIMESTAMP;
+   RTE_ETH_RX_OFFLOAD_TIMESTAMP |
+   RTE_ETH_RX_OFFLOAD_HEADER_SPLIT;
dev_info->tx_offload_capa |=
RTE_ETH_TX_OFFLOAD_QINQ_INSERT |
RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |
@@ -3725,7 +3726,7 @@ ice_dev_info_get(struct rte_eth_dev *dev, struct 
rte_eth_dev_info *dev_info)
dev_info->flow_type_rss_offloads |= ICE_RSS_OFFLOAD_ALL;
}
 
-   dev_info->rx_queue_offload_capa = 0;
+   dev_info->rx_queue_offload_capa = RTE_ETH_RX_OFFLOAD_HEADER_SPLIT;
dev_info->tx_queue_offload_capa = RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
 
dev_info->reta_size = pf->hash_lut_size;
@@ -3794,6 +3795,11 @@ ice_dev_info_get(struct rte_eth_dev *dev, struct 
rte_eth_dev_info *dev_info)
dev_info->default_rxportconf.ring_size = ICE_BUF_SIZE_MIN;
dev_info->default_txportconf.ring_size = ICE_BUF_SIZE_MIN;
 
+   dev_info->rx_seg_capa.max_nseg = ICE_RX_MAX_NSEG;
+   dev_info->rx_seg_capa.multi_pools = 1;
+   dev_info->rx_seg_capa.offset_allowed = 0;
+   dev_info->rx_seg_capa.offset_align_log2 = 0;
+
return 0;
 }
 
diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
index 041f4bc91f..1f245c853b 100644
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -282,7 +282,6 @@ ice_program_hw_rx_queue(struct ice_rx_queue *rxq)
/* Set buffer size as the head split is disabled. */
buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mp) -
  RTE_PKTMBUF_HEADROOM);
-   rxq->rx_hdr_len = 0;
rxq->rx_buf_len = RTE_ALIGN(buf_size, (1 << ICE_RLAN_CTX_DBUF_S));
rxq->max_pkt_len =
RTE_MIN((uint32_t)ICE_SUPPORT_CHAIN_NUM * rxq->rx_buf_len,
@@ -311,11 +310,54 @@ ice_program_hw_rx_queue(struct ice_rx_queue *rxq)
 
memset(&rx_ctx, 0, sizeof(rx_ctx));
 
+   if (rxq->offloads & RTE_ETH_RX_OFFLOAD_HEADER_SPLIT) {
+   switch (rxq->rxseg[0].proto) {
+   case RTE_ETH_RX_HEADER_SPLIT_MAC:
+   rx_ctx.dtype = ICE_RX_DTYPE_HEADER_SPLIT;
+   rx_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_SPLIT_L2;
+   break;
+   case RTE_ETH_RX_HEADER_SPLIT_INNER_MAC:
+   rx_ctx.dtype = ICE_RX_DTYPE_HEADER_SPLIT;
+   rx_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_SPLIT_L2;
+   break;
+   case RTE_ETH_RX_HEADER_SPLIT_IPV4:
+   case RTE_ETH_RX_HEADER_SPLIT_IPV6:
+   case RTE_ETH_RX_HEADER_SPLIT_L3:
+   case RTE_ETH_RX_HEADER_SPLIT_INNER_IPV4:
+   case RTE_ETH_RX_HEADER_SPLIT_INNER_IPV6:
+   case RTE_ETH_RX_HEADER_SPLIT_INNER_L3:
+   rx_ctx.dtype = ICE_RX_DTYPE_HEADER_SPLIT;
+   rx_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_SPLIT_IP;
+   break;
+   case RTE_ETH_RX_HEADER_SPLIT_TCP:
+   case RTE_ETH_RX_HEADER_SPLIT_UDP:
+   case RTE_ETH_RX_HEADER_SPLIT_INNER_TCP:
+   case RTE_ETH_RX_HEADER_SPLIT_INNER_UDP:
+   rx_ctx.dtype = ICE_RX_DTYPE_HEADER_SPLIT;
+   rx_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_SPLIT_TCP_UDP;
+   break;
+   case RTE_ETH_RX_HEADER_SPLIT_SCTP:
+   case RTE_ETH_RX_HEADER_SPLIT_INNER_SCTP:
+   rx_ctx.dtype = ICE_RX_DTYPE_HEADER_SPLIT;
+   rx_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_SPLIT_SCTP;
+   break;
+   case RTE_ETH_RX_HEADER_SPLIT_NONE:
+   PMD_DRV_LOG(ERR, "Header split protocol must be 
confi

RE: [PATCH v2] examples/l3fwd: merge l3fwd-acl into l3fwd

2022-04-02 Thread Ananyev, Konstantin

Hi Sean,

My comments inline.
Thanks 
Konstantin

> l3fwd-acl contains duplicate functions to l3fwd.
> For this reason we merge l3fwd-acl code into l3fwd
> with '--lookup acl' cmdline option to run ACL.
> 
> Signed-off-by: Sean Morrissey 
> ---
> V2:
> * add doc changes
> * minor code cleanup
> ---
>  doc/guides/rel_notes/release_22_07.rst|4 +
>  doc/guides/sample_app_ug/index.rst|1 -
>  doc/guides/sample_app_ug/l3_forward.rst   |   63 +-
>  .../sample_app_ug/l3_forward_access_ctrl.rst  |  340 ---
>  examples/l3fwd-acl/Makefile   |   51 -
>  examples/l3fwd-acl/main.c | 2272 -
>  examples/l3fwd-acl/meson.build|   13 -
>  examples/l3fwd/Makefile   |2 +-
>  examples/l3fwd/l3fwd.h|   60 +-
>  examples/l3fwd/l3fwd_acl.c|  960 +++
>  examples/l3fwd/l3fwd_acl.h|  118 +
>  examples/l3fwd/l3fwd_acl_scalar.h |  163 ++
>  examples/l3fwd/l3fwd_route.h  |   16 +
>  examples/l3fwd/main.c |   97 +-
>  examples/l3fwd/meson.build|3 +-
>  examples/meson.build  |1 -
>  16 files changed, 1466 insertions(+), 2698 deletions(-)
>  delete mode 100644 doc/guides/sample_app_ug/l3_forward_access_ctrl.rst
>  delete mode 100644 examples/l3fwd-acl/Makefile
>  delete mode 100644 examples/l3fwd-acl/main.c
>  delete mode 100644 examples/l3fwd-acl/meson.build
>  create mode 100644 examples/l3fwd/l3fwd_acl.c
>  create mode 100644 examples/l3fwd/l3fwd_acl.h
>  create mode 100644 examples/l3fwd/l3fwd_acl_scalar.h
> 

...

> diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h
> index ad39496e64..6503cc2f4e 100644
> --- a/examples/l3fwd/l3fwd.h
> +++ b/examples/l3fwd/l3fwd.h
> @@ -7,6 +7,7 @@
> 
>  #include 
>  #include 
> +#include 
> 
>  #define DO_RFC_1812_CHECKS
> 
> @@ -61,6 +62,41 @@
>  struct parm_cfg {
>   const char *rule_ipv4_name;
>   const char *rule_ipv6_name;
> + enum rte_acl_classify_alg alg;
> +};
> +
> +static const struct {
> + const char *name;
> + enum rte_acl_classify_alg alg;
> +} acl_alg[] = {
> + {
> + .name = "scalar",
> + .alg = RTE_ACL_CLASSIFY_SCALAR,
> + },
> + {
> + .name = "sse",
> + .alg = RTE_ACL_CLASSIFY_SSE,
> + },
> + {
> + .name = "avx2",
> + .alg = RTE_ACL_CLASSIFY_AVX2,
> + },
> + {
> + .name = "neon",
> + .alg = RTE_ACL_CLASSIFY_NEON,
> + },
> + {
> + .name = "altivec",
> + .alg = RTE_ACL_CLASSIFY_ALTIVEC,
> + },
> + {
> + .name = "avx512x16",
> + .alg = RTE_ACL_CLASSIFY_AVX512X16,
> + },
> + {
> + .name = "avx512x32",
> + .alg = RTE_ACL_CLASSIFY_AVX512X32,
> + },
>  };

I probably wasn't clear enough in my previous comments.
I think there is no need to move the definition of acl_alg[] into the .h 
and make it static.
Instead, in the .h we can just do:
extern struct acl_algorithms acl_alg[];
and keep the actual definition in the .c.
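
Something like the following sketch (the struct tag follows the comment
above; the patch itself uses an anonymous struct type):

/* l3fwd.h */
struct acl_algorithms {
	const char *name;
	enum rte_acl_classify_alg alg;
};
extern struct acl_algorithms acl_alg[];

/* l3fwd_acl.c */
struct acl_algorithms acl_alg[] = {
	{ .name = "scalar", .alg = RTE_ACL_CLASSIFY_SCALAR },
	{ .name = "sse",    .alg = RTE_ACL_CLASSIFY_SSE },
	{ .name = "avx2",   .alg = RTE_ACL_CLASSIFY_AVX2 },
	/* ... remaining entries as in the patch ... */
};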

> 
>  struct mbuf_table {
> @@ -80,6 +116,7 @@ struct lcore_conf {
>   uint16_t tx_port_id[RTE_MAX_ETHPORTS];
>   uint16_t tx_queue_id[RTE_MAX_ETHPORTS];
>   struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];
> + struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];
>   void *ipv4_lookup_struct;
>   void *ipv6_lookup_struct;
>  } __rte_cache_aligned;
> @@ -193,7 +230,10 @@ is_valid_ipv4_pkt(struct rte_ipv4_hdr *pkt, uint32_t 
> link_len)
>  int
>  init_mem(uint16_t portid, unsigned int nb_mbuf);
> 
> -/* Function pointers for LPM, EM or FIB functionality. */
> +/* Function pointers for ACL, LPM, EM or FIB functionality. */
> +void
> +setup_acl(const int socketid);
> +
>  void
>  setup_lpm(const int socketid);
> 
> @@ -203,12 +243,19 @@ setup_hash(const int socketid);
>  void
>  setup_fib(const int socketid);
> 
> +int
> +acl_check_ptype(int portid);
> +
>  int
>  em_check_ptype(int portid);
> 
>  int
>  lpm_check_ptype(int portid);
> 
> +uint16_t
> +acl_cb_parse_ptype(uint16_t port, uint16_t queue, struct rte_mbuf *pkts[],
> +   uint16_t nb_pkts, uint16_t max_pkts, void *user_param);
> +
>  uint16_t
>  em_cb_parse_ptype(uint16_t port, uint16_t queue, struct rte_mbuf *pkts[],
> uint16_t nb_pkts, uint16_t max_pkts, void *user_param);
> @@ -217,6 +264,9 @@ uint16_t
>  lpm_cb_parse_ptype(uint16_t port, uint16_t queue, struct rte_mbuf *pkts[],
>  uint16_t nb_pkts, uint16_t max_pkts, void *user_param);
> 
> +int
> +acl_main_loop(__rte_unused void *dummy);
> +
>  int
>  em_main_loop(__rte_unused void *dummy);
> 
> @@ -278,7 +328,13 @@ int
>  fib_event_main_loop_tx_q_burst_vector(__rte_unused void *dummy);
> 
> 
> -/* Return ipv4/ipv6 fwd lookup struct for LPM, EM or FIB. */
> +/* Return ipv4/ipv6

Re: [PATCH v2] eal: add seqlock

2022-04-02 Thread Ola Liljedahl



On 4/2/22 12:25, Morten Brørup wrote:

From: Stephen Hemminger [mailto:step...@networkplumber.org]
Sent: Saturday, 2 April 2022 02.54

Semantics and naming should be the same as Linux kernel or you risk
having to reeducate too many people.

Although I do see significant value in that point, I don't consider the Linux 
kernel API the definitive golden standard in all regards. If DPDK can do 
better, it should.

However, if different naming/semantics does a better job for DPDK, then we 
should take care to avoid similar function names with different behavior than 
Linux, to reduce the risk of incorrect use by seasoned Linux kernel developers.

Couldn't have said it better myself.



Re: [PATCH v2] eal: add seqlock

2022-04-02 Thread Ola Liljedahl



On 4/2/22 02:50, Stephen Hemminger wrote:

On Wed, 30 Mar 2022 16:26:02 +0200
Mattias Rönnblom  wrote:


+   __atomic_store_n(&seqlock->sn, sn, __ATOMIC_RELAXED);
+
+   /* __ATOMIC_RELEASE to prevent stores after (in program
order)
+* from happening before the sn store.
+*/
+   rte_atomic_thread_fence(__ATOMIC_RELEASE);

Couldn't atomic store with __ATOMIC_RELEASE do same thing?


No, store-release wouldn't prevent later stores from moving up. It only 
ensures that earlier loads and stores have completed before 
store-release completes. If later stores could move before a supposed 
store-release(seqlock->sn), readers could see inconsistent (torn) data 
with a valid sequence number.




+static inline void
+rte_seqlock_write_end(rte_seqlock_t *seqlock)
+{
+   uint32_t sn;
+
+   sn = seqlock->sn + 1;
+
+   /* synchronizes-with the load acquire in rte_seqlock_begin()
*/
+   __atomic_store_n(&seqlock->sn, sn, __ATOMIC_RELEASE);
+
+   rte_spinlock_unlock(&seqlock->lock);

Atomic store is not necessary here, the atomic operation in
spinlock_unlock will assure that the sequence number update is
ordered correctly.
Load-acquire(seqlock->sn) in rte_seqlock_begin() must be paired with 
store-release(seqlock->sn) in rte_seqlock_write_end() or there wouldn't 
exist any synchronize-with relationship. Readers don't access the spin 
lock so any writer-side updates to the spin lock don't mean anything to 
readers.


Re: [PATCH v3] eal: add seqlock

2022-04-02 Thread Ola Liljedahl

On 4/1/22 17:07, Mattias Rönnblom wrote:

+
+/**
+ * End a read-side critical section.
+ *
+ * A call to this function marks the end of a read-side critical
+ * section, for @p seqlock. The application must supply the sequence
+ * number produced by the corresponding rte_seqlock_read_lock() (or,
+ * in case of a retry, the rte_seqlock_tryunlock()) call.
+ *
+ * After this function has been called, the caller should not access
+ * the protected data.
+ *
+ * In case this function returns true, the just-read data was
+ * consistent and the set of atomic and non-atomic load operations
+ * performed between rte_seqlock_read_lock() and
+ * rte_seqlock_read_tryunlock() were atomic, as a whole.
+ *
+ * In case rte_seqlock_read_tryunlock() returns false, the data was
+ * modified as it was being read and may be inconsistent, and thus
+ * should be discarded. The @p begin_sn is updated with the
+ * now-current sequence number.
+ *
+ * @param seqlock
+ *   A pointer to the seqlock.
+ * @param begin_sn
+ *   The seqlock sequence number returned by
+ *   rte_seqlock_read_lock() (potentially updated in subsequent
+ *   rte_seqlock_read_tryunlock() calls) for this critical section.
+ * @return
+ *   true or false, if the just-read seqlock-protected data was consistent
+ *   or inconsistent, respectively, at the time it was read.
+ *
+ * @see rte_seqlock_read_lock()
+ */
+__rte_experimental
+static inline bool
+rte_seqlock_read_tryunlock(const rte_seqlock_t *seqlock, uint32_t *begin_sn)
+{
+   uint32_t end_sn;
+
+   /* make sure the data loads happens before the sn load */
+   rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+
+   end_sn = __atomic_load_n(&seqlock->sn, __ATOMIC_RELAXED);


Since we are reading and potentially returning the sequence number here 
(repeating the read of the protected data), we need to use load-acquire. 
I assume it is not expected that the user will call 
rte_seqlock_read_lock() again.


Seeing this implementation, I might actually prefer the original 
implementation; I think it is cleaner. But I would like the begin 
function to also wait for an even sequence number, so that the end 
function only has to check for the same sequence number. This might 
improve performance a little, as readers won't perform one or several 
broken reads while a write is in progress. The function names are a 
different thing though.


The writer side behaves much more like a lock with mutual exclusion so 
write_lock/write_unlock makes sense.



+
+   if (unlikely(end_sn & 1 || *begin_sn != end_sn)) {
+   *begin_sn = end_sn;
+   return false;
+   }
+
+   return true;
+}
+
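
For concreteness, a sketch of the variant described above (the function
names here are illustrative, not those of the patch): the begin call
waits for an even sequence number, and the retry check only compares
sequence numbers.

static inline uint32_t
rte_seqlock_read_begin(const rte_seqlock_t *seqlock)
{
	uint32_t sn;

	/* Spin while a write is in progress (odd sn). The load-acquire
	 * synchronizes-with the store-release in the write-side end. */
	while ((sn = __atomic_load_n(&seqlock->sn, __ATOMIC_ACQUIRE)) & 1)
		rte_pause();

	return sn;
}

static inline bool
rte_seqlock_read_retry(const rte_seqlock_t *seqlock, uint32_t begin_sn)
{
	/* Make sure the protected-data loads complete before reloading sn. */
	rte_atomic_thread_fence(__ATOMIC_ACQUIRE);

	return __atomic_load_n(&seqlock->sn, __ATOMIC_RELAXED) != begin_sn;
}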


RE: [PATCH v3] eal: add seqlock

2022-04-02 Thread Honnappa Nagarahalli


> > +__rte_experimental
> > +static inline bool
> > +rte_seqlock_read_tryunlock(const rte_seqlock_t *seqlock, uint32_t
> > +*begin_sn) {
> > +   uint32_t end_sn;
> > +
> > +   /* make sure the data loads happens before the sn load */
> > +   rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
> > +
> > +   end_sn = __atomic_load_n(&seqlock->sn, __ATOMIC_RELAXED);
> 
> Since we are reading and potentially returning the sequence number here
> (repeating the read of the protected data), we need to use load-acquire.
> I assume it is not expected that the user will call
> rte_seqlock_read_lock() again.
Good point, we need a load-acquire (due to changes done in v3).

> 
> Seeing this implementation, I might actually prefer the original
> implementation, I think it is cleaner. But I would like for the begin function
> also to wait for an even sequence number, the end function would only have
> to check for same sequence number, this might improve performance a little
> bit as readers won't perform one or several broken reads while a write is in
> progress. The function names are a different thing though.
I think we need to be optimizing for the case where there is no contention 
between readers and writers (as that happens most of the time). From this 
perspective, not checking for an even seq number in the begin function would 
reduce one 'if' statement.

Going back to the earlier model is better as well, because of the load-acquire 
required in the 'rte_seqlock_read_tryunlock' function. The earlier model would 
not introduce the load-acquire for the no contention case.

> 
> The writer side behaves much more like a lock with mutual exclusion so
> write_lock/write_unlock makes sense.
> 
> > +
> > +   if (unlikely(end_sn & 1 || *begin_sn != end_sn)) {
> > +   *begin_sn = end_sn;
> > +   return false;
> > +   }
> > +
> > +   return true;
> > +}
> > +


RE: [PATCH v2] eal: add seqlock

2022-04-02 Thread Honnappa Nagarahalli


> >> +static inline void
> >> +rte_seqlock_write_end(rte_seqlock_t *seqlock) {
> >> +  uint32_t sn;
> >> +
> >> +  sn = seqlock->sn + 1;
> >> +
> >> +  /* synchronizes-with the load acquire in rte_seqlock_begin()
> >> */
> >> +  __atomic_store_n(&seqlock->sn, sn, __ATOMIC_RELEASE);
> >> +
> >> +  rte_spinlock_unlock(&seqlock->lock);
> > Atomic store is not necessary here, the atomic operation in
> > spinlock_unlock will assure that the sequence number update is
> > ordered correctly.
> Load-acquire(seqlock->sn) in rte_seqlock_begin() must be paired with
> store-release(seqlock->sn) in rte_seqlock_write_end() or there wouldn't exist
> any synchronize-with relationship. Readers don't access the spin lock so any
> writer-side updates to the spin lock don't mean anything to readers.
Agree with this assessment. The store-release in spin-lock unlock does not 
synchronize with the readers.


RE: [PATCH v3] eal: add seqlock

2022-04-02 Thread Honnappa Nagarahalli


> 
> > > +__rte_experimental
> > > +static inline bool
> > > +rte_seqlock_read_tryunlock(const rte_seqlock_t *seqlock, uint32_t
> > > +*begin_sn) {
> 
> Did anyone object to adding the __attribute__((warn_unused_result))?
> 
> Otherwise, I think you should add it.
+1


RE: [PATCH v3] eal: add seqlock

2022-04-02 Thread Morten Brørup
> From: Honnappa Nagarahalli [mailto:honnappa.nagaraha...@arm.com]
> Sent: Saturday, 2 April 2022 21.31
> 
> 
> 
> > > +__rte_experimental
> > > +static inline bool
> > > +rte_seqlock_read_tryunlock(const rte_seqlock_t *seqlock, uint32_t
> > > +*begin_sn) {
> > > + uint32_t end_sn;
> > > +
> > > + /* make sure the data loads happens before the sn load */
> > > + rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
> > > +
> > > + end_sn = __atomic_load_n(&seqlock->sn, __ATOMIC_RELAXED);
> >
> > Since we are reading and potentially returning the sequence number
> here
> > (repeating the read of the protected data), we need to use load-
> acquire.
> > I assume it is not expected that the user will call
> > rte_seqlock_read_lock() again.
> Good point, we need a load-acquire (due to changes done in v3).
> 
> >
> > Seeing this implementation, I might actually prefer the original
> > implementation, I think it is cleaner. But I would like for the begin
> function
> > also to wait for an even sequence number, the end function would only
> have
> > to check for same sequence number, this might improve performance a
> little
> > bit as readers won't perform one or several broken reads while a
> write is in
> > progress. The function names are a different thing though.
> I think we need to be optimizing for the case where there is no
> contention between readers and writers (as that happens most of the
> time). From this perspective, not checking for an even seq number in
> the begin function would reduce one 'if' statement.

I might be siding with Ola on this, but with a twist: The read_lock() should 
not wait, but test. (Or both variants could be available. Or all three, 
including the variant without checking for an even sequence number.)

My argument for this is: The write operation could take a long time to 
complete, and while this goes on, it is good for the reading threads to know at 
entry of their critical read section that the read operation will fail, so they 
can take the alternative code path instead of proceeding into the critical read 
section. Otherwise, the reading threads have to waste time reading the 
protected data, only to discard them at the end. It's an optimization based on 
the assumption that reading the protected data has some small cost, because 
this small cost adds up if done many times during a longwinded write operation.

And, although checking for the sequence number in read_trylock() adds an 'if' 
statement to it, that 'if' statement should be surrounded by likely() to reduce 
its cost in the case we are optimizing for, i.e. when no write operation is 
ongoing.

This means that read_trylock() returns a boolean, and the sequence number is 
returned in an output parameter.
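
A sketch of what that could look like (hypothetical name and signature,
matching the description above rather than the patch):

__rte_experimental
static inline bool
rte_seqlock_read_trylock(const rte_seqlock_t *seqlock, uint32_t *begin_sn)
{
	uint32_t sn = __atomic_load_n(&seqlock->sn, __ATOMIC_ACQUIRE);

	/* An odd sequence number means a write is in progress: report it
	 * immediately so the caller can take an alternative code path. */
	if (unlikely(sn & 1))
		return false;

	*begin_sn = sn;
	return true;
}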

Please note that it doesn't change the fact that read_tryunlock() can still 
fail, even though read_trylock() gave the go-ahead.

I'm trying to highlight that while we all agree to optimize for the case of 
reading while no writing is ongoing, there might be opportunity for optimizing 
for the opposite case (i.e. trying to read while writing is ongoing) at the 
same time.

I only hope it can be done with negligible performance cost for the primary case.

I'll respectfully leave the hardcore implementation details and performance 
considerations to you experts in this area. :-)



RE: [PATCH v3] eal: add seqlock

2022-04-02 Thread Honnappa Nagarahalli

> >
> > > > +__rte_experimental
> > > > +static inline bool
> > > > +rte_seqlock_read_tryunlock(const rte_seqlock_t *seqlock, uint32_t
> > > > +*begin_sn) {
> > > > +   uint32_t end_sn;
> > > > +
> > > > +   /* make sure the data loads happens before the sn load */
> > > > +   rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
> > > > +
> > > > +   end_sn = __atomic_load_n(&seqlock->sn, __ATOMIC_RELAXED);
> > >
> > > Since we are reading and potentially returning the sequence number
> > here
> > > (repeating the read of the protected data), we need to use load-
> > acquire.
> > > I assume it is not expected that the user will call
> > > rte_seqlock_read_lock() again.
> > Good point, we need a load-acquire (due to changes done in v3).
> >
> > >
> > > Seeing this implementation, I might actually prefer the original
> > > implementation, I think it is cleaner. But I would like for the
> > > begin
> > function
> > > also to wait for an even sequence number, the end function would
> > > only
> > have
> > > to check for same sequence number, this might improve performance a
> > little
> > > bit as readers won't perform one or several broken reads while a
> > write is in
> > > progress. The function names are a different thing though.
> > I think we need to be optimizing for the case where there is no
> > contention between readers and writers (as that happens most of the
> > time). From this perspective, not checking for an even seq number in
> > the begin function would reduce one 'if' statement.
> 
> I might be siding with Ola on this, but with a twist: The read_lock() should 
> not
> wait, but test. (Or both variants could be available. Or all three, including 
> the
> variant without checking for an even sequence number.)
> 
> My argument for this is: The write operation could take a long time to
> complete, and while this goes on, it is good for the reading threads to know 
> at
> entry of their critical read section that the read operation will fail, so 
> they can
> take the alternative code path instead of proceeding into the critical read
> section. Otherwise, the reading threads have to waste time reading the
> protected data, only to discard them at the end. It's an optimization based on
> the assumption that reading the protected data has some small cost, because
> this small cost adds up if done many times during a longwinded write
> operation.
> 
> And, although checking for the sequence number in read_trylock() adds an 'if'
> statement to it, that 'if' statement should be surrounded by likely() to 
> reduce
> its cost in the case we are optimizing for, i.e. when no write operation is
> ongoing.
This 'if' statement can be part of the application code as well. This would 
allow for multiple models to exist.
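
For illustration, assuming the v3 API where rte_seqlock_read_lock() returns the
current sequence number, the application-side check could look like this:

uint32_t sn = rte_seqlock_read_lock(&seqlock);

if (sn & 1) {
	/* A write is in progress: take the alternative code path and retry
	 * later, instead of reading data that would be discarded anyway. */
} else {
	do {
		/* read the protected data */
	} while (!rte_seqlock_read_tryunlock(&seqlock, &sn));
}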

> 
> This means that read_trylock() returns a boolean, and the sequence number is
> returned in an output parameter.
> 
> Please note that it doesn't change the fact that read_tryunlock() can still 
> fail,
> even though read_trylock() gave the go-ahead.
> 
> I'm trying to highlight that while we all agree to optimize for the case of
> reading while no writing is ongoing, there might be opportunity for optimizing
> for the opposite case (i.e. trying to read while writing is ongoing) at the 
> same
> time.
> 
> I only hope it can be done with negligent performance cost for the primary
> case.
> 
> I'll respectfully leave the hardcore implementation details and performance
> considerations to you experts in this area. :-)



RE: [PATCH] net/iavf: fix iavf crashed on dev_stop when running in multi-process mode

2022-04-02 Thread Zhang, Qi Z



> -Original Message-
> From: Ke Zhang 
> Sent: Saturday, April 2, 2022 5:51 PM
> To: Li, Xiaoyun ; Wu, Jingjing ;
> Xing, Beilei ; dev@dpdk.org
> Cc: Zhang, Ke1X 
> Subject: [PATCH] net/iavf: fix iavf crashed on dev_stop when running in
> multi-process mode
> 
> In the multi-process environment, the secondary process operates on the shared
> memory and changes the function pointer of the primary process, so the
> function address cannot be found when the primary process releases the queue
> resources, resulting in a crash.
> 
> Similar to commit <20b631efe785819eb77aabbf500b3352e5731bdb>
> 
> Signed-off-by: Ke Zhang 
> ---
>  drivers/net/iavf/iavf_rxtx.c| 27 ++---
>  drivers/net/iavf/iavf_rxtx.h|  6 +++---
>  drivers/net/iavf/iavf_rxtx_vec_avx512.c |  4 ++--
>  drivers/net/iavf/iavf_rxtx_vec_sse.c|  8 
>  4 files changed, 24 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c index
> 16e8d021f9..197c03cd31 100644
> --- a/drivers/net/iavf/iavf_rxtx.c
> +++ b/drivers/net/iavf/iavf_rxtx.c
> @@ -362,6 +362,9 @@ release_txq_mbufs(struct iavf_tx_queue *txq)
>   }
>  }
> 
> +const struct iavf_rxq_ops *iavf_rxq_release_mbufs_ops; const struct
> +iavf_txq_ops *iavf_txq_release_mbufs_ops;


> +
>  static const struct iavf_rxq_ops def_rxq_ops = {
>   .release_mbufs = release_rxq_mbufs,
>  };
> @@ -674,7 +677,7 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev,
> uint16_t queue_idx,
>   rxq->q_set = true;
>   dev->data->rx_queues[queue_idx] = rxq;
>   rxq->qrx_tail = hw->hw_addr + IAVF_QRX_TAIL1(rxq->queue_id);
> - rxq->ops = &def_rxq_ops;
> + iavf_rxq_release_mbufs_ops = &def_rxq_ops;

This is not correct. 
Now we replace the per-queue ops with a single global ops, which is not expected.

Please reference the method of the patch below:

commit 0ed16e01313e1f8930dc6a52b22159b20269d4e0
Author: Steve Yang 
Date:   Mon Feb 28 09:48:59 2022 +

net/iavf: fix function pointer in multi-process

...
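
If I read that fix correctly, the general shape of the approach (the
identifiers below are illustrative, not necessarily the exact ones in the
driver) is to store an index in the shared queue structure and resolve it
against a per-process ops table, so a secondary process never overwrites the
primary's function pointers:

enum iavf_rel_mbufs_type {
	IAVF_REL_MBUFS_DEF_OPS,
	IAVF_REL_MBUFS_SSE_VEC,
};

struct iavf_rx_queue_sketch {
	/* lives in shared memory: store an index, never a raw pointer */
	enum iavf_rel_mbufs_type rel_mbufs_type;
};

static void release_rxq_mbufs_def(struct iavf_rx_queue_sketch *q) { (void)q; }
static void release_rxq_mbufs_sse(struct iavf_rx_queue_sketch *q) { (void)q; }

/* Per-process table: plain static data in each process's own address space. */
static void (*const iavf_rxq_rel_mbufs_ops[])(struct iavf_rx_queue_sketch *) = {
	[IAVF_REL_MBUFS_DEF_OPS] = release_rxq_mbufs_def,
	[IAVF_REL_MBUFS_SSE_VEC] = release_rxq_mbufs_sse,
};

static void
iavf_rxq_release_mbufs(struct iavf_rx_queue_sketch *rxq)
{
	/* Resolve the function pointer locally, in whichever process calls. */
	iavf_rxq_rel_mbufs_ops[rxq->rel_mbufs_type](rxq);
}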


> 
> diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c
> b/drivers/net/iavf/iavf_rxtx_vec_sse.c
> index 717a227b2c..a782bed2e0 100644
> --- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
> +++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
> @@ -1219,16 +1219,16 @@ static const struct iavf_txq_ops sse_vec_txq_ops
> = {  };
> 
>  int __rte_cold
> -iavf_txq_vec_setup(struct iavf_tx_queue *txq)
> +iavf_txq_vec_setup(const struct iavf_txq_ops **txq_ops)
>  {
> - txq->ops = &sse_vec_txq_ops;
> + *txq_ops = &sse_vec_txq_ops;
>   return 0;
>  }
> 
>  int __rte_cold
> -iavf_rxq_vec_setup(struct iavf_rx_queue *rxq)
> +iavf_rxq_vec_setup(struct iavf_rx_queue *rxq, const struct iavf_rxq_ops
> +**rxq_ops)
>  {
> - rxq->ops = &sse_vec_rxq_ops;
> + *rxq_ops = &sse_vec_rxq_ops;
>   return iavf_rxq_vec_setup_default(rxq);  }

There seems to be a lot of redundancy in iavf_rxtx_vec_sse.c.

Can we move iavf_r(t)xq_vec_setup / sse_vec_r(t)xq_ops into iavf_rxtx.c and 
delete iavf_r(t)x_queue_release_mbufs_sse()?

Btw, we can keep this patch unchanged; a separate patch to refactor the code is 
expected.


> 
> --
> 2.25.1



Re: [PATCH v3] eal: add seqlock

2022-04-02 Thread Mattias Rönnblom

On 2022-04-02 02:21, Honnappa Nagarahalli wrote:

Hi Mattias,
Few comments inline.


-Original Message-
From: Mattias Rönnblom 
Sent: Friday, April 1, 2022 10:08 AM
To: dev@dpdk.org
Cc: tho...@monjalon.net; David Marchand ;
onar.ol...@ericsson.com; Honnappa Nagarahalli
; nd ;
konstantin.anan...@intel.com; m...@smartsharesystems.com;
step...@networkplumber.org; Mattias Rönnblom
; Ola Liljedahl 
Subject: [PATCH v3] eal: add seqlock

A sequence lock (seqlock) is a synchronization primitive which allows for
data-race-free, low-overhead, high-frequency reads, especially for data
structures shared across many cores and which are updated relatively
infrequently.

A seqlock permits multiple parallel readers. The variant of seqlock implemented
in this patch supports multiple writers as well. A spinlock is used for writer-
writer serialization.

To avoid resource reclamation and other issues, the data protected by a seqlock
is best off being self-contained (i.e., no pointers [except to constant data]).

One way to think about seqlocks is that they provide a means to perform atomic
operations on data objects larger than what the native atomic machine
instructions allow for.

DPDK seqlocks are not preemption safe on the writer side. A thread preemption
affects performance, not correctness.

A seqlock contains a sequence number, which can be thought of as the
generation of the data it protects.

A reader will
   1. Load the sequence number (sn).
   2. Load, in arbitrary order, the seqlock-protected data.
   3. Load the sn again.
   4. Check if the first and second sn are equal, and even numbered.
  If they are not, discard the loaded data, and restart from 1.

The first three steps need to be ordered using suitable memory fences.

A writer will
   1. Take the spinlock, to serialize writer access.
   2. Load the sn.
   3. Store the original sn + 1 as the new sn.
   4. Perform load and stores to the seqlock-protected data.
   5. Store the original sn + 2 as the new sn.
   6. Release the spinlock.

Proper memory fencing is required to make sure the first sn store, the data
stores, and the second sn store appear to the reader in the mentioned order.

The sn loads and stores must be atomic, but the data loads and stores need not
be.
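
As an illustration only (this is not the code from the patch), the reader and
writer protocols above could be sketched with GCC __atomic builtins roughly as
follows:

#include <stdint.h>

struct data { int a; int b; };

/* Reader: steps 1-4 above. */
static void
reader(uint32_t *sn, const struct data *shared, struct data *out)
{
	uint32_t begin, end;

	do {
		begin = __atomic_load_n(sn, __ATOMIC_ACQUIRE);         /* 1 */
		*out = *shared;                    /* 2: may be non-atomic */
		__atomic_thread_fence(__ATOMIC_ACQUIRE); /* order 2 before 3 */
		end = __atomic_load_n(sn, __ATOMIC_RELAXED);           /* 3 */
	} while (begin != end || (begin & 1));                         /* 4 */
}

/* Writer: steps 2-5 above; the writer-writer spinlock (steps 1 and 6) is
 * left out of this sketch. */
static void
writer(uint32_t *sn, struct data *shared, const struct data *new_val)
{
	uint32_t begin = __atomic_load_n(sn, __ATOMIC_RELAXED);        /* 2 */

	__atomic_store_n(sn, begin + 1, __ATOMIC_RELAXED);             /* 3 */
	__atomic_thread_fence(__ATOMIC_RELEASE); /* sn store before data */
	*shared = *new_val;                                            /* 4 */
	__atomic_store_n(sn, begin + 2, __ATOMIC_RELEASE);             /* 5 */
}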

The original seqlock design and implementation was done by Stephen
Hemminger. This is an independent implementation, using C11 atomics.

For more information on seqlocks, see
https://en.wikipedia.org/wiki/Seqlock

PATCH v3:
   * Renamed both read and write-side critical section begin/end functions
 to better match rwlock naming, per Ola Liljedahl's suggestion.
   * Added 'extern "C"' guards for C++ compatibility.
   * Refer to the main lcore as the main, and not the master.

PATCH v2:
   * Skip instead of fail unit test in case too few lcores are available.
   * Use main lcore for testing, reducing the minimum number of lcores
 required to run the unit tests to four.
   * Consistently refer to sn field as the "sequence number" in the
 documentation.
   * Fixed spelling mistakes in documentation.

Updates since RFC:
   * Added API documentation.
   * Added link to Wikipedia article in the commit message.
   * Changed seqlock sequence number field from uint64_t (which was
 overkill) to uint32_t. The sn type needs to be sufficiently large
 to assure no reader will read a sn, access the data, and then read
 the same sn, but the sn has been updated too many times during the
 read, so it has wrapped.
   * Added RTE_SEQLOCK_INITIALIZER macro for static initialization.
   * Removed the rte_seqlock struct + separate rte_seqlock_t typedef
 with an anonymous struct typedef:ed to rte_seqlock_t.

Acked-by: Morten Brørup 
Reviewed-by: Ola Liljedahl 
Signed-off-by: Mattias Rönnblom 
---
  app/test/meson.build  |   2 +
  app/test/test_seqlock.c   | 202 +++
  lib/eal/common/meson.build|   1 +
  lib/eal/common/rte_seqlock.c  |  12 ++
  lib/eal/include/meson.build   |   1 +
  lib/eal/include/rte_seqlock.h | 302 ++
  lib/eal/version.map   |   3 +
  7 files changed, 523 insertions(+)
   create mode 100644 app/test/test_seqlock.c
   create mode 100644 lib/eal/common/rte_seqlock.c
   create mode 100644 lib/eal/include/rte_seqlock.h

diff --git a/app/test/meson.build b/app/test/meson.build index
5fc1dd1b7b..5e418e8766 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -125,6 +125,7 @@ test_sources = files(
  'test_rwlock.c',
  'test_sched.c',
  'test_security.c',
+'test_seqlock.c',
  'test_service_cores.c',
  'test_spinlock.c',
  'test_stack.c',
@@ -214,6 +215,7 @@ fast_tests = [
  ['rwlock_rde_wro_autotest', true],
  ['sched_autotest', true],
  ['security_autotest', false],
+['seqlock_autotest', true],
  ['spinlock_autotest', true],
  ['stack_autotest', false],
  ['stack_lf_au

Re: [PATCH v2] eal: add seqlock

2022-04-02 Thread Mattias Rönnblom

On 2022-04-02 02:52, Stephen Hemminger wrote:

On Thu, 31 Mar 2022 16:53:00 +0200
Ola Liljedahl  wrote:


From: Ola Liljedahl 
To: Mattias Rönnblom ,  "dev@dpdk.org"
 Cc: Thomas Monjalon ,  David
Marchand ,  Onar Olsen
,  "honnappa.nagaraha...@arm.com"
,  "n...@arm.com" ,
"konstantin.anan...@intel.com" ,
"m...@smartsharesystems.com" ,
"step...@networkplumber.org"  Subject:
Re: [PATCH v2] eal: add seqlock Date: Thu, 31 Mar 2022 16:53:00 +0200
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101
Thunderbird/91.7.0

(Thunderbird suddenly refuses to edit in plain text mode, hope the
mail gets sent as text anyway)

On 3/31/22 15:38, Mattias Rönnblom wrote:


On 2022-03-31 11:04, Ola Liljedahl wrote:

On 3/31/22 09:46, Mattias Rönnblom wrote:

On 2022-03-30 16:26, Mattias Rönnblom wrote:
  



Should the rte_seqlock_read_retry() be called
rte_seqlock_read_end(), or some third alternative? I wanted to
make clear it's not just a "release the lock" function. You could
use the __attribute__((warn_unused_result)) annotation to make
clear the return value cannot be ignored, although I'm not sure
DPDK ever uses that attribute.

We have to decide how to use the seqlock API from the application
perspective.
Your current proposal:
do {
       sn = rte_seqlock_read_begin(&seqlock);
       //read protected data
} while (rte_seqlock_read_retry(&seqlock, sn));

or perhaps
sn = rte_seqlock_read_lock(&seqlock);
do {
       //read protected data
} while (!rte_seqlock_read_tryunlock(&seqlock, &sn));

Tryunlock should signal to the user that the unlock operation
might not succeed and something needs to be repeated.
  

I like that your proposal is consistent with rwlock API, although I
tend to think about a seqlock more like an arbitrary-size atomic
load/store, where begin() is the beginning of the read transaction.
  


I can see the evolution of an application where it starts to use
plain spin locks, moves to reader/writer locks for better performance
and eventually moves to seqlocks. The usage is the same, only the
characteristics (including performance) differ.



The semantics of seqlock in DPDK must be the same as what the Linux kernel
does, or you are asking for trouble. It is not a reader-writer lock in the
traditional sense.


Does "semantics" here including the naming of the functions? The overall 
semantics will be the same, except the kernel has a bunch of variants 
with different kind of write-side synchronization, if I recall correctly.


I'll try to summarize the options as I see them:

Option A: (PATCH v3):
rte_seqlock_read_lock()
rte_seqlock_read_tryunlock() /* with built-in "read restart" */
rte_seqlock_write_lock()
rte_seqlock_write_unlock()


Option B: (Linux kernel-style naming):
rte_seqlock_read_begin()
rte_seqlock_read_end()
rte_seqlock_write_begin()
rte_seqlock_write_end()

A combination, acknowledging there's a lock on the writer side, but not 
on the read side.


Option C:

rte_seqlock_read_begin()
rte_seqlock_read_retry()
rte_seqlock_write_lock()
rte_seqlock_write_unlock()


Re: [PATCH v3] eal: add seqlock

2022-04-02 Thread Mattias Rönnblom

I missed some of your comments.

On 2022-04-02 02:21, Honnappa Nagarahalli wrote:




+ * Example usage:
+ * @code{.c}
+ * #define MAX_Y_LEN (16)
+ * // Application-defined example data structure, protected by a seqlock.
+ * struct config {
+ * rte_seqlock_t lock;
+ * int param_x;
+ * char param_y[MAX_Y_LEN];
+ * };
+ *
+ * // Accessor function for reading config fields.
+ * void
+ * config_read(const struct config *config, int *param_x, char *param_y)
+ * {
+ * // Temporary variables, just to improve readability.

I think the above comment is not necessary. It is beneficial to copy the 
protected data to keep the read side critical section small.



The data here would be copied into the buffers supplied by config_read() 
anyways, so it's a copy regardless.



+ * int tentative_x;
+ * char tentative_y[MAX_Y_LEN];
+ * uint32_t sn;
+ *
+ * sn = rte_seqlock_read_lock(&config->lock);
+ * do {
+ * // Loads may be atomic or non-atomic, as in this example.
+ * tentative_x = config->param_x;
+ * strcpy(tentative_y, config->param_y);
+ * } while (!rte_seqlock_read_tryunlock(&config->lock, &sn));
+ * // An application could skip retrying, and try again later, if
+ * // progress is possible without the data.
+ *
+ * *param_x = tentative_x;
+ * strcpy(param_y, tentative_y);
+ * }
+ *
+ * // Accessor function for writing config fields.
+ * void
+ * config_update(struct config *config, int param_x, const char *param_y)
+ * {
+ * rte_seqlock_write_lock(&config->lock);
+ * // Stores may be atomic or non-atomic, as in this example.
+ * config->param_x = param_x;
+ * strcpy(config->param_y, param_y);
+ * rte_seqlock_write_unlock(&config->lock);
+ * }
+ * @endcode
+ *
+ * @see
+ * https://en.wikipedia.org/wiki/Seqlock.
+ */
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+/**
+ * The RTE seqlock type.
+ */
+typedef struct {
+   uint32_t sn; /**< A sequence number for the protected data. */
+	rte_spinlock_t lock; /**< Spinlock used to serialize writers. */
+} rte_seqlock_t;

Suggest using ticket lock for the writer side. It should have low overhead when 
there is a single writer, but provides better functionality when there are 
multiple writers.
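
A sketch of what that substitution could look like, using the existing
rte_ticketlock API (the type and function names below are illustrative, and
the memory ordering follows the writer protocol from the commit message):

#include <stdint.h>

#include <rte_atomic.h>
#include <rte_ticketlock.h>

typedef struct {
	uint32_t sn;            /* sequence number for the protected data */
	rte_ticketlock_t lock;  /* FIFO-fair writer-writer serialization */
} seqlock_ticket_t;

static inline void
seqlock_ticket_write_lock(seqlock_ticket_t *seqlock)
{
	uint32_t sn;

	/* Writers are granted access in arrival order, so no writer can be
	 * starved when several writers contend. */
	rte_ticketlock_lock(&seqlock->lock);

	sn = seqlock->sn + 1;
	__atomic_store_n(&seqlock->sn, sn, __ATOMIC_RELAXED);

	/* Make the odd sn visible before any stores to the protected data. */
	rte_atomic_thread_fence(__ATOMIC_RELEASE);
}

static inline void
seqlock_ticket_write_unlock(seqlock_ticket_t *seqlock)
{
	/* Order the data stores before the sn becomes even again. */
	__atomic_store_n(&seqlock->sn, seqlock->sn + 1, __ATOMIC_RELEASE);

	rte_ticketlock_unlock(&seqlock->lock);
}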



Is a seqlock the synchronization primitive of choice for high-contention 
cases? I would say no, but I'm not sure what you would use instead.





Re: [PATCH v3] eal: add seqlock

2022-04-02 Thread Mattias Rönnblom

On 2022-04-02 20:15, Ola Liljedahl wrote:

On 4/1/22 17:07, Mattias Rönnblom wrote:

+
+/**
+ * End a read-side critical section.
+ *
+ * A call to this function marks the end of a read-side critical
+ * section, for @p seqlock. The application must supply the sequence
+ * number produced by the corresponding rte_seqlock_read_lock() (or,
+ * in case of a retry, the rte_seqlock_read_tryunlock()) call.
+ *
+ * After this function has been called, the caller should not access
+ * the protected data.
+ *
+ * In case this function returns true, the just-read data was
+ * consistent and the set of atomic and non-atomic load operations
+ * performed between rte_seqlock_read_lock() and
+ * rte_seqlock_read_tryunlock() were atomic, as a whole.
+ *
+ * In case rte_seqlock_read_tryunlock() returns false, the data was
+ * modified as it was being read and may be inconsistent, and thus
+ * should be discarded. The @p begin_sn is updated with the
+ * now-current sequence number.
+ *
+ * @param seqlock
+ *   A pointer to the seqlock.
+ * @param begin_sn
+ *   The seqlock sequence number returned by
+ *   rte_seqlock_read_lock() (potentially updated in subsequent
+ *   rte_seqlock_read_tryunlock() calls) for this critical section.
+ * @return
+ *   true or false, if the just-read seqlock-protected data was consistent
+ *   or inconsistent, respectively, at the time it was read.
+ *
+ * @see rte_seqlock_read_lock()
+ */
+__rte_experimental
+static inline bool
+rte_seqlock_read_tryunlock(const rte_seqlock_t *seqlock, uint32_t *begin_sn)
+{
+    uint32_t end_sn;
+
+    /* make sure the data loads happens before the sn load */
+    rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+
+    end_sn = __atomic_load_n(&seqlock->sn, __ATOMIC_RELAXED);


Since we are reading and potentially returning the sequence number here 
(repeating the read of the protected data), we need to use load-acquire. 
I assume it is not expected that the user will call 
rte_seqlock_read_lock() again.




Good point.
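
Just to spell out what the adjustment could look like (a sketch against the
v3 code above, not a final version): keep the acquire fence so the preceding
data loads are ordered before the sn load, and make that load a load-acquire
so that a failed attempt can be followed directly by new data loads:

__rte_experimental
static inline bool
rte_seqlock_read_tryunlock(const rte_seqlock_t *seqlock, uint32_t *begin_sn)
{
	uint32_t end_sn;

	/* make sure the data loads happen before the sn load */
	rte_atomic_thread_fence(__ATOMIC_ACQUIRE);

	/* load-acquire: on failure, end_sn becomes the new begin_sn and the
	 * caller's re-read of the data must be ordered after this load */
	end_sn = __atomic_load_n(&seqlock->sn, __ATOMIC_ACQUIRE);

	if (unlikely(end_sn & 1 || *begin_sn != end_sn)) {
		*begin_sn = end_sn;
		return false;
	}

	return true;
}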

Seeing this implementation, I might actually prefer the original 
implementation, I think it is cleaner.


Me too.

But I would like for the begin 
function also to wait for an even sequence number; the end function 
would then only have to check for the same sequence number. This might 
improve performance a little bit, as readers won't perform one or several 
broken reads while a write is in progress. The function names are a 
different thing though.




An odd sn should be a rare case, if the seqlock is used for relatively 
low-frequency update scenarios, which is what I think it should be 
designed for.


Waiting for an even sn in read_begin() would exclude the option for the 
caller to defer reading the new data to some later time, in case it's 
being written. That in turn would force even a single writer to make 
sure its thread is not preempted, or risk blocking all worker lcores 
attempting to read the protected data.


You could complete the above API with a read_trybegin() function to 
address that issue, for those who care, but that would force some extra 
complexity on the user.


The writer side behaves much more like a lock with mutual exclusion so 
write_lock/write_unlock makes sense.



+
+    if (unlikely(end_sn & 1 || *begin_sn != end_sn)) {
+    *begin_sn = end_sn;
+    return false;
+    }
+
+    return true;
+}
+