[PATCH v5] common/idpf: refine capability get

2023-04-24 Thread beilei.xing
From: Beilei Xing 

Initialize the required capabilities in the PMD and refine the
idpf_vc_caps_get function, so that different PMDs can request
different capabilities.
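
For context, a minimal sketch of the resulting PMD-side flow (the wrapper
name is hypothetical and error handling is trimmed): the PMD fills
adapter->caps with the capabilities it requires, and the refined common
helper now sends adapter->caps as-is instead of a hard-coded list.

    static int
    example_adapter_caps_init(struct idpf_adapter *base)
    {
            /* Fill in only the capabilities this PMD needs. */
            rte_memcpy(&base->caps, &req_caps,
                       sizeof(struct virtchnl2_get_capabilities));
            /* Negotiate the pre-filled capabilities with the control plane. */
            return idpf_vc_caps_get(base);
    }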

Signed-off-by: Beilei Xing 
---
v5 changes:
 - No need to move check_pf_reset_done and mbx_init from the common module.
v4 changes:
 - No need to require the PTP capability for CPFL.
v3 changes:
 - Refine capability get.
 
 drivers/common/idpf/idpf_common_virtchnl.c | 45 ++
 drivers/net/cpfl/cpfl_ethdev.c | 40 +++
 drivers/net/idpf/idpf_ethdev.c | 40 +++
 3 files changed, 83 insertions(+), 42 deletions(-)

diff --git a/drivers/common/idpf/idpf_common_virtchnl.c b/drivers/common/idpf/idpf_common_virtchnl.c
index 9ee7259539..a4e129062e 100644
--- a/drivers/common/idpf/idpf_common_virtchnl.c
+++ b/drivers/common/idpf/idpf_common_virtchnl.c
@@ -278,51 +278,12 @@ idpf_vc_api_version_check(struct idpf_adapter *adapter)
 int
 idpf_vc_caps_get(struct idpf_adapter *adapter)
 {
-   struct virtchnl2_get_capabilities caps_msg;
struct idpf_cmd_info args;
int err;
 
-   memset(&caps_msg, 0, sizeof(struct virtchnl2_get_capabilities));
-
-   caps_msg.csum_caps =
-   VIRTCHNL2_CAP_TX_CSUM_L3_IPV4  |
-   VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_TCP  |
-   VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_UDP  |
-   VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_SCTP |
-   VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_TCP  |
-   VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_UDP  |
-   VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_SCTP |
-   VIRTCHNL2_CAP_TX_CSUM_GENERIC  |
-   VIRTCHNL2_CAP_RX_CSUM_L3_IPV4  |
-   VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_TCP  |
-   VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_UDP  |
-   VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_SCTP |
-   VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_TCP  |
-   VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_UDP  |
-   VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_SCTP |
-   VIRTCHNL2_CAP_RX_CSUM_GENERIC;
-
-   caps_msg.rss_caps =
-   VIRTCHNL2_CAP_RSS_IPV4_TCP |
-   VIRTCHNL2_CAP_RSS_IPV4_UDP |
-   VIRTCHNL2_CAP_RSS_IPV4_SCTP|
-   VIRTCHNL2_CAP_RSS_IPV4_OTHER   |
-   VIRTCHNL2_CAP_RSS_IPV6_TCP |
-   VIRTCHNL2_CAP_RSS_IPV6_UDP |
-   VIRTCHNL2_CAP_RSS_IPV6_SCTP|
-   VIRTCHNL2_CAP_RSS_IPV6_OTHER   |
-   VIRTCHNL2_CAP_RSS_IPV4_AH  |
-   VIRTCHNL2_CAP_RSS_IPV4_ESP |
-   VIRTCHNL2_CAP_RSS_IPV4_AH_ESP  |
-   VIRTCHNL2_CAP_RSS_IPV6_AH  |
-   VIRTCHNL2_CAP_RSS_IPV6_ESP |
-   VIRTCHNL2_CAP_RSS_IPV6_AH_ESP;
-
-   caps_msg.other_caps = VIRTCHNL2_CAP_WB_ON_ITR;
-
args.ops = VIRTCHNL2_OP_GET_CAPS;
-   args.in_args = (uint8_t *)&caps_msg;
-   args.in_args_size = sizeof(caps_msg);
+   args.in_args = (uint8_t *)&adapter->caps;
+   args.in_args_size = sizeof(struct virtchnl2_get_capabilities);
args.out_buffer = adapter->mbx_resp;
args.out_size = IDPF_DFLT_MBX_BUF_SIZE;
 
@@ -333,7 +294,7 @@ idpf_vc_caps_get(struct idpf_adapter *adapter)
return err;
}
 
-   rte_memcpy(&adapter->caps, args.out_buffer, sizeof(caps_msg));
+   rte_memcpy(&adapter->caps, args.out_buffer, sizeof(struct virtchnl2_get_capabilities));
 
return 0;
 }
diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c
index ede730fd50..517ae15f4c 100644
--- a/drivers/net/cpfl/cpfl_ethdev.c
+++ b/drivers/net/cpfl/cpfl_ethdev.c
@@ -1165,6 +1165,44 @@ cpfl_dev_alarm_handler(void *param)
rte_eal_alarm_set(CPFL_ALARM_INTERVAL, cpfl_dev_alarm_handler, adapter);
 }
 
+static struct virtchnl2_get_capabilities req_caps = {
+   .csum_caps =
+   VIRTCHNL2_CAP_TX_CSUM_L3_IPV4  |
+   VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_TCP  |
+   VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_UDP  |
+   VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_SCTP |
+   VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_TCP  |
+   VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_UDP  |
+   VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_SCTP |
+   VIRTCHNL2_CAP_TX_CSUM_GENERIC  |
+   VIRTCHNL2_CAP_RX_CSUM_L3_IPV4  |
+   VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_TCP  |
+   VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_UDP  |
+   VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_SCTP |
+   VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_TCP  |
+   VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_UDP  |
+   VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_SCTP |
+   VIRTCHNL2_CAP_RX_CSUM_GENERIC,
+
+   .rss_caps =
+   VIRTCHNL2_CAP_RSS_IPV4_TCP |
+   VIRTCHNL2_CAP_RSS_IPV4_UDP |
+   VIRTCHNL2_CAP_RSS_IPV4_SCTP|
+   VIRTCHNL2_CAP_RSS_IPV4_OTHER   |
+   VIRTCHNL2_CAP_RSS_IPV6_TCP |
+   VIRTCHNL2_CAP_RSS_IPV6_UDP |
+   VIRTCHNL2_CAP_RSS_IPV6_SCTP|
+   VIRTCHNL2_CAP_RSS_IPV6_OTHER   |
+   VIRTCHNL2_CAP_RSS_IPV4_AH  |
+   VIRTCHNL2_CAP_RSS_IPV4_ESP |
+   VIRTCHNL2_CAP_RSS_IPV4_AH_ESP  |
+   VIRTCHNL2_CAP_RSS_IPV6_AH  |
+   VIRTCHNL2_CAP_RSS_IPV6_ESP |
+   VIRTCHNL2_CAP_RSS_IPV6_AH_ESP,
+
+   .other_caps = VIRTCHNL2_CAP_WB_ON_ITR,
+};

RE: [PATCH 03/10] common/idpf: support queue groups add/delete

2023-04-24 Thread Liu, Mingxia



> -Original Message-
> From: Xing, Beilei 
> Sent: Friday, April 21, 2023 2:51 PM
> To: Wu, Jingjing 
> Cc: dev@dpdk.org; Liu, Mingxia ; Xing, Beilei
> 
> Subject: [PATCH 03/10] common/idpf: support queue groups add/delete
> 
> From: Beilei Xing 
> 
> This patch adds queue group add/delete virtual channel support.
> 
> Signed-off-by: Mingxia Liu 
> Signed-off-by: Beilei Xing 
> ---
>  drivers/common/idpf/idpf_common_virtchnl.c | 66
> ++
> drivers/common/idpf/idpf_common_virtchnl.h |  9 +++
>  drivers/common/idpf/version.map|  2 +
>  3 files changed, 77 insertions(+)
> 
> diff --git a/drivers/common/idpf/idpf_common_virtchnl.c
> b/drivers/common/idpf/idpf_common_virtchnl.c
> index a4e129062e..76a658bb26 100644
> --- a/drivers/common/idpf/idpf_common_virtchnl.c
> +++ b/drivers/common/idpf/idpf_common_virtchnl.c
> @@ -359,6 +359,72 @@ idpf_vc_vport_destroy(struct idpf_vport *vport)
>   return err;
>  }
> 
> +int
> +idpf_vc_queue_grps_add(struct idpf_vport *vport,
> +struct virtchnl2_add_queue_groups
> *ptp_queue_grps_info,
> +uint8_t *ptp_queue_grps_out)
[Liu, Mingxia] Better to unify the abbreviation of "port to port": this patch
uses ptp, while the next patch uses p2p.
> +{
> + struct idpf_adapter *adapter = vport->adapter;
> + struct idpf_cmd_info args;
> + int size, qg_info_size;
> + int err = -1;
> +
> + size = sizeof(*ptp_queue_grps_info) +
> +(ptp_queue_grps_info->qg_info.num_queue_groups - 1) *
> +sizeof(struct virtchnl2_queue_group_info);
> +
> + memset(&args, 0, sizeof(args));
> + args.ops = VIRTCHNL2_OP_ADD_QUEUE_GROUPS;
> + args.in_args = (uint8_t *)ptp_queue_grps_info;
> + args.in_args_size = size;
> + args.out_buffer = adapter->mbx_resp;
> + args.out_size = IDPF_DFLT_MBX_BUF_SIZE;
> +
> + err = idpf_vc_cmd_execute(adapter, &args);
> + if (err != 0) {
> + DRV_LOG(ERR,
> + "Failed to execute command of
> VIRTCHNL2_OP_ADD_QUEUE_GROUPS");
> + return err;
> + }
> +
> + rte_memcpy(ptp_queue_grps_out, args.out_buffer,
> IDPF_DFLT_MBX_BUF_SIZE);
> + return 0;
> +}
> +
> +int idpf_vc_queue_grps_del(struct idpf_vport *vport,
> +   uint16_t num_q_grps,
> +   struct virtchnl2_queue_group_id *qg_ids) {
> + struct idpf_adapter *adapter = vport->adapter;
> + struct virtchnl2_delete_queue_groups *vc_del_q_grps;
> + struct idpf_cmd_info args;
> + int size;
> + int err;
> +
> + size = sizeof(*vc_del_q_grps) +
> +(num_q_grps - 1) * sizeof(struct virtchnl2_queue_group_id);
> + vc_del_q_grps = rte_zmalloc("vc_del_q_grps", size, 0);
> +
> + vc_del_q_grps->vport_id = vport->vport_id;
> + vc_del_q_grps->num_queue_groups = num_q_grps;
> + memcpy(vc_del_q_grps->qg_ids, qg_ids,
> +num_q_grps * sizeof(struct virtchnl2_queue_group_id));
> +
> + memset(&args, 0, sizeof(args));
> + args.ops = VIRTCHNL2_OP_DEL_QUEUE_GROUPS;
> + args.in_args = (uint8_t *)vc_del_q_grps;
> + args.in_args_size = size;
> + args.out_buffer = adapter->mbx_resp;
> + args.out_size = IDPF_DFLT_MBX_BUF_SIZE;
> +
> + err = idpf_vc_cmd_execute(adapter, &args);
> + if (err != 0)
> + DRV_LOG(ERR, "Failed to execute command of
> +VIRTCHNL2_OP_DEL_QUEUE_GROUPS");
> +
> + rte_free(vc_del_q_grps);
> + return err;
> +}
> +
>  int
>  idpf_vc_rss_key_set(struct idpf_vport *vport)  { diff --git
> a/drivers/common/idpf/idpf_common_virtchnl.h
> b/drivers/common/idpf/idpf_common_virtchnl.h
> index d479d93c8e..bf1d014c8d 100644
> --- a/drivers/common/idpf/idpf_common_virtchnl.h
> +++ b/drivers/common/idpf/idpf_common_virtchnl.h
> @@ -64,4 +64,13 @@ int idpf_vc_ctlq_recv(struct idpf_ctlq_info *cq, u16
> *num_q_msg,  __rte_internal  int idpf_vc_ctlq_post_rx_buffs(struct
> idpf_hw *hw, struct idpf_ctlq_info *cq,
>  u16 *buff_count, struct idpf_dma_mem **buffs);
> +__rte_internal
> +int idpf_vc_queue_grps_del(struct idpf_vport *vport,
> +uint16_t num_q_grps,
> +struct virtchnl2_queue_group_id *qg_ids);
> __rte_internal int
> +idpf_vc_queue_grps_add(struct idpf_vport *vport,
> +struct virtchnl2_add_queue_groups
> *ptp_queue_grps_info,
> +uint8_t *ptp_queue_grps_out);
>  #endif /* _IDPF_COMMON_VIRTCHNL_H_ */
> diff --git a/drivers/common/idpf/version.map
> b/drivers/common/idpf/version.map index 7076759024..aa67f7ee27
> 100644
> --- a/drivers/common/idpf/version.map
> +++ b/drivers/common/idpf/version.map
> @@ -48,6 +48,8 @@ INTERNAL {
>   idpf_vc_irq_map_unmap_config;
>   idpf_vc_one_msg_read;
>   idpf_vc_ptype_info_query;
> + idpf_vc_queue_grps_add;
> + idpf_vc_queue_grps_del;
>   idpf_vc_queue_switch;
>   idpf_vc_queues_ena_dis;
>   idpf_vc_rss_hash_get;

[PATCH v3] net/idpf: add VF support

2023-04-24 Thread beilei.xing
From: Beilei Xing 

Support the VF whose device ID is 0x145c.
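
The two-line change in net/idpf is presumably the new entry in the PCI ID
table; a sketch under that assumption (macro names follow the existing PF
entry style and are not confirmed by the truncated diff below):

    static const struct rte_pci_id pci_id_idpf_map[] = {
            { RTE_PCI_DEVICE(IDPF_INTEL_VENDOR_ID, IDPF_DEV_ID_PF) },
            { RTE_PCI_DEVICE(IDPF_INTEL_VENDOR_ID, IDPF_DEV_ID_VF) }, /* 0x145c */
            { .vendor_id = 0, /* sentinel */ },
    };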

Signed-off-by: Beilei Xing 
---
v3 change:
 - Move the check-reset-done function and mailbox init to the common module.
v2 change:
 - Rebase code based on the new patchset:
   
https://patches.dpdk.org/project/dpdk/cover/20230404124112.71703-1-beilei.x...@intel.com/

 drivers/common/idpf/idpf_common_device.c | 140 ---
 drivers/common/idpf/idpf_common_device.h |   2 +
 drivers/net/idpf/idpf_ethdev.c   |   2 +
 3 files changed, 105 insertions(+), 39 deletions(-)

diff --git a/drivers/common/idpf/idpf_common_device.c b/drivers/common/idpf/idpf_common_device.c
index c5e7bbf66c..7da4eef82c 100644
--- a/drivers/common/idpf/idpf_common_device.c
+++ b/drivers/common/idpf/idpf_common_device.c
@@ -16,6 +16,7 @@ idpf_reset_pf(struct idpf_hw *hw)
 }
 
 #define IDPF_RESET_WAIT_CNT 100
+
 static int
 idpf_check_pf_reset_done(struct idpf_hw *hw)
 {
@@ -33,48 +34,105 @@ idpf_check_pf_reset_done(struct idpf_hw *hw)
return -EBUSY;
 }
 
-#define CTLQ_NUM 2
 static int
-idpf_init_mbx(struct idpf_hw *hw)
+idpf_check_vf_reset_done(struct idpf_hw *hw)
 {
-   struct idpf_ctlq_create_info ctlq_info[CTLQ_NUM] = {
-   {
-   .type = IDPF_CTLQ_TYPE_MAILBOX_TX,
-   .id = IDPF_CTLQ_ID,
-   .len = IDPF_CTLQ_LEN,
-   .buf_size = IDPF_DFLT_MBX_BUF_SIZE,
-   .reg = {
-   .head = PF_FW_ATQH,
-   .tail = PF_FW_ATQT,
-   .len = PF_FW_ATQLEN,
-   .bah = PF_FW_ATQBAH,
-   .bal = PF_FW_ATQBAL,
-   .len_mask = PF_FW_ATQLEN_ATQLEN_M,
-   .len_ena_mask = PF_FW_ATQLEN_ATQENABLE_M,
-   .head_mask = PF_FW_ATQH_ATQH_M,
-   }
-   },
-   {
-   .type = IDPF_CTLQ_TYPE_MAILBOX_RX,
-   .id = IDPF_CTLQ_ID,
-   .len = IDPF_CTLQ_LEN,
-   .buf_size = IDPF_DFLT_MBX_BUF_SIZE,
-   .reg = {
-   .head = PF_FW_ARQH,
-   .tail = PF_FW_ARQT,
-   .len = PF_FW_ARQLEN,
-   .bah = PF_FW_ARQBAH,
-   .bal = PF_FW_ARQBAL,
-   .len_mask = PF_FW_ARQLEN_ARQLEN_M,
-   .len_ena_mask = PF_FW_ARQLEN_ARQENABLE_M,
-   .head_mask = PF_FW_ARQH_ARQH_M,
-   }
+   uint32_t reg;
+   int i;
+
+   for (i = 0; i < IDPF_RESET_WAIT_CNT; i++) {
+   reg = IDPF_READ_REG(hw, VFGEN_RSTAT);
+   if (reg != 0xFFFFFFFF && (reg & VFGEN_RSTAT_VFR_STATE_M))
+   return 0;
+   rte_delay_ms(1000);
+   }
+
+   DRV_LOG(ERR, "VF reset timeout");
+   return -EBUSY;
+}
+
+#define IDPF_CTLQ_NUM 2
+
+struct idpf_ctlq_create_info pf_ctlq_info[IDPF_CTLQ_NUM] = {
+   {
+   .type = IDPF_CTLQ_TYPE_MAILBOX_TX,
+   .id = IDPF_CTLQ_ID,
+   .len = IDPF_CTLQ_LEN,
+   .buf_size = IDPF_DFLT_MBX_BUF_SIZE,
+   .reg = {
+   .head = PF_FW_ATQH,
+   .tail = PF_FW_ATQT,
+   .len = PF_FW_ATQLEN,
+   .bah = PF_FW_ATQBAH,
+   .bal = PF_FW_ATQBAL,
+   .len_mask = PF_FW_ATQLEN_ATQLEN_M,
+   .len_ena_mask = PF_FW_ATQLEN_ATQENABLE_M,
+   .head_mask = PF_FW_ATQH_ATQH_M,
}
-   };
+   },
+   {
+   .type = IDPF_CTLQ_TYPE_MAILBOX_RX,
+   .id = IDPF_CTLQ_ID,
+   .len = IDPF_CTLQ_LEN,
+   .buf_size = IDPF_DFLT_MBX_BUF_SIZE,
+   .reg = {
+   .head = PF_FW_ARQH,
+   .tail = PF_FW_ARQT,
+   .len = PF_FW_ARQLEN,
+   .bah = PF_FW_ARQBAH,
+   .bal = PF_FW_ARQBAL,
+   .len_mask = PF_FW_ARQLEN_ARQLEN_M,
+   .len_ena_mask = PF_FW_ARQLEN_ARQENABLE_M,
+   .head_mask = PF_FW_ARQH_ARQH_M,
+   }
+   }
+};
+
+struct idpf_ctlq_create_info vf_ctlq_info[IDPF_CTLQ_NUM] = {
+   {
+   .type = IDPF_CTLQ_TYPE_MAILBOX_TX,
+   .id = IDPF_CTLQ_ID,
+   .len = IDPF_CTLQ_LEN,
+   .buf_size = IDPF_DFLT_MBX_BUF_SIZE,
+   .reg = {
+   .head = VF_ATQH,
+   .tail = VF_ATQT,
+   .len = VF_ATQLEN,
+   .bah = VF_ATQBAH,
+   .bal = VF_ATQBAL,
+   .len_m

RE: [PATCH 04/10] net/cpfl: add haipin queue group during vpotr init

2023-04-24 Thread Liu, Mingxia



> -Original Message-
> From: Xing, Beilei 
> Sent: Friday, April 21, 2023 2:51 PM
> To: Wu, Jingjing 
> Cc: dev@dpdk.org; Liu, Mingxia ; Xing, Beilei
> 
> Subject: [PATCH 04/10] net/cpfl: add haipin queue group during vpotr init
[Liu, Mingxia] "vpotr": spelling error? Should be "vport".
> 
> From: Beilei Xing 
> 
> This patch adds haipin queue group during vpotr init.
> 
> Signed-off-by: Mingxia Liu 
> Signed-off-by: Beilei Xing 
> ---
>  drivers/net/cpfl/cpfl_ethdev.c | 125
> +  drivers/net/cpfl/cpfl_ethdev.h |
> 17 +
>  drivers/net/cpfl/cpfl_rxtx.h   |   4 ++
>  3 files changed, 146 insertions(+)
> 
> diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c
> index 114fc18f5f..ad5ddebd3a 100644
> --- a/drivers/net/cpfl/cpfl_ethdev.c
> +++ b/drivers/net/cpfl/cpfl_ethdev.c
> @@ -856,6 +856,20 @@ cpfl_dev_stop(struct rte_eth_dev *dev)
>   return 0;
>  }
> 
> +static int
> +cpfl_p2p_queue_grps_del(struct idpf_vport *vport) {
> + struct virtchnl2_queue_group_id
> qg_ids[CPFL_P2P_NB_QUEUE_GRPS] = {0};
> + int ret = 0;
> +
> + qg_ids[0].queue_group_id = CPFL_P2P_QUEUE_GRP_ID;
> + qg_ids[0].queue_group_type = VIRTCHNL2_QUEUE_GROUP_P2P;
> + ret = idpf_vc_queue_grps_del(vport, CPFL_P2P_NB_QUEUE_GRPS,
> qg_ids);
> + if (ret)
> + PMD_DRV_LOG(ERR, "Failed to delete p2p queue groups");
> + return ret;
> +}
> +
>  static int
>  cpfl_dev_close(struct rte_eth_dev *dev)  { @@ -864,6 +878,9 @@
> cpfl_dev_close(struct rte_eth_dev *dev)
>   struct cpfl_adapter_ext *adapter = CPFL_ADAPTER_TO_EXT(vport-
> >adapter);
> 
>   cpfl_dev_stop(dev);
> +
> + cpfl_p2p_queue_grps_del(vport);
> +
>   idpf_vport_deinit(vport);
> 
>   adapter->cur_vports &= ~RTE_BIT32(vport->devarg_id); @@ -
> 1350,6 +1367,96 @@ cpfl_vport_idx_alloc(struct cpfl_adapter_ext
> *adapter)
>   return vport_idx;
>  }
> 
> +static int
> +cpfl_p2p_q_grps_add(struct idpf_vport *vport,
> + struct virtchnl2_add_queue_groups
> *p2p_queue_grps_info,
> + uint8_t *p2p_q_vc_out_info)
> +{
> + int ret;
> +
> + p2p_queue_grps_info->vport_id = vport->vport_id;
> + p2p_queue_grps_info->qg_info.num_queue_groups =
> CPFL_P2P_NB_QUEUE_GRPS;
> + p2p_queue_grps_info->qg_info.groups[0].num_rx_q =
> CPFL_MAX_P2P_NB_QUEUES;
> + p2p_queue_grps_info->qg_info.groups[0].num_rx_bufq =
> CPFL_P2P_NB_RX_BUFQ;
> + p2p_queue_grps_info->qg_info.groups[0].num_tx_q =
> CPFL_MAX_P2P_NB_QUEUES;
> + p2p_queue_grps_info->qg_info.groups[0].num_tx_complq =
> CPFL_P2P_NB_TX_COMPLQ;
> + p2p_queue_grps_info->qg_info.groups[0].qg_id.queue_group_id =
> CPFL_P2P_QUEUE_GRP_ID;
> + p2p_queue_grps_info->qg_info.groups[0].qg_id.queue_group_type
> = VIRTCHNL2_QUEUE_GROUP_P2P;
> + p2p_queue_grps_info-
> >qg_info.groups[0].rx_q_grp_info.rss_lut_size = 0;
> + p2p_queue_grps_info->qg_info.groups[0].tx_q_grp_info.tx_tc = 0;
> + p2p_queue_grps_info->qg_info.groups[0].tx_q_grp_info.priority =
> 0;
> + p2p_queue_grps_info->qg_info.groups[0].tx_q_grp_info.is_sp = 0;
> + p2p_queue_grps_info->qg_info.groups[0].tx_q_grp_info.pir_weight
> = 0;
> +
> + ret = idpf_vc_queue_grps_add(vport, p2p_queue_grps_info,
> p2p_q_vc_out_info);
> + if (ret != 0) {
> + PMD_DRV_LOG(ERR, "Failed to add p2p queue groups.");
> + return ret;
> + }
> +
> + return ret;
> +}
> +
> +static int
> +cpfl_p2p_queue_info_init(struct cpfl_vport *cpfl_vport,
> +  struct virtchnl2_add_queue_groups
> *p2p_q_vc_out_info) {
> + struct p2p_queue_chunks_info *p2p_q_chunks_info =
> &cpfl_vport->p2p_q_chunks_info;
> + struct virtchnl2_queue_reg_chunks *vc_chunks_out;
> + int i, type;
> +
> + if (p2p_q_vc_out_info-
> >qg_info.groups[0].qg_id.queue_group_type !=
> + VIRTCHNL2_QUEUE_GROUP_P2P) {
> + PMD_DRV_LOG(ERR, "Add queue group response
> mismatch.");
> + return -EINVAL;
> + }
> +
> + vc_chunks_out = &p2p_q_vc_out_info->qg_info.groups[0].chunks;
> +
> + for (i = 0; i < vc_chunks_out->num_chunks; i++) {
> + type = vc_chunks_out->chunks[i].type;
> + switch (type) {
> + case VIRTCHNL2_QUEUE_TYPE_TX:
> + p2p_q_chunks_info->tx_start_qid =
> + vc_chunks_out->chunks[i].start_queue_id;
> + p2p_q_chunks_info->tx_qtail_start =
> + vc_chunks_out->chunks[i].qtail_reg_start;
> + p2p_q_chunks_info->tx_qtail_spacing =
> + vc_chunks_out->chunks[i].qtail_reg_spacing;
> + break;
> + case VIRTCHNL2_QUEUE_TYPE_RX:
> + p2p_q_chunks_info->rx_start_qid =
> + vc_chunks_out->chunks[i].start_queue_id;
> + p2p_q_chunks_info->rx_qtail_start =
> + vc_chunks_out->chunks[i].

[PATCH] cryptodev: clarify error codes returned

2023-04-24 Thread Anoob Joseph
When symmetric sessions are created, the operation may fail due to
non-critical errors. When the PMD doesn't support the specific combination
that the application requested, it can return -ENOTSUP, which the
application can handle accordingly. The API already sets rte_errno
according to the reason of the failure. Clarify this in the spec by
listing the possible error codes.
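
A sketch of how an application might consume the clarified error codes
(the helper and the fallback device are hypothetical):

    #include <rte_cryptodev.h>
    #include <rte_errno.h>

    static void *
    create_sym_session_with_fallback(uint8_t dev_id, uint8_t fallback_dev_id,
                                     struct rte_crypto_sym_xform *xforms,
                                     struct rte_mempool *sess_mp)
    {
            void *sess = rte_cryptodev_sym_session_create(dev_id, xforms, sess_mp);

            /* ENOTSUP is non-critical: retry on a device that may support
             * the requested transform combination. */
            if (sess == NULL && rte_errno == ENOTSUP)
                    sess = rte_cryptodev_sym_session_create(fallback_dev_id,
                                                            xforms, sess_mp);

            /* NULL with EINVAL or ENOMEM is left to the caller as fatal. */
            return sess;
    }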

Fixes: bdce2564dbf7 ("cryptodev: rework session framework")

Signed-off-by: Anoob Joseph 
---
 lib/cryptodev/rte_cryptodev.h | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/lib/cryptodev/rte_cryptodev.h b/lib/cryptodev/rte_cryptodev.h
index 3a9ad13660..4e3a7db0fe 100644
--- a/lib/cryptodev/rte_cryptodev.h
+++ b/lib/cryptodev/rte_cryptodev.h
@@ -1048,11 +1048,14 @@ rte_cryptodev_asym_session_pool_create(const char *name, uint32_t nb_elts,
  * @param   dev_id   ID of device that we want the session to be used on
  * @param   xforms   Symmetric crypto transform operations to apply on flow
  *   processed with this session
- * @param   mp   Mempool where the private data is allocated.
+ * @param   mp   Mempool to allocate symmetric session objects from
  *
  * @return
  *  - On success return pointer to sym-session.
- *  - On failure returns NULL.
+ *  - On failure returns NULL and rte_errno is set to the error code.
+ * EINVAL on invalid arguments.
+ * ENOMEM on memory error for session allocation.
+ * ENOTSUP if device doesn't support session configuration.
  */
 void *
 rte_cryptodev_sym_session_create(uint8_t dev_id,
-- 
2.25.1



[PATCH v3 0/7] fix and enhance idpf and cpfl timestamp

2023-04-24 Thread Wenjing Qiao
Use an alarm to save the main time, solving the timestamp rollover issue.
Enhance timestamp offload feature support for ACC. Adjust timestamp
mbuf registering at dev start.

Wenjing Qiao (7):
  common/idpf: fix 64b timestamp roll over issue
  net/idpf: save main time by alarm
  net/cpfl: save main time by alarm
  common/idpf: enhance timestamp offload feature for ACC
  common/idpf: add timestamp enable flag for rxq
  net/cpfl: register timestamp mbuf when starting dev
  net/idpf: register timestamp mbuf when starting dev

 config/meson.build |   3 +
 drivers/common/idpf/base/idpf_osdep.h  |  48 +
 drivers/common/idpf/idpf_common_rxtx.c | 133 ++---
 drivers/common/idpf/idpf_common_rxtx.h |   5 +-
 drivers/common/idpf/version.map|   4 +
 drivers/net/cpfl/cpfl_ethdev.c |  19 
 drivers/net/cpfl/cpfl_ethdev.h |   3 +
 drivers/net/cpfl/cpfl_rxtx.c   |   2 +
 drivers/net/idpf/idpf_ethdev.c |  19 
 drivers/net/idpf/idpf_ethdev.h |   3 +
 drivers/net/idpf/idpf_rxtx.c   |   3 +
 meson_options.txt  |   2 +
 12 files changed, 186 insertions(+), 58 deletions(-)

-- 
2.25.1



[PATCH v3 1/7] common/idpf: fix 64b timestamp roll over issue

2023-04-24 Thread Wenjing Qiao
Reading the MTS register at the first packet causes a timestamp
rollover issue. To support calculating the 64b timestamp, an alarm
is needed to save the main time from the registers every 1 second.
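
A worked form of the 32b-to-64b extension the patch introduces, assuming
the cached main time (time_hw) is refreshed at least once per 32-bit wrap
(about 4.3 s at nanosecond resolution):

    static uint64_t
    tstamp_32b_to_64b(uint64_t time_hw, uint32_t in_timestamp)
    {
            uint32_t base = (uint32_t)(time_hw & 0xFFFFFFFF);
            uint32_t delta = in_timestamp - base;

            /* If the 32b timestamp is more than half a wrap "ahead", it is
             * actually behind the saved time: subtract instead of add. */
            if (delta > (1u << 31))
                    return time_hw - (base - in_timestamp);
            return time_hw + delta;
    }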

Fixes: 8c6098afa075 ("common/idpf: add Rx/Tx data path")
Cc: sta...@dpdk.org

Signed-off-by: Wenjing Qiao 
---
 drivers/common/idpf/idpf_common_rxtx.c | 108 -
 drivers/common/idpf/idpf_common_rxtx.h |   3 +-
 drivers/common/idpf/version.map|   1 +
 3 files changed, 55 insertions(+), 57 deletions(-)

diff --git a/drivers/common/idpf/idpf_common_rxtx.c b/drivers/common/idpf/idpf_common_rxtx.c
index fc87e3e243..19bcb94077 100644
--- a/drivers/common/idpf/idpf_common_rxtx.c
+++ b/drivers/common/idpf/idpf_common_rxtx.c
@@ -4,6 +4,7 @@
 
 #include 
 #include 
+#include 
 
 #include "idpf_common_rxtx.h"
 
@@ -442,56 +443,23 @@ idpf_qc_split_rxq_mbufs_alloc(struct idpf_rx_queue *rxq)
return 0;
 }
 
-#define IDPF_TIMESYNC_REG_WRAP_GUARD_BAND  10000
 /* Helper function to convert a 32b nanoseconds timestamp to 64b. */
 static inline uint64_t
-idpf_tstamp_convert_32b_64b(struct idpf_adapter *ad, uint32_t flag,
-   uint32_t in_timestamp)
+idpf_tstamp_convert_32b_64b(uint64_t time_hw, uint32_t in_timestamp)
 {
-#ifdef RTE_ARCH_X86_64
-   struct idpf_hw *hw = &ad->hw;
const uint64_t mask = 0xFFFFFFFF;
-   uint32_t hi, lo, lo2, delta;
+   const uint32_t half_overflow_duration = 0x1 << 31;
+   uint32_t delta;
uint64_t ns;
 
-   if (flag != 0) {
-   IDPF_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_SHTIME_EN_M);
-   IDPF_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_EXEC_CMD_M |
-  PF_GLTSYN_CMD_SYNC_SHTIME_EN_M);
-   lo = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
-   hi = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_H_0);
-   /*
-* On typical system, the delta between lo and lo2 is ~1000ns,
-* so 10000 seems a large-enough but not overly-big guard band.
-*/
-   if (lo > (UINT32_MAX - IDPF_TIMESYNC_REG_WRAP_GUARD_BAND))
-   lo2 = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
-   else
-   lo2 = lo;
-
-   if (lo2 < lo) {
-   lo = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
-   hi = IDPF_READ_REG(hw, PF_GLTSYN_SHTIME_H_0);
-   }
-
-   ad->time_hw = ((uint64_t)hi << 32) | lo;
-   }
-
-   delta = (in_timestamp - (uint32_t)(ad->time_hw & mask));
-   if (delta > (mask / 2)) {
-   delta = ((uint32_t)(ad->time_hw & mask) - in_timestamp);
-   ns = ad->time_hw - delta;
+   delta = (in_timestamp - (uint32_t)(time_hw & mask));
+   if (delta > half_overflow_duration) {
+   delta = ((uint32_t)(time_hw & mask) - in_timestamp);
+   ns = time_hw - delta;
} else {
-   ns = ad->time_hw + delta;
+   ns = time_hw + delta;
}
-
return ns;
-#else /* !RTE_ARCH_X86_64 */
-   RTE_SET_USED(ad);
-   RTE_SET_USED(flag);
-   RTE_SET_USED(in_timestamp);
-   return 0;
-#endif /* RTE_ARCH_X86_64 */
 }
 
 #define IDPF_RX_FLEX_DESC_ADV_STATUS0_XSUM_S   \
@@ -659,9 +627,6 @@ idpf_dp_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
rx_desc_ring = rxq->rx_ring;
ptype_tbl = rxq->adapter->ptype_tbl;
 
-   if ((rxq->offloads & IDPF_RX_OFFLOAD_TIMESTAMP) != 0)
-   rxq->hw_register_set = 1;
-
while (nb_rx < nb_pkts) {
rx_desc = &rx_desc_ring[rx_id];
 
@@ -720,10 +685,8 @@ idpf_dp_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
if (idpf_timestamp_dynflag > 0 &&
(rxq->offloads & IDPF_RX_OFFLOAD_TIMESTAMP)) {
/* timestamp */
-   ts_ns = idpf_tstamp_convert_32b_64b(ad,
-   rxq->hw_register_set,
+   ts_ns = idpf_tstamp_convert_32b_64b(ad->time_hw,
    rte_le_to_cpu_32(rx_desc->ts_high));
-   rxq->hw_register_set = 0;
*RTE_MBUF_DYNFIELD(rxm,
   idpf_timestamp_dynfield_offset,
   rte_mbuf_timestamp_t *) = ts_ns;
@@ -1077,9 +1040,6 @@ idpf_dp_singleq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
rx_ring = rxq->rx_ring;
ptype_tbl = rxq->adapter->ptype_tbl;
 
-   if ((rxq->offloads & IDPF_RX_OFFLOAD_TIMESTAMP) != 0)
-   rxq->hw_register_set = 1;
-
while (nb_rx < nb_pkts) {
rxdp = &rx_ring[rx_id];
rx_status0 = rte_le_to_cpu_16(rxdp->flex_nic_wb.status_error0);
@@ -1142,10 +1102,8 @@ idpf_dp_s

[PATCH v3 2/7] net/idpf: save main time by alarm

2023-04-24 Thread Wenjing Qiao
Use an alarm to save the main time from the registers every 1 second.
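
A minimal sketch of the self-re-arming one-second alarm this patch relies
on (the callback body is elided; names are illustrative):

    #include <rte_alarm.h>

    static void
    read_time_cb(void *arg)
    {
            /* ... read the GLTSYN registers, cache the 64-bit main time ... */

            /* Re-arm so the cached time is refreshed every second. */
            rte_eal_alarm_set(1000 * 1000 /* us */, read_time_cb, arg);
    }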

Fixes: 8c6098afa075 ("common/idpf: add Rx/Tx data path")
Cc: sta...@dpdk.org

Signed-off-by: Wenjing Qiao 
---
 drivers/net/idpf/idpf_ethdev.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/idpf/idpf_ethdev.c b/drivers/net/idpf/idpf_ethdev.c
index e02ec2ec5a..3f33ffbc78 100644
--- a/drivers/net/idpf/idpf_ethdev.c
+++ b/drivers/net/idpf/idpf_ethdev.c
@@ -761,6 +761,12 @@ idpf_dev_start(struct rte_eth_dev *dev)
goto err_vec;
}
 
+   if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+   rte_eal_alarm_set(1000 * 1000,
+ &idpf_dev_read_time_hw,
+ (void *)base);
+   }
+
ret = idpf_vc_vectors_alloc(vport, req_vecs_num);
if (ret != 0) {
PMD_DRV_LOG(ERR, "Failed to allocate interrupt vectors");
@@ -810,6 +816,7 @@ static int
 idpf_dev_stop(struct rte_eth_dev *dev)
 {
struct idpf_vport *vport = dev->data->dev_private;
+   struct idpf_adapter *base = vport->adapter;
 
if (vport->stopped == 1)
return 0;
@@ -822,6 +829,11 @@ idpf_dev_stop(struct rte_eth_dev *dev)
 
idpf_vc_vectors_dealloc(vport);
 
+   if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+   rte_eal_alarm_cancel(idpf_dev_read_time_hw,
+base);
+   }
+
vport->stopped = 1;
 
return 0;
-- 
2.25.1



[PATCH v3 3/7] net/cpfl: save main time by alarm

2023-04-24 Thread Wenjing Qiao
Use an alarm to save the main time from the registers every 1 second.

Fixes: 8c6098afa075 ("common/idpf: add Rx/Tx data path")
Cc: sta...@dpdk.org

Signed-off-by: Wenjing Qiao 
---
 drivers/net/cpfl/cpfl_ethdev.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c
index ede730fd50..82d8147494 100644
--- a/drivers/net/cpfl/cpfl_ethdev.c
+++ b/drivers/net/cpfl/cpfl_ethdev.c
@@ -767,6 +767,12 @@ cpfl_dev_start(struct rte_eth_dev *dev)
goto err_vec;
}
 
+   if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+   rte_eal_alarm_set(1000 * 1000,
+ &idpf_dev_read_time_hw,
+ (void *)base);
+   }
+
ret = idpf_vc_vectors_alloc(vport, req_vecs_num);
if (ret != 0) {
PMD_DRV_LOG(ERR, "Failed to allocate interrupt vectors");
@@ -816,6 +822,7 @@ static int
 cpfl_dev_stop(struct rte_eth_dev *dev)
 {
struct idpf_vport *vport = dev->data->dev_private;
+   struct idpf_adapter *base = vport->adapter;
 
if (vport->stopped == 1)
return 0;
@@ -828,6 +835,11 @@ cpfl_dev_stop(struct rte_eth_dev *dev)
 
idpf_vc_vectors_dealloc(vport);
 
+   if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+   rte_eal_alarm_cancel(idpf_dev_read_time_hw,
+base);
+   }
+
vport->stopped = 1;
 
return 0;
-- 
2.25.1



[PATCH v3 4/7] common/idpf: enhance timestamp offload feature for ACC

2023-04-24 Thread Wenjing Qiao
For ACC, get the main time from the MTS registers via shared memory.

Note: this is a workaround; it will be removed once a generic
solution is provided.

Fixes: 8c6098afa075 ("common/idpf: add Rx/Tx data path")
Cc: sta...@dpdk.org

Signed-off-by: Wenjing Qiao 
---
 config/meson.build |  3 ++
 drivers/common/idpf/base/idpf_osdep.h  | 48 ++
 drivers/common/idpf/idpf_common_rxtx.c | 30 +---
 meson_options.txt  |  2 ++
 4 files changed, 79 insertions(+), 4 deletions(-)

diff --git a/config/meson.build b/config/meson.build
index fa730a1b14..8d74f301b4 100644
--- a/config/meson.build
+++ b/config/meson.build
@@ -316,6 +316,9 @@ endif
 if get_option('mbuf_refcnt_atomic')
 dpdk_conf.set('RTE_MBUF_REFCNT_ATOMIC', true)
 endif
+if get_option('enable_acc_timestamp')
+dpdk_conf.set('IDPF_ACC_TIMESTAMP', true)
+endif
 dpdk_conf.set10('RTE_IOVA_IN_MBUF', get_option('enable_iova_as_pa'))
 
 compile_time_cpuflags = []
diff --git a/drivers/common/idpf/base/idpf_osdep.h b/drivers/common/idpf/base/idpf_osdep.h
index 99ae9cf60a..e634939a51 100644
--- a/drivers/common/idpf/base/idpf_osdep.h
+++ b/drivers/common/idpf/base/idpf_osdep.h
@@ -24,6 +24,13 @@
 #include 
 #include 
 
+#ifdef IDPF_ACC_TIMESTAMP
+#include 
+#include 
+#include 
+#include 
+#endif /* IDPF_ACC_TIMESTAMP */
+
 #define INLINE inline
 #define STATIC static
 
@@ -361,4 +368,45 @@ idpf_hweight32(u32 num)
 
 #endif
 
+#ifdef IDPF_ACC_TIMESTAMP
+#define IDPF_ACC_TIMESYNC_BASE_ADDR 0x480D50
+#define IDPF_ACC_GLTSYN_TIME_H (IDPF_ACC_TIMESYNC_BASE_ADDR + 0x1C)
+#define IDPF_ACC_GLTSYN_TIME_L (IDPF_ACC_TIMESYNC_BASE_ADDR + 0x10)
+
+inline uint32_t
+idpf_mmap_r32(uint64_t pa)
+{
+   int fd;
+   void *bp, *vp;
+   uint32_t rval = 0xdeadbeef;
+   uint32_t ps, ml, of;
+
+   fd = open("/dev/mem", (O_RDWR | O_SYNC));
+   if (fd == -1) {
+   perror("/dev/mem");
+   return -1;
+   }
+   ml = ps = getpagesize();
+   of = (uint32_t)pa & (ps - 1);
+   if (of + (sizeof(uint32_t) * 4) > ps)
+   ml *= 2;
+   bp = mmap(NULL, ml, (PROT_READ | PROT_WRITE), MAP_SHARED, fd, pa & ~(uint64_t)(ps - 1));
+   if (bp == MAP_FAILED) {
+   perror("mmap");
+   goto done;
+   }
+
+   vp = (char *)bp + of;
+
+   rval = *(volatile uint32_t *)vp;
+   if (munmap(bp, ml) == -1)
+   perror("munmap");
+done:
+   close(fd);
+
+   return rval;
+}
+
+#endif /* IDPF_ACC_TIMESTAMP */
+
 #endif /* _IDPF_OSDEP_H_ */
diff --git a/drivers/common/idpf/idpf_common_rxtx.c b/drivers/common/idpf/idpf_common_rxtx.c
index 19bcb94077..9c58f3fb11 100644
--- a/drivers/common/idpf/idpf_common_rxtx.c
+++ b/drivers/common/idpf/idpf_common_rxtx.c
@@ -1582,12 +1582,36 @@ idpf_qc_splitq_rx_vec_setup(struct idpf_rx_queue *rxq)
 void
 idpf_dev_read_time_hw(void *cb_arg)
 {
-#ifdef RTE_ARCH_X86_64
struct idpf_adapter *ad = (struct idpf_adapter *)cb_arg;
uint32_t hi, lo, lo2;
int rc = 0;
+#ifndef IDPF_ACC_TIMESTAMP
struct idpf_hw *hw = &ad->hw;
+#endif /*  !IDPF_ACC_TIMESTAMP */
 
+#ifdef IDPF_ACC_TIMESTAMP
+
+   lo = idpf_mmap_r32(IDPF_ACC_GLTSYN_TIME_L);
+   hi = idpf_mmap_r32(IDPF_ACC_GLTSYN_TIME_H);
+   DRV_LOG(DEBUG, "lo : %X,", lo);
+   DRV_LOG(DEBUG, "hi : %X,", hi);
+   /*
+* On typical system, the delta between lo and lo2 is ~1000ns,
+* so 10000 seems a large-enough but not overly-big guard band.
+*/
+   if (lo > (UINT32_MAX - IDPF_TIMESYNC_REG_WRAP_GUARD_BAND))
+   lo2 = idpf_mmap_r32(IDPF_ACC_GLTSYN_TIME_L);
+   else
+   lo2 = lo;
+
+   if (lo2 < lo) {
+   lo = idpf_mmap_r32(IDPF_ACC_GLTSYN_TIME_L);
+   hi = idpf_mmap_r32(IDPF_ACC_GLTSYN_TIME_H);
+   }
+
+   ad->time_hw = ((uint64_t)hi << 32) | lo;
+
+#else  /* !IDPF_ACC_TIMESTAMP */
IDPF_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_SHTIME_EN_M);
IDPF_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0,
   PF_GLTSYN_CMD_SYNC_EXEC_CMD_M | PF_GLTSYN_CMD_SYNC_SHTIME_EN_M);
@@ -1608,9 +1632,7 @@ idpf_dev_read_time_hw(void *cb_arg)
}
 
ad->time_hw = ((uint64_t)hi << 32) | lo;
-#else  /* !RTE_ARCH_X86_64 */
-   ad->time_hw = 0;
-#endif /* RTE_ARCH_X86_64 */
+#endif /* IDPF_ACC_TIMESTAMP */
 
/* re-alarm watchdog */
rc = rte_eal_alarm_set(1000 * 1000, &idpf_dev_read_time_hw, cb_arg);
diff --git a/meson_options.txt b/meson_options.txt
index 82c8297065..31fc634aa0 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -52,3 +52,5 @@ option('tests', type: 'boolean', value: true, description:
'build unit tests')
 option('use_hpet', type: 'boolean', value: false, description:
'use HPET timer in EAL')
+option('enable_acc_timestamp', type: 'boolean', value: false, description:
+   'enable timestamp on ACC.')
-- 
2.25.1



[PATCH v3 5/7] common/idpf: add timestamp enable flag for rxq

2023-04-24 Thread Wenjing Qiao
An Rx queue can be configured with timestamp offload, so add a
timestamp enable flag for the Rx queue.

Fixes: 8c6098afa075 ("common/idpf: add Rx/Tx data path")
Cc: sta...@dpdk.org

Signed-off-by: Wenjing Qiao 
Suggested-by: Jingjing Wu 
---
 drivers/common/idpf/idpf_common_rxtx.c | 3 ++-
 drivers/common/idpf/idpf_common_rxtx.h | 2 ++
 drivers/common/idpf/version.map| 3 +++
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/common/idpf/idpf_common_rxtx.c b/drivers/common/idpf/idpf_common_rxtx.c
index 9c58f3fb11..7afe7afe3f 100644
--- a/drivers/common/idpf/idpf_common_rxtx.c
+++ b/drivers/common/idpf/idpf_common_rxtx.c
@@ -354,7 +354,7 @@ int
 idpf_qc_ts_mbuf_register(struct idpf_rx_queue *rxq)
 {
int err;
-   if ((rxq->offloads & IDPF_RX_OFFLOAD_TIMESTAMP) != 0) {
+   if (!rxq->ts_enable && (rxq->offloads & IDPF_RX_OFFLOAD_TIMESTAMP)) {
/* Register mbuf field and flag for Rx timestamp */
err = rte_mbuf_dyn_rx_timestamp_register(&idpf_timestamp_dynfield_offset,
 &idpf_timestamp_dynflag);
@@ -363,6 +363,7 @@ idpf_qc_ts_mbuf_register(struct idpf_rx_queue *rxq)
"Cannot register mbuf field/flag for 
timestamp");
return -EINVAL;
}
+   rxq->ts_enable = TRUE;
}
return 0;
 }
diff --git a/drivers/common/idpf/idpf_common_rxtx.h b/drivers/common/idpf/idpf_common_rxtx.h
index af1425eb3f..cb7f5a3ba8 100644
--- a/drivers/common/idpf/idpf_common_rxtx.h
+++ b/drivers/common/idpf/idpf_common_rxtx.h
@@ -142,6 +142,8 @@ struct idpf_rx_queue {
struct idpf_rx_queue *bufq2;
 
uint64_t offloads;
+
+   bool ts_enable; /* if timestamp is enabled */
 };
 
 struct idpf_tx_entry {
diff --git a/drivers/common/idpf/version.map b/drivers/common/idpf/version.map
index c67c554911..15b42b4d2e 100644
--- a/drivers/common/idpf/version.map
+++ b/drivers/common/idpf/version.map
@@ -69,5 +69,8 @@ INTERNAL {
idpf_vport_rss_config;
idpf_vport_stats_update;
 
+   idpf_timestamp_dynfield_offset;
+   idpf_timestamp_dynflag;
+
local: *;
 };
-- 
2.25.1



[PATCH v3 6/7] net/cpfl: register timestamp mbuf when starting dev

2023-04-24 Thread Wenjing Qiao
Since timestamp is only supported at the port level, the timestamp
mbuf should be registered at the dev start stage.

Fixes: 8c6098afa075 ("common/idpf: add Rx/Tx data path")
Cc: sta...@dpdk.org

Signed-off-by: Wenjing Qiao 
Suggested-by: Jingjing Wu 
---
 drivers/net/cpfl/cpfl_ethdev.c | 7 +++
 drivers/net/cpfl/cpfl_ethdev.h | 3 +++
 drivers/net/cpfl/cpfl_rxtx.c   | 2 ++
 3 files changed, 12 insertions(+)

diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c
index 82d8147494..416273f567 100644
--- a/drivers/net/cpfl/cpfl_ethdev.c
+++ b/drivers/net/cpfl/cpfl_ethdev.c
@@ -771,6 +771,13 @@ cpfl_dev_start(struct rte_eth_dev *dev)
rte_eal_alarm_set(1000 * 1000,
  &idpf_dev_read_time_hw,
  (void *)base);
+   /* Register mbuf field and flag for Rx timestamp */
+   ret = rte_mbuf_dyn_rx_timestamp_register(&idpf_timestamp_dynfield_offset,
+&idpf_timestamp_dynflag);
+   if (ret != 0) {
+   PMD_DRV_LOG(ERR, "Cannot register mbuf field/flag for timestamp");
+   return -EINVAL;
+   }
}
 
ret = idpf_vc_vectors_alloc(vport, req_vecs_num);
diff --git a/drivers/net/cpfl/cpfl_ethdev.h b/drivers/net/cpfl/cpfl_ethdev.h
index 200dfcac02..eec253bc77 100644
--- a/drivers/net/cpfl/cpfl_ethdev.h
+++ b/drivers/net/cpfl/cpfl_ethdev.h
@@ -57,6 +57,9 @@
 /* Device IDs */
 #define IDPF_DEV_ID_CPF0x1453
 
+extern int idpf_timestamp_dynfield_offset;
+extern uint64_t idpf_timestamp_dynflag;
+
 struct cpfl_vport_param {
struct cpfl_adapter_ext *adapter;
uint16_t devarg_id; /* arg id from user */
diff --git a/drivers/net/cpfl/cpfl_rxtx.c b/drivers/net/cpfl/cpfl_rxtx.c
index de59b31b3d..cdb5b37da0 100644
--- a/drivers/net/cpfl/cpfl_rxtx.c
+++ b/drivers/net/cpfl/cpfl_rxtx.c
@@ -529,6 +529,8 @@ cpfl_rx_queue_init(struct rte_eth_dev *dev, uint16_t rx_queue_id)
frame_size > rxq->rx_buf_len)
dev->data->scattered_rx = 1;
 
+   if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+   rxq->ts_enable = TRUE;
err = idpf_qc_ts_mbuf_register(rxq);
if (err != 0) {
PMD_DRV_LOG(ERR, "fail to register timestamp mbuf %u",
-- 
2.25.1



[PATCH v3 7/7] net/idpf: register timestamp mbuf when starting dev

2023-04-24 Thread Wenjing Qiao
Since timestamp is only supported at the port level, the timestamp
mbuf should be registered at the dev start stage.

Fixes: 8c6098afa075 ("common/idpf: add Rx/Tx data path")
Cc: sta...@dpdk.org

Signed-off-by: Wenjing Qiao 
Suggested-by: Jingjing Wu 
---
 drivers/net/idpf/idpf_ethdev.c | 7 +++
 drivers/net/idpf/idpf_ethdev.h | 3 +++
 drivers/net/idpf/idpf_rxtx.c   | 3 +++
 3 files changed, 13 insertions(+)

diff --git a/drivers/net/idpf/idpf_ethdev.c b/drivers/net/idpf/idpf_ethdev.c
index 3f33ffbc78..7c43f51c25 100644
--- a/drivers/net/idpf/idpf_ethdev.c
+++ b/drivers/net/idpf/idpf_ethdev.c
@@ -765,6 +765,13 @@ idpf_dev_start(struct rte_eth_dev *dev)
rte_eal_alarm_set(1000 * 1000,
  &idpf_dev_read_time_hw,
  (void *)base);
+   /* Register mbuf field and flag for Rx timestamp */
+   ret = rte_mbuf_dyn_rx_timestamp_register(&idpf_timestamp_dynfield_offset,
+&idpf_timestamp_dynflag);
+   if (ret != 0) {
+   PMD_DRV_LOG(ERR, "Cannot register mbuf field/flag for timestamp");
+   return -EINVAL;
+   }
}
 
ret = idpf_vc_vectors_alloc(vport, req_vecs_num);
diff --git a/drivers/net/idpf/idpf_ethdev.h b/drivers/net/idpf/idpf_ethdev.h
index 3c2c932438..256e348710 100644
--- a/drivers/net/idpf/idpf_ethdev.h
+++ b/drivers/net/idpf/idpf_ethdev.h
@@ -55,6 +55,9 @@
 
 #define IDPF_ALARM_INTERVAL5 /* us */
 
+extern int idpf_timestamp_dynfield_offset;
+extern uint64_t idpf_timestamp_dynflag;
+
 struct idpf_vport_param {
struct idpf_adapter_ext *adapter;
uint16_t devarg_id; /* arg id from user */
diff --git a/drivers/net/idpf/idpf_rxtx.c b/drivers/net/idpf/idpf_rxtx.c
index 414f9a37f6..1aaf0142d2 100644
--- a/drivers/net/idpf/idpf_rxtx.c
+++ b/drivers/net/idpf/idpf_rxtx.c
@@ -529,6 +529,9 @@ idpf_rx_queue_init(struct rte_eth_dev *dev, uint16_t rx_queue_id)
frame_size > rxq->rx_buf_len)
dev->data->scattered_rx = 1;
 
+   if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+   rxq->ts_enable = TRUE;
+
err = idpf_qc_ts_mbuf_register(rxq);
if (err != 0) {
PMD_DRV_LOG(ERR, "fail to residter timestamp mbuf %u",
-- 
2.25.1



RE: [PATCH 06/10] net/cpfl: support hairpin queue configuration

2023-04-24 Thread Liu, Mingxia



> -Original Message-
> From: Xing, Beilei 
> Sent: Friday, April 21, 2023 2:51 PM
> To: Wu, Jingjing 
> Cc: dev@dpdk.org; Liu, Mingxia ; Xing, Beilei
> ; Wang, Xiao W 
> Subject: [PATCH 06/10] net/cpfl: support hairpin queue configuration
> 
> From: Beilei Xing 
> 
> This patch supports Rx/Tx hairpin queue configuration.
> 
> Signed-off-by: Xiao Wang 
> Signed-off-by: Mingxia Liu 
> Signed-off-by: Beilei Xing 
> ---
>  drivers/common/idpf/idpf_common_virtchnl.c |  70 +++
>  drivers/common/idpf/idpf_common_virtchnl.h |   6 +
>  drivers/common/idpf/version.map|   2 +
>  drivers/net/cpfl/cpfl_ethdev.c | 136 -
>  drivers/net/cpfl/cpfl_rxtx.c   |  80 
>  drivers/net/cpfl/cpfl_rxtx.h   |   7 ++
>  6 files changed, 297 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/common/idpf/idpf_common_virtchnl.c
> b/drivers/common/idpf/idpf_common_virtchnl.c
> index 76a658bb26..50cd43a8dd 100644
> --- a/drivers/common/idpf/idpf_common_virtchnl.c
> +++ b/drivers/common/idpf/idpf_common_virtchnl.c
> @@ -1050,6 +1050,41 @@ idpf_vc_rxq_config(struct idpf_vport *vport, struct
> idpf_rx_queue *rxq)
>   return err;
>  }
> 
> +int idpf_vc_rxq_config_by_info(struct idpf_vport *vport, struct
> virtchnl2_rxq_info *rxq_info,
> +uint16_t num_qs)
> +{
> + struct idpf_adapter *adapter = vport->adapter;
> + struct virtchnl2_config_rx_queues *vc_rxqs = NULL;
> + struct idpf_cmd_info args;
> + int size, err, i;
> +
> + size = sizeof(*vc_rxqs) + (num_qs - 1) *
> + sizeof(struct virtchnl2_rxq_info);
> + vc_rxqs = rte_zmalloc("cfg_rxqs", size, 0);
> + if (vc_rxqs == NULL) {
> + DRV_LOG(ERR, "Failed to allocate virtchnl2_config_rx_queues");
> + err = -ENOMEM;
> + return err;
> + }
> + vc_rxqs->vport_id = vport->vport_id;
> + vc_rxqs->num_qinfo = num_qs;
> + memcpy(vc_rxqs->qinfo, rxq_info, num_qs * sizeof(struct
> +virtchnl2_rxq_info));
> +
> + memset(&args, 0, sizeof(args));
> + args.ops = VIRTCHNL2_OP_CONFIG_RX_QUEUES;
> + args.in_args = (uint8_t *)vc_rxqs;
> + args.in_args_size = size;
> + args.out_buffer = adapter->mbx_resp;
> + args.out_size = IDPF_DFLT_MBX_BUF_SIZE;
> +
> + err = idpf_vc_cmd_execute(adapter, &args);
> + rte_free(vc_rxqs);
> + if (err != 0)
> + DRV_LOG(ERR, "Failed to execute command of
> +VIRTCHNL2_OP_CONFIG_RX_QUEUES");
> +
> + return err;
> +}
> +
>  int
>  idpf_vc_txq_config(struct idpf_vport *vport, struct idpf_tx_queue *txq)  { 
> @@ -
> 1121,6 +1156,41 @@ idpf_vc_txq_config(struct idpf_vport *vport, struct
> idpf_tx_queue *txq)
>   return err;
>  }
> 
> +int
> +idpf_vc_txq_config_by_info(struct idpf_vport *vport, struct 
> virtchnl2_txq_info
> *txq_info,
> +uint16_t num_qs)
> +{
> + struct idpf_adapter *adapter = vport->adapter;
> + struct virtchnl2_config_tx_queues *vc_txqs = NULL;
> + struct idpf_cmd_info args;
> + int size, err;
> +
> + size = sizeof(*vc_txqs) + (num_qs - 1) * sizeof(struct 
> virtchnl2_txq_info);
> + vc_txqs = rte_zmalloc("cfg_txqs", size, 0);
> + if (vc_txqs == NULL) {
> + DRV_LOG(ERR, "Failed to allocate virtchnl2_config_tx_queues");
> + err = -ENOMEM;
> + return err;
> + }
> + vc_txqs->vport_id = vport->vport_id;
> + vc_txqs->num_qinfo = num_qs;
> + memcpy(vc_txqs->qinfo, txq_info, num_qs * sizeof(struct
> +virtchnl2_txq_info));
> +
> + memset(&args, 0, sizeof(args));
> + args.ops = VIRTCHNL2_OP_CONFIG_TX_QUEUES;
> + args.in_args = (uint8_t *)vc_txqs;
> + args.in_args_size = size;
> + args.out_buffer = adapter->mbx_resp;
> + args.out_size = IDPF_DFLT_MBX_BUF_SIZE;
> +
> + err = idpf_vc_cmd_execute(adapter, &args);
> + rte_free(vc_txqs);
> + if (err != 0)
> + DRV_LOG(ERR, "Failed to execute command of
> +VIRTCHNL2_OP_CONFIG_TX_QUEUES");
> +
> + return err;
> +}
> +
>  int
>  idpf_vc_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg,
> struct idpf_ctlq_msg *q_msg)
> diff --git a/drivers/common/idpf/idpf_common_virtchnl.h
> b/drivers/common/idpf/idpf_common_virtchnl.h
> index bf1d014c8d..277235ba7d 100644
> --- a/drivers/common/idpf/idpf_common_virtchnl.h
> +++ b/drivers/common/idpf/idpf_common_virtchnl.h
> @@ -65,6 +65,12 @@ __rte_internal
>  int idpf_vc_ctlq_post_rx_buffs(struct idpf_hw *hw, struct idpf_ctlq_info *cq,
>  u16 *buff_count, struct idpf_dma_mem **buffs);
> __rte_internal
> +int idpf_vc_rxq_config_by_info(struct idpf_vport *vport, struct
> virtchnl2_rxq_info *rxq_info,
> +uint16_t num_qs);
> +__rte_internal
> +int idpf_vc_txq_config_by_info(struct idpf_vport *vport, struct
> virtchnl2_txq_info *txq_info,
> +uint16_t num_qs);
> +__rte_internal
>  int idpf_vc

[PATCH v2] net/ixgbe: add proper memory barriers for some Rx functions

2023-04-24 Thread Min Zhou
Segmentation fault has been observed while running the
ixgbe_recv_pkts_lro() function to receive packets on the Loongson 3C5000
processor which has 64 cores and 4 NUMA nodes.

From the ixgbe_recv_pkts_lro() function, we found that as long as the first
packet has the EOP bit set and the length of this packet is less than or
equal to rxq->crc_len, the segmentation fault will definitely happen, even
on other platforms such as x86.

Because when processing the first packet, first_seg->next will be NULL. If
at the same time this packet has the EOP bit set and its length is less
than or equal to rxq->crc_len, the following loop will be executed:

for (lp = first_seg; lp->next != rxm; lp = lp->next)
;

We know that the first_seg->next will be NULL under this condition. So the
expression of lp->next->next will cause the segmentation fault.
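
Annotating that failure path (a sketch; rxm is non-NULL here):

    for (lp = first_seg; lp->next != rxm; lp = lp->next)
            ;
    /* 1st check: lp->next == NULL != rxm, so lp advances to NULL;
     * 2nd check: evaluating lp->next dereferences a NULL pointer. */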

Normally, the length of the first packet with the EOP bit set will be
greater than rxq->crc_len. However, the CPU's out-of-order execution may
cause the status field and the rest of the descriptor fields to be read
out of order in this function. The related code is as follows:

rxdp = &rx_ring[rx_id];
 #1 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);

if (!(staterr & IXGBE_RXDADV_STAT_DD))
break;

 #2 rxd = *rxdp;

Statement #2 may be executed before statement #1, which can make the ready
packet appear to have zero length. If the packet is the first packet and
has the EOP bit set, the above segmentation fault will happen.

So, we should add rte_rmb() to ensure the read ordering is correct. We
also did the same thing in the ixgbe_recv_pkts() function to make the rxd
data valid, even though we did not find a segmentation fault there.

Signed-off-by: Min Zhou 
---
v2:
- Make the calling of rte_rmb() for all platforms
---
 drivers/net/ixgbe/ixgbe_rxtx.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index c9d6ca9efe..302a5ab7ff 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -1823,6 +1823,8 @@ ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
staterr = rxdp->wb.upper.status_error;
if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
break;
+
+   rte_rmb();
rxd = *rxdp;
 
/*
@@ -2122,6 +2124,7 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
if (!(staterr & IXGBE_RXDADV_STAT_DD))
break;
 
+   rte_rmb();
rxd = *rxdp;
 
PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
-- 
2.31.1



[Bug 1220] examples/ip_pipeline build failure with gcc 13

2023-04-24 Thread bugzilla
https://bugs.dpdk.org/show_bug.cgi?id=1220

Bug ID: 1220
   Summary: examples/ip_pipeline build failure with gcc 13
   Product: DPDK
   Version: 23.03
  Hardware: All
OS: All
Status: UNCONFIRMED
  Severity: normal
  Priority: Normal
 Component: examples
  Assignee: dev@dpdk.org
  Reporter: alia...@nvidia.com
  Target Milestone: ---

$ meson --werror --buildtype=debugoptimized -Dexamples=all build && ninja-build -C build
[..]
examples/ip_pipeline/thread.c:535:20: error: 'req' may be used uninitialized [-Werror=maybe-uninitialized]

OS: Fedora Linux 38
gcc: 13.0.1

-- 
You are receiving this mail because:
You are the assignee for the bug.

RE: [PATCH v2 01/15] common/idpf: remove virtchnl related shared code

2023-04-24 Thread Zhang, Qi Z



> -Original Message-
> From: Qiao, Wenjing 
> Sent: Friday, April 21, 2023 4:40 PM
> To: Wu, Jingjing ; Xing, Beilei
> ; Zhang, Qi Z 
> Cc: dev@dpdk.org; Qiao, Wenjing 
> Subject: [PATCH v2 01/15] common/idpf: remove virtchnl related shared
> code
> 
> Since the virtchnl related shared code of iavf
> and idpf has the same source, remove the
> virtchnl related shared code of idpf.

We need to replace #include "virtchnl.h" with #include <...>, as the header
is not expected to be searched from the current directory now.
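
The include target was lost in the archive; presumably the intended change
is just the include form, so the header resolves via the include path set
up in meson rather than the current directory:

    -#include "virtchnl.h"   /* searched relative to the including file */
    +#include <virtchnl.h>   /* resolved via the configured include path */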

> 
> Signed-off-by: Wenjing Qiao 
> ---
>  drivers/common/idpf/base/virtchnl.h   | 2866 -
>  .../common/idpf/base/virtchnl_inline_ipsec.h  |  567 
>  drivers/common/idpf/meson.build   |2 +
>  3 files changed, 2 insertions(+), 3433 deletions(-)
>  delete mode 100644 drivers/common/idpf/base/virtchnl.h
>  delete mode 100644 drivers/common/idpf/base/virtchnl_inline_ipsec.h
> 
> diff --git a/drivers/common/idpf/base/virtchnl.h
> b/drivers/common/idpf/base/virtchnl.h
> deleted file mode 100644
> index ea798e3971..00
> --- a/drivers/common/idpf/base/virtchnl.h
> +++ /dev/null
> @@ -1,2866 +0,0 @@
> -/* SPDX-License-Identifier: BSD-3-Clause
> - * Copyright(c) 2001-2022 Intel Corporation
> - */
> -
> -#ifndef _VIRTCHNL_H_
> -#define _VIRTCHNL_H_
> -
> -/* Description:
> - * This header file describes the Virtual Function (VF) - Physical Function
> - * (PF) communication protocol used by the drivers for all devices starting
> - * from our 40G product line
> - *
> - * Admin queue buffer usage:
> - * desc->opcode is always aqc_opc_send_msg_to_pf
> - * flags, retval, datalen, and data addr are all used normally.
> - * The Firmware copies the cookie fields when sending messages between
> the
> - * PF and VF, but uses all other fields internally. Due to this limitation,
> - * we must send all messages as "indirect", i.e. using an external buffer.
> - *
> - * All the VSI indexes are relative to the VF. Each VF can have maximum of
> - * three VSIs. All the queue indexes are relative to the VSI.  Each VF can
> - * have a maximum of sixteen queues for all of its VSIs.
> - *
> - * The PF is required to return a status code in v_retval for all messages
> - * except RESET_VF, which does not require any response. The returned
> value
> - * is of virtchnl_status_code type, defined here.
> - *
> - * In general, VF driver initialization should roughly follow the order of
> - * these opcodes. The VF driver must first validate the API version of the
> - * PF driver, then request a reset, then get resources, then configure
> - * queues and interrupts. After these operations are complete, the VF
> - * driver may start its queues, optionally add MAC and VLAN filters, and
> - * process traffic.
> - */
> -
> -/* START GENERIC DEFINES
> - * Need to ensure the following enums and defines hold the same meaning
> and
> - * value in current and future projects
> - */
> -
> -#define VIRTCHNL_ETH_LENGTH_OF_ADDRESS   6
> -
> -/* These macros are used to generate compilation errors if a
> structure/union
> - * is not exactly the correct length. It gives a divide by zero error if the
> - * structure/union is not of the correct size, otherwise it creates an enum
> - * that is never used.
> - */
> -#define VIRTCHNL_CHECK_STRUCT_LEN(n, X) enum
> virtchnl_static_assert_enum_##X \
> - { virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) }
> -#define VIRTCHNL_CHECK_UNION_LEN(n, X) enum
> virtchnl_static_asset_enum_##X \
> - { virtchnl_static_assert_##X = (n)/((sizeof(union X) == (n)) ? 1 : 0) }
> -
> -
> -/* Error Codes
> - * Note that many older versions of various iAVF drivers convert the
> reported
> - * status code directly into an iavf_status enumeration. For this reason, it
> - * is important that the values of these enumerations line up.
> - */
> -enum virtchnl_status_code {
> - VIRTCHNL_STATUS_SUCCESS = 0,
> - VIRTCHNL_STATUS_ERR_PARAM   = -5,
> - VIRTCHNL_STATUS_ERR_NO_MEMORY   = -18,
> - VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH = -38,
> - VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR = -39,
> - VIRTCHNL_STATUS_ERR_INVALID_VF_ID   = -40,
> - VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR   = -53,
> - VIRTCHNL_STATUS_ERR_NOT_SUPPORTED   = -64,
> -};
> -
> -/* Backward compatibility */
> -#define VIRTCHNL_ERR_PARAM VIRTCHNL_STATUS_ERR_PARAM
> -#define VIRTCHNL_STATUS_NOT_SUPPORTED
> VIRTCHNL_STATUS_ERR_NOT_SUPPORTED
> -
> -#define VIRTCHNL_LINK_SPEED_2_5GB_SHIFT  0x0
> -#define VIRTCHNL_LINK_SPEED_100MB_SHIFT  0x1
> -#define VIRTCHNL_LINK_SPEED_1000MB_SHIFT 0x2
> -#define VIRTCHNL_LINK_SPEED_10GB_SHIFT   0x3
> -#define VIRTCHNL_LINK_SPEED_40GB_SHIFT   0x4
> -#define VIRTCHNL_LINK_SPEED_20GB_SHIFT   0x5
> -#define VIRTCHNL_LINK_SPEED_25GB_SHIFT   0x6
> -#def

RE: [PATCH v2 11/15] common/idpf: allocate static buffer at initialization

2023-04-24 Thread Zhang, Qi Z



> -Original Message-
> From: Qiao, Wenjing 
> Sent: Friday, April 21, 2023 4:41 PM
> To: Wu, Jingjing ; Xing, Beilei
> ; Zhang, Qi Z 
> Cc: dev@dpdk.org; Qiao, Wenjing ; Pau,
> Christopher 
> Subject: [PATCH v2 11/15] common/idpf: allocate static buffer at
> initialization
> 
> Some OSs don't allow allocating DMA memory at runtime. So create an initial
> static buffer at initialization to hold this data.

This doesn't seem relevant for DPDK, which does support DMA allocation at
runtime. Do we really need this patch?

Btw, using global variables in a module can create issues in a multi-device
environment where multiple devices share the same module.
We can consider embedding this struct in idpf_hw.
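A minimal sketch of that suggestion (the surrounding idpf_hw members
are elided here; the real struct lives in the base code):

    struct idpf_hw {
            /* ... existing members elided ... */
            struct idpf_dma_mem send_dma_mem; /* was a file-scope global */
    };

idpf_send_msg_to_cp() would then use the per-device buffer, e.g.
idpf_memcpy(hw->send_dma_mem.va, msg, msglen, IDPF_NONDMA_TO_DMA), so
two devices bound to the same module no longer share one buffer.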

> 
> Signed-off-by: Christopher Pau 
> Signed-off-by: Wenjing Qiao 
> ---
>  drivers/common/idpf/base/idpf_common.c | 26 +++---
>  1 file changed, 15 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/common/idpf/base/idpf_common.c
> b/drivers/common/idpf/base/idpf_common.c
> index de82c3458f..f4a5707272 100644
> --- a/drivers/common/idpf/base/idpf_common.c
> +++ b/drivers/common/idpf/base/idpf_common.c
> @@ -6,6 +6,7 @@
>  #include "idpf_prototype.h"
>  #include "virtchnl.h"
> 
> +struct idpf_dma_mem send_dma_mem = { 0 };
> 
>  /**
>   * idpf_set_mac_type - Sets MAC type
> @@ -132,6 +133,15 @@ int idpf_init_hw(struct idpf_hw *hw, struct
> idpf_ctlq_size ctlq_size)
> 
>   idpf_free(hw, q_info);
> 
> + /*
> +  * Need an initial static buffer to copy DMA memory to send
> +  * for drivers that do not allow this allocation at runtime
> +  */
> + send_dma_mem.va = (struct idpf_dma_mem *)
> + idpf_alloc_dma_mem(hw, &send_dma_mem, 4096);
> + if (!send_dma_mem.va)
> + return -ENOMEM;
> +
>   return 0;
>  }
> 
> @@ -152,7 +162,6 @@ int idpf_send_msg_to_cp(struct idpf_hw *hw, int
> v_opcode,
>   int v_retval, u8 *msg, u16 msglen)
>  {
>   struct idpf_ctlq_msg ctlq_msg = { 0 };
> - struct idpf_dma_mem dma_mem = { 0 };
>   int status;
> 
>   ctlq_msg.opcode = idpf_mbq_opc_send_msg_to_pf; @@ -162,19
> +171,11 @@ int idpf_send_msg_to_cp(struct idpf_hw *hw, int v_opcode,
>   ctlq_msg.cookie.mbx.chnl_opcode = v_opcode;
> 
>   if (msglen > 0) {
> - dma_mem.va = (struct idpf_dma_mem *)
> -   idpf_alloc_dma_mem(hw, &dma_mem, msglen);
> - if (!dma_mem.va)
> - return -ENOMEM;
> -
> - idpf_memcpy(dma_mem.va, msg, msglen,
> IDPF_NONDMA_TO_DMA);
> - ctlq_msg.ctx.indirect.payload = &dma_mem;
> + idpf_memcpy(send_dma_mem.va, msg, msglen,
> IDPF_NONDMA_TO_DMA);
> + ctlq_msg.ctx.indirect.payload = &send_dma_mem;
>   }
>   status = idpf_ctlq_send(hw, hw->asq, 1, &ctlq_msg);
> 
> - if (dma_mem.va)
> - idpf_free_dma_mem(hw, &dma_mem);
> -
>   return status;
>  }
> 
> @@ -262,6 +263,9 @@ int idpf_clean_arq_element(struct idpf_hw *hw,
>   */
>  int idpf_deinit_hw(struct idpf_hw *hw)
>  {
> + if (send_dma_mem.va)
> + idpf_free_dma_mem(hw, &send_dma_mem);
> +
>   hw->asq = NULL;
>   hw->arq = NULL;
> 
> --
> 2.25.1



[PATCH v3 01/11] net/octeon_ep: support cnf95n and cnf95o SoC

2023-04-24 Thread Sathesh Edara
Adds the required functionality in the Octeon endpoint
driver to support the cnf95n and cnf95o endpoint devices.

Signed-off-by: Sathesh Edara 
---
 drivers/net/octeon_ep/otx2_ep_vf.h|  2 ++
 drivers/net/octeon_ep/otx_ep_ethdev.c | 13 +++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/net/octeon_ep/otx2_ep_vf.h 
b/drivers/net/octeon_ep/otx2_ep_vf.h
index 757eeae9f0..8f00acd737 100644
--- a/drivers/net/octeon_ep/otx2_ep_vf.h
+++ b/drivers/net/octeon_ep/otx2_ep_vf.h
@@ -115,6 +115,8 @@
 
 #define PCI_DEVID_CN9K_EP_NET_VF   0xB203 /* OCTEON 9 EP mode */
 #define PCI_DEVID_CN98XX_EP_NET_VF 0xB103
+#define PCI_DEVID_CNF95N_EP_NET_VF 0xB403
+#define PCI_DEVID_CNF95O_EP_NET_VF 0xB603
 
 int
 otx2_ep_vf_setup_device(struct otx_ep_device *sdpvf);
diff --git a/drivers/net/octeon_ep/otx_ep_ethdev.c 
b/drivers/net/octeon_ep/otx_ep_ethdev.c
index f43db1e398..24f62c3e49 100644
--- a/drivers/net/octeon_ep/otx_ep_ethdev.c
+++ b/drivers/net/octeon_ep/otx_ep_ethdev.c
@@ -105,6 +105,8 @@ otx_ep_chip_specific_setup(struct otx_ep_device *otx_epvf)
break;
case PCI_DEVID_CN9K_EP_NET_VF:
case PCI_DEVID_CN98XX_EP_NET_VF:
+   case PCI_DEVID_CNF95N_EP_NET_VF:
+   case PCI_DEVID_CNF95O_EP_NET_VF:
otx_epvf->chip_id = dev_id;
ret = otx2_ep_vf_setup_device(otx_epvf);
otx_epvf->fn_list.disable_io_queues(otx_epvf);
@@ -144,7 +146,9 @@ otx_epdev_init(struct otx_ep_device *otx_epvf)
if (otx_epvf->chip_id == PCI_DEVID_OCTEONTX_EP_VF)
otx_epvf->eth_dev->tx_pkt_burst = &otx_ep_xmit_pkts;
else if (otx_epvf->chip_id == PCI_DEVID_CN9K_EP_NET_VF ||
-otx_epvf->chip_id == PCI_DEVID_CN98XX_EP_NET_VF)
+otx_epvf->chip_id == PCI_DEVID_CN98XX_EP_NET_VF ||
+otx_epvf->chip_id == PCI_DEVID_CNF95N_EP_NET_VF ||
+otx_epvf->chip_id == PCI_DEVID_CNF95O_EP_NET_VF)
otx_epvf->eth_dev->tx_pkt_burst = &otx2_ep_xmit_pkts;
else if (otx_epvf->chip_id == PCI_DEVID_CNXK_EP_NET_VF)
otx_epvf->eth_dev->tx_pkt_burst = &otx2_ep_xmit_pkts;
@@ -494,7 +498,10 @@ otx_ep_eth_dev_init(struct rte_eth_dev *eth_dev)
otx_epvf->pdev = pdev;
 
otx_epdev_init(otx_epvf);
-   if (pdev->id.device_id == PCI_DEVID_CN9K_EP_NET_VF)
+   if (otx_epvf->chip_id == PCI_DEVID_CN9K_EP_NET_VF ||
+   otx_epvf->chip_id == PCI_DEVID_CN98XX_EP_NET_VF ||
+   otx_epvf->chip_id == PCI_DEVID_CNF95N_EP_NET_VF ||
+   otx_epvf->chip_id == PCI_DEVID_CNF95O_EP_NET_VF)
otx_epvf->pkind = SDP_OTX2_PKIND_FS0;
else
otx_epvf->pkind = SDP_PKIND;
@@ -524,6 +531,8 @@ static const struct rte_pci_id pci_id_otx_ep_map[] = {
{ RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX_EP_VF) },
{ RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CN9K_EP_NET_VF) },
{ RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CN98XX_EP_NET_VF) },
+   { RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CNF95N_EP_NET_VF) },
+   { RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CNF95O_EP_NET_VF) },
{ RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CNXK_EP_NET_VF) },
{ .vendor_id = 0, /* sentinel */ }
 };
-- 
2.31.1



[PATCH v3 05/11] devtools: add acronym in dictionary for commit checks

2023-04-24 Thread Sathesh Edara
ISM -> Interrupt Status Messages

Signed-off-by: Sathesh Edara 
---
 devtools/words-case.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/devtools/words-case.txt b/devtools/words-case.txt
index 53e029a958..3a7af902bd 100644
--- a/devtools/words-case.txt
+++ b/devtools/words-case.txt
@@ -35,6 +35,7 @@ IP
 IPsec
 IPv4
 IPv6
+ISM
 L2
 L3
 L4
-- 
2.31.1



[PATCH v3 06/11] net/octeon_ep: support ISM

2023-04-24 Thread Sathesh Edara
Adds the ISM (Interrupt Status Messages) specific functionality.

Signed-off-by: Sathesh Edara 
---
 drivers/net/octeon_ep/cnxk_ep_vf.c| 35 +++--
 drivers/net/octeon_ep/cnxk_ep_vf.h| 12 ++
 drivers/net/octeon_ep/otx2_ep_vf.c| 45 ++---
 drivers/net/octeon_ep/otx2_ep_vf.h| 14 +++
 drivers/net/octeon_ep/otx_ep_common.h | 16 
 drivers/net/octeon_ep/otx_ep_ethdev.c | 36 +
 drivers/net/octeon_ep/otx_ep_rxtx.c   | 56 +--
 7 files changed, 194 insertions(+), 20 deletions(-)

diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.c 
b/drivers/net/octeon_ep/cnxk_ep_vf.c
index 1a92887109..a437ae68cb 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.c
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.c
@@ -2,11 +2,12 @@
  * Copyright(C) 2022 Marvell.
  */
 
+#include 
 #include 
 
 #include 
 #include 
-
+#include 
 #include "cnxk_ep_vf.h"
 
 static void
@@ -85,6 +86,7 @@ cnxk_ep_vf_setup_iq_regs(struct otx_ep_device *otx_ep, 
uint32_t iq_no)
struct otx_ep_instr_queue *iq = otx_ep->instr_queue[iq_no];
int loop = OTX_EP_BUSY_LOOP_COUNT;
volatile uint64_t reg_val = 0ull;
+   uint64_t ism_addr;
 
reg_val = oct_ep_read64(otx_ep->hw_addr + CNXK_EP_R_IN_CONTROL(iq_no));
 
@@ -132,6 +134,19 @@ cnxk_ep_vf_setup_iq_regs(struct otx_ep_device *otx_ep, 
uint32_t iq_no)
 */
oct_ep_write64(OTX_EP_CLEAR_SDP_IN_INT_LVLS,
   otx_ep->hw_addr + CNXK_EP_R_IN_INT_LEVELS(iq_no));
+   /* Set up IQ ISM registers and structures */
+   ism_addr = (otx_ep->ism_buffer_mz->iova | CNXK_EP_ISM_EN
+   | CNXK_EP_ISM_MSIX_DIS)
+   + CNXK_EP_IQ_ISM_OFFSET(iq_no);
+   rte_write64(ism_addr, (uint8_t *)otx_ep->hw_addr +
+   CNXK_EP_R_IN_CNTS_ISM(iq_no));
+   iq->inst_cnt_ism =
+   (uint32_t *)((uint8_t *)otx_ep->ism_buffer_mz->addr
++ CNXK_EP_IQ_ISM_OFFSET(iq_no));
+   otx_ep_err("SDP_R[%d] INST Q ISM virt: %p, dma: 0x%" PRIX64, iq_no,
+  (void *)iq->inst_cnt_ism, ism_addr);
+   *iq->inst_cnt_ism = 0;
+   iq->inst_cnt_ism_prev = 0;
return 0;
 }
 
@@ -142,6 +157,7 @@ cnxk_ep_vf_setup_oq_regs(struct otx_ep_device *otx_ep, 
uint32_t oq_no)
uint64_t oq_ctl = 0ull;
int loop = OTX_EP_BUSY_LOOP_COUNT;
struct otx_ep_droq *droq = otx_ep->droq[oq_no];
+   uint64_t ism_addr;
 
/* Wait on IDLE to set to 1, supposed to configure BADDR
 * as long as IDLE is 0
@@ -201,9 +217,22 @@ cnxk_ep_vf_setup_oq_regs(struct otx_ep_device *otx_ep, 
uint32_t oq_no)
rte_write32((uint32_t)reg_val, droq->pkts_sent_reg);
 
otx_ep_dbg("SDP_R[%d]_sent: %x", oq_no, 
rte_read32(droq->pkts_sent_reg));
-   loop = OTX_EP_BUSY_LOOP_COUNT;
+   /* Set up ISM registers and structures */
+   ism_addr = (otx_ep->ism_buffer_mz->iova | CNXK_EP_ISM_EN
+   | CNXK_EP_ISM_MSIX_DIS)
+   + CNXK_EP_OQ_ISM_OFFSET(oq_no);
+   rte_write64(ism_addr, (uint8_t *)otx_ep->hw_addr +
+   CNXK_EP_R_OUT_CNTS_ISM(oq_no));
+   droq->pkts_sent_ism =
+   (uint32_t *)((uint8_t *)otx_ep->ism_buffer_mz->addr
++ CNXK_EP_OQ_ISM_OFFSET(oq_no));
+   otx_ep_err("SDP_R[%d] OQ ISM virt: %p dma: 0x%" PRIX64,
+   oq_no, (void *)droq->pkts_sent_ism, ism_addr);
+   *droq->pkts_sent_ism = 0;
+   droq->pkts_sent_ism_prev = 0;
 
-   while (((rte_read32(droq->pkts_sent_reg)) != 0ull)) {
+   loop = OTX_EP_BUSY_LOOP_COUNT;
+   while (((rte_read32(droq->pkts_sent_reg)) != 0ull) && loop--) {
reg_val = rte_read32(droq->pkts_sent_reg);
rte_write32((uint32_t)reg_val, droq->pkts_sent_reg);
rte_delay_ms(1);
diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.h 
b/drivers/net/octeon_ep/cnxk_ep_vf.h
index aaa5774552..072b38ea15 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.h
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.h
@@ -27,6 +27,7 @@
 #define CNXK_EP_R_IN_INT_LEVELS_START   0x10060
 #define CNXK_EP_R_IN_PKT_CNT_START  0x10080
 #define CNXK_EP_R_IN_BYTE_CNT_START 0x10090
+#define CNXK_EP_R_IN_CNTS_ISM_START 0x10520
 
 #define CNXK_EP_R_IN_CONTROL(ring) \
(CNXK_EP_R_IN_CONTROL_START + ((ring) * CNXK_EP_RING_OFFSET))
@@ -55,6 +56,8 @@
 #define CNXK_EP_R_IN_BYTE_CNT(ring)\
(CNXK_EP_R_IN_BYTE_CNT_START +  ((ring) * CNXK_EP_RING_OFFSET))
 
+#define CNXK_EP_R_IN_CNTS_ISM(ring)\
+   (CNXK_EP_R_IN_CNTS_ISM_START + ((ring) * CNXK_EP_RING_OFFSET))
 
 /** Rings per Virtual Function **/
 #define CNXK_EP_R_IN_CTL_RPVF_MASK (0xF)
@@ -87,6 +90,7 @@
 #define CNXK_EP_R_OUT_ENABLE_START 0x10170
 #define CNXK_EP_R_OUT_PKT_CNT_START0x10180
 #define CNXK_EP_R_OUT_BYTE_CNT_START   0x10190
+#define CNXK_EP_R_OUT_CNTS_ISM_START   0x10510
 
 #define CNXK_EP_R_OUT_CNTS(r

[PATCH v3 07/11] net/octeon_ep: flush pending DMA operations

2023-04-24 Thread Sathesh Edara
Flushes the pending DMA operations while reading
packets, by reading the control and status register.

Signed-off-by: Sathesh Edara 
---
 drivers/net/octeon_ep/otx_ep_common.h | 8 
 drivers/net/octeon_ep/otx_ep_rxtx.c   | 4 
 2 files changed, 12 insertions(+)

diff --git a/drivers/net/octeon_ep/otx_ep_common.h 
b/drivers/net/octeon_ep/otx_ep_common.h
index 76528ed49d..444136923f 100644
--- a/drivers/net/octeon_ep/otx_ep_common.h
+++ b/drivers/net/octeon_ep/otx_ep_common.h
@@ -345,6 +345,14 @@ struct otx_ep_droq {
 */
void *pkts_sent_reg;
 
+   /** Handle DMA incompletion during pkt reads.
+* This variable is used to initiate a sent_reg read
+* that completes the pending DMA.
+* It is used as an lvalue so the compiler cannot
+* optimize the reads away.
+*/
+   uint32_t sent_reg_val;
+
/* Statistics for this DROQ. */
struct otx_ep_droq_stats stats;
 
diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.c 
b/drivers/net/octeon_ep/otx_ep_rxtx.c
index c4153bd583..ca968f6fe7 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.c
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.c
@@ -917,6 +917,10 @@ otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
struct rte_mbuf *first_buf = NULL;
struct rte_mbuf *last_buf = NULL;
 
+   /* csr read helps to flush pending dma */
+   droq->sent_reg_val = rte_read32(droq->pkts_sent_reg);
+   rte_rmb();
+
while (pkt_len < total_pkt_len) {
int cpy_len = 0;
 
-- 
2.31.1



[PATCH v3 08/11] net/octeon_ep: update queue size checks

2023-04-24 Thread Sathesh Edara
Updates the output queue size checks to ensure
that the queue is larger than the backpressure watermark,
and sets the default queue sizes to the minimum
so that applications like testpmd can be started
without explicit queue size arguments.
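For context, applications pick these defaults up through
rte_eth_dev_info_get(); a generic sketch of the consumer side (not
part of this patch):

    #include <rte_ethdev.h>

    static int
    get_default_ring_sizes(uint16_t port_id, uint16_t *nb_rxd, uint16_t *nb_txd)
    {
            struct rte_eth_dev_info info;
            int ret;

            ret = rte_eth_dev_info_get(port_id, &info);
            if (ret != 0)
                    return ret;

            /* Fall back to the PMD's preferred ring sizes when the
             * user did not pass explicit descriptor counts. */
            if (info.default_rxportconf.ring_size != 0)
                    *nb_rxd = info.default_rxportconf.ring_size;
            if (info.default_txportconf.ring_size != 0)
                    *nb_txd = info.default_txportconf.ring_size;
            return 0;
    }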

Signed-off-by: Sathesh Edara 
---
 drivers/net/octeon_ep/otx_ep_common.h |  9 +++--
 drivers/net/octeon_ep/otx_ep_ethdev.c | 12 ++--
 drivers/net/octeon_ep/otx_ep_rxtx.h   |  4 ++--
 3 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/net/octeon_ep/otx_ep_common.h 
b/drivers/net/octeon_ep/otx_ep_common.h
index 444136923f..3582f3087b 100644
--- a/drivers/net/octeon_ep/otx_ep_common.h
+++ b/drivers/net/octeon_ep/otx_ep_common.h
@@ -11,8 +11,13 @@
 #define OTX_EP_MAX_RINGS_PER_VF(8)
 #define OTX_EP_CFG_IO_QUEUESOTX_EP_MAX_RINGS_PER_VF
 #define OTX_EP_64BYTE_INSTR (64)
-#define OTX_EP_MIN_IQ_DESCRIPTORS   (128)
-#define OTX_EP_MIN_OQ_DESCRIPTORS   (128)
+/*
+ * Backpressure for SDP is configured on Octeon, and the minimum queue sizes
+ * must be much larger than the backpressure watermark configured in the Octeon
+ * SDP driver.  IQ and OQ backpressure configurations are separate.
+ */
+#define OTX_EP_MIN_IQ_DESCRIPTORS   (2048)
+#define OTX_EP_MIN_OQ_DESCRIPTORS   (2048)
 #define OTX_EP_MAX_IQ_DESCRIPTORS   (8192)
 #define OTX_EP_MAX_OQ_DESCRIPTORS   (8192)
 #define OTX_EP_OQ_BUF_SIZE  (2048)
diff --git a/drivers/net/octeon_ep/otx_ep_ethdev.c 
b/drivers/net/octeon_ep/otx_ep_ethdev.c
index 30a7a450fb..0f710b1ffa 100644
--- a/drivers/net/octeon_ep/otx_ep_ethdev.c
+++ b/drivers/net/octeon_ep/otx_ep_ethdev.c
@@ -48,6 +48,9 @@ otx_ep_dev_info_get(struct rte_eth_dev *eth_dev,
devinfo->rx_desc_lim = otx_ep_rx_desc_lim;
devinfo->tx_desc_lim = otx_ep_tx_desc_lim;
 
+   devinfo->default_rxportconf.ring_size = OTX_EP_MIN_OQ_DESCRIPTORS;
+   devinfo->default_txportconf.ring_size = OTX_EP_MIN_IQ_DESCRIPTORS;
+
return 0;
 }
 
@@ -274,8 +277,8 @@ otx_ep_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t 
q_no,
return -EINVAL;
}
if (num_rx_descs < (SDP_GBL_WMARK * 8)) {
-   otx_ep_err("Invalid rx desc number should at least be greater 
than 8xwmark  %u\n",
-  num_rx_descs);
+   otx_ep_err("Invalid rx desc number(%u) should at least be 
greater than 8xwmark  %u\n",
+  num_rx_descs, (SDP_GBL_WMARK * 8));
return -EINVAL;
}
 
@@ -357,6 +360,11 @@ otx_ep_tx_queue_setup(struct rte_eth_dev *eth_dev, 
uint16_t q_no,
   num_tx_descs);
return -EINVAL;
}
+   if (num_tx_descs < (SDP_GBL_WMARK * 8)) {
+   otx_ep_err("Invalid tx desc number(%u) should at least be 
greater than 8*wmark(%u)\n",
+  num_tx_descs, (SDP_GBL_WMARK * 8));
+   return -EINVAL;
+   }
 
retval = otx_ep_setup_iqs(otx_epvf, q_no, num_tx_descs, socket_id);
 
diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.h 
b/drivers/net/octeon_ep/otx_ep_rxtx.h
index 1527d350b5..7012888100 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.h
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.h
@@ -7,8 +7,8 @@
 
 #include 
 
-#define OTX_EP_RXD_ALIGN 1
-#define OTX_EP_TXD_ALIGN 1
+#define OTX_EP_RXD_ALIGN 2
+#define OTX_EP_TXD_ALIGN 2
 
 #define OTX_EP_IQ_SEND_FAILED  (-1)
 #define OTX_EP_IQ_SEND_SUCCESS (0)
-- 
2.31.1



[PATCH v3 09/11] net/octeon_ep: support mailbox between VF and PF

2023-04-24 Thread Sathesh Edara
Adds mailbox communication between the VF and
the PF and supports the following mailbox messages
(see the sketch after this list):
- Get and set MAC address
- Get link information
- Get stats
- Set and get MTU
- Send notification to PF
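As a rough illustration of the VF-to-PF data path (the register macro
and the mbox_lock field are from this patch; the helper name, the ring
argument, and the single-word framing are assumptions, since
otx_ep_mbox.c is only partially shown below):

    static void
    vf_mbox_write_word(struct otx_ep_device *otx_ep, uint32_t ring, uint64_t data)
    {
            /* Serialize mailbox users; each ring has its own VF->PF
             * data register. The real protocol in otx_ep_mbox.c also
             * handles responses and retries. */
            rte_spinlock_lock(&otx_ep->mbox_lock);
            rte_write64(data, (uint8_t *)otx_ep->hw_addr +
                              CNXK_EP_R_MBOX_VF_PF_DATA(ring));
            rte_spinlock_unlock(&otx_ep->mbox_lock);
    }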

Signed-off-by: Sathesh Edara 
---
 doc/guides/nics/features/octeon_ep.ini |   1 +
 drivers/net/octeon_ep/cnxk_ep_vf.c |   1 +
 drivers/net/octeon_ep/cnxk_ep_vf.h |  12 +-
 drivers/net/octeon_ep/meson.build  |   1 +
 drivers/net/octeon_ep/otx_ep_common.h  |  26 +++
 drivers/net/octeon_ep/otx_ep_ethdev.c  | 143 +++-
 drivers/net/octeon_ep/otx_ep_mbox.c| 309 +
 drivers/net/octeon_ep/otx_ep_mbox.h| 163 +
 8 files changed, 643 insertions(+), 13 deletions(-)
 create mode 100644 drivers/net/octeon_ep/otx_ep_mbox.c
 create mode 100644 drivers/net/octeon_ep/otx_ep_mbox.h

diff --git a/doc/guides/nics/features/octeon_ep.ini 
b/doc/guides/nics/features/octeon_ep.ini
index 305e219262..f3b821c89e 100644
--- a/doc/guides/nics/features/octeon_ep.ini
+++ b/doc/guides/nics/features/octeon_ep.ini
@@ -10,4 +10,5 @@ Linux= Y
 x86-64   = Y
 Basic stats  = Y
 Link status  = Y
+MTU update   = Y
 Usage doc= Y
diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.c 
b/drivers/net/octeon_ep/cnxk_ep_vf.c
index a437ae68cb..cadb4ecbf9 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.c
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.c
@@ -8,6 +8,7 @@
 #include 
 #include 
 #include 
+#include "otx_ep_common.h"
 #include "cnxk_ep_vf.h"
 
 static void
diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.h 
b/drivers/net/octeon_ep/cnxk_ep_vf.h
index 072b38ea15..86277449ea 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.h
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.h
@@ -5,7 +5,7 @@
 #define _CNXK_EP_VF_H_
 
 #include 
-#include "otx_ep_common.h"
+
 #define CNXK_CONFIG_XPANSION_BAR 0x38
 #define CNXK_CONFIG_PCIE_CAP 0x70
 #define CNXK_CONFIG_PCIE_DEVCAP  0x74
@@ -92,6 +92,10 @@
 #define CNXK_EP_R_OUT_BYTE_CNT_START   0x10190
 #define CNXK_EP_R_OUT_CNTS_ISM_START   0x10510
 
+#define CNXK_EP_R_MBOX_PF_VF_DATA_START0x10210
+#define CNXK_EP_R_MBOX_VF_PF_DATA_START0x10230
+#define CNXK_EP_R_MBOX_PF_VF_INT_START 0x10220
+
 #define CNXK_EP_R_OUT_CNTS(ring)\
(CNXK_EP_R_OUT_CNTS_START + ((ring) * CNXK_EP_RING_OFFSET))
 
@@ -125,6 +129,12 @@
 #define CNXK_EP_R_OUT_CNTS_ISM(ring) \
(CNXK_EP_R_OUT_CNTS_ISM_START + ((ring) * CNXK_EP_RING_OFFSET))
 
+#define CNXK_EP_R_MBOX_VF_PF_DATA(ring)  \
+   (CNXK_EP_R_MBOX_VF_PF_DATA_START + ((ring) * CNXK_EP_RING_OFFSET))
+
+#define CNXK_EP_R_MBOX_PF_VF_INT(ring)   \
+   (CNXK_EP_R_MBOX_PF_VF_INT_START + ((ring) * CNXK_EP_RING_OFFSET))
+
 /*-- R_OUT Masks */
 #define CNXK_EP_R_OUT_INT_LEVELS_BMODE   (1ULL << 63)
 #define CNXK_EP_R_OUT_INT_LEVELS_TIMET   (32)
diff --git a/drivers/net/octeon_ep/meson.build 
b/drivers/net/octeon_ep/meson.build
index a267b60290..e698bf9792 100644
--- a/drivers/net/octeon_ep/meson.build
+++ b/drivers/net/octeon_ep/meson.build
@@ -8,4 +8,5 @@ sources = files(
 'otx_ep_vf.c',
 'otx2_ep_vf.c',
 'cnxk_ep_vf.c',
+'otx_ep_mbox.c',
 )
diff --git a/drivers/net/octeon_ep/otx_ep_common.h 
b/drivers/net/octeon_ep/otx_ep_common.h
index 3582f3087b..dadc8d1579 100644
--- a/drivers/net/octeon_ep/otx_ep_common.h
+++ b/drivers/net/octeon_ep/otx_ep_common.h
@@ -4,6 +4,7 @@
 #ifndef _OTX_EP_COMMON_H_
 #define _OTX_EP_COMMON_H_
 
+#include 
 
 #define OTX_EP_NW_PKT_OP   0x1220
 #define OTX_EP_NW_CMD_OP   0x1221
@@ -67,6 +68,9 @@
 #define oct_ep_read64(addr) rte_read64_relaxed((void *)(addr))
 #define oct_ep_write64(val, addr) rte_write64_relaxed((val), (void *)(addr))
 
+/* Mailbox maximum data size */
+#define MBOX_MAX_DATA_BUF_SIZE 320
+
 /* Input Request Header format */
 union otx_ep_instr_irh {
uint64_t u64;
@@ -488,6 +492,18 @@ struct otx_ep_device {
 
/* DMA buffer for SDP ISM messages */
const struct rte_memzone *ism_buffer_mz;
+
+   /* Mailbox lock */
+   rte_spinlock_t mbox_lock;
+
+   /* Mailbox data */
+   uint8_t mbox_data_buf[MBOX_MAX_DATA_BUF_SIZE];
+
+   /* Mailbox data index */
+   int32_t mbox_data_index;
+
+   /* Mailbox receive message length */
+   int32_t mbox_rcv_message_len;
 };
 
 int otx_ep_setup_iqs(struct otx_ep_device *otx_ep, uint32_t iq_no,
@@ -541,6 +557,16 @@ struct otx_ep_buf_free_info {
 #define OTX_EP_CLEAR_SLIST_DBELL 0x
 #define OTX_EP_CLEAR_SDP_OUT_PKT_CNT 0xF
 
+/* Max overhead includes
+ * - Ethernet hdr
+ * - CRC
+ * - nested VLANs
+ * - octeon rx info
+ */
+#define OTX_EP_ETH_OVERHEAD \
+   (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + \
+(2 * RTE_VLAN_HLEN) + OTX_EP_DROQ_INFO_SIZE)
+
 /* PCI IDs */
 #define PCI_VENDOR_ID_CAVIUM   0x177D
 
diff --git a/driver

[PATCH v3 11/11] net/octeon_ep: set secondary process dev ops

2023-04-24 Thread Sathesh Edara
Sets the dev ops and transmit/receive callbacks
for secondary processes.

Signed-off-by: Sathesh Edara 
---
 doc/guides/nics/features/octeon_ep.ini |  1 +
 drivers/net/octeon_ep/otx_ep_ethdev.c  | 22 +++---
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/doc/guides/nics/features/octeon_ep.ini 
b/doc/guides/nics/features/octeon_ep.ini
index f3b821c89e..d52491afa3 100644
--- a/doc/guides/nics/features/octeon_ep.ini
+++ b/doc/guides/nics/features/octeon_ep.ini
@@ -11,4 +11,5 @@ x86-64   = Y
 Basic stats  = Y
 Link status  = Y
 MTU update   = Y
+Multiprocess aware   = Y
 Usage doc= Y
diff --git a/drivers/net/octeon_ep/otx_ep_ethdev.c 
b/drivers/net/octeon_ep/otx_ep_ethdev.c
index 885fbb475f..a9868909f8 100644
--- a/drivers/net/octeon_ep/otx_ep_ethdev.c
+++ b/drivers/net/octeon_ep/otx_ep_ethdev.c
@@ -527,9 +527,17 @@ otx_ep_dev_stats_get(struct rte_eth_dev *eth_dev,
 static int
 otx_ep_dev_close(struct rte_eth_dev *eth_dev)
 {
-   struct otx_ep_device *otx_epvf = OTX_EP_DEV(eth_dev);
+   struct otx_ep_device *otx_epvf;
uint32_t num_queues, q_no;
 
+   if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+   eth_dev->dev_ops = NULL;
+   eth_dev->rx_pkt_burst = NULL;
+   eth_dev->tx_pkt_burst = NULL;
+   return 0;
+   }
+
+   otx_epvf = OTX_EP_DEV(eth_dev);
otx_ep_mbox_send_dev_exit(eth_dev);
otx_epvf->fn_list.disable_io_queues(otx_epvf);
num_queues = otx_epvf->nb_rx_queues;
@@ -593,8 +601,12 @@ static const struct eth_dev_ops otx_ep_eth_dev_ops = {
 static int
 otx_ep_eth_dev_uninit(struct rte_eth_dev *eth_dev)
 {
-   if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+   if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+   eth_dev->dev_ops = NULL;
+   eth_dev->rx_pkt_burst = NULL;
+   eth_dev->tx_pkt_burst = NULL;
return 0;
+   }
 
eth_dev->dev_ops = NULL;
eth_dev->rx_pkt_burst = NULL;
@@ -642,8 +654,12 @@ otx_ep_eth_dev_init(struct rte_eth_dev *eth_dev)
struct rte_ether_addr vf_mac_addr;
 
/* Single process support */
-   if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+   if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+   eth_dev->dev_ops = &otx_ep_eth_dev_ops;
+   eth_dev->rx_pkt_burst = &otx_ep_recv_pkts;
+   eth_dev->tx_pkt_burst = &otx2_ep_xmit_pkts;
return 0;
+   }
 
rte_eth_copy_pci_info(eth_dev, pdev);
otx_epvf->eth_dev = eth_dev;
-- 
2.31.1



Re: [PATCH 1/1] net/ixgbe: add a proper memory barrier for LoongArch

2023-04-24 Thread zhoumin

Hi Bibo,

On Sat, Apr 22, 2023 at 8:29AM, bibo, mao wrote:




On 2023/4/21 9:12, zhoumin wrote:

On Fri, Apr 7, 2023 at 4:50PM, Min Zhou wrote:

Segmentation fault has been observed while running the
ixgbe_recv_pkts_lro() function to receive packets on the Loongson
3C5000 processor which has 64 cores and 4 NUMA nodes.

The reason is that the reads of the status field and of the rest of
the descriptor fields in this function may be reordered on the
LoongArch processor. We should add rte_rmb() to ensure the reads
are correctly ordered.

We also did the same thing in the ixgbe_recv_pkts() function.

Signed-off-by: Min Zhou 
---
  drivers/net/ixgbe/ixgbe_rxtx.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c 
b/drivers/net/ixgbe/ixgbe_rxtx.c

index c9d6ca9efe..16391a42f9 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -1823,6 +1823,9 @@ ixgbe_recv_pkts(void *rx_queue, struct 
rte_mbuf **rx_pkts,

  staterr = rxdp->wb.upper.status_error;
  if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
  break;
+#if defined(RTE_ARCH_LOONGARCH)
+    rte_rmb();
+#endif
  rxd = *rxdp;

Hi Min,

Could you add a more detailed analysis of the issue? Although rxdp
is declared as volatile, that only constrains the compiler. However,
some architectures like LoongArch are also weakly ordered. For this
piece of code:

 1: staterr = rxdp->wb.upper.status_error;
 2: rxd = *rxdp;

sentence 2 can be executed before sentence 1 and read the old
descriptor contents, even though the DD bit checked by sentence 1
indicates that the packet is ready with its new value.
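Put concretely, the pattern after the fix inside the receive loop is
(a sketch based on the patch above; the RTE_ARCH_LOONGARCH guard is
omitted here):

    staterr = rxdp->wb.upper.status_error;
    if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
            break;
    /* Order the DD check before the full descriptor read; without
     * this, a weakly ordered CPU may load *rxdp early and observe
     * stale fields even though DD reads as set. */
    rte_rmb();
    rxd = *rxdp;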


Regards
Bibo, Mao



Thanks for your kind comments.


I have sent the v2 patch and given a more detailed analysis of the
segmentation fault.



Regards

Min

  /*
@@ -2122,6 +2125,9 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct 
rte_mbuf **rx_pkts, uint16_t nb_pkts,

  if (!(staterr & IXGBE_RXDADV_STAT_DD))
  break;
+#if defined(RTE_ARCH_LOONGARCH)
+    rte_rmb();
+#endif
  rxd = *rxdp;
  PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "


Kindly ping.

Any comments or suggestions will be appreciated.


Min





[PATCH v4] net/idpf: add VF support

2023-04-24 Thread beilei . xing
From: Beilei Xing 

Support VF whose device id is 0x145c.

Signed-off-by: Beilei Xing 
---

v4 change:
 - refine if condition.
v3 change:
 - move the check-reset-done function and mailbox init to the common module.
v2 change:
 - Rebase code based on new patchset:
   
https://patches.dpdk.org/project/dpdk/cover/20230404124112.71703-1-beilei.x...@intel.com/

 drivers/common/idpf/idpf_common_device.c | 140 ---
 drivers/common/idpf/idpf_common_device.h |   2 +
 drivers/net/idpf/idpf_ethdev.c   |   2 +
 3 files changed, 105 insertions(+), 39 deletions(-)

diff --git a/drivers/common/idpf/idpf_common_device.c 
b/drivers/common/idpf/idpf_common_device.c
index c5e7bbf66c..abd4be423a 100644
--- a/drivers/common/idpf/idpf_common_device.c
+++ b/drivers/common/idpf/idpf_common_device.c
@@ -16,6 +16,7 @@ idpf_reset_pf(struct idpf_hw *hw)
 }
 
 #define IDPF_RESET_WAIT_CNT 100
+
 static int
 idpf_check_pf_reset_done(struct idpf_hw *hw)
 {
@@ -33,48 +34,105 @@ idpf_check_pf_reset_done(struct idpf_hw *hw)
return -EBUSY;
 }
 
-#define CTLQ_NUM 2
 static int
-idpf_init_mbx(struct idpf_hw *hw)
+idpf_check_vf_reset_done(struct idpf_hw *hw)
 {
-   struct idpf_ctlq_create_info ctlq_info[CTLQ_NUM] = {
-   {
-   .type = IDPF_CTLQ_TYPE_MAILBOX_TX,
-   .id = IDPF_CTLQ_ID,
-   .len = IDPF_CTLQ_LEN,
-   .buf_size = IDPF_DFLT_MBX_BUF_SIZE,
-   .reg = {
-   .head = PF_FW_ATQH,
-   .tail = PF_FW_ATQT,
-   .len = PF_FW_ATQLEN,
-   .bah = PF_FW_ATQBAH,
-   .bal = PF_FW_ATQBAL,
-   .len_mask = PF_FW_ATQLEN_ATQLEN_M,
-   .len_ena_mask = PF_FW_ATQLEN_ATQENABLE_M,
-   .head_mask = PF_FW_ATQH_ATQH_M,
-   }
-   },
-   {
-   .type = IDPF_CTLQ_TYPE_MAILBOX_RX,
-   .id = IDPF_CTLQ_ID,
-   .len = IDPF_CTLQ_LEN,
-   .buf_size = IDPF_DFLT_MBX_BUF_SIZE,
-   .reg = {
-   .head = PF_FW_ARQH,
-   .tail = PF_FW_ARQT,
-   .len = PF_FW_ARQLEN,
-   .bah = PF_FW_ARQBAH,
-   .bal = PF_FW_ARQBAL,
-   .len_mask = PF_FW_ARQLEN_ARQLEN_M,
-   .len_ena_mask = PF_FW_ARQLEN_ARQENABLE_M,
-   .head_mask = PF_FW_ARQH_ARQH_M,
-   }
+   uint32_t reg;
+   int i;
+
+   for (i = 0; i < IDPF_RESET_WAIT_CNT; i++) {
+   reg = IDPF_READ_REG(hw, VFGEN_RSTAT);
+   if (reg != 0x && (reg & VFGEN_RSTAT_VFR_STATE_M))
+   return 0;
+   rte_delay_ms(1000);
+   }
+
+   DRV_LOG(ERR, "VF reset timeout");
+   return -EBUSY;
+}
+
+#define IDPF_CTLQ_NUM 2
+
+struct idpf_ctlq_create_info pf_ctlq_info[IDPF_CTLQ_NUM] = {
+   {
+   .type = IDPF_CTLQ_TYPE_MAILBOX_TX,
+   .id = IDPF_CTLQ_ID,
+   .len = IDPF_CTLQ_LEN,
+   .buf_size = IDPF_DFLT_MBX_BUF_SIZE,
+   .reg = {
+   .head = PF_FW_ATQH,
+   .tail = PF_FW_ATQT,
+   .len = PF_FW_ATQLEN,
+   .bah = PF_FW_ATQBAH,
+   .bal = PF_FW_ATQBAL,
+   .len_mask = PF_FW_ATQLEN_ATQLEN_M,
+   .len_ena_mask = PF_FW_ATQLEN_ATQENABLE_M,
+   .head_mask = PF_FW_ATQH_ATQH_M,
+   }
+   },
+   {
+   .type = IDPF_CTLQ_TYPE_MAILBOX_RX,
+   .id = IDPF_CTLQ_ID,
+   .len = IDPF_CTLQ_LEN,
+   .buf_size = IDPF_DFLT_MBX_BUF_SIZE,
+   .reg = {
+   .head = PF_FW_ARQH,
+   .tail = PF_FW_ARQT,
+   .len = PF_FW_ARQLEN,
+   .bah = PF_FW_ARQBAH,
+   .bal = PF_FW_ARQBAL,
+   .len_mask = PF_FW_ARQLEN_ARQLEN_M,
+   .len_ena_mask = PF_FW_ARQLEN_ARQENABLE_M,
+   .head_mask = PF_FW_ARQH_ARQH_M,
+   }
+   }
+};
+
+struct idpf_ctlq_create_info vf_ctlq_info[IDPF_CTLQ_NUM] = {
+   {
+   .type = IDPF_CTLQ_TYPE_MAILBOX_TX,
+   .id = IDPF_CTLQ_ID,
+   .len = IDPF_CTLQ_LEN,
+   .buf_size = IDPF_DFLT_MBX_BUF_SIZE,
+   .reg = {
+   .head = VF_ATQH,
+   .tail = VF_ATQT,
+   .len = VF_ATQLEN,
+   .bah = VF_ATQBAH,
+   .bal = VF_ATQBAL,
+

[RFC 0/3] introduce coroutine library

2023-04-24 Thread Chengwen Feng
This patchset introduces the coroutine library which will help refactor
the hns3 PMD's reset process.

The hns3 single function reset process consists of the following steps:
1.stop_service();
2.prepare_reset();
3.delay(100ms);
4.notify_hw();
5.wait_hw_reset_done(); // multiple sleep waits are involved.
6.reinit();
7.restore_conf();

If the DPDK process takes over multiple hns3 functions (e.g. 100),
it's impractical to reset and restore the functions in sequence:
1.proc_func(001); // will complete in the 100+ms range.
2.proc_func(002); // will complete in the 100~200+ms range.
...
x.proc_func(100); // will complete in the 9900~10000+ms range.
The later functions will fail because they are handled too late.

One solution is to create a reset thread for each function, but that
leads to a large number of threads if the DPDK process takes over
multiple hns3 functions.

So the current hns3 driver uses an asynchronous mechanism; for
example, it uses rte_eal_alarm_set() to implement delay(100ms). This
splits a serial process into multiple asynchronous steps, and the code
is complex and difficult to understand.

Coroutines are a good mechanism for this: they let programmers keep
the reset process serial while running within a limited number of
threads, as the sketch below illustrates.
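A sketch of how the serial steps could stay in one coroutine, using
the APIs from this RFC (struct func_ctx and the step helpers are
illustrative stand-ins for the hns3 code):

    struct func_ctx;                              /* per-function state */
    static void stop_service(struct func_ctx *f); /* stand-ins for hns3 steps */
    static void prepare_reset(struct func_ctx *f);
    static void notify_hw(struct func_ctx *f);
    static int  hw_reset_done(struct func_ctx *f);
    static void reinit(struct func_ctx *f);
    static void restore_conf(struct func_ctx *f);

    static void
    reset_coroutine(void *arg)
    {
            struct func_ctx *f = arg;

            stop_service(f);          /* step 1 */
            prepare_reset(f);         /* step 2 */
            rte_co_delay(100 * 1000); /* step 3: yields ~100ms, not blocking */
            notify_hw(f);             /* step 4 */
            while (!hw_reset_done(f)) /* step 5 */
                    rte_co_yield();
            reinit(f);                /* step 6 */
            restore_conf(f);          /* step 7 */
    }

One scheduler thread then drives one such coroutine per function via
rte_co_create() and rte_schedule_run(), so 100 functions need one
extra thread rather than 100.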

This patchset uses <ucontext.h> to build the coroutine framework, and
it just provides a demo. More APIs may be added in the future.

In addition, we would like to ask the community whether it is possible
to accept the library. If not, whether it is allowed to provide the
library in the hns3 PMD.

Chengwen Feng (3):
  lib/coroutine: add coroutine library
  examples/coroutine: support coroutine examples
  net/hns3: refactor reset process with coroutine

 drivers/net/hns3/hns3_ethdev.c| 217 ++
 drivers/net/hns3/hns3_ethdev.h|   3 +
 drivers/net/hns3/hns3_intr.c  |  38 ++
 drivers/net/hns3/meson.build  |   2 +-
 examples/coroutine/main.c | 153 +
 examples/coroutine/meson.build|  10 ++
 examples/meson.build  |   1 +
 lib/coroutine/meson.build |   8 ++
 lib/coroutine/rte_coroutine.c | 190 ++
 lib/coroutine/rte_coroutine.h | 110 +++
 lib/coroutine/rte_coroutine_imp.h |  46 +++
 lib/coroutine/version.map |  11 ++
 lib/meson.build   |   1 +
 13 files changed, 789 insertions(+), 1 deletion(-)
 create mode 100644 examples/coroutine/main.c
 create mode 100644 examples/coroutine/meson.build
 create mode 100644 lib/coroutine/meson.build
 create mode 100644 lib/coroutine/rte_coroutine.c
 create mode 100644 lib/coroutine/rte_coroutine.h
 create mode 100644 lib/coroutine/rte_coroutine_imp.h
 create mode 100644 lib/coroutine/version.map

-- 
2.17.1



[RFC 2/3] examples/coroutine: support coroutine examples

2023-04-24 Thread Chengwen Feng
This patch adds a coroutine example. Usage:
1. start example: dpdk-coroutine -a :7d:00.2  -l 10-11
2. will output:
Start yield coroutine test!
I am in yield coroutine 111!
I am in yield coroutine 222!
I am in yield coroutine 333!
I am in yield coroutine 111!
I am in yield coroutine 222!
I am in yield coroutine 333!
...
Start delay coroutine test!
I am in delay coroutine 111!
I am in delay coroutine 222!
I am in delay coroutine 222!
I am in delay coroutine 111!
I am in delay coroutine 222!
I am in delay coroutine 222!
...
3. use ctrl+c to exit example.

Signed-off-by: Chengwen Feng 
---
 examples/coroutine/main.c  | 153 +
 examples/coroutine/meson.build |  10 +++
 examples/meson.build   |   1 +
 3 files changed, 164 insertions(+)
 create mode 100644 examples/coroutine/main.c
 create mode 100644 examples/coroutine/meson.build

diff --git a/examples/coroutine/main.c b/examples/coroutine/main.c
new file mode 100644
index 00..2704ad1dc9
--- /dev/null
+++ b/examples/coroutine/main.c
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 HiSilicon Limited
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+
+static volatile bool force_quit;
+
+static struct rte_schedule *target_s;
+
+static void
+yield_coroutine_1(void *arg)
+{
+   RTE_SET_USED(arg);
+   int i = 10;
+   while (i--) {
+   printf("\tI am in yield coroutine 111!\n");
+   rte_co_yield();
+   }
+}
+
+static void
+yield_coroutine_2(void *arg)
+{
+   RTE_SET_USED(arg);
+   int i = 10;
+   while (i--) {
+   printf("\tI am in yield coroutine 222!\n");
+   rte_co_yield();
+   }
+}
+
+static void
+yield_coroutine_3(void *arg)
+{
+   RTE_SET_USED(arg);
+   int i = 10;
+   while (i--) {
+   printf("\tI am in yield coroutine 333!\n");
+   rte_co_yield();
+   }
+}
+
+static void
+yield_coroutine_test(void)
+{
+   printf("Start yield coroutine test!\n");
+   rte_co_create(target_s, yield_coroutine_1, NULL, 0);
+   rte_co_create(target_s, yield_coroutine_2, NULL, 0);
+   rte_co_create(target_s, yield_coroutine_3, NULL, 0);
+   sleep(1);
+}
+
+static void
+delay_coroutine_1(void *arg)
+{
+   RTE_SET_USED(arg);
+   int i = 10;
+   while (i--) {
+   printf("\tI am in delay coroutine 111!\n");
+   rte_co_delay(100 * 1000);
+   }
+}
+
+static void
+delay_coroutine_2(void *arg)
+{
+   RTE_SET_USED(arg);
+   int i = 20;
+   while (i--) {
+   printf("\tI am in delay coroutine 222!\n");
+   rte_co_delay(50 * 1000);
+   }
+}
+
+static void
+delay_coroutine_test(void)
+{
+   printf("Start delay coroutine test!\n");
+   rte_co_create(target_s, delay_coroutine_1, NULL, 0);
+   rte_co_create(target_s, delay_coroutine_2, NULL, 0);
+   sleep(1);
+}
+
+static int
+co_main_loop(void *arg)
+{
+   RTE_SET_USED(arg);
+   while (!force_quit)
+   rte_schedule_run(target_s);
+   return 0;
+}
+
+static void
+signal_handler(int signum)
+{
+   if (signum == SIGINT || signum == SIGTERM) {
+   printf("\n\nSignal %d received, preparing to exit...\n",
+   signum);
+   force_quit = true;
+   }
+}
+
+int
+main(int argc, char **argv)
+{
+   uint32_t lcore_id = rte_lcore_id();
+   int ret;
+
+   /* Init EAL. 8< */
+   ret = rte_eal_init(argc, argv);
+   if (ret < 0)
+   rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n");
+
+   force_quit = false;
+   signal(SIGINT, signal_handler);
+   signal(SIGTERM, signal_handler);
+
+   /* Check if there is enough lcores for all ports. */
+   if (rte_lcore_count() < 2)
+   rte_exit(EXIT_FAILURE,
+   "There should be at least one worker lcore.\n");
+
+   target_s = rte_schedule_create("co-sched-test", 128);
+   if (target_s == NULL)
+   rte_exit(EXIT_FAILURE,
+   "Create target scheduler failed!\n");
+
+   lcore_id = rte_get_next_lcore(lcore_id, true, true);
+   rte_eal_remote_launch(co_main_loop, NULL, lcore_id);
+
+   yield_coroutine_test();
+   delay_coroutine_test();
+
+   /* force_quit is true when we get here */
+   rte_eal_mp_wait_lcore();
+
+   /* clean up the EAL */
+   rte_eal_cleanup();
+
+   printf("Bye...\n");
+   return 0;
+}
diff --git a/examples/coroutine/meson.build b/examples/coroutine/meson.build
new file mode 100644
index 00..c3576fe2f3
--- /dev/null
+++ b/examples/coroutine/meson.build
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2023 HiSilicon Limited
+
+allow_experimental_apis = true
+
+deps += 

[RFC 3/3] net/hns3: refactor reset process with coroutine

2023-04-24 Thread Chengwen Feng
This patch adds reset mode 1, which uses a coroutine to refactor the
reset process. This is just a demo which only works for the PF driver.

Using the coroutine makes the reset process more intuitive.

Signed-off-by: Chengwen Feng 
---
 drivers/net/hns3/hns3_ethdev.c | 217 +
 drivers/net/hns3/hns3_ethdev.h |   3 +
 drivers/net/hns3/hns3_intr.c   |  38 ++
 drivers/net/hns3/meson.build   |   2 +-
 4 files changed, 259 insertions(+), 1 deletion(-)

diff --git a/drivers/net/hns3/hns3_ethdev.c b/drivers/net/hns3/hns3_ethdev.c
index 36896f8989..06ff0bcae1 100644
--- a/drivers/net/hns3/hns3_ethdev.c
+++ b/drivers/net/hns3/hns3_ethdev.c
@@ -6487,7 +6487,224 @@ static const struct eth_dev_ops hns3_eth_dev_ops = {
.eth_tx_descriptor_dump = hns3_tx_descriptor_dump,
 };
 
+#include 
+
+static const char *reset_string[HNS3_MAX_RESET] = {
+   "flr", "vf_func", "vf_pf_func", "vf_full", "vf_global",
+   "pf_func", "global", "IMP", "none",
+};
+
+static void
+hns3_clear_reset_level(struct hns3_hw *hw, uint64_t *levels)
+{
+   uint64_t merge_cnt = hw->reset.stats.merge_cnt;
+   uint64_t tmp;
+
+   switch (hw->reset.level) {
+   case HNS3_IMP_RESET:
+   hns3_atomic_clear_bit(HNS3_IMP_RESET, levels);
+   tmp = hns3_test_and_clear_bit(HNS3_GLOBAL_RESET, levels);
+   merge_cnt = tmp > 0 ? merge_cnt + 1 : merge_cnt;
+   tmp = hns3_test_and_clear_bit(HNS3_FUNC_RESET, levels);
+   merge_cnt = tmp > 0 ? merge_cnt + 1 : merge_cnt;
+   break;
+   case HNS3_GLOBAL_RESET:
+   hns3_atomic_clear_bit(HNS3_GLOBAL_RESET, levels);
+   tmp = hns3_test_and_clear_bit(HNS3_FUNC_RESET, levels);
+   merge_cnt = tmp > 0 ? merge_cnt + 1 : merge_cnt;
+   break;
+   case HNS3_FUNC_RESET:
+   hns3_atomic_clear_bit(HNS3_FUNC_RESET, levels);
+   break;
+   case HNS3_VF_RESET:
+   hns3_atomic_clear_bit(HNS3_VF_RESET, levels);
+   tmp = hns3_test_and_clear_bit(HNS3_VF_PF_FUNC_RESET, levels);
+   merge_cnt = tmp > 0 ? merge_cnt + 1 : merge_cnt;
+   tmp = hns3_test_and_clear_bit(HNS3_VF_FUNC_RESET, levels);
+   merge_cnt = tmp > 0 ? merge_cnt + 1 : merge_cnt;
+   break;
+   case HNS3_VF_FULL_RESET:
+   hns3_atomic_clear_bit(HNS3_VF_FULL_RESET, levels);
+   tmp = hns3_test_and_clear_bit(HNS3_VF_FUNC_RESET, levels);
+   merge_cnt = tmp > 0 ? merge_cnt + 1 : merge_cnt;
+   break;
+   case HNS3_VF_PF_FUNC_RESET:
+   hns3_atomic_clear_bit(HNS3_VF_PF_FUNC_RESET, levels);
+   tmp = hns3_test_and_clear_bit(HNS3_VF_FUNC_RESET, levels);
+   merge_cnt = tmp > 0 ? merge_cnt + 1 : merge_cnt;
+   break;
+   case HNS3_VF_FUNC_RESET:
+   hns3_atomic_clear_bit(HNS3_VF_FUNC_RESET, levels);
+   break;
+   case HNS3_FLR_RESET:
+   hns3_atomic_clear_bit(HNS3_FLR_RESET, levels);
+   break;
+   case HNS3_NONE_RESET:
+   default:
+   return;
+   };
+
+   if (merge_cnt != hw->reset.stats.merge_cnt) {
+   hns3_warn(hw,
+ "No need to do low-level reset after %s reset. "
+ "merge cnt: %" PRIu64 " total merge cnt: %" PRIu64,
+ reset_string[hw->reset.level],
+ hw->reset.stats.merge_cnt - merge_cnt,
+ hw->reset.stats.merge_cnt);
+   hw->reset.stats.merge_cnt = merge_cnt;
+   }
+}
+
+static void hns3_reset_coroutine(void *arg)
+{
+   struct hns3_hw *hw = (struct hns3_hw *)arg;
+   struct hns3_adapter *hns = HNS3_DEV_HW_TO_ADAPTER(hw);
+   struct timeval tv, tv_delta;
+   int ret, i;
+
+   /*
+* calc reset level.
+*/
+   hns3_clock_gettime(&hw->reset.start_time);
+   hw->reset.level = hns3_get_reset_level(hns, &hw->reset.pending);
+   if (hw->reset.level == HNS3_NONE_RESET)
+   hw->reset.level = HNS3_IMP_RESET;
+   hns3_warn(hw, "start reset level: %s", reset_string[hw->reset.level]);
+
+
+   /*
+* stop service.
+*/
+   ret = hns3_stop_service(hns);
+   hns3_clock_gettime(&tv);
+   if (ret) {
+   hns3_warn(hw, "Reset step1 down fail=%d time=%ld.%.6ld",
+ ret, tv.tv_sec, tv.tv_usec);
+   return;
+   }
+   hns3_warn(hw, "Reset step1 down success time=%ld.%.6ld",
+ tv.tv_sec, tv.tv_usec);
+
+
+   /*
+* yield CPU to schedule other function's reset.
+*/
+   rte_co_yield();
+
+
+   /*
+* prepare reset.
+*/
+   ret = hns3_prepare_reset(hns);
+   hns3_clock_gettime(&tv);
+   if (ret) {
+   hns3_warn(hw,
+ "Reset step2 pr

[RFC 1/3] lib/coroutine: add coroutine library

2023-04-24 Thread Chengwen Feng
This patch adds the coroutine library. The main elements are:
1. scheduler: a container of coroutines, which is responsible for
scheduling coroutines.
2. coroutine: the minimum scheduling unit; it is associated with one
scheduler.

In the coroutine callback, the application can invoke rte_co_yield()
to give up the CPU, and rte_co_delay() to delay for the specified
number of microseconds.

Signed-off-by: Chengwen Feng 
---
 lib/coroutine/meson.build |   8 ++
 lib/coroutine/rte_coroutine.c | 190 ++
 lib/coroutine/rte_coroutine.h | 110 +
 lib/coroutine/rte_coroutine_imp.h |  46 
 lib/coroutine/version.map |  11 ++
 lib/meson.build   |   1 +
 6 files changed, 366 insertions(+)
 create mode 100644 lib/coroutine/meson.build
 create mode 100644 lib/coroutine/rte_coroutine.c
 create mode 100644 lib/coroutine/rte_coroutine.h
 create mode 100644 lib/coroutine/rte_coroutine_imp.h
 create mode 100644 lib/coroutine/version.map

diff --git a/lib/coroutine/meson.build b/lib/coroutine/meson.build
new file mode 100644
index 00..2064fb1909
--- /dev/null
+++ b/lib/coroutine/meson.build
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2023 HiSilicon Limited.
+
+sources = files('rte_coroutine.c')
+headers = files('rte_coroutine.h')
+indirect_headers += files('rte_coroutine_imp.h')
+
+deps += ['ring']
diff --git a/lib/coroutine/rte_coroutine.c b/lib/coroutine/rte_coroutine.c
new file mode 100644
index 00..07c79fc901
--- /dev/null
+++ b/lib/coroutine/rte_coroutine.c
@@ -0,0 +1,190 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 HiSilicon Limited
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#include "rte_coroutine.h"
+#include "rte_coroutine_imp.h"
+
+#define FATAL(fmt, args...) printf("[FATAL] %s() %d: " fmt "\n", __func__, 
__LINE__, ##args)
+
+static __thread struct rte_schedule *co_schedule;
+
+struct rte_schedule *
+rte_schedule_create(const char *name, uint32_t max_coroutines)
+{
+   struct rte_schedule *s = calloc(1, sizeof(struct rte_schedule));
+   if (s == NULL)
+   return NULL;
+
+   s->ring = rte_ring_create(name, max_coroutines, rte_socket_id(),
+ RING_F_SC_DEQ);
+   if (s->ring == NULL) {
+   free(s);
+   return NULL;
+   }
+
+   s->max_coroutines = max_coroutines;
+
+   return s;
+}
+
+static void
+co_run_func(uint32_t low, uint32_t hi)
+{
+   uintptr_t ptr = (uint64_t)low | ((uint64_t)hi << 32);
+   struct rte_cocontext *co = (struct rte_cocontext *)ptr;
+   co->cb(co->arg);
+   /* Run complete, so free it. */
+   free(co->stack);
+   free(co);
+}
+
+int
+rte_schedule_run(struct rte_schedule *s)
+{
+   struct rte_cocontext *co = NULL;
+   uintptr_t ptr;
+
+   /* Set local thread variable as input argument. */
+   co_schedule = s;
+
+   while (!rte_ring_empty(s->ring)) {
+   rte_ring_dequeue(s->ring, (void **)&co);
+   if (co->state == COROUTINE_READY) {
+   getcontext(&co->ctx);
+   co->ctx.uc_stack.ss_sp = co->stack;
+   co->ctx.uc_stack.ss_size = co->stack_sz;
+   co->ctx.uc_link = &s->main;
+   co->state = COROUTINE_RUNNING;
+   s->running = co;
+   ptr = (uintptr_t)co;
+   makecontext(&co->ctx, (void (*)(void))co_run_func, 2,
+   (uint32_t)ptr, (uint32_t)(ptr >> 32));
+   swapcontext(&s->main, &co->ctx);
+   } else if (co->state == COROUTINE_SUSPEND) {
+   co->state = COROUTINE_RUNNING;
+   s->running = co;
+   swapcontext(&s->main, &co->ctx);
+   } else {
+   FATAL("invalid state!");
+   }
+   }
+
+   while (s->yield_head != NULL) {
+   co = s->yield_head;
+   s->yield_head = co->yield_next;
+   if (co->state == COROUTINE_YIELD) {
+   co->state = COROUTINE_RUNNING;
+   s->running = co;
+   swapcontext(&s->main, &co->ctx);
+   } else {
+   FATAL("invalid yield state!");
+   }
+   }
+
+   return 0;
+}
+
+int
+rte_co_create(struct rte_schedule *s, coroutine_callback_t cb, void *arg, 
uint32_t stack_sz)
+{
+   struct rte_cocontext *co = calloc(1, sizeof(struct rte_cocontext));
+   int ret;
+   if (co == NULL)
+   return -ENOMEM;
+
+   co->owner = s;
+   co->state = COROUTINE_READY;
+   co->cb = cb;
+   co->arg = arg;
+   if (stack_sz < MIN_STACK_SIZE)
+   stack_sz = MIN_STACK_SIZE;
+   co->stack_sz = stack_sz;
+   co->stack = calloc(1, stack_sz);
+ 

[PATCH v2] dts: replace pexpect with fabric

2023-04-24 Thread Juraj Linkeš
Pexpect is not a dedicated SSH connection library while Fabric is. With
Fabric, all SSH-related logic is provided and we can just focus on
what's DTS specific.

Signed-off-by: Juraj Linkeš 
---
 doc/guides/tools/dts.rst  |  29 +-
 dts/conf.yaml |   2 +-
 dts/framework/exception.py|  10 +-
 dts/framework/remote_session/linux_session.py |  31 +-
 dts/framework/remote_session/os_session.py|  51 +++-
 dts/framework/remote_session/posix_session.py |  48 +--
 .../remote_session/remote/remote_session.py   |  35 ++-
 .../remote_session/remote/ssh_session.py  | 287 ++
 dts/framework/testbed_model/sut_node.py   |  12 +-
 dts/framework/utils.py|   9 -
 dts/poetry.lock   | 161 --
 dts/pyproject.toml|   2 +-
 12 files changed, 376 insertions(+), 301 deletions(-)

diff --git a/doc/guides/tools/dts.rst b/doc/guides/tools/dts.rst
index ebd6dceb6a..d15826c098 100644
--- a/doc/guides/tools/dts.rst
+++ b/doc/guides/tools/dts.rst
@@ -95,9 +95,14 @@ Setting up DTS environment
 
 #. **SSH Connection**
 
-   DTS uses Python pexpect for SSH connections between DTS environment and the 
other hosts.
-   The pexpect implementation is a wrapper around the ssh command in the DTS 
environment.
-   This means it'll use the SSH agent providing the ssh command and its keys.
+   DTS uses the Fabric Python library for SSH connections between DTS 
environment
+   and the other hosts.
+   The authentication method used is pubkey authentication.
+   Fabric tries to use a passed key/certificate,
+   then any key it can find through an SSH agent,
+   then any "id_rsa", "id_dsa" or "id_ecdsa" key discoverable in ``~/.ssh/``
+   (with any matching OpenSSH-style certificates).
+   DTS doesn't pass any keys, so Fabric tries to use the other two methods.
 
 
 Setting up System Under Test
@@ -132,6 +137,21 @@ There are two areas that need to be set up on a System 
Under Test:
  It's possible to use the hugepage configuration already present on the 
SUT.
  If you wish to do so, don't specify the hugepage configuration in the DTS 
config file.
 
+#. **User with administrator privileges**
+
+.. _sut_admin_user:
+
+   DTS needs administrator privileges to run DPDK applications (such as 
testpmd) on the SUT.
+   The SUT user must be able to run commands in privileged mode without being 
asked for a password.
+   On most Linux distributions, it's a matter of setting up passwordless sudo:
+
+   #. Run ``sudo visudo`` and check that it contains ``%sudo   ALL=(ALL:ALL) 
ALL``.
+
+   #. Add the SUT user to the sudo group with:
+
+   .. code-block:: console
+
+  sudo usermod -aG sudo 
 
 Running DTS
 ---
@@ -151,7 +171,8 @@ which is a template that illustrates what can be configured 
in DTS:
  :start-at: executions:
 
 
-The user must be root or any other user with prompt starting with ``#``.
+The user must have :ref:`administrator privileges <sut_admin_user>`
+which don't require password authentication.
 The other fields are mostly self-explanatory
 and documented in more detail in 
``dts/framework/config/conf_yaml_schema.json``.
 
diff --git a/dts/conf.yaml b/dts/conf.yaml
index a9bd8a3ecf..129801d87c 100644
--- a/dts/conf.yaml
+++ b/dts/conf.yaml
@@ -16,7 +16,7 @@ executions:
 nodes:
   - name: "SUT 1"
 hostname: sut1.change.me.localhost
-user: root
+user: dtsuser
 arch: x86_64
 os: linux
 lcores: ""
diff --git a/dts/framework/exception.py b/dts/framework/exception.py
index ca353d98fc..44ff4e979a 100644
--- a/dts/framework/exception.py
+++ b/dts/framework/exception.py
@@ -62,13 +62,19 @@ class SSHConnectionError(DTSError):
 """
 
 host: str
+errors: list[str]
 severity: ClassVar[ErrorSeverity] = ErrorSeverity.SSH_ERR
 
-def __init__(self, host: str):
+def __init__(self, host: str, errors: list[str] | None = None):
 self.host = host
+self.errors = [] if errors is None else errors
 
 def __str__(self) -> str:
-return f"Error trying to connect with {self.host}"
+message = f"Error trying to connect with {self.host}."
+if self.errors:
+message += f" Errors encountered while retrying: {', 
'.join(self.errors)}"
+
+return message
 
 
 class SSHSessionDeadError(DTSError):
diff --git a/dts/framework/remote_session/linux_session.py 
b/dts/framework/remote_session/linux_session.py
index a1e3bc3a92..f13f399121 100644
--- a/dts/framework/remote_session/linux_session.py
+++ b/dts/framework/remote_session/linux_session.py
@@ -14,10 +14,11 @@ class LinuxSession(PosixSession):
 The implementation of non-Posix compliant parts of Linux remote sessions.
 """
 
+def _get_privileged_command(self, command: str) -> str:
+return f"sudo -- sh -c '{command}'"
+
 def get_remote_cpus(self, use_first_core: bool) -> list[LogicalCore]:
-cpu_info = self.remote_sessio

RE: [EXT] [PATCH v1] crypto/ipsec_mb: add digest encrypted feature in AESNI_MB

2023-04-24 Thread Dooley, Brian
Hi Akhil,

> -Original Message-
> From: Akhil Goyal 
> Sent: Monday 24 April 2023 06:46
> To: Dooley, Brian ; Ji, Kai ; De
> Lara Guarch, Pablo 
> Cc: dev@dpdk.org
> Subject: RE: [EXT] [PATCH v1] crypto/ipsec_mb: add digest encrypted feature
> in AESNI_MB
> 
> > Subject: [EXT] [PATCH v1] crypto/ipsec_mb: add digest encrypted
> > feature in AESNI_MB AESNI_MB PMD does not support Digest Encrypted.
> > This patch adds partial support for this feature.
> 
> I do not get it, what is the point of adding partial support.
> It should be added when it is supported.
> Also whenever, you add, add in documentation as well.
Apologies for this. This patch has a bit more work to do and should have been 
an RFC. I'm confident that it can be completed for the release.
> 
> 
> >
> > Signed-off-by: Brian Dooley 
> > ---
> > Some out-of-place tests are still failing.
> > Only some in-place tests are passing.
> > Working on adding support for this feature in v2.
> 
> You cannot just send half cooked patches.
> 
> > ---
> >  app/test/1.diff| 0
> >  drivers/crypto/ipsec_mb/pmd_aesni_mb.c | 3 ++-
> >  2 files changed, 2 insertions(+), 1 deletion(-)  create mode 100644
> > app/test/1.diff
> >
> > diff --git a/app/test/1.diff b/app/test/1.diff new file mode 100644
> > index 00..e69de29bb2
> This file is accidently added.

Thanks,
Brian


[PATCH v3 02/11] net/octeon_ep: support CNX10K series SoC

2023-04-24 Thread Sathesh Edara
Adds the required functionality in the Octeon endpoint driver
to support the following CNX10K series endpoint devices.
- CN10KA
- CN10KB
- CNF10KA
- CNF10KB

Signed-off-by: Sathesh Edara 
---
 drivers/net/octeon_ep/cnxk_ep_vf.h|  5 -
 drivers/net/octeon_ep/otx_ep_ethdev.c | 21 +
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.h 
b/drivers/net/octeon_ep/cnxk_ep_vf.h
index 7162461dd9..aaa5774552 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.h
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.h
@@ -134,7 +134,10 @@
 #define CNXK_EP_R_OUT_CTL_ROR_P  (1ULL << 24)
 #define CNXK_EP_R_OUT_CTL_IMODE  (1ULL << 23)
 
-#define PCI_DEVID_CNXK_EP_NET_VF   0xB903
+#define PCI_DEVID_CN10KA_EP_NET_VF 0xB903
+#define PCI_DEVID_CNF10KA_EP_NET_VF0xBA03
+#define PCI_DEVID_CNF10KB_EP_NET_VF0xBC03
+#define PCI_DEVID_CN10KB_EP_NET_VF 0xBD03
 
 int
 cnxk_ep_vf_setup_device(struct otx_ep_device *sdpvf);
diff --git a/drivers/net/octeon_ep/otx_ep_ethdev.c 
b/drivers/net/octeon_ep/otx_ep_ethdev.c
index 24f62c3e49..b23d52ff84 100644
--- a/drivers/net/octeon_ep/otx_ep_ethdev.c
+++ b/drivers/net/octeon_ep/otx_ep_ethdev.c
@@ -111,7 +111,10 @@ otx_ep_chip_specific_setup(struct otx_ep_device *otx_epvf)
ret = otx2_ep_vf_setup_device(otx_epvf);
otx_epvf->fn_list.disable_io_queues(otx_epvf);
break;
-   case PCI_DEVID_CNXK_EP_NET_VF:
+   case PCI_DEVID_CN10KA_EP_NET_VF:
+   case PCI_DEVID_CN10KB_EP_NET_VF:
+   case PCI_DEVID_CNF10KA_EP_NET_VF:
+   case PCI_DEVID_CNF10KB_EP_NET_VF:
otx_epvf->chip_id = dev_id;
ret = cnxk_ep_vf_setup_device(otx_epvf);
otx_epvf->fn_list.disable_io_queues(otx_epvf);
@@ -150,7 +153,10 @@ otx_epdev_init(struct otx_ep_device *otx_epvf)
 otx_epvf->chip_id == PCI_DEVID_CNF95N_EP_NET_VF ||
 otx_epvf->chip_id == PCI_DEVID_CNF95O_EP_NET_VF)
otx_epvf->eth_dev->tx_pkt_burst = &otx2_ep_xmit_pkts;
-   else if (otx_epvf->chip_id == PCI_DEVID_CNXK_EP_NET_VF)
+   else if (otx_epvf->chip_id == PCI_DEVID_CN10KA_EP_NET_VF ||
+otx_epvf->chip_id == PCI_DEVID_CN10KB_EP_NET_VF ||
+otx_epvf->chip_id == PCI_DEVID_CNF10KA_EP_NET_VF ||
+otx_epvf->chip_id == PCI_DEVID_CNF10KB_EP_NET_VF)
otx_epvf->eth_dev->tx_pkt_burst = &otx2_ep_xmit_pkts;
ethdev_queues = (uint32_t)(otx_epvf->sriov_info.rings_per_vf);
otx_epvf->max_rx_queues = ethdev_queues;
@@ -501,7 +507,11 @@ otx_ep_eth_dev_init(struct rte_eth_dev *eth_dev)
if (otx_epvf->chip_id == PCI_DEVID_CN9K_EP_NET_VF ||
otx_epvf->chip_id == PCI_DEVID_CN98XX_EP_NET_VF ||
otx_epvf->chip_id == PCI_DEVID_CNF95N_EP_NET_VF ||
-   otx_epvf->chip_id == PCI_DEVID_CNF95O_EP_NET_VF)
+   otx_epvf->chip_id == PCI_DEVID_CNF95O_EP_NET_VF ||
+   otx_epvf->chip_id == PCI_DEVID_CN10KA_EP_NET_VF ||
+   otx_epvf->chip_id == PCI_DEVID_CN10KB_EP_NET_VF ||
+   otx_epvf->chip_id == PCI_DEVID_CNF10KA_EP_NET_VF ||
+   otx_epvf->chip_id == PCI_DEVID_CNF10KB_EP_NET_VF)
otx_epvf->pkind = SDP_OTX2_PKIND_FS0;
else
otx_epvf->pkind = SDP_PKIND;
@@ -533,7 +543,10 @@ static const struct rte_pci_id pci_id_otx_ep_map[] = {
{ RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CN98XX_EP_NET_VF) },
{ RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CNF95N_EP_NET_VF) },
{ RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CNF95O_EP_NET_VF) },
-   { RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CNXK_EP_NET_VF) },
+   { RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CN10KA_EP_NET_VF) },
+   { RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CN10KB_EP_NET_VF) },
+   { RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CNF10KA_EP_NET_VF) },
+   { RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CNF10KB_EP_NET_VF) },
{ .vendor_id = 0, /* sentinel */ }
 };
 
-- 
2.31.1



[PATCH v3 03/11] net/octeon_ep: support error propagation

2023-04-24 Thread Sathesh Edara
Adds detection of loop limits being hit, and propagates
errors up the call chain when this happens.

Signed-off-by: Sathesh Edara 
---
 drivers/net/octeon_ep/cnxk_ep_vf.c| 51 +++
 drivers/net/octeon_ep/otx2_ep_vf.c| 49 ++
 drivers/net/octeon_ep/otx_ep_common.h |  6 +--
 drivers/net/octeon_ep/otx_ep_ethdev.c | 27 +++---
 drivers/net/octeon_ep/otx_ep_rxtx.c   | 51 +--
 drivers/net/octeon_ep/otx_ep_vf.c | 71 +++
 6 files changed, 155 insertions(+), 100 deletions(-)

diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.c 
b/drivers/net/octeon_ep/cnxk_ep_vf.c
index 3427fb213b..1a92887109 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.c
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.c
@@ -47,36 +47,43 @@ cnxk_ep_vf_setup_global_oq_reg(struct otx_ep_device 
*otx_ep, int q_no)
oct_ep_write64(reg_val, otx_ep->hw_addr + CNXK_EP_R_OUT_CONTROL(q_no));
 }
 
-static void
+static int
 cnxk_ep_vf_setup_global_input_regs(struct otx_ep_device *otx_ep)
 {
uint64_t q_no = 0ull;
 
for (q_no = 0; q_no < (otx_ep->sriov_info.rings_per_vf); q_no++)
cnxk_ep_vf_setup_global_iq_reg(otx_ep, q_no);
+   return 0;
 }
 
-static void
+static int
 cnxk_ep_vf_setup_global_output_regs(struct otx_ep_device *otx_ep)
 {
uint32_t q_no;
 
for (q_no = 0; q_no < (otx_ep->sriov_info.rings_per_vf); q_no++)
cnxk_ep_vf_setup_global_oq_reg(otx_ep, q_no);
+   return 0;
 }
 
-static void
+static int
 cnxk_ep_vf_setup_device_regs(struct otx_ep_device *otx_ep)
 {
-   cnxk_ep_vf_setup_global_input_regs(otx_ep);
-   cnxk_ep_vf_setup_global_output_regs(otx_ep);
+   int ret;
+
+   ret = cnxk_ep_vf_setup_global_input_regs(otx_ep);
+   if (ret)
+   return ret;
+   ret = cnxk_ep_vf_setup_global_output_regs(otx_ep);
+   return ret;
 }
 
-static void
+static int
 cnxk_ep_vf_setup_iq_regs(struct otx_ep_device *otx_ep, uint32_t iq_no)
 {
struct otx_ep_instr_queue *iq = otx_ep->instr_queue[iq_no];
-   uint64_t loop = OTX_EP_BUSY_LOOP_COUNT;
+   int loop = OTX_EP_BUSY_LOOP_COUNT;
volatile uint64_t reg_val = 0ull;
 
reg_val = oct_ep_read64(otx_ep->hw_addr + CNXK_EP_R_IN_CONTROL(iq_no));
@@ -91,9 +98,9 @@ cnxk_ep_vf_setup_iq_regs(struct otx_ep_device *otx_ep, 
uint32_t iq_no)
} while ((!(reg_val & CNXK_EP_R_IN_CTL_IDLE)) && loop--);
}
 
-   if (!loop) {
+   if (loop < 0) {
otx_ep_err("IDLE bit is not set\n");
-   return;
+   return -EIO;
}
 
/* Write the start of the input queue's ring and its size  */
@@ -115,9 +122,9 @@ cnxk_ep_vf_setup_iq_regs(struct otx_ep_device *otx_ep, 
uint32_t iq_no)
rte_delay_ms(1);
} while (reg_val != 0 && loop--);
 
-   if (!loop) {
+   if (loop < 0) {
otx_ep_err("INST CNT REGISTER is not zero\n");
-   return;
+   return -EIO;
}
 
/* IN INTR_THRESHOLD is set to max() which disable the IN INTR
@@ -125,14 +132,15 @@ cnxk_ep_vf_setup_iq_regs(struct otx_ep_device *otx_ep, 
uint32_t iq_no)
 */
oct_ep_write64(OTX_EP_CLEAR_SDP_IN_INT_LVLS,
   otx_ep->hw_addr + CNXK_EP_R_IN_INT_LEVELS(iq_no));
+   return 0;
 }
 
-static void
+static int
 cnxk_ep_vf_setup_oq_regs(struct otx_ep_device *otx_ep, uint32_t oq_no)
 {
volatile uint64_t reg_val = 0ull;
uint64_t oq_ctl = 0ull;
-   uint64_t loop = OTX_EP_BUSY_LOOP_COUNT;
+   int loop = OTX_EP_BUSY_LOOP_COUNT;
struct otx_ep_droq *droq = otx_ep->droq[oq_no];
 
/* Wait on IDLE to set to 1, supposed to configure BADDR
@@ -145,9 +153,9 @@ cnxk_ep_vf_setup_oq_regs(struct otx_ep_device *otx_ep, 
uint32_t oq_no)
rte_delay_ms(1);
}
 
-   if (!loop) {
+   if (loop < 0) {
otx_ep_err("OUT CNT REGISTER value is zero\n");
-   return;
+   return -EIO;
}
 
oct_ep_write64(droq->desc_ring_dma, otx_ep->hw_addr + 
CNXK_EP_R_OUT_SLIST_BADDR(oq_no));
@@ -181,9 +189,9 @@ cnxk_ep_vf_setup_oq_regs(struct otx_ep_device *otx_ep, 
uint32_t oq_no)
rte_delay_ms(1);
}
 
-   if (!loop) {
+   if (loop < 0) {
otx_ep_err("Packets credit register value is not cleared\n");
-   return;
+   return -EIO;
}
 
otx_ep_dbg("SDP_R[%d]_credit:%x", oq_no, 
rte_read32(droq->pkts_credit_reg));
@@ -201,18 +209,19 @@ cnxk_ep_vf_setup_oq_regs(struct otx_ep_device *otx_ep, 
uint32_t oq_no)
rte_delay_ms(1);
}
 
-   if (!loop) {
+   if (loop < 0) {
otx_ep_err("Packets sent register value is not cleared\n");
-   return;
+   return -EIO;
}
 
otx_ep_dbg("SDP_R[%d]_sent: %x", oq_no, 
rte_read32(droq->pkts_sent_reg));
+   

[PATCH v3 04/11] net/octeon_ep: support IQ/OQ reset

2023-04-24 Thread Sathesh Edara
Adds input and output queue reset functionality,
as well as receive queue interrupt enable and
disable functionality.

Signed-off-by: Sathesh Edara 
---
 drivers/net/octeon_ep/otx2_ep_vf.c| 193 +-
 drivers/net/octeon_ep/otx2_ep_vf.h|  61 ++--
 drivers/net/octeon_ep/otx_ep_common.h |   5 +-
 3 files changed, 244 insertions(+), 15 deletions(-)

diff --git a/drivers/net/octeon_ep/otx2_ep_vf.c 
b/drivers/net/octeon_ep/otx2_ep_vf.c
index 3ffc7275c7..3e4895862b 100644
--- a/drivers/net/octeon_ep/otx2_ep_vf.c
+++ b/drivers/net/octeon_ep/otx2_ep_vf.c
@@ -9,6 +9,117 @@
 #include "otx_ep_common.h"
 #include "otx2_ep_vf.h"
 
+static int otx2_vf_enable_rxq_intr(struct otx_ep_device *otx_epvf,
+  uint16_t q_no);
+
+static int
+otx2_vf_reset_iq(struct otx_ep_device *otx_ep, int q_no)
+{
+   int loop = SDP_VF_BUSY_LOOP_COUNT;
+   volatile uint64_t d64 = 0ull;
+
+   /* There is no RST for a ring.
+* Clear all registers one by one after disabling the ring
+*/
+
+   otx2_write64(d64, otx_ep->hw_addr + SDP_VF_R_IN_ENABLE(q_no));
+   otx2_write64(d64, otx_ep->hw_addr + SDP_VF_R_IN_INSTR_BADDR(q_no));
+   otx2_write64(d64, otx_ep->hw_addr + SDP_VF_R_IN_INSTR_RSIZE(q_no));
+
+   d64 = 0xFFFFFFFFFFFFFFFF; /* ~0ull */
+   otx2_write64(d64, otx_ep->hw_addr + SDP_VF_R_IN_INSTR_DBELL(q_no));
+   d64 = otx2_read64(otx_ep->hw_addr + SDP_VF_R_IN_INSTR_DBELL(q_no));
+
+   while ((d64 != 0) && loop--) {
+   rte_delay_ms(1);
+   d64 = otx2_read64(otx_ep->hw_addr +
+ SDP_VF_R_IN_INSTR_DBELL(q_no));
+   }
+   if (loop < 0) {
+   otx_ep_err("%s: doorbell init retry limit exceeded.\n", 
__func__);
+   return -EIO;
+   }
+
+   loop = SDP_VF_BUSY_LOOP_COUNT;
+   do {
+   d64 = otx2_read64(otx_ep->hw_addr + SDP_VF_R_IN_CNTS(q_no));
+   otx2_write64(d64, otx_ep->hw_addr + SDP_VF_R_IN_CNTS(q_no));
+   rte_delay_ms(1);
+   } while ((d64 & ~SDP_VF_R_IN_CNTS_OUT_INT) != 0 && loop--);
+   if (loop < 0) {
+   otx_ep_err("%s: in_cnts init retry limit exceeded.\n", 
__func__);
+   return -EIO;
+   }
+
+   d64 = 0ull;
+   otx2_write64(d64, otx_ep->hw_addr + SDP_VF_R_IN_INT_LEVELS(q_no));
+   otx2_write64(d64, otx_ep->hw_addr + SDP_VF_R_IN_PKT_CNT(q_no));
+   otx2_write64(d64, otx_ep->hw_addr + SDP_VF_R_IN_BYTE_CNT(q_no));
+
+   return 0;
+}
+
+static int
+otx2_vf_reset_oq(struct otx_ep_device *otx_ep, int q_no)
+{
+   int loop = SDP_VF_BUSY_LOOP_COUNT;
+   volatile uint64_t d64 = 0ull;
+
+   otx2_write64(d64, otx_ep->hw_addr + SDP_VF_R_OUT_ENABLE(q_no));
+
+   otx2_write64(d64, otx_ep->hw_addr + SDP_VF_R_OUT_SLIST_BADDR(q_no));
+
+   otx2_write64(d64, otx_ep->hw_addr + SDP_VF_R_OUT_SLIST_RSIZE(q_no));
+
+   d64 = 0xFFFFFFFFFFFFFFFF;
+   otx2_write64(d64, otx_ep->hw_addr + SDP_VF_R_OUT_SLIST_DBELL(q_no));
+   d64 = otx2_read64(otx_ep->hw_addr + SDP_VF_R_OUT_SLIST_DBELL(q_no));
+   while ((d64 != 0) && loop--) {
+   rte_delay_ms(1);
+   d64 = otx2_read64(otx_ep->hw_addr +
+ SDP_VF_R_OUT_SLIST_DBELL(q_no));
+   }
+   if (loop < 0) {
+   otx_ep_err("%s: doorbell init retry limit exceeded.\n", 
__func__);
+   return -EIO;
+   }
+
+   if (otx2_read64(otx_ep->hw_addr + SDP_VF_R_OUT_CNTS(q_no))
+   & SDP_VF_R_OUT_CNTS_OUT_INT) {
+   /*
+* The OUT_INT bit is set.  This interrupt must be enabled in
+* order to clear the interrupt.  Interrupts are disabled
+* at the end of this function.
+*/
+   union out_int_lvl_t out_int_lvl;
+
+   out_int_lvl.d64 = otx2_read64(otx_ep->hw_addr +
+   SDP_VF_R_OUT_INT_LEVELS(q_no));
+   out_int_lvl.s.time_cnt_en = 1;
+   out_int_lvl.s.cnt = 0;
+   otx2_write64(out_int_lvl.d64, otx_ep->hw_addr +
+   SDP_VF_R_OUT_INT_LEVELS(q_no));
+   }
+
+   loop = SDP_VF_BUSY_LOOP_COUNT;
+   do {
+   d64 = otx2_read64(otx_ep->hw_addr + SDP_VF_R_OUT_CNTS(q_no));
+   otx2_write64(d64, otx_ep->hw_addr + SDP_VF_R_OUT_CNTS(q_no));
+   rte_delay_ms(1);
+   } while ((d64 & ~SDP_VF_R_OUT_CNTS_IN_INT) != 0 && loop--);
+   if (loop < 0) {
+   otx_ep_err("%s: out_cnts init retry limit exceeded.\n", 
__func__);
+   return -EIO;
+   }
+
+   d64 = 0ull;
+   otx2_write64(d64, otx_ep->hw_addr + SDP_VF_R_OUT_INT_LEVELS(q_no));
+   otx2_write64(d64, otx_ep->hw_addr + SDP_VF_R_OUT_PKT_CNT(q_no));
+   otx2_write64(d64, otx_ep->hw_addr + SDP_VF_R_OUT_BYTE_CNT(q_no));
+
+   return 0;
+}
+
 static void
 otx2_vf_setup_global_iq_reg(struct otx_ep_de
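
Since these rings have no reset bit, the sequence above clears each register
manually: write all-ones to the doorbell, then poll until hardware drains it
back to zero. A condensed, self-contained model of that idea (the "register"
is simulated with a plain variable; real code uses MMIO accessors and a 1 ms
delay per retry):

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>

    #define BUSY_LOOP_COUNT 100

    static uint64_t dbell_reg; /* simulated doorbell count register */

    static int reset_doorbell(void)
    {
        int loop = BUSY_LOOP_COUNT;

        dbell_reg = ~0ull;  /* writing all-ones resets the count */
        dbell_reg = 0;      /* hardware clears it; modelled synchronously */

        /* Poll until the register reads back zero or retries run out. */
        while (dbell_reg != 0 && loop--)
            ; /* real code sleeps ~1 ms per iteration */

        return (loop < 0) ? -EIO : 0;
    }

    int main(void)
    {
        printf("doorbell reset %s\n", reset_doorbell() == 0 ? "ok" : "timed out");
        return 0;
    }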

[PATCH v3 00/11] extend octeon ep driver functionality

2023-04-24 Thread Sathesh Edara
This patch set adds the following support to the
octeon_ep driver:
- extends support for newer SoCs
- adds new features such as IQ/OQ reset, ISM, and
a mailbox between VF and PF
- sets the watermark level for output queues

V3 changes:
- Addresses V2 review comments

Sathesh Edara (11):
  net/octeon_ep: support cnf95n and cnf95o SoC
  net/octeon_ep: support CNX10K series SoC
  net/octeon_ep: support error propagation
  net/octeon_ep: support IQ/OQ reset
  devtools: add acronym in dictionary for commit checks
  net/octeon_ep: support ISM
  net/octeon_ep: flush pending DMA operations
  net/octeon_ep: update queue size checks
  net/octeon_ep: support mailbox between VF and PF
  net/octeon_ep: set watermark for output queues
  net/octeon_ep: set secondary process dev ops

 devtools/words-case.txt|   1 +
 doc/guides/nics/features/octeon_ep.ini |   2 +
 drivers/net/octeon_ep/cnxk_ep_vf.c |  92 ++--
 drivers/net/octeon_ep/cnxk_ep_vf.h |  29 ++-
 drivers/net/octeon_ep/meson.build  |   1 +
 drivers/net/octeon_ep/otx2_ep_vf.c | 279 --
 drivers/net/octeon_ep/otx2_ep_vf.h |  77 +-
 drivers/net/octeon_ep/otx_ep_common.h  |  71 +-
 drivers/net/octeon_ep/otx_ep_ethdev.c  | 264 ++---
 drivers/net/octeon_ep/otx_ep_mbox.c| 309 +
 drivers/net/octeon_ep/otx_ep_mbox.h| 163 +
 drivers/net/octeon_ep/otx_ep_rxtx.c| 111 +
 drivers/net/octeon_ep/otx_ep_rxtx.h|   4 +-
 drivers/net/octeon_ep/otx_ep_vf.c  |  71 --
 14 files changed, 1319 insertions(+), 155 deletions(-)
 create mode 100644 drivers/net/octeon_ep/otx_ep_mbox.c
 create mode 100644 drivers/net/octeon_ep/otx_ep_mbox.h

-- 
2.31.1



[PATCH v3 01/11] net/octeon_ep: support cnf95n and cnf95o SoC

2023-04-24 Thread Sathesh Edara
Adds the required functionality in the Octeon endpoint
driver to support the cnf95n and cnf95o endpoint devices.

Signed-off-by: Sathesh Edara 
---
 drivers/net/octeon_ep/otx2_ep_vf.h|  2 ++
 drivers/net/octeon_ep/otx_ep_ethdev.c | 13 +++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/net/octeon_ep/otx2_ep_vf.h 
b/drivers/net/octeon_ep/otx2_ep_vf.h
index 757eeae9f0..8f00acd737 100644
--- a/drivers/net/octeon_ep/otx2_ep_vf.h
+++ b/drivers/net/octeon_ep/otx2_ep_vf.h
@@ -115,6 +115,8 @@
 
 #define PCI_DEVID_CN9K_EP_NET_VF   0xB203 /* OCTEON 9 EP mode */
 #define PCI_DEVID_CN98XX_EP_NET_VF 0xB103
+#define PCI_DEVID_CNF95N_EP_NET_VF 0xB403
+#define PCI_DEVID_CNF95O_EP_NET_VF 0xB603
 
 int
 otx2_ep_vf_setup_device(struct otx_ep_device *sdpvf);
diff --git a/drivers/net/octeon_ep/otx_ep_ethdev.c 
b/drivers/net/octeon_ep/otx_ep_ethdev.c
index f43db1e398..24f62c3e49 100644
--- a/drivers/net/octeon_ep/otx_ep_ethdev.c
+++ b/drivers/net/octeon_ep/otx_ep_ethdev.c
@@ -105,6 +105,8 @@ otx_ep_chip_specific_setup(struct otx_ep_device *otx_epvf)
break;
case PCI_DEVID_CN9K_EP_NET_VF:
case PCI_DEVID_CN98XX_EP_NET_VF:
+   case PCI_DEVID_CNF95N_EP_NET_VF:
+   case PCI_DEVID_CNF95O_EP_NET_VF:
otx_epvf->chip_id = dev_id;
ret = otx2_ep_vf_setup_device(otx_epvf);
otx_epvf->fn_list.disable_io_queues(otx_epvf);
@@ -144,7 +146,9 @@ otx_epdev_init(struct otx_ep_device *otx_epvf)
if (otx_epvf->chip_id == PCI_DEVID_OCTEONTX_EP_VF)
otx_epvf->eth_dev->tx_pkt_burst = &otx_ep_xmit_pkts;
else if (otx_epvf->chip_id == PCI_DEVID_CN9K_EP_NET_VF ||
-otx_epvf->chip_id == PCI_DEVID_CN98XX_EP_NET_VF)
+otx_epvf->chip_id == PCI_DEVID_CN98XX_EP_NET_VF ||
+otx_epvf->chip_id == PCI_DEVID_CNF95N_EP_NET_VF ||
+otx_epvf->chip_id == PCI_DEVID_CNF95O_EP_NET_VF)
otx_epvf->eth_dev->tx_pkt_burst = &otx2_ep_xmit_pkts;
else if (otx_epvf->chip_id == PCI_DEVID_CNXK_EP_NET_VF)
otx_epvf->eth_dev->tx_pkt_burst = &otx2_ep_xmit_pkts;
@@ -494,7 +498,10 @@ otx_ep_eth_dev_init(struct rte_eth_dev *eth_dev)
otx_epvf->pdev = pdev;
 
otx_epdev_init(otx_epvf);
-   if (pdev->id.device_id == PCI_DEVID_CN9K_EP_NET_VF)
+   if (otx_epvf->chip_id == PCI_DEVID_CN9K_EP_NET_VF ||
+   otx_epvf->chip_id == PCI_DEVID_CN98XX_EP_NET_VF ||
+   otx_epvf->chip_id == PCI_DEVID_CNF95N_EP_NET_VF ||
+   otx_epvf->chip_id == PCI_DEVID_CNF95O_EP_NET_VF)
otx_epvf->pkind = SDP_OTX2_PKIND_FS0;
else
otx_epvf->pkind = SDP_PKIND;
@@ -524,6 +531,8 @@ static const struct rte_pci_id pci_id_otx_ep_map[] = {
{ RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX_EP_VF) },
{ RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CN9K_EP_NET_VF) },
{ RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CN98XX_EP_NET_VF) },
+   { RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CNF95N_EP_NET_VF) },
+   { RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CNF95O_EP_NET_VF) },
{ RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CNXK_EP_NET_VF) },
{ .vendor_id = 0, /* sentinel */ }
 };
-- 
2.31.1



[PATCH] common/idpf: refine header file include

2023-04-24 Thread Qi Zhang
Replace #include <filename> with #include "filename" for
local header files.

Signed-off-by: Qi Zhang 
---
 drivers/common/idpf/idpf_common_device.c  | 4 ++--
 drivers/common/idpf/idpf_common_device.h  | 6 +++---
 drivers/common/idpf/idpf_common_rxtx_avx512.c | 4 ++--
 drivers/common/idpf/idpf_common_virtchnl.c| 4 ++--
 drivers/common/idpf/idpf_common_virtchnl.h| 4 ++--
 5 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/common/idpf/idpf_common_device.c 
b/drivers/common/idpf/idpf_common_device.c
index c5e7bbf66c..98029c9826 100644
--- a/drivers/common/idpf/idpf_common_device.c
+++ b/drivers/common/idpf/idpf_common_device.c
@@ -3,8 +3,8 @@
  */
 
 #include 
-#include 
-#include 
+#include "idpf_common_device.h"
+#include "idpf_common_virtchnl.h"
 
 static void
 idpf_reset_pf(struct idpf_hw *hw)
diff --git a/drivers/common/idpf/idpf_common_device.h 
b/drivers/common/idpf/idpf_common_device.h
index c2dc2f16b9..08e2517b3e 100644
--- a/drivers/common/idpf/idpf_common_device.h
+++ b/drivers/common/idpf/idpf_common_device.h
@@ -6,9 +6,9 @@
 #define _IDPF_COMMON_DEVICE_H_
 
 #include 
-#include 
-#include 
-#include 
+#include "base/idpf_prototype.h"
+#include "base/virtchnl2.h"
+#include "idpf_common_logs.h"
 
 #define IDPF_RSS_KEY_LEN   52
 
diff --git a/drivers/common/idpf/idpf_common_rxtx_avx512.c 
b/drivers/common/idpf/idpf_common_rxtx_avx512.c
index ee68e6a1f7..dffb11fcf2 100644
--- a/drivers/common/idpf/idpf_common_rxtx_avx512.c
+++ b/drivers/common/idpf/idpf_common_rxtx_avx512.c
@@ -3,8 +3,8 @@
  */
 
 #include 
-#include 
-#include 
+#include "idpf_common_device.h"
+#include "idpf_common_rxtx.h"
 
 #ifndef __INTEL_COMPILER
 #pragma GCC diagnostic ignored "-Wcast-qual"
diff --git a/drivers/common/idpf/idpf_common_virtchnl.c 
b/drivers/common/idpf/idpf_common_virtchnl.c
index 9ee7259539..b96cf8fdcc 100644
--- a/drivers/common/idpf/idpf_common_virtchnl.c
+++ b/drivers/common/idpf/idpf_common_virtchnl.c
@@ -2,8 +2,8 @@
  * Copyright(c) 2023 Intel Corporation
  */
 
-#include 
-#include 
+#include "idpf_common_virtchnl.h"
+#include "idpf_common_logs.h"
 
 static int
 idpf_vc_clean(struct idpf_adapter *adapter)
diff --git a/drivers/common/idpf/idpf_common_virtchnl.h 
b/drivers/common/idpf/idpf_common_virtchnl.h
index d479d93c8e..c45295290e 100644
--- a/drivers/common/idpf/idpf_common_virtchnl.h
+++ b/drivers/common/idpf/idpf_common_virtchnl.h
@@ -5,8 +5,8 @@
 #ifndef _IDPF_COMMON_VIRTCHNL_H_
 #define _IDPF_COMMON_VIRTCHNL_H_
 
-#include 
-#include 
+#include "idpf_common_device.h"
+#include "idpf_common_rxtx.h"
 
 __rte_internal
 int idpf_vc_api_version_check(struct idpf_adapter *adapter);
-- 
2.31.1
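
The motivation, briefly: a quoted include is searched relative to the
including file first, so sibling headers within the driver resolve without
extra -I flags, while angle brackets are reserved for headers coming from the
include path (DPDK-installed or system headers). A short illustration of the
split (which header goes where is the only point; file names are from the
patch or DPDK):

    /* exported/installed header: searched only on the -I include path */
    #include <rte_mbuf.h>

    /* local header next to this .c file: searched in this directory first */
    #include "idpf_common_device.h"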



[PATCH] common/idpf: remove unnecessary compile option

2023-04-24 Thread Qi Zhang
Remove the compile option "__KERNEL__", which should not be considered
in DPDK. Also #include the required header only in idpf_osdep.h.

Signed-off-by: Qi Zhang 
---
 drivers/common/idpf/base/idpf_controlq.c | 5 -
 drivers/common/idpf/base/idpf_controlq.h | 8 
 drivers/common/idpf/base/idpf_controlq_api.h | 6 --
 drivers/common/idpf/base/idpf_lan_txrx.h | 3 +--
 drivers/common/idpf/base/idpf_osdep.h| 1 +
 5 files changed, 2 insertions(+), 21 deletions(-)

diff --git a/drivers/common/idpf/base/idpf_controlq.c 
b/drivers/common/idpf/base/idpf_controlq.c
index 3af81e5a64..93a3a20fd1 100644
--- a/drivers/common/idpf/base/idpf_controlq.c
+++ b/drivers/common/idpf/base/idpf_controlq.c
@@ -162,11 +162,6 @@ int idpf_ctlq_add(struct idpf_hw *hw,
switch (qinfo->type) {
case IDPF_CTLQ_TYPE_MAILBOX_RX:
is_rxq = true;
-#ifdef __KERNEL__
-   fallthrough;
-#else
-   /* fallthrough */
-#endif /* __KERNEL__ */
case IDPF_CTLQ_TYPE_MAILBOX_TX:
status = idpf_ctlq_alloc_ring_res(hw, *cq_out);
break;
diff --git a/drivers/common/idpf/base/idpf_controlq.h 
b/drivers/common/idpf/base/idpf_controlq.h
index e7b0d803b3..fea8dda618 100644
--- a/drivers/common/idpf/base/idpf_controlq.h
+++ b/drivers/common/idpf/base/idpf_controlq.h
@@ -5,14 +5,8 @@
 #ifndef _IDPF_CONTROLQ_H_
 #define _IDPF_CONTROLQ_H_
 
-#ifdef __KERNEL__
-#include 
-#endif
-
-#ifndef __KERNEL__
 #include "idpf_osdep.h"
 #include "idpf_alloc.h"
-#endif
 #include "idpf_controlq_api.h"
 
 /* Maximum buffer lengths for all control queue types */
@@ -26,14 +20,12 @@
((u16)R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->ring_size) + 
\
   (R)->next_to_clean - (R)->next_to_use - 1))
 
-#ifndef __KERNEL__
 /* Data type manipulation macros. */
 #define IDPF_HI_DWORD(x)   ((u32)((((x) >> 16) >> 16) & 0xFFFFFFFF))
 #define IDPF_LO_DWORD(x)   ((u32)((x) & 0xFFFFFFFF))
 #define IDPF_HI_WORD(x)    ((u16)(((x) >> 16) & 0xFFFF))
 #define IDPF_LO_WORD(x)    ((u16)((x) & 0xFFFF))
 
-#endif
 /* Control Queue default settings */
 #define IDPF_CTRL_SQ_CMD_TIMEOUT   250  /* msecs */
 
diff --git a/drivers/common/idpf/base/idpf_controlq_api.h 
b/drivers/common/idpf/base/idpf_controlq_api.h
index 32d17baadf..e80debebb8 100644
--- a/drivers/common/idpf/base/idpf_controlq_api.h
+++ b/drivers/common/idpf/base/idpf_controlq_api.h
@@ -5,14 +5,8 @@
 #ifndef _IDPF_CONTROLQ_API_H_
 #define _IDPF_CONTROLQ_API_H_
 
-#ifdef __KERNEL__
-#include "idpf_mem.h"
-#else /* !__KERNEL__ */
 #include "idpf_osdep.h"
 
-#include 
-#endif /* !__KERNEL__ */
-
 struct idpf_hw;
 
 /* Used for queue init, response and events */
diff --git a/drivers/common/idpf/base/idpf_lan_txrx.h 
b/drivers/common/idpf/base/idpf_lan_txrx.h
index 98484b267c..2d635a0b9c 100644
--- a/drivers/common/idpf/base/idpf_lan_txrx.h
+++ b/drivers/common/idpf/base/idpf_lan_txrx.h
@@ -4,9 +4,8 @@
 
 #ifndef _IDPF_LAN_TXRX_H_
 #define _IDPF_LAN_TXRX_H_
-#ifndef __KERNEL__
+
 #include "idpf_osdep.h"
-#endif
 
 enum idpf_rss_hash {
/* Values 0 - 28 are reserved for future use */
diff --git a/drivers/common/idpf/base/idpf_osdep.h 
b/drivers/common/idpf/base/idpf_osdep.h
index 99ae9cf60a..78049e25b4 100644
--- a/drivers/common/idpf/base/idpf_osdep.h
+++ b/drivers/common/idpf/base/idpf_osdep.h
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define INLINE inline
 #define STATIC static
-- 
2.31.1
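
As a sanity check on the shift-and-mask helpers kept in idpf_controlq.h
(as reconstructed above), here is a tiny standalone program exercising the
same arithmetic; the typedefs are added only so it compiles outside the
driver, and the test value is arbitrary:

    #include <assert.h>
    #include <stdint.h>

    typedef uint16_t u16;
    typedef uint32_t u32;
    typedef uint64_t u64;

    #define IDPF_HI_DWORD(x)   ((u32)((((x) >> 16) >> 16) & 0xFFFFFFFF))
    #define IDPF_LO_DWORD(x)   ((u32)((x) & 0xFFFFFFFF))
    #define IDPF_HI_WORD(x)    ((u16)(((x) >> 16) & 0xFFFF))
    #define IDPF_LO_WORD(x)    ((u16)((x) & 0xFFFF))

    int main(void)
    {
        u64 dma_addr = 0x1122334455667788ull;

        /* A 64-bit value splits into two dwords; a dword into two words. */
        assert(IDPF_HI_DWORD(dma_addr) == 0x11223344u);
        assert(IDPF_LO_DWORD(dma_addr) == 0x55667788u);
        assert(IDPF_HI_WORD(IDPF_LO_DWORD(dma_addr)) == 0x5566u);
        assert(IDPF_LO_WORD(IDPF_LO_DWORD(dma_addr)) == 0x7788u);
        return 0;
    }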



[PATCH 1/2] common/sfc_efx/base: add API to drop MAE action resource IDs

2023-04-24 Thread Ivan Malov
When the client driver (the DPDK one, for instance) parses user flow
actions, it ends up with an action set specification. Next, in case
there are any FW resource-backed actions, like COUNT or SET_DST_MAC,
the driver allocates these resources and indicates their IDs in the
action set specification. The API used to set these IDs checks that
the current value of the target ID is INVALID, prior to the call.

The latter check, however, prevents the driver from updating the
IDs on port restart. When the port goes down, the driver frees
the resources. When the port goes up, the driver reallocates
them, tries to set the IDs in the specification and fails.

In order to address the problem, add an API to drop the
current resource IDs in the actions set specification.

Fixes: 3907defa5bf0 ("common/sfc_efx/base: support adding encap action to a 
set")
Cc: stable@dpdk.org

Signed-off-by: Ivan Malov 
Reviewed-by: Andy Moreton 
Tested-by: Denis Pryazhennikov 
---
 drivers/common/sfc_efx/base/efx.h  | 14 ++
 drivers/common/sfc_efx/base/efx_impl.h |  4 
 drivers/common/sfc_efx/base/efx_mae.c  | 15 +++
 drivers/common/sfc_efx/version.map |  1 +
 4 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/drivers/common/sfc_efx/base/efx.h 
b/drivers/common/sfc_efx/base/efx.h
index 6028e08eb6..e4a5694ae2 100644
--- a/drivers/common/sfc_efx/base/efx.h
+++ b/drivers/common/sfc_efx/base/efx.h
@@ -4774,6 +4774,20 @@ efx_mae_action_set_fill_in_counter_id(
__inefx_mae_actions_t *spec,
__inconst efx_counter_t *counter_idp);
 
+/*
+ * Clears dangling FW object IDs (counter ID, for instance) in
+ * the action set specification. Useful for adapter restarts,
+ * when all MAE objects need to be reallocated by the driver.
+ *
+ * This method only clears the IDs in the specification.
+ * The driver is still responsible for keeping the IDs
+ * separately and freeing them when stopping the port.
+ */
+LIBEFX_API
+extern void
+efx_mae_action_set_clear_fw_rsrc_ids(
+   __inefx_mae_actions_t *spec);
+
 /* Action set ID */
 typedef struct efx_mae_aset_id_s {
uint32_t id;
diff --git a/drivers/common/sfc_efx/base/efx_impl.h 
b/drivers/common/sfc_efx/base/efx_impl.h
index a48d4f6e04..bed4601715 100644
--- a/drivers/common/sfc_efx/base/efx_impl.h
+++ b/drivers/common/sfc_efx/base/efx_impl.h
@@ -1802,6 +1802,10 @@ typedef struct efx_mae_action_vlan_push_s {
uint16_temavp_tci_be;
 } efx_mae_action_vlan_push_t;
 
+/*
+ * Helper efx_mae_action_set_clear_fw_rsrc_ids() is responsible
+ * for initialising every field in this structure to the INVALID value.
+ */
 typedef struct efx_mae_actions_rsrc_s {
efx_mae_mac_id_temar_dst_mac_id;
efx_mae_mac_id_temar_src_mac_id;
diff --git a/drivers/common/sfc_efx/base/efx_mae.c 
b/drivers/common/sfc_efx/base/efx_mae.c
index 7732d2..4c33471f28 100644
--- a/drivers/common/sfc_efx/base/efx_mae.c
+++ b/drivers/common/sfc_efx/base/efx_mae.c
@@ -1394,10 +1394,7 @@ efx_mae_action_set_spec_init(
goto fail1;
}
 
-   spec->ema_rsrc.emar_dst_mac_id.id = EFX_MAE_RSRC_ID_INVALID;
-   spec->ema_rsrc.emar_src_mac_id.id = EFX_MAE_RSRC_ID_INVALID;
-   spec->ema_rsrc.emar_eh_id.id = EFX_MAE_RSRC_ID_INVALID;
-   spec->ema_rsrc.emar_counter_id.id = EFX_MAE_RSRC_ID_INVALID;
+   efx_mae_action_set_clear_fw_rsrc_ids(spec);
 
/*
 * Helpers which populate v2 actions must reject them when v2 is not
@@ -3027,6 +3024,16 @@ efx_mae_action_set_fill_in_counter_id(
return (rc);
 }
 
+   void
+efx_mae_action_set_clear_fw_rsrc_ids(
+   __inefx_mae_actions_t *spec)
+{
+   spec->ema_rsrc.emar_dst_mac_id.id = EFX_MAE_RSRC_ID_INVALID;
+   spec->ema_rsrc.emar_src_mac_id.id = EFX_MAE_RSRC_ID_INVALID;
+   spec->ema_rsrc.emar_eh_id.id = EFX_MAE_RSRC_ID_INVALID;
+   spec->ema_rsrc.emar_counter_id.id = EFX_MAE_RSRC_ID_INVALID;
+}
+
__checkReturn   efx_rc_t
 efx_mae_counters_alloc(
__inefx_nic_t *enp,
diff --git a/drivers/common/sfc_efx/version.map 
b/drivers/common/sfc_efx/version.map
index d36c3786fc..070de3ba54 100644
--- a/drivers/common/sfc_efx/version.map
+++ b/drivers/common/sfc_efx/version.map
@@ -90,6 +90,7 @@ INTERNAL {
efx_mae_action_rule_insert;
efx_mae_action_rule_remove;
efx_mae_action_set_alloc;
+   efx_mae_action_set_clear_fw_rsrc_ids;
efx_mae_action_set_fill_in_counter_id;
efx_mae_action_set_fill_in_dst_mac_id;
efx_mae_action_set_fill_in_eh_id;
-- 
2.17.1
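
A rough sketch of the intended call order around port stop, under stated
assumptions: efx_mae_action_set_clear_fw_rsrc_ids() and its signature come
from this patch; the surrounding wrapper type and the free step are
illustrative stand-ins, not the real sfc driver code.

    /* Opaque stand-in: the real efx_mae_actions_t lives inside libefx. */
    typedef struct efx_mae_actions_s efx_mae_actions_t;

    /* Declaration matching the API added by this patch. */
    extern void efx_mae_action_set_clear_fw_rsrc_ids(efx_mae_actions_t *spec);

    /* Hypothetical driver-side action-set wrapper. */
    struct my_action_set {
        efx_mae_actions_t *spec;
    };

    static void my_port_stop(struct my_action_set *as)
    {
        /* 1. Free the FW-backed resources (counters, encap headers, ...)
         *    that the driver allocated and filled into the spec earlier.
         */
        /* my_free_fw_resources(as); -- hypothetical */

        /* 2. Drop the now-dangling IDs from the spec so that the
         *    fill_in_*() helpers accept fresh IDs after restart.
         */
        efx_mae_action_set_clear_fw_rsrc_ids(as->spec);
    }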



[PATCH 2/2] net/sfc: invalidate dangling MAE flow action FW resource IDs

2023-04-24 Thread Ivan Malov
When reinserting a flow (on port restart, for instance)
FW resource IDs found in the action set specification
need to be invalidated so that the new (reallocated)
FW resource IDs can be accepted by libefx again.

Fixes: 1bbd1ec2348a ("net/sfc: support action VXLAN encap in MAE backend")
Cc: stable@dpdk.org

Signed-off-by: Ivan Malov 
Reviewed-by: Andy Moreton 
Tested-by: Denis Pryazhennikov 
---
 drivers/net/sfc/sfc_mae.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/sfc/sfc_mae.c b/drivers/net/sfc/sfc_mae.c
index e5e9257998..60b9fdc290 100644
--- a/drivers/net/sfc/sfc_mae.c
+++ b/drivers/net/sfc/sfc_mae.c
@@ -1180,6 +1180,8 @@ sfc_mae_action_set_disable(struct sfc_adapter *sa,
}
 
if (fw_rsrc->refcnt == 1) {
+   efx_mae_action_set_clear_fw_rsrc_ids(action_set->spec);
+
rc = efx_mae_action_set_free(sa->nic, &fw_rsrc->aset_id);
if (rc == 0) {
sfc_dbg(sa, "disabled action_set=%p with AS_ID=0x%08x",
-- 
2.17.1



[PATCH v3 10/11] net/octeon_ep: set watermark for output queues

2023-04-24 Thread Sathesh Edara
Sets the watermark level for SDP output queues
to send backpressure to NIX when available Rx
buffers fall below the watermark.

Signed-off-by: Sathesh Edara 
---
 drivers/net/octeon_ep/cnxk_ep_vf.c| 7 ++-
 drivers/net/octeon_ep/otx_ep_common.h | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.c 
b/drivers/net/octeon_ep/cnxk_ep_vf.c
index cadb4ecbf9..92c2d2ca5c 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.c
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.c
@@ -245,7 +245,12 @@ cnxk_ep_vf_setup_oq_regs(struct otx_ep_device *otx_ep, 
uint32_t oq_no)
}
 
otx_ep_dbg("SDP_R[%d]_sent: %x", oq_no, 
rte_read32(droq->pkts_sent_reg));
-   return 0;
+
+   /* Set Watermark for backpressure */
+   oct_ep_write64(OTX_EP_OQ_WMARK_MIN,
+  otx_ep->hw_addr + CNXK_EP_R_OUT_WMARK(oq_no));
+
+   return 0;
 }
 
 static int
diff --git a/drivers/net/octeon_ep/otx_ep_common.h 
b/drivers/net/octeon_ep/otx_ep_common.h
index dadc8d1579..0305079af9 100644
--- a/drivers/net/octeon_ep/otx_ep_common.h
+++ b/drivers/net/octeon_ep/otx_ep_common.h
@@ -23,6 +23,7 @@
 #define OTX_EP_MAX_OQ_DESCRIPTORS   (8192)
 #define OTX_EP_OQ_BUF_SIZE  (2048)
 #define OTX_EP_MIN_RX_BUF_SIZE  (64)
+#define OTX_EP_OQ_WMARK_MIN (256)
 
 #define OTX_EP_OQ_INFOPTR_MODE  (0)
 #define OTX_EP_OQ_REFIL_THRESHOLD   (16)
-- 
2.31.1



[PATCH v3 07/11] net/octeon_ep: flush pending DMA operations

2023-04-24 Thread Sathesh Edara
Flushes the pending DMA operations while reading
the packets, by reading the control and status register.

Signed-off-by: Sathesh Edara 
---
 drivers/net/octeon_ep/otx_ep_common.h | 8 
 drivers/net/octeon_ep/otx_ep_rxtx.c   | 4 
 2 files changed, 12 insertions(+)

diff --git a/drivers/net/octeon_ep/otx_ep_common.h 
b/drivers/net/octeon_ep/otx_ep_common.h
index 76528ed49d..444136923f 100644
--- a/drivers/net/octeon_ep/otx_ep_common.h
+++ b/drivers/net/octeon_ep/otx_ep_common.h
@@ -345,6 +345,14 @@ struct otx_ep_droq {
 */
void *pkts_sent_reg;
 
+   /** Handle DMA incompletion during packet reads.
+* This variable is used to initiate a pkts_sent_reg read
+* that completes pending DMA; it is assigned to (used as an
+* lvalue) so the compiler cannot optimize the reads away.
+*/
+   uint32_t sent_reg_val;
+
/* Statistics for this DROQ. */
struct otx_ep_droq_stats stats;
 
diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.c 
b/drivers/net/octeon_ep/otx_ep_rxtx.c
index c4153bd583..ca968f6fe7 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.c
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.c
@@ -917,6 +917,10 @@ otx_ep_droq_read_packet(struct otx_ep_device *otx_ep,
struct rte_mbuf *first_buf = NULL;
struct rte_mbuf *last_buf = NULL;
 
+   /* csr read helps to flush pending dma */
+   droq->sent_reg_val = rte_read32(droq->pkts_sent_reg);
+   rte_rmb();
+
while (pkt_len < total_pkt_len) {
int cpy_len = 0;
 
-- 
2.31.1
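
The trick here is the standard "read a device CSR to flush posted DMA"
ordering: the register read cannot complete until the device's earlier DMA
writes are visible, and the read barrier keeps later descriptor loads from
being reordered above it. A condensed sketch (rte_read32() and rte_rmb() are
real DPDK APIs; the surrounding function is illustrative):

    #include <stdint.h>
    #include <rte_io.h>
    #include <rte_atomic.h>

    /* Kept as a file-scope lvalue so the read is not optimized away. */
    static uint32_t sent_reg_val;

    static inline void flush_pending_dma(const volatile void *pkts_sent_reg)
    {
        /* The CSR read forces completion of DMA writes the device
         * posted before updating the sent counter...
         */
        sent_reg_val = rte_read32(pkts_sent_reg);
        /* ...and the barrier orders the descriptor/buffer loads that
         * follow.
         */
        rte_rmb();
    }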



[PATCH v3 06/11] net/octeon_ep: support ISM

2023-04-24 Thread Sathesh Edara
Adds the ISM (Interrupt Status Messages) specific functionality.

Signed-off-by: Sathesh Edara 
---
 drivers/net/octeon_ep/cnxk_ep_vf.c| 35 +++--
 drivers/net/octeon_ep/cnxk_ep_vf.h| 12 ++
 drivers/net/octeon_ep/otx2_ep_vf.c| 45 ++---
 drivers/net/octeon_ep/otx2_ep_vf.h| 14 +++
 drivers/net/octeon_ep/otx_ep_common.h | 16 
 drivers/net/octeon_ep/otx_ep_ethdev.c | 36 +
 drivers/net/octeon_ep/otx_ep_rxtx.c   | 56 +--
 7 files changed, 194 insertions(+), 20 deletions(-)

diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.c 
b/drivers/net/octeon_ep/cnxk_ep_vf.c
index 1a92887109..a437ae68cb 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.c
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.c
@@ -2,11 +2,12 @@
  * Copyright(C) 2022 Marvell.
  */
 
+#include 
 #include 
 
 #include 
 #include 
-
+#include 
 #include "cnxk_ep_vf.h"
 
 static void
@@ -85,6 +86,7 @@ cnxk_ep_vf_setup_iq_regs(struct otx_ep_device *otx_ep, 
uint32_t iq_no)
struct otx_ep_instr_queue *iq = otx_ep->instr_queue[iq_no];
int loop = OTX_EP_BUSY_LOOP_COUNT;
volatile uint64_t reg_val = 0ull;
+   uint64_t ism_addr;
 
reg_val = oct_ep_read64(otx_ep->hw_addr + CNXK_EP_R_IN_CONTROL(iq_no));
 
@@ -132,6 +134,19 @@ cnxk_ep_vf_setup_iq_regs(struct otx_ep_device *otx_ep, 
uint32_t iq_no)
 */
oct_ep_write64(OTX_EP_CLEAR_SDP_IN_INT_LVLS,
   otx_ep->hw_addr + CNXK_EP_R_IN_INT_LEVELS(iq_no));
+   /* Set up IQ ISM registers and structures */
+   ism_addr = (otx_ep->ism_buffer_mz->iova | CNXK_EP_ISM_EN
+   | CNXK_EP_ISM_MSIX_DIS)
+   + CNXK_EP_IQ_ISM_OFFSET(iq_no);
+   rte_write64(ism_addr, (uint8_t *)otx_ep->hw_addr +
+   CNXK_EP_R_IN_CNTS_ISM(iq_no));
+   iq->inst_cnt_ism =
+   (uint32_t *)((uint8_t *)otx_ep->ism_buffer_mz->addr
++ CNXK_EP_IQ_ISM_OFFSET(iq_no));
+   otx_ep_err("SDP_R[%d] INST Q ISM virt: %p, dma: 0x%" PRIX64, iq_no,
+  (void *)iq->inst_cnt_ism, ism_addr);
+   *iq->inst_cnt_ism = 0;
+   iq->inst_cnt_ism_prev = 0;
return 0;
 }
 
@@ -142,6 +157,7 @@ cnxk_ep_vf_setup_oq_regs(struct otx_ep_device *otx_ep, 
uint32_t oq_no)
uint64_t oq_ctl = 0ull;
int loop = OTX_EP_BUSY_LOOP_COUNT;
struct otx_ep_droq *droq = otx_ep->droq[oq_no];
+   uint64_t ism_addr;
 
/* Wait on IDLE to set to 1, supposed to configure BADDR
 * as long as IDLE is 0
@@ -201,9 +217,22 @@ cnxk_ep_vf_setup_oq_regs(struct otx_ep_device *otx_ep, 
uint32_t oq_no)
rte_write32((uint32_t)reg_val, droq->pkts_sent_reg);
 
otx_ep_dbg("SDP_R[%d]_sent: %x", oq_no, 
rte_read32(droq->pkts_sent_reg));
-   loop = OTX_EP_BUSY_LOOP_COUNT;
+   /* Set up ISM registers and structures */
+   ism_addr = (otx_ep->ism_buffer_mz->iova | CNXK_EP_ISM_EN
+   | CNXK_EP_ISM_MSIX_DIS)
+   + CNXK_EP_OQ_ISM_OFFSET(oq_no);
+   rte_write64(ism_addr, (uint8_t *)otx_ep->hw_addr +
+   CNXK_EP_R_OUT_CNTS_ISM(oq_no));
+   droq->pkts_sent_ism =
+   (uint32_t *)((uint8_t *)otx_ep->ism_buffer_mz->addr
++ CNXK_EP_OQ_ISM_OFFSET(oq_no));
+   otx_ep_err("SDP_R[%d] OQ ISM virt: %p dma: 0x%" PRIX64,
+   oq_no, (void *)droq->pkts_sent_ism, ism_addr);
+   *droq->pkts_sent_ism = 0;
+   droq->pkts_sent_ism_prev = 0;
 
-   while (((rte_read32(droq->pkts_sent_reg)) != 0ull)) {
+   loop = OTX_EP_BUSY_LOOP_COUNT;
+   while (((rte_read32(droq->pkts_sent_reg)) != 0ull) && loop--) {
reg_val = rte_read32(droq->pkts_sent_reg);
rte_write32((uint32_t)reg_val, droq->pkts_sent_reg);
rte_delay_ms(1);
diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.h 
b/drivers/net/octeon_ep/cnxk_ep_vf.h
index aaa5774552..072b38ea15 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.h
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.h
@@ -27,6 +27,7 @@
 #define CNXK_EP_R_IN_INT_LEVELS_START   0x10060
 #define CNXK_EP_R_IN_PKT_CNT_START  0x10080
 #define CNXK_EP_R_IN_BYTE_CNT_START 0x10090
+#define CNXK_EP_R_IN_CNTS_ISM_START 0x10520
 
 #define CNXK_EP_R_IN_CONTROL(ring) \
(CNXK_EP_R_IN_CONTROL_START + ((ring) * CNXK_EP_RING_OFFSET))
@@ -55,6 +56,8 @@
 #define CNXK_EP_R_IN_BYTE_CNT(ring)\
(CNXK_EP_R_IN_BYTE_CNT_START +  ((ring) * CNXK_EP_RING_OFFSET))
 
+#define CNXK_EP_R_IN_CNTS_ISM(ring)\
+   (CNXK_EP_R_IN_CNTS_ISM_START + ((ring) * CNXK_EP_RING_OFFSET))
 
 /** Rings per Virtual Function **/
 #define CNXK_EP_R_IN_CTL_RPVF_MASK (0xF)
@@ -87,6 +90,7 @@
 #define CNXK_EP_R_OUT_ENABLE_START 0x10170
 #define CNXK_EP_R_OUT_PKT_CNT_START    0x10180
 #define CNXK_EP_R_OUT_BYTE_CNT_START   0x10190
+#define CNXK_EP_R_OUT_CNTS_ISM_START   0x10510
 
 #define CNXK_EP_R_OUT_CNTS(r
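
ISM lets the device push its queue counters into host memory over DMA, so the
datapath polls a cached memory word instead of issuing an MMIO read per
burst. A simplified, self-contained view of the consumer side (field names
follow the patch; counter wrap handling is omitted for brevity):

    #include <stdint.h>

    /* Simplified slice of the DROQ state added by this patch. */
    struct droq_ism_view {
        uint32_t *pkts_sent_ism;     /* device DMAs the running count here */
        uint32_t pkts_sent_ism_prev; /* last value already consumed */
    };

    /* New packets since the last poll: a host memory read replaces the
     * per-burst CSR read.
     */
    static inline uint32_t ism_new_pkts(struct droq_ism_view *droq)
    {
        uint32_t cur = *(volatile uint32_t *)droq->pkts_sent_ism;
        uint32_t newpkts = cur - droq->pkts_sent_ism_prev;

        droq->pkts_sent_ism_prev = cur;
        return newpkts;
    }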

[PATCH v3 09/11] net/octeon_ep: support mailbox between VF and PF

2023-04-24 Thread Sathesh Edara
Adds the mailbox communication between VF and
PF and supports the following mailbox messages.
- Get and set MAC address
- Get link information
- Get stats
- Set and get MTU
- Send notification to PF

Signed-off-by: Sathesh Edara 
---
 doc/guides/nics/features/octeon_ep.ini |   1 +
 drivers/net/octeon_ep/cnxk_ep_vf.c |   1 +
 drivers/net/octeon_ep/cnxk_ep_vf.h |  12 +-
 drivers/net/octeon_ep/meson.build  |   1 +
 drivers/net/octeon_ep/otx_ep_common.h  |  26 +++
 drivers/net/octeon_ep/otx_ep_ethdev.c  | 143 +++-
 drivers/net/octeon_ep/otx_ep_mbox.c| 309 +
 drivers/net/octeon_ep/otx_ep_mbox.h| 163 +
 8 files changed, 643 insertions(+), 13 deletions(-)
 create mode 100644 drivers/net/octeon_ep/otx_ep_mbox.c
 create mode 100644 drivers/net/octeon_ep/otx_ep_mbox.h

diff --git a/doc/guides/nics/features/octeon_ep.ini 
b/doc/guides/nics/features/octeon_ep.ini
index 305e219262..f3b821c89e 100644
--- a/doc/guides/nics/features/octeon_ep.ini
+++ b/doc/guides/nics/features/octeon_ep.ini
@@ -10,4 +10,5 @@ Linux= Y
 x86-64   = Y
 Basic stats  = Y
 Link status  = Y
+MTU update   = Y
 Usage doc= Y
diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.c 
b/drivers/net/octeon_ep/cnxk_ep_vf.c
index a437ae68cb..cadb4ecbf9 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.c
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.c
@@ -8,6 +8,7 @@
 #include 
 #include 
 #include 
+#include "otx_ep_common.h"
 #include "cnxk_ep_vf.h"
 
 static void
diff --git a/drivers/net/octeon_ep/cnxk_ep_vf.h 
b/drivers/net/octeon_ep/cnxk_ep_vf.h
index 072b38ea15..86277449ea 100644
--- a/drivers/net/octeon_ep/cnxk_ep_vf.h
+++ b/drivers/net/octeon_ep/cnxk_ep_vf.h
@@ -5,7 +5,7 @@
 #define _CNXK_EP_VF_H_
 
 #include 
-#include "otx_ep_common.h"
+
 #define CNXK_CONFIG_XPANSION_BAR 0x38
 #define CNXK_CONFIG_PCIE_CAP 0x70
 #define CNXK_CONFIG_PCIE_DEVCAP  0x74
@@ -92,6 +92,10 @@
 #define CNXK_EP_R_OUT_BYTE_CNT_START   0x10190
 #define CNXK_EP_R_OUT_CNTS_ISM_START   0x10510
 
+#define CNXK_EP_R_MBOX_PF_VF_DATA_START 0x10210
+#define CNXK_EP_R_MBOX_VF_PF_DATA_START 0x10230
+#define CNXK_EP_R_MBOX_PF_VF_INT_START  0x10220
+
 #define CNXK_EP_R_OUT_CNTS(ring)\
(CNXK_EP_R_OUT_CNTS_START + ((ring) * CNXK_EP_RING_OFFSET))
 
@@ -125,6 +129,12 @@
 #define CNXK_EP_R_OUT_CNTS_ISM(ring) \
(CNXK_EP_R_OUT_CNTS_ISM_START + ((ring) * CNXK_EP_RING_OFFSET))
 
+#define CNXK_EP_R_MBOX_VF_PF_DATA(ring)  \
+   (CNXK_EP_R_MBOX_VF_PF_DATA_START + ((ring) * CNXK_EP_RING_OFFSET))
+
+#define CNXK_EP_R_MBOX_PF_VF_INT(ring)   \
+   (CNXK_EP_R_MBOX_PF_VF_INT_START + ((ring) * CNXK_EP_RING_OFFSET))
+
 /*-- R_OUT Masks */
 #define CNXK_EP_R_OUT_INT_LEVELS_BMODE   (1ULL << 63)
 #define CNXK_EP_R_OUT_INT_LEVELS_TIMET   (32)
diff --git a/drivers/net/octeon_ep/meson.build 
b/drivers/net/octeon_ep/meson.build
index a267b60290..e698bf9792 100644
--- a/drivers/net/octeon_ep/meson.build
+++ b/drivers/net/octeon_ep/meson.build
@@ -8,4 +8,5 @@ sources = files(
 'otx_ep_vf.c',
 'otx2_ep_vf.c',
 'cnxk_ep_vf.c',
+'otx_ep_mbox.c',
 )
diff --git a/drivers/net/octeon_ep/otx_ep_common.h 
b/drivers/net/octeon_ep/otx_ep_common.h
index 3582f3087b..dadc8d1579 100644
--- a/drivers/net/octeon_ep/otx_ep_common.h
+++ b/drivers/net/octeon_ep/otx_ep_common.h
@@ -4,6 +4,7 @@
 #ifndef _OTX_EP_COMMON_H_
 #define _OTX_EP_COMMON_H_
 
+#include 
 
 #define OTX_EP_NW_PKT_OP   0x1220
 #define OTX_EP_NW_CMD_OP   0x1221
@@ -67,6 +68,9 @@
 #define oct_ep_read64(addr) rte_read64_relaxed((void *)(addr))
 #define oct_ep_write64(val, addr) rte_write64_relaxed((val), (void *)(addr))
 
+/* Mailbox maximum data size */
+#define MBOX_MAX_DATA_BUF_SIZE 320
+
 /* Input Request Header format */
 union otx_ep_instr_irh {
uint64_t u64;
@@ -488,6 +492,18 @@ struct otx_ep_device {
 
/* DMA buffer for SDP ISM messages */
const struct rte_memzone *ism_buffer_mz;
+
+   /* Mailbox lock */
+   rte_spinlock_t mbox_lock;
+
+   /* Mailbox data */
+   uint8_t mbox_data_buf[MBOX_MAX_DATA_BUF_SIZE];
+
+   /* Mailbox data index */
+   int32_t mbox_data_index;
+
+   /* Mailbox receive message length */
+   int32_t mbox_rcv_message_len;
 };
 
 int otx_ep_setup_iqs(struct otx_ep_device *otx_ep, uint32_t iq_no,
@@ -541,6 +557,16 @@ struct otx_ep_buf_free_info {
 #define OTX_EP_CLEAR_SLIST_DBELL 0xFFFFFFFF
 #define OTX_EP_CLEAR_SDP_OUT_PKT_CNT 0xFFFFFFFFF
 
+/* Max overhead includes
+ * - Ethernet hdr
+ * - CRC
+ * - nested VLANs
+ * - octeon rx info
+ */
+#define OTX_EP_ETH_OVERHEAD \
+   (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + \
+(2 * RTE_VLAN_HLEN) + OTX_EP_DROQ_INFO_SIZE)
+
 /* PCI IDs */
 #define PCI_VENDOR_ID_CAVIUM   0x177D
 
diff --git a/driver
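
The shape of the VF-to-PF exchange, as a hedged sketch: one data register per
ring carries a packed request word, and a spinlock serializes users of that
register. Everything below except the register names is illustrative; the
real message format lives in otx_ep_mbox.h from this patch.

    #include <stdint.h>
    #include <rte_spinlock.h>

    /* Hypothetical packed request word; the real layout differs. */
    union mbox_word {
        uint64_t u64;
        struct {
            uint64_t opcode : 8;
            uint64_t data   : 56;
        } s;
    };

    static int mbox_send(rte_spinlock_t *lock, uint8_t opcode, uint64_t data)
    {
        union mbox_word req = { .s = { .opcode = opcode, .data = data } };
        int rc = 0;

        rte_spinlock_lock(lock);
        /* Write req.u64 to CNXK_EP_R_MBOX_VF_PF_DATA(ring) via MMIO,
         * then poll CNXK_EP_R_MBOX_PF_VF_DATA(ring) for the PF reply;
         * both MMIO steps are elided in this sketch.
         */
        (void)req;
        rte_spinlock_unlock(lock);
        return rc;
    }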

[PATCH v3 05/11] devtools: add acronym in dictionary for commit checks

2023-04-24 Thread Sathesh Edara
ISM -> Interrupt Status Messages

Signed-off-by: Sathesh Edara 
---
 devtools/words-case.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/devtools/words-case.txt b/devtools/words-case.txt
index 53e029a958..3a7af902bd 100644
--- a/devtools/words-case.txt
+++ b/devtools/words-case.txt
@@ -35,6 +35,7 @@ IP
 IPsec
 IPv4
 IPv6
+ISM
 L2
 L3
 L4
-- 
2.31.1





[Bug 1221] PPC64le memory init use mmap return EBUSY

2023-04-24 Thread bugzilla
https://bugs.dpdk.org/show_bug.cgi?id=1221

Bug ID: 1221
   Summary: PPC64le memory init use  mmap return EBUSY
   Product: DPDK
   Version: 21.11
  Hardware: POWER
OS: Linux
Status: UNCONFIRMED
  Severity: normal
  Priority: Normal
 Component: core
  Assignee: dev@dpdk.org
  Reporter: haib...@hcdatainc.com
  Target Milestone: ---

When DPDK is initializing, it will first use eal_get_virtual_area() to
obtain the virtual address (page size is 64K), and then call mmap() to bind
the hugepage (page size is 16M) to the corresponding virtual address. Because
the page sizes differ, mmap() reports EBUSY on PPC64. Maybe we need to use
MAP_HUGETLB in the mmap() parameters of eal_get_virtual_area().
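
For illustration, here is a minimal standalone sketch of the failing sequence
described above (sizes and flags are illustrative; this is not the actual EAL
code):

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 16 * 1024 * 1024; /* one 16M hugepage */

	/* step 1: reserve VA space, as eal_get_virtual_area() does,
	 * backed by the default 64K pages */
	void *va = mmap(NULL, len, PROT_NONE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (va == MAP_FAILED)
		return 1;

	/* step 2: bind a 16M hugepage at the reserved address; on the
	 * affected PPC64le systems this fails with EBUSY because the
	 * page sizes differ. The suggestion above is to pass MAP_HUGETLB
	 * already in the step 1 reservation. */
	void *hp = mmap(va, len, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | MAP_HUGETLB,
			-1, 0);
	if (hp == MAP_FAILED)
		perror("mmap(hugepage)");
	return 0;
}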

-- 
You are receiving this mail because:
You are the assignee for the bug.

Re: [PATCH v2] vfio: do not coalesce DMA mappings

2023-04-24 Thread David Marchand
Hello Anatoly,

On Wed, Apr 5, 2023 at 4:17 PM Burakov, Anatoly
 wrote:
> >> Could you please provide some steps to reproduce the hotplug issue
> >> you're having? It would be great to have a test case for this patchset
> >> to put it in context.
> >
> > I am working on CDX bus
> > (http://patchwork.dpdk.org/project/dpdk/patch/20230124140746.594066-2-nipun.gu...@amd.com/)
> >  and trying out some cases for plug/unplug.
> >
> > The test is as follows:
> ># Run testpmd application
> >./dpdk-testpmd -c 0x3 -- -i --nb-cores=1
> >
> ># Bind to VFIO
> >echo "vfio-cdx" >  /sys/bus/cdx/devices/cdx-00\:00/driver_override
> >echo "cdx-00:00" > /sys/bus/cdx/drivers_probe
> >
> ># Plug a device
> >testpmd> port attach cdx:cdx-00:00
> >
> >#quit testpmd
> >testpmd> quit
> >
> > This gave error at testpmd exit that memory cannot be freed. On
> > debugging I updated this code and seems it should be seen with any of
> > the device.
> >
> > I see similar test case (without quit) mentioned
> > https://doc.dpdk.org/dts/test_plans/hotplug_test_plan.html, but the
> > difference is that it is with igb_uio and issue is being observed with
> > VFIO.
> >
> > Please note the device/bus mentioned in the commands is not yet
> > upstreamed in DPDK, but patches would be sent out soon.
> >
> > Thanks,
> > Nipun
> >
>
> Thanks, I can reproduce this issue with regular devices too (run testpmd
> with no devices, bind a NIC to VFIO, attach it, then quit). You're
> correct in that since the initial mapping was done with mapping large
> contiguous zones (such as when mempools are created before attach), any
> subsequent freeing of memory will cause these errors to happen.
>
> I don't think this can be fixed by anything other than not doing the
> contiguous mapping thing, so provisionally, I think this patch should be
> accepted. I'll play around with it some more and get back to you :)

Can we conclude on this topic?
It is best we merge this kind of change the sooner possible for a release.

Thanks.


-- 
David Marchand



[PATCH 0/1] eal: add tracepoints to track lcores and services

2023-04-24 Thread Arnaud Fiorini
The events generated by these tracepoints are then used in Trace
Compass[1] to show the lcore state and service state throughout the
execution of a program. A trace has been generated using the service
cores application and can be used to test the analysis.

[1] 
https://git.eclipse.org/r/c/tracecompass.incubator/org.eclipse.tracecompass.incubator/+/200457

Arnaud Fiorini (1):
  eal: add tracepoints to track lcores and services

 .mailmap |  1 +
 lib/eal/common/eal_common_thread.c   |  4 ++
 lib/eal/common/eal_common_trace_points.c | 21 +
 lib/eal/common/rte_service.c | 18 ++-
 lib/eal/include/eal_trace_internal.h | 60 
 5 files changed, 103 insertions(+), 1 deletion(-)

-- 
2.25.1



[PATCH 1/1] eal: add tracepoints to track lcores and services

2023-04-24 Thread Arnaud Fiorini
The tracepoints added are used to track lcore role and status,
as well as service mapping and service runstates. These
tracepoints are then used in analyses in Trace Compass.

Signed-off-by: Arnaud Fiorini 
---
 .mailmap |  1 +
 lib/eal/common/eal_common_thread.c   |  4 ++
 lib/eal/common/eal_common_trace_points.c | 21 +
 lib/eal/common/rte_service.c | 18 ++-
 lib/eal/include/eal_trace_internal.h | 60 
 5 files changed, 103 insertions(+), 1 deletion(-)

diff --git a/.mailmap b/.mailmap
index dc30369117..2a0b132572 100644
--- a/.mailmap
+++ b/.mailmap
@@ -120,6 +120,7 @@ Archana Muniganti  

 Archit Pandey 
 Arkadiusz Kubalewski 
 Arkadiusz Kusztal 
+Arnaud Fiorini 
 Arnon Warshavsky 
 Arshdeep Kaur 
 Artem V. Andreev 
diff --git a/lib/eal/common/eal_common_thread.c 
b/lib/eal/common/eal_common_thread.c
index 079a385630..25dbdd68e3 100644
--- a/lib/eal/common/eal_common_thread.c
+++ b/lib/eal/common/eal_common_thread.c
@@ -205,6 +205,8 @@ eal_thread_loop(void *arg)
__ATOMIC_ACQUIRE)) == NULL)
rte_pause();
 
+   rte_eal_trace_thread_lcore_running(lcore_id, f);
+
/* call the function and store the return value */
fct_arg = lcore_config[lcore_id].arg;
ret = f(fct_arg);
@@ -219,6 +221,8 @@ eal_thread_loop(void *arg)
 */
__atomic_store_n(&lcore_config[lcore_id].state, WAIT,
__ATOMIC_RELEASE);
+
+   rte_eal_trace_thread_lcore_stopped(lcore_id);
}
 
/* never reached */
diff --git a/lib/eal/common/eal_common_trace_points.c 
b/lib/eal/common/eal_common_trace_points.c
index 3f5bf5c55c..0f1240ea3a 100644
--- a/lib/eal/common/eal_common_trace_points.c
+++ b/lib/eal/common/eal_common_trace_points.c
@@ -70,6 +70,27 @@ RTE_TRACE_POINT_REGISTER(rte_eal_trace_thread_remote_launch,
lib.eal.thread.remote.launch)
 RTE_TRACE_POINT_REGISTER(rte_eal_trace_thread_lcore_ready,
lib.eal.thread.lcore.ready)
+RTE_TRACE_POINT_REGISTER(rte_eal_trace_thread_lcore_running,
+   lib.eal.thread.lcore.running)
+RTE_TRACE_POINT_REGISTER(rte_eal_trace_thread_lcore_stopped,
+   lib.eal.thread.lcore.stopped)
+
+RTE_TRACE_POINT_REGISTER(rte_eal_trace_service_map_lcore,
+   lib.eal.service.map.lcore)
+RTE_TRACE_POINT_REGISTER(rte_eal_trace_service_lcore_state_change,
+   lib.eal.service.lcore.state.change)
+RTE_TRACE_POINT_REGISTER(rte_eal_trace_service_lcore_start,
+   lib.eal.service.lcore.start)
+RTE_TRACE_POINT_REGISTER(rte_eal_trace_service_lcore_stop,
+   lib.eal.service.lcore.stop)
+RTE_TRACE_POINT_REGISTER(rte_eal_trace_service_run_begin,
+   lib.eal.service.run.begin)
+RTE_TRACE_POINT_REGISTER(rte_eal_trace_service_runstate_set,
+   lib.eal.service.run.state.set)
+RTE_TRACE_POINT_REGISTER(rte_eal_trace_service_run_end,
+   lib.eal.service.run.end)
+RTE_TRACE_POINT_REGISTER(rte_eal_trace_service_component_register,
+   lib.eal.service.component.register)
 
 RTE_TRACE_POINT_REGISTER(rte_eal_trace_intr_callback_register,
lib.eal.intr.register)
diff --git a/lib/eal/common/rte_service.c b/lib/eal/common/rte_service.c
index 42ca1d001d..5daec007aa 100644
--- a/lib/eal/common/rte_service.c
+++ b/lib/eal/common/rte_service.c
@@ -9,6 +9,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 #include 
@@ -16,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "eal_private.h"
 
@@ -276,6 +278,8 @@ rte_service_component_register(const struct 
rte_service_spec *spec,
if (id_ptr)
*id_ptr = free_slot;
 
+   rte_eal_trace_service_component_register(free_slot, spec->name);
+
return 0;
 }
 
@@ -336,6 +340,7 @@ rte_service_runstate_set(uint32_t id, uint32_t runstate)
__atomic_store_n(&s->app_runstate, RUNSTATE_STOPPED,
__ATOMIC_RELEASE);
 
+   rte_eal_trace_service_runstate_set(id, runstate);
return 0;
 }
 
@@ -427,11 +432,15 @@ service_run(uint32_t i, struct core_state *cs, uint64_t 
service_mask,
if (!rte_spinlock_trylock(&s->execute_lock))
return -EBUSY;
 
+   rte_eal_trace_service_run_begin(i, rte_lcore_id());
service_runner_do_callback(s, cs, i);
rte_spinlock_unlock(&s->execute_lock);
-   } else
+   } else {
+   rte_eal_trace_service_run_begin(i, rte_lcore_id());
service_runner_do_callback(s, cs, i);
+   }
 
+   rte_eal_trace_service_run_end(i, rte_lcore_id());
return 0;
 }
 
@@ -658,6 +667,7 @@ int32_t
 rte_service_map_lcore_set(uint32_t id, uint32_t lcore, uint32_t enabled)
 {
uint32_t on = enabled > 0;
+   rte_eal_trace_service_map_lcore(id, lcore, enabled);
return service_update(id, lcore, &on, 0);
 }
 
@@ -683,6 +693,8 @@ set_lcore_state(uint3

Re: [RFC PATCH 1/5] eventdev: add power monitoring API on event port

2023-04-24 Thread Ferruh Yigit
On 4/19/2023 11:15 AM, Jerin Jacob wrote:
> On Wed, Apr 19, 2023 at 3:24 PM Sivaprasad Tummala
>  wrote:
>>
>> A new API to allow power monitoring condition on event port to
>> optimize power when no events are arriving on an event port for
>> the worker core to process in an eventdev based pipelined application.
>>
>> Signed-off-by: Sivaprasad Tummala 
>> + *
>> + * @param dev_id
>> + *   Eventdev id
>> + * @param port_id
>> + *   Eventdev port id
>> + * @param pmc
>> + *   The pointer to power-optimized monitoring condition structure.
>> + *
>> + * @return
>> + *   - 0: Success.
>> + *   -ENOTSUP: Operation not supported.
>> + *   -EINVAL: Invalid parameters.
>> + *   -ENODEV: Invalid device ID.
>> + */
>> +__rte_experimental
>> +int
>> +rte_event_port_get_monitor_addr(uint8_t dev_id, uint8_t port_id,
>> +   struct rte_power_monitor_cond *pmc);
> 
> + eventdev driver maintainers
> 
> I think we don't need to expose this to applications, due to:
> 1) To make applications transparent as to whether power saving is enabled or
> not?
> 2) Some HW and Arch already support power management in the driver and in HW
> (not using the CPU architecture directly)
> 
> If so, that will be translated to following,
> a) Add rte_event_port_power_saving_ena_dis(uint8_t dev_id, uint8_t
> port_id, bool ena) for controlling power saving in slowpath.
> b) Create reusable PMD private function based on the CPU architecture
> power saving primitive to cover the PMD don't have native power saving
> support.
> c)Update rte_event_dequeue_burst() burst of PMD callback to use (b).
> 
> 

Hi Jerin,

The ethdev approach seems to be applied here.

In ethdev, 'rte_event_port_get_monitor_addr()' equivalent is
'rte_eth_get_monitor_addr()'.

Although 'rte_eth_get_monitor_addr()' is a public API, it is currently
only called from the Rx/Tx callback functions implemented in the power library.
But I assume the intention of making it public is to enable users to implement
their own callback functions with a custom algorithm for power
management.

And probably the same is true for 'rte_event_port_get_monitor_addr()'.


Also, instead of implementing power features within PMDs, isn't it
better to have a common eventdev layer for it?

For the PMDs that benefit from a HW event manager, simply not implementing the
.get_monitor_addr() dev_ops will keep them free from power-related APIs.
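
For illustration, a user-supplied callback built on the proposed API might
look roughly like this (rte_event_port_get_monitor_addr() is the RFC API under
discussion; the surrounding loop and timeout are assumptions):

	struct rte_event ev[1];
	struct rte_power_monitor_cond pmc;
	uint64_t timeout = rte_get_tsc_hz() / 1000; /* ~1 ms, example value */

	if (rte_event_dequeue_burst(dev_id, port_id, ev, 1, 0) == 0 &&
	    rte_event_port_get_monitor_addr(dev_id, port_id, &pmc) == 0) {
		/* sleep until the monitored address changes or timeout expires */
		rte_power_monitor(&pmc, rte_get_tsc_cycles() + timeout);
	}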






Re: [RFC PATCH 3/5] eventdev: support optional dequeue callbacks

2023-04-24 Thread Ferruh Yigit
On 4/19/2023 10:54 AM, Sivaprasad Tummala wrote:
> --- a/lib/eventdev/version.map
> +++ b/lib/eventdev/version.map
> @@ -131,6 +131,12 @@ EXPERIMENTAL {
>   rte_event_eth_tx_adapter_runtime_params_init;
>   rte_event_eth_tx_adapter_runtime_params_set;
>   rte_event_timer_remaining_ticks_get;
> +
> + # added in 23.07
> + rte_event_dequeue_callbacks
> + rte_event_add_dequeue_callback
> + rte_event_remove_dequeue_callback
> + rte_event_port_get_monitor_addr

'rte_event_port_get_monitor_addr' belongs to previous patch 1/5, where
it is introduced.


Re: [RFC 0/3] introduce coroutine library

2023-04-24 Thread Stephen Hemminger
On Mon, 24 Apr 2023 13:02:05 +
Chengwen Feng  wrote:

> This patchset introduces the coroutine library which will help refactor
> the hns3 PMD's reset process.
> 
> The hns3 single function reset process consists of the following steps:
> 1.stop_service();
> 2.prepare_reset();
> 3.delay(100ms);
> 4.notify_hw();
> 5.wait_hw_reset_done(); // multiple sleep waits are involved.
> 6.reinit();
> 7.restore_conf();
> 
> If the DPDK process takes over multiple hns3 functions (e.g. 100),
> it's impractical to reset and restore functions in sequence:
> 1.proc_func(001); // will complete in the 100+ms range.
> 2.proc_func(002); // will complete in the 100~200+ms range.
> ...
> x.proc_func(100); // will complete in the 9900~10000+ms range.
> The later functions will fail to process because it's too late to deal with them.
> 
> One solution is that create a reset thread for each function, and it
> will lead to large number of threads if the DPDK process take over
> multiple hns3 functions.
> 
> So the current hns3 driver uses asynchronous mechanism, for examples, it
> use rte_eal_alarm_set() when process delay(100ms), it splits a serial
> process into multiple asynchronous processes, and the code is complex
> and difficult to understand.
> 
> The coroutine is a good mechanism to provide programmers with the 
> simplicity of keeping serial processes within a limited number of
> threads.
> 
> This patchset uses <ucontext.h> to build the coroutine framework, and it
> just provides a demo. More APIs may be added in the future.
> 
> In addition, we would like to ask the community whether it is possible
> to accept the library. If not, whether it is allowed to provide the
> library in hns3 PMD.
> 
> Chengwen Feng (3):
>   lib/coroutine: add coroutine library
>   examples/coroutine: support coroutine examples
>   net/hns3: refactor reset process with coroutine

Interesting, but DPDK really is not the right place for this.
Also, why so much sleeping? Can't this device be handled with an event-based
model? Plus, any complexity like this introduces more bugs into the already
fragile interaction of DPDK userspace applications and threads.

Not only that, coroutines add to the pre-existing problems with locking.
If coroutine 1 acquires a lock, coroutine 2 will deadlock itself.
And someone will spend days figuring that out. And the existing analyzer
tools will not know about the magic coroutine library.

Bottom line: please no



Re: [PATCH v2] vfio: do not coalesce DMA mappings

2023-04-24 Thread Stephen Hemminger
On Mon, 24 Apr 2023 17:22:46 +0200
David Marchand  wrote:

> > >  
> >
> > Thanks, I can reproduce this issue with regular devices too (run testpmd
> > with no devices, bind a NIC to VFIO, attach it, then quit). You're
> > correct in that since the initial mapping was done with mapping large
> > contiguous zones (such as when mempools are created before attach), any
> > subsequent freeing of memory will cause these errors to happen.
> >
> > I don't think this can be fixed by anything other than not doing the
> > contiguous mapping thing, so provisionally, I think this patch should be
> > accepted. I'll play around with it some more and get back to you :)  
> 
> Can we conclude on this topic?
> It is best we merge this kind of change the sooner possible for a release.
> 
> Thanks.

Shouldn't the coalesced mappings have a correct data structure
(accounting) so that on shutdown the unmaps are done with the right size?


RE: [PATCH v2] vfio: do not coalesce DMA mappings

2023-04-24 Thread Gupta, Nipun



> -Original Message-
> From: Stephen Hemminger 
> Sent: Monday, April 24, 2023 9:41 PM
> To: David Marchand 
> Cc: Burakov, Anatoly ; Gupta, Nipun
> ; dev@dpdk.org; tho...@monjalon.net; Yigit, Ferruh
> ; Agarwal, Nikhil 
> Subject: Re: [PATCH v2] vfio: do not coalesce DMA mappings
> 
> 
> On Mon, 24 Apr 2023 17:22:46 +0200
> David Marchand  wrote:
> 
> > > >
> > >
> > > Thanks, I can reproduce this issue with regular devices too (run testpmd
> > > with no devices, bind a NIC to VFIO, attach it, then quit). You're
> > > correct in that since the initial mapping was done with mapping large
> > > contiguous zones (such as when mempools are created before attach), any
> > > subsequent freeing of memory will cause these errors to happen.
> > >
> > > I don't think this can be fixed by anything other than not doing the
> > > contiguous mapping thing, so provisionally, I think this patch should be
> > > accepted. I'll play around with it some more and get back to you :)
> >
> > Can we conclude on this topic?
> > It is best we merge this kind of change the sooner possible for a release.
> >
> > Thanks.
> 
> Shouldn't the coalesced mappings be able to have correct datastructure
> (accounting) so that on shutdown the unmap's are done for the right size?

This issue occurs only in the hotplug case. Other devices which are not
hot-plugged and exist from the start of the application need to have individual
(non-coalesced) mappings. So individual (non-coalesced) mappings are definitely
required. IMO we should not maintain a separate mapping for each hot-plugged
device, as it would be unneeded overhead.

Regards,
Nipun


Re: [PATCH] common/idpf: remove unnecessary compile option

2023-04-24 Thread Stephen Hemminger
On Mon, 24 Apr 2023 18:47:00 -0400
Qi Zhang  wrote:

> Remove compile option "__KERNEL" which should not be considered in
> DPDK. Also only #include  in idpf_osdep.h.
> 
> Signed-off-by: Qi Zhang 

This will cause some warnings in the DPDK build depending on compiler
version and flags. You need to leave the fallthrough comment or
use one of the other fallthrough annotations.


Re: [PATCH] common/idpf: remove unnecessary compile option

2023-04-24 Thread Tyler Retzlaff
On Mon, Apr 24, 2023 at 09:23:48AM -0700, Stephen Hemminger wrote:
> On Mon, 24 Apr 2023 18:47:00 -0400
> Qi Zhang  wrote:
> 
> > Remove compile option "__KERNEL" which should not be considered in
> > DPDK. Also only #include  in idpf_osdep.h.
> > 
> > Signed-off-by: Qi Zhang 
> 
> This will cause some warnings in DPDK build depending on compiler
> version and flags.  You need to leave the fallthrough comment or
> use one of the other fallthrough annotations.

If there are variations of annotating fallthrough, I would not object to
a macro for it being exposed from rte_common.h.

Full disclosure: it would let me use the equivalents that are provided
with Windows and Windows toolchains.


Re: [PATCH] common/idpf: remove unnecessary compile option

2023-04-24 Thread Stephen Hemminger
On Mon, 24 Apr 2023 10:29:19 -0700
Tyler Retzlaff  wrote:

> On Mon, Apr 24, 2023 at 09:23:48AM -0700, Stephen Hemminger wrote:
> > On Mon, 24 Apr 2023 18:47:00 -0400
> > Qi Zhang  wrote:
> >   
> > > Remove compile option "__KERNEL" which should not be considered in
> > > DPDK. Also only #include  in idpf_osdep.h.
> > > 
> > > Signed-off-by: Qi Zhang   
> > 
> > This will cause some warnings in DPDK build depending on compiler
> > version and flags.  You need to leave the fallthrough comment or
> > use one of the other fallthrough annotations.  
> 
> if there are variations of annotating fallthrough i would not object to
> a macro for it being exposed from rte_common.h
> 
> full disclosure it would let me use the equivalent that are provided
> with windows and windows toolchains.

Yes, having something like __rte_fallthrough would help.
It wouldn't help code that is trying to always work in the kernel, DPDK, or
other places.

Both GCC and Clang use statement attributes, and C++ has [[fallthrough]]:

https://gcc.gnu.org/onlinedocs/gcc/Statement-Attributes.html
https://clang.llvm.org/docs/AttributeReference.html#fallthrough
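
For illustration, a minimal sketch of such a macro (__rte_fallthrough is a
hypothetical name here, not an existing rte_common.h definition):

#if defined(__cplusplus) && __cplusplus >= 201703L
#define __rte_fallthrough [[fallthrough]]
#elif defined(__GNUC__) || defined(__clang__)
#define __rte_fallthrough __attribute__((fallthrough))
#else
#define __rte_fallthrough do { } while (0) /* fall through */
#endif

static int classify(int type)
{
	int score = 0;

	switch (type) {
	case 2:
		score += 10;
		__rte_fallthrough; /* annotated fall-through, no warning */
	case 1:
		score += 1;
		break;
	default:
		break;
	}
	return score;
}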


Re: [PATCH v5] app/testpmd: txonly multiflow port change support

2023-04-24 Thread Joshua Washington
After updating the patch, it seems that the `lcores_autotest` unit test now
times out on Windows Server 2019. I looked at the test logs, but they were
identical as far as I could tell, with the timed-out test even printing "Test
OK" to stdout. Is this a flake? Or is there any other way to get extra
information about why the test timed out, or to run the test with extra
debugging information?

Thanks,
Josh

On Fri, Apr 21, 2023 at 4:20 PM Joshua Washington 
wrote:

> Google cloud routes traffic using IP addresses without the support of MAC
> addresses, so changing source IP address for txonly-multi-flow can have
> negative performance implications for net/gve when using testpmd. This
> patch updates txonly multiflow mode to modify source ports instead of
> source IP addresses.
>
> The change can be tested with the following command:
> dpdk-testpmd -- --forward-mode=txonly --txonly-multi-flow \
> --tx-ip=,
>
> Signed-off-by: Joshua Washington 
> Reviewed-by: Rushil Gupta 
> ---
>  app/test-pmd/txonly.c | 39 +++
>  1 file changed, 23 insertions(+), 16 deletions(-)
>
> diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
> index b3d6873104..f79e0e5d0b 100644
> --- a/app/test-pmd/txonly.c
> +++ b/app/test-pmd/txonly.c
> @@ -56,7 +56,7 @@ uint32_t tx_ip_dst_addr = (198U << 24) | (18 << 16) | (0
> << 8) | 2;
>  #define IP_DEFTTL  64   /* from RFC 1340. */
>
>  static struct rte_ipv4_hdr pkt_ip_hdr; /**< IP header of transmitted
> packets. */
> -RTE_DEFINE_PER_LCORE(uint8_t, _ip_var); /**< IP address variation */
> +RTE_DEFINE_PER_LCORE(uint8_t, _src_var); /**< Source port variation */
>  static struct rte_udp_hdr pkt_udp_hdr; /**< UDP header of tx packets. */
>
>  static uint64_t timestamp_mask; /**< Timestamp dynamic flag mask */
> @@ -230,28 +230,35 @@ pkt_burst_prepare(struct rte_mbuf *pkt, struct
> rte_mempool *mbp,
> copy_buf_to_pkt(eth_hdr, sizeof(*eth_hdr), pkt, 0);
> copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
> sizeof(struct rte_ether_hdr));
> +   copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
> +   sizeof(struct rte_ether_hdr) +
> +   sizeof(struct rte_ipv4_hdr));
> if (txonly_multi_flow) {
> -   uint8_t  ip_var = RTE_PER_LCORE(_ip_var);
> -   struct rte_ipv4_hdr *ip_hdr;
> -   uint32_t addr;
> +   uint16_t src_var = RTE_PER_LCORE(_src_var);
> +   struct rte_udp_hdr *udp_hdr;
> +   uint16_t port;
>
> -   ip_hdr = rte_pktmbuf_mtod_offset(pkt,
> -   struct rte_ipv4_hdr *,
> -   sizeof(struct rte_ether_hdr));
> +   udp_hdr = rte_pktmbuf_mtod_offset(pkt,
> +   struct rte_udp_hdr *,
> +   sizeof(struct rte_ether_hdr) +
> +   sizeof(struct rte_ipv4_hdr));
> /*
> -* Generate multiple flows by varying IP src addr. This
> -* enables packets are well distributed by RSS in
> +* Generate multiple flows by varying UDP source port.
> +* This enables packets are well distributed by RSS in
>  * receiver side if any and txonly mode can be a decent
>  * packet generator for developer's quick performance
>  * regression test.
> +*
> +* Only ports in the range 49152 (0xC000) and 65535
> (0xFFFF)
> +* will be used, with the least significant byte
> representing
> +* the lcore ID. As such, the most significant byte will
> cycle
> +* through 0xC0 and 0xFF.
>  */
> -   addr = (tx_ip_dst_addr | (ip_var++ << 8)) + rte_lcore_id();
> -   ip_hdr->src_addr = rte_cpu_to_be_32(addr);
> -   RTE_PER_LCORE(_ip_var) = ip_var;
> +   port = ((((src_var++) % (0xFF - 0xC0) + 0xC0) & 0xFF) << 8)
> +   + rte_lcore_id();
> +   udp_hdr->src_port = rte_cpu_to_be_16(port);
> +   RTE_PER_LCORE(_src_var) = src_var;
> }
> -   copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
> -   sizeof(struct rte_ether_hdr) +
> -   sizeof(struct rte_ipv4_hdr));
>
> if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND) ||
> txonly_multi_flow)
> update_pkt_header(pkt, pkt_len);
> @@ -393,7 +400,7 @@ pkt_burst_transmit(struct fwd_stream *fs)
> nb_tx = common_fwd_stream_transmit(fs, pkts_burst, nb_pkt);
>
> if (txonly_multi_flow)
> -   RTE_PER_LCORE(_ip_var) -= nb_pkt - nb_tx;
> +   RTE_PER_LCORE(_src_var) -= nb_pkt - nb_tx;
>
> if (unlikely(nb_tx < nb_pkt)) {
> if (verbose_level > 0 && fs->fwd_dropped == 0)
> --
> 2.40.0.634.g4ca3ef3
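
As a quick standalone check of the port formula in the patch above (lcore ID
2 is an assumed example value):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint16_t src_var = 0;
	unsigned int lcore_id = 2;

	for (int i = 0; i < 3; i++) {
		uint16_t port = ((((src_var++) % (0xFF - 0xC0) + 0xC0)
				& 0xFF) << 8) + lcore_id;
		printf("0x%04x\n", port); /* 0xc002, 0xc102, 0xc202 */
	}
	return 0;
}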

DPDK Release Status Meeting 2023-04-06

2023-04-24 Thread Mcnamara, John
Release status meeting minutes 2023-04-06
=

Agenda:
* Release Dates
* Subtrees
* Roadmaps
* LTS
* Defects
* Opens

Participants:
* AMD
* ARM
* Debian/Microsoft
* Intel
* Marvell NO
* Nvidia
* Red Hat


Release Dates
-

The following are the proposed current dates for 23.07:

* V1:  22 April 2023
* RC1: 31   May 2023
* RC2: 21  June 2023
* RC3: 28  June 2023
* Release: 12  July 2023


Subtrees


* next-net
  * No new PMDs in this release (so far).

* next-net-intel
  * No update

* next-net-mlx
  * No update

* next-net-mvl
  * No update

* next-eventdev
  * No update

* next-baseband
  * No major features so far.

* next-virtio
  * New series on guest notifications in slow path
  * VDUSE - Vhost VDPA in userspace. https://lwn.net/Articles/841054/

* next-crypto
  * Patches to add new algorithms
  * Updates to PDCP
  * Patches for asymmetric crypto
  * rte_security patch for out of place inline IPsec

* main
  * A lot of patches to improve the Windows port
  * New version of lock annotation support
  * Patches from ARM for PMU
* Postponed from last release
* For tracing
  * Power Management feature from AMD
  * Memarea feature
  * CDX bus patches from AMD
* Virtual PCI like bus

  * Call for updates to the external Roadmap:
https://core.dpdk.org/roadmap/

* Retrospective:
  * Bonding PMD: there is a gap in maintenance.



Proposed Schedule for 2023
--

See also http://core.dpdk.org/roadmap/#dates

23.07
  * Proposal deadline (RFC/v1 patches): 22 April 2023
  * API freeze (-rc1): 31 May 2023
  * PMD features freeze (-rc2): 21 June 2023
  * Builtin applications features freeze (-rc3): 28 June 2023
  * Release: 12 July 2023

23.11
  * Proposal deadline (RFC/v1 patches): 12 August 2023
  * API freeze (-rc1): 29 September 2023
  * PMD features freeze (-rc2): 20 October 2023
  * Builtin applications features freeze (-rc3): 27 October 2023
  * Release: 15 November 2023


LTS
---

Next LTS releases will be:

* 22.11.1
* 21.11.4
  * RC1 testing underway
* 20.11.8
  * RC1 sent and first test report received
* 19.11.15
  * CVE and critical fixes only.


* Distros
  * v20.11 in Debian 11
  * Ubuntu 22.04 contains 21.11

Defects
---

* Bugzilla links, 'Bugs',  added for hosted projects
  * https://www.dpdk.org/hosted-projects/


Opens
-

* None


DPDK Release Status Meetings


The DPDK Release Status Meeting is intended for DPDK Committers to discuss the
status of the master tree and sub-trees, and for project managers to track
progress or milestone dates.

The meeting occurs on every Thursday at 9:30 UTC over Jitsi on 
https://meet.jit.si/DPDK

You don't need an invite to join the meeting but if you want a calendar 
reminder just
send an email to "John McNamara john.mcnam...@intel.com" for the invite.




RE: [EXT] [RFC] lib: set/get max memzone segments

2023-04-24 Thread Ophir Munk
Thank you Devendra Singh Rawat for your valuable comments.

> >+int ecore_mz_mapping_alloc(void)
> >+{
> >+ecore_mz_mapping = rte_malloc("ecore_mz_map", 0,
> >+rte_memzone_max_get() * sizeof(struct rte_memzone *));
> 
> The second parameter of rte_malloc() should be the size, and the third
> parameter should be the alignment, 0 in this case.
> 
> Check
> https://doc.dpdk.org/api/rte__malloc_8h.html#a247c99e8d36300c52729c9e
> e58c2b489

Ack

> >--- a/drivers/net/qede/qede_main.c
> >+++ b/drivers/net/qede/qede_main.c
> >@@ -78,6 +78,12 @@ qed_probe(struct ecore_dev *edev, struct
> >rte_pci_device *pci_dev,
> > return rc;
> > }
> >
> >+rc = ecore_mz_mapping_alloc();
> 
> ecore_mz_mapping_alloc() should be called prior to calling
> ecore_hw_prepare().
> 

Ack

> >
> >@@ -721,6 +727,7 @@ static void qed_remove(struct ecore_dev *edev)
> > if (!edev)
> > return;
> >
> >+ecore_mz_mapping_free();
> > ecore_hw_remove(edev);
> > }
> 
> ecore_mz_mapping_free() should be called after ecore_hw_remove();

Ack


RE: [RFC] lib: set/get max memzone segments

2023-04-24 Thread Ophir Munk
Thank you Stephen Hemminger for your comment.

> Subject: Re: [RFC] lib: set/get max memzone segments
> 
> On Wed, 19 Apr 2023 11:36:34 +0300
> Ophir Munk  wrote:
> 
> > +int ecore_mz_mapping_alloc(void)
> > +{
> > +   ecore_mz_mapping = rte_malloc("ecore_mz_map", 0,
> > +   rte_memzone_max_get() * sizeof(struct rte_memzone *));
> 
> Why not use rte_calloc(), 

rte_malloc() replaced with rte_zmalloc().
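
For illustration, the corrected allocation would then look roughly like this
(a sketch based on the review comments; the error handling is an assumption):

	ecore_mz_mapping = rte_zmalloc("ecore_mz_map",
			rte_memzone_max_get() * sizeof(struct rte_memzone *), 0);
	if (ecore_mz_mapping == NULL)
		return -ENOMEM;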

> and devices should be using NUMA aware
> allocation to put the memzone on same NUMA node as the PCI device.

I leave this optimization to driver developers. I don't think it should be part 
of this RFC.



Re: [dpdk-web] [RFC PATCH] process: new library approval in principle

2023-04-24 Thread Thomas Monjalon
17/04/2023 15:33, Jerin Jacob:
> On Wed, Mar 15, 2023 at 7:17 PM Jerin Jacob  wrote:
> > On Fri, Mar 3, 2023 at 11:55 PM Thomas Monjalon  wrote:
> @Thomas Monjalon  Could you check the below comments and share your
> opinion to make forward progress.
> 
> > > 13/02/2023 10:26, jer...@marvell.com:
> > > > --- /dev/null
> > > > +++ b/content/process/_index.md
> > >
> > > First question: is the website the best place for this process?
> > >
> > > Inside the code guides, we have a contributing section,
> > > but I'm not sure it is a good fit for the decision process.
> > >
> > > In the website, you are creating a new page "process".
> > > Is it what we want?
> > > What about making it a sub-page of "Technical Board"?
> >
> > Since it is a process, I thought of keeping "process" page.
> > No specific opinion on where to add it.
> > If there are no other objections, then I can add it at
> > doc/guides/contributing/new_library_policy.rst in the DPDK repo.
> > Let me know if you think of a better name or better place to keep the file

Maybe the contributing guide is the best place.
I'm OK with a new file doc/guides/contributing/new_library.rst
which could document more than the policy in the future
(like things to remember and to check).

> > > > +Adding a new library to DPDK codebase with proper RFC and then full 
> > > > patch-sets is
> > > > +significant work and getting early approval-in-principle that a 
> > > > library help DPDK contributors
> > > > +avoid wasted effort if it is not suitable for various reasons.
> > >
> > > That's a long sentence we could split.
> >
> > OK Changing as:
> >
> > Adding a new library to DPDK codebase with proper RFC and full
> > patch-sets is significant work.
> >
> > Getting early approval-in-principle that a library can help DPDK
> > contributors avoid wasted effort
> > if it is not suitable for various reasons

It will be easier to start with the goal:
In order to save effort, developers will get an early approval in principle,
or early feedback in case the library is not suitable for various reasons.

> >
> >
> > > > +   - Purpose of the library.
> > > > +   - Scope of the library.
> > >
> > > Not sure I understand the difference between Purpose and Scope.
> >
> > Purpose → The need for the library
> > Scope → I meant the work scope associated with it.
> >
> > I will change "Scope of the library" to,
> >
> > - Scope of work: Outline the various additional tasks planned for this
> > library, such as developing new test applications, adding new drivers,
> > and updating existing applications.

OK

> > > > +   - Public API specification header file as RFC
> > > > +   - Optional and good to have.
> > >
> > > You mean providing API is optional at this stage?
> >
> > Yes. I think, TB can request if more clarity is needed as mentioned below.
> > "TB may additionally request this collateral if needed to get more
> > clarity on scope and purpose"

OK

> > > > +3. Based on mailing list and TB meeting discussions, TB to vote for 
> > > > approval-in-principle and share
> > > > +the decision in the mailing list.
> > >
> > > I think we should say here that it is safe to start working
> > > on the implementation after this step,
> > > but the patches will need to match usual quality criterias
> > > to be effectively accepted.
> >
> > OK.
> >
> > I will add the following,
> >
> > 4.  Once TB approves the library in principle, it is safe to start
> > working on its implementation.
> > However, the patches will need to meet the usual quality criteria in
> > order to be effectively accepted.

OK




Re: [PATCH 1/3] security: introduce out of place support for inline ingress

2023-04-24 Thread Thomas Monjalon
18/04/2023 10:33, Jerin Jacob:
> On Tue, Apr 11, 2023 at 11:36 PM Stephen Hemminger
>  wrote:
> >
> > On Tue, 11 Apr 2023 15:34:07 +0530
> > Nithin Dabilpuram  wrote:
> >
> > > diff --git a/lib/security/rte_security.h b/lib/security/rte_security.h
> > > index 4bacf9fcd9..866cd4e8ee 100644
> > > --- a/lib/security/rte_security.h
> > > +++ b/lib/security/rte_security.h
> > > @@ -275,6 +275,17 @@ struct rte_security_ipsec_sa_options {
> > >*/
> > >   uint32_t ip_reassembly_en : 1;
> > >
> > > + /** Enable out of place processing on inline inbound packets.
> > > +  *
> > > +  * * 1: Enable driver to perform Out-of-place(OOP) processing for 
> > > this inline
> > > +  *  inbound SA if supported by driver. PMD need to register mbuf
> > > +  *  dynamic field using rte_security_oop_dynfield_register()
> > > +  *  and security session creation would fail if dynfield is not
> > > +  *  registered successfully.
> > > +  * * 0: Disable OOP processing for this session (default).
> > > +  */
> > > + uint32_t ingress_oop : 1;
> > > +
> > >   /** Reserved bit fields for future extension
> > >*
> > >* User should ensure reserved_opts is cleared as it may change in
> > > @@ -282,7 +293,7 @@ struct rte_security_ipsec_sa_options {
> > >*
> > >* Note: Reduce number of bits in reserved_opts for every new 
> > > option.
> > >*/
> > > - uint32_t reserved_opts : 17;
> > > + uint32_t reserved_opts : 16;
> > >  };
> >
> > NAK
> > Let me repeat the reserved bit rant. YAGNI
> >
> > Reserved space is not usable without ABI breakage unless the existing
> > code enforces that reserved space has to be zero.
> >
> > Just saying "User should ensure reserved_opts is cleared" is not enough.
> 
> Yes. I think we need to enforce having _init functions for the
> structures which use the reserved field.
> 
> On the same note on YAGNI, I am wondering why NOT introduce an
> RTE_NEXT_ABI macro kind of scheme to compile out ABI-breaking changes.
> By keeping RTE_NEXT_ABI disabled by default, and enabling it explicitly if
> the user wants it, we avoid waiting one year for any ABI-breaking change.
> There are a lot of "fixed appliance" customers (not OS-distribution-driven
> customers) who are willing to recompile DPDK for a new feature.
> What are we losing with this scheme?

RTE_NEXT_ABI is described in the ABI policy.
We are not doing it currently, but I think we could
when it does not complicate the code too much.

The only problems I see are:
- more #ifdef clutter
- 2 binary versions to test
- CI and checks must handle RTE_NEXT_ABI version
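
For illustration, a change like the one discussed above could be staged
roughly like this (a sketch only, reusing the field names from the patch):

struct rte_security_ipsec_sa_options {
	...
#ifdef RTE_NEXT_ABI
	uint32_t ingress_oop : 1;     /* compiled in only with next ABI */
	uint32_t reserved_opts : 16;
#else
	uint32_t reserved_opts : 17;
#endif
};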





Re: [RFC 0/3] introduce coroutine library

2023-04-24 Thread fengchengwen
On 2023/4/25 0:08, Stephen Hemminger wrote:
> On Mon, 24 Apr 2023 13:02:05 +
> Chengwen Feng  wrote:
> 
>> This patchset introduces the coroutine library which will help refactor
>> the hns3 PMD's reset process.
>>
>> The hns3 single function reset process consists of the following steps:
>> 1.stop_service();
>> 2.prepare_reset();
>> 3.delay(100ms);
>> 4.notify_hw();
>> 5.wait_hw_reset_done(); // multiple sleep waits are involved.
>> 6.reinit();
>> 7.restore_conf();
>>
>> If the DPDK process takes over multiple hns3 functions (e.g. 100),
>> it's impractical to reset and restore functions in sequence:
>> 1.proc_func(001); // will complete in the 100+ms range.
>> 2.proc_func(002); // will complete in the 100~200+ms range.
>> ...
>> x.proc_func(100); // will complete in the 9900~10000+ms range.
>> The later functions will fail to process because it's too late to deal with them.
>>
>> One solution is that create a reset thread for each function, and it
>> will lead to large number of threads if the DPDK process take over
>> multiple hns3 functions.
>>
>> So the current hns3 driver uses asynchronous mechanism, for examples, it
>> use rte_eal_alarm_set() when process delay(100ms), it splits a serial
>> process into multiple asynchronous processes, and the code is complex
>> and difficult to understand.
>>
>> The coroutine is a good mechanism to provide programmers with the 
>> simplicity of keeping serial processes within a limited number of
>> threads.
>>
>> This patchset uses <ucontext.h> to build the coroutine framework, and it
>> just provides a demo. More APIs may be added in the future.
>>
>> In addition, we would like to ask the community whether it is possible
>> to accept the library. If not, whether it is allowed to provide the
>> library in hns3 PMD.
>>
>> Chengwen Feng (3):
>>   lib/coroutine: add coroutine library
>>   examples/coroutine: support coroutine examples
>>   net/hns3: refactor reset process with coroutine
> 
> Interesting, but the DPDK really is not the right place for this.
> Also, why so much sleeping. Can't this device be handled with an event based
> model. Plus any complexity like this introduces more bugs into already fragile
> interaction of DPDK userspace applications and threads.

An event-based model will function as:
  event-handler() {
    for (...) {
      event = get_next_event();
      proc_event();
    }
  }
The root cause is that proc_event() takes too much time, which means other
functions can't be processed in a timely manner.

For cases where proc_event() may wait a long time, a coroutine could also be
used to optimize it.

> 
> Not only that, coroutines add to the pre-existing problems with locking.
> If coroutine 1 acquires a lock, the coroutine 2 will deadlock itself.
> And someone will spend days figuring that out. And the existing analyzer
> tools will not know about the magic coroutine library.

Analyzer tools like lock annotations may be a problem.

Locks in DPDK APIs are mostly non-blocking. We can add some restrictions
(enforced by reviewers), such as: once holding a lock, you can't invoke the
rte_co_yield() or rte_co_delay() APIs.
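
For illustration, this is the pattern such a review rule would forbid
(rte_co_yield() is this RFC's API; the deadlock arises when another coroutine
on the same thread then tries to take the lock):

	rte_spinlock_lock(&lock);
	rte_co_yield(); /* forbidden: the scheduler may switch to a coroutine
	                 * that spins on 'lock' forever, since the holder
	                 * never gets to run again */
	rte_spinlock_unlock(&lock);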


In addition, any technology has two sides. The greatest advantage of
coroutines, I think, is that they remove a large number of callbacks in
asynchronous programming. Also, high-level languages generally provide
coroutines (e.g. C++/Python). As things develop, the analyzer tools may
evolve to support detecting such problems.


One more question: if this is not acceptable as a public library, would
integration of the library in the hns3 PMD be allowed? Our internal evaluation
of the solution (refactoring with coroutines) shows it is feasible, but the
code needs to go upstream; we hope to hear the community's comments.

> 
> Bottom line: please no
> 
> .
> 


Re: [RFC 0/3] introduce coroutine library

2023-04-24 Thread Stephen Hemminger
On Tue, 25 Apr 2023 10:11:43 +0800
fengchengwen  wrote:

> On 2023/4/25 0:08, Stephen Hemminger wrote:
> > On Mon, 24 Apr 2023 13:02:05 +
> > Chengwen Feng  wrote:
> >   
> >> This patchset introduces the coroutine library which will help refactor
> >> the hns3 PMD's reset process.
> >>
> >> The hns3 single function reset process consists of the following steps:
> >> 1.stop_service();
> >> 2.prepare_reset();
> >> 3.delay(100ms);
> >> 4.notify_hw();
> >> 5.wait_hw_reset_done(); // multiple sleep waits are involved.
> >> 6.reinit();
> >> 7.restore_conf();
> >>
> >> If the DPDK process takes over multiple hns3 functions (e.g. 100),
> >> it's impractical to reset and restore functions in sequence:
> >> 1.proc_func(001); // will complete in the 100+ms range.
> >> 2.proc_func(002); // will complete in the 100~200+ms range.
> >> ...
> >> x.proc_func(100); // will complete in the 9900~10000+ms range.
> >> The later functions will fail to process because it's too late to deal with them.
> >>
> >> One solution is that create a reset thread for each function, and it
> >> will lead to large number of threads if the DPDK process take over
> >> multiple hns3 functions.
> >>
> >> So the current hns3 driver uses asynchronous mechanism, for examples, it
> >> use rte_eal_alarm_set() when process delay(100ms), it splits a serial
> >> process into multiple asynchronous processes, and the code is complex
> >> and difficult to understand.
> >>
> >> The coroutine is a good mechanism to provide programmers with the 
> >> simplicity of keeping serial processes within a limited number of
> >> threads.
> >>
> >> This patchset uses <ucontext.h> to build the coroutine framework, and it
> >> just provides a demo. More APIs may be added in the future.
> >>
> >> In addition, we would like to ask the community whether it is possible
> >> to accept the library. If not, whether it is allowed to provide the
> >> library in hns3 PMD.
> >>
> >> Chengwen Feng (3):
> >>   lib/coroutine: add coroutine library
> >>   examples/coroutine: support coroutine examples
> >>   net/hns3: refactor reset process with coroutine  
> > 
> > Interesting, but the DPDK really is not the right place for this.
> > Also, why so much sleeping. Can't this device be handled with an event based
> > model. Plus any complexity like this introduces more bugs into already 
> > fragile
> > interaction of DPDK userspace applications and threads.  
> 
> A event base model will function as:
>   event-handler() {
> for (...) {
> event = get_next_event();
> proc_event();
> }
>   }
> The root cause is that the proc_event() take too many time, and it will lead 
> to other
> function can't be processed timely.
> 
> For which proc_event() may wait a lot of time, the coroutine could also used 
> to optimize
> it.
> 
> > 
> > Not only that, coroutines add to the pre-existing problems with locking.
> > If coroutine 1 acquires a lock, the coroutine 2 will deadlock itself.
> > And someone will spend days figuring that out. And the existing analyzer
> > tools will not know about the magic coroutine library.  
> 
> Analyzer tools like lock annotations maybe a problem.
> 
> Locks in DPDK APIs are mostly no-blocking. We can add some restrictions(by 
> reviewer), such
> as once holding a lock, you can't invoke rte_co_yield() or rte_co_delay() API.

> In addition, any technology has two sides, the greatest advantage of 
> coroutine I think is
> removes a large number of callbacks in asychronous programming. And also 
> high-level languages
> generally provide coroutines (e.g. C++/Python). With the development, the 
> analyzer tools maybe
> evolved to support detect.
> 
> 
> And one more, if not acceptable as public library, whether it is allowed 
> intergration of this
> library in hns3 PMD ? Our internal evaluation solution (use coroutine 
> refactor) is feasible,
> but the code needs to be upstream, hope to listen to community's comments.


The standard DPDK architecture is to have dedicated threads.
Unless you convert the user application to some other model, there really is no 
other
useful work that can be done while waiting for your driver.

There was a previous DPDK library for lightweight threading, but it never got 
any
usage and was abandoned and dropped. Why is this better?


Re: [RFC 0/3] introduce coroutine library

2023-04-24 Thread fengchengwen
On 2023/4/25 10:16, Stephen Hemminger wrote:
> On Tue, 25 Apr 2023 10:11:43 +0800
> fengchengwen  wrote:
> 
>> On 2023/4/25 0:08, Stephen Hemminger wrote:
>>> On Mon, 24 Apr 2023 13:02:05 +
>>> Chengwen Feng  wrote:
>>>   
 This patchset introduces the coroutine library which will help refactor
 the hns3 PMD's reset process.

 The hns3 single function reset process consists of the following steps:
 1.stop_service();
 2.prepare_reset();
 3.delay(100ms);
 4.notify_hw();
 5.wait_hw_reset_done(); // multiple sleep waits are involved.
 6.reinit();
 7.restore_conf();

 If the DPDK process takes over multiple hns3 functions (e.g. 100),
 it's impractical to reset and restore functions in sequence:
 1.proc_func(001); // will complete in the 100+ms range.
 2.proc_func(002); // will complete in the 100~200+ms range.
 ...
 x.proc_func(100); // will complete in the 9900~10000+ms range.
 The later functions will fail to process because it's too late to deal with them.

 One solution is that create a reset thread for each function, and it
 will lead to large number of threads if the DPDK process take over
 multiple hns3 functions.

 So the current hns3 driver uses asynchronous mechanism, for examples, it
 use rte_eal_alarm_set() when process delay(100ms), it splits a serial
 process into multiple asynchronous processes, and the code is complex
 and difficult to understand.

 The coroutine is a good mechanism to provide programmers with the 
 simplicity of keeping serial processes within a limited number of
 threads.

 This patchset uses <ucontext.h> to build the coroutine framework, and it
 just provides a demo. More APIs may be added in the future.

 In addition, we would like to ask the community whether it is possible
 to accept the library. If not, whether it is allowed to provide the
 library in hns3 PMD.

 Chengwen Feng (3):
   lib/coroutine: add coroutine library
   examples/coroutine: support coroutine examples
   net/hns3: refactor reset process with coroutine  
>>>
>>> Interesting, but the DPDK really is not the right place for this.
>>> Also, why so much sleeping. Can't this device be handled with an event based
>>> model. Plus any complexity like this introduces more bugs into already 
>>> fragile
>>> interaction of DPDK userspace applications and threads.  
>>
>> A event base model will function as:
>>   event-handler() {
>> for (...) {
>> event = get_next_event();
>> proc_event();
>> }
>>   }
>> The root cause is that the proc_event() take too many time, and it will lead 
>> to other
>> function can't be processed timely.
>>
>> For which proc_event() may wait a lot of time, the coroutine could also used 
>> to optimize
>> it.
>>
>>>
>>> Not only that, coroutines add to the pre-existing problems with locking.
>>> If coroutine 1 acquires a lock, the coroutine 2 will deadlock itself.
>>> And someone will spend days figuring that out. And the existing analyzer
>>> tools will not know about the magic coroutine library.  
>>
>> Analyzer tools like lock annotations maybe a problem.
>>
>> Locks in DPDK APIs are mostly no-blocking. We can add some restrictions(by 
>> reviewer), such
>> as once holding a lock, you can't invoke rte_co_yield() or rte_co_delay() 
>> API.
> 
>> In addition, any technology has two sides, the greatest advantage of 
>> coroutine I think is
>> removes a large number of callbacks in asychronous programming. And also 
>> high-level languages
>> generally provide coroutines (e.g. C++/Python). With the development, the 
>> analyzer tools maybe
>> evolved to support detect.
>>
>>
>> And one more, if not acceptable as public library, whether it is allowed 
>> intergration of this
>> library in hns3 PMD ? Our internal evaluation solution (use coroutine 
>> refactor) is feasible,
>> but the code needs to be upstream, hope to listen to community's comments.
> 
> 
> The standard DPDK architecture is to have dedicated threads.
> Unless you convert the user application to some other model, there really is 
> no other

Instead of adding a new running model, this coroutine library just adapts to
the current DPDK framework.
My visions:
1. DPDK launches a default thread running a coroutine service; much like the
interrupt thread, it could provide services for PMD drivers.
2. An application could launch a coroutine scheduler on an lcore (just like
the 2/3 commit) if it wants to use this library, as sketched below.
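
As a sketch of point 2, the serial reset flow could then read like this
(rte_co_delay() is this RFC's API; the hns3_* helpers are placeholders):

static void hns3_reset_co(void *arg)
{
	struct hns3_adapter *hns = arg;

	hns3_stop_service(hns);
	hns3_prepare_reset(hns);
	rte_co_delay(100 * 1000); /* yields instead of blocking the thread */
	hns3_notify_hw(hns);
	while (!hns3_hw_reset_done(hns))
		rte_co_delay(1000); /* other coroutines keep running */
	hns3_reinit(hns);
	hns3_restore_conf(hns);
}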

> useful work that can be done while waiting for your driver.
> 
> There was a previous DPDK library for lightweight threading, but it never got 
> any
> usage and was abandoned and dropped. Why is this better?

DPDK lightweight threading? I didn't know about it before, but I will take a
closer look at it.

This patchset is motivated by a problem in our driver reset process. The reset
process is complex and difficult to understand.

Re: [RFC 0/3] introduce coroutine library

2023-04-24 Thread Garrett D'Amore
First time poster here:

I worry a bit about a coroutine approach as it may be challenging for some uses 
like ours.  We have a purely event-driven loop with a Reactor model written in 
D.  The details are not specifically needed here, except to point out that an 
approach based on ucontext.h or something like that would very likely be 
utterly incompatible in our environment.  While we don’t currently plan to 
integrate support for your hns3 device, I would have grave reservations about a 
general coroutine library making its way into DPDK drivers — it would almost 
certainly cause no end of grief for us at Weka.

I’m doubtful that we’re the only DPDK users in this situation.

• Garrett

On Apr 24, 2023 at 7:50 PM -0700, fengchengwen , wrote:
> On 2023/4/25 10:16, Stephen Hemminger wrote:
> > On Tue, 25 Apr 2023 10:11:43 +0800
> > fengchengwen  wrote:
> >
> > > On 2023/4/25 0:08, Stephen Hemminger wrote:
> > > > On Mon, 24 Apr 2023 13:02:05 +
> > > > Chengwen Feng  wrote:
> > > >
> > > > > This patchset introduces the coroutine library which will help 
> > > > > refactor
> > > > > the hns3 PMD's reset process.
> > > > >
> > > > > The hns3 single function reset process consists of the following 
> > > > > steps:
> > > > > 1.stop_service();
> > > > > 2.prepare_reset();
> > > > > 3.delay(100ms);
> > > > > 4.notify_hw();
> > > > > 5.wait_hw_reset_done(); // multiple sleep waits are involved.
> > > > > 6.reinit();
> > > > > 7.restore_conf();
> > > > >
> > > > > If the DPDK process takes over multiple hns3 functions (e.g. 100),
> > > > > it's impractical to reset and restore functions in sequence:
> > > > > 1.proc_func(001); // will complete in the 100+ms range.
> > > > > 2.proc_func(002); // will complete in the 100~200+ms range.
> > > > > ...
> > > > > x.proc_func(100); // will complete in the 9900~10000+ms range.
> > > > > The later functions will fail to process because it's too late to
> > > > > deal with them.
> > > > >
> > > > > One solution is that create a reset thread for each function, and it
> > > > > will lead to large number of threads if the DPDK process take over
> > > > > multiple hns3 functions.
> > > > >
> > > > > So the current hns3 driver uses asynchronous mechanism, for examples, 
> > > > > it
> > > > > use rte_eal_alarm_set() when process delay(100ms), it splits a serial
> > > > > process into multiple asynchronous processes, and the code is complex
> > > > > and difficult to understand.
> > > > >
> > > > > The coroutine is a good mechanism to provide programmers with the
> > > > > simplicity of keeping serial processes within a limited number of
> > > > > threads.
> > > > >
> > > > > This patchset uses <ucontext.h> to build the coroutine framework,
> > > > > and it just provides a demo. More APIs may be added in the future.
> > > > >
> > > > > In addition, we would like to ask the community whether it is possible
> > > > > to accept the library. If not, whether it is allowed to provide the
> > > > > library in hns3 PMD.
> > > > >
> > > > > Chengwen Feng (3):
> > > > > lib/coroutine: add coroutine library
> > > > > examples/coroutine: support coroutine examples
> > > > > net/hns3: refactor reset process with coroutine
> > > >
> > > > Interesting, but the DPDK really is not the right place for this.
> > > > Also, why so much sleeping. Can't this device be handled with an event 
> > > > based
> > > > model. Plus any complexity like this introduces more bugs into already 
> > > > fragile
> > > > interaction of DPDK userspace applications and threads.
> > >
> > > A event base model will function as:
> > > event-handler() {
> > > for (...) {
> > > event = get_next_event();
> > > proc_event();
> > > }
> > > }
> > > The root cause is that the proc_event() take too many time, and it will 
> > > lead to other
> > > function can't be processed timely.
> > >
> > > For which proc_event() may wait a lot of time, the coroutine could also 
> > > used to optimize
> > > it.
> > >
> > > >
> > > > Not only that, coroutines add to the pre-existing problems with locking.
> > > > If coroutine 1 acquires a lock, the coroutine 2 will deadlock itself.
> > > > And someone will spend days figuring that out. And the existing analyzer
> > > > tools will not know about the magic coroutine library.
> > >
> > > Analyzer tools like lock annotations maybe a problem.
> > >
> > > Locks in DPDK APIs are mostly no-blocking. We can add some 
> > > restrictions(by reviewer), such
> > > as once holding a lock, you can't invoke rte_co_yield() or rte_co_delay() 
> > > API.
> >
> > > In addition, any technology has two sides, the greatest advantage of 
> > > coroutine I think is
> > > removes a large number of callbacks in asychronous programming. And also 
> > > high-level languages
> > > generally provide coroutines (e.g. C++/Python). With the development, the 
> > > analyzer tools maybe
> > > evolved to support detect.
> > >
> > >
> > > And one more, if not acceptable as public library, whether it is allowed 
> > > intergration of this
> > > l

Re: [RFC PATCH 1/5] eventdev: add power monitoring API on event port

2023-04-24 Thread Jerin Jacob
On Mon, Apr 24, 2023 at 9:36 PM Ferruh Yigit  wrote:
>
> On 4/19/2023 11:15 AM, Jerin Jacob wrote:
> > On Wed, Apr 19, 2023 at 3:24 PM Sivaprasad Tummala
> >  wrote:
> >>
> >> A new API to allow power monitoring condition on event port to
> >> optimize power when no events are arriving on an event port for
> >> the worker core to process in an eventdev based pipelined application.
> >>
> >> Signed-off-by: Sivaprasad Tummala 
> >> + *
> >> + * @param dev_id
> >> + *   Eventdev id
> >> + * @param port_id
> >> + *   Eventdev port id
> >> + * @param pmc
> >> + *   The pointer to power-optimized monitoring condition structure.
> >> + *
> >> + * @return
> >> + *   - 0: Success.
> >> + *   -ENOTSUP: Operation not supported.
> >> + *   -EINVAL: Invalid parameters.
> >> + *   -ENODEV: Invalid device ID.
> >> + */
> >> +__rte_experimental
> >> +int
> >> +rte_event_port_get_monitor_addr(uint8_t dev_id, uint8_t port_id,
> >> +   struct rte_power_monitor_cond *pmc);
> >
> > + eventdev driver maintainers
> >
> > I think we don't need to expose this to applications, due to:
> > 1) To make applications transparent as to whether power saving is enabled or
> > not?
> > 2) Some HW and Arch already support power management in the driver and in HW
> > (not using the CPU architecture directly)
> >
> > If so, that will be translated to following,
> > a) Add rte_event_port_power_saving_ena_dis(uint8_t dev_id, uint8_t
> > port_id, bool ena) for controlling power saving in slowpath.
> > b) Create reusable PMD private function based on the CPU architecture
> > power saving primitive to cover the PMD don't have native power saving
> > support.
> > c)Update rte_event_dequeue_burst() burst of PMD callback to use (b).
> >
> >
>
> Hi Jerin,

Hi Ferruh,

>
> ethdev approach seems applied here.

Understood. But none of the NIC HW supports power management at
the HW level like eventdev does, so
what we are doing for ethdev is a correct abstraction for ethdev.

>
> In ethdev, 'rte_event_port_get_monitor_addr()' equivalent is
> 'rte_eth_get_monitor_addr()'.
>
> Although 'rte_eth_get_monitor_addr()' is public API, it is currently
> only called from Rx/Tx callback functions implemented in the power library.
> But I assume intention to make it public is to enable users to implement
> their own callback functions that has custom algorithm for the power
> management.

If there is a use case for customizing with one's own callback, we can provide that.
Providing NULL is valid and selects the default algorithm.

>
> And probably same is true for the 'rte_event_port_get_monitor_addr()'.
>
>
> Also, instead of implementing power features within PMDs, isn't it
> better to have a common eventdev layer for it?

We can have rte_eventdev_pmd_* APIs as non-public APIs.
My only objection is to NOT introduce _monitor_ APIs at the eventdev level.
_monitor_ is just one way to do it in SW, so we need a higher level
of abstraction.

>
> For the PMDs benefit from HW event manager, just not implementing
> .get_monitor_addr() dev_ops will make them free from power related APIs.

But application fast-path code diverges when low-level primitives are exposed.


>
>
>
>


RE: [PATCH] usertools: enhance CPU layout

2023-04-24 Thread Lu, Wenzhuo
Hi Stephen, Thomas, Brice,

> -Original Message-
> From: Brice Goglin 
> Sent: Tuesday, April 25, 2023 1:06 AM
> To: Stephen Hemminger ; Thomas Monjalon
> 
> Cc: Lu, Wenzhuo ; dev@dpdk.org;
> david.march...@redhat.com
> Subject: Re: [PATCH] usertools: enhance CPU layout
> 
> Le 21/04/2023 à 17:15, Stephen Hemminger a écrit :
> >
> >>> Better to understand more about our opinion of this script before sending
> >>> a v2 patch.
> >>> I've used 'lstopo'. It's a great tool.
> >>> In my opinion, considering there are Linux tools to show all kinds of
> >>> information, the reason that DPDK has its own tool is to summarize and
> >>> emphasize the information that is important to DPDK. Here it's that some
> >>> cores are more powerful than others. When the users use a testpmd-like APP,
> >>> they can choose the appropriate cores after DPDK reminds them about the
> >>> difference between cores.
> >>> Add Thomas for more suggestions. Thanks.
> >> Adding Brice, hwloc maintainer.
> >>
> >> I think it would be better to contribute to the hwloc project.
> >> If we need a different set of info, we can probably tune it with options.
> > The script had a purpose which was back when DPDK was first started.
> > But as systems get more complex, it becomes something that has to deal
> > with lots of corner cases; and if some other tool can it then that is 
> > better.
> 
> 
> Hello
> 
> Indeed, hwloc/lstopo should be able to do something similar to that script. I
> didn't see anything network-related in the script, does it only show CPU info?
Yes, this script shows only CPU info.
Agree that 'lstopo' already shows everything that this script can show.

> 
> Regarding the original patch, we already support all levels of caches, dies,
> clusters, etc. Hybrid CPUs are also detected, but they are only nicely shown
> in the graphical output [1]. The textual output only says at the very end
> that there are two kinds, with the bitmask of CPUs for each. I am open to
> improving this.
> 
> Brice
> 
> [1] https://twitter.com/bgoglin/status/1542117836008706049/photo/1

Brice, Thomas, Stephen, many thanks for your reply and comments.
As cpu_layout.py is planned to be removed, and the work can be done by
'lstopo', I'll withdraw this patch.



[PATCH v6] enhance NUMA affinity heuristic

2023-04-24 Thread Kaisen You
Trying to allocate memory on the first detected NUMA node has less
chance of finding memory actually available than trying on the main
lcore NUMA node (especially when the DPDK application is started on
only one NUMA node).

Fixes: 8b0a1b8cb481 ("eal: stop using pthread for lcores and control threads")
Fixes: 770d41bf3309 ("malloc: fix allocation with unknown socket ID")
Cc: sta...@dpdk.org

Signed-off-by: David Marchand 
Signed-off-by: Kaisen You 

---
Changes since v5:
- Add comments to the code,

Changes since v4:
- mod the patch title,

Changes since v3:
- add the assignment of socket_id in thread initialization,

Changes since v2:
- add uncommitted local change and fix compilation,

Changes since v1:
- accommodate configurations with the main lcore running on multiple
  physical cores belonging to different NUMA nodes,
---
 lib/eal/common/eal_common_thread.c | 4 ++++
 lib/eal/common/malloc_heap.c       | 6 ++++++
 2 files changed, 10 insertions(+)

diff --git a/lib/eal/common/eal_common_thread.c 
b/lib/eal/common/eal_common_thread.c
index 079a385630..d65bfe251b 100644
--- a/lib/eal/common/eal_common_thread.c
+++ b/lib/eal/common/eal_common_thread.c
@@ -252,6 +252,10 @@ static int ctrl_thread_init(void *arg)
struct rte_thread_ctrl_params *params = arg;
 
__rte_thread_init(rte_lcore_id(), cpuset);
+   /* Set the value of the per-lcore variable _socket_id.
+    * Convenient for threads to find memory.
+    */
+   RTE_PER_LCORE(_socket_id) = SOCKET_ID_ANY;
params->ret = rte_thread_set_affinity_by_id(rte_thread_self(), cpuset);
if (params->ret != 0) {
__atomic_store_n(¶ms->ctrl_thread_status,
diff --git a/lib/eal/common/malloc_heap.c b/lib/eal/common/malloc_heap.c
index d25bdc98f9..a624f08cf7 100644
--- a/lib/eal/common/malloc_heap.c
+++ b/lib/eal/common/malloc_heap.c
@@ -716,6 +716,12 @@ malloc_get_numa_socket(void)
if (conf->socket_mem[socket_id] != 0)
return socket_id;
}
+   /* Trying to allocate memory on the main lcore numa node,
+    * especially when the DPDK application is started only on one numa node.
+    */
+   socket_id = rte_lcore_to_socket_id(rte_get_main_lcore());
+   if (socket_id != (unsigned int)SOCKET_ID_ANY)
+   return socket_id;
 
return rte_socket_id_by_idx(0);
 }
-- 
2.25.1
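
For readers skimming the diff, here is a condensed restatement of the
socket selection order in malloc_get_numa_socket() once this patch is
applied; this is an illustration only (the --socket-mem loop is elided),
the canonical code lives in lib/eal/common/malloc_heap.c:

#include <rte_lcore.h>
#include <rte_memory.h>

static unsigned int
numa_socket_after_patch(void)
{
	unsigned int sid = rte_socket_id();  /* 1. caller's own NUMA node */

	if (sid != (unsigned int)SOCKET_ID_ANY)
		return sid;

	/* 2. first node that was given memory via --socket-mem
	 *    (loop over rte_socket_count() elided here) */

	/* 3. new with this patch: the main lcore's node, which covers
	 *    control threads now starting with _socket_id == SOCKET_ID_ANY */
	sid = rte_lcore_to_socket_id(rte_get_main_lcore());
	if (sid != (unsigned int)SOCKET_ID_ANY)
		return sid;

	return rte_socket_id_by_idx(0);      /* 4. last resort: first node */
}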



RE: 22.11.2 patches review and test

2023-04-24 Thread Xueming(Steven) Li


> -Original Message-
> From: David Marchand 
> Sent: 4/24/2023 14:54
> To: Xueming(Steven) Li 
> Cc: sta...@dpdk.org; dev@dpdk.org; Kevin Traynor ;
> Luca Boccassi ; NBU-Contact-Thomas Monjalon
> (EXTERNAL) 
> Subject: Re: 22.11.2 patches review and test
> 
> Hello Xueming,
> 
> On Sun, Apr 23, 2023 at 11:35 AM Xueming Li  wrote:
> >
> > Here is a list of patches targeted for stable release 22.11.2.
> >
> > The planned date for the final release is 5th MAY.
> >
> > Please help with testing and validation of your use cases and report
> > any issues/results with reply-all to this mail. For the final release
> > the fixes and reported validations will be added to the release notes.
> >
> > A release candidate tarball can be found at:
> >
> > https://dpdk.org/browse/dpdk-stable/tag/?id=v22.11.2-rc1
> >
> > These patches are located at branch 22.11 of dpdk-stable repo:
> > https://dpdk.org/browse/dpdk-stable/
> >
> > Thanks.
> 
> Looking at the UNH dashboard, I see a build failure for Alpine on the
> 22.11 branch.
> You probably need bc1db4f45af3 ("build: detect backtrace availability").
> 

Thanks, the patch works; it is now included in the branch.

> 
> --
> David Marchand



Re: [RFC PATCH 1/5] eventdev: add power monitoring API on event port

2023-04-24 Thread Mattias Rönnblom
On 2023-04-24 18:06, Ferruh Yigit wrote:
> On 4/19/2023 11:15 AM, Jerin Jacob wrote:
>> On Wed, Apr 19, 2023 at 3:24 PM Sivaprasad Tummala
>>  wrote:
>>>
>>> A new API to allow power monitoring condition on event port to
>>> optimize power when no events are arriving on an event port for
>>> the worker core to process in an eventdev based pipelined application.
>>>
>>> Signed-off-by: Sivaprasad Tummala 
>>> + *
>>> + * @param dev_id
>>> + *   Eventdev id
>>> + * @param port_id
>>> + *   Eventdev port id
>>> + * @param pmc
>>> + *   The pointer to power-optimized monitoring condition structure.
>>> + *
>>> + * @return
>>> + *   - 0: Success.
>>> + *   -ENOTSUP: Operation not supported.
>>> + *   -EINVAL: Invalid parameters.
>>> + *   -ENODEV: Invalid device ID.
>>> + */
>>> +__rte_experimental
>>> +int
>>> +rte_event_port_get_monitor_addr(uint8_t dev_id, uint8_t port_id,
>>> +   struct rte_power_monitor_cond *pmc);
>>
>> + eventdev driver maintainers
>>
>> I think we don't need to expose this to applications, because:
>> 1) It keeps applications transparent to whether power saving is enabled
>> or not.
>> 2) Some HW and architectures already support power management in the
>> driver and in HW (not using the CPU architecture directly).
>>
>> If so, that will translate to the following:
>> a) Add rte_event_port_power_saving_ena_dis(uint8_t dev_id, uint8_t
>> port_id, bool ena) for controlling power saving in the slowpath.
>> b) Create a reusable PMD-private function based on the CPU architecture
>> power saving primitive to cover PMDs that don't have native power saving
>> support.
>> c) Update the rte_event_dequeue_burst() PMD callback to use (b).
>>
>>
> 
> Hi Jerin,
> 
> ethdev approach seems applied here.
> 
> In ethdev, 'rte_event_port_get_monitor_addr()' equivalent is
> 'rte_eth_get_monitor_addr()'.
> 
> Although 'rte_eth_get_monitor_addr()' is public API, it is currently
> only called from Rx/Tx callback functions implemented in the power library.
> But I assume intention to make it public is to enable users to implement
> their own callback functions that has custom algorithm for the power
> management.
> 
> And probably same is true for the 'rte_event_port_get_monitor_addr()'.
> 
> 
> Also instead of implementing power features for withing PMDs, isn't it
> better to have a common eventdev layer for it?
> 

To allow that question to be answered, I think you need to be more
specific about what "power features" means.

From what I can tell, the get_monitor_addr() family of functions
addresses the pretty narrow case of allowing umwait (or the non-x86
equivalent) to be used to wait for new events. It leaves all the heavy
lifting to the app, which needs to figure out how loaded each CPU core
is, what backlog of work there is, how to shuffle work around to get the
most out of the power budget, how to translate wall-clock latency
requirements into the equation, which CPU (and/or accelerator/NIC-level)
power features to employ (e.g., DVFS, sleep states, umwait), etc.
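
To make that division of labor concrete, here is a sketch of the usage
model the RFC implies, built on the prototype quoted above; the
empty-poll threshold, the ~100us timeout, and the handle_events() helper
are illustrative assumptions, not part of the proposal:

#include <stdbool.h>
#include <rte_eventdev.h>
#include <rte_power_intrinsics.h>
#include <rte_cycles.h>

static void handle_events(struct rte_event *ev, uint16_t n); /* app-defined */

/* The policy lives entirely in the app; the proposed API only supplies
 * the address to monitor. */
static void
worker_loop(uint8_t dev_id, uint8_t port_id, volatile bool *quit)
{
	struct rte_event ev[32];
	struct rte_power_monitor_cond pmc;
	unsigned int idle = 0;

	while (!*quit) {
		uint16_t n = rte_event_dequeue_burst(dev_id, port_id, ev,
				RTE_DIM(ev), 0);
		if (n > 0) {
			idle = 0;
			handle_events(ev, n);
			continue;
		}
		/* Arm the monitor only after a sustained idle period;
		 * wake on the next enqueue to this port or after ~100us. */
		if (++idle > 256 &&
				rte_event_port_get_monitor_addr(dev_id,
					port_id, &pmc) == 0)
			rte_power_monitor(&pmc,
					rte_rdtsc() + rte_get_tsc_hz() / 10000);
	}
}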

In the context of Eventdev, optimizing for power may include packing 
more flows into the same port, in low-load situations. Keeping a few 
cores relatively busy, and the rest in some deep sleep state may well be 
the best solution for certain (most?) systems. For such a feature to 
work, the event device must be in the loop, but the mechanics could (and 
should) be generic. Eventdev could also control DVFS.

A reasonably generic power management mechanism could go into Eventdev
(a combination of the event device drivers and some generic functions).
Various policies would still need to come from the app.

I think keeping this kind of functionality in Eventdev works well 
provided the only source of work is Eventdev events (i.e., most or all 
fast path lcores are "pure" event-based lcores). No non-eventdev timer 
wheels, no non-eventdev lookaside accelerator or I/O device access, no 
control plane rings to poll, etc.

If such a model is too limiting, another option is to put the central 
power management function in the service framework (with a lot of help 
from Eventdev, RTE timer, and other sources of work as well).

> For the PMDs that benefit from a HW event manager, simply not implementing
> the .get_monitor_addr() dev_ops will keep them free from power-related APIs.



RE: [RFC 07/27] vhost: change to single IOTLB cache per device

2023-04-24 Thread Xia, Chenbo
Hi Maxime,

> -Original Message-
> From: Maxime Coquelin 
> Sent: Friday, March 31, 2023 11:43 PM
> To: dev@dpdk.org; david.march...@redhat.com; Xia, Chenbo
> ; m...@redhat.com; f...@redhat.com;
> jasow...@redhat.com; Liang, Cunming ; Xie, Yongji
> ; echau...@redhat.com; epere...@redhat.com;
> amore...@redhat.com
> Cc: Maxime Coquelin 
> Subject: [RFC 07/27] vhost: change to single IOTLB cache per device
> 
> This patch simplifies IOTLB implementation and improves
> IOTLB memory consumption by having a single IOTLB cache
> per device, instead of having one per queue.
> 
> In order to not impact performance, it keeps an IOTLB lock
> per virtqueue, so that there is no contention between
> multiple queue trying to acquire it.
> 
> Signed-off-by: Maxime Coquelin 
> ---
>  lib/vhost/iotlb.c  | 212 +++--
>  lib/vhost/iotlb.h  |  43 ++---
>  lib/vhost/vhost.c  |  18 ++--
>  lib/vhost/vhost.h  |  16 ++--
>  lib/vhost/vhost_user.c |  25 +++--
>  5 files changed, 160 insertions(+), 154 deletions(-)
> 

[...]

> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
> index d60e39b6bc..81ebef0137 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -7,7 +7,7 @@
>   * The vhost-user protocol connection is an external interface, so it
> must be
>   * robust against invalid inputs.
>   *
> - * This is important because the vhost-user frontend is only one step
> removed
> +* This is important because the vhost-user frontend is only one step
> removed

This is changed by accident?

Thanks,
Chenbo

>   * from the guest.  Malicious guests that have escaped will then launch
> further
>   * attacks from the vhost-user frontend.
>   *
> @@ -237,6 +237,8 @@ vhost_backend_cleanup(struct virtio_net *dev)
>   }
> 
>   dev->postcopy_listening = 0;
> +
> + vhost_user_iotlb_destroy(dev);
>  }
> 
>  static void
> @@ -539,7 +541,6 @@ numa_realloc(struct virtio_net **pdev, struct
> vhost_virtqueue **pvq)
>   if (vq != dev->virtqueue[vq->index]) {
>   VHOST_LOG_CONFIG(dev->ifname, INFO, "reallocated virtqueue on
> node %d\n", node);
>   dev->virtqueue[vq->index] = vq;
> - vhost_user_iotlb_init(dev, vq);
>   }
> 
>   if (vq_is_packed(dev)) {
> @@ -664,6 +665,8 @@ numa_realloc(struct virtio_net **pdev, struct
> vhost_virtqueue **pvq)
>   return;
>   }
>   dev->guest_pages = gp;
> +
> + vhost_user_iotlb_init(dev);
>  }
>  #else
>  static void
> @@ -1360,8 +1363,7 @@ vhost_user_set_mem_table(struct virtio_net **pdev,
> 
>   /* Flush IOTLB cache as previous HVAs are now invalid */
>   if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
> - for (i = 0; i < dev->nr_vring; i++)
> - vhost_user_iotlb_flush_all(dev, 
> dev->virtqueue[i]);
> + vhost_user_iotlb_flush_all(dev);
> 
>   free_mem_region(dev);
>   rte_free(dev->mem);
> @@ -2194,7 +2196,7 @@ vhost_user_get_vring_base(struct virtio_net **pdev,
>   ctx->msg.size = sizeof(ctx->msg.payload.state);
>   ctx->fd_num = 0;
> 
> - vhost_user_iotlb_flush_all(dev, vq);
> + vhost_user_iotlb_flush_all(dev);
> 
>   vring_invalidate(dev, vq);
> 
> @@ -2639,15 +2641,14 @@ vhost_user_iotlb_msg(struct virtio_net **pdev,
>   if (!vva)
>   return RTE_VHOST_MSG_RESULT_ERR;
> 
> + vhost_user_iotlb_cache_insert(dev, imsg->iova, vva, len, imsg-
> >perm);
> +
>   for (i = 0; i < dev->nr_vring; i++) {
>   struct vhost_virtqueue *vq = dev->virtqueue[i];
> 
>   if (!vq)
>   continue;
> 
> - vhost_user_iotlb_cache_insert(dev, vq, imsg->iova, vva,
> - len, imsg->perm);
> -
>   if (is_vring_iotlb(dev, vq, imsg)) {
>   rte_spinlock_lock(&vq->access_lock);
>   translate_ring_addresses(&dev, &vq);
> @@ -2657,15 +2658,14 @@ vhost_user_iotlb_msg(struct virtio_net **pdev,
>   }
>   break;
>   case VHOST_IOTLB_INVALIDATE:
> + vhost_user_iotlb_cache_remove(dev, imsg->iova, imsg->size);
> +
>   for (i = 0; i < dev->nr_vring; i++) {
>   struct vhost_virtqueue *vq = dev->virtqueue[i];
> 
>   if (!vq)
>   continue;
> 
> - vhost_user_iotlb_cache_remove(dev, vq, imsg->iova,
> - imsg->size);
> -
>   if (is_vring_iotlb(dev, vq, imsg)) {
>   rte_spinlock_lock(&vq->access_lock);
>   vring_invalidate(dev, vq);
> @@ -2674,8 +2674,7 @@ vhost_user_iotlb_msg(struct virtio_net **pdev,
>   }
>   break;
>   defau

RE: [RFC 08/27] vhost: add offset field to IOTLB entries

2023-04-24 Thread Xia, Chenbo
> -Original Message-
> From: Maxime Coquelin 
> Sent: Friday, March 31, 2023 11:43 PM
> To: dev@dpdk.org; david.march...@redhat.com; Xia, Chenbo
> ; m...@redhat.com; f...@redhat.com;
> jasow...@redhat.com; Liang, Cunming ; Xie, Yongji
> ; echau...@redhat.com; epere...@redhat.com;
> amore...@redhat.com
> Cc: Maxime Coquelin 
> Subject: [RFC 08/27] vhost: add offset field to IOTLB entries
> 
> This patch is a preliminary work to prepare for VDUSE
> support, for which we need to keep track of the mmaped base
> address and offset in order to be able to unmap it later
> when IOTLB entry is invalidated.
> 
> Signed-off-by: Maxime Coquelin 
> ---
>  lib/vhost/iotlb.c  | 30 ++
>  lib/vhost/iotlb.h  |  2 +-
>  lib/vhost/vhost_user.c |  2 +-
>  3 files changed, 20 insertions(+), 14 deletions(-)
> 
> diff --git a/lib/vhost/iotlb.c b/lib/vhost/iotlb.c
> index a91115cf1c..51f118bc48 100644
> --- a/lib/vhost/iotlb.c
> +++ b/lib/vhost/iotlb.c
> @@ -17,6 +17,7 @@ struct vhost_iotlb_entry {
> 
>   uint64_t iova;
>   uint64_t uaddr;
> + uint64_t uoffset;
>   uint64_t size;
>   uint8_t perm;
>  };
> @@ -27,15 +28,18 @@ static bool
>  vhost_user_iotlb_share_page(struct vhost_iotlb_entry *a, struct
> vhost_iotlb_entry *b,
>   uint64_t align)
>  {
> - uint64_t a_end, b_start;
> + uint64_t a_start, a_end, b_start;
> 
>   if (a == NULL || b == NULL)
>   return false;
> 
> + a_start = a->uaddr + a->uoffset;
> + b_start = b->uaddr + b->uoffset;
> +
>   /* Assumes entry a lower than entry b */
> - RTE_ASSERT(a->uaddr < b->uaddr);
> - a_end = RTE_ALIGN_CEIL(a->uaddr + a->size, align);
> - b_start = RTE_ALIGN_FLOOR(b->uaddr, align);
> + RTE_ASSERT(a_start < b_start);
> + a_end = RTE_ALIGN_CEIL(a_start + a->size, align);
> + b_start = RTE_ALIGN_FLOOR(b_start, align);
> 
>   return a_end > b_start;
>  }
> @@ -43,11 +47,12 @@ vhost_user_iotlb_share_page(struct vhost_iotlb_entry
> *a, struct vhost_iotlb_entr
>  static void
>  vhost_user_iotlb_set_dump(struct virtio_net *dev, struct
> vhost_iotlb_entry *node)
>  {
> - uint64_t align;
> + uint64_t align, start;
> 
> - align = hua_to_alignment(dev->mem, (void *)(uintptr_t)node->uaddr);
> + start = node->uaddr + node->uoffset;
> + align = hua_to_alignment(dev->mem, (void *)(uintptr_t)start);
> 
> - mem_set_dump((void *)(uintptr_t)node->uaddr, node->size, false,
> align);
> + mem_set_dump((void *)(uintptr_t)start, node->size, false, align);
>  }
> 
>  static void
> @@ -56,10 +61,10 @@ vhost_user_iotlb_clear_dump(struct virtio_net *dev,
> struct vhost_iotlb_entry *no
>  {
>   uint64_t align, start, end;
> 
> - start = node->uaddr;
> - end = node->uaddr + node->size;
> + start = node->uaddr + node->uoffset;
> + end = start + node->size;
> 
> - align = hua_to_alignment(dev->mem, (void *)(uintptr_t)node->uaddr);
> + align = hua_to_alignment(dev->mem, (void *)(uintptr_t)start);
> 
>   /* Skip first page if shared with previous entry. */
>   if (vhost_user_iotlb_share_page(prev, node, align))
> @@ -234,7 +239,7 @@ vhost_user_iotlb_cache_random_evict(struct virtio_net
> *dev)
> 
>  void
>  vhost_user_iotlb_cache_insert(struct virtio_net *dev, uint64_t iova,
> uint64_t uaddr,
> - uint64_t size, uint8_t perm)
> + uint64_t uoffset, uint64_t size, uint8_t perm)
>  {
>   struct vhost_iotlb_entry *node, *new_node;
> 
> @@ -256,6 +261,7 @@ vhost_user_iotlb_cache_insert(struct virtio_net *dev,
> uint64_t iova, uint64_t ua
> 
>   new_node->iova = iova;
>   new_node->uaddr = uaddr;
> + new_node->uoffset = uoffset;
>   new_node->size = size;
>   new_node->perm = perm;
> 
> @@ -344,7 +350,7 @@ vhost_user_iotlb_cache_find(struct virtio_net *dev,
> uint64_t iova, uint64_t *siz
> 
>   offset = iova - node->iova;
>   if (!vva)
> - vva = node->uaddr + offset;
> + vva = node->uaddr + node->uoffset + offset;
> 
>   mapped += node->size - offset;
>   iova = node->iova + node->size;
> diff --git a/lib/vhost/iotlb.h b/lib/vhost/iotlb.h
> index 3490b9e6be..bee36c5903 100644
> --- a/lib/vhost/iotlb.h
> +++ b/lib/vhost/iotlb.h
> @@ -58,7 +58,7 @@ vhost_user_iotlb_wr_unlock_all(struct virtio_net *dev)
>  }
> 
>  void vhost_user_iotlb_cache_insert(struct virtio_net *dev, uint64_t iova,
> uint64_t uaddr,
> - uint64_t size, uint8_t perm);
> + uint64_t uoffset, uint64_t size, uint8_t
> perm);
>  void vhost_user_iotlb_cache_remove(struct virtio_net *dev, uint64_t iova,
> uint64_t size);
>  uint64_t vhost_user_iotlb_cache_find(struct virtio_net *dev, uint64_t
> iova,
>   uint64_t *size, uint8_t perm);
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user

RE: [RFC 09/27] vhost: add page size info to IOTLB entry

2023-04-24 Thread Xia, Chenbo
Hi Maxime,

> -Original Message-
> From: Maxime Coquelin 
> Sent: Friday, March 31, 2023 11:43 PM
> To: dev@dpdk.org; david.march...@redhat.com; Xia, Chenbo
> ; m...@redhat.com; f...@redhat.com;
> jasow...@redhat.com; Liang, Cunming ; Xie, Yongji
> ; echau...@redhat.com; epere...@redhat.com;
> amore...@redhat.com
> Cc: Maxime Coquelin 
> Subject: [RFC 09/27] vhost: add page size info to IOTLB entry
> 
> VDUSE will close the file descriptor after having mapped
> the shared memory, so it will not be possible to get the
> page size afterwards.
> 
> This patch adds an new page_shift field to the IOTLB entry,
> so that the information will be passed at IOTLB cache
> insertion time. The information is stored as a bit shift
> value so that IOTLB entry keeps fitting in a single
> cacheline.
> 
> Signed-off-by: Maxime Coquelin 
> ---
>  lib/vhost/iotlb.c  | 46 --
>  lib/vhost/iotlb.h  |  2 +-
>  lib/vhost/vhost.h  |  1 -
>  lib/vhost/vhost_user.c |  8 +---
>  4 files changed, 28 insertions(+), 29 deletions(-)
> 
> diff --git a/lib/vhost/iotlb.c b/lib/vhost/iotlb.c
> index 51f118bc48..188dfb8e38 100644
> --- a/lib/vhost/iotlb.c
> +++ b/lib/vhost/iotlb.c
> @@ -19,14 +19,14 @@ struct vhost_iotlb_entry {
>   uint64_t uaddr;
>   uint64_t uoffset;
>   uint64_t size;
> + uint8_t page_shift;
>   uint8_t perm;
>  };
> 
>  #define IOTLB_CACHE_SIZE 2048
> 
>  static bool
> -vhost_user_iotlb_share_page(struct vhost_iotlb_entry *a, struct
> vhost_iotlb_entry *b,
> - uint64_t align)
> +vhost_user_iotlb_share_page(struct vhost_iotlb_entry *a, struct
> vhost_iotlb_entry *b)
>  {
>   uint64_t a_start, a_end, b_start;
> 
> @@ -38,44 +38,41 @@ vhost_user_iotlb_share_page(struct vhost_iotlb_entry
> *a, struct vhost_iotlb_entr
> 
>   /* Assumes entry a lower than entry b */
>   RTE_ASSERT(a_start < b_start);
> - a_end = RTE_ALIGN_CEIL(a_start + a->size, align);
> - b_start = RTE_ALIGN_FLOOR(b_start, align);
> + a_end = RTE_ALIGN_CEIL(a_start + a->size, RTE_BIT64(a->page_shift));
> + b_start = RTE_ALIGN_FLOOR(b_start, RTE_BIT64(b->page_shift));
> 
>   return a_end > b_start;
>  }
> 
>  static void
> -vhost_user_iotlb_set_dump(struct virtio_net *dev, struct
> vhost_iotlb_entry *node)
> +vhost_user_iotlb_set_dump(struct vhost_iotlb_entry *node)
>  {
> - uint64_t align, start;
> + uint64_t start;
> 
>   start = node->uaddr + node->uoffset;
> - align = hua_to_alignment(dev->mem, (void *)(uintptr_t)start);
> -
> - mem_set_dump((void *)(uintptr_t)start, node->size, false, align);
> + mem_set_dump((void *)(uintptr_t)start, node->size, false,
> RTE_BIT64(node->page_shift));
>  }
> 
>  static void
> -vhost_user_iotlb_clear_dump(struct virtio_net *dev, struct
> vhost_iotlb_entry *node,
> +vhost_user_iotlb_clear_dump(struct vhost_iotlb_entry *node,
>   struct vhost_iotlb_entry *prev, struct vhost_iotlb_entry *next)
>  {
> - uint64_t align, start, end;
> + uint64_t start, end;
> 
>   start = node->uaddr + node->uoffset;
>   end = start + node->size;
> 
> - align = hua_to_alignment(dev->mem, (void *)(uintptr_t)start);
> -
>   /* Skip first page if shared with previous entry. */
> - if (vhost_user_iotlb_share_page(prev, node, align))
> - start = RTE_ALIGN_CEIL(start, align);
> + if (vhost_user_iotlb_share_page(prev, node))
> + start = RTE_ALIGN_CEIL(start, RTE_BIT64(node->page_shift));
> 
>   /* Skip last page if shared with next entry. */
> - if (vhost_user_iotlb_share_page(node, next, align))
> - end = RTE_ALIGN_FLOOR(end, align);
> + if (vhost_user_iotlb_share_page(node, next))
> + end = RTE_ALIGN_FLOOR(end, RTE_BIT64(node->page_shift));
> 
>   if (end > start)
> - mem_set_dump((void *)(uintptr_t)start, end - start, false,
> align);
> + mem_set_dump((void *)(uintptr_t)start, end - start, false,
> + RTE_BIT64(node->page_shift));
>  }
> 
>  static struct vhost_iotlb_entry *
> @@ -198,7 +195,7 @@ vhost_user_iotlb_cache_remove_all(struct virtio_net
> *dev)
>   vhost_user_iotlb_wr_lock_all(dev);
> 
>   RTE_TAILQ_FOREACH_SAFE(node, &dev->iotlb_list, next, temp_node) {
> - vhost_user_iotlb_set_dump(dev, node);
> + vhost_user_iotlb_set_dump(node);
> 
>   TAILQ_REMOVE(&dev->iotlb_list, node, next);
>   vhost_user_iotlb_pool_put(dev, node);
> @@ -223,7 +220,7 @@ vhost_user_iotlb_cache_random_evict(struct virtio_net
> *dev)
>   if (!entry_idx) {
>   struct vhost_iotlb_entry *next_node =
> RTE_TAILQ_NEXT(node, next);
> 
> - vhost_user_iotlb_clear_dump(dev, node, prev_node,
> next_node);
> + vhost_user_iotlb_clear_dump(node, prev_node, next_node);
> 
>   TAILQ_REMOVE(&dev->iotlb_list, node, next);
>  
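
As a back-of-the-envelope check of the "single cacheline" claim in the
commit message above, assuming the usual TAILQ-linked entry layout (a
stand-alone reproduction for illustration, not the actual vhost code):

#include <stdint.h>
#include <assert.h>
#include <sys/queue.h>

struct iotlb_entry_sketch {
	TAILQ_ENTRY(iotlb_entry_sketch) next; /* 16 bytes on 64-bit */
	uint64_t iova;
	uint64_t uaddr;
	uint64_t uoffset;
	uint64_t size;
	uint8_t page_shift; /* a shift (1 byte) instead of a size (8 bytes) */
	uint8_t perm;
};

/* 16 + 4*8 + 2 = 50 bytes, padded to 56: still within one 64B line. */
static_assert(sizeof(struct iotlb_entry_sketch) <= 64,
		"entry fits in a single cacheline");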

Re: [PATCH v5] app/testpmd: txonly multiflow port change support

2023-04-24 Thread David Marchand
Hello Joshua,

On Mon, Apr 24, 2023 at 7:56 PM Joshua Washington  wrote:
>
> After updating the patch, it seems that the `lcores_autotest` unit test now 
> times out on Windows Server 2019. I looked at the test logs, but they were 
> identical as far as I could tell, with the timed out test even printing "Test 
> OK" to stdout. Is this a flake? Or is there any other way to get extra 
> information about why the test timed out or run the test with extra debugging 
> information?

In general, the UNH dashboard provides an archive with logs for each
report, like for example:
https://lab.dpdk.org/results/dashboard/patchsets/26090/
https://lab.dpdk.org/results/dashboard/results/results-uploads/test_runs/b5a6a2743665426b937603587850aa6d/log_upload_file/2023/4/dpdk_5f34cc454df4_26090_2023-04-22_02-36-52_NA.zip

This timeout is something I had not noticed so far; Ccing the UNH folks for info.


Regarding your patch, the CI passes fine on the current main branch.
And there is no relation between testpmd and the EAL unit tests.
So this report is very likely a false positive.

I triggered a retest on your patch.


-- 
David Marchand