[dpdk-dev] getting errno 14 while creating mbuf pool

2021-09-26 Thread Mohsen Meamarian
Hi friends,

When I increase the mbuf count, creating the mbuf pool in VPP fails at startup:

vnet[18135]: dpdk_pool_create:504: ioctl(VFIO_IOMMU_MAP_DMA) pool
'dpdk_mbuf_pool_socket0': Bad address (errno 14)

How can I allocate more memory for creating the mbuf pool? I can't decrease
the mbuf count. I use the VFIO driver and enable intel_iommu in GRUB. I also
have 14000 free hugepages on each NUMA node.
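
For reference, errno 14 is EFAULT from the VFIO_IOMMU_MAP_DMA ioctl, i.e. the
kernel rejected the DMA mapping; that often points at IOMMU address-width
limits or memory that isn't hugepage-backed, rather than a plain shortage of
hugepages. A minimal sketch of creating such a pool with the plain DPDK mbuf
API (the pool name is taken from the log above; the sizes are illustrative):

#include <stdio.h>
#include <rte_errno.h>
#include <rte_mbuf.h>

static struct rte_mempool *
create_pool(unsigned int n_mbufs, int socket_id)
{
	struct rte_mempool *mp;

	/* n_mbufs elements of RTE_MBUF_DEFAULT_BUF_SIZE each, on one socket */
	mp = rte_pktmbuf_pool_create("dpdk_mbuf_pool_socket0", n_mbufs,
				     256 /* per-core cache */, 0 /* priv */,
				     RTE_MBUF_DEFAULT_BUF_SIZE, socket_id);
	if (mp == NULL)
		printf("pool create failed: %s\n", rte_strerror(rte_errno));
	return mp;
}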

Best regards
Mohsen Memariyan


[dpdk-dev] [PATCH] net/i40e: fix Rx packet statistics

2021-09-26 Thread Alvin Zhang
Some packets are discarded by the NIC because they are larger than
the MTU; these packets should be counted as "RX error" instead of
"RX packet".

The 'GL_RXERR1' register counts these discarded packets.
This patch adds reading and calculation of the 'GL_RXERR1' counter
when reporting DPDK statistics.
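
Once this lands, the new counter is visible through the standard xstats API.
A minimal sketch of reading it from an application (the xstat name "rx_err1"
comes from the table below; the rest is the regular ethdev API):

#include <stdlib.h>
#include <string.h>
#include <rte_ethdev.h>

/* Sketch: look up the "rx_err1" extended statistic by name. */
static uint64_t
get_rx_err1(uint16_t port_id)
{
	int i, n = rte_eth_xstats_get(port_id, NULL, 0);
	struct rte_eth_xstat *xs;
	struct rte_eth_xstat_name *names;
	uint64_t val = 0;

	if (n <= 0)
		return 0;
	xs = calloc(n, sizeof(*xs));
	names = calloc(n, sizeof(*names));
	rte_eth_xstats_get_names(port_id, names, n);
	rte_eth_xstats_get(port_id, xs, n);
	for (i = 0; i < n; i++)
		if (strcmp(names[i].name, "rx_err1") == 0)
			val = xs[i].value;
	free(xs);
	free(names);
	return val;
}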

Fixes: f4a91c38b4ad ("i40e: add extended stats")
Cc: sta...@dpdk.org

Signed-off-by: Alvin Zhang 
---
 drivers/net/i40e/i40e_ethdev.c | 16 +---
 drivers/net/i40e/i40e_ethdev.h | 10 ++
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 7a2a828..30a2cdf 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -532,7 +532,7 @@ static int i40e_sw_tunnel_filter_insert(struct i40e_pf *pf,
 /* store statistics names and its offset in stats structure */
 struct rte_i40e_xstats_name_off {
char name[RTE_ETH_XSTATS_NAME_SIZE];
-   unsigned offset;
+   int offset;
 };
 
 static const struct rte_i40e_xstats_name_off rte_i40e_stats_strings[] = {
@@ -542,6 +542,8 @@ struct rte_i40e_xstats_name_off {
{"rx_dropped_packets", offsetof(struct i40e_eth_stats, rx_discards)},
{"rx_unknown_protocol_packets", offsetof(struct i40e_eth_stats,
rx_unknown_protocol)},
+   {"rx_err1", offsetof(struct i40e_pf, rx_err1) -
+   offsetof(struct i40e_pf, stats)},
{"tx_unicast_packets", offsetof(struct i40e_eth_stats, tx_unicast)},
{"tx_multicast_packets", offsetof(struct i40e_eth_stats, tx_multicast)},
{"tx_broadcast_packets", offsetof(struct i40e_eth_stats, tx_broadcast)},
@@ -3238,6 +3240,10 @@ void i40e_flex_payload_reg_set_default(struct i40e_hw *hw)
pf->offset_loaded,
&os->eth.rx_unknown_protocol,
&ns->eth.rx_unknown_protocol);
+   i40e_stat_update_48(hw, I40E_GL_RXERR1_H(hw->pf_id + I40E_MAX_VF),
+   I40E_GL_RXERR1_L(hw->pf_id + I40E_MAX_VF),
+   pf->offset_loaded, &pf->rx_err1_offset,
+   &pf->rx_err1);
i40e_stat_update_48_in_64(hw, I40E_GLPRT_GOTCH(hw->port),
  I40E_GLPRT_GOTCL(hw->port),
  pf->offset_loaded, &os->eth.tx_bytes,
@@ -3437,7 +3443,8 @@ void i40e_flex_payload_reg_set_default(struct i40e_hw *hw)
stats->ipackets = pf->main_vsi->eth_stats.rx_unicast +
pf->main_vsi->eth_stats.rx_multicast +
pf->main_vsi->eth_stats.rx_broadcast -
-   pf->main_vsi->eth_stats.rx_discards;
+   pf->main_vsi->eth_stats.rx_discards -
+   pf->rx_err1;
stats->opackets = ns->eth.tx_unicast +
ns->eth.tx_multicast +
ns->eth.tx_broadcast;
@@ -3451,7 +3458,8 @@ void i40e_flex_payload_reg_set_default(struct i40e_hw *hw)
pf->main_vsi->eth_stats.rx_discards;
stats->ierrors  = ns->crc_errors +
ns->rx_length_errors + ns->rx_undersize +
-   ns->rx_oversize + ns->rx_fragments + ns->rx_jabber;
+   ns->rx_oversize + ns->rx_fragments + ns->rx_jabber +
+   pf->rx_err1;
 
if (pf->vfs) {
for (i = 0; i < pf->vf_num; i++) {
@@ -6232,6 +6240,8 @@ struct i40e_vsi *
memset(&pf->stats_offset, 0, sizeof(struct i40e_hw_port_stats));
memset(&pf->internal_stats, 0, sizeof(struct i40e_eth_stats));
memset(&pf->internal_stats_offset, 0, sizeof(struct i40e_eth_stats));
+   pf->rx_err1 = 0;
+   pf->rx_err1_offset = 0;
 
ret = i40e_pf_get_switch_config(pf);
if (ret != I40E_SUCCESS) {
diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
index cd6deab..846c8d4 100644
--- a/drivers/net/i40e/i40e_ethdev.h
+++ b/drivers/net/i40e/i40e_ethdev.h
@@ -19,6 +19,13 @@
 #include "base/i40e_type.h"
 #include "base/virtchnl.h"
 
+#define I40E_GL_RXERR1_H(_i)   (0x00318004 + ((_i) * 8))
+/**
+ * _i=0...143,
+ * counters 0-127 are for the 128 VFs,
+ * counters 128-143 are for the 16 PFs
+ */
+
 #define I40E_VLAN_TAG_SIZE 4
 
 #define I40E_AQ_LEN   32
@@ -1134,6 +1141,9 @@ struct i40e_pf {
 
struct i40e_hw_port_stats stats_offset;
struct i40e_hw_port_stats stats;
+   u64 rx_err1; /* rxerr1 */
+   u64 rx_err1_offset;
+
/* internal packet statistics, it should be excluded from the total */
struct i40e_eth_stats internal_stats_offset;
struct i40e_eth_stats internal_stats;
-- 
1.8.3.1



[dpdk-dev] [PATCH v3] net/ice: enable Rx timestamp on Flex Descriptor

2021-09-26 Thread Simei Su
Use the dynamic mbuf API to register the timestamp field and flag.
The ice hardware can dump the Rx timestamp value into the dynamic
mbuf field via the flex descriptor. This feature is turned on by the dev
config "enable-rx-timestamp". Currently, it is only supported
on the scalar path.
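
For context, a minimal sketch of how an application consumes the timestamp on
the receive side (this is the standard dynamic-field API; the variable names
are illustrative):

#include <rte_mbuf.h>
#include <rte_mbuf_dyn.h>

static int ts_off = -1;   /* dynamic field offset, filled at registration */
static uint64_t ts_flag;  /* ol_flags bit set when the field is valid */

/* Register once, after enabling DEV_RX_OFFLOAD_TIMESTAMP on the port. */
static int
init_rx_timestamp(void)
{
	return rte_mbuf_dyn_rx_timestamp_register(&ts_off, &ts_flag);
}

static inline uint64_t
read_rx_timestamp(const struct rte_mbuf *mb)
{
	if (!(mb->ol_flags & ts_flag))
		return 0; /* no timestamp on this packet */
	return *RTE_MBUF_DYNFIELD(mb, ts_off, const rte_mbuf_timestamp_t *);
}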

Signed-off-by: Simei Su 
---
v3:
* Define ice_tstamp_convert_32b_64b() as a static inline function in ice_rxtx.h.

v2:
* Refine release notes.
* Merge two helper functions into one.
* Remove one field in ice_rx_queue structure.

 doc/guides/rel_notes/release_21_11.rst |  3 +-
 drivers/net/ice/ice_ethdev.c   |  6 +++-
 drivers/net/ice/ice_rxtx.c | 59 ++
 drivers/net/ice/ice_rxtx.h | 33 +++
 drivers/net/ice/ice_rxtx_vec_common.h  |  3 ++
 5 files changed, 102 insertions(+), 2 deletions(-)

diff --git a/doc/guides/rel_notes/release_21_11.rst b/doc/guides/rel_notes/release_21_11.rst
index 19356ac..0bbe82c 100644
--- a/doc/guides/rel_notes/release_21_11.rst
+++ b/doc/guides/rel_notes/release_21_11.rst
@@ -75,7 +75,8 @@ New Features
 
 * **Updated Intel ice driver.**
 
-  Added 1PPS out support by a devargs.
+  * Added 1PPS out support by a devargs.
+  * Added DEV_RX_OFFLOAD_TIMESTAMP support.
 
 * **Updated Marvell cnxk ethdev driver.**
 
diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
index e24a3b6..534af03 100644
--- a/drivers/net/ice/ice_ethdev.c
+++ b/drivers/net/ice/ice_ethdev.c
@@ -31,6 +31,9 @@
 #define ICE_HW_DEBUG_MASK_ARG "hw_debug_mask"
 #define ICE_ONE_PPS_OUT_ARG   "pps_out"
 
+uint64_t ice_timestamp_dynflag;
+int ice_timestamp_dynfield_offset = -1;
+
 static const char * const ice_valid_args[] = {
ICE_SAFE_MODE_SUPPORT_ARG,
ICE_PIPELINE_MODE_SUPPORT_ARG,
@@ -3652,7 +3655,8 @@ ice_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
DEV_RX_OFFLOAD_QINQ_STRIP |
DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
DEV_RX_OFFLOAD_VLAN_EXTEND |
-   DEV_RX_OFFLOAD_RSS_HASH;
+   DEV_RX_OFFLOAD_RSS_HASH |
+   DEV_RX_OFFLOAD_TIMESTAMP;
dev_info->tx_offload_capa |=
DEV_TX_OFFLOAD_QINQ_INSERT |
DEV_TX_OFFLOAD_IPV4_CKSUM |
diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
index 5d7ab4f..a043df2 100644
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -302,6 +302,18 @@ ice_program_hw_rx_queue(struct ice_rx_queue *rxq)
}
}
 
+   if (rxq->offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+   /* Register mbuf field and flag for Rx timestamp */
+   err = rte_mbuf_dyn_rx_timestamp_register(
+   &ice_timestamp_dynfield_offset,
+   &ice_timestamp_dynflag);
+   if (err != 0) {
+   PMD_INIT_LOG(ERR,
+   "Cannot register mbuf field/flag for 
timestamp");
+   return -EINVAL;
+   }
+   }
+
memset(&rx_ctx, 0, sizeof(rx_ctx));
 
rx_ctx.base = rxq->rx_ring_dma / ICE_QUEUE_BASE_ADDR_UNIT;
@@ -354,6 +366,9 @@ ice_program_hw_rx_queue(struct ice_rx_queue *rxq)
regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) &
QRXFLXP_CNTXT_RXDID_PRIO_M;
 
+   if (rxq->offloads & DEV_RX_OFFLOAD_TIMESTAMP)
+   regval |= QRXFLXP_CNTXT_TS_M;
+
ICE_WRITE_REG(hw, QRXFLXP_CNTXT(rxq->reg_idx), regval);
 
err = ice_clear_rxq_ctx(hw, rxq->reg_idx);
@@ -1546,6 +1561,9 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
int32_t i, j, nb_rx = 0;
uint64_t pkt_flags = 0;
uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+   struct ice_vsi *vsi = rxq->vsi;
+   struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
+   uint64_t ts_ns;
 
rxdp = &rxq->rx_ring[rxq->rx_tail];
rxep = &rxq->sw_ring[rxq->rx_tail];
@@ -1589,6 +1607,17 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
ice_rxd_to_vlan_tci(mb, &rxdp[j]);
rxq->rxd_to_pkt_fields(rxq, mb, &rxdp[j]);
 
+   if (rxq->offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+   ts_ns = ice_tstamp_convert_32b_64b(hw,
+   rte_le_to_cpu_32(rxdp[j].wb.flex_ts.ts_high));
+   if (ice_timestamp_dynflag > 0) {
+   *RTE_MBUF_DYNFIELD(mb,
+   ice_timestamp_dynfield_offset,
+   rte_mbuf_timestamp_t *) = ts_ns;
+   mb->ol_flags |= ice_timestamp_dynflag;
+   }
+   }
+
mb->ol_flags |= pkt_flags;
}
 
@@ -1772,6 +1801,9 @@ ice_rec

[dpdk-dev] [PATCH v9 0/3] testpmd shows incorrect rx_offload configuration

2021-09-26 Thread Jie Wang
Launch testpmd with multiple queues, and check the rx_offload info.

When testpmd shows the port configuration, it doesn't show RSS_HASH.

---
v9:
 - add a release notes update for the new API.
 - update the description of the new API.
 - optimize the new API.
 - optimize the assignment of the offloads.
v8: delete "rte_exit" and just print error log.
v7:
 - delete struct "rte_eth_dev_conf_info", and reuse struct "rte_eth_conf".
 - add "__rte_experimental" to the new API "rte_eth_dev_conf_info_get" 
declaration.
v6: split this patch into two patches.
v5: add an API to get device configuration info.
v4: delete the whitespace at the end of the line.
v3:
 - check and update the "offloads" of "port->dev_conf.rx/txmode".
 - update the commit log.
v2: copy "rx/txmode.offloads", instead of copying the entire struct
"dev->data->dev_conf.rx/txmode".

Jie Wang (3):
  ethdev: add an API to get device configuration info
  doc: update release notes for new API
  app/testpmd: fix testpmd doesn't show RSS hash offload

 app/test-pmd/cmdline.c | 14 +--
 app/test-pmd/testpmd.c | 34 ++
 app/test-pmd/testpmd.h |  2 ++
 app/test-pmd/util.c| 15 
 doc/guides/rel_notes/release_21_11.rst |  4 +++
 lib/ethdev/rte_ethdev.c| 23 +
 lib/ethdev/rte_ethdev.h| 21 
 lib/ethdev/version.map |  3 +++
 8 files changed, 114 insertions(+), 2 deletions(-)

-- 
2.25.1



[dpdk-dev] [PATCH v9 1/3] ethdev: add an API to get device configuration info

2021-09-26 Thread Jie Wang
This patch adds a new API "rte_eth_dev_conf_info_get()" to help users get
device configuration info.
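
A minimal usage sketch of the proposed API (a fragment; error handling and
includes elided):

	struct rte_eth_conf conf;

	if (rte_eth_dev_conf_info_get(port_id, &conf) == 0)
		printf("port %u Rx offloads: 0x%" PRIx64 "\n",
		       port_id, conf.rxmode.offloads);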

Cc: sta...@dpdk.org

Signed-off-by: Jie Wang 
---
 lib/ethdev/rte_ethdev.c | 23 +++
 lib/ethdev/rte_ethdev.h | 21 +
 lib/ethdev/version.map  |  3 +++
 3 files changed, 47 insertions(+)

diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index daf5ca9242..a0f521323a 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -3457,6 +3457,29 @@ rte_eth_dev_info_get(uint16_t port_id, struct rte_eth_dev_info *dev_info)
return 0;
 }
 
+int
+rte_eth_dev_conf_info_get(uint16_t port_id,
+   struct rte_eth_conf *dev_conf_info)
+{
+   struct rte_eth_dev *dev;
+
+   RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+   dev = &rte_eth_devices[port_id];
+
+   if (dev_conf_info == NULL) {
+   RTE_ETHDEV_LOG(ERR,
+   "Cannot get ethdev port %u configuration info to 
NULL\n",
+   port_id);
+   return -EINVAL;
+   }
+
+   /* copy dev->data->dev_conf to dev_conf_info */
+   memcpy(dev_conf_info, &dev->data->dev_conf,
+   sizeof(struct rte_eth_conf));
+
+   return 0;
+}
+
 int
 rte_eth_dev_get_supported_ptypes(uint16_t port_id, uint32_t ptype_mask,
 uint32_t *ptypes, int num)
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index 1da37896d8..c21ee6a1fe 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -3068,6 +3068,27 @@ int rte_eth_macaddr_get(uint16_t port_id, struct rte_ether_addr *mac_addr);
  */
 int rte_eth_dev_info_get(uint16_t port_id, struct rte_eth_dev_info *dev_info);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Retrieve the configuration of an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param dev_conf_info
+ *   A pointer to a structure of type *rte_eth_conf* to be filled with
+ *   the configuration of the Ethernet device.
+ *   The memory for the structure must be allocated by the caller.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if bad parameter.
+ */
+__rte_experimental
+int rte_eth_dev_conf_info_get(uint16_t port_id,
+   struct rte_eth_conf *dev_conf_info);
+
 /**
  * Retrieve the firmware version of a device.
  *
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index 904bce6ea1..4b0a1f0fae 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -247,6 +247,9 @@ EXPERIMENTAL {
rte_mtr_meter_policy_delete;
rte_mtr_meter_policy_update;
rte_mtr_meter_policy_validate;
+
+   # added in 21.11
+   rte_eth_dev_conf_info_get;
 };
 
 INTERNAL {
-- 
2.25.1



[dpdk-dev] [PATCH v9 2/3] doc: update release notes for new API

2021-09-26 Thread Jie Wang
Add information about new ethdev API.

Cc: sta...@dpdk.org

Signed-off-by: Jie Wang 
---
 doc/guides/rel_notes/release_21_11.rst | 4 
 1 file changed, 4 insertions(+)

diff --git a/doc/guides/rel_notes/release_21_11.rst b/doc/guides/rel_notes/release_21_11.rst
index dcff939ae8..95e569f51c 100644
--- a/doc/guides/rel_notes/release_21_11.rst
+++ b/doc/guides/rel_notes/release_21_11.rst
@@ -111,6 +111,10 @@ New Features
   Added command-line options to specify total number of processes and
   current process ID. Each process owns subset of Rx and Tx queues.
 
+* **Added support for getting device configuration.**
+
+  Added an API to help users get the device configuration.
+  The declaration for the API can be found in ``rte_ethdev.h``.
+
 
 Removed Items
 -
-- 
2.25.1



[dpdk-dev] [PATCH v9 3/3] app/testpmd: fix testpmd doesn't show RSS hash offload

2021-09-26 Thread Jie Wang
The driver may change the offloads info in dev->data->dev_conf
during dev_configure, which may cause port->dev_conf and port->rx_conf
to contain outdated values.

This patch updates the offloads info when it changes, to fix this issue.

Fixes: ce8d561418d4 ("app/testpmd: add port configuration settings")
Cc: sta...@dpdk.org

Signed-off-by: Jie Wang 
---
 app/test-pmd/cmdline.c | 14 --
 app/test-pmd/testpmd.c | 34 ++
 app/test-pmd/testpmd.h |  2 ++
 app/test-pmd/util.c| 15 +++
 4 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index a9efd027c3..3ef8f99358 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -15997,6 +15997,7 @@ cmd_rx_offload_get_configuration_parsed(
struct rte_eth_dev_info dev_info;
portid_t port_id = res->port_id;
struct rte_port *port = &ports[port_id];
+   struct rte_eth_conf dev_conf_info;
uint64_t port_offloads;
uint64_t queue_offloads;
uint16_t nb_rx_queues;
@@ -16005,7 +16006,11 @@ cmd_rx_offload_get_configuration_parsed(
 
printf("Rx Offloading Configuration of port %d :\n", port_id);
 
-   port_offloads = port->dev_conf.rxmode.offloads;
+   ret = eth_dev_conf_info_get_print_err(port_id, &dev_conf_info);
+   if (ret != 0)
+   return;
+
+   port_offloads = dev_conf_info.rxmode.offloads;
printf("  Port :");
print_rx_offloads(port_offloads);
printf("\n");
@@ -16411,6 +16416,7 @@ cmd_tx_offload_get_configuration_parsed(
struct rte_eth_dev_info dev_info;
portid_t port_id = res->port_id;
struct rte_port *port = &ports[port_id];
+   struct rte_eth_conf dev_conf_info;
uint64_t port_offloads;
uint64_t queue_offloads;
uint16_t nb_tx_queues;
@@ -16419,7 +16425,11 @@ cmd_tx_offload_get_configuration_parsed(
 
printf("Tx Offloading Configuration of port %d :\n", port_id);
 
-   port_offloads = port->dev_conf.txmode.offloads;
+   ret = eth_dev_conf_info_get_print_err(port_id, &dev_conf_info);
+   if (ret != 0)
+   return;
+
+   port_offloads = dev_conf_info.txmode.offloads;
printf("  Port :");
print_tx_offloads(port_offloads);
printf("\n");
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 97ae52e17e..56e6e80739 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -2531,6 +2531,9 @@ start_port(portid_t pid)
}
 
if (port->need_reconfig > 0) {
+   struct rte_eth_conf dev_conf_info;
+   int k;
+
port->need_reconfig = 0;
 
if (flow_isolate_all) {
@@ -2568,6 +2571,37 @@ start_port(portid_t pid)
port->need_reconfig = 1;
return -1;
}
+   /* get rte_eth_conf info */
+   if (0 !=
+   eth_dev_conf_info_get_print_err(pi,
+   &dev_conf_info)) {
+   fprintf(stderr,
+   "port %d can not get device 
configuration info\n",
+   pi);
+   return -1;
+   }
+   /* Apply Rx offloads configuration */
+   if (dev_conf_info.rxmode.offloads !=
+   port->dev_conf.rxmode.offloads) {
+   port->dev_conf.rxmode.offloads |=
+   dev_conf_info.rxmode.offloads;
+   for (k = 0;
+k < port->dev_info.max_rx_queues;
+k++)
+   port->rx_conf[k].offloads |=
+   dev_conf_info.rxmode.offloads;
+   }
+   /* Apply Tx offloads configuration */
+   if (dev_conf_info.txmode.offloads !=
+   port->dev_conf.txmode.offloads) {
+   port->dev_conf.txmode.offloads |=
+   dev_conf_info.txmode.offloads;
+   for (k = 0;
+k < port->dev_info.max_tx_queues;
+k++)
+   port->tx_conf[k].offloads |=
+   dev_conf_info.txmode.offloads;
+   }
}
if (port->need_reconfig_queues > 0 && is_proc_primary()) {
port->need_reconfig_queues = 0;
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 5863b2f43f..48dad40986 100

Re: [dpdk-dev] [PATCH v4] net/ice: support IEEE 1588 PTP for E810

2021-09-26 Thread Zhang, Qi Z



> -Original Message-
> From: Su, Simei 
> Sent: Wednesday, September 22, 2021 4:47 PM
> To: Zhang, Qi Z 
> Cc: dev@dpdk.org; Wang, Haiyue ; Su, Simei
> 
> Subject: [PATCH v4] net/ice: support IEEE 1588 PTP for E810

No need to mention E810; net/ice already implies it's E810.

> 
> Add ice support for new ethdev APIs to enable/disable and read/write/adjust
> IEEE1588 PTP timstamps. Currently, only scalar path supports 1588 PTP, vector
> path doesn't.

timestamps

> 
> The example command for running ptpclient is as below:
> ./build/examples/dpdk-ptpclient -c 1 -n 3 -- -T 0 -p 0x1
> 
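For readers, the ethdev timesync callbacks wired up here are exercised through
the generic API, roughly like this (port_id and delta_ns are illustrative):

	struct timespec ts;

	rte_eth_timesync_enable(port_id);
	rte_eth_timesync_read_time(port_id, &ts);
	rte_eth_timesync_adjust_time(port_id, delta_ns);
	rte_eth_timesync_disable(port_id);
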
> Signed-off-by: Simei Su 
> ---
> v4:
> * Rework code to consider ice_dev_start and ice_timesync_enable order.
> 
> v3:
> * Rework code to support scalar path only.
> * Update the doc/guides/nics/features/ice.ini to add "Timesync" feature.
> * Add release notes.
> 
> v2:
> * Change patchset to one patch based on share code update.
> * Change per device offload to per queue offload.
> 
>  doc/guides/nics/features/ice.ini   |   1 +
>  doc/guides/rel_notes/release_21_11.rst |   2 +-
>  drivers/net/ice/ice_ethdev.c   | 193
> +
>  drivers/net/ice/ice_ethdev.h   |   6 +
>  drivers/net/ice/ice_rxtx.c |  46 +++-
>  5 files changed, 245 insertions(+), 3 deletions(-)
> 
> diff --git a/doc/guides/nics/features/ice.ini 
> b/doc/guides/nics/features/ice.ini
> index e066787..a7978d2 100644
> --- a/doc/guides/nics/features/ice.ini
> +++ b/doc/guides/nics/features/ice.ini
> @@ -43,6 +43,7 @@ Linux= Y
>  Windows  = Y
>  x86-32   = Y
>  x86-64   = Y
> +Timesync = Y
> 
>  [rte_flow items]
>  ah   = Y
> diff --git a/doc/guides/rel_notes/release_21_11.rst
> b/doc/guides/rel_notes/release_21_11.rst
> index 1b9dac6..2005262 100644
> --- a/doc/guides/rel_notes/release_21_11.rst
> +++ b/doc/guides/rel_notes/release_21_11.rst
> @@ -71,7 +71,7 @@ New Features
> 
>Added 1PPS out support by a devargs.
>* Added Rx timstamp support by dynamic mbuf on Flex Descriptor.
> -
> +  * Added timesync API support under scalar path for E810.
> 
>  Removed Items
>  -
> diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c index
> 06adf43..26491c3 100644
> --- a/drivers/net/ice/ice_ethdev.c
> +++ b/drivers/net/ice/ice_ethdev.c
> @@ -18,6 +18,7 @@
>  #include "base/ice_flow.h"
>  #include "base/ice_dcb.h"
>  #include "base/ice_common.h"
> +#include "base/ice_ptp_hw.h"
> 
>  #include "rte_pmd_ice.h"
>  #include "ice_ethdev.h"
> @@ -31,6 +32,8 @@
>  #define ICE_HW_DEBUG_MASK_ARG "hw_debug_mask"
>  #define ICE_ONE_PPS_OUT_ARG   "pps_out"
> 
> +#define ICE_CYCLECOUNTER_MASK  0xffffffffffffffffULL
> +
>  uint64_t ice_timestamp_dynflag;
>  int ice_timestamp_dynfield_offset = -1;
> 
> @@ -149,6 +152,18 @@ static int ice_dev_udp_tunnel_port_add(struct rte_eth_dev *dev,
>   struct rte_eth_udp_tunnel *udp_tunnel);
>  static int ice_dev_udp_tunnel_port_del(struct rte_eth_dev *dev,
>   struct rte_eth_udp_tunnel *udp_tunnel);
> +static int ice_timesync_enable(struct rte_eth_dev *dev);
> +static int ice_timesync_read_rx_timestamp(struct rte_eth_dev *dev,
> +   struct timespec *timestamp,
> +   uint32_t flags);
> +static int ice_timesync_read_tx_timestamp(struct rte_eth_dev *dev,
> +   struct timespec *timestamp);
> +static int ice_timesync_adjust_time(struct rte_eth_dev *dev, int64_t delta);
> +static int ice_timesync_read_time(struct rte_eth_dev *dev,
> +   struct timespec *timestamp);
> +static int ice_timesync_write_time(struct rte_eth_dev *dev,
> +   const struct timespec *timestamp);
> +static int ice_timesync_disable(struct rte_eth_dev *dev);
> 
>  static const struct rte_pci_id pci_id_ice_map[] = {
>   { RTE_PCI_DEVICE(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E823L_BACKPLANE) },
> @@ -232,6 +247,13 @@ static const struct eth_dev_ops ice_eth_dev_ops = {
>   .udp_tunnel_port_del  = ice_dev_udp_tunnel_port_del,
>   .tx_done_cleanup  = ice_tx_done_cleanup,
>   .get_monitor_addr = ice_get_monitor_addr,
> + .timesync_enable  = ice_timesync_enable,
> + .timesync_read_rx_timestamp   = ice_timesync_read_rx_timestamp,
> + .timesync_read_tx_timestamp   = ice_timesync_read_tx_timestamp,
> + .timesync_adjust_time = ice_timesync_adjust_time,
> + .timesync_read_time   = ice_timesync_read_time,
> + .timesync_write_time  = ice_timesync_write_time,
> + .timesync_disable = ice_timesync_disable,
>  };
> 
>  /* store statistics names and its offset in stats structure */
> @@ -5488,6 +5510,177 @@ ice_dev_udp_tunnel_port_del(struct rte_eth_dev *dev,
>  }
> 
>  static int
> +ice_timesyn

Re: [dpdk-dev] [RFC] ethdev: change queue release callback

2021-09-26 Thread Xueming(Steven) Li
On Wed, 2021-08-11 at 12:57 +0100, Ferruh Yigit wrote:
> On 8/10/2021 10:07 AM, Xueming(Steven) Li wrote:
> > 
> > 
> > > -Original Message-
> > > From: Ferruh Yigit 
> > > Sent: Tuesday, August 10, 2021 4:54 PM
> > > To: Xueming(Steven) Li ; Singh, Aman Deep 
> > > ; Andrew Rybchenko
> > > 
> > > Cc: dev@dpdk.org; Slava Ovsiienko ; 
> > > NBU-Contact-Thomas Monjalon 
> > > Subject: Re: [dpdk-dev] [RFC] ethdev: change queue release callback
> > > 
> > > On 8/10/2021 9:03 AM, Xueming(Steven) Li wrote:
> > > > Hi Singh and Ferruh,
> > > > 
> > > > > -Original Message-
> > > > > From: Ferruh Yigit 
> > > > > Sent: Monday, August 9, 2021 11:31 PM
> > > > > To: Singh, Aman Deep ; Andrew Rybchenko
> > > > > ; Xueming(Steven) Li
> > > > > 
> > > > > Cc: dev@dpdk.org; Slava Ovsiienko ;
> > > > > NBU-Contact-Thomas Monjalon 
> > > > > Subject: Re: [dpdk-dev] [RFC] ethdev: change queue release callback
> > > > > 
> > > > > On 8/9/2021 3:39 PM, Singh, Aman Deep wrote:
> > > > > > Hi Xueming,
> > > > > > 
> > > > > > On 7/28/2021 1:10 PM, Andrew Rybchenko wrote:
> > > > > > > On 7/27/21 6:41 AM, Xueming Li wrote:
> > > > > > > > To align with other eth device queue configuration callbacks,
> > > > > > > > change RX and TX queue release callback API parameter from queue
> > > > > > > > object to device and queue index.
> > > > > > > > 
> > > > > > > > Signed-off-by: Xueming Li 
> > > > > > > 
> > > > > > > In fact, there is no strong reasons to do it, but I think it is a
> > > > > > > nice cleanup to use (dev + queue index) on control path.
> > > > > > > 
> > > > > > > Hopefully it will not result in any regressions.
> > > > > > 
> > > > > > Combined there are 100+ API's for Rx/Tx queue_release that need to
> > > > > > be modified for it.
> > > > > > 
> > > > > > I believe all regression possibilities here will be caught, in
> > > > > > compilation phase itself.
> > > > > > 
> > > > > 
> > > > > Same here, it is a good cleanup but there is no strong reason for it.
> > > > > 
> > > > > Since it is all internal, there is no ABI restriction on the patch,
> > > > > and v21.11 will be full ABI break patches, to not cause conflicts 
> > > > > with this change, what would you think to have it on v22.02?
> > > > 
> > > > This patch is required by shared-rxq feature which ABI broken, target 
> > > > to 21.11.
> > > 
> > > Why it is required?
> > 
> > In the Rx burst function, the rxq object is used in the data path. For best
> > performance, that's the shared-rxq object when shared rxq is enabled.
> > I think the eth API defined the rxq object for performance as well,
> > specifically on the data plane.
> > In my case, the hardware saves the port info in the received packet descriptor.
> > We can't tell which device's queue a packet belongs to from the shared rxq
> > object alone, so the control path can't use this shared rxq anymore and has
> > to be specific about the dev and queue ID.
> > 
> 
> I have seen shared Rx queue patch, but that just introduces the offload and
> doesn't have the PMD implementation, so hard to see the dependency, can you
> please put the pseudocode for PMDs for shared-rxq?
> How a queue will know if it is shared or not, during release?
> 
> Btw, shared Rx doesn't mention from this dependency in the patch.

Hi Ferruh, I finally got the PMD code ported:
http://mails.dpdk.org/archives/dev/2021-September/221326.html
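
For context, the signature change proposed by the RFC is roughly the following
sketch (eth_queue_release_t is the internal ethdev callback type; the exact
final prototype may differ):

	/* before: the release callback only sees the opaque queue object */
	typedef void (*eth_queue_release_t)(void *queue);

	/* after: device plus queue index, matching the setup callbacks */
	typedef void (*eth_queue_release_t)(struct rte_eth_dev *dev,
					    uint16_t queue_id);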
 
> 
> > > 
> > > > I'll do it carefully, fortunately, the change is straightforward.
> > > > 
> > 
> 



Re: [dpdk-dev] [RFC V1] examples/l3fwd-power: fix memory leak for rte_pci_device

2021-09-26 Thread Huisong Li



On 2021/9/18 16:46, Thomas Monjalon wrote:

18/09/2021 05:24, Huisong Li:

On 2021/9/17 20:50, Thomas Monjalon wrote:

17/09/2021 04:13, Huisong Li:

On 2021/9/16 18:36, Thomas Monjalon wrote:

16/09/2021 10:01, Huisong Li:

On 2021/9/8 15:20, Thomas Monjalon wrote:

08/09/2021 04:01, Huisong Li:

On 2021/9/7 16:53, Thomas Monjalon wrote:

07/09/2021 05:41, Huisong Li:

Calling rte_eth_dev_close() will release resources of eth device and close
it. But rte_pci_device struct isn't released when app exit, which will lead
to memory leak.

That's a PMD issue.
When the last port of a PCI device is closed, the device should be freed.

Why is this a PMD problem? I don't understand.

In the PMD close function, freeing of PCI device must be managed,
so the app doesn't have to bother.

I know what you mean. Currently, there are two ways to close a PMD device
(rte_eth_dev_close() and rte_dev_remove()).

For rte_dev_remove(), the eth device can be closed and the rte_pci_device
can also be freed, so the app doesn't have to care about that.

But dev_close() is only used to close the eth device; nothing about
rte_pci_device is involved in the framework-layer call stack of dev_close().
The rte_pci_device is allocated and initialized when the rte_pci_bus scans
the "/sys/bus/pci/devices" directory.

Generally, the PMD of eth devices operates on the basis of eth devices,
and rarely on rte_pci_device.

No. The PMD is doing the relation between the PCI device and the ethdev port.

It seems that the ethdev layer can create eth devices based on
rte_pci_device, but does not release rte_pci_device.

No, the ethdev layer does not manage any bus.
Only the PMD does that.

I don't mean that the ethdev layer manages the bus.

I mean, it neither allocate rte_pci_device nor free it.


And the rte_pci_device corresponding to the eth devices is managed and
processed by the rte_pci_bus.

So, the PMD is closed only based on the port ID of the eth device, which
only shuts down the eth device; it does not free the rte_pci_device
or remove it from the rte_pci_bus.

Not really.

I do not see any PMD driver releasing rte_pci_device in dev_close().

Maybe not but we should.

I'm sure.

As far as I know, the PMD does not free rte_pci_device for devices under
the PCI bus, whether ethdev or dmadev.


If there is no port using the PCI device, it should be released.

Yes.

As far as I know, most apps or examples in the DPDK project have only
one port for a pci device.

The number of ports per PCI device is driver-specific.


When the port is closed, the rte_pci_device should be freed. But none of
the apps seem to do this.

That's because from the app point of view, only ports should be managed.
The hardware device is managed by the PMD.
Only drivers (PMDs) have to do the relation between class ports
and hardware devices.

Yes. But the current app only closes the port to disable the PMD, and
the rte_pci_device cannot be freed.

Why not?

Because most apps in DPDK call dev_close() to close the eth device
corresponding to a port.

You don't say why the underlying PCI device could not be freed.

From the current implementation, rte_eth_dev_close() in the ethdev layer
and dev_close() in the PMD both do not free it,

because the rte_pci_device cannot be released in the PMD's dev_close() and
is managed by the framework layer.

No


Btw, excluding rte_dev_probe() and rte_dev_remove(), it seems that the
DPDK framework only automatically scans PCI devices, but does not
automatically release them when the process exits.

Indeed, because such freeing is the responsibility of the PMD.

Do you mean to free rte_pci_device in the dev_close() API?

I mean free the PCI device in the PMD implementation of dev_close.

I don't think it's reasonable.

What is not reasonable, is to not free a device which is closed.


In the normal process, the rte_pci_device is allocated in rte_eal_init()
when the PCI bus scans "/sys/bus/pci/devices" by calling rte_bus_scan(),
and is inserted into rte_pci_bus.device_list.

Then, rte_bus_probe() is called in rte_eal_init() to match the rte_pci_device
against the rte_pci_driver registered under rte_pci_bus, generating an eth
device.

From this point of view, the rte_pci_device should be managed and
released by the rte_pci_bus.

Generally, teardown should happen in reverse order: release the
eth device first and then release the rte_pci_device.

Same for mbuf in mempool: allocation is done by the app,
free is done by the PMD.


That doesn't seem to be the case. In the rx direction, the mbuf is 
allocated by PMD.


So it should be freed by the PMD.


Not everything is symmetrical.


Therefore the rte_pci_device  does not be freed in the PMD
implementation of dev_close.


How should PMD free it? What should we do? Any good suggestions?

Check that there is no other port sharing the same PCI device,
then call the PMD callback for rte_pci_remove_t.
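
A hedged sketch of that pattern in a PMD's dev_close (RTE_ETH_DEV_TO_PCI and
RTE_ETH_FOREACH_DEV_OF are existing driver-side helpers; example_pci_remove
stands in for the driver's rte_pci_remove_t callback):

static int
example_dev_close(struct rte_eth_dev *dev)
{
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
	uint16_t port_id;

	/* ... release this port's own resources first ... */

	/* If another port still uses the same PCI device, keep the device. */
	RTE_ETH_FOREACH_DEV_OF(port_id, &pci_dev->device) {
		if (port_id != dev->data->port_id)
			return 0;
	}
	/* This was the last port: free the underlying PCI device. */
	return example_pci_remove(pci_dev);
}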

For primary and secondary processes, their rte_pci_device is independent.

Yes it requires to free on both primary and secondary.


Is this for a scenario where there are multiple represent

[dpdk-dev] [PATCH] net/vhost: merge vhost stats loop in vhost Tx/Rx

2021-09-26 Thread Gaoxiang Liu
To improve performance in vhost Tx/Rx, merge the vhost stats loops.
eth_vhost_tx has two loops iterating over the sent packets;
they can be merged into one.
eth_vhost_rx has the same issue as Tx.
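
The Tx side of the change, as a sketch (names as in the patch below):

	/* before: one loop sums the bytes, then vhost_update_packet_xstats()
	 * iterates over the same burst again internally */
	for (i = 0; likely(i < nb_tx); i++)
		nb_bytes += bufs[i]->pkt_len;
	vhost_update_packet_xstats(r, bufs, nb_tx, nb_bytes, nb_missed);

	/* after: a single pass updates bytes and per-packet xstats together */
	for (i = 0; likely(i < nb_tx); i++) {
		nb_bytes += bufs[i]->pkt_len;
		vhost_update_single_packet_xstats(r, bufs[i]);
	}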

Fixes: 4d6cf2ac93dc ("net/vhost: add extended statistics")

Signed-off-by: Gaoxiang Liu 
---
 drivers/net/vhost/rte_eth_vhost.c | 62 ++-
 1 file changed, 28 insertions(+), 34 deletions(-)

diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index a202931e9a..e451ee2f55 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -336,38 +336,29 @@ vhost_count_xcast_packets(struct vhost_queue *vq,
 }
 
 static void
-vhost_update_packet_xstats(struct vhost_queue *vq, struct rte_mbuf **bufs,
-  uint16_t count, uint64_t nb_bytes,
-  uint64_t nb_missed)
+vhost_update_single_packet_xstats(struct vhost_queue *vq, struct rte_mbuf *buf)
 {
uint32_t pkt_len = 0;
-   uint64_t i = 0;
uint64_t index;
struct vhost_stats *pstats = &vq->stats;
 
-   pstats->xstats[VHOST_BYTE] += nb_bytes;
-   pstats->xstats[VHOST_MISSED_PKT] += nb_missed;
-   pstats->xstats[VHOST_UNICAST_PKT] += nb_missed;
-
-   for (i = 0; i < count ; i++) {
-   pstats->xstats[VHOST_PKT]++;
-   pkt_len = bufs[i]->pkt_len;
-   if (pkt_len == 64) {
-   pstats->xstats[VHOST_64_PKT]++;
-   } else if (pkt_len > 64 && pkt_len < 1024) {
-   index = (sizeof(pkt_len) * 8)
-   - __builtin_clz(pkt_len) - 5;
-   pstats->xstats[index]++;
-   } else {
-   if (pkt_len < 64)
-   pstats->xstats[VHOST_UNDERSIZE_PKT]++;
-   else if (pkt_len <= 1522)
-   pstats->xstats[VHOST_1024_TO_1522_PKT]++;
-   else if (pkt_len > 1522)
-   pstats->xstats[VHOST_1523_TO_MAX_PKT]++;
-   }
-   vhost_count_xcast_packets(vq, bufs[i]);
+   pstats->xstats[VHOST_PKT]++;
+   pkt_len = buf->pkt_len;
+   if (pkt_len == 64) {
+   pstats->xstats[VHOST_64_PKT]++;
+   } else if (pkt_len > 64 && pkt_len < 1024) {
+   index = (sizeof(pkt_len) * 8)
+   - __builtin_clz(pkt_len) - 5;
+   pstats->xstats[index]++;
+   } else {
+   if (pkt_len < 64)
+   pstats->xstats[VHOST_UNDERSIZE_PKT]++;
+   else if (pkt_len <= 1522)
+   pstats->xstats[VHOST_1024_TO_1522_PKT]++;
+   else if (pkt_len > 1522)
+   pstats->xstats[VHOST_1523_TO_MAX_PKT]++;
}
+   vhost_count_xcast_packets(vq, buf);
 }
 
 static uint16_t
@@ -376,7 +367,6 @@ eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
struct vhost_queue *r = q;
uint16_t i, nb_rx = 0;
uint16_t nb_receive = nb_bufs;
-   uint64_t nb_bytes = 0;
 
if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
return 0;
@@ -411,11 +401,11 @@ eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
if (r->internal->vlan_strip)
rte_vlan_strip(bufs[i]);
 
-   nb_bytes += bufs[i]->pkt_len;
-   }
+   r->stats.bytes += bufs[i]->pkt_len;
+   r->stats.xstats[VHOST_BYTE] += bufs[i]->pkt_len;
 
-   r->stats.bytes += nb_bytes;
-   vhost_update_packet_xstats(r, bufs, nb_rx, nb_bytes, 0);
+   vhost_update_single_packet_xstats(r, bufs[i]);
+   }
 
 out:
rte_atomic32_set(&r->while_queuing, 0);
@@ -471,16 +461,20 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
break;
}
 
-   for (i = 0; likely(i < nb_tx); i++)
+   for (i = 0; likely(i < nb_tx); i++) {
nb_bytes += bufs[i]->pkt_len;
+   vhost_update_single_packet_xstats(r, bufs[i]);
+   }
 
nb_missed = nb_bufs - nb_tx;
 
r->stats.pkts += nb_tx;
r->stats.bytes += nb_bytes;
-   r->stats.missed_pkts += nb_bufs - nb_tx;
+   r->stats.missed_pkts += nb_missed;
 
-   vhost_update_packet_xstats(r, bufs, nb_tx, nb_bytes, nb_missed);
+   r->stats.xstats[VHOST_BYTE] += nb_bytes;
+   r->stats.xstats[VHOST_MISSED_PKT] += nb_missed;
+   r->stats.xstats[VHOST_UNICAST_PKT] += nb_missed;
 
/* According to RFC2863, ifHCOutUcastPkts, ifHCOutMulticastPkts and
 * ifHCOutBroadcastPkts counters are increased when packets are not
-- 
2.32.0



Re: [dpdk-dev] [PATCH v2 1/3] app/flow-perf: support meter policy API

2021-09-26 Thread Wisam Monther
Hi,

> -Original Message-
> From: Rongwei Liu 
> Sent: Wednesday, July 21, 2021 10:06 AM
> To: Matan Azrad ; Slava Ovsiienko
> ; Ori Kam ; NBU-Contact-
> Thomas Monjalon ; Wisam Monther
> 
> Cc: dev@dpdk.org; Raslan Darawsheh ; Haifei Luo
> ; Jiawei(Jonny) Wang 
> Subject: [PATCH v2 1/3] app/flow-perf: support meter policy API
> 
> Add option "policy-mtr" to indicate if meter creation will include policy or 
> not.
> Meter creation will keep same without it.
> 
> With "policy-mtr", policy is introduced. API create_meter_policy is to create 
> a
> policy. API create_meter_rule will use it to create meter.
> 
> Add option "policy-g_actions" to specify meter policy green color actions.
> W/o this, policy creation will fail since there is no default one.
> 
> Signed-off-by: Haifei Luo 
> Signed-off-by: Jiawei Wang 
> Signed-off-by: Rongwei Liu 
> ---
>  app/test-flow-perf/main.c  | 121 ++--
> -
>  doc/guides/tools/flow-perf.rst |   6 ++
>  2 files changed, 119 insertions(+), 8 deletions(-)
> 
> diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c index
> 9be8edc31d..e0d94f943a 100644
> --- a/app/test-flow-perf/main.c
> +++ b/app/test-flow-perf/main.c
> @@ -37,6 +37,7 @@
>  #include 
> 
>  #include "config.h"
> +#include "actions_gen.h"
>  #include "flow_gen.h"
> 
>  #define MAX_BATCHES_COUNT  100
> @@ -49,10 +50,12 @@ static uint8_t flow_group;
> 
>  static uint64_t encap_data;
>  static uint64_t decap_data;
> +static uint64_t g_actions;
> 
>  static uint64_t flow_items[MAX_ITEMS_NUM];  static uint64_t
> flow_actions[MAX_ACTIONS_NUM];  static uint64_t
> flow_attrs[MAX_ATTRS_NUM];
> +static uint32_t g_policy_id[MAX_PORTS];
>  static uint8_t items_idx, actions_idx, attrs_idx;
> 
>  static uint64_t ports_mask;
> @@ -62,6 +65,7 @@ static bool delete_flag;  static bool
> dump_socket_mem_flag;  static bool enable_fwd;  static bool unique_data;
> +static bool policy_mtr;
> 
>  static struct rte_mempool *mbuf_mp;
>  static uint32_t nb_lcores;
> @@ -69,6 +73,7 @@ static uint32_t rules_count;  static uint32_t rules_batch;
> static uint32_t hairpin_queues_num; /* total hairpin q number - default: 0 */
> static uint32_t nb_lcores;
> +static uint64_t meter_cir;

I see it's defined but not really used; I think all the meter_cir changes
should be squashed with the next commit.

> 
>  #define MAX_PKT_BURST 32
>  #define LCORE_MODE_PKT 1
> @@ -134,6 +139,8 @@ usage(char *progname)
>   printf("  --portmask=N: hexadecimal bitmask of ports used\n");
>   printf("  --unique-data: flag to set using unique data for all"
>   " actions that support data, such as header modify and encap
> actions\n");
> + printf("  --policy-mtr: To create meter with policy\n");
> + printf("  --policy-g_actions: To set meter policy green color
> +actions\n");
> 
>   printf("To set flow attributes:\n");
>   printf("  --ingress: set ingress attribute in flows\n"); @@ -573,6
> +580,9 @@ args_parse(int argc, char **argv)
>   { "unique-data",0, 0, 0 },
>   { "portmask",   1, 0, 0 },
>   { "cores",  1, 0, 0 },
> + { "policy-mtr", 0, 0, 0 },
> + { "policy-g_actions",   1, 0, 0 },

I prefer to have those at the end with the actions.


> + { "meter-profile-alg",  1, 0, 0 },

I don't see any use or parsing of meter-profile-alg.

>   /* Attributes */
>   { "ingress",0, 0, 0 },
>   { "egress", 0, 0, 0 },
> @@ -802,6 +812,32 @@ args_parse(int argc, char **argv)
>   RTE_MAX_LCORE);
>   }
>   }
> + if (strcmp(lgopts[opt_idx].name, "policy-mtr") == 0)

Do we really need this parameter?
For the standard meter, the user can use --meter;
for the new one, something like: --policy-meter=[G_ACTIONS]

I'm trying to reduce the dependencies for any new parameters

> + policy_mtr = true;
> + if (strcmp(lgopts[opt_idx].name,
> + "policy-g_actions") == 0) {
> + token = strtok(optarg, ",");
> + while (token != NULL) {
> + for (i = 0;
> +  i < RTE_DIM(flow_options); i++) {
> + if (strcmp(optarg,
> + flow_options[i].str) == 0) {
> + g_actions |=
> + flow_options[i].mask;
> + break;
> + }
> + }
> + /* Reached

Re: [dpdk-dev] [PATCH v2 2/3] app/flow-perf: add new meter CIR Configuration

2021-09-26 Thread Wisam Monther



> -Original Message-
> From: Rongwei Liu 
> Sent: Wednesday, July 21, 2021 10:06 AM
> To: Matan Azrad ; Slava Ovsiienko
> ; Ori Kam ; NBU-Contact-
> Thomas Monjalon ; Wisam Monther
> 
> Cc: dev@dpdk.org; Raslan Darawsheh ; Jiawei(Jonny)
> Wang 
> Subject: [PATCH v2 2/3] app/flow-perf: add new meter CIR Configuration
> 
> Add a new meter CIR configuration parameter; the user can set a different
> value for the committed information rate (CIR) parameter.
> 
> The usage as below:
> --meter-cir=N, default count is 125.
> 
> Signed-off-by: Jiawei Wang 
> Signed-off-by: Rongwei Liu 
> ---
>  app/test-flow-perf/main.c  | 8 +++-
>  doc/guides/tools/flow-perf.rst | 3 +++
>  2 files changed, 10 insertions(+), 1 deletion(-)
> 
> diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c index
> e0d94f943a..dd0aac8b06 100644
> --- a/app/test-flow-perf/main.c
> +++ b/app/test-flow-perf/main.c
> @@ -141,6 +141,8 @@ usage(char *progname)
>   " actions that support data, such as header modify and encap
> actions\n");
>   printf("  --policy-mtr: To create meter with policy\n");
>   printf("  --policy-g_actions: To set meter policy green color
> actions\n");
> + printf("  --meter-cir=N: to set committed information rate(CIR)"
> + " parameter in meter profile, default is %d\n", METER_CIR);
> 
>   printf("To set flow attributes:\n");
>   printf("  --ingress: set ingress attribute in flows\n"); @@ -582,7
> +584,7 @@ args_parse(int argc, char **argv)
>   { "cores",  1, 0, 0 },
>   { "policy-mtr", 0, 0, 0 },
>   { "policy-g_actions",   1, 0, 0 },
> - { "meter-profile-alg",  1, 0, 0 },
> + { "meter-cir",  1, 0, 0 },

The old one, meter-profile-alg, is irrelevant.
Moreover, I agree that meter-cir is a control parameter.

>   /* Attributes */
>   { "ingress",0, 0, 0 },
>   { "egress", 0, 0, 0 },
> @@ -814,6 +816,10 @@ args_parse(int argc, char **argv)
>   }
>   if (strcmp(lgopts[opt_idx].name, "policy-mtr") == 0)
>   policy_mtr = true;
> + if (strcmp(lgopts[opt_idx].name, "meter-cir") == 0) {
> + n = atoi(optarg);
> + meter_cir = (uint64_t) n;
> + }
>   if (strcmp(lgopts[opt_idx].name,
>   "policy-g_actions") == 0) {
>   token = strtok(optarg, ",");
> diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
> index 90b6934537..113e078eb5 100644
> --- a/doc/guides/tools/flow-perf.rst
> +++ b/doc/guides/tools/flow-perf.rst
> @@ -105,6 +105,9 @@ The command line options are:
>  Such as header modify and encap actions. Default is using fixed
>  data for any action that support data for all flows.
> 
> +*``--meter-cir=N``
> + Set the committed information rate(CIR) parameter, default count is
> 125.

count -> value?

> +
>  Attributes:
> 
>  *``--ingress``
> --
> 2.27.0



Re: [dpdk-dev] [PATCH v2 3/3] app/flow-perf: add the supports for meter PPS

2021-09-26 Thread Wisam Monther
Hi,

> -Original Message-
> From: Rongwei Liu 
> Sent: Wednesday, July 21, 2021 10:06 AM
> To: Matan Azrad ; Slava Ovsiienko
> ; Ori Kam ; NBU-Contact-
> Thomas Monjalon ; Wisam Monther
> 
> Cc: dev@dpdk.org; Raslan Darawsheh ; Jiawei(Jonny)
> Wang 
> Subject: [PATCH v2 3/3] app/flow-perf: add the supports for meter PPS
> 
> The flow perf application used srtcm_rfc2697 as the meter profile while
> doing the meter testing.
> 
> This patch adds support for a new configuration parameter '--packet-mode' to
> generate the meter flows in packet mode.
> 
> Signed-off-by: Jiawei Wang 
> Signed-off-by: Rongwei Liu 

Reviewed-by: Wisam Jaddo 

Please send new versions to fix the first two patches, and you can add my ack 
to the third patch for this version.

BRs,
Wisam Jaddo


[dpdk-dev] [PATCH v4] net/ice: enable Rx timestamp on Flex Descriptor

2021-09-26 Thread Simei Su
Use the dynamic mbuf API to register the timestamp field and flag.
The ice hardware can dump the Rx timestamp value into the dynamic
mbuf field via the flex descriptor. This feature is turned on by the dev
config "enable-rx-timestamp". Currently, it is only supported
on the scalar path.

Signed-off-by: Simei Su 
---
v4:
* Rebase code.

v3:
* Define ice_tstamp_convert_32b_64b() as a static inline function in ice_rxtx.h.

v2:
* Refine release notes.
* Merge two helper functions into one.
* Remove one field in ice_rx_queue structure.

 doc/guides/rel_notes/release_21_11.rst |  1 +
 drivers/net/ice/ice_ethdev.c   |  6 +++-
 drivers/net/ice/ice_rxtx.c | 59 ++
 drivers/net/ice/ice_rxtx.h | 33 +++
 drivers/net/ice/ice_rxtx_vec_common.h  |  3 ++
 5 files changed, 101 insertions(+), 1 deletion(-)

diff --git a/doc/guides/rel_notes/release_21_11.rst b/doc/guides/rel_notes/release_21_11.rst
index 4b250c0..8e29833 100644
--- a/doc/guides/rel_notes/release_21_11.rst
+++ b/doc/guides/rel_notes/release_21_11.rst
@@ -81,6 +81,7 @@ New Features
 
   * Added 1PPS out support by a devargs.
   * Added IPv4 and L4(TCP/UDP/SCTP) checksum hash support in RSS flow.
+  * Added DEV_RX_OFFLOAD_TIMESTAMP support.
 
 * **Updated Marvell cnxk ethdev driver.**
 
diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
index a1d28c3..6b85f68 100644
--- a/drivers/net/ice/ice_ethdev.c
+++ b/drivers/net/ice/ice_ethdev.c
@@ -32,6 +32,9 @@
 #define ICE_ONE_PPS_OUT_ARG   "pps_out"
 #define ICE_RX_LOW_LATENCY_ARG"rx_low_latency"
 
+uint64_t ice_timestamp_dynflag;
+int ice_timestamp_dynfield_offset = -1;
+
 static const char * const ice_valid_args[] = {
ICE_SAFE_MODE_SUPPORT_ARG,
ICE_PIPELINE_MODE_SUPPORT_ARG,
@@ -3671,7 +3674,8 @@ ice_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
DEV_RX_OFFLOAD_QINQ_STRIP |
DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
DEV_RX_OFFLOAD_VLAN_EXTEND |
-   DEV_RX_OFFLOAD_RSS_HASH;
+   DEV_RX_OFFLOAD_RSS_HASH |
+   DEV_RX_OFFLOAD_TIMESTAMP;
dev_info->tx_offload_capa |=
DEV_TX_OFFLOAD_QINQ_INSERT |
DEV_TX_OFFLOAD_IPV4_CKSUM |
diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
index 5d7ab4f..bb75183 100644
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -302,6 +302,18 @@ ice_program_hw_rx_queue(struct ice_rx_queue *rxq)
}
}
 
+   if (rxq->offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+   /* Register mbuf field and flag for Rx timestamp */
+   err = rte_mbuf_dyn_rx_timestamp_register(
+   &ice_timestamp_dynfield_offset,
+   &ice_timestamp_dynflag);
+   if (err) {
+   PMD_DRV_LOG(ERR,
+   "Cannot register mbuf field/flag for 
timestamp");
+   return -EINVAL;
+   }
+   }
+
memset(&rx_ctx, 0, sizeof(rx_ctx));
 
rx_ctx.base = rxq->rx_ring_dma / ICE_QUEUE_BASE_ADDR_UNIT;
@@ -354,6 +366,9 @@ ice_program_hw_rx_queue(struct ice_rx_queue *rxq)
regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) &
QRXFLXP_CNTXT_RXDID_PRIO_M;
 
+   if (rxq->offloads & DEV_RX_OFFLOAD_TIMESTAMP)
+   regval |= QRXFLXP_CNTXT_TS_M;
+
ICE_WRITE_REG(hw, QRXFLXP_CNTXT(rxq->reg_idx), regval);
 
err = ice_clear_rxq_ctx(hw, rxq->reg_idx);
@@ -1546,6 +1561,9 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
int32_t i, j, nb_rx = 0;
uint64_t pkt_flags = 0;
uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+   struct ice_vsi *vsi = rxq->vsi;
+   struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
+   uint64_t ts_ns;
 
rxdp = &rxq->rx_ring[rxq->rx_tail];
rxep = &rxq->sw_ring[rxq->rx_tail];
@@ -1589,6 +1607,17 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
ice_rxd_to_vlan_tci(mb, &rxdp[j]);
rxq->rxd_to_pkt_fields(rxq, mb, &rxdp[j]);
 
+   if (rxq->offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+   ts_ns = ice_tstamp_convert_32b_64b(hw,
+   rte_le_to_cpu_32(rxdp[j].wb.flex_ts.ts_high));
+   if (ice_timestamp_dynflag > 0) {
+   *RTE_MBUF_DYNFIELD(mb,
+   ice_timestamp_dynfield_offset,
+   rte_mbuf_timestamp_t *) = ts_ns;
+   mb->ol_flags |= ice_timestamp_dynflag;
+   }
+   }
+
mb->ol_flags |= pkt_flags;
}
 
@@ -1772,6 +1801,9 

[dpdk-dev] [PATCH 00/11] net/mlx5: support shared Rx queue

2021-09-26 Thread Xueming Li
Implementation of shared Rx queue.

Depends-on: series-18996 ("ethdev: introduce shared Rx queue")
Depends-on: series-18065 ("net/mlx5: keep indirect actions across port restart")
Depends-on: series-18939 ("ethdev: change queue release callback")

Xueming Li (11):
  common/mlx5: support receive queue user index
  common/mlx5: support receive memory pool
  net/mlx5: clean Rx queue code
  net/mlx5: split multiple packet Rq memory pool
  net/mlx5: split Rx queue
  net/mlx5: move Rx queue reference count
  net/mlx5: move Rx queue hairpin info to private data
  net/mlx5: remove port info from shareable Rx queue
  net/mlx5: move Rx queue DevX resource
  net/mlx5: remove Rx queue data list from device
  net/mlx5: support shared Rx queue

 doc/guides/nics/features/mlx5.ini|   1 +
 doc/guides/nics/mlx5.rst |   6 +
 drivers/common/mlx5/mlx5_common_devx.c   | 310 ++--
 drivers/common/mlx5/mlx5_common_devx.h   |  19 +-
 drivers/common/mlx5/mlx5_devx_cmds.c |  52 ++
 drivers/common/mlx5/mlx5_devx_cmds.h |  16 +
 drivers/common/mlx5/mlx5_prm.h   |  93 +++-
 drivers/common/mlx5/version.map  |   1 +
 drivers/net/mlx5/linux/mlx5_os.c |   2 +
 drivers/net/mlx5/linux/mlx5_verbs.c  | 161 +++---
 drivers/net/mlx5/mlx5.c  |  11 +-
 drivers/net/mlx5/mlx5.h  |  17 +-
 drivers/net/mlx5/mlx5_devx.c | 249 -
 drivers/net/mlx5/mlx5_ethdev.c   |  16 +-
 drivers/net/mlx5/mlx5_flow.c |  45 +-
 drivers/net/mlx5/mlx5_mr.c   |   7 +-
 drivers/net/mlx5/mlx5_rss.c  |   6 +-
 drivers/net/mlx5/mlx5_rx.c   |  31 +-
 drivers/net/mlx5/mlx5_rx.h   |  49 +-
 drivers/net/mlx5/mlx5_rxq.c  | 618 ---
 drivers/net/mlx5/mlx5_rxtx.c |   6 +-
 drivers/net/mlx5/mlx5_rxtx_vec.c |   8 +-
 drivers/net/mlx5/mlx5_stats.c|   9 +-
 drivers/net/mlx5/mlx5_trigger.c  | 161 +++---
 drivers/net/mlx5/mlx5_vlan.c |  16 +-
 drivers/regex/mlx5/mlx5_regex_fastpath.c |   2 +-
 26 files changed, 1289 insertions(+), 623 deletions(-)

-- 
2.33.0



[dpdk-dev] [PATCH 08/11] net/mlx5: remove port info from shareable Rx queue

2021-09-26 Thread Xueming Li
To prepare for shared Rx queue, remove port info from the shareable Rx
queue control.

Signed-off-by: Xueming Li 
---
 drivers/net/mlx5/mlx5_devx.c |  2 +-
 drivers/net/mlx5/mlx5_mr.c   |  7 ---
 drivers/net/mlx5/mlx5_rx.c   | 15 +++
 drivers/net/mlx5/mlx5_rx.h   |  5 -
 drivers/net/mlx5/mlx5_rxq.c  | 10 --
 drivers/net/mlx5/mlx5_rxtx_vec.c |  2 +-
 6 files changed, 17 insertions(+), 24 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_devx.c b/drivers/net/mlx5/mlx5_devx.c
index 4d479c19e6c..71e4bce1588 100644
--- a/drivers/net/mlx5/mlx5_devx.c
+++ b/drivers/net/mlx5/mlx5_devx.c
@@ -916,7 +916,7 @@ mlx5_rxq_devx_obj_drop_create(struct rte_eth_dev *dev)
}
rxq->rxq_ctrl = rxq_ctrl;
rxq_ctrl->type = MLX5_RXQ_TYPE_STANDARD;
-   rxq_ctrl->priv = priv;
+   rxq_ctrl->sh = priv->sh;
rxq_ctrl->obj = rxq;
rxq_data = &rxq_ctrl->rxq;
/* Create CQ using DevX API. */
diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
index 44afda731fc..8d48b4614ee 100644
--- a/drivers/net/mlx5/mlx5_mr.c
+++ b/drivers/net/mlx5/mlx5_mr.c
@@ -82,10 +82,11 @@ mlx5_rx_addr2mr_bh(struct mlx5_rxq_data *rxq, uintptr_t addr)
struct mlx5_rxq_ctrl *rxq_ctrl =
container_of(rxq, struct mlx5_rxq_ctrl, rxq);
struct mlx5_mr_ctrl *mr_ctrl = &rxq->mr_ctrl;
-   struct mlx5_priv *priv = rxq_ctrl->priv;
+   struct mlx5_priv *priv = RXQ_PORT(rxq_ctrl);
+   struct mlx5_dev_ctx_shared *sh = rxq_ctrl->sh;
 
-   return mlx5_mr_addr2mr_bh(priv->sh->pd, &priv->mp_id,
- &priv->sh->share_cache, mr_ctrl, addr,
+   return mlx5_mr_addr2mr_bh(sh->pd, &priv->mp_id,
+ &sh->share_cache, mr_ctrl, addr,
  priv->config.mr_ext_memseg_en);
 }
 
diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c
index e3b1051ba46..09de26c0d39 100644
--- a/drivers/net/mlx5/mlx5_rx.c
+++ b/drivers/net/mlx5/mlx5_rx.c
@@ -118,15 +118,7 @@ int
 mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
 {
struct mlx5_rxq_data *rxq = rx_queue;
-   struct mlx5_rxq_ctrl *rxq_ctrl =
-   container_of(rxq, struct mlx5_rxq_ctrl, rxq);
-   struct rte_eth_dev *dev = ETH_DEV(rxq_ctrl->priv);
 
-   if (dev->rx_pkt_burst == NULL ||
-   dev->rx_pkt_burst == removed_rx_burst) {
-   rte_errno = ENOTSUP;
-   return -rte_errno;
-   }
if (offset >= (1 << rxq->cqe_n)) {
rte_errno = EINVAL;
return -rte_errno;
@@ -438,10 +430,10 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)
sm.is_wq = 1;
sm.queue_id = rxq->idx;
sm.state = IBV_WQS_RESET;
-   if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), &sm))
+   if (mlx5_queue_state_modify(RXQ_DEV(rxq_ctrl), &sm))
return -1;
if (rxq_ctrl->dump_file_n <
-   rxq_ctrl->priv->config.max_dump_files_num) {
+   RXQ_PORT(rxq_ctrl)->config.max_dump_files_num) {
MKSTR(err_str, "Unexpected CQE error syndrome "
  "0x%02x CQN = %u RQN = %u wqe_counter = %u"
  " rq_ci = %u cq_ci = %u", u.err_cqe->syndrome,
@@ -478,8 +470,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)
sm.is_wq = 1;
sm.queue_id = rxq->idx;
sm.state = IBV_WQS_RDY;
-   if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv),
-   &sm))
+   if (mlx5_queue_state_modify(RXQ_DEV(rxq_ctrl), &sm))
return -1;
if (vec) {
const uint32_t elts_n =
diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h
index 2ed544556f5..4eed4176324 100644
--- a/drivers/net/mlx5/mlx5_rx.h
+++ b/drivers/net/mlx5/mlx5_rx.h
@@ -23,6 +23,10 @@
 /* Support tunnel matching. */
 #define MLX5_FLOW_TUNNEL 10
 
+#define RXQ_PORT(rxq_ctrl) LIST_FIRST(&(rxq_ctrl)->owners)->priv
+#define RXQ_DEV(rxq_ctrl) ETH_DEV(RXQ_PORT(rxq_ctrl))
+#define RXQ_PORT_ID(rxq_ctrl) PORT_ID(RXQ_PORT(rxq_ctrl))
+
 struct mlx5_rxq_stats {
 #ifdef MLX5_PMD_SOFT_COUNTERS
uint64_t ipackets; /**< Total of successfully received packets. */
@@ -163,7 +167,6 @@ struct mlx5_rxq_ctrl {
LIST_HEAD(priv, mlx5_rxq_priv) owners; /* Owner rxq list. */
struct mlx5_rxq_obj *obj; /* Verbs/DevX elements. */
struct mlx5_dev_ctx_shared *sh; /* Shared context. */
-   struct mlx5_priv *priv; /* Back pointer to private data. */
enum mlx5_rxq_type type; /* Rxq type. */
unsigned int socket; /* CPU socket ID for allocations. */
unsigned int irq:1; /* Whether IRQ is

[dpdk-dev] [PATCH 04/11] net/mlx5: split multiple packet Rq memory pool

2021-09-26 Thread Xueming Li
Port info is invisible from a shared Rx queue, so split the MPRQ mempool
from the device level to the Rx queue level; also change the pool flag to
mp_sc.

Signed-off-by: Xueming Li 
---
 drivers/net/mlx5/mlx5.c |   1 -
 drivers/net/mlx5/mlx5_rx.h  |   4 +-
 drivers/net/mlx5/mlx5_rxq.c | 109 
 drivers/net/mlx5/mlx5_trigger.c |  10 ++-
 4 files changed, 47 insertions(+), 77 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index f84e061fe71..3abb8c97e76 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1602,7 +1602,6 @@ mlx5_dev_close(struct rte_eth_dev *dev)
mlx5_drop_action_destroy(dev);
if (priv->mreg_cp_tbl)
mlx5_hlist_destroy(priv->mreg_cp_tbl);
-   mlx5_mprq_free_mp(dev);
if (priv->sh->ct_mng)
mlx5_flow_aso_ct_mng_close(priv->sh);
mlx5_os_free_shared_dr(priv);
diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h
index d44c8078dea..a8e0c3162b0 100644
--- a/drivers/net/mlx5/mlx5_rx.h
+++ b/drivers/net/mlx5/mlx5_rx.h
@@ -179,8 +179,8 @@ struct mlx5_rxq_ctrl {
 extern uint8_t rss_hash_default_key[];
 
 unsigned int mlx5_rxq_cqe_num(struct mlx5_rxq_data *rxq_data);
-int mlx5_mprq_free_mp(struct rte_eth_dev *dev);
-int mlx5_mprq_alloc_mp(struct rte_eth_dev *dev);
+int mlx5_mprq_free_mp(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl);
+int mlx5_mprq_alloc_mp(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl);
 int mlx5_rx_queue_start(struct rte_eth_dev *dev, uint16_t queue_id);
 int mlx5_rx_queue_stop(struct rte_eth_dev *dev, uint16_t queue_id);
 int mlx5_rx_queue_start_primary(struct rte_eth_dev *dev, uint16_t queue_id);
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 7e97cdd4bc0..14de8d0e6a4 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1087,7 +1087,7 @@ mlx5_mprq_buf_init(struct rte_mempool *mp, void 
*opaque_arg,
 }
 
 /**
- * Free mempool of Multi-Packet RQ.
+ * Free RXQ mempool of Multi-Packet RQ.
  *
  * @param dev
  *   Pointer to Ethernet device.
@@ -1096,16 +1096,15 @@ mlx5_mprq_buf_init(struct rte_mempool *mp, void *opaque_arg,
  *   0 on success, negative errno value on failure.
  */
 int
-mlx5_mprq_free_mp(struct rte_eth_dev *dev)
+mlx5_mprq_free_mp(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl)
 {
-   struct mlx5_priv *priv = dev->data->dev_private;
-   struct rte_mempool *mp = priv->mprq_mp;
-   unsigned int i;
+   struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
+   struct rte_mempool *mp = rxq->mprq_mp;
 
if (mp == NULL)
return 0;
-   DRV_LOG(DEBUG, "port %u freeing mempool (%s) for Multi-Packet RQ",
-   dev->data->port_id, mp->name);
+   DRV_LOG(DEBUG, "port %u queue %hu freeing mempool (%s) for Multi-Packet RQ",
+   dev->data->port_id, rxq->idx, mp->name);
/*
 * If a buffer in the pool has been externally attached to a mbuf and it
 * is still in use by application, destroying the Rx queue can spoil
@@ -1123,34 +1122,28 @@ mlx5_mprq_free_mp(struct rte_eth_dev *dev)
return -rte_errno;
}
rte_mempool_free(mp);
-   /* Unset mempool for each Rx queue. */
-   for (i = 0; i != priv->rxqs_n; ++i) {
-   struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
-
-   if (rxq == NULL)
-   continue;
-   rxq->mprq_mp = NULL;
-   }
-   priv->mprq_mp = NULL;
+   rxq->mprq_mp = NULL;
return 0;
 }
 
 /**
- * Allocate a mempool for Multi-Packet RQ. All configured Rx queues share the
- * mempool. If already allocated, reuse it if there're enough elements.
+ * Allocate a mempool for the RXQ's Multi-Packet RQ.
+ * If already allocated, reuse it if there're enough elements.
  * Otherwise, resize it.
  *
  * @param dev
  *   Pointer to Ethernet device.
+ * @param rxq_ctrl
+ *   Pointer to RXQ.
  *
  * @return
  *   0 on success, negative errno value on failure.
  */
 int
-mlx5_mprq_alloc_mp(struct rte_eth_dev *dev)
+mlx5_mprq_alloc_mp(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl)
 {
-   struct mlx5_priv *priv = dev->data->dev_private;
-   struct rte_mempool *mp = priv->mprq_mp;
+   struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
+   struct rte_mempool *mp = rxq->mprq_mp;
char name[RTE_MEMPOOL_NAMESIZE];
unsigned int desc = 0;
unsigned int buf_len;
@@ -1158,28 +1151,15 @@ mlx5_mprq_alloc_mp(struct rte_eth_dev *dev)
unsigned int obj_size;
unsigned int strd_num_n = 0;
unsigned int strd_sz_n = 0;
-   unsigned int i;
-   unsigned int n_ibv = 0;
 
-   if (!mlx5_mprq_enabled(dev))
+   if (rxq_ctrl == NULL || rxq_ctrl->type != MLX5_RXQ_TYPE_STANDARD)
return 0;
-   /* Count the total number of descriptors configured. */
-   for (i = 0; i != priv->rxqs_n; ++i) {
-   struct 

[dpdk-dev] [PATCH 01/11] common/mlx5: support receive queue user index

2021-09-26 Thread Xueming Li
The RQ user index is saved in the CQE when a packet is received by the RQ.

Signed-off-by: Xueming Li 
---
 drivers/common/mlx5/mlx5_prm.h   | 8 +++-
 drivers/regex/mlx5/mlx5_regex_fastpath.c | 2 +-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index d361bcf90ef..72af3710a8f 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -393,7 +393,13 @@ struct mlx5_cqe {
uint16_t hdr_type_etc;
uint16_t vlan_info;
uint8_t lro_num_seg;
-   uint8_t rsvd3[3];
+   union {
+   uint8_t user_index_bytes[3];
+   struct {
+   uint8_t user_index_hi;
+   uint16_t user_index_low;
+   } __rte_packed;
+   };
uint32_t flow_table_metadata;
uint8_t rsvd4[4];
uint32_t byte_cnt;
diff --git a/drivers/regex/mlx5/mlx5_regex_fastpath.c 
b/drivers/regex/mlx5/mlx5_regex_fastpath.c
index c79445ce7d3..a151e4ce8dc 100644
--- a/drivers/regex/mlx5/mlx5_regex_fastpath.c
+++ b/drivers/regex/mlx5/mlx5_regex_fastpath.c
@@ -567,7 +567,7 @@ mlx5_regexdev_dequeue(struct rte_regexdev *dev, uint16_t 
qp_id,
uint16_t wq_counter
= (rte_be_to_cpu_16(cqe->wqe_counter) + 1) &
  MLX5_REGEX_MAX_WQE_INDEX;
-   size_t sqid = cqe->rsvd3[2];
+   size_t sqid = cqe->user_index_bytes[2];
struct mlx5_regex_sq *sq = &queue->sqs[sqid];
 
/* UMR mode WQE counter move as WQE set(4 WQEBBS).*/
-- 
2.33.0
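The union added above exposes the index both as raw bytes, as the regex
fastpath consumes it, and as a hi/low pair. A minimal sketch of reassembling
the full 24-bit user index from the new fields, assuming big-endian layout
as in the rest of the PRM definitions (helper name is illustrative, not
from the patch):

#include <rte_byteorder.h>

/* Illustrative helper: rebuild the 24-bit RQ user index from the new
 * CQE union. Big-endian byte order is assumed here, consistent with
 * other multi-byte PRM fields. */
static inline uint32_t
mlx5_cqe_user_index(const struct mlx5_cqe *cqe)
{
        return ((uint32_t)cqe->user_index_hi << 16) |
               rte_be_to_cpu_16(cqe->user_index_low);
}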



[dpdk-dev] [PATCH 05/11] net/mlx5: split Rx queue

2021-09-26 Thread Xueming Li
To prepare for shared RX queue, split rxq data into shareable and private
parts. Struct mlx5_rxq_priv holds per-queue data.
Struct mlx5_rxq_ctrl holds shared queue resources and data.

Signed-off-by: Xueming Li 
---
 drivers/net/mlx5/mlx5.c|  4 +++
 drivers/net/mlx5/mlx5.h|  5 ++-
 drivers/net/mlx5/mlx5_ethdev.c | 10 ++
 drivers/net/mlx5/mlx5_rx.h | 15 ++--
 drivers/net/mlx5/mlx5_rxq.c| 66 --
 5 files changed, 86 insertions(+), 14 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 3abb8c97e76..749729d6fbe 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1585,6 +1585,10 @@ mlx5_dev_close(struct rte_eth_dev *dev)
mlx5_free(dev->intr_handle);
dev->intr_handle = NULL;
}
+   if (priv->rxq_privs != NULL) {
+   mlx5_free(priv->rxq_privs);
+   priv->rxq_privs = NULL;
+   }
if (priv->txqs != NULL) {
/* XXX race condition if mlx5_tx_burst() is still running. */
rte_delay_us_sleep(1000);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index e02714e2319..d06f828ed33 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1335,6 +1335,8 @@ enum mlx5_txq_modify_type {
MLX5_TXQ_MOD_ERR2RDY, /* modify state from error to ready. */
 };
 
+struct mlx5_rxq_priv;
+
 /* HW objects operations structure. */
 struct mlx5_obj_ops {
int (*rxq_obj_modify_vlan_strip)(struct mlx5_rxq_obj *rxq_obj, int on);
@@ -1404,7 +1406,8 @@ struct mlx5_priv {
/* RX/TX queues. */
unsigned int rxqs_n; /* RX queues array size. */
unsigned int txqs_n; /* TX queues array size. */
-   struct mlx5_rxq_data *(*rxqs)[]; /* RX queues. */
+   struct mlx5_rxq_priv *(*rxq_privs)[]; /* RX queue non-shared data. */
+   struct mlx5_rxq_data *(*rxqs)[]; /* (Shared) RX queues. */
struct mlx5_txq_data *(*txqs)[]; /* TX queues. */
struct rte_mempool *mprq_mp; /* Mempool for Multi-Packet RQ. */
struct rte_eth_rss_conf rss_conf; /* RSS configuration. */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 82e2284d986..7071a5f7039 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -104,6 +104,16 @@ mlx5_dev_configure(struct rte_eth_dev *dev)
   MLX5_RSS_HASH_KEY_LEN);
priv->rss_conf.rss_key_len = MLX5_RSS_HASH_KEY_LEN;
priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
+   priv->rxq_privs = mlx5_realloc(priv->rxq_privs,
+  MLX5_MEM_ANY | MLX5_MEM_ZERO,
+  sizeof(void *) * rxqs_n, 0,
+  SOCKET_ID_ANY);
+   if (priv->rxq_privs == NULL) {
+   DRV_LOG(ERR, "port %u cannot allocate rxq private data",
+   dev->data->port_id);
+   rte_errno = ENOMEM;
+   return -rte_errno;
+   }
priv->rxqs = (void *)dev->data->rx_queues;
priv->txqs = (void *)dev->data->tx_queues;
if (txqs_n != priv->txqs_n) {
diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h
index a8e0c3162b0..db6252e8e86 100644
--- a/drivers/net/mlx5/mlx5_rx.h
+++ b/drivers/net/mlx5/mlx5_rx.h
@@ -161,7 +161,9 @@ struct mlx5_rxq_ctrl {
struct mlx5_rxq_data rxq; /* Data path structure. */
LIST_ENTRY(mlx5_rxq_ctrl) next; /* Pointer to the next element. */
uint32_t refcnt; /* Reference counter. */
+   LIST_HEAD(priv, mlx5_rxq_priv) owners; /* Owner rxq list. */
struct mlx5_rxq_obj *obj; /* Verbs/DevX elements. */
+   struct mlx5_dev_ctx_shared *sh; /* Shared context. */
struct mlx5_priv *priv; /* Back pointer to private data. */
enum mlx5_rxq_type type; /* Rxq type. */
unsigned int socket; /* CPU socket ID for allocations. */
@@ -174,6 +176,14 @@ struct mlx5_rxq_ctrl {
uint32_t hairpin_status; /* Hairpin binding status. */
 };
 
+/* RX queue private data. */
+struct mlx5_rxq_priv {
+   uint16_t idx; /* Queue index. */
+   struct mlx5_rxq_ctrl *ctrl; /* Shared Rx Queue. */
+   LIST_ENTRY(mlx5_rxq_priv) owner_entry; /* Entry in shared rxq_ctrl. */
+   struct mlx5_priv *priv; /* Back pointer to private data. */
+};
+
 /* mlx5_rxq.c */
 
 extern uint8_t rss_hash_default_key[];
@@ -197,13 +207,14 @@ void mlx5_rx_intr_vec_disable(struct rte_eth_dev *dev);
 int mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id);
 int mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id);
 int mlx5_rxq_obj_verify(struct rte_eth_dev *dev);
-struct mlx5_rxq_ctrl *mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx,
+struct mlx5_rxq_ctrl *mlx5_rxq_new(struct rte_eth_dev *dev,
+  struct mlx5_rxq_priv *rxq,
   uint16_t desc, unsigned int soc

[dpdk-dev] [PATCH 06/11] net/mlx5: move Rx queue reference count

2021-09-26 Thread Xueming Li
The Rx queue reference count is the counter of the RQ, used in the RQ table.
To prepare for shared Rx queue, move it from rxq_ctrl to the Rx queue
private data.

Signed-off-by: Xueming Li 
---
 drivers/net/mlx5/mlx5_rx.h  |   8 +-
 drivers/net/mlx5/mlx5_rxq.c | 173 +---
 drivers/net/mlx5/mlx5_trigger.c |  57 +--
 3 files changed, 144 insertions(+), 94 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h
index db6252e8e86..fe19414c130 100644
--- a/drivers/net/mlx5/mlx5_rx.h
+++ b/drivers/net/mlx5/mlx5_rx.h
@@ -160,7 +160,6 @@ enum mlx5_rxq_type {
 struct mlx5_rxq_ctrl {
struct mlx5_rxq_data rxq; /* Data path structure. */
LIST_ENTRY(mlx5_rxq_ctrl) next; /* Pointer to the next element. */
-   uint32_t refcnt; /* Reference counter. */
LIST_HEAD(priv, mlx5_rxq_priv) owners; /* Owner rxq list. */
struct mlx5_rxq_obj *obj; /* Verbs/DevX elements. */
struct mlx5_dev_ctx_shared *sh; /* Shared context. */
@@ -179,6 +178,7 @@ struct mlx5_rxq_ctrl {
 /* RX queue private data. */
 struct mlx5_rxq_priv {
uint16_t idx; /* Queue index. */
+   uint32_t refcnt; /* Reference counter. */
struct mlx5_rxq_ctrl *ctrl; /* Shared Rx Queue. */
LIST_ENTRY(mlx5_rxq_priv) owner_entry; /* Entry in shared rxq_ctrl. */
struct mlx5_priv *priv; /* Back pointer to private data. */
@@ -216,7 +216,11 @@ struct mlx5_rxq_ctrl *mlx5_rxq_new(struct rte_eth_dev *dev,
 struct mlx5_rxq_ctrl *mlx5_rxq_hairpin_new
(struct rte_eth_dev *dev, struct mlx5_rxq_priv *rxq, uint16_t desc,
 const struct rte_eth_hairpin_conf *hairpin_conf);
-struct mlx5_rxq_ctrl *mlx5_rxq_get(struct rte_eth_dev *dev, uint16_t idx);
+struct mlx5_rxq_priv *mlx5_rxq_ref(struct rte_eth_dev *dev, uint16_t idx);
+uint32_t mlx5_rxq_deref(struct rte_eth_dev *dev, uint16_t idx);
+struct mlx5_rxq_priv *mlx5_rxq_get(struct rte_eth_dev *dev, uint16_t idx);
+struct mlx5_rxq_ctrl *mlx5_rxq_ctrl_get(struct rte_eth_dev *dev, uint16_t idx);
+struct mlx5_rxq_data *mlx5_rxq_data_get(struct rte_eth_dev *dev, uint16_t idx);
 int mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx);
 int mlx5_rxq_verify(struct rte_eth_dev *dev);
 int rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl);
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 70e73690aa7..7f28646f55c 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -386,15 +386,13 @@ mlx5_get_rx_port_offloads(void)
 static int
 mlx5_rxq_releasable(struct rte_eth_dev *dev, uint16_t idx)
 {
-   struct mlx5_priv *priv = dev->data->dev_private;
-   struct mlx5_rxq_ctrl *rxq_ctrl;
+   struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx);
 
-   if (!(*priv->rxqs)[idx]) {
+   if (rxq == NULL) {
rte_errno = EINVAL;
return -rte_errno;
}
-   rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
-   return (__atomic_load_n(&rxq_ctrl->refcnt, __ATOMIC_RELAXED) == 1);
+   return (__atomic_load_n(&rxq->refcnt, __ATOMIC_RELAXED) == 1);
 }
 
 /* Fetches and drops all SW-owned and error CQEs to synchronize CQ. */
@@ -874,8 +872,8 @@ mlx5_rx_intr_vec_enable(struct rte_eth_dev *dev)
intr_handle->type = RTE_INTR_HANDLE_EXT;
for (i = 0; i != n; ++i) {
/* This rxq obj must not be released in this function. */
-   struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);
-   struct mlx5_rxq_obj *rxq_obj = rxq_ctrl ? rxq_ctrl->obj : NULL;
+   struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
+   struct mlx5_rxq_obj *rxq_obj = rxq ? rxq->ctrl->obj : NULL;
int rc;
 
/* Skip queues that cannot request interrupts. */
@@ -885,11 +883,9 @@ mlx5_rx_intr_vec_enable(struct rte_eth_dev *dev)
intr_handle->intr_vec[i] =
RTE_INTR_VEC_RXTX_OFFSET +
RTE_MAX_RXTX_INTR_VEC_ID;
-   /* Decrease the rxq_ctrl's refcnt */
-   if (rxq_ctrl)
-   mlx5_rxq_release(dev, i);
continue;
}
+   mlx5_rxq_ref(dev, i);
if (count >= RTE_MAX_RXTX_INTR_VEC_ID) {
DRV_LOG(ERR,
"port %u too many Rx queues for interrupt"
@@ -949,7 +945,7 @@ mlx5_rx_intr_vec_disable(struct rte_eth_dev *dev)
 * Need to access directly the queue to release the reference
 * kept in mlx5_rx_intr_vec_enable().
 */
-   mlx5_rxq_release(dev, i);
+   mlx5_rxq_deref(dev, i);
}
 free:
rte_intr_free_epoll_fd(intr_handle);
@@ -998,19 +994,14 @@ mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq)
 int
 mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
-   struct mlx5_rxq_c
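The hunk above is cut short in this archive. As a hedged sketch, the new
mlx5_rxq_ref()/mlx5_rxq_deref() helpers declared in mlx5_rx.h presumably
look like this; bodies are assumed from the declarations and call sites
shown, not verbatim from the patch:

struct mlx5_rxq_priv *
mlx5_rxq_ref(struct rte_eth_dev *dev, uint16_t idx)
{
        struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx);

        /* Take a reference on the per-queue private data, if it exists. */
        if (rxq != NULL)
                __atomic_fetch_add(&rxq->refcnt, 1, __ATOMIC_RELAXED);
        return rxq;
}

uint32_t
mlx5_rxq_deref(struct rte_eth_dev *dev, uint16_t idx)
{
        struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx);

        if (rxq == NULL)
                return 0;
        /* Drop one reference and return the remaining count. */
        return __atomic_sub_fetch(&rxq->refcnt, 1, __ATOMIC_RELAXED);
}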

[dpdk-dev] [PATCH 07/11] net/mlx5: move Rx queue hairpin info to private data

2021-09-26 Thread Xueming Li
Hairpin info of an Rx queue can't be shared, so move it to the private
queue data.

Signed-off-by: Xueming Li 
---
 drivers/net/mlx5/mlx5_rx.h  |  4 ++--
 drivers/net/mlx5/mlx5_rxq.c | 13 +
 drivers/net/mlx5/mlx5_trigger.c | 24 
 3 files changed, 19 insertions(+), 22 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h
index fe19414c130..2ed544556f5 100644
--- a/drivers/net/mlx5/mlx5_rx.h
+++ b/drivers/net/mlx5/mlx5_rx.h
@@ -171,8 +171,6 @@ struct mlx5_rxq_ctrl {
uint32_t flow_tunnels_n[MLX5_FLOW_TUNNEL]; /* Tunnels counters. */
uint32_t wqn; /* WQ number. */
uint16_t dump_file_n; /* Number of dump files. */
-   struct rte_eth_hairpin_conf hairpin_conf; /* Hairpin configuration. */
-   uint32_t hairpin_status; /* Hairpin binding status. */
 };
 
 /* RX queue private data. */
@@ -182,6 +180,8 @@ struct mlx5_rxq_priv {
struct mlx5_rxq_ctrl *ctrl; /* Shared Rx Queue. */
LIST_ENTRY(mlx5_rxq_priv) owner_entry; /* Entry in shared rxq_ctrl. */
struct mlx5_priv *priv; /* Back pointer to private data. */
+   struct rte_eth_hairpin_conf hairpin_conf; /* Hairpin configuration. */
+   uint32_t hairpin_status; /* Hairpin binding status. */
 };
 
 /* mlx5_rxq.c */
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 7f28646f55c..21cb1000899 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1649,8 +1649,8 @@ mlx5_rxq_hairpin_new(struct rte_eth_dev *dev, struct 
mlx5_rxq_priv *rxq,
tmpl->rxq.elts_n = log2above(desc);
tmpl->rxq.elts = NULL;
tmpl->rxq.mr_ctrl.cache_bh = (struct mlx5_mr_btree) { 0 };
-   tmpl->hairpin_conf = *hairpin_conf;
tmpl->rxq.idx = idx;
+   rxq->hairpin_conf = *hairpin_conf;
mlx5_rxq_ref(dev, idx);
LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
return tmpl;
@@ -1869,14 +1869,11 @@ const struct rte_eth_hairpin_conf *
 mlx5_rxq_get_hairpin_conf(struct rte_eth_dev *dev, uint16_t idx)
 {
struct mlx5_priv *priv = dev->data->dev_private;
-   struct mlx5_rxq_ctrl *rxq_ctrl = NULL;
+   struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx);
 
-   if (idx < priv->rxqs_n && (*priv->rxqs)[idx]) {
-   rxq_ctrl = container_of((*priv->rxqs)[idx],
-   struct mlx5_rxq_ctrl,
-   rxq);
-   if (rxq_ctrl->type == MLX5_RXQ_TYPE_HAIRPIN)
-   return &rxq_ctrl->hairpin_conf;
+   if (idx < priv->rxqs_n && rxq != NULL) {
+   if (rxq->ctrl->type == MLX5_RXQ_TYPE_HAIRPIN)
+   return &rxq->hairpin_conf;
}
return NULL;
 }
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index a49254c96f6..f376f4d6fc4 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -273,7 +273,7 @@ mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
}
rxq_ctrl = rxq->ctrl;
if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
-   rxq_ctrl->hairpin_conf.peers[0].queue != i) {
+   rxq->hairpin_conf.peers[0].queue != i) {
rte_errno = ENOMEM;
DRV_LOG(ERR, "port %u Tx queue %d can't be binded to "
"Rx queue %d", dev->data->port_id,
@@ -303,7 +303,7 @@ mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
if (ret)
goto error;
/* Qs with auto-bind will be destroyed directly. */
-   rxq_ctrl->hairpin_status = 1;
+   rxq->hairpin_status = 1;
txq_ctrl->hairpin_status = 1;
mlx5_txq_release(dev, i);
}
@@ -406,9 +406,9 @@ mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, 
uint16_t peer_queue,
}
peer_info->qp_id = rxq_ctrl->obj->rq->id;
peer_info->vhca_id = priv->config.hca_attr.vhca_id;
-   peer_info->peer_q = rxq_ctrl->hairpin_conf.peers[0].queue;
-   peer_info->tx_explicit = rxq_ctrl->hairpin_conf.tx_explicit;
-   peer_info->manual_bind = rxq_ctrl->hairpin_conf.manual_bind;
+   peer_info->peer_q = rxq->hairpin_conf.peers[0].queue;
+   peer_info->tx_explicit = rxq->hairpin_conf.tx_explicit;
+   peer_info->manual_bind = rxq->hairpin_conf.manual_bind;
}
return 0;
 }
@@ -530,20 +530,20 @@ mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, 
uint16_t cur_queue,
dev->data->port_id, cur_queue);
return -rte_errno;
}
-   if (rxq_ctrl->hairpin_status != 0) {
+   if (rxq->hairpin_status != 0) {
DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
dev-

[dpdk-dev] [PATCH 09/11] net/mlx5: move Rx queue DevX resource

2021-09-26 Thread Xueming Li
To support shared RX queue, move the DevX RQ, which is a per-queue
resource, to the Rx queue private data.

Signed-off-by: Xueming Li 
---
 drivers/net/mlx5/linux/mlx5_verbs.c | 154 +++
 drivers/net/mlx5/mlx5.h |  11 +-
 drivers/net/mlx5/mlx5_devx.c| 227 ++--
 drivers/net/mlx5/mlx5_rx.h  |   1 +
 drivers/net/mlx5/mlx5_rxq.c |  44 +++---
 drivers/net/mlx5/mlx5_rxtx.c|   6 +-
 drivers/net/mlx5/mlx5_trigger.c |   2 +-
 drivers/net/mlx5/mlx5_vlan.c|  16 +-
 8 files changed, 241 insertions(+), 220 deletions(-)

diff --git a/drivers/net/mlx5/linux/mlx5_verbs.c 
b/drivers/net/mlx5/linux/mlx5_verbs.c
index d4fa202ac4b..a2a9b9c1f98 100644
--- a/drivers/net/mlx5/linux/mlx5_verbs.c
+++ b/drivers/net/mlx5/linux/mlx5_verbs.c
@@ -71,13 +71,13 @@ const struct mlx5_mr_ops mlx5_mr_verbs_ops = {
 /**
  * Modify Rx WQ vlan stripping offload
  *
- * @param rxq_obj
- *   Rx queue object.
+ * @param rxq
+ *   Rx queue.
  *
  * @return 0 on success, non-0 otherwise
  */
 static int
-mlx5_rxq_obj_modify_wq_vlan_strip(struct mlx5_rxq_obj *rxq_obj, int on)
+mlx5_rxq_obj_modify_wq_vlan_strip(struct mlx5_rxq_priv *rxq, int on)
 {
uint16_t vlan_offloads =
(on ? IBV_WQ_FLAGS_CVLAN_STRIPPING : 0) |
@@ -89,14 +89,14 @@ mlx5_rxq_obj_modify_wq_vlan_strip(struct mlx5_rxq_obj 
*rxq_obj, int on)
.flags = vlan_offloads,
};
 
-   return mlx5_glue->modify_wq(rxq_obj->wq, &mod);
+   return mlx5_glue->modify_wq(rxq->ctrl->obj->wq, &mod);
 }
 
 /**
  * Modifies the attributes for the specified WQ.
  *
- * @param rxq_obj
- *   Verbs Rx queue object.
+ * @param rxq
+ *   Verbs Rx queue.
  * @param type
  *   Type of change queue state.
  *
@@ -104,14 +104,14 @@ mlx5_rxq_obj_modify_wq_vlan_strip(struct mlx5_rxq_obj 
*rxq_obj, int on)
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_ibv_modify_wq(struct mlx5_rxq_obj *rxq_obj, uint8_t type)
+mlx5_ibv_modify_wq(struct mlx5_rxq_priv *rxq, uint8_t type)
 {
struct ibv_wq_attr mod = {
.attr_mask = IBV_WQ_ATTR_STATE,
.wq_state = (enum ibv_wq_state)type,
};
 
-   return mlx5_glue->modify_wq(rxq_obj->wq, &mod);
+   return mlx5_glue->modify_wq(rxq->ctrl->obj->wq, &mod);
 }
 
 /**
@@ -181,21 +181,18 @@ mlx5_ibv_modify_qp(struct mlx5_txq_obj *obj, enum 
mlx5_txq_modify_type type,
 /**
  * Create a CQ Verbs object.
  *
- * @param dev
- *   Pointer to Ethernet device.
- * @param idx
- *   Queue index in DPDK Rx queue array.
+ * @param rxq
+ *   Pointer to Rx queue.
  *
  * @return
  *   The Verbs CQ object initialized, NULL otherwise and rte_errno is set.
  */
 static struct ibv_cq *
-mlx5_rxq_ibv_cq_create(struct rte_eth_dev *dev, uint16_t idx)
+mlx5_rxq_ibv_cq_create(struct mlx5_rxq_priv *rxq)
 {
-   struct mlx5_priv *priv = dev->data->dev_private;
-   struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
-   struct mlx5_rxq_ctrl *rxq_ctrl =
-   container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+   struct mlx5_priv *priv = rxq->priv;
+   struct mlx5_rxq_ctrl *rxq_ctrl = rxq->ctrl;
+   struct mlx5_rxq_data *rxq_data = &rxq_ctrl->rxq;
struct mlx5_rxq_obj *rxq_obj = rxq_ctrl->obj;
unsigned int cqe_n = mlx5_rxq_cqe_num(rxq_data);
struct {
@@ -241,7 +238,7 @@ mlx5_rxq_ibv_cq_create(struct rte_eth_dev *dev, uint16_t 
idx)
DRV_LOG(DEBUG,
"Port %u Rx CQE compression is disabled for HW"
" timestamp.",
-   dev->data->port_id);
+   priv->dev_data->port_id);
}
 #ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD
if (RTE_CACHE_LINE_SIZE == 128) {
@@ -257,21 +254,18 @@ mlx5_rxq_ibv_cq_create(struct rte_eth_dev *dev, uint16_t 
idx)
 /**
  * Create a WQ Verbs object.
  *
- * @param dev
- *   Pointer to Ethernet device.
- * @param idx
- *   Queue index in DPDK Rx queue array.
+ * @param rxq
+ *   Pointer to Rx queue.
  *
  * @return
  *   The Verbs WQ object initialized, NULL otherwise and rte_errno is set.
  */
 static struct ibv_wq *
-mlx5_rxq_ibv_wq_create(struct rte_eth_dev *dev, uint16_t idx)
+mlx5_rxq_ibv_wq_create(struct mlx5_rxq_priv *rxq)
 {
-   struct mlx5_priv *priv = dev->data->dev_private;
-   struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
-   struct mlx5_rxq_ctrl *rxq_ctrl =
-   container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+   struct mlx5_priv *priv = rxq->priv;
+   struct mlx5_rxq_ctrl *rxq_ctrl = rxq->ctrl;
+   struct mlx5_rxq_data *rxq_data = &rxq_ctrl->rxq;
struct mlx5_rxq_obj *rxq_obj = rxq_ctrl->obj;
unsigned int wqe_n = 1 << rxq_data->elts_n;
struct {
@@ -338,7 +332,7 @@ mlx5_rxq_ibv_wq_create(struct rte_eth_dev *dev, uint16_t 
idx)
DRV_LOG(ERR,
"Port %u Rx queue %u requested %u*%u

[dpdk-dev] [PATCH 02/11] common/mlx5: support receive memory pool

2021-09-26 Thread Xueming Li
Add DevX support for the PRM shared receive memory pool (RMP) object.
RMP is used to support shared Rx queues: multiple RQs can share the same
RMP, and memory buffers are supplied to the RMP.

This patch makes the RMP-based RQ optional, created only if mlx5_devx_rq.rmp
is set.

Signed-off-by: Xueming Li 
---
 drivers/common/mlx5/mlx5_common_devx.c | 310 +
 drivers/common/mlx5/mlx5_common_devx.h |  19 +-
 drivers/common/mlx5/mlx5_devx_cmds.c   |  52 +
 drivers/common/mlx5/mlx5_devx_cmds.h   |  16 ++
 drivers/common/mlx5/mlx5_prm.h |  85 ++-
 drivers/common/mlx5/version.map|   1 +
 drivers/net/mlx5/mlx5_devx.c   |   2 +-
 7 files changed, 434 insertions(+), 51 deletions(-)

diff --git a/drivers/common/mlx5/mlx5_common_devx.c 
b/drivers/common/mlx5/mlx5_common_devx.c
index 22c8d356c45..cd6f13a66b6 100644
--- a/drivers/common/mlx5/mlx5_common_devx.c
+++ b/drivers/common/mlx5/mlx5_common_devx.c
@@ -271,6 +271,39 @@ mlx5_devx_sq_create(void *ctx, struct mlx5_devx_sq 
*sq_obj, uint16_t log_wqbb_n,
return -rte_errno;
 }
 
+/**
+ * Destroy DevX Receive Queue resources.
+ *
+ * @param[in] rq_res
+ *   DevX RQ resource to destroy.
+ */
+static void
+mlx5_devx_wq_res_destroy(struct mlx5_devx_wq_res *rq_res)
+{
+   if (rq_res->umem_obj)
+   claim_zero(mlx5_os_umem_dereg(rq_res->umem_obj));
+   if (rq_res->umem_buf)
+   mlx5_free((void *)(uintptr_t)rq_res->umem_buf);
+   memset(rq_res, 0, sizeof(*rq_res));
+}
+
+/**
+ * Destroy DevX Receive Memory Pool.
+ *
+ * @param[in] rmp
+ *   DevX RMP to destroy.
+ */
+static void
+mlx5_devx_rmp_destroy(struct mlx5_devx_rmp *rmp)
+{
+   MLX5_ASSERT(rmp->ref_cnt == 0);
+   if (rmp->rmp) {
+   claim_zero(mlx5_devx_cmd_destroy(rmp->rmp));
+   rmp->rmp = NULL;
+   }
+   mlx5_devx_wq_res_destroy(&rmp->wq);
+}
+
 /**
  * Destroy DevX Receive Queue.
  *
@@ -280,55 +313,47 @@ mlx5_devx_sq_create(void *ctx, struct mlx5_devx_sq 
*sq_obj, uint16_t log_wqbb_n,
 void
 mlx5_devx_rq_destroy(struct mlx5_devx_rq *rq)
 {
-   if (rq->rq)
+   if (rq->rq) {
claim_zero(mlx5_devx_cmd_destroy(rq->rq));
-   if (rq->umem_obj)
-   claim_zero(mlx5_os_umem_dereg(rq->umem_obj));
-   if (rq->umem_buf)
-   mlx5_free((void *)(uintptr_t)rq->umem_buf);
+   rq->rq = NULL;
+   }
+   if (rq->rmp == NULL) {
+   mlx5_devx_wq_res_destroy(&rq->wq);
+   } else {
+   MLX5_ASSERT(rq->rmp->ref_cnt > 0);
+   rq->rmp->ref_cnt--;
+   if (rq->rmp->ref_cnt == 0)
+   mlx5_devx_rmp_destroy(rq->rmp);
+   }
+   rq->db_rec = 0;
 }
 
 /**
- * Create Receive Queue using DevX API.
- *
- * Get a pointer to partially initialized attributes structure, and updates the
- * following fields:
- *   wq_umem_valid
- *   wq_umem_id
- *   wq_umem_offset
- *   dbr_umem_valid
- *   dbr_umem_id
- *   dbr_addr
- *   log_wq_pg_sz
- * All other fields are updated by caller.
+ * Create WQ resources using DevX API.
  *
  * @param[in] ctx
  *   Context returned from mlx5 open_device() glue function.
- * @param[in/out] rq_obj
- *   Pointer to RQ to create.
+ * @param[in/out] rq_res
+ *   Pointer to RQ resource to create.
  * @param[in] wqe_size
  *   Size of WQE structure.
  * @param[in] log_wqbb_n
  *   Log of number of WQBBs in queue.
- * @param[in] attr
- *   Pointer to RQ attributes structure.
- * @param[in] socket
- *   Socket to use for allocation.
+ * @param[in] wq_attr
+ *   Pointer to WQ attributes structure.
  *
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-int
-mlx5_devx_rq_create(void *ctx, struct mlx5_devx_rq *rq_obj, uint32_t wqe_size,
-   uint16_t log_wqbb_n,
-   struct mlx5_devx_create_rq_attr *attr, int socket)
+static int
+mlx5_devx_wq_res_create(void *ctx, struct mlx5_devx_wq_res *rq_res,
+   uint32_t wqe_size, uint16_t log_wqbb_n,
+   struct mlx5_devx_wq_attr *wq_attr, int socket)
 {
-   struct mlx5_devx_obj *rq = NULL;
struct mlx5dv_devx_umem *umem_obj = NULL;
void *umem_buf = NULL;
size_t alignment = MLX5_WQE_BUF_ALIGNMENT;
-   uint32_t umem_size, umem_dbrec;
-   uint16_t rq_size = 1 << log_wqbb_n;
+   uint32_t umem_size;
int ret;
 
if (alignment == (size_t)-1) {
@@ -337,8 +362,7 @@ mlx5_devx_rq_create(void *ctx, struct mlx5_devx_rq *rq_obj, 
uint32_t wqe_size,
return -rte_errno;
}
/* Allocate memory buffer for WQEs and doorbell record. */
-   umem_size = wqe_size * rq_size;
-   umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
+   umem_size = wqe_size * (1 << log_wqbb_n);
umem_size += MLX5_DBR_SIZE;
umem_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
   alignment, socket);
@@ -355,14 +379,58 @@ mlx5_d

[dpdk-dev] [PATCH 03/11] net/mlx5: clean Rx queue code

2021-09-26 Thread Xueming Li
Removes unused rxq code.

Signed-off-by: Xueming Li 
---
 drivers/net/mlx5/mlx5_rxq.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 396de327d11..7e97cdd4bc0 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -674,9 +674,7 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, 
uint16_t desc,
struct rte_mempool *mp)
 {
struct mlx5_priv *priv = dev->data->dev_private;
-   struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
-   struct mlx5_rxq_ctrl *rxq_ctrl =
-   container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+   struct mlx5_rxq_ctrl *rxq_ctrl;
struct rte_eth_rxseg_split *rx_seg =
(struct rte_eth_rxseg_split *)conf->rx_seg;
struct rte_eth_rxseg_split rx_single = {.mp = mp};
@@ -743,9 +741,7 @@ mlx5_rx_hairpin_queue_setup(struct rte_eth_dev *dev, 
uint16_t idx,
const struct rte_eth_hairpin_conf *hairpin_conf)
 {
struct mlx5_priv *priv = dev->data->dev_private;
-   struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
-   struct mlx5_rxq_ctrl *rxq_ctrl =
-   container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+   struct mlx5_rxq_ctrl *rxq_ctrl;
int res;
 
res = mlx5_rx_queue_pre_setup(dev, idx, &desc);
-- 
2.33.0



[dpdk-dev] [PATCH 10/11] net/mlx5: remove Rx queue data list from device

2021-09-26 Thread Xueming Li
The Rx queue data list (priv->rxqs) can be replaced by the Rx queue
list (priv->rxq_privs); remove it and replace accesses with the
universal wrapper API.
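
As context for that wrapper API, a minimal sketch of mlx5_rxq_data_get()
with the body assumed from its call sites in this patch (for example the
LRO check below), not verbatim from the series:

struct mlx5_rxq_data *
mlx5_rxq_data_get(struct rte_eth_dev *dev, uint16_t idx)
{
        struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, idx);

        /* The shared rxq_data lives in the ctrl structure the private
         * per-queue data points to. */
        return rxq == NULL ? NULL : &rxq->ctrl->rxq;
}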

Signed-off-by: Xueming Li 
---
 drivers/net/mlx5/linux/mlx5_verbs.c |  7 ++---
 drivers/net/mlx5/mlx5.c | 10 +--
 drivers/net/mlx5/mlx5.h |  1 -
 drivers/net/mlx5/mlx5_devx.c| 13 +
 drivers/net/mlx5/mlx5_ethdev.c  |  6 +---
 drivers/net/mlx5/mlx5_flow.c| 45 +++--
 drivers/net/mlx5/mlx5_rss.c |  6 ++--
 drivers/net/mlx5/mlx5_rx.c  | 16 --
 drivers/net/mlx5/mlx5_rx.h  |  9 +++---
 drivers/net/mlx5/mlx5_rxq.c | 23 ++-
 drivers/net/mlx5/mlx5_rxtx_vec.c|  6 ++--
 drivers/net/mlx5/mlx5_stats.c   |  9 +++---
 drivers/net/mlx5/mlx5_trigger.c |  2 +-
 13 files changed, 66 insertions(+), 87 deletions(-)

diff --git a/drivers/net/mlx5/linux/mlx5_verbs.c 
b/drivers/net/mlx5/linux/mlx5_verbs.c
index a2a9b9c1f98..0e68a13208b 100644
--- a/drivers/net/mlx5/linux/mlx5_verbs.c
+++ b/drivers/net/mlx5/linux/mlx5_verbs.c
@@ -527,11 +527,10 @@ mlx5_ibv_ind_table_new(struct rte_eth_dev *dev, const 
unsigned int log_n,
 
MLX5_ASSERT(ind_tbl);
for (i = 0; i != ind_tbl->queues_n; ++i) {
-   struct mlx5_rxq_data *rxq = (*priv->rxqs)[ind_tbl->queues[i]];
-   struct mlx5_rxq_ctrl *rxq_ctrl =
-   container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+   struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev,
+ind_tbl->queues[i]);
 
-   wq[i] = rxq_ctrl->obj->wq;
+   wq[i] = rxq->ctrl->obj->wq;
}
MLX5_ASSERT(i > 0);
/* Finalise indirection table. */
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 749729d6fbe..6681b74c8f0 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1572,20 +1572,12 @@ mlx5_dev_close(struct rte_eth_dev *dev)
mlx5_mp_os_req_stop_rxtx(dev);
/* Free the eCPRI flex parser resource. */
mlx5_flex_parser_ecpri_release(dev);
-   if (priv->rxqs != NULL) {
+   if (priv->rxq_privs != NULL) {
/* XXX race condition if mlx5_rx_burst() is still running. */
rte_delay_us_sleep(1000);
for (i = 0; (i != priv->rxqs_n); ++i)
mlx5_rxq_release(dev, i);
priv->rxqs_n = 0;
-   priv->rxqs = NULL;
-   }
-   if (priv->representor) {
-   /* Each representor has a dedicated interrupts handler */
-   mlx5_free(dev->intr_handle);
-   dev->intr_handle = NULL;
-   }
-   if (priv->rxq_privs != NULL) {
mlx5_free(priv->rxq_privs);
priv->rxq_privs = NULL;
}
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index c674f5ba9c4..6a9c99a8826 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1406,7 +1406,6 @@ struct mlx5_priv {
unsigned int rxqs_n; /* RX queues array size. */
unsigned int txqs_n; /* TX queues array size. */
struct mlx5_rxq_priv *(*rxq_privs)[]; /* RX queue non-shared data. */
-   struct mlx5_rxq_data *(*rxqs)[]; /* (Shared) RX queues. */
struct mlx5_txq_data *(*txqs)[]; /* TX queues. */
struct rte_mempool *mprq_mp; /* Mempool for Multi-Packet RQ. */
struct rte_eth_rss_conf rss_conf; /* RSS configuration. */
diff --git a/drivers/net/mlx5/mlx5_devx.c b/drivers/net/mlx5/mlx5_devx.c
index d219e255f0a..371ff387c99 100644
--- a/drivers/net/mlx5/mlx5_devx.c
+++ b/drivers/net/mlx5/mlx5_devx.c
@@ -682,15 +682,16 @@ mlx5_devx_tir_attr_set(struct rte_eth_dev *dev, const 
uint8_t *rss_key,
 
/* NULL queues designate drop queue. */
if (ind_tbl->queues != NULL) {
-   struct mlx5_rxq_data *rxq_data =
-   (*priv->rxqs)[ind_tbl->queues[0]];
-   struct mlx5_rxq_ctrl *rxq_ctrl =
-   container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
-   rxq_obj_type = rxq_ctrl->type;
+   struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev,
+ind_tbl->queues[0]);
 
+   rxq_obj_type = rxq->ctrl->type;
/* Enable TIR LRO only if all the queues were configured for. */
for (i = 0; i < ind_tbl->queues_n; ++i) {
-   if (!(*priv->rxqs)[ind_tbl->queues[i]]->lro) {
+   struct mlx5_rxq_data *rxq_i =
+   mlx5_rxq_data_get(dev, ind_tbl->queues[i]);
+
+   if (rxq_i != NULL && !rxq_i->lro) {
lro = false;
break;
}
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 7071a5f7039..16e96da8d24 100644
--- a/d

[dpdk-dev] [PATCH 11/11] net/mlx5: support shared Rx queue

2021-09-26 Thread Xueming Li
This patch introduces shared RXQ. All shared Rx queues with the same
group and queue ID share the same rxq_ctrl. Rxq_ctrl and rxq_data are
shared; all queues from different member ports share the same WQ and CQ,
essentially one Rx WQ, and mbufs are filled into this singleton WQ.

The shared rxq_data is set into the device Rx queues of all member ports
as the rxq object and used for receiving packets. Polling a queue of any
member port returns packets of any member; mbuf->port identifies the
source port.
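
As a minimal configuration sketch: the offload flag comes from the
companion ethdev series in this thread, while the shared_group rxconf
field name and the group value are assumptions for illustration. Each
member port sets up its queue the same way, then the application polls
any member and demuxes by mbuf->port:

#include <rte_ethdev.h>

static int
setup_shared_rxq(uint16_t port_id, uint16_t nb_desc, unsigned int socket_id,
                 struct rte_mempool *mp)
{
        struct rte_eth_dev_info dev_info;
        struct rte_eth_rxconf rxconf;
        int ret;

        ret = rte_eth_dev_info_get(port_id, &dev_info);
        if (ret != 0)
                return ret;
        rxconf = dev_info.default_rxconf;
        /* Offload flag from the companion ethdev series. */
        rxconf.offloads |= RTE_ETH_RX_OFFLOAD_SHARED_RXQ;
        /* Assumed field name: all member ports use the same share group. */
        rxconf.shared_group = 1;
        return rte_eth_rx_queue_setup(port_id, 0, nb_desc, socket_id,
                                      &rxconf, mp);
}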

Signed-off-by: Xueming Li 
---
 doc/guides/nics/features/mlx5.ini |   1 +
 doc/guides/nics/mlx5.rst  |   6 +
 drivers/net/mlx5/linux/mlx5_os.c  |   2 +
 drivers/net/mlx5/mlx5.h   |   2 +
 drivers/net/mlx5/mlx5_devx.c  |   9 +-
 drivers/net/mlx5/mlx5_rx.h|   7 +
 drivers/net/mlx5/mlx5_rxq.c   | 208 ++
 drivers/net/mlx5/mlx5_trigger.c   |  76 ++-
 8 files changed, 255 insertions(+), 56 deletions(-)

diff --git a/doc/guides/nics/features/mlx5.ini 
b/doc/guides/nics/features/mlx5.ini
index f01abd4231f..ff5e669acc1 100644
--- a/doc/guides/nics/features/mlx5.ini
+++ b/doc/guides/nics/features/mlx5.ini
@@ -11,6 +11,7 @@ Removal event= Y
 Rx interrupt = Y
 Fast mbuf free   = Y
 Queue start/stop = Y
+Shared Rx queue  = Y
 Burst mode info  = Y
 Power mgmt address monitor = Y
 MTU update   = Y
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index ca3e7f560da..494ee957c1d 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -113,6 +113,7 @@ Features
 - Connection tracking.
 - Sub-Function representors.
 - Sub-Function.
+- Shared Rx queue.
 
 
 Limitations
@@ -464,6 +465,11 @@ Limitations
   - In order to achieve best insertion rate, application should manage the 
flows per lcore.
   - Better to disable memory reclaim by setting ``reclaim_mem_mode`` to 0 to 
accelerate the flow object allocation and release with cache.
 
+ Shared Rx queue:
+
+  - Counters of received packets and bytes of devices in the same share group are the same.
+  - Counters of received packets and bytes of queues in the same group with the same queue ID are the same.
+
 Statistics
 --
 
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 27233b679c6..b631768b4f9 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -457,6 +457,7 @@ mlx5_alloc_shared_dr(struct mlx5_priv *priv)
mlx5_glue->dr_create_flow_action_default_miss();
if (!sh->default_miss_action)
DRV_LOG(WARNING, "Default miss action is not supported.");
+   LIST_INIT(&sh->shared_rxqs);
return 0;
 error:
/* Rollback the created objects. */
@@ -531,6 +532,7 @@ mlx5_os_free_shared_dr(struct mlx5_priv *priv)
MLX5_ASSERT(sh && sh->refcnt);
if (sh->refcnt > 1)
return;
+   MLX5_ASSERT(LIST_EMPTY(&sh->shared_rxqs));
 #ifdef HAVE_MLX5DV_DR
if (sh->rx_domain) {
mlx5_glue->dr_destroy_domain(sh->rx_domain);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 6a9c99a8826..c671c8a354f 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1193,6 +1193,7 @@ struct mlx5_dev_ctx_shared {
struct mlx5_flex_parser_profiles fp[MLX5_FLEX_PARSER_MAX];
/* Flex parser profiles information. */
void *devx_rx_uar; /* DevX UAR for Rx. */
+   LIST_HEAD(shared_rxqs, mlx5_rxq_ctrl) shared_rxqs; /* Shared RXQs. */
struct mlx5_aso_age_mng *aso_age_mng;
/* Management data for aging mechanism using ASO Flow Hit. */
struct mlx5_geneve_tlv_option_resource *geneve_tlv_option_resource;
@@ -1257,6 +1258,7 @@ struct mlx5_rxq_obj {
};
struct mlx5_devx_obj *rq; /* DevX RQ object for hairpin. */
struct {
+   struct mlx5_devx_rmp devx_rmp; /* RMP for shared RQ. */
struct mlx5_devx_cq cq_obj; /* DevX CQ object. */
void *devx_channel;
};
diff --git a/drivers/net/mlx5/mlx5_devx.c b/drivers/net/mlx5/mlx5_devx.c
index 371ff387c99..01561639038 100644
--- a/drivers/net/mlx5/mlx5_devx.c
+++ b/drivers/net/mlx5/mlx5_devx.c
@@ -170,6 +170,8 @@ mlx5_rxq_devx_obj_release(struct mlx5_rxq_priv *rxq)
memset(&rxq->devx_rq, 0, sizeof(rxq->devx_rq));
mlx5_devx_cq_destroy(&rxq_obj->cq_obj);
memset(&rxq_obj->cq_obj, 0, sizeof(rxq_obj->cq_obj));
+   if (!RXQ_CTRL_LAST(rxq))
+   return;
if (rxq_obj->devx_channel) {
mlx5_os_devx_destroy_event_channel
(rxq_obj->devx_channel);
@@ -270,6 +272,8 @@ mlx5_rxq_create_devx_rq_resources(struct mlx5_rxq_priv *rxq)
rq_attr.wq_attr.pd = priv->sh->pdn;
rq_attr.counter_set_id = priv->counter_set_id;
/* Create

Re: [dpdk-dev] [PATCH 4/5] kvargs: remove experimental function to compare string

2021-09-26 Thread Olivier Matz
On Fri, Sep 24, 2021 at 05:57:39PM +0200, Olivier Matz wrote:
> The function was designed to be used as a handler for
> rte_kvargs_process() to compare the value string in a kvlist. For
> readability, its usages in DPDK have been replaced by
> rte_kvargs_get_with_value() in previous commit.
> 
> Remove this function, as it is not used anymore.
> 
> Signed-off-by: Olivier Matz 

Self-nack, I forgot to remove the rte_kvargs_strcmp entry
from version.map.

Will send a v2.


Re: [dpdk-dev] [RFC V1] examples/l3fwd-power: fix memory leak for rte_pci_device

2021-09-26 Thread Thomas Monjalon
26/09/2021 14:20, Huisong Li:
> On 2021/9/18 16:46, Thomas Monjalon wrote:
> > 18/09/2021 05:24, Huisong Li:
> >> On 2021/9/17 20:50, Thomas Monjalon wrote:
> >>> 17/09/2021 04:13, Huisong Li:
>  How should PMD free it? What should we do? Any good suggestions?
> >>> Check that there is no other port sharing the same PCI device,
> >>> then call the PMD callback for rte_pci_remove_t.
> >> For primary and secondary processes, their rte_pci_device is independent.
> > Yes it requires to free on both primary and secondary.
> >
> >> Is this for a scenario where there are multiple representor ports under
> >> the same PCI address in the same process?
> > A PCI device can have multiple physical or representor ports.
> Got it.
> >
>  Would it be more appropriate to do this in rte_eal_cleanup() if it
>  can't be done in the API above?
> >>> rte_eal_cleanup is a last cleanup for what was not done earlier.
> >>> We could do that but first we should properly free devices when closed.
> >>>
> >> Totally, it is appropriate that rte_eal_cleanup is responsible for
> >> releasing devices under the pci bus.
> > Yes, but if a device is closed while the rest of the app keeps running,
> > we should not wait to free it.
> 
>  From this point of view, it seems to make sense. However, according to
> the OVS-DPDK usage, it calls dev_close() first, and then checks whether
> all ports under the PCI address are closed to free rte_pci_device by
> calling rte_dev_remove().
> 
> If we do not want the user to be aware of this, and we want
> rte_pci_device to be freed in a timely manner, can we add logic to
> rte_eth_dev_close() that counts the ports under a PCI address and calls
> rte_dev_remove() to free rte_pci_device and delete it from rte_pci_bus?
> 
> If we do, we may need to do some extra work, otherwise some
> applications, such as OVS-DPDK, will fail due to a second call to
> rte_dev_remove().

I don't understand the proposal.
Please could explain again the code path?
It may deserve a separate mail thread.




[dpdk-dev] Symmetric hashing not working in ESXI version 7.0.0

2021-09-26 Thread Ramkumar Nallathambi
Hi Everyone,

We are facing an issue with symmetric hashing. Below is the
definition of symmetric hashing.

The traffic from server A to server B and from server B to server A should
fall under the same flow. This is what we call symmetricity.

I have configured the customized keys for symmetric hashing as suggested in
the DPDK forum. After changing the key to the below, symmetric hashing
started working:
static uint8_t hash_key[RSS_HASH_KEY_LENGTH] = {
        0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A,
        0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A,
        0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A,
        0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A,
        0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A, 0x6D, 0x5A,
};
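
For reference, a minimal sketch of applying such a key through the ethdev
API; the port_id and the rss_hf selection are illustrative assumptions,
not from the original mail, and RSS_HASH_KEY_LENGTH is assumed to be the
40 bytes of the key above:

#include <rte_ethdev.h>

static int
apply_symmetric_rss(uint16_t port_id)
{
        struct rte_eth_rss_conf rss_conf = {
                .rss_key = hash_key,
                .rss_key_len = RSS_HASH_KEY_LENGTH,
                /* Illustrative hash-function selection. */
                .rss_hf = ETH_RSS_IP | ETH_RSS_TCP | ETH_RSS_UDP,
        };

        /* Program the symmetric 0x6D5A key into the port's RSS config. */
        return rte_eth_dev_rss_hash_update(port_id, &rss_conf);
}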



But it works on ESXi server version 6.7.0 and does not work on ESXi
server version 7.0.0.

Is anyone aware of this issue? We are using the vmxnet3 driver and DPDK
version 19.11.



Regards,

Ramkumar N.


[dpdk-dev] [PATCH] Enhance code readability when dma_map in ifc/ifcvf_vdpa

2021-09-26 Thread jilei chen
From: Jilei Chen 

Signed-off-by: Jilei Chen 
---
 drivers/net/ifc/ifcvf_vdpa.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ifc/ifcvf_vdpa.c b/drivers/net/ifc/ifcvf_vdpa.c
index da4667ba54..fe7ec87a8a 100644
--- a/drivers/net/ifc/ifcvf_vdpa.c
+++ b/drivers/net/ifc/ifcvf_vdpa.c
@@ -37,6 +37,8 @@
 
 #define IFCVF_VDPA_MODE"vdpa"
 #define IFCVF_SW_FALLBACK_LM   "sw-live-migration"
+#define IFCVF_MAP  1
+#define IFCVF_UNMAP0
 
 static const char * const ifcvf_valid_arguments[] = {
IFCVF_VDPA_MODE,
@@ -535,7 +537,7 @@ update_datapath(struct ifcvf_internal *internal)
if (!rte_atomic32_read(&internal->running) &&
(rte_atomic32_read(&internal->started) &&
 rte_atomic32_read(&internal->dev_attached))) {
-   ret = ifcvf_dma_map(internal, 1);
+   ret = ifcvf_dma_map(internal, IFCVF_MAP);
if (ret)
goto err;
 
@@ -565,7 +567,7 @@ update_datapath(struct ifcvf_internal *internal)
if (ret)
goto err;
 
-   ret = ifcvf_dma_map(internal, 0);
+   ret = ifcvf_dma_map(internal, IFCVF_UNMAP);
if (ret)
goto err;
 
@@ -869,7 +871,7 @@ ifcvf_sw_fallback_switchover(struct ifcvf_internal 
*internal)
 unset_intr:
vdpa_disable_vfio_intr(internal);
 unmap:
-   ifcvf_dma_map(internal, 0);
+   ifcvf_dma_map(internal, IFCVF_UNMAP);
 error:
return -1;
 }
@@ -926,7 +928,7 @@ ifcvf_dev_close(int vid)
vdpa_disable_vfio_intr(internal);
 
/* unset DMA map for guest memory */
-   ifcvf_dma_map(internal, 0);
+   ifcvf_dma_map(internal, IFCVF_UNMAP);
 
internal->sw_fallback_running = false;
} else {
-- 
2.12.2





[dpdk-dev] [PATCH] Enhance code readability when dma_map in ifc/ifcvf_vdpa

2021-09-26 Thread jilei chen
Signed-off-by: jilei chen 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 1dc813d0a3..c2bf26f2b7 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -36,6 +36,8 @@ RTE_LOG_REGISTER(ifcvf_vdpa_logtype, pmd.vdpa.ifcvf, NOTICE);
 
 #define IFCVF_VDPA_MODE"vdpa"
 #define IFCVF_SW_FALLBACK_LM   "sw-live-migration"
+#define IFCVF_MAP  1
+#define IFCVF_UNMAP0
 
 #define THREAD_NAME_LEN16
 
@@ -538,7 +540,7 @@ update_datapath(struct ifcvf_internal *internal)
if (!rte_atomic32_read(&internal->running) &&
(rte_atomic32_read(&internal->started) &&
 rte_atomic32_read(&internal->dev_attached))) {
-   ret = ifcvf_dma_map(internal, 1);
+   ret = ifcvf_dma_map(internal, IFCVF_MAP);
if (ret)
goto err;
 
@@ -568,7 +570,7 @@ update_datapath(struct ifcvf_internal *internal)
if (ret)
goto err;
 
-   ret = ifcvf_dma_map(internal, 0);
+   ret = ifcvf_dma_map(internal, IFCVF_UNMAP);
if (ret)
goto err;
 
@@ -875,7 +877,7 @@ ifcvf_sw_fallback_switchover(struct ifcvf_internal 
*internal)
 unset_intr:
vdpa_disable_vfio_intr(internal);
 unmap:
-   ifcvf_dma_map(internal, 0);
+   ifcvf_dma_map(internal, IFCVF_UNMAP);
 error:
return -1;
 }
@@ -934,7 +936,7 @@ ifcvf_dev_close(int vid)
vdpa_disable_vfio_intr(internal);
 
/* unset DMA map for guest memory */
-   ifcvf_dma_map(internal, 0);
+   ifcvf_dma_map(internal, IFCVF_UNMAP);
 
internal->sw_fallback_running = false;
} else {
-- 
2.12.2





Re: [dpdk-dev] [PATCH v3 2/8] ethdev: new API to aggregate shared Rx queue group

2021-09-26 Thread Ajit Khaparde
On Fri, Sep 17, 2021 at 1:02 AM Xueming Li  wrote:
>
> This patch introduces new api to aggreated ports among same shared Rx
s/aggregated/aggregate

> queue group.  Only queues with specified share group is aggregated.
s/is/are

> Rx burst and device close are expected to be supported by new device.
>
> Signed-off-by: Xueming Li 
Minor nits - typos actually!

> ---
>  lib/ethdev/ethdev_driver.h | 23 ++-
>  lib/ethdev/rte_ethdev.c| 22 ++
>  lib/ethdev/rte_ethdev.h| 16 
>  lib/ethdev/version.map |  3 +++
>  4 files changed, 63 insertions(+), 1 deletion(-)
>
> diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
> index 524757cf6f..72156a4153 100644
> --- a/lib/ethdev/ethdev_driver.h
> +++ b/lib/ethdev/ethdev_driver.h
> @@ -786,10 +786,28 @@ typedef int (*eth_get_monitor_addr_t)(void *rxq,
>   * @return
>   *   Negative errno value on error, number of info entries otherwise.
>   */
> -
>  typedef int (*eth_representor_info_get_t)(struct rte_eth_dev *dev,
> struct rte_eth_representor_info *info);
>
> +/**
> + * @internal
> + * Aggregate shared Rx queue.
> + *
> + * Create a new port used for shared Rx queue polling.
> + *
> + * Only queues with specified share group are aggregated.
> + * At least Rx burst and device close should be supported.
> + *
> + * @param dev
> + *   Ethdev handle of port.
> + * @param group
> + *   Shared Rx queue group to aggregate.
> + * @return
> + *   UINT16_MAX if failed, otherwise aggregated port number.
> + */
> +typedef int (*eth_shared_rxq_aggregate_t)(struct rte_eth_dev *dev,
> + uint32_t group);
> +
>  /**
>   * @internal A structure containing the functions exported by an Ethernet 
> driver.
>   */
> @@ -950,6 +968,9 @@ struct eth_dev_ops {
>
> eth_representor_info_get_t representor_info_get;
> /**< Get representor info. */
> +
> +   eth_shared_rxq_aggregate_t shared_rxq_aggregate;
> +   /**< Aggregate shared Rx queue. */
>  };
>
>  /**
> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> index b3a58d5e65..9f2ef58309 100644
> --- a/lib/ethdev/rte_ethdev.c
> +++ b/lib/ethdev/rte_ethdev.c
> @@ -6301,6 +6301,28 @@ rte_eth_representor_info_get(uint16_t port_id,
> return eth_err(port_id, (*dev->dev_ops->representor_info_get)(dev, 
> info));
>  }
>
> +uint16_t
> +rte_eth_shared_rxq_aggregate(uint16_t port_id, uint32_t group)
> +{
> +   struct rte_eth_dev *dev;
> +   uint64_t offloads;
> +
> +   RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
> +   dev = &rte_eth_devices[port_id];
> +
> +   RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->shared_rxq_aggregate,
> +   UINT16_MAX);
> +
> +   offloads = dev->data->dev_conf.rxmode.offloads;
> +   if ((offloads & RTE_ETH_RX_OFFLOAD_SHARED_RXQ) == 0) {
> +   RTE_ETHDEV_LOG(ERR, "port_id=%u doesn't support Rx offload\n",
> +  port_id);
> +   return UINT16_MAX;
> +   }
> +
> +   return (*dev->dev_ops->shared_rxq_aggregate)(dev, group);
> +}
> +
>  RTE_LOG_REGISTER_DEFAULT(rte_eth_dev_logtype, INFO);
>
>  RTE_INIT(ethdev_init_telemetry)
> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> index a578c9db9d..f15d2142b2 100644
> --- a/lib/ethdev/rte_ethdev.h
> +++ b/lib/ethdev/rte_ethdev.h
> @@ -4895,6 +4895,22 @@ __rte_experimental
>  int rte_eth_representor_info_get(uint16_t port_id,
>  struct rte_eth_representor_info *info);
>
> +/**
> + * Aggregate shared Rx queue ports to one port for polling.
> + *
> + * Only queues with specified share group is aggregated.
s/is/are

> + * Any operation besides Rx burst and device close is unexpected.
> + *
> + * @param port_id
> + *   The port identifier of the device from shared Rx queue group.
> + * @param group
> + *   Shared Rx queue group to aggregate.
> + * @return
> + *   UINT16_MAX if failed, otherwise aggregated port number.
> + */
> +__rte_experimental
> +uint16_t rte_eth_shared_rxq_aggregate(uint16_t port_id, uint32_t group);
> +
>  #include 
>
>  /**
> diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
> index 3eece75b72..97a2233508 100644
> --- a/lib/ethdev/version.map
> +++ b/lib/ethdev/version.map
> @@ -249,6 +249,9 @@ EXPERIMENTAL {
> rte_mtr_meter_policy_delete;
> rte_mtr_meter_policy_update;
> rte_mtr_meter_policy_validate;
> +
> +   # added in 21.11
> +   rte_eth_shared_rxq_aggregate;
>  };
>
>  INTERNAL {
> --
> 2.33.0
>
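
A minimal usage sketch for the proposed API; the port, group, and burst
values are illustrative. Per the doc comment above, only Rx burst and
device close are expected to work on the aggregated port:

static uint16_t agg_port;

static int
setup_aggregated_port(uint16_t member_port_id, uint32_t group)
{
        /* Aggregate once, after the member ports are configured with
         * RTE_ETH_RX_OFFLOAD_SHARED_RXQ. */
        agg_port = rte_eth_shared_rxq_aggregate(member_port_id, group);
        return agg_port == UINT16_MAX ? -1 : 0;
}

/* Datapath: poll only the aggregated port; mbuf->port identifies the
 * member port each packet arrived on. */
static inline uint16_t
poll_aggregated(struct rte_mbuf **pkts, uint16_t burst)
{
        return rte_eth_rx_burst(agg_port, 0, pkts, burst);
}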


Re: [dpdk-dev] [PATCH] net/bnxt: fix function driver register/unregister functions

2021-09-26 Thread Ajit Khaparde
On Wed, Sep 22, 2021 at 1:10 AM Kalesh A P
 wrote:
>
> From: Kalesh AP 
>
> 1. Fix to use correct fields in the request structure of
>HWRM_FUNC_DRV_RGTR.
> 2. Remove the "flags" argument to bnxt_hwrm_func_driver_unregister()
>as it is not needed.
>
> Fixes: beb3087f5056 ("net/bnxt: add driver register/unregister")
> Cc: sta...@dpdk.org
>
> Signed-off-by: Kalesh AP 
> Reviewed-by: Ajit Khaparde 
> Reviewed-by: Lance Richardson 
> Reviewed-by: Somnath Kotur 
Patch applied to dpdk-next-net-brcm. Thanks

> ---
>  drivers/net/bnxt/bnxt_ethdev.c | 2 +-
>  drivers/net/bnxt/bnxt_hwrm.c   | 9 -
>  drivers/net/bnxt/bnxt_hwrm.h   | 2 +-
>  3 files changed, 6 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
> index 097dd10..dc7dee1 100644
> --- a/drivers/net/bnxt/bnxt_ethdev.c
> +++ b/drivers/net/bnxt/bnxt_ethdev.c
> @@ -6156,7 +6156,7 @@ bnxt_uninit_resources(struct bnxt *bp, bool 
> reconfig_dev)
> bp->pf->vf_req_buf = NULL;
> }
>
> -   rc = bnxt_hwrm_func_driver_unregister(bp, 0);
> +   rc = bnxt_hwrm_func_driver_unregister(bp);
> bp->flags &= ~BNXT_FLAG_REGISTERED;
> bnxt_free_ctx_mem(bp);
> if (!reconfig_dev) {
> diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
> index 585cded..d4d8581 100644
> --- a/drivers/net/bnxt/bnxt_hwrm.c
> +++ b/drivers/net/bnxt/bnxt_hwrm.c
> @@ -1059,9 +1059,9 @@ int bnxt_hwrm_func_driver_register(struct bnxt *bp)
> HWRM_PREP(&req, HWRM_FUNC_DRV_RGTR, BNXT_USE_CHIMP_MB);
> req.enables = rte_cpu_to_le_32(HWRM_FUNC_DRV_RGTR_INPUT_ENABLES_VER |
> HWRM_FUNC_DRV_RGTR_INPUT_ENABLES_ASYNC_EVENT_FWD);
> -   req.ver_maj = RTE_VER_YEAR;
> -   req.ver_min = RTE_VER_MONTH;
> -   req.ver_upd = RTE_VER_MINOR;
> +   req.ver_maj_8b = RTE_VER_YEAR;
> +   req.ver_min_8b = RTE_VER_MONTH;
> +   req.ver_upd_8b = RTE_VER_MINOR;
>
> if (BNXT_PF(bp)) {
> req.enables |= rte_cpu_to_le_32(
> @@ -1370,7 +1370,7 @@ int bnxt_hwrm_ver_get(struct bnxt *bp, uint32_t timeout)
> return rc;
>  }
>
> -int bnxt_hwrm_func_driver_unregister(struct bnxt *bp, uint32_t flags)
> +int bnxt_hwrm_func_driver_unregister(struct bnxt *bp)
>  {
> int rc;
> struct hwrm_func_drv_unrgtr_input req = {.req_type = 0 };
> @@ -1380,7 +1380,6 @@ int bnxt_hwrm_func_driver_unregister(struct bnxt *bp, 
> uint32_t flags)
> return 0;
>
> HWRM_PREP(&req, HWRM_FUNC_DRV_UNRGTR, BNXT_USE_CHIMP_MB);
> -   req.flags = flags;
>
> rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
>
> diff --git a/drivers/net/bnxt/bnxt_hwrm.h b/drivers/net/bnxt/bnxt_hwrm.h
> index b311ff5..6dc23b9 100644
> --- a/drivers/net/bnxt/bnxt_hwrm.h
> +++ b/drivers/net/bnxt/bnxt_hwrm.h
> @@ -145,7 +145,7 @@ int bnxt_hwrm_func_buf_unrgtr(struct bnxt *bp);
>  int bnxt_hwrm_func_driver_register(struct bnxt *bp);
>  int bnxt_hwrm_func_qcaps(struct bnxt *bp);
>  int bnxt_hwrm_func_reset(struct bnxt *bp);
> -int bnxt_hwrm_func_driver_unregister(struct bnxt *bp, uint32_t flags);
> +int bnxt_hwrm_func_driver_unregister(struct bnxt *bp);
>  int bnxt_hwrm_func_qstats(struct bnxt *bp, uint16_t fid,
>   struct rte_eth_stats *stats,
>   struct hwrm_func_qstats_output *func_qstats);
> --
> 2.10.1
>


Re: [dpdk-dev] [PATCH] net/bnxt: fix Tx queue startup state

2021-09-26 Thread Ajit Khaparde
On Fri, Sep 24, 2021 at 5:55 PM Ajit Khaparde
 wrote:
>
> Default queue state of Tx queues on startup is not correct.
> Fix this by setting the state when the port is started.
>
> Fixes: 6eb3cc2294fd ("net/bnxt: add initial Tx code")
> Cc: sta...@dpdk.org
>
> Signed-off-by: Ajit Khaparde 
> Reviewed-by: Lance Richardson 
Patch applied to dpdk-next-net-brcm. Thanks

> ---
>  drivers/net/bnxt/bnxt_ethdev.c | 10 ++
>  drivers/net/bnxt/bnxt_txq.c|  5 -
>  2 files changed, 10 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
> index 097dd10de9..eb9147927e 100644
> --- a/drivers/net/bnxt/bnxt_ethdev.c
> +++ b/drivers/net/bnxt/bnxt_ethdev.c
> @@ -814,6 +814,16 @@ static int bnxt_start_nic(struct bnxt *bp)
> }
> }
>
> +   for (j = 0; j < bp->tx_nr_rings; j++) {
> +   struct bnxt_tx_queue *txq = bp->tx_queues[j];
> +
> +   if (!txq->tx_deferred_start) {
> +   bp->eth_dev->data->tx_queue_state[j] =
> +   RTE_ETH_QUEUE_STATE_STARTED;
> +   txq->tx_started = true;
> +   }
> +   }
> +
> rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, &bp->vnic_info[0], 0, NULL);
> if (rc) {
> PMD_DRV_LOG(ERR,
> diff --git a/drivers/net/bnxt/bnxt_txq.c b/drivers/net/bnxt/bnxt_txq.c
> index 830416af3d..3ffc334cff 100644
> --- a/drivers/net/bnxt/bnxt_txq.c
> +++ b/drivers/net/bnxt/bnxt_txq.c
> @@ -166,11 +166,6 @@ int bnxt_tx_queue_setup_op(struct rte_eth_dev *eth_dev,
>
> eth_dev->data->tx_queues[queue_idx] = txq;
>
> -   if (txq->tx_deferred_start)
> -   txq->tx_started = false;
> -   else
> -   txq->tx_started = true;
> -
> return 0;
>  err:
> bnxt_tx_queue_release_op(txq);
> --
> 2.30.1 (Apple Git-130)
>


Re: [dpdk-dev] [PATCH] net/bnxt: fix mem zone free for Tx and Rx rings

2021-09-26 Thread Ajit Khaparde
On Fri, Sep 24, 2021 at 5:57 PM Ajit Khaparde
 wrote:
>
> The device cleanup logic was freeing most of the ring related memory,
> but was not freeing up the mem zone associated with the rings.
> This patch fixes the issue.
>
> Fixes: 2eb53b134aae ("net/bnxt: add initial Rx code")
> Fixes: 6eb3cc2294fd ("net/bnxt: add initial Tx code")
> Cc: sta...@dpdk.org
>
> Signed-off-by: Ajit Khaparde 
> Reviewed-by: Lance Richardson 
> Reviewed-by: Somnath Kotur 
Patch applied to dpdk-next-net-brcm.

> ---
>  drivers/net/bnxt/bnxt_rxr.c | 3 +++
>  drivers/net/bnxt/bnxt_txr.c | 3 +++
>  2 files changed, 6 insertions(+)
>
> diff --git a/drivers/net/bnxt/bnxt_rxr.c b/drivers/net/bnxt/bnxt_rxr.c
> index a40fa50138..4c1ee4294e 100644
> --- a/drivers/net/bnxt/bnxt_rxr.c
> +++ b/drivers/net/bnxt/bnxt_rxr.c
> @@ -1207,6 +1207,9 @@ void bnxt_free_rx_rings(struct bnxt *bp)
> rte_free(rxq->cp_ring->cp_ring_struct);
> rte_free(rxq->cp_ring);
>
> +   rte_memzone_free(rxq->mz);
> +   rxq->mz = NULL;
> +
> rte_free(rxq);
> bp->rx_queues[i] = NULL;
> }
> diff --git a/drivers/net/bnxt/bnxt_txr.c b/drivers/net/bnxt/bnxt_txr.c
> index 47824334ae..9e45ddd7a8 100644
> --- a/drivers/net/bnxt/bnxt_txr.c
> +++ b/drivers/net/bnxt/bnxt_txr.c
> @@ -38,6 +38,9 @@ void bnxt_free_tx_rings(struct bnxt *bp)
> rte_free(txq->cp_ring->cp_ring_struct);
> rte_free(txq->cp_ring);
>
> +   rte_memzone_free(txq->mz);
> +   txq->mz = NULL;
> +
> rte_free(txq);
> bp->tx_queues[i] = NULL;
> }
> --
> 2.30.1 (Apple Git-130)
>


[dpdk-dev] [PATCH v2] net/vhost: merge vhost stats loop in vhost Tx/Rx

2021-09-26 Thread Gaoxiang Liu
To improve performance in vhost Tx/Rx, merge the vhost stats loops.
eth_vhost_tx has two loops iterating over the packets to send;
they can be merged into one.
eth_vhost_rx has the same issue as Tx.

Fixes: 4d6cf2ac93dc ("net/vhost: add extended statistics")

Signed-off-by: Gaoxiang Liu 
---

v2:
 * Fix coding style issues.
---
 drivers/net/vhost/rte_eth_vhost.c | 62 ++-
 1 file changed, 28 insertions(+), 34 deletions(-)

diff --git a/drivers/net/vhost/rte_eth_vhost.c 
b/drivers/net/vhost/rte_eth_vhost.c
index a202931e9a..1d554caf9e 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -336,38 +336,29 @@ vhost_count_xcast_packets(struct vhost_queue *vq,
 }
 
 static void
-vhost_update_packet_xstats(struct vhost_queue *vq, struct rte_mbuf **bufs,
-  uint16_t count, uint64_t nb_bytes,
-  uint64_t nb_missed)
+vhost_update_single_packet_xstats(struct vhost_queue *vq, struct rte_mbuf *buf)
 {
uint32_t pkt_len = 0;
-   uint64_t i = 0;
uint64_t index;
struct vhost_stats *pstats = &vq->stats;
 
-   pstats->xstats[VHOST_BYTE] += nb_bytes;
-   pstats->xstats[VHOST_MISSED_PKT] += nb_missed;
-   pstats->xstats[VHOST_UNICAST_PKT] += nb_missed;
-
-   for (i = 0; i < count ; i++) {
-   pstats->xstats[VHOST_PKT]++;
-   pkt_len = bufs[i]->pkt_len;
-   if (pkt_len == 64) {
-   pstats->xstats[VHOST_64_PKT]++;
-   } else if (pkt_len > 64 && pkt_len < 1024) {
-   index = (sizeof(pkt_len) * 8)
-   - __builtin_clz(pkt_len) - 5;
-   pstats->xstats[index]++;
-   } else {
-   if (pkt_len < 64)
-   pstats->xstats[VHOST_UNDERSIZE_PKT]++;
-   else if (pkt_len <= 1522)
-   pstats->xstats[VHOST_1024_TO_1522_PKT]++;
-   else if (pkt_len > 1522)
-   pstats->xstats[VHOST_1523_TO_MAX_PKT]++;
-   }
-   vhost_count_xcast_packets(vq, bufs[i]);
+   pstats->xstats[VHOST_PKT]++;
+   pkt_len = buf->pkt_len;
+   if (pkt_len == 64) {
+   pstats->xstats[VHOST_64_PKT]++;
+   } else if (pkt_len > 64 && pkt_len < 1024) {
+   index = (sizeof(pkt_len) * 8)
+   - __builtin_clz(pkt_len) - 5;
+   pstats->xstats[index]++;
+   } else {
+   if (pkt_len < 64)
+   pstats->xstats[VHOST_UNDERSIZE_PKT]++;
+   else if (pkt_len <= 1522)
+   pstats->xstats[VHOST_1024_TO_1522_PKT]++;
+   else if (pkt_len > 1522)
+   pstats->xstats[VHOST_1523_TO_MAX_PKT]++;
}
+   vhost_count_xcast_packets(vq, buf);
 }
 
 static uint16_t
@@ -376,7 +367,6 @@ eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t 
nb_bufs)
struct vhost_queue *r = q;
uint16_t i, nb_rx = 0;
uint16_t nb_receive = nb_bufs;
-   uint64_t nb_bytes = 0;
 
if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
return 0;
@@ -411,11 +401,11 @@ eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t 
nb_bufs)
if (r->internal->vlan_strip)
rte_vlan_strip(bufs[i]);
 
-   nb_bytes += bufs[i]->pkt_len;
-   }
+   r->stats.bytes += bufs[i]->pkt_len;
+   r->stats.xstats[VHOST_BYTE] += bufs[i]->pkt_len;
 
-   r->stats.bytes += nb_bytes;
-   vhost_update_packet_xstats(r, bufs, nb_rx, nb_bytes, 0);
+   vhost_update_single_packet_xstats(r, bufs[i]);
+   }
 
 out:
rte_atomic32_set(&r->while_queuing, 0);
@@ -471,16 +461,20 @@ eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t 
nb_bufs)
break;
}
 
-   for (i = 0; likely(i < nb_tx); i++)
+   for (i = 0; likely(i < nb_tx); i++) {
nb_bytes += bufs[i]->pkt_len;
+   vhost_update_single_packet_xstats(r, bufs[i]);
+   }
 
nb_missed = nb_bufs - nb_tx;
 
r->stats.pkts += nb_tx;
r->stats.bytes += nb_bytes;
-   r->stats.missed_pkts += nb_bufs - nb_tx;
+   r->stats.missed_pkts += nb_missed;
 
-   vhost_update_packet_xstats(r, bufs, nb_tx, nb_bytes, nb_missed);
+   r->stats.xstats[VHOST_BYTE] += nb_bytes;
+   r->stats.xstats[VHOST_MISSED_PKT] += nb_missed;
+   r->stats.xstats[VHOST_UNICAST_PKT] += nb_missed;
 
/* According to RFC2863, ifHCOutUcastPkts, ifHCOutMulticastPkts and
 * ifHCOutBroadcastPkts counters are increased when packets are not
-- 
2.32.0
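
A note on the size-bucket index used above: for lengths strictly between
64 and 1024, `32 - __builtin_clz(pkt_len)` is the bit width of pkt_len,
so widths 7..10 map to consecutive xstats slots. A minimal standalone
check, assuming the upstream enum layout where VHOST_UNDERSIZE_PKT is 0
(that layout is an assumption of this sketch, not part of the patch):

#include <assert.h>
#include <stdint.h>

/* Assumed mirror of the driver's enum layout (VHOST_UNDERSIZE_PKT = 0). */
enum { VHOST_UNDERSIZE_PKT, VHOST_64_PKT, VHOST_65_TO_127_PKT,
       VHOST_128_TO_255_PKT, VHOST_256_TO_511_PKT, VHOST_512_TO_1023_PKT };

/* Same arithmetic as vhost_update_single_packet_xstats() above. */
static unsigned int
size_bucket(uint32_t pkt_len)
{
	return (sizeof(pkt_len) * 8) - __builtin_clz(pkt_len) - 5;
}

int
main(void)
{
	assert(size_bucket(65) == VHOST_65_TO_127_PKT);     /* 7 bits  -> 2 */
	assert(size_bucket(128) == VHOST_128_TO_255_PKT);   /* 8 bits  -> 3 */
	assert(size_bucket(1023) == VHOST_512_TO_1023_PKT); /* 10 bits -> 5 */
	return 0;
}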




Re: [dpdk-dev] [RFC V1] examples/l3fwd-power: fix memory leak for rte_pci_device

2021-09-26 Thread Huisong Li



On 2021/9/27 3:16, Thomas Monjalon wrote:

26/09/2021 14:20, Huisong Li:

On 2021/9/18 16:46, Thomas Monjalon wrote:

18/09/2021 05:24, Huisong Li:

On 2021/9/17 20:50, Thomas Monjalon wrote:

17/09/2021 04:13, Huisong Li:

How should PMD free it? What should we do? Any good suggestions?

Check that there is no other port sharing the same PCI device,
then call the PMD callback for rte_pci_remove_t.

For primary and secondary processes, their rte_pci_device instances are independent.

Yes it requires to free on both primary and secondary.


Is this for a scenario where there are multiple representor ports under
the same PCI address in the same process?

A PCI device can have multiple physical or representor ports.

Got it.

Would it be more appropriate to do this in rte_eal_cleanup() if it
can't be done in the API above?

rte_eal_cleanup is a last cleanup for what was not done earlier.
We could do that but first we should properly free devices when closed.


Agreed, it is appropriate for rte_eal_cleanup() to be responsible for
releasing devices on the PCI bus.

Yes, but if a device is closed while the rest of the app keeps running,
we should not wait to free it.

From this point of view, it seems to make sense. However, according to
the OVS-DPDK usage, it calls dev_close() first, and then checks whether
all ports under the PCI address are closed before freeing rte_pci_device
by calling rte_dev_remove().


If we do not want the user to be aware of this, and we want rte_pci_device
to be freed in a timely manner, can we add logic to rte_eth_dev_close()
that counts the ports under a PCI address and calls rte_dev_remove() to
free the rte_pci_device and delete it from rte_pci_bus?

If we do, we may need some extra work, otherwise some applications, such
as OVS-DPDK, will fail due to a second call to rte_dev_remove().

I don't understand the proposal.
Could you please explain the code path again?


1. This RFC patch intended to free rte_pci_device in the DPDK app by
calling rte_dev_remove() after calling dev_close().

2. For the above-mentioned usage in OVS-DPDK, please see function
netdev_dpdk_destruct() in lib/netdev-dpdk.c.

3. Later, you suggested that the release of rte_pci_device should be done
in the dev_close() API, not in rte_eal_cleanup(), which is not real-time.

To sum up, the above proposal comes out.
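
For concreteness, a hedged sketch of the close-then-remove sequence under
discussion (RTE_ETH_FOREACH_DEV_OF and rte_dev_remove() are existing
ethdev/EAL APIs; whether ethdev should do this implicitly is exactly the
open question):

#include <rte_ethdev.h>
#include <rte_dev.h>

/* Close one port; remove the underlying rte_device (and thus the
 * rte_pci_device) only when no sibling port is still attached. */
static void
close_port_and_maybe_remove(uint16_t port_id)
{
	struct rte_eth_dev_info info;
	struct rte_device *dev;
	uint16_t sibling;

	if (rte_eth_dev_info_get(port_id, &info) != 0)
		return;
	dev = info.device;

	rte_eth_dev_close(port_id);

	/* Another port (e.g. a representor) still uses this device. */
	RTE_ETH_FOREACH_DEV_OF(sibling, dev)
		return;

	rte_dev_remove(dev);
}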


It may deserve a separate mail thread.




[dpdk-dev] [PATCH v4 0/2] add unsafe API to get inflight packets

2021-09-26 Thread Xuan Ding
This patchset introduces an unsafe API to get the number of inflight
packets in the DMA engine in certain situations, such as when the vring
state changes or the device is destroyed. Compared with
rte_vhost_async_get_inflight(), this is a lock-free version.

v4:
* Added use case for API in vhost example.

v3:
* Fixed one typo.
* Revised the doc for accuracy.

v2:
* Fixed some format issues.

Xuan Ding (2):
  vhost: add unsafe API to check inflight packets
  examples/vhost: use API to check inflight packets

 doc/guides/prog_guide/vhost_lib.rst|  5 +
 doc/guides/rel_notes/release_21_11.rst |  5 +
 examples/vhost/main.c  | 26 +++---
 examples/vhost/main.h  |  1 -
 lib/vhost/rte_vhost_async.h| 14 ++
 lib/vhost/version.map  |  3 +++
 lib/vhost/vhost.c  | 26 ++
 7 files changed, 64 insertions(+), 16 deletions(-)

-- 
2.17.1



[dpdk-dev] [PATCH v4 1/2] vhost: add unsafe API to check inflight packets

2021-09-26 Thread Xuan Ding
In the async data path, when the vring state changes or the device is
destroyed, it is necessary to know the number of inflight packets in the
DMA engine. This patch provides a thread-unsafe API that returns the
number of inflight packets for a vhost queue without taking any lock.
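
For illustration, a minimal sketch of the intended call pattern, mirroring
what patch 2/2 of this series does in the vhost example (the caller must
own the queue, as with the other *_thread_unsafe APIs):

#include <rte_mbuf.h>
#include <rte_vhost_async.h>

static void
drain_queue(int vid, uint16_t queue_id)
{
	int inflight;

	inflight = rte_vhost_async_get_inflight_thread_unsafe(vid, queue_id);
	while (inflight > 0) {
		struct rte_mbuf *cpl[inflight];
		uint16_t n;

		n = rte_vhost_clear_queue_thread_unsafe(vid, queue_id,
							cpl, inflight);
		rte_pktmbuf_free_bulk(cpl, n);
		inflight = rte_vhost_async_get_inflight_thread_unsafe(vid,
							queue_id);
	}
}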

Signed-off-by: Xuan Ding 
---
 doc/guides/prog_guide/vhost_lib.rst|  5 +
 doc/guides/rel_notes/release_21_11.rst |  5 +
 lib/vhost/rte_vhost_async.h| 14 ++
 lib/vhost/version.map  |  3 +++
 lib/vhost/vhost.c  | 26 ++
 5 files changed, 53 insertions(+)

diff --git a/doc/guides/prog_guide/vhost_lib.rst b/doc/guides/prog_guide/vhost_lib.rst
index 8874033165..0c4fb9ea91 100644
--- a/doc/guides/prog_guide/vhost_lib.rst
+++ b/doc/guides/prog_guide/vhost_lib.rst
@@ -305,6 +305,11 @@ The following is an overview of some key Vhost API functions:
   This function returns the amount of in-flight packets for the vhost
   queue using async acceleration.
 
+* ``rte_vhost_async_get_inflight_thread_unsafe(vid, queue_id)``
+
+  Get the number of inflight packets for a vhost queue without
+  performing any locking.
+
 * ``rte_vhost_clear_queue_thread_unsafe(vid, queue_id, **pkts, count)``
 
   Clear inflight packets which are submitted to DMA engine in vhost async data
diff --git a/doc/guides/rel_notes/release_21_11.rst b/doc/guides/rel_notes/release_21_11.rst
index 675b573834..c9814c68df 100644
--- a/doc/guides/rel_notes/release_21_11.rst
+++ b/doc/guides/rel_notes/release_21_11.rst
@@ -55,6 +55,11 @@ New Features
  Also, make sure to start the actual text at the margin.
  ===
 
+* **Added vhost API to get the number of inflight packets.**
+
+  Added an API which can get the number of inflight packets in
+  vhost async data path without using lock.
+
 * **Enabled new devargs parser.**
 
   * Enabled devargs syntax
diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
index b25ff446f7..0af414bf78 100644
--- a/lib/vhost/rte_vhost_async.h
+++ b/lib/vhost/rte_vhost_async.h
@@ -246,6 +246,20 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
 __rte_experimental
 int rte_vhost_async_get_inflight(int vid, uint16_t queue_id);
 
+/**
+ * This function is a lock-free version that returns the amount of in-flight
+ * packets for the vhost queue which uses async channel acceleration.
+ *
+ * @param vid
+ *  id of vhost device to enqueue data
+ * @param queue_id
+ *  queue id to enqueue data
+ * @return
+ *  the amount of in-flight packets on success; -1 on failure
+ */
+__rte_experimental
+int rte_vhost_async_get_inflight_thread_unsafe(int vid, uint16_t queue_id);
+
 /**
  * This function checks async completion status and clear packets for
  * a specific vhost device queue. Packets which are inflight will be
diff --git a/lib/vhost/version.map b/lib/vhost/version.map
index c92a9d4962..b150dc408d 100644
--- a/lib/vhost/version.map
+++ b/lib/vhost/version.map
@@ -85,4 +85,7 @@ EXPERIMENTAL {
rte_vhost_async_channel_register_thread_unsafe;
rte_vhost_async_channel_unregister_thread_unsafe;
rte_vhost_clear_queue_thread_unsafe;
+
# added in 21.11
+   rte_vhost_async_get_inflight_thread_unsafe;
 };
diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
index 355ff37651..24ae1025ea 100644
--- a/lib/vhost/vhost.c
+++ b/lib/vhost/vhost.c
@@ -1886,5 +1886,31 @@ int rte_vhost_async_get_inflight(int vid, uint16_t queue_id)
return ret;
 }
 
+int
+rte_vhost_async_get_inflight_thread_unsafe(int vid, uint16_t queue_id)
+{
+   struct vhost_virtqueue *vq;
+   struct virtio_net *dev = get_device(vid);
+   int ret = -1;
+
+   if (dev == NULL)
+   return ret;
+
+   if (queue_id >= VHOST_MAX_VRING)
+   return ret;
+
+   vq = dev->virtqueue[queue_id];
+
+   if (vq == NULL)
+   return ret;
+
+   if (!vq->async_registered)
+   return ret;
+
+   ret = vq->async_pkts_inflight_n;
+
+   return ret;
+}
+
 RTE_LOG_REGISTER_SUFFIX(vhost_config_log_level, config, INFO);
 RTE_LOG_REGISTER_SUFFIX(vhost_data_log_level, data, WARNING);
-- 
2.17.1



[dpdk-dev] [PATCH v4 2/2] examples/vhost: use API to check inflight packets

2021-09-26 Thread Xuan Ding
In the async data path, call the rte_vhost_async_get_inflight_thread_unsafe()
API to directly return the number of inflight packets instead of
maintaining a local variable.

Signed-off-by: Xuan Ding 
---
 examples/vhost/main.c | 26 +++---
 examples/vhost/main.h |  1 -
 2 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index bc3d71c898..f0b74b5086 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -851,11 +851,8 @@ complete_async_pkts(struct vhost_dev *vdev)
 
complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
-   if (complete_count) {
+   if (complete_count)
free_pkts(p_cpl, complete_count);
-   __atomic_sub_fetch(&vdev->pkts_inflight, complete_count, __ATOMIC_SEQ_CST);
-   }
-
 }
 
 static __rte_always_inline void
@@ -898,7 +895,6 @@ drain_vhost(struct vhost_dev *vdev)
complete_async_pkts(vdev);
ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
-   __atomic_add_fetch(&vdev->pkts_inflight, ret - cpu_cpl_nr, __ATOMIC_SEQ_CST);
 
if (cpu_cpl_nr)
free_pkts(m_cpu_cpl, cpu_cpl_nr);
@@ -1230,8 +1226,6 @@ drain_eth_rx(struct vhost_dev *vdev)
enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
VIRTIO_RXQ, pkts, rx_count,
m_cpu_cpl, &cpu_cpl_nr);
-   __atomic_add_fetch(&vdev->pkts_inflight, enqueue_count - cpu_cpl_nr,
-   __ATOMIC_SEQ_CST);
 
if (cpu_cpl_nr)
free_pkts(m_cpu_cpl, cpu_cpl_nr);
@@ -1360,6 +1354,7 @@ destroy_device(int vid)
struct vhost_dev *vdev = NULL;
int lcore;
uint16_t i;
+   int pkts_inflight;
 
TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
if (vdev->vid == vid)
@@ -1406,13 +1401,13 @@ destroy_device(int vid)
 
if (async_vhost_driver) {
uint16_t n_pkt = 0;
-   struct rte_mbuf *m_cpl[vdev->pkts_inflight];
+   pkts_inflight = rte_vhost_async_get_inflight_thread_unsafe(vid, VIRTIO_RXQ);
+   struct rte_mbuf *m_cpl[pkts_inflight];
 
-   while (vdev->pkts_inflight) {
+   while (pkts_inflight) {
n_pkt = rte_vhost_clear_queue_thread_unsafe(vid, VIRTIO_RXQ,
-   m_cpl, vdev->pkts_inflight);
+   m_cpl, pkts_inflight);
free_pkts(m_cpl, n_pkt);
-   __atomic_sub_fetch(&vdev->pkts_inflight, n_pkt, __ATOMIC_SEQ_CST);
+   pkts_inflight = rte_vhost_async_get_inflight_thread_unsafe(vid, VIRTIO_RXQ);
}
 
rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
@@ -1509,6 +1504,7 @@ static int
 vring_state_changed(int vid, uint16_t queue_id, int enable)
 {
struct vhost_dev *vdev = NULL;
+   int pkts_inflight;
 
TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
if (vdev->vid == vid)
@@ -1523,13 +1519,13 @@ vring_state_changed(int vid, uint16_t queue_id, int enable)
if (async_vhost_driver) {
if (!enable) {
uint16_t n_pkt = 0;
-   struct rte_mbuf *m_cpl[vdev->pkts_inflight];
+   pkts_inflight = rte_vhost_async_get_inflight_thread_unsafe(vid, queue_id);
+   struct rte_mbuf *m_cpl[pkts_inflight];
 
-   while (vdev->pkts_inflight) {
+   while (pkts_inflight) {
n_pkt = rte_vhost_clear_queue_thread_unsafe(vid, queue_id,
-   m_cpl, vdev->pkts_inflight);
+   m_cpl, pkts_inflight);
free_pkts(m_cpl, n_pkt);
-   __atomic_sub_fetch(&vdev->pkts_inflight, n_pkt, __ATOMIC_SEQ_CST);
+   pkts_inflight = rte_vhost_async_get_inflight_thread_unsafe(vid, queue_id);
}
}
}
diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index e7b1ac60a6..0ccdce4b4a 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -51,7 +51,6 @@ struct vhost_dev {
uint64_t features;
size_t hdr_len;
uint16_t nr_vrings;
-   uint16_t pkts_inflight;
struct rte_vhost_memory *mem;
struct device_statistics stats;
TAILQ_ENTRY(vhost_dev) global_vdev_entry;
-- 
2.17.1



Re: [dpdk-dev] [PATCH] Enhance code readability when dma_map in ifc/ifcvp_vdpa

2021-09-26 Thread Wang, Xiao W
Hi Jilei,

Please note the patch format requirement: the subject of the patch should
start with "vdpa/ifc: ". You also need to keep it concise, around ~50
characters. Refer to "doc/guides/contributing/patches.rst" for more detail.

Back to this patch: it looks like we can just change the function
ifcvf_dma_map(struct ifcvf_internal *internal, int do_map) to
ifcvf_dma_map(struct ifcvf_internal *internal, bool do_map), and use "true"
or "false" when calling it. This would align with vdpa_enable_vfio_intr().
In your next version of the patch, you can also change the "1" and "0"
parameters to "true" and "false" when calling vdpa_enable_vfio_intr().

BRs,
Xiao

> -Original Message-
> From: jilei chen 
> Sent: Monday, September 27, 2021 12:45 AM
> To: Wang, Xiao W 
> Cc: dev@dpdk.org
> Subject: [PATCH] Enhance code readability when dma_map in
> ifc/ifcvp_vdpa
> 
> Signed-off-by: jilei chen 
> ---
>  drivers/vdpa/ifc/ifcvf_vdpa.c | 10 ++
>  1 file changed, 6 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
> index 1dc813d0a3..c2bf26f2b7 100644
> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
> @@ -36,6 +36,8 @@ RTE_LOG_REGISTER(ifcvf_vdpa_logtype, pmd.vdpa.ifcvf, NOTICE);
> 
>  #define IFCVF_VDPA_MODE  "vdpa"
>  #define IFCVF_SW_FALLBACK_LM "sw-live-migration"
> +#define IFCVF_MAP    1
> +#define IFCVF_UNMAP  0
> 
>  #define THREAD_NAME_LEN  16
> 
> @@ -538,7 +540,7 @@ update_datapath(struct ifcvf_internal *internal)
>   if (!rte_atomic32_read(&internal->running) &&
>   (rte_atomic32_read(&internal->started) &&
>rte_atomic32_read(&internal->dev_attached))) {
> - ret = ifcvf_dma_map(internal, 1);
> + ret = ifcvf_dma_map(internal, IFCVF_MAP);
>   if (ret)
>   goto err;
> 
> @@ -568,7 +570,7 @@ update_datapath(struct ifcvf_internal *internal)
>   if (ret)
>   goto err;
> 
> - ret = ifcvf_dma_map(internal, 0);
> + ret = ifcvf_dma_map(internal, IFCVF_UNMAP);
>   if (ret)
>   goto err;
> 
> @@ -875,7 +877,7 @@ ifcvf_sw_fallback_switchover(struct ifcvf_internal
> *internal)
>  unset_intr:
>   vdpa_disable_vfio_intr(internal);
>  unmap:
> - ifcvf_dma_map(internal, 0);
> + ifcvf_dma_map(internal, IFCVF_UNMAP);
>  error:
>   return -1;
>  }
> @@ -934,7 +936,7 @@ ifcvf_dev_close(int vid)
>   vdpa_disable_vfio_intr(internal);
> 
>   /* unset DMA map for guest memory */
> - ifcvf_dma_map(internal, 0);
> + ifcvf_dma_map(internal, IFCVF_UNMAP);
> 
>   internal->sw_fallback_running = false;
>   } else {
> --
> 2.12.2
> 
> 



Re: [dpdk-dev] [PATCH v3 2/2] vhost: enable IOMMU for async vhost

2021-09-26 Thread Hu, Jiayu
Hi Xuan,

> -Original Message-
> From: Ding, Xuan 
> Sent: Saturday, September 25, 2021 6:04 PM
> To: dev@dpdk.org; Burakov, Anatoly ;
> maxime.coque...@redhat.com; Xia, Chenbo 
> Cc: Hu, Jiayu ; Jiang, Cheng1 ;
> Richardson, Bruce ; Pai G, Sunil
> ; Wang, Yinan ; Yang,
> YvonneX ; Ding, Xuan 
> Subject: [PATCH v3 2/2] vhost: enable IOMMU for async vhost
> 
> The use of IOMMU has many advantages, such as isolation and address
> translation. This patch extends the capability of the DMA engine to use
> IOMMU if the DMA engine is bound to vfio.
> 
> When the memory table is set, the guest memory will be mapped into the
> default container of DPDK.
> 
> Signed-off-by: Xuan Ding 
> ---
>  lib/vhost/vhost.h  |   4 ++
>  lib/vhost/vhost_user.c | 112
> -
>  2 files changed, 114 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index
> 89a31e4ca8..bc5695e899 100644
> --- a/lib/vhost/vhost.h
> +++ b/lib/vhost/vhost.h
> @@ -370,6 +370,10 @@ struct virtio_net {
>   int16_t broadcast_rarp;
>   uint32_tnr_vring;
>   int async_copy;
> +
> + /* Record the dma map status for each region. */
> + bool*async_map_status;
> +
>   int extbuf;
>   int linearbuf;
>   struct vhost_virtqueue  *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index
> 29a4c9af60..3990e9b057 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -45,6 +45,8 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
> 
>  #include "iotlb.h"
>  #include "vhost.h"
> @@ -141,6 +143,63 @@ get_blk_size(int fd)
>   return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;  }
> 
> +static int
> +async_dma_map(struct rte_vhost_mem_region *region, bool *dma_map_success,
> + bool do_map)
> +{
> + uint64_t host_iova;
> + int ret = 0;
> +
> + host_iova = rte_mem_virt2iova((void *)(uintptr_t)region->host_user_addr);
> + if (do_map) {
> + /* Add mapped region into the default container of DPDK. */
> + ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> +  region->host_user_addr,
> +  host_iova,
> +  region->size);
> + *dma_map_success = ret == 0;
> +
> + if (ret) {
> + /*
> +  * DMA device may bind with kernel driver, in this case,
> +  * we don't need to program IOMMU manually. However, if no
> +  * device is bound with vfio/uio in DPDK, and vfio kernel
> +  * module is loaded, the API will still be called and return
> +  * with ENODEV/ENOTSUP.
> +  *
> +  * DPDK VFIO only returns ENODEV/ENOTSUP in very similar
> +  * situations (VFIO either unsupported, or supported
> +  * but no devices found). Either way, no mappings could be
> +  * performed. We treat it as a normal case in the async path.
> +  */
> + if (rte_errno == ENODEV || rte_errno == ENOTSUP) {
> + return 0;
> + } else {
> + VHOST_LOG_CONFIG(ERR, "DMA engine map
> failed\n");
> + return ret;
> + }
> + }
> +
> + } else {
> + /* No need to do vfio unmap if the map failed. */
> + if (!*dma_map_success)
> + return 0;
> +
> + /* Remove mapped region from the default container of
> DPDK. */
> + ret =
> rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> +region->host_user_addr,
> +host_iova,
> +region->size);
> + if (ret) {
> + VHOST_LOG_CONFIG(ERR, "DMA engine unmap
> failed\n");
> + return ret;
> + }
> + /* Clear the flag once the unmap succeeds. */
> + *dma_map_success = 0;
> + }
> +
> + return ret;
> +}
> +
>  static void
>  free_mem_region(struct virtio_net *dev)  { @@ -153,6 +212,9 @@
> free_mem_region(struct virtio_net *dev)
>   for (i = 0; i < dev->mem->nregions; i++) {
>   reg = &dev->mem->regions[i];
>   if (reg->host_user_addr) {
> + if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> + async_dma_map(reg, &dev->async_map_status[i], false);
> +
>   munmap(reg->mmap_addr, reg->mmap_size

Re: [dpdk-dev] [PATCH] net/octeontx2: use fast udata and mdata flags

2021-09-26 Thread Anoob Joseph
> 
> Using fast metadata and userdata flags instead of driver callbacks for
> set_pkt_metadata and get_userdata in inline IPsec.
> 
> Signed-off-by: Tejasree Kondoj 

Acked-by: Anoob Joseph 


Re: [dpdk-dev] [PATCH v3 2/2] vhost: enable IOMMU for async vhost

2021-09-26 Thread Ding, Xuan
Hi Jiayu,

> -Original Message-
> From: Hu, Jiayu 
> Sent: Monday, September 27, 2021 12:18 PM
> To: Ding, Xuan ; dev@dpdk.org; Burakov, Anatoly
> ; maxime.coque...@redhat.com; Xia, Chenbo
> 
> Cc: Jiang, Cheng1 ; Richardson, Bruce
> ; Pai G, Sunil ; Wang,
> Yinan ; Yang, YvonneX 
> Subject: RE: [PATCH v3 2/2] vhost: enable IOMMU for async vhost
> 
> Hi Xuan,
> 
> > -Original Message-
> > From: Ding, Xuan 
> > Sent: Saturday, September 25, 2021 6:04 PM
> > To: dev@dpdk.org; Burakov, Anatoly ;
> > maxime.coque...@redhat.com; Xia, Chenbo 
> > Cc: Hu, Jiayu ; Jiang, Cheng1 ;
> > Richardson, Bruce ; Pai G, Sunil
> > ; Wang, Yinan ; Yang,
> > YvonneX ; Ding, Xuan 
> > Subject: [PATCH v3 2/2] vhost: enable IOMMU for async vhost
> >
> > The use of IOMMU has many advantages, such as isolation and address
> > translation. This patch extends the capability of the DMA engine to use
> > IOMMU if the DMA engine is bound to vfio.
> >
> > When the memory table is set, the guest memory will be mapped into the
> > default container of DPDK.
> >
> > Signed-off-by: Xuan Ding 
> > ---
> >  lib/vhost/vhost.h  |   4 ++
> >  lib/vhost/vhost_user.c | 112
> > -
> >  2 files changed, 114 insertions(+), 2 deletions(-)
> >
> > diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index
> > 89a31e4ca8..bc5695e899 100644
> > --- a/lib/vhost/vhost.h
> > +++ b/lib/vhost/vhost.h
> > @@ -370,6 +370,10 @@ struct virtio_net {
> >  int16_tbroadcast_rarp;
> >  uint32_tnr_vring;
> >  intasync_copy;
> > +
> > +/* Record the dma map status for each region. */
> > +bool*async_map_status;
> > +
> >  intextbuf;
> >  intlinearbuf;
> >  struct vhost_virtqueue*virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];
> > diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index
> > 29a4c9af60..3990e9b057 100644
> > --- a/lib/vhost/vhost_user.c
> > +++ b/lib/vhost/vhost_user.c
> > @@ -45,6 +45,8 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> > +#include 
> >
> >  #include "iotlb.h"
> >  #include "vhost.h"
> > @@ -141,6 +143,63 @@ get_blk_size(int fd)
> >  return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;  }
> >
> > +static int
> > +async_dma_map(struct rte_vhost_mem_region *region, bool
> > +*dma_map_success, bool do_map) {
> > +uint64_t host_iova;
> > +int ret = 0;
> > +
> > +host_iova = rte_mem_virt2iova((void *)(uintptr_t)region-
> > >host_user_addr);
> > +if (do_map) {
> > +/* Add mapped region into the default container of DPDK. */
> > +ret =
> > rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> > + region->host_user_addr,
> > + host_iova,
> > + region->size);
> > +*dma_map_success = ret == 0;
> > +
> > +if (ret) {
> > +/*
> > + * DMA device may bind with kernel driver, in this case,
> > + * we don't need to program IOMMU manually. However, if no
> > + * device is bound with vfio/uio in DPDK, and vfio kernel
> > + * module is loaded, the API will still be called and return
> > + * with ENODEV/ENOTSUP.
> > + *
> > + * DPDK VFIO only returns ENODEV/ENOTSUP in very similar
> > + * situations (VFIO either unsupported, or supported
> > + * but no devices found). Either way, no mappings could be
> > + * performed. We treat it as a normal case in the async path.
> > + */
> > +if (rte_errno == ENODEV || rte_errno == ENOTSUP) {
> > +return 0;
> > +} else {
> > +VHOST_LOG_CONFIG(ERR, "DMA engine map
> > failed\n");
> > +return ret;
> > +}
> > +}
> > +
> > +} else {
> > +/* No need to do vfio unmap if the map failed. */
> > +if (!*dma_map_success)
> > +return 0;
> > +
> > +/* Remove mapped region from the default container of
> > DPDK. */
> > +ret =
> > rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> > +   region->host_user_addr,
> > +   host_iova,
> > +   region->size);
> > +if (ret) {
> > +VHOST_LOG_CONFIG(ERR, "DMA engine unmap
> > failed\n");
> > +return ret;
> > +}
> > +/* Clear the flag once the unmap succeeds. */
> > +*dma_map_success = 0;
> > +}
> > +
> > +return ret;
> > +}
> > +
> >  static void
> >  free_mem_region(struct virtio_net *dev)  { @@ -153,6 +212,9 @@
> > free_mem_region(struct virtio_net *dev)
> >  for (i = 0; i < dev->mem->nregions; i++) {
> >  reg = &dev->mem->regions[i];
> >  if (reg->host_user_addr) {
> > +if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> > +async_dma_map(reg, &dev-
> > >async_map_status[i], false);
> > +
> >  munmap(reg->mmap_addr, reg->mmap_size);
> >  close(reg->fd);
> >  }
> > @@ -203,6 +265,11 @@ vhost_backend_cleanup(struct virtio_net *dev)
> >  }
> >
> >  dev->postcopy_listening = 0;
> > +
> > +if (dev->async_map_status) {
> > +rte_free(dev->async_map_status);
> > +dev->async_map_status = NULL;
> > +}
> >  }
> >
> >  static void
> > @@ -621,6 +688,17 @@ numa_realloc(struct virtio_net *dev, int index)
> >  }
> >  dev->mem = mem;
> >
> > +if (dev->async_copy && rte_vfio_is_enabled("vfio")) {
> > +dev->async_map_status = rte_zmalloc_socket("async-dma-
> > map-status",
> > +sizeof(bool) * dev->mem->nregion

[dpdk-dev] [PATCH v3] examples/l3fwd: add changes to use event vector

2021-09-26 Thread Shijith Thotton
Add support for receiving packets as event vectors. By default this is
disabled and can be enabled using the option --event-vector. The vector
size and the timeout to form a vector can be configured using the options
--event-vector-size and --event-vector-tmo.

Example:
dpdk-l3fwd -l 0-3 -n 4 -- -p 0x03 --mode=eventdev \
--eventq-sched=ordered --event-vector --event-vector-size 16
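
For reference, a hedged sketch of how a worker consumes one vector event
(field names follow struct rte_event_vector in rte_eventdev.h; the lookup
body is elided):

#include <rte_eventdev.h>
#include <rte_mbuf.h>

static inline void
process_event_vector(struct rte_event *ev)
{
	struct rte_event_vector *vec = ev->vec;
	uint16_t i;

	for (i = 0; i < vec->nb_elem; i++) {
		struct rte_mbuf *m = vec->mbufs[i];

		/* per-packet LPM/EM/FIB lookup and forwarding here */
		(void)m;
	}
}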

Signed-off-by: Shijith Thotton 
---
v3:
* Added the prefix "event" to vector options.

v2:
* Fixed setting event vector attribute.

 doc/guides/sample_app_ug/l3_forward.rst|   7 +
 examples/l3fwd/l3fwd.h |  26 
 examples/l3fwd/l3fwd_em.c  | 104 +
 examples/l3fwd/l3fwd_em.h  |  37 +
 examples/l3fwd/l3fwd_em_hlm.h  |  69 +
 examples/l3fwd/l3fwd_em_sequential.h   |  25 
 examples/l3fwd/l3fwd_event.c   |  57 ---
 examples/l3fwd/l3fwd_event.h   |  25 
 examples/l3fwd/l3fwd_event_internal_port.c |  28 +++-
 examples/l3fwd/l3fwd_fib.c | 164 +
 examples/l3fwd/l3fwd_lpm.c | 121 +++
 examples/l3fwd/main.c  |  58 
 12 files changed, 698 insertions(+), 23 deletions(-)

diff --git a/doc/guides/sample_app_ug/l3_forward.rst b/doc/guides/sample_app_ug/l3_forward.rst
index 2d5cd5f1c0..96a4d13bf4 100644
--- a/doc/guides/sample_app_ug/l3_forward.rst
+++ b/doc/guides/sample_app_ug/l3_forward.rst
@@ -74,6 +74,7 @@ The application has a number of command line options::
  [--mode]
  [--eventq-sched]
  [--event-eth-rxqs]
+ [--event-vector [--event-vector-size SIZE] [--event-vector-tmo NS]]
  [-E]
  [-L]
 
@@ -115,6 +116,12 @@ Where,
 
 * ``--event-eth-rxqs:`` Optional, Number of ethernet RX queues per device. Only valid if --mode=eventdev.
 
+* ``--event-vector:`` Optional, Enable event vectorization. Only valid if --mode=eventdev.
+
+* ``--event-vector-size:`` Optional, Max vector size if event vectorization is enabled.
+
+* ``--event-vector-tmo:`` Optional, Max timeout to form vector in nanoseconds if event vectorization is enabled.
+
 * ``-E:`` Optional, enable exact match,
   legacy flag, please use ``--lookup=em`` instead.
 
diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h
index a808d60247..9607ee0fbb 100644
--- a/examples/l3fwd/l3fwd.h
+++ b/examples/l3fwd/l3fwd.h
@@ -28,6 +28,8 @@
 #define MEMPOOL_CACHE_SIZE 256
 #define MAX_RX_QUEUE_PER_LCORE 16
 
+#define VECTOR_SIZE_DEFAULT   MAX_PKT_BURST
+#define VECTOR_TMO_NS_DEFAULT 1E6 /* 1ms */
 /*
  * Try to avoid TX buffering if we have at least MAX_TX_BURST packets to send.
  */
@@ -221,6 +223,14 @@ int
 lpm_event_main_loop_tx_q(__rte_unused void *dummy);
 int
 lpm_event_main_loop_tx_q_burst(__rte_unused void *dummy);
+int
+lpm_event_main_loop_tx_d_vector(__rte_unused void *dummy);
+int
+lpm_event_main_loop_tx_d_burst_vector(__rte_unused void *dummy);
+int
+lpm_event_main_loop_tx_q_vector(__rte_unused void *dummy);
+int
+lpm_event_main_loop_tx_q_burst_vector(__rte_unused void *dummy);
 
 int
 em_event_main_loop_tx_d(__rte_unused void *dummy);
@@ -230,6 +240,14 @@ int
 em_event_main_loop_tx_q(__rte_unused void *dummy);
 int
 em_event_main_loop_tx_q_burst(__rte_unused void *dummy);
+int
+em_event_main_loop_tx_d_vector(__rte_unused void *dummy);
+int
+em_event_main_loop_tx_d_burst_vector(__rte_unused void *dummy);
+int
+em_event_main_loop_tx_q_vector(__rte_unused void *dummy);
+int
+em_event_main_loop_tx_q_burst_vector(__rte_unused void *dummy);
 
 int
 fib_event_main_loop_tx_d(__rte_unused void *dummy);
@@ -239,6 +257,14 @@ int
 fib_event_main_loop_tx_q(__rte_unused void *dummy);
 int
 fib_event_main_loop_tx_q_burst(__rte_unused void *dummy);
+int
+fib_event_main_loop_tx_d_vector(__rte_unused void *dummy);
+int
+fib_event_main_loop_tx_d_burst_vector(__rte_unused void *dummy);
+int
+fib_event_main_loop_tx_q_vector(__rte_unused void *dummy);
+int
+fib_event_main_loop_tx_q_burst_vector(__rte_unused void *dummy);
 
 
 /* Return ipv4/ipv6 fwd lookup struct for LPM, EM or FIB. */
diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index 2a8ab6aab5..ff5e56766c 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -878,6 +878,110 @@ em_event_main_loop_tx_q_burst(__rte_unused void *dummy)
return 0;
 }
 
+/* Same eventdev loop for single and burst of vector */
+static __rte_always_inline void
+em_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
+const uint8_t flags)
+{
+   const int event_p_id = l3fwd_get_free_event_port(evt_rsrc);
+   const uint8_t tx_q_id =
+   evt_rsrc->evq.event_q_id[evt_rsrc->evq.nb_queues - 1];
+   const uint8_t event_d_id = evt_rsrc->event_d_id;
+   const uint16_t deq_len = evt_rs

Re: [dpdk-dev] [PATCH v9 1/3] ethdev: add an API to get device configuration info

2021-09-26 Thread Thomas Monjalon
26/09/2021 11:20, Jie Wang:
> This patch adds a new API "rte_eth_dev_conf_info_get()" to help users get
> device configuration info.
> 
> Cc: sta...@dpdk.org

No, we don't backport new features.

> Signed-off-by: Jie Wang 
[...]
> + * Retrieve the configuration of an Ethernet device.
> + *
> + * @param port_id
> + *   The port identifier of the Ethernet device.
> + * @param dev_conf_info
> + *   A pointer to a structure of type *rte_eth_conf* to be filled with
> + *   the configuration of the Ethernet device.
> + *   And the memory of the structure should be allocated by the caller.
> + * @return
> + *   - (0) if successful.
> + *   - (-ENODEV) if *port_id* invalid.
> + *   - (-EINVAL) if bad parameter.
> + */
> +__rte_experimental
> +int rte_eth_dev_conf_info_get(uint16_t port_id,
> + struct rte_eth_conf *dev_conf_info);

It does not make sense to me.
rte_eth_conf is passed by the app to rte_eth_dev_configure.
Why would the app need to get the same info back?





[dpdk-dev] [PATCH v2] examples/l2fwd-event: changes to use event vector

2021-09-26 Thread Shijith Thotton
Add support for receiving packets as event vectors. By default this is
disabled and can be enabled using the option --event-vector. The vector
size and the timeout to form a vector can be configured using the options
--event-vector-size and --event-vector-tmo.

Example:
dpdk-l2fwd-event -l 0-3 -n 4 -- -p 0x03 --mode=eventdev \
--eventq-sched=ordered --event-vector --event-vector-size 16
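
Such an application also needs an event-vector mempool before enabling Rx
adapter vectorization; a hedged sketch using the existing helper (the pool
name and sizes here are illustrative only):

#include <rte_eventdev.h>
#include <rte_lcore.h>

static struct rte_mempool *
create_vector_pool(void)
{
	/* nb_elem must match the configured --event-vector-size. */
	return rte_event_vector_pool_create("l2fwd_vec_pool",
			16384 /* vectors */, 0 /* cache */,
			16 /* nb_elem */, rte_socket_id());
}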

Signed-off-by: Shijith Thotton 
---
v2:
* Added the prefix "event" to vector options.

 doc/guides/sample_app_ug/l2_forward_event.rst |  13 +-
 examples/l2fwd-event/l2fwd_common.h   |  12 ++
 examples/l2fwd-event/l2fwd_event.c| 204 --
 examples/l2fwd-event/l2fwd_event_generic.c|  21 ++
 .../l2fwd-event/l2fwd_event_internal_port.c   |  22 ++
 examples/l2fwd-event/main.c   |  48 +
 6 files changed, 306 insertions(+), 14 deletions(-)

diff --git a/doc/guides/sample_app_ug/l2_forward_event.rst b/doc/guides/sample_app_ug/l2_forward_event.rst
index 540ca015db..904f6f1a4a 100644
--- a/doc/guides/sample_app_ug/l2_forward_event.rst
+++ b/doc/guides/sample_app_ug/l2_forward_event.rst
@@ -52,7 +52,12 @@ The application requires a number of command line options:
 
 .. code-block:: console
 
-.//examples/dpdk-l2fwd-event [EAL options] -- -p PORTMASK [-q NQ] --[no-]mac-updating --mode=MODE --eventq-sched=SCHED_MODE
+.//examples/dpdk-l2fwd-event [EAL options] -- -p PORTMASK
+[-q NQ]
+[--[no-]mac-updating]
+[--mode=MODE]
+[--eventq-sched=SCHED_MODE]
+[--event-vector [--event-vector-size SIZE] [--event-vector-tmo NS]]
 
 where,
 
@@ -68,6 +73,12 @@ where,
 
 *   --config: Configure forwarding port pair mapping. Alternate port pairs by default.
 
+*   --event-vector: Enable event vectorization. Only valid if --mode=eventdev.
+
+*   --event-vector-size: Max vector size if event vectorization is enabled.
+
+*   --event-vector-tmo: Max timeout to form vector in nanoseconds if event vectorization is enabled.
+
 Sample usage commands are given below to run the application in different modes:
 
 Poll mode with 4 lcores, 16 ports and 8 RX queues per lcore and MAC address updating enabled,
diff --git a/examples/l2fwd-event/l2fwd_common.h 
b/examples/l2fwd-event/l2fwd_common.h
index 939221d45a..5e380ded16 100644
--- a/examples/l2fwd-event/l2fwd_common.h
+++ b/examples/l2fwd-event/l2fwd_common.h
@@ -56,6 +56,9 @@
 #define DEFAULT_TIMER_PERIOD   10 /* default period is 10 seconds */
 #define MAX_TIMER_PERIOD   86400 /* 1 day max */
 
+#define VECTOR_SIZE_DEFAULT   MAX_PKT_BURST
+#define VECTOR_TMO_NS_DEFAULT 1E6 /* 1ms */
+
 /* Per-port statistics struct */
 struct l2fwd_port_statistics {
uint64_t dropped;
@@ -63,6 +66,13 @@ struct l2fwd_port_statistics {
uint64_t rx;
 } __rte_cache_aligned;
 
+/* Event vector attributes */
+struct l2fwd_event_vector_params {
+   uint8_t enabled;
+   uint16_t size;
+   uint64_t timeout_ns;
+};
+
 struct l2fwd_resources {
volatile uint8_t force_quit;
uint8_t event_mode;
@@ -75,9 +85,11 @@ struct l2fwd_resources {
uint32_t enabled_port_mask;
uint64_t timer_period;
struct rte_mempool *pktmbuf_pool;
+   struct rte_mempool *evt_vec_pool;
uint32_t dst_ports[RTE_MAX_ETHPORTS];
struct rte_ether_addr eth_addr[RTE_MAX_ETHPORTS];
struct l2fwd_port_statistics port_stats[RTE_MAX_ETHPORTS];
+   struct l2fwd_event_vector_params evt_vec;
void *evt_rsrc;
void *poll_rsrc;
 } __rte_cache_aligned;
diff --git a/examples/l2fwd-event/l2fwd_event.c 
b/examples/l2fwd-event/l2fwd_event.c
index 7ba5311d66..09080fa666 100644
--- a/examples/l2fwd-event/l2fwd_event.c
+++ b/examples/l2fwd-event/l2fwd_event.c
@@ -346,19 +346,198 @@ l2fwd_event_main_loop_tx_q_brst_mac(struct l2fwd_resources *rsrc)
L2FWD_EVENT_TX_ENQ | L2FWD_EVENT_BURST);
 }
 
+static __rte_always_inline void
+l2fwd_event_vector_fwd(struct l2fwd_resources *rsrc,
+  struct rte_event_vector *vec,
+  const uint64_t timer_period, const uint32_t flags)
+{
+   struct rte_mbuf **mbufs = vec->mbufs;
+   uint16_t i, j;
+
+   rte_prefetch0(rte_pktmbuf_mtod(mbufs[0], void *));
+
+   /* If vector attribute is valid, mbufs will be from same port/queue */
+   if (vec->attr_valid) {
+   vec->port = rsrc->dst_ports[mbufs[0]->port];
+   if (flags & L2FWD_EVENT_TX_DIRECT)
+   vec->queue = 0;
+
+   if (timer_period > 0)
+   __atomic_fetch_add(&rsrc->port_stats[mbufs[0]->port].rx,
+  vec->nb_elem, __ATOMIC_RELAXED);
+
+   f

Re: [dpdk-dev] [PATCH v2 1/4] vhost: support async dequeue for split ring

2021-09-26 Thread Jiang, Cheng1
Hi Wenwu,

Comments are inline.

> -Original Message-
> From: Ma, WenwuX 
> Sent: Saturday, September 18, 2021 3:27 AM
> To: dev@dpdk.org
> Cc: maxime.coque...@redhat.com; Xia, Chenbo ;
> Jiang, Cheng1 ; Hu, Jiayu ;
> Pai G, Sunil ; Yang, YvonneX
> ; Wang, YuanX ; Ma,
> WenwuX ; Wang, Yinan 
> Subject: [PATCH v2 1/4] vhost: support async dequeue for split ring
> 
> From: Yuan Wang 
> 
> This patch implements asynchronous dequeue data path for split ring.
> A new asynchronous dequeue function is introduced. With this function, the
> application can try to receive packets from the guest with offloading copies
> to the async channel, thus saving precious CPU cycles.
> 
> Signed-off-by: Yuan Wang 
> Signed-off-by: Jiayu Hu 
> Signed-off-by: Wenwu Ma 
> Tested-by: Yinan Wang 
> Tested-by: Yvonne Yang 
> ---
>  doc/guides/prog_guide/vhost_lib.rst |   9 +
>  lib/vhost/rte_vhost_async.h |  33 +-
>  lib/vhost/version.map   |   3 +
>  lib/vhost/vhost.h   |   3 +-
>  lib/vhost/virtio_net.c  | 530 
>  5 files changed, 575 insertions(+), 3 deletions(-)
> 
> diff --git a/doc/guides/prog_guide/vhost_lib.rst b/doc/guides/prog_guide/vhost_lib.rst
> index 171e0096f6..9ed544db7a 100644
> index 171e0096f6..9ed544db7a 100644
> --- a/doc/guides/prog_guide/vhost_lib.rst
> +++ b/doc/guides/prog_guide/vhost_lib.rst
> @@ -303,6 +303,15 @@ The following is an overview of some key Vhost API functions:
>    Clear inflight packets which are submitted to DMA engine in vhost async data
>    path. Completed packets are returned to applications through ``pkts``.
> 
> +* ``rte_vhost_async_try_dequeue_burst(vid, queue_id, mbuf_pool, pkts, count, nr_inflight)``
> +
> +  This function tries to receive packets from the guest with offloading
> +  copies to the async channel. The packets that are transfer completed
> +  are returned in ``pkts``. The other packets whose copies are submitted
> +  to the async channel but not completed are called "in-flight packets".
> +  This function will not return in-flight packets until their copies are
> +  completed by the async channel.
> +
>  Vhost-user Implementations
>  --
> 
> diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
> index ad71555a7f..973efa19b1 100644
> --- a/lib/vhost/rte_vhost_async.h
> +++ b/lib/vhost/rte_vhost_async.h
> @@ -84,11 +84,12 @@ struct rte_vhost_async_channel_ops {
>  };
> 
>  /**
> - * inflight async packet information
> + * in-flight async packet information
>   */
>  struct async_inflight_info {
>   struct rte_mbuf *mbuf;
> - uint16_t descs; /* num of descs inflight */
> + struct virtio_net_hdr nethdr;
> + uint16_t descs; /* num of descs in-flight */
>   uint16_t nr_buffers; /* num of buffers inflight for packed ring */
>  };
> 
> @@ -255,5 +256,33 @@ int rte_vhost_async_get_inflight(int vid, uint16_t queue_id);
>  __rte_experimental
>  uint16_t rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id,
>  	struct rte_mbuf **pkts, uint16_t count);

Blank line is needed here.

> +/**
> + * This function tries to receive packets from the guest with offloading
> + * copies to the async channel. The packets that are transfer completed
> + * are returned in "pkts". The other packets whose copies are submitted to
> + * the async channel but not completed are called "in-flight packets".
> + * This function will not return in-flight packets until their copies are
> + * completed by the async channel.
> + *
> + * @param vid
> + *  id of vhost device to dequeue data

The Initials should be in uppercase. The following also needs to be changed.

> + * @param queue_id
> + *  queue id to dequeue data

Should be 'ID of virtqueue ..'.

Thanks,
Cheng

> + * @param mbuf_pool
> + *  mbuf_pool where host mbuf is allocated.
> + * @param pkts
> + *  blank array to keep successfully dequeued packets
> + * @param count
> + *  size of the packet array
> + * @param nr_inflight
> + *  the amount of in-flight packets. If error occurred, its value is set to -1.
> + * @return
> + *  num of successfully dequeued packets
> + */
> +__rte_experimental
> +uint16_t rte_vhost_async_try_dequeue_burst(int vid, uint16_t queue_id,
> +	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,
> +	int *nr_inflight);
> 
>  #endif /* _RTE_VHOST_ASYNC_H_ */
> diff --git a/lib/vhost/version.map b/lib/vhost/version.map
> index c92a9d4962..1e033ad8e2 100644
> --- a/lib/vhost/version.map
> +++ b/lib/vhost/version.map
> @@ -85,4 +85,7 @@ EXPERIMENTAL {
>   rte_vhost_async_channel_register_thread_unsafe;
>   rte_vhost_async_channel_unregister_thread_unsafe;
>   rte_vhost_clear_queue_thread_unsafe;
> +
> + # added in 21.11
> + rte_vhost_async_try_dequeue_burst;
>  };
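
For context, a hedged sketch of calling the proposed dequeue API from a
polling loop (per the doc comment above, nr_inflight reports copies still
owned by the async channel; freeing the mbufs stands in for real
forwarding):

#include <rte_mbuf.h>
#include <rte_vhost.h>
#include <rte_vhost_async.h>

#define BURST_SZ 32

static void
poll_guest_tx(int vid, struct rte_mempool *mbuf_pool)
{
	struct rte_mbuf *pkts[BURST_SZ];
	int nr_inflight;
	uint16_t i, n;

	n = rte_vhost_async_try_dequeue_burst(vid, VIRTIO_TXQ, mbuf_pool,
			pkts, BURST_SZ, &nr_inflight);
	for (i = 0; i < n; i++)
		rte_pktmbuf_free(pkts[i]); /* or forward to a NIC queue */
}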



Re: [dpdk-dev] [PATCH v2 2/4] examples/vhost: refactor vhost enqueue and dequeue datapaths

2021-09-26 Thread Jiang, Cheng1
Hi,

> -Original Message-
> From: Ma, WenwuX 
> Sent: Saturday, September 18, 2021 3:27 AM
> To: dev@dpdk.org
> Cc: maxime.coque...@redhat.com; Xia, Chenbo ;
> Jiang, Cheng1 ; Hu, Jiayu ;
> Pai G, Sunil ; Yang, YvonneX
> ; Ma, WenwuX 
> Subject: [PATCH v2 2/4] examples/vhost: refactor vhost enqueue and
> dequeue datapaths
> 
> Previously, by checking a flag, we called different enqueue/dequeue
> functions in the data path.
> 
> Now, we use an ops table that is initialized when the vhost device is
> created, so that we can call the ops directly in the vhost data path
> without any further flag checks.
> 
> Signed-off-by: Wenwu Ma 
> Reviewed-by: Maxime Coquelin 
> Tested-by: Yvonne Yang 
> ---
>  examples/vhost/main.c   | 100 +---
>  examples/vhost/main.h   |  28 --
>  examples/vhost/virtio_net.c |  16 +-
>  3 files changed, 98 insertions(+), 46 deletions(-)
> 
> diff --git a/examples/vhost/main.c b/examples/vhost/main.c
> index d0bf1f31e3..254f7097bc 100644
> --- a/examples/vhost/main.c
> +++ b/examples/vhost/main.c
> @@ -106,6 +106,8 @@ static uint32_t burst_rx_retry_num = BURST_RX_RETRIES;
>  static char *socket_files;
>  static int nb_sockets;
> 
> +static struct vhost_queue_ops vdev_queue_ops[MAX_VHOST_DEVICE];
> +
>  /* empty vmdq configuration structure. Filled in programatically */
>  static struct rte_eth_conf vmdq_conf_default = {
>   .rxmode = {
> @@ -879,22 +881,8 @@ drain_vhost(struct vhost_dev *vdev)
>   uint16_t nr_xmit = vhost_txbuff[buff_idx]->len;
>   struct rte_mbuf **m = vhost_txbuff[buff_idx]->m_table;
> 
> - if (builtin_net_driver) {
> - ret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit);
> - } else if (async_vhost_driver) {
> - uint16_t enqueue_fail = 0;
> -
> - complete_async_pkts(vdev);
> - ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ, m, nr_xmit);
> - __atomic_add_fetch(&vdev->pkts_inflight, ret, __ATOMIC_SEQ_CST);
> -
> - enqueue_fail = nr_xmit - ret;
> - if (enqueue_fail)
> - free_pkts(&m[ret], nr_xmit - ret);
> - } else {
> - ret = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
> - m, nr_xmit);
> - }
> + ret = vdev_queue_ops[vdev->vid].enqueue_pkt_burst(vdev,
> + VIRTIO_RXQ, m, nr_xmit);
> 

Now the line length limit is 100 characters, so you don't have to split it across 2 lines.

>   if (enable_stats) {
>   __atomic_add_fetch(&vdev->stats.rx_total_atomic, nr_xmit,
> @@ -1173,6 +1161,33 @@ drain_mbuf_table(struct mbuf_table *tx_q)
>   }
>  }
> 
> +uint16_t
> +async_enqueue_pkts(struct vhost_dev *vdev, uint16_t queue_id,
> + struct rte_mbuf **pkts, uint32_t rx_count)
> +{
> + uint16_t enqueue_count;
> + uint16_t enqueue_fail = 0;
> +
> + complete_async_pkts(vdev);
> + enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
> + queue_id, pkts, rx_count);

Same here.

> + __atomic_add_fetch(&vdev->pkts_inflight, enqueue_count,
> + __ATOMIC_SEQ_CST);

Same here.

> +
> + enqueue_fail = rx_count - enqueue_count;
> + if (enqueue_fail)
> + free_pkts(&pkts[enqueue_count], enqueue_fail);
> +
> + return enqueue_count;
> +}
> +
> +uint16_t
> +sync_enqueue_pkts(struct vhost_dev *vdev, uint16_t queue_id,
> + struct rte_mbuf **pkts, uint32_t rx_count)
> +{
> + return rte_vhost_enqueue_burst(vdev->vid, queue_id, pkts,
> rx_count);
> +}
> +
>  static __rte_always_inline void
>  drain_eth_rx(struct vhost_dev *vdev)
>  {
> @@ -1203,25 +1218,8 @@ drain_eth_rx(struct vhost_dev *vdev)
>   }
>   }
> 
> - if (builtin_net_driver) {
> - enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
> - pkts, rx_count);
> - } else if (async_vhost_driver) {
> - uint16_t enqueue_fail = 0;
> -
> - complete_async_pkts(vdev);
> - enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
> - VIRTIO_RXQ, pkts, rx_count);
> - __atomic_add_fetch(&vdev->pkts_inflight, enqueue_count, __ATOMIC_SEQ_CST);
> -
> - enqueue_fail = rx_count - enqueue_count;
> - if (enqueue_fail)
> - free_pkts(&pkts[enqueue_count], enqueue_fail);
> -
> - } else {
> - enqueue_count = rte_vhost_enqueue_burst(vdev->vid,
> VIRTIO_RXQ,
> - pkts, rx_count);
> - }
> + enqueue_count = vdev_queue_ops[vdev->vid].enqueue_pkt_burst(vdev,
> + VIRTIO_RXQ, pkts, rx_count);
> 
>   if (enable_stats) {
>   __atomic_add_fetch(&vdev->stats.rx_total_atomic,
> rx_count,
> @@ -1234,6 +1232,14 @@ drain_eth_rx(struct vh