[dpdk-dev] [PATCH] net/failsafe: fix RSS hash offload reporting

2020-12-22 Thread Andrew Rybchenko
If sub-devices support RSS hash offload, the offload should be
reported by the failsafe device since handling is transparent
from failsafe point of view.

Fixes: 5d308972954c ("ethdev: add mbuf RSS update as an offload")
Cc: sta...@dpdk.org

Signed-off-by: Andrew Rybchenko 
---
 drivers/net/failsafe/failsafe_ops.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/failsafe/failsafe_ops.c 
b/drivers/net/failsafe/failsafe_ops.c
index 492047f587..9946b696f3 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -1192,7 +1192,8 @@ fs_dev_infos_get(struct rte_eth_dev *dev,
DEV_RX_OFFLOAD_JUMBO_FRAME |
DEV_RX_OFFLOAD_SCATTER |
DEV_RX_OFFLOAD_TIMESTAMP |
-   DEV_RX_OFFLOAD_SECURITY;
+   DEV_RX_OFFLOAD_SECURITY |
+   DEV_RX_OFFLOAD_RSS_HASH;
 
infos->rx_queue_offload_capa =
DEV_RX_OFFLOAD_VLAN_STRIP |
@@ -1209,7 +1210,8 @@ fs_dev_infos_get(struct rte_eth_dev *dev,
DEV_RX_OFFLOAD_JUMBO_FRAME |
DEV_RX_OFFLOAD_SCATTER |
DEV_RX_OFFLOAD_TIMESTAMP |
-   DEV_RX_OFFLOAD_SECURITY;
+   DEV_RX_OFFLOAD_SECURITY |
+   DEV_RX_OFFLOAD_RSS_HASH;
 
infos->tx_offload_capa =
DEV_TX_OFFLOAD_MULTI_SEGS |
-- 
2.29.2



[dpdk-dev] [PATCH] net/i40e: refactor of hash flow

2020-12-22 Thread Zhang,Alvin
From: Alvin Zhang 

1. Delete original code.
2. Add 2 tables(One maps flow pattern and RSS type to PCTYPE,
   another maps RSS type to input set).
3. Parse RSS pattern and RSS type to get PCTYPE.
4. Parse RSS action to get queues, RSS function and hash field.
5. Create and destroy RSS filters.
6. Create new files for hash flows.
7. Update doc for correcting wrong descriptions.

Signed-off-by: Alvin Zhang 
---
 doc/guides/nics/i40e.rst   |4 +-
 drivers/net/i40e/i40e_ethdev.c |  840 ++--
 drivers/net/i40e/i40e_ethdev.h |   53 +-
 drivers/net/i40e/i40e_flow.c   |  617 +-
 drivers/net/i40e/i40e_hash.c   | 1379 
 drivers/net/i40e/i40e_hash.h   |   34 +
 drivers/net/i40e/meson.build   |1 +
 7 files changed, 1661 insertions(+), 1267 deletions(-)
 create mode 100644 drivers/net/i40e/i40e_hash.c
 create mode 100644 drivers/net/i40e/i40e_hash.h

diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst
index 4e5c467..64f20e7 100644
--- a/doc/guides/nics/i40e.rst
+++ b/doc/guides/nics/i40e.rst
@@ -562,9 +562,9 @@ Generic flow API
 - ``RSS Flow``
 
   RSS Flow supports to set hash input set, hash function, enable hash
-  and configure queue region.
+  and configure queues.
   For example:
-  Configure queue region as queue 0, 1, 2, 3.
+  Configure queues as queue 0, 1, 2, 3.
 
   .. code-block:: console
 
diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index f54769c..b06ce08 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -39,6 +39,7 @@
 #include "i40e_pf.h"
 #include "i40e_regs.h"
 #include "rte_pmd_i40e.h"
+#include "i40e_hash.h"
 
 #define ETH_I40E_FLOATING_VEB_ARG  "enable_floating_veb"
 #define ETH_I40E_FLOATING_VEB_LIST_ARG "floating_veb_list"
@@ -396,7 +397,6 @@ static int i40e_sw_tunnel_filter_insert(struct i40e_pf *pf,
 static void i40e_tunnel_filter_restore(struct i40e_pf *pf);
 static void i40e_filter_restore(struct i40e_pf *pf);
 static void i40e_notify_all_vfs_link_status(struct rte_eth_dev *dev);
-static int i40e_pf_config_rss(struct i40e_pf *pf);
 
 static const char *const valid_keys[] = {
ETH_I40E_FLOATING_VEB_ARG,
@@ -1764,10 +1764,6 @@ static inline void i40e_config_automask(struct i40e_pf 
*pf)
/* initialize queue region configuration */
i40e_init_queue_region_conf(dev);
 
-   /* initialize RSS configuration from rte_flow */
-   memset(&pf->rss_info, 0,
-   sizeof(struct i40e_rte_flow_rss_conf));
-
/* reset all stats of the device, including pf and main vsi */
i40e_dev_stats_reset(dev);
 
@@ -4426,7 +4422,6 @@ static int i40e_dev_xstats_get_names(__rte_unused struct 
rte_eth_dev *dev,
 {
struct i40e_pf *pf;
struct i40e_hw *hw;
-   int ret;
 
if (!vsi || !lut)
return -EINVAL;
@@ -4435,12 +4430,16 @@ static int i40e_dev_xstats_get_names(__rte_unused 
struct rte_eth_dev *dev,
hw = I40E_VSI_TO_HW(vsi);
 
if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) {
-   ret = i40e_aq_set_rss_lut(hw, vsi->vsi_id,
- vsi->type != I40E_VSI_SRIOV,
- lut, lut_size);
-   if (ret) {
-   PMD_DRV_LOG(ERR, "Failed to set RSS lookup table");
-   return ret;
+   enum i40e_status_code status;
+
+   status = i40e_aq_set_rss_lut(hw, vsi->vsi_id,
+vsi->type != I40E_VSI_SRIOV,
+lut, lut_size);
+   if (status) {
+   PMD_DRV_LOG(ERR,
+   "Failed to update RSS lookup table, error 
status: %d",
+   status);
+   return -EIO;
}
} else {
uint32_t *lut_dw = (uint32_t *)lut;
@@ -7573,7 +7572,7 @@ struct i40e_vsi *
 }
 
 /* Disable RSS */
-static void
+void
 i40e_pf_disable_rss(struct i40e_pf *pf)
 {
struct i40e_hw *hw = I40E_PF_TO_HW(pf);
@@ -7591,7 +7590,6 @@ struct i40e_vsi *
uint16_t key_idx = (vsi->type == I40E_VSI_SRIOV) ?
   I40E_VFQF_HKEY_MAX_INDEX :
   I40E_PFQF_HKEY_MAX_INDEX;
-   int ret = 0;
 
if (!key || key_len == 0) {
PMD_DRV_LOG(DEBUG, "No key to be configured");
@@ -7604,11 +7602,16 @@ struct i40e_vsi *
 
if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) {
struct i40e_aqc_get_set_rss_key_data *key_dw =
-   (struct i40e_aqc_get_set_rss_key_data *)key;
+   (struct i40e_aqc_get_set_rss_key_data *)key;
+   enum i40e_status_code status =
+   i40e_aq_set_rss_key(hw, vsi->vsi_id, key_dw);
 
-   ret = i40e_aq_set_rss_key(hw, vsi->vsi_id, key_dw);
- 

Re: [dpdk-dev] [PATCH v1 2/2] net/iavf: introduce iavf driver on vfio-user client

2020-12-22 Thread Xia, Chenbo
Hi Jingjing,

> -Original Message-
> From: Wu, Jingjing 
> Sent: Saturday, December 19, 2020 10:38 PM
> To: dev@dpdk.org
> Cc: Wu, Jingjing ; Xing, Beilei 
> ;
> Xia, Chenbo ; Lu, Xiuchun 
> Subject: [PATCH v1 2/2] net/iavf: introduce iavf driver on vfio-user client
> 
> This patch add a new net driver based on vdev abstraction,
> i.e. iavf_client_ethdev.c. It is using common iavf functions
> to talk with Emulated pci interfaces based on vfio-user.
> 
>   --
>   | -- |
>   | |  iavf driver   | |> (iavf_client_ethdev.c)
>   | -- |
>   | -- |
>   | | device emulate | |
>   | | vfio-user adapt| |
>   | -- |
>   --
> |
> |
>   --
>   |  vfio-user |
>   |   client   |
>   --
> 
> Signed-off-by: Jingjing Wu 
> ---
>  drivers/common/iavf/iavf_prototype.h  |   1 +
>  drivers/common/iavf/version.map   |   1 +
>  drivers/net/iavf/iavf.h   |  18 +-
>  drivers/net/iavf/iavf_client_ethdev.c | 298 ++
>  drivers/net/iavf/iavf_ethdev.c|  26 +--
>  drivers/net/iavf/iavf_rxtx.c  |  23 +-
>  drivers/net/iavf/meson.build  |   1 +
>  7 files changed, 347 insertions(+), 21 deletions(-)
>  create mode 100644 drivers/net/iavf/iavf_client_ethdev.c
> 
> diff --git a/drivers/common/iavf/iavf_prototype.h
> b/drivers/common/iavf/iavf_prototype.h
> index f34e77db0f..3998d26dc0 100644
> --- a/drivers/common/iavf/iavf_prototype.h
> +++ b/drivers/common/iavf/iavf_prototype.h
> @@ -83,6 +83,7 @@ void iavf_destroy_spinlock(struct iavf_spinlock *sp);
>  __rte_internal
>  void iavf_vf_parse_hw_config(struct iavf_hw *hw,
>struct virtchnl_vf_resource *msg);
> +__rte_internal
>  enum iavf_status iavf_vf_reset(struct iavf_hw *hw);
>  __rte_internal
>  enum iavf_status iavf_aq_send_msg_to_pf(struct iavf_hw *hw,
> diff --git a/drivers/common/iavf/version.map b/drivers/common/iavf/version.map
> index 8808779ab7..4dc2d42196 100644
> --- a/drivers/common/iavf/version.map
> +++ b/drivers/common/iavf/version.map
> @@ -7,6 +7,7 @@ INTERNAL {
>   iavf_set_mac_type;
>   iavf_shutdown_adminq;
>   iavf_vf_parse_hw_config;
> + iavf_vf_reset;
>   client_vfio_user_setup;
>   client_vfio_user_get_bar_addr;
>   iavf_write_addr;
> diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
> index 6d5912d8c1..c34f971721 100644
> --- a/drivers/net/iavf/iavf.h
> +++ b/drivers/net/iavf/iavf.h
> @@ -195,7 +195,10 @@ struct iavf_adapter {
>   struct iavf_hw hw;
>   struct rte_eth_dev *eth_dev;
>   struct iavf_info vf;
> -
> +#ifdef RTE_LIBRTE_IAVF_CLIENT
> + /* used for avf_client driver */
> + struct vfio_device *user_dev;
> +#endif
>   bool rx_bulk_alloc_allowed;
>   /* For vector PMD */
>   bool rx_vec_allowed;
> @@ -231,6 +234,16 @@ iavf_init_adminq_parameter(struct iavf_hw *hw)
>   hw->aq.asq_buf_size = IAVF_AQ_BUF_SZ;
>  }
> 
> +static inline void
> +iavf_disable_irq0(struct iavf_hw *hw)
> +{
> + /* Disable all interrupt types */
> + IAVF_WRITE_REG(hw, IAVF_VFINT_ICR0_ENA1, 0);
> + IAVF_WRITE_REG(hw, IAVF_VFINT_DYN_CTL01,
> +IAVF_VFINT_DYN_CTL01_ITR_INDX_MASK);
> + IAVF_WRITE_FLUSH(hw);
> +}
> +
>  static inline uint16_t
>  iavf_calc_itr_interval(int16_t interval)
>  {
> @@ -284,6 +297,9 @@ _atomic_set_cmd(struct iavf_info *vf, enum virtchnl_ops
> ops)
>   return !ret;
>  }
> 
> +extern const struct eth_dev_ops iavf_eth_dev_ops;
> +
> +int iavf_init_vf(struct rte_eth_dev *dev);
>  int iavf_check_api_version(struct iavf_adapter *adapter);
>  int iavf_get_vf_resource(struct iavf_adapter *adapter);
>  void iavf_handle_virtchnl_msg(struct rte_eth_dev *dev);
> diff --git a/drivers/net/iavf/iavf_client_ethdev.c
> b/drivers/net/iavf/iavf_client_ethdev.c
> new file mode 100644
> index 00..03f759c761
> --- /dev/null
> +++ b/drivers/net/iavf/iavf_client_ethdev.c
> @@ -0,0 +1,298 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2017 Intel Corporation
> + */
> +
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include "iavf.h"
> +#include "iavf_rxtx.h"
> +
> +static int iavf_client_dev_close(struct rte_eth_dev *dev);
> +static int iavf_client_dev_reset(struct rte_eth_dev *dev);
> +
> +/* set iavf_client_dev_ops to iavf's by default */
> +static struct eth_dev_ops iavf_client_eth_dev_ops;
> +
> +static const char *valid_args[] = {
> +#define AVF_CLIENT_ARG_PATH   "path"
> + AVF_CLIENT_ARG_PATH,
> + NULL
> +};
> +
> +/* set up vfio_device for iavf_client*/
> +static int
> +iavf_client_vfio_user_setup(struct rte_eth_dev *dev, const char *path)
> +{
> + struct iavf_adapter *adapter =

[dpdk-dev] [PATCH v1] app/testpmd: fix dynamic config error for max-pkt-len

2020-12-22 Thread Steve Yang
When the 'max-pkt-len' value caused the 'rx_offloads' flag to change, all
the offloads of the rx queues ('rx_conf[qid].offloads') weren't synchronized,
which caused the offloads check against 'rx_queue_offload_capa'
within 'rte_eth_rx_queue_setup' to fail.

Apply rx offloads configuration once it changed when 'max-pkt-len'
command parsed.

Fixes: 384161e00627 ("app/testpmd: adjust on the fly VLAN configuration")

Signed-off-by: Steve Yang 
---
 app/test-pmd/cmdline.c | 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index 2ccbaa039e..d72a40d7de 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -1902,7 +1902,23 @@ cmd_config_max_pkt_len_parsed(void *parsed_result,
rx_offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
else
rx_offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME;
-   port->dev_conf.rxmode.offloads = rx_offloads;
+
+   if (rx_offloads != port->dev_conf.rxmode.offloads) {
+   uint16_t k;
+   int ret;
+
+   port->dev_conf.rxmode.offloads = rx_offloads;
+   /* Apply Rx offloads configuration */
+   ret = eth_dev_info_get_print_err(pid,
+   &port->dev_info);
+   if (ret != 0)
+   rte_exit(EXIT_FAILURE,
+   "rte_eth_dev_info_get() failed\n");
+
+   for (k = 0;
+k < port->dev_info.nb_rx_queues; k++)
+   port->rx_conf[k].offloads = rx_offloads;
+   }
} else {
printf("Unknown parameter\n");
return;
-- 
2.17.1



[dpdk-dev] [PATCH v2 0/2] examples/vhost: sample code refactor

2020-12-22 Thread Cheng Jiang
Refactor the vhost sample code. Add ioat ring space count and check
in ioat callback, optimize vhost data path for batch enqueue, replace
rte_atomicNN_xxx with atomic_XXX and refactor vhost async data path.
---
v2:
 * optimized patch structure
 * optimized git log
 * replaced rte_atomicNN_xxx with atomic_XXX

Cheng Jiang (2):
  examples/vhost: add ioat ring space count and check
  examples/vhost: refactor vhost data path

 examples/vhost/ioat.c |  15 ++--
 examples/vhost/main.c | 163 +-
 examples/vhost/main.h |   7 +-
 3 files changed, 125 insertions(+), 60 deletions(-)

--
2.29.2



[dpdk-dev] [PATCH v2 1/2] examples/vhost: add ioat ring space count and check

2020-12-22 Thread Cheng Jiang
Add an ioat ring space count and check; if the ioat ring space is not enough
for the next async vhost packet enqueue, then just return to prevent
an enqueue failure.

Signed-off-by: Cheng Jiang 
---
 examples/vhost/ioat.c | 15 +++
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
index 71d8a1f1f5..b0b04aa453 100644
--- a/examples/vhost/ioat.c
+++ b/examples/vhost/ioat.c
@@ -17,6 +17,7 @@ struct packet_tracker {
unsigned short next_read;
unsigned short next_write;
unsigned short last_remain;
+   unsigned short ioat_space;
 };
 
 struct packet_tracker cb_tracker[MAX_VHOST_DEVICE];
@@ -113,7 +114,7 @@ open_ioat(const char *value)
goto out;
}
rte_rawdev_start(dev_id);
-
+   cb_tracker[dev_id].ioat_space = IOAT_RING_SIZE;
dma_info->nr++;
i++;
}
@@ -140,13 +141,9 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
src = descs[i_desc].src;
dst = descs[i_desc].dst;
i_seg = 0;
+   if (cb_tracker[dev_id].ioat_space < src->nr_segs)
+   break;
while (i_seg < src->nr_segs) {
-   /*
-* TODO: Assuming that the ring space of the
-* IOAT device is large enough, so there is no
-* error here, and the actual error handling
-* will be added later.
-*/
rte_ioat_enqueue_copy(dev_id,
(uintptr_t)(src->iov[i_seg].iov_base)
+ src->offset,
@@ -158,7 +155,8 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
i_seg++;
}
write &= mask;
-   cb_tracker[dev_id].size_track[write] = i_seg;
+   cb_tracker[dev_id].size_track[write] = src->nr_segs;
+   cb_tracker[dev_id].ioat_space -= src->nr_segs;
write++;
}
} else {
@@ -186,6 +184,7 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
int dev_id = dma_bind[vid].dmas[queue_id * 2
+ VIRTIO_RXQ].dev_id;
n_seg = rte_ioat_completed_ops(dev_id, 255, dump, dump);
+   cb_tracker[dev_id].ioat_space += n_seg;
n_seg += cb_tracker[dev_id].last_remain;
if (!n_seg)
return 0;
-- 
2.29.2



[dpdk-dev] [PATCH v2 2/2] examples/vhost: refactor vhost data path

2020-12-22 Thread Cheng Jiang
Change the vm2vm data path to batch enqueue for better performance.
Support latest async vhost API, refactor vhost async data path,
replace rte_atomicNN_xxx with atomic_XXX and clean up some code.

Signed-off-by: Cheng Jiang 
---
 examples/vhost/main.c | 163 +-
 examples/vhost/main.h |   7 +-
 2 files changed, 118 insertions(+), 52 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 8d8c3038bf..d400939a2d 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -179,9 +179,18 @@ struct mbuf_table {
struct rte_mbuf *m_table[MAX_PKT_BURST];
 };
 
+struct vhost_bufftable {
+   uint32_t len;
+   uint64_t pre_tsc;
+   struct rte_mbuf *m_table[MAX_PKT_BURST];
+};
+
 /* TX queue for each data core. */
 struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
 
+/* TX queue for each vhost device. */
+struct vhost_bufftable vhost_bufftable[RTE_MAX_LCORE * MAX_VHOST_DEVICE];
+
 #define MBUF_TABLE_DRAIN_TSC   ((rte_get_tsc_hz() + US_PER_S - 1) \
 / US_PER_S * BURST_TX_DRAIN_US)
 #define VLAN_HLEN   4
@@ -804,39 +813,82 @@ unlink_vmdq(struct vhost_dev *vdev)
}
 }
 
+static inline void
+free_pkts(struct rte_mbuf **pkts, uint16_t n)
+{
+   while (n--)
+   rte_pktmbuf_free(pkts[n]);
+}
+
 static __rte_always_inline void
-virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
+complete_async_pkts(struct vhost_dev *vdev)
+{
+   struct rte_mbuf *p_cpl[MAX_PKT_BURST];
+   uint16_t complete_count;
+
+   complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
+   VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
+   if (complete_count) {
+   atomic_fetch_sub(&vdev->nr_async_pkts, complete_count);
+   free_pkts(p_cpl, complete_count);
+   }
+}
+
+static __rte_always_inline void
+sync_virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
struct rte_mbuf *m)
 {
uint16_t ret;
-   struct rte_mbuf *m_cpl[1];
 
if (builtin_net_driver) {
ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
-   } else if (async_vhost_driver) {
-   ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
-   &m, 1);
-
-   if (likely(ret))
-   dst_vdev->nr_async_pkts++;
-
-   while (likely(dst_vdev->nr_async_pkts)) {
-   if (rte_vhost_poll_enqueue_completed(dst_vdev->vid,
-   VIRTIO_RXQ, m_cpl, 1))
-   dst_vdev->nr_async_pkts--;
-   }
} else {
ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
}
 
if (enable_stats) {
-   rte_atomic64_inc(&dst_vdev->stats.rx_total_atomic);
-   rte_atomic64_add(&dst_vdev->stats.rx_atomic, ret);
+   atomic_fetch_add(&dst_vdev->stats.rx_total_atomic, 1);
+   atomic_fetch_add(&dst_vdev->stats.rx_atomic, ret);
src_vdev->stats.tx_total++;
src_vdev->stats.tx += ret;
}
 }
 
+static __rte_always_inline void
+drain_vhost(struct vhost_dev *vdev)
+{
+   uint16_t ret;
+   uint64_t queue_id = rte_lcore_id() * MAX_VHOST_DEVICE + vdev->vid;
+   uint16_t nr_xmit = vhost_bufftable[queue_id].len;
+   struct rte_mbuf **m = vhost_bufftable[queue_id].m_table;
+
+   if (builtin_net_driver) {
+   ret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit);
+   } else if (async_vhost_driver) {
+   uint32_t cpu_cpl_nr;
+   struct rte_mbuf *m_cpu_cpl[nr_xmit];
+   complete_async_pkts(vdev);
+   ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
+   m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
+   atomic_fetch_add(&vdev->nr_async_pkts, ret - cpu_cpl_nr);
+   if (cpu_cpl_nr)
+   free_pkts(m_cpu_cpl, cpu_cpl_nr);
+   if (nr_xmit - ret)
+   free_pkts(&m[ret], nr_xmit - ret);
+   } else {
+   ret = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
+   m, nr_xmit);
+   }
+
+   if (enable_stats) {
+   atomic_fetch_add(&vdev->stats.rx_total_atomic, nr_xmit);
+   atomic_fetch_add(&vdev->stats.rx_atomic, ret);
+   }
+
+   if (!async_vhost_driver)
+   free_pkts(m, nr_xmit);
+}
+
 /*
  * Check if the packet destination MAC address is for a local device. If so 
then put
  * the packet on that devices RX queue. If not then return.
@@ -846,7 +898,8 @@ virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
 {
struct rte_ether_hdr *pkt_hdr;
struct vhost_dev *dst_vdev;
-
+   struct vhost_bufftable *vhost_txq;
+   const u

Re: [dpdk-dev] [PATCH 0/8] Introduce emudev library and iavf emudev driver

2020-12-22 Thread Maxime Coquelin
Hi Chenbo,

Thanks for the detailed reply.

On 12/22/20 4:09 AM, Xia, Chenbo wrote:
> Hi Maxime,
> 
>> -Original Message-
>> From: Maxime Coquelin 
>> Sent: Monday, December 21, 2020 8:02 PM
>> To: Xia, Chenbo ; Thomas Monjalon 
>> ;
>> David Marchand 
>> Cc: dev ; Stephen Hemminger ; 
>> Liang,
>> Cunming ; Lu, Xiuchun ; Li,
>> Miao ; Wu, Jingjing 
>> Subject: Re: [dpdk-dev] [PATCH 0/8] Introduce emudev library and iavf emudev
>> driver
>>
>>
>>
>> On 12/21/20 10:52 AM, Maxime Coquelin wrote:
>>> Hi Chenbo,
>>>
>>> On 12/19/20 7:11 AM, Xia, Chenbo wrote:
 Hi David,

> -Original Message-
> From: David Marchand 
> Sent: Friday, December 18, 2020 5:54 PM
> To: Xia, Chenbo 
> Cc: dev ; Thomas Monjalon ; Stephen
> Hemminger ; Liang, Cunming
> ; Lu, Xiuchun ; Li, Miao
> ; Wu, Jingjing 
> Subject: Re: [PATCH 0/8] Introduce emudev library and iavf emudev driver
>
> On Fri, Dec 18, 2020 at 9:02 AM Chenbo Xia  wrote:
>>
>> This series introduces a new device abstraction called emudev for
> emulated
>> devices. A new library (librte_emudev) is implemented. The first emudev
>> driver is also introduced, which emulates Intel Adaptive Virtual
> Function
>> (iavf) as a software network device.
>>
>> This series has a dependency on librte_vfio_user patch series:
>> http://patchwork.dpdk.org/cover/85389/
>>
>> Background & Motivation
>> ---
>> The disaggregated/multi-process QEMU is using VFIO-over-socket/vfio-user
>> as the main transport mechanism to disaggregate IO services from QEMU.
>> Therefore, librte_vfio_user is introduced in DPDK to accommodate
>> emulated devices for high performance I/O. Although vfio-user library
>> provides possibility of emulating devices in DPDK, DPDK does not have
>> a device abstraction for emulated devices. A good device abstraction
> will
>> be useful for applications or high performance data path driver. With
>> this consideration, emudev library is designed and implemented. It also
>> make it possbile to keep modular design on emulated devices by
> implementing
>> data path related logic in a standalone driver (e.g., an ethdev driver)
>> and keeps the unrelated logic in the emudev driver.
>
> Since you mention performance, how does it compare to vhost-user/virtio?

 I think it depends on the device specification (i.e., how complex its data
>> path
 handling is). A first try on iavf spec shows better performance than virtio
 in our simple tests.
>>>
>>> That's interesting! How big is the performance difference? And how do
>>> we explain it?
>>>
>>> If there are improvements that could be done in the Virtio
>>> specification, it would be great to know and work on their
>>> implementations. It worries me a bit that every one is coming with
>>> his new device emulation every release, making things like live-
>>> migration difficult to achieve in the future.
>>
>> I did a quick review of the IAVF emudev driver to understand what other
>> factors than ring layout could explain a performance gain.
>>
>> My understanding is that part of the performance gain may come from
>> following things that are supported/implemented in Vhost-user backend
>> and not in IAVF driver:
>> 1. Memory hotplug. It seems the datapath is not safe against memory
>> hotplug in the VM, which causes the memory tables to be updated
>> asynchronously from the datapath. In order to support it in Vhost-user
>> library, we had to introduce locks to ensure the datapath isn't
>> accessing the shared memory while it is being remapped.
> 
> I think now it uses the similar way that vhost-user does.
> 
> First, in the vfio-user patch series, we introduce a callback lock_dp to lock
> the data path when messages like DMA MAP/UNMAP come. It will lock datapath
> in our data path driver.
> 
> Note that the data path handling is in our data path driver:
> http://patchwork.dpdk.org/cover/85500/
> 
> For modular design, iavf_emu driver emulates the device but the iavf back-end
> driver does data path handling.

My analysis was actually based on the data path driver series.

My point was that iavfbe_recv_pkts() and iavfbe_xmit_pkts() are not safe
against asynchronous changes like memory table updates.

As far as I can see, the access_lock aren't taken by these functions, so
if for example a memory table update happen during the these functions
execution, it could lead to undefined behaviour. Only things checked
there is whether the queue is enabled when entering the function, but
this value can be changed right after having being checked.

For example, in Vhost-user lib, we protected rte_vhost_dequeue_burst()
and rte_vhost_enqueue_burst() with a spinlock. Note that this spinlock
is per-virtqueue in order to avoid contention between the different
queues.

>>
>> 2. Live-migration. This feature does not seem supported in the dri

[dpdk-dev] [PATCH] net/failsafe: report minimum and maximum MTU in device info

2020-12-22 Thread Andrew Rybchenko
Take minimum and maximum MTU values for subdevices and
report maximum of minimums and minimum of maximums.

Fixes: ad97ceece12c ("ethdev: add min/max MTU to device info")
Cc: sta...@dpdk.org

Signed-off-by: Andrew Rybchenko 
---
 drivers/net/failsafe/failsafe_ops.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/failsafe/failsafe_ops.c 
b/drivers/net/failsafe/failsafe_ops.c
index 76d64871b4..5e6fb369e1 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -1095,6 +1095,8 @@ static void
 fs_dev_merge_info(struct rte_eth_dev_info *info,
  const struct rte_eth_dev_info *sinfo)
 {
+   info->min_mtu = RTE_MAX(info->min_mtu, sinfo->min_mtu);
+   info->max_mtu = RTE_MIN(info->max_mtu, sinfo->max_mtu);
info->max_rx_pktlen = RTE_MIN(info->max_rx_pktlen, 
sinfo->max_rx_pktlen);
info->max_rx_queues = RTE_MIN(info->max_rx_queues, 
sinfo->max_rx_queues);
info->max_tx_queues = RTE_MIN(info->max_tx_queues, 
sinfo->max_tx_queues);
@@ -1172,6 +1174,8 @@ fs_dev_infos_get(struct rte_eth_dev *dev,
int ret;
 
/* Use maximum upper bounds by default */
+   infos->min_mtu = RTE_ETHER_MIN_MTU;
+   infos->max_mtu = UINT16_MAX;
infos->max_rx_pktlen = UINT32_MAX;
infos->max_rx_queues = RTE_MAX_QUEUES_PER_PORT;
infos->max_tx_queues = RTE_MAX_QUEUES_PER_PORT;
-- 
2.29.2



Re: [dpdk-dev] [PATCH 5/5] librte_ethdev: add to use apistats

2020-12-22 Thread Morten Brørup
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Hideyuki Yamashita
> Sent: Tuesday, December 22, 2020 3:50 AM
> 
> Hello,
> 
> Thanks for your comments.
> Please see my comments inline tagged with [HY].
> 
> > > diff --git a/lib/librte_ethdev/rte_ethdev.h
> b/lib/librte_ethdev/rte_ethdev.h
> > > index f5f8919..bef9bc6 100644
> > > --- a/lib/librte_ethdev/rte_ethdev.h
> > > +++ b/lib/librte_ethdev/rte_ethdev.h
> > > @@ -160,6 +160,7 @@ extern "C" {
> > >
> > >  #include "rte_ethdev_trace_fp.h"
> > >  #include "rte_dev_info.h"
> > > +#include 
> > >
> > >  extern int rte_eth_dev_logtype;
> > >
> > > @@ -4849,6 +4850,9 @@ rte_eth_rx_burst(uint16_t port_id, uint16_t
> queue_id,
> > >   nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
> > >rx_pkts, nb_pkts);
> > >
> > > + int lcore_id = rte_lcore_id();
> > > + rte_apicounts->rx_burst_counts[lcore_id]++;

[...]

> > As a generic one: such sort of statistics can be easily collected by the
> app itself.
> > Either by just incrementing counters before rx/tx_burst function call
> directly or
> > via rx/tx callbacks.
> > So I don't see any point to push it inside ethdev layer.
> > Konstantin
> [HY]
> You are correct.
> Application can do it.
> But if applications want to do it, then every application needs
> to do it.
> The reason why I propose my patchset is to provide such a
> common function (counting invocations of rx_burst/tx_burst)
> so that an application only needs to "retrieve the information".
> 
> I think it is more beneficial than having every application do a similar thing.
> Your feedback is highly appreciated.

For performance reasons, I am strongly opposed to adding anything into the 
ethdev rx/tx functions, unless it is absolutely critical for a large user base.

I get your argument about avoiding additional application code by doing it 
directly in the ethdev rx/tx functions - this is the benefit that this library 
adds to DPDK. So as a compromise, I suggest surrounding the added code in these 
functions by #ifdef/#endif, so there is no performance degradation if the 
library is not used.

Personally, I would prefer a much more advanced library for measuring CPU load 
and RX/TX usage. However, I can also imagine that simple DPDK applications 
would benefit from this library, because is easy to understand and requires 
nearly no effort to use.

> 
> Thanks!
> BR,
> Hideyuki Yamashita
> NTT TechnoCross
> 
> > >  #ifdef RTE_ETHDEV_RXTX_CALLBACKS
> > >   struct rte_eth_rxtx_callback *cb;
> > >
> > > @@ -5124,6 +5128,9 @@ rte_eth_tx_burst(uint16_t port_id, uint16_t
> queue_id,
> > >   }
> > >  #endif
> > >
> > > + int lcore_id = rte_lcore_id();
> > > + rte_apicounts->tx_burst_counts[lcore_id]++;
> > >  #ifdef RTE_ETHDEV_RXTX_CALLBACKS
> > >   struct rte_eth_rxtx_callback *cb;
> > >



[dpdk-dev] net/ena: traffic lock

2020-12-22 Thread Rajesh Kumar
Dpdk: 19.11

Driver: ena



During longevity (after 24+ hrs) testing at 10Gbps, one of the tx-queues is
getting into an unrecoverable state (it is not able to find enough
tx-descriptors, nor is it freeing them up).



So for every tx-burst, eth_ena_xmit_pkts() neither finds a free tx-descriptor
nor is able to free a txd (ena_com_tx_comp_req_id_get() is always returning
ENA_COM_TRY_AGAIN).



We see eth_ena_xmit_pkts() has been refactored in latest LTS version, is
there any related issue got fixed ? Can you help





(gdb) p *(struct ena_ring *) rte_eth_devices[2].data->tx_queues[5]

$14 = {

  next_to_use = 4979,

  next_to_clean = 3958,

  type = ENA_RING_TYPE_TX,

  tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_DEV,

  {

empty_tx_reqs = 0x11e406b00,

empty_rx_reqs = 0x11e406b00

  },

  {

tx_buffer_info = 0x11d2dfc80,

rx_buffer_info = 0x11d2dfc80

  },

 rx_refill_buffer = 0x0,

  ring_size = 1024,

  ena_com_io_cq = 0x11e40e640,

  ena_com_io_sq = 0x11e4168c0,

  ena_bufs = {{

  len = 0,

  req_id = 0

} },

  mb_pool = 0x0,

  port_id = 2,

  id = 5,

  tx_max_header_size = 96 '`',

 configured = 1,

  push_buf_intermediate_buf = 0x11e406a00 "",

  adapter = 0x11e40e040,

  offloads = 2,

  sgl_size = 17,

  {

rx_stats = {

  cnt = 4979,

  bytes = 417580,

  refill_partial = 35426,

  bad_csum = 0,

  mbuf_alloc_fail = 0,

  bad_desc_num = 38603,

---Type  to continue, or q  to quit---

  bad_req_id = 3178

},

tx_stats = {

  cnt = 4979,

  bytes = 417580,

  prepare_ctx_err = 35426, <<-- Errors

  linearize = 0,

  linearize_failed = 0,

  tx_poll = 38603,

  doorbells = 3178,

  bad_req_id = 0,

  available_desc = 2 <<-- No free descriptor

}

  },

  numa_socket_id = 0

}



Thanks,

*-Rajesh*


Re: [dpdk-dev] [PATCH v3 1/3] build: add aarch64 clang to meson cross-compile

2020-12-22 Thread Ruifeng Wang

> -Original Message-
> From: dev  On Behalf Of Juraj Linkeš
> Sent: Friday, October 2, 2020 5:38 PM
> To: tho...@monjalon.net; david.march...@redhat.com;
> acon...@redhat.com; maicolgabr...@hotmail.com
> Cc: dev@dpdk.org; Juraj Linkeš 
> Subject: [dpdk-dev] [PATCH v3 1/3] build: add aarch64 clang to meson cross-
> compile
> 
> Create meson cross file arm64_armv8_linux_clang_ubuntu1804.
> Use clang/LLVM toolchain with sysroot pointing to gcc cross stdlib.
> 
> Signed-off-by: Juraj Linkeš 
> ---
>  config/arm/arm64_armv8_linux_clang_ubuntu1804 | 20
> +++
>  1 file changed, 20 insertions(+)
>  create mode 100644 config/arm/arm64_armv8_linux_clang_ubuntu1804
> 
> diff --git a/config/arm/arm64_armv8_linux_clang_ubuntu1804
> b/config/arm/arm64_armv8_linux_clang_ubuntu1804
> new file mode 100644
> index 0..67f475eb0
> --- /dev/null
> +++ b/config/arm/arm64_armv8_linux_clang_ubuntu1804
> @@ -0,0 +1,20 @@
> +[binaries]
> +c = 'clang'
> +cpp = 'clang++'
> +ar = 'llvm-ar'
> +strip = 'llvm-strip'
> +llvm-config = 'llvm-config'
> +pcap-config = 'llvm-config'
> +pkgconfig = 'aarch64-linux-gnu-pkg-config'
> +
> +[host_machine]
> +system = 'linux'
> +cpu_family = 'aarch64'
> +cpu = 'armv8-a'
> +endian = 'little'
> +
> +[properties]
> +implementor_id = 'generic'
> +implementor_pn = 'default'
> +c_args = ['-target', 'aarch64-linux-gnu', '--sysroot',
> +'/usr/aarch64-linux-gnu', '--gcc-toolchain=/usr'] c_link_args =
> +['-target', 'aarch64-linux-gnu', '-fuse-ld=lld']
> --
> 2.20.1

I understand specific flags are needed by clang to do cross compiling.
Path to gnu libc which is required could be different on some other systems.
The cross file works on Ubuntu, and could be referred to by other systems.

Reviewed-by: Ruifeng Wang 


Re: [dpdk-dev] [RFC PATCH v1 4/6] app/eventdev: add release barriers for pipeline test

2020-12-22 Thread Pavan Nikhilesh Bhagavatula


>Add release barriers before updating the processed packets for worker
>lcores to ensure the worker lcore has really finished data processing
>and then it can update the processed packets number.
>

I believe we can live with minor inaccuracies in the stats being presented,
as atomics are pretty heavy when the scheduler is limited to a burst size of 1.

One option is to move it before a pipeline operation (pipeline_event_tx, 
pipeline_fwd_event etc.)
as they imply an implicit release barrier (all the changes done to the event 
should be visible to the next core).

>Fixes: 314bcf58ca8f ("app/eventdev: add pipeline queue worker
>functions")
>Cc: pbhagavat...@marvell.com
>Cc: sta...@dpdk.org
>
>Signed-off-by: Phil Yang 
>Signed-off-by: Feifei Wang 
>Reviewed-by: Ruifeng Wang 
>---
> app/test-eventdev/test_pipeline_queue.c | 64
>+
> 1 file changed, 56 insertions(+), 8 deletions(-)
>
>diff --git a/app/test-eventdev/test_pipeline_queue.c b/app/test-
>eventdev/test_pipeline_queue.c
>index 7bebac34f..0c0ec0ceb 100644
>--- a/app/test-eventdev/test_pipeline_queue.c
>+++ b/app/test-eventdev/test_pipeline_queue.c
>@@ -30,7 +30,13 @@ pipeline_queue_worker_single_stage_tx(void
>*arg)
>
>   if (ev.sched_type == RTE_SCHED_TYPE_ATOMIC) {
>   pipeline_event_tx(dev, port, &ev);
>-  w->processed_pkts++;
>+
>+  /* release barrier here ensures stored operation
>+   * of the event completes before the number of
>+   * processed pkts is visible to the main core
>+   */
>+  __atomic_fetch_add(&(w->processed_pkts), 1,
>+  __ATOMIC_RELEASE);
>   } else {
>   ev.queue_id++;
>   pipeline_fwd_event(&ev,
>RTE_SCHED_TYPE_ATOMIC);
>@@ -59,7 +65,13 @@ pipeline_queue_worker_single_stage_fwd(void
>*arg)
>   rte_event_eth_tx_adapter_txq_set(ev.mbuf, 0);
>   pipeline_fwd_event(&ev, RTE_SCHED_TYPE_ATOMIC);
>   pipeline_event_enqueue(dev, port, &ev);
>-  w->processed_pkts++;
>+
>+  /* release barrier here ensures stored operation
>+   * of the event completes before the number of
>+   * processed pkts is visible to the main core
>+   */
>+  __atomic_fetch_add(&(w->processed_pkts), 1,
>+  __ATOMIC_RELEASE);
>   }
>
>   return 0;
>@@ -84,7 +96,13 @@
>pipeline_queue_worker_single_stage_burst_tx(void *arg)
>   if (ev[i].sched_type ==
>RTE_SCHED_TYPE_ATOMIC) {
>   pipeline_event_tx(dev, port, &ev[i]);
>   ev[i].op = RTE_EVENT_OP_RELEASE;
>-  w->processed_pkts++;
>+
>+  /* release barrier here ensures stored
>operation
>+   * of the event completes before the
>number of
>+   * processed pkts is visible to the main
>core
>+   */
>+  __atomic_fetch_add(&(w-
>>processed_pkts), 1,
>+  __ATOMIC_RELEASE);
>   } else {
>   ev[i].queue_id++;
>   pipeline_fwd_event(&ev[i],
>@@ -121,7 +139,13 @@
>pipeline_queue_worker_single_stage_burst_fwd(void *arg)
>   }
>
>   pipeline_event_enqueue_burst(dev, port, ev, nb_rx);
>-  w->processed_pkts += nb_rx;
>+
>+  /* release barrier here ensures stored operation
>+   * of the event completes before the number of
>+   * processed pkts is visible to the main core
>+   */
>+  __atomic_fetch_add(&(w->processed_pkts), nb_rx,
>+  __ATOMIC_RELEASE);
>   }
>
>   return 0;
>@@ -146,7 +170,13 @@ pipeline_queue_worker_multi_stage_tx(void
>*arg)
>
>   if (ev.queue_id == tx_queue[ev.mbuf->port]) {
>   pipeline_event_tx(dev, port, &ev);
>-  w->processed_pkts++;
>+
>+  /* release barrier here ensures stored operation
>+   * of the event completes before the number of
>+   * processed pkts is visible to the main core
>+   */
>+  __atomic_fetch_add(&(w->processed_pkts), 1,
>+  __ATOMIC_RELEASE);
>   continue;
>   }
>
>@@ -180,7 +210,13 @@
>pipeline_queue_worker_multi_stage_fwd(void *arg)
>   ev.queue_id = tx_queue[ev.mbuf->port];
>   rte_event_eth_tx_adapter_txq_set(ev.mbuf, 0);
>   pipeline_fwd_event(&ev,
>RTE_SCHED_TYPE_ATOMIC);
>-  w->processed_pkts++;
>+
>+  

Re: [dpdk-dev] [PATCH v2] net/virtio-user: fix advertising of protocol features

2020-12-22 Thread Maxime Coquelin



On 12/18/20 2:23 PM, Olivier Matz wrote:
> When connected to a vhost-user backend, the flag
> VHOST_USER_F_PROTOCOL_FEATURES is not advertised, preventing to do
> multiqueue (the VHOST_USER_PROTOCOL_F_MQ protocol feature is ignored by
> some backends if the VHOST_USER_F_PROTOCOL_FEATURES feature is not set).
> 
> When setting vhost-user features, advertise this flag if it was
> advertised by our peer.
> 
> Fixes: 8e7561054ac7 ("net/virtio: support vhost-user protocol features")
> Cc: sta...@dpdk.org
> 
> Suggested-by: Maxime Coquelin 
> Signed-off-by: Olivier Matz 
> ---
> 
> v2
> * fix obvious fallthrough case, sorry for the noise
> 
>  drivers/net/virtio/virtio_user/vhost_user.c | 7 ++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/virtio/virtio_user/vhost_user.c 
> b/drivers/net/virtio/virtio_user/vhost_user.c
> index b93e65c60b..350eed4182 100644
> --- a/drivers/net/virtio/virtio_user/vhost_user.c
> +++ b/drivers/net/virtio/virtio_user/vhost_user.c
> @@ -297,13 +297,18 @@ vhost_user_sock(struct virtio_user_dev *dev,
>   if (has_reply_ack)
>   msg.flags |= VHOST_USER_NEED_REPLY_MASK;
>   /* Fallthrough */
> - case VHOST_USER_SET_FEATURES:
>   case VHOST_USER_SET_PROTOCOL_FEATURES:
>   case VHOST_USER_SET_LOG_BASE:
>   msg.payload.u64 = *((__u64 *)arg);
>   msg.size = sizeof(m.payload.u64);
>   break;
>  
> + case VHOST_USER_SET_FEATURES:
> + msg.payload.u64 = *((__u64 *)arg) | (dev->device_features &
> + (1ULL << VHOST_USER_F_PROTOCOL_FEATURES));
> + msg.size = sizeof(m.payload.u64);
> + break;
> +
>   case VHOST_USER_SET_OWNER:
>   case VHOST_USER_RESET_OWNER:
>   break;
> 

Reviewed-by: Maxime Coquelin 

Thanks!
Maxime



Re: [dpdk-dev] [PATCH 21.02 1/3] vhost: refactor postcopy region registration

2020-12-22 Thread Maxime Coquelin
Hi Chenbo,

On 12/9/20 3:16 PM, Xia, Chenbo wrote:
> Hi Maxime,
> 
>> -Original Message-
>> From: Maxime Coquelin 
>> Sent: Monday, November 16, 2020 7:36 PM
>> To: dev@dpdk.org; Xia, Chenbo ; Ding, Xuan
>> 
>> Cc: Maxime Coquelin 
>> Subject: [PATCH 21.02 1/3] vhost: refactor postcopy region registration
>>
>> This patch moves the registration of memory regions to
>> userfaultfd to a dedicated function, with the goal of
>> simplifying VHOST_USER_SET_MEM_TABLE request handling
>> function.
>>
>> Signed-off-by: Maxime Coquelin 
>> ---
>>  lib/librte_vhost/vhost_user.c | 77 +--
>>  1 file changed, 46 insertions(+), 31 deletions(-)
>>
>> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
>> index 45c8ac09da..b8a9e41a2d 100644
>> --- a/lib/librte_vhost/vhost_user.c
>> +++ b/lib/librte_vhost/vhost_user.c
>> @@ -998,6 +998,49 @@ vhost_memory_changed(struct VhostUserMemory *new,
>>  return false;
>>  }
>>
>> +#ifdef RTE_LIBRTE_VHOST_POSTCOPY
>> +static int
>> +vhost_user_postcopy_region_register(struct virtio_net *dev,
>> +struct rte_vhost_mem_region *reg)
>> +{
>> +struct uffdio_register reg_struct;
>> +
>> +/*
>> + * Let's register all the mmap'ed area to ensure
>> + * alignment on page boundary.
>> + */
>> +reg_struct.range.start = (uint64_t)(uintptr_t)reg->mmap_addr;
>> +reg_struct.range.len = reg->mmap_size;
>> +reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
>> +
>> +if (ioctl(dev->postcopy_ufd, UFFDIO_REGISTER,
>> +®_struct)) {
>> +VHOST_LOG_CONFIG(ERR, "Failed to register ufd for region "
>> +"%" PRIx64 " - %" PRIx64 " (ufd = %d) %s\n",
>> +(uint64_t)reg_struct.range.start,
>> +(uint64_t)reg_struct.range.start +
>> +(uint64_t)reg_struct.range.len - 1,
>> +dev->postcopy_ufd,
>> +strerror(errno));
>> +return -1;
>> +}
>> +
>> +VHOST_LOG_CONFIG(INFO, "\t userfaultfd registered for range : %"
>> PRIx64 " - %" PRIx64 "\n",
>> +(uint64_t)reg_struct.range.start,
>> +(uint64_t)reg_struct.range.start +
>> +(uint64_t)reg_struct.range.len - 1);
>> +
>> +return 0;
>> +}
>> +#else
>> +static int
>> +vhost_user_postcopy_region_register(struct virtio_net *dev,
>> +struct rte_vhost_mem_region *reg)
>> +{
>> +return -1;
>> +}
> 
> Better add __rte_unused here to avoid warnings when postcopy not configured 😊

Good catch!
I will post a v2 with adding these.

Thanks,
Maxime

> Thanks,
> Chenbo
> 
>> +#endif
>> +
>>  static int
>>  vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg
>> *msg,
>>  int main_fd)
>> @@ -1209,38 +1252,10 @@ vhost_user_set_mem_table(struct virtio_net **pdev,
>> struct VhostUserMsg *msg,
>>  }
>>
>>  /* Now userfault register and we can use the memory */
>> -for (i = 0; i < memory->nregions; i++) {
>> -#ifdef RTE_LIBRTE_VHOST_POSTCOPY
>> -reg = &dev->mem->regions[i];
>> -struct uffdio_register reg_struct;
>> -
>> -/*
>> - * Let's register all the mmap'ed area to ensure
>> - * alignment on page boundary.
>> - */
>> -reg_struct.range.start =
>> -(uint64_t)(uintptr_t)reg->mmap_addr;
>> -reg_struct.range.len = reg->mmap_size;
>> -reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
>> -
>> -if (ioctl(dev->postcopy_ufd, UFFDIO_REGISTER,
>> -®_struct)) {
>> -VHOST_LOG_CONFIG(ERR,
>> -"Failed to register ufd for region %d: 
>> (ufd
>> = %d) %s\n",
>> -i, dev->postcopy_ufd,
>> -strerror(errno));
>> +for (i = 0; i < memory->nregions; i++)
>> +if (vhost_user_postcopy_region_register(dev,
>> +&dev->mem->regions[i]) < 0)
>>  goto free_mem_table;
>> -}
>> -VHOST_LOG_CONFIG(INFO,
>> -"\t userfaultfd registered for range : "
>> -"%" PRIx64 " - %" PRIx64 "\n",
>> -(uint64_t)reg_struct.range.start,
>> -(uint64_t)reg_struct.range.start +
>> -(uint64_t)reg_struct.range.len - 1);
>> -#else
>> -goto free_mem_table;
>> -#endif
>> -}
>>  }
>>
>>  for (i = 0; i < dev->nr_vring; i++) {
>> --
>> 2.26.2
> 



Re: [dpdk-dev] [PATCH 21.02 v2 1/2] net/virtio: fix missing backend features negotiation

2020-12-22 Thread Maxime Coquelin



On 12/21/20 7:23 AM, Xia, Chenbo wrote:
> Hi Maxime,
> 
>> -Original Message-
>> From: Maxime Coquelin 
>> Sent: Wednesday, November 25, 2020 11:21 PM
>> To: dev@dpdk.org; Xia, Chenbo ; amore...@redhat.com;
>> jasow...@redhat.com; david.march...@redhat.com
>> Cc: Maxime Coquelin ; sta...@dpdk.org
>> Subject: [PATCH 21.02 v2 1/2] net/virtio: fix missing backend features
>> negotiation
>>
>> This patch adds missing backend features negotiation for
>> in Vhost-vDPA. Without it, IOTLB messages v2 could be sent
>> by Virtio-user PMD while not supported by the backend.
>>
>> Fixes: 6b901437056e ("net/virtio: introduce vhost-vDPA backend")
>> Cc: sta...@dpdk.org
>>
>> Signed-off-by: Maxime Coquelin 
>> ---
>>  drivers/net/virtio/virtio_user/vhost.h   |  4 
>>  drivers/net/virtio/virtio_user/vhost_vdpa.c  | 14 ++
>>  drivers/net/virtio/virtio_user/virtio_user_dev.c | 14 ++
>>  3 files changed, 28 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/net/virtio/virtio_user/vhost.h
>> b/drivers/net/virtio/virtio_user/vhost.h
>> index 210a3704e7..c1dcc50b58 100644
>> --- a/drivers/net/virtio/virtio_user/vhost.h
>> +++ b/drivers/net/virtio/virtio_user/vhost.h
>> @@ -86,6 +86,10 @@ enum vhost_user_request {
>>  VHOST_USER_MAX
>>  };
>>
>> +#ifndef VHOST_BACKEND_F_IOTLB_MSG_V2
>> +#define VHOST_BACKEND_F_IOTLB_MSG_V2 1
>> +#endif
>> +
>>  extern const char * const vhost_msg_strings[VHOST_USER_MAX];
>>
>>  struct vhost_memory_region {
>> diff --git a/drivers/net/virtio/virtio_user/vhost_vdpa.c
>> b/drivers/net/virtio/virtio_user/vhost_vdpa.c
>> index c7b9349fc8..b6c81d6f17 100644
>> --- a/drivers/net/virtio/virtio_user/vhost_vdpa.c
>> +++ b/drivers/net/virtio/virtio_user/vhost_vdpa.c
>> @@ -35,6 +35,8 @@
>>  #define VHOST_VDPA_SET_STATUS _IOW(VHOST_VIRTIO, 0x72, __u8)
>>  #define VHOST_VDPA_SET_VRING_ENABLE _IOW(VHOST_VIRTIO, 0x75, \
>>   struct vhost_vring_state)
>> +#define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
>> +#define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64)
>>
>>  static uint64_t vhost_req_user_to_vdpa[] = {
>>  [VHOST_USER_SET_OWNER] = VHOST_SET_OWNER,
>> @@ -51,6 +53,8 @@ static uint64_t vhost_req_user_to_vdpa[] = {
>>  [VHOST_USER_SET_STATUS] = VHOST_VDPA_SET_STATUS,
>>  [VHOST_USER_GET_STATUS] = VHOST_VDPA_GET_STATUS,
>>  [VHOST_USER_SET_VRING_ENABLE] = VHOST_VDPA_SET_VRING_ENABLE,
>> +[VHOST_USER_GET_PROTOCOL_FEATURES] = VHOST_GET_BACKEND_FEATURES,
>> +[VHOST_USER_SET_PROTOCOL_FEATURES] = VHOST_SET_BACKEND_FEATURES,
>>  };
>>
>>  /* no alignment requirement */
>> @@ -86,6 +90,11 @@ vhost_vdpa_dma_map(struct virtio_user_dev *dev, void 
>> *addr,
>>  {
>>  struct vhost_msg msg = {};
>>
>> +if (!(dev->protocol_features & (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)))
>> {
>> +PMD_DRV_LOG(ERR, "IOTLB_MSG_V2 not supported by the backend.");
>> +return -1;
>> +}
>> +
>>  msg.type = VHOST_IOTLB_MSG_V2;
>>  msg.iotlb.type = VHOST_IOTLB_UPDATE;
>>  msg.iotlb.iova = iova;
>> @@ -108,6 +117,11 @@ vhost_vdpa_dma_unmap(struct virtio_user_dev *dev,
>> __rte_unused void *addr,
>>  {
>>  struct vhost_msg msg = {};
>>
>> +if (!(dev->protocol_features & (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)))
>> {
>> +PMD_DRV_LOG(ERR, "IOTLB_MSG_V2 not supported by the backend.");
>> +return -1;
>> +}
>> +
>>  msg.type = VHOST_IOTLB_MSG_V2;
>>  msg.iotlb.type = VHOST_IOTLB_INVALIDATE;
>>  msg.iotlb.iova = iova;
>> diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c
>> b/drivers/net/virtio/virtio_user/virtio_user_dev.c
>> index 053f0267ca..96bc6b232d 100644
>> --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
>> +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
>> @@ -439,11 +439,13 @@ virtio_user_dev_setup(struct virtio_user_dev *dev)
>>   1ULL << VIRTIO_F_RING_PACKED   |   \
>>   1ULL << VHOST_USER_F_PROTOCOL_FEATURES)
>>
>> -#define VIRTIO_USER_SUPPORTED_PROTOCOL_FEATURES \
>> +#define VHOST_USER_SUPPORTED_PROTOCOL_FEATURES  \
>>  (1ULL << VHOST_USER_PROTOCOL_F_MQ | \
>>   1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK |  \
>>   1ULL << VHOST_USER_PROTOCOL_F_STATUS)
>>
>> +#define VHOST_VDPA_SUPPORTED_PROTOCOL_FEATURES  \
>> +(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)
>>  int
>>  virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues,
>>   int cq, int queue_size, const char *mac, char **ifname,
>> @@ -462,9 +464,13 @@ virtio_user_dev_init(struct virtio_user_dev *dev, char
>> *path, int queues,
>>  dev->mac_specified = 0;
>>  dev->frontend_features = 0;
>>  dev->unsupported_features = ~VIRTIO_USER_SUPPORTED_FEATURES;
>> -dev->protocol_features = VIRTIO_USER_SUPPORTED_PROTOCOL_FEATURES;
>>  dev->backend_type = backend_type;
>>
>> +if (dev->bac

Re: [dpdk-dev] [PATCH v1 1/2] app/test: remove unnecessary barriers for ring stress test

2020-12-22 Thread Ananyev, Konstantin
Hi Feifei,

> 
> The variable "wrk_cmd" is a signal to control threads from running and
> stopping. When worker lcores load "wrk_cmd == WRK_CMD_RUN", they start
> running and when worker lcores load "wrk_cmd == WRK_CMD_STOP", they
> stop.
> 
> For the wmb in test_mt1, no storing operations must keep the order
> after storing "wrk_cmd". Thus the wmb is unnecessary.

I think there is a bug in my original code, we should do smp_wmb() *before*  
setting wrk_cmd, not after:

/* launch on all workers */
RTE_LCORE_FOREACH_WORKER(lc) {
arg[lc].rng = r;
arg[lc].stats = init_stat;
rte_eal_remote_launch(test, &arg[lc], lc);
}

/* signal worker to start test */
+  rte_smp_wmb();
wrk_cmd = WRK_CMD_RUN;
-   rte_smp_wmb();

usleep(run_time * US_PER_S);


I still think we'd better have some synchronisation here.
Otherwise, what would prevent the compiler and/or CPU from updating wrk_cmd out of order
(before the _init_ phase is completed)?
We probably can safely assume no reordering from the compiler here,
as we have function calls straight before and after 'wrk_cmd = WRK_CMD_RUN;'
But for consistency and easier maintenance, I still think it is better
to have something here, after all it is not performance critical pass. 

> For the rmb in test_worker, the parameters have been prepared when
> worker lcores call "test_worker". It is unnessary to wait wrk_cmd to be
> loaded, then the parameters can be loaded, So the rmb can be removed.

It is not only about parameter loading; it is to prevent the worker core from starting 
too early.

As I understand, your goal is to get rid of rte_smp_*() calls.
Might be better to replace such places here with _atomic_ semantics.
Then, as I can see, we can also get rid of 'volatile' for wrk_cmd.
 
> In the meanwhile, fix a typo. The note above storing "stop" into
> "wrk_cmd" should be "stop test" rather than "start test".
> 
> Signed-off-by: Feifei Wang 
> Reviewed-by: Honnappa Nagarahalli 
> Reviewed-by: Ruifeng Wang 
> ---
>  app/test/test_ring_stress_impl.h | 5 +
>  1 file changed, 1 insertion(+), 4 deletions(-)
> 
> diff --git a/app/test/test_ring_stress_impl.h 
> b/app/test/test_ring_stress_impl.h
> index f9ca63b90..384555ef9 100644
> --- a/app/test/test_ring_stress_impl.h
> +++ b/app/test/test_ring_stress_impl.h
> @@ -198,7 +198,6 @@ test_worker(void *arg, const char *fname, int32_t prcs)
>   fill_ring_elm(&loc_elm, lc);
> 
>   while (wrk_cmd != WRK_CMD_RUN) {
> - rte_smp_rmb();
>   rte_pause();
>   }
> 
> @@ -357,13 +356,11 @@ test_mt1(int (*test)(void *))
> 
>   /* signal worker to start test */
>   wrk_cmd = WRK_CMD_RUN;
> - rte_smp_wmb();
> 
>   usleep(run_time * US_PER_S);
> 
> - /* signal worker to start test */
> + /* signal worker to stop test */
>   wrk_cmd = WRK_CMD_STOP;
> - rte_smp_wmb();
> 
>   /* wait for workers and collect stats. */
>   mc = rte_lcore_id();
> --
> 2.17.1



Re: [dpdk-dev] [PATCH 5/5] librte_ethdev: add to use apistats

2020-12-22 Thread Ananyev, Konstantin
> Hello,
> 
> Thanks for your comments.
> Please see my comments inline tagged with [HY].
> 
> > Hi,
> >
> > >
> > > This patch modifies to use apistats by librte_ethdev.
> > >
> > > Signed-off-by: Hideyuki Yamashita 
> > > ---
> > >  lib/librte_ethdev/meson.build|  6 ++-
> > >  lib/librte_ethdev/rte_apistats.c | 64 
> > >  lib/librte_ethdev/rte_apistats.h | 64 
> > >  lib/librte_ethdev/rte_ethdev.h   |  7 
> > >  lib/librte_ethdev/version.map|  5 +++
> > >  5 files changed, 144 insertions(+), 2 deletions(-)
> > >  create mode 100644 lib/librte_ethdev/rte_apistats.c
> > >  create mode 100644 lib/librte_ethdev/rte_apistats.h
> > >
> > > diff --git a/lib/librte_ethdev/meson.build b/lib/librte_ethdev/meson.build
> > > index e4b6102..d03e784 100644
> > > --- a/lib/librte_ethdev/meson.build
> > > +++ b/lib/librte_ethdev/meson.build
> > > @@ -8,7 +8,8 @@ sources = files('ethdev_private.c',
> > >   'rte_ethdev.c',
> > >   'rte_flow.c',
> > >   'rte_mtr.c',
> > > - 'rte_tm.c')
> > > + 'rte_tm.c' ,
> > > +'rte_apistats.c')
> > >
> > >  headers = files('rte_ethdev.h',
> > >   'rte_ethdev_driver.h',
> > > @@ -24,6 +25,7 @@ headers = files('rte_ethdev.h',
> > >   'rte_mtr.h',
> > >   'rte_mtr_driver.h',
> > >   'rte_tm.h',
> > > - 'rte_tm_driver.h')
> > > + 'rte_tm_driver.h',
> > > +'rte_apistats.h')
> > >
> > >  deps += ['net', 'kvargs', 'meter', 'telemetry']
> > > diff --git a/lib/librte_ethdev/rte_apistats.c 
> > > b/lib/librte_ethdev/rte_apistats.c
> > > new file mode 100644
> > > index 000..c4bde34
> > > --- /dev/null
> > > +++ b/lib/librte_ethdev/rte_apistats.c
> > > @@ -0,0 +1,64 @@
> > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > + * Copyright(c) 2019 NTT TechnoCross Corporation
> > > + */
> > > +
> > > +
> > > +#include 
> > > +#include 
> > > +#include 
> > > +#include 
> > > +#include 
> > > +#include 
> > > +
> > > +#include "rte_apistats.h"
> > > +
> > > +/* Macros for printing using RTE_LOG */
> > > +#define RTE_LOGTYPE_APISTATS RTE_LOGTYPE_USER1
> > > +
> > > +#define MZ_APISTATS "rte_apistats"
> > > +
> > > +struct rte_apistats *rte_apicounts;
> > > +
> > > +int rte_apistats_init(void)
> > > +{
> > > + int i;
> > > + const struct rte_memzone *mz = NULL;
> > > + const unsigned int flags = 0;
> > > +
> > > + /** Allocate stats in shared memory fo multi process support */
> > > + if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
> > > + mz = rte_memzone_lookup(MZ_APISTATS);
> > > + if (mz == NULL) {
> > > + RTE_LOG(ERR, APISTATS, "Cannot get info structure\n");
> > > + return -1;
> > > + }
> > > + rte_apicounts = mz->addr;
> > > + } else {
> > > + /* RTE_PROC_PRIMARY */
> > > + mz = rte_memzone_reserve(MZ_APISTATS, sizeof(*rte_apicounts),
> > > + rte_socket_id(), flags);
> > > + if (mz == NULL) {
> > > + RTE_LOG(ERR, APISTATS, "Cannot reserve memory zone\n");
> > > + return -ENOMEM;
> > > + }
> > > + rte_apicounts = mz->addr;
> > > + memset(rte_apicounts, 0, sizeof(*rte_apicounts));
> > > + }
> > > +
> > > + /* set up array for data */
> > > + RTE_LCORE_FOREACH(i) {
> > > + rte_apicounts->lcoreid_list[i] = 1;
> > > + RTE_LOG(INFO, APISTATS, "Enable core usage for lcore %u\n", i);
> > > + }
> > > + return 0;
> > > +}
> > > +
> > > +int rte_apistats_uninit(void)
> > > +{
> > > + const struct rte_memzone *mz = NULL;
> > > + /* free up the memzone */
> > > + mz = rte_memzone_lookup(MZ_APISTATS);
> > > + if (mz)
> > > + rte_memzone_free(mz);
> > > + return 0;
> > > +}
> > > diff --git a/lib/librte_ethdev/rte_apistats.h 
> > > b/lib/librte_ethdev/rte_apistats.h
> > > new file mode 100644
> > > index 000..afea50e
> > > --- /dev/null
> > > +++ b/lib/librte_ethdev/rte_apistats.h
> > > @@ -0,0 +1,64 @@
> > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > + * Copyright(c) 2019 NTT TechnoCross Corporation
> > > + */
> > > +
> > > +#ifndef _RTE_APISTATS_H_
> > > +#define _RTE_APISTATS_H_
> > > +
> > > +/**
> > > + * @file
> > > + * RTE apistats
> > > + *
> > > + * library to provide rte_rx_burst/tx_burst api stats.
> > > + */
> > > +
> > > +
> > > +#ifdef __cplusplus
> > > +extern "C" {
> > > +#endif
> > > +
> > > +#include 
> > > +#include 
> > > +
> > > +/**
> > > + * A structure for rte_rx_burst/tx_burst api statistics.
> > > + */
> > > +struct rte_apistats {
> > > + int lcoreid_list[RTE_MAX_LCORE];/**< In use lcoreid list */
> > > + /**< Total rte_rx_burst call counts */
> > > + uint64_t rx_burst_counts[RTE_MAX_LCORE];
> > > +
> > > + /**< Total rte_tx_burst call counts */
> > > + uint64_t tx_burst_counts[RTE_MAX_LCORE];
> > > +};
> > > +
> > > +extern struct rte_apistats *rte_apicounts;
> > > +
> > > +/**
> > > + *  Initialize rte_rx_burst/tx_burst call count area.
> > > + *  @b EXPERIMENTAL: this API may change withou

[dpdk-dev] [PATCH 0/3] vhost: make virtqueue cache-friendly

2020-12-22 Thread Maxime Coquelin
As done for Virtio PMD, this series improves cache utilization
of the vhost_virtqueue struct by removing an unused field,
by making the live-migration cache dynamically allocated at
live-migration setup time, and by moving fields
around so that hot fields are on the first cachelines.

With this series, the struct vhost_virtqueue size goes
from 832B (13 cachelines) down to 320B (5 cachelines).

With this series and the virtio one, I measure a gain
of up to 8% in IO loop micro-benchmark with packed
ring, and 5% with split ring.

I don't have a setup at hand to run PVP testing, but
it might be interesting to get the numbers, as I
suspect the cache pressure is higher in this test than
in real use-cases.

Maxime Coquelin (3):
  vhost: remove unused Vhost virtqueue field
  vhost: move dirty logging cache out of the virtqueue
  vhost: optimize vhost virtqueue struct

 lib/librte_vhost/vhost.c  | 14 +++--
 lib/librte_vhost/vhost.h  | 54 +--
 lib/librte_vhost/vhost_user.c | 25 
 3 files changed, 64 insertions(+), 29 deletions(-)

-- 
2.29.2



[dpdk-dev] [PATCH 1/3] vhost: remove unused Vhost virtqueue field

2020-12-22 Thread Maxime Coquelin
This patch removes the "backend" field of the
vhost_virtqueue struct, which is not used by the
library.

Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/vhost.c | 2 --
 lib/librte_vhost/vhost.h | 2 --
 2 files changed, 4 deletions(-)

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index b83cf639eb..4e5df862aa 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -557,8 +557,6 @@ init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF;
 
vhost_user_iotlb_init(dev, vring_idx);
-   /* Backends are set to -1 indicating an inactive device. */
-   vq->backend = -1;
 }
 
 static void
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 361c9f79b3..d132e4ae54 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -143,8 +143,6 @@ struct vhost_virtqueue {
 #define VIRTIO_INVALID_EVENTFD (-1)
 #define VIRTIO_UNINITIALIZED_EVENTFD   (-2)
 
-   /* Backend value to determine if device should started/stopped */
-   int backend;
int enabled;
int access_ok;
int ready;
-- 
2.29.2



[dpdk-dev] [PATCH 3/3] vhost: optimize vhost virtqueue struct

2020-12-22 Thread Maxime Coquelin
This patch moves vhost_virtqueue struct fields in order
to both optimize packing and move hot fields on the first
cachelines.

Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/vhost.h | 52 +---
 1 file changed, 27 insertions(+), 25 deletions(-)

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index e2f14034b4..ce76330d15 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -133,7 +133,7 @@ struct vhost_virtqueue {
struct vring_used   *used;
struct vring_packed_desc_event *device_event;
};
-   uint32_tsize;
+   uint16_tsize;
 
uint16_tlast_avail_idx;
uint16_tlast_used_idx;
@@ -143,29 +143,12 @@ struct vhost_virtqueue {
 #define VIRTIO_INVALID_EVENTFD (-1)
 #define VIRTIO_UNINITIALIZED_EVENTFD   (-2)
 
-   int enabled;
-   int access_ok;
-   int ready;
-   int notif_enable;
-#define VIRTIO_UNINITIALIZED_NOTIF (-1)
+   boolenabled;
+   boolaccess_ok;
+   boolready;
 
rte_spinlock_t  access_lock;
 
-   /* Used to notify the guest (trigger interrupt) */
-   int callfd;
-   /* Currently unused as polling mode is enabled */
-   int kickfd;
-
-   /* Physical address of used ring, for logging */
-   uint64_tlog_guest_addr;
-
-   /* inflight share memory info */
-   union {
-   struct rte_vhost_inflight_info_split *inflight_split;
-   struct rte_vhost_inflight_info_packed *inflight_packed;
-   };
-   struct rte_vhost_resubmit_info *resubmit_inflight;
-   uint64_tglobal_counter;
 
union {
struct vring_used_elem  *shadow_used_split;
@@ -176,22 +159,36 @@ struct vhost_virtqueue {
uint16_tshadow_aligned_idx;
/* Record packed ring first dequeue desc index */
uint16_tshadow_last_used_idx;
-   struct vhost_vring_addr ring_addrs;
 
-   struct batch_copy_elem  *batch_copy_elems;
uint16_tbatch_copy_nb_elems;
+   struct batch_copy_elem  *batch_copy_elems;
boolused_wrap_counter;
boolavail_wrap_counter;
 
-   struct log_cache_entry *log_cache;
+   /* Physical address of used ring, for logging */
uint16_t log_cache_nb_elem;
+   uint64_tlog_guest_addr;
+   struct log_cache_entry *log_cache;
 
rte_rwlock_tiotlb_lock;
rte_rwlock_tiotlb_pending_lock;
struct rte_mempool *iotlb_pool;
TAILQ_HEAD(, vhost_iotlb_entry) iotlb_list;
-   int iotlb_cache_nr;
TAILQ_HEAD(, vhost_iotlb_entry) iotlb_pending_list;
+   int iotlb_cache_nr;
+
+   /* Used to notify the guest (trigger interrupt) */
+   int callfd;
+   /* Currently unused as polling mode is enabled */
+   int kickfd;
+
+   /* inflight share memory info */
+   union {
+   struct rte_vhost_inflight_info_split *inflight_split;
+   struct rte_vhost_inflight_info_packed *inflight_packed;
+   };
+   struct rte_vhost_resubmit_info *resubmit_inflight;
+   uint64_tglobal_counter;
 
/* operation callbacks for async dma */
struct rte_vhost_async_channel_ops  async_ops;
@@ -210,6 +207,11 @@ struct vhost_virtqueue {
boolasync_inorder;
boolasync_registered;
uint16_tasync_threshold;
+
+   int notif_enable;
+#define VIRTIO_UNINITIALIZED_NOTIF (-1)
+
+   struct vhost_vring_addr ring_addrs;
 } __rte_cache_aligned;
 
 /* Virtio device status as per Virtio specification */
-- 
2.29.2



[dpdk-dev] [PATCH 2/3] vhost: move dirty logging cache out of the virtqueue

2020-12-22 Thread Maxime Coquelin
This patch moves the per-virtqueue's dirty logging cache
out of the virtqueue struct, by allocating it dynamically
only when live-migration is enabled.

It saves 8 cachelines in vhost_virtqueue struct.

Signed-off-by: Maxime Coquelin 
---
 lib/librte_vhost/vhost.c  | 12 
 lib/librte_vhost/vhost.h  |  2 +-
 lib/librte_vhost/vhost_user.c | 25 +
 3 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 4e5df862aa..ec6459b2d1 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -144,6 +144,10 @@ __vhost_log_cache_sync(struct virtio_net *dev, struct 
vhost_virtqueue *vq)
if (unlikely(!dev->log_base))
return;
 
+   /* No cache, nothing to sync */
+   if (unlikely(!vq->log_cache))
+   return;
+
rte_smp_wmb();
 
log_base = (unsigned long *)(uintptr_t)dev->log_base;
@@ -176,6 +180,14 @@ vhost_log_cache_page(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
uint32_t offset = page / (sizeof(unsigned long) << 3);
int i;
 
+   if (unlikely(!vq->log_cache)) {
+   /* No logging cache allocated, write dirty log map directly */
+   rte_smp_wmb();
+   vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
+
+   return;
+   }
+
for (i = 0; i < vq->log_cache_nb_elem; i++) {
struct log_cache_entry *elem = vq->log_cache + i;
 
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index d132e4ae54..e2f14034b4 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -183,7 +183,7 @@ struct vhost_virtqueue {
boolused_wrap_counter;
boolavail_wrap_counter;
 
-   struct log_cache_entry log_cache[VHOST_LOG_CACHE_NR];
+   struct log_cache_entry *log_cache;
uint16_t log_cache_nb_elem;
 
rte_rwlock_tiotlb_lock;
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 45c8ac09da..7ac3963a07 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -1978,6 +1978,11 @@ vhost_user_get_vring_base(struct virtio_net **pdev,
rte_free(vq->batch_copy_elems);
vq->batch_copy_elems = NULL;
 
+   if (vq->log_cache) {
+   rte_free(vq->log_cache);
+   vq->log_cache = NULL;
+   }
+
msg->size = sizeof(msg->payload.state);
msg->fd_num = 0;
 
@@ -2077,6 +2082,7 @@ vhost_user_set_log_base(struct virtio_net **pdev, struct 
VhostUserMsg *msg,
int fd = msg->fds[0];
uint64_t size, off;
void *addr;
+   uint32_t i;
 
if (validate_msg_fds(msg, 1) != 0)
return RTE_VHOST_MSG_RESULT_ERR;
@@ -2130,6 +2136,25 @@ vhost_user_set_log_base(struct virtio_net **pdev, struct 
VhostUserMsg *msg,
dev->log_base = dev->log_addr + off;
dev->log_size = size;
 
+   for (i = 0; i < dev->nr_vring; i++) {
+   struct vhost_virtqueue *vq = dev->virtqueue[i];
+
+   if (vq->log_cache) {
+   rte_free(vq->log_cache);
+   vq->log_cache = NULL;
+   }
+   vq->log_cache_nb_elem = 0;
+   vq->log_cache = rte_zmalloc("vq log cache",
+   sizeof(struct log_cache_entry) * 
VHOST_LOG_CACHE_NR,
+   0);
+   /*
+* If log cache alloc fail, don't fail migration, but no
+* caching will be done, which will impact performance
+*/
+   if (!vq->log_cache)
+   VHOST_LOG_CONFIG(ERR, "Failed to allocate VQ logging 
cache\n");
+   }
+
/*
 * The spec is not clear about it (yet), but QEMU doesn't expect
 * any payload in the reply.
-- 
2.29.2



[dpdk-dev] [PATCH v7 0/2] support enqueue & dequeue callbacks on cryptodev

2020-12-22 Thread Abhinandan Gujjar
In an eventdev world, multiple workers (with ordered queue) will be
working on IPsec ESP processing. The ESP header's sequence number is
unique and has to be sequentially incremented in an orderly manner.
This raises a need for incrementing the sequence number in the crypto stage,
especially in the event crypto adapter. By adding a user callback to
cryptodev at enqueue burst, the user callback will get executed
in the context of the event crypto adapter. This helps the application
to increment the ESP sequence number in an atomic and orderly manner.
The user callback at the dequeue burst helps IPsec application to
take care of ARW processing.

v6->v7:
-Fixed issues in documentation
-Generated patch set with updated system time
-Updated to call dequeue callbacks after HW dequeue
-Updated release notes for 21.02

v5->v6:
-Removed error code in remove callback APIs & cb init
-Updated release notes & documentation

v4->v5:
-Added dequeue callback APIs
-Updated documentation
-Updated errno and return values
-Updated cleanup function

v3->v4:
-Move callback init and cleanup under dev_configure
-Update with memory ordering
-Removed extra level of indirection
-Add documentation

v2->v3:
-Moved RCU under the cryptodev APIs
-RCU is maintained per queue-pair
-Changed name of few variables
-Updated callback test with negative cases
-Updated with required changes for meson

v1->v2:
-Moved callback related members to the end of cryptodev struct
-Added support for RCU


Abhinandan Gujjar (2):
  cryptodev: support enqueue and dequeue callback functions
  test: add testcase for crypto enqueue and dequeue callback

 app/test/test_cryptodev.c   | 244 ++-
 config/rte_config.h |   1 +
 doc/guides/prog_guide/cryptodev_lib.rst |  44 +++
 doc/guides/rel_notes/release_21_02.rst  |   9 +
 lib/librte_cryptodev/meson.build|   2 +-
 lib/librte_cryptodev/rte_cryptodev.c| 398 +++-
 lib/librte_cryptodev/rte_cryptodev.h| 246 ++-
 lib/librte_cryptodev/version.map|   7 +
 8 files changed, 944 insertions(+), 7 deletions(-)

-- 
2.25.1



[dpdk-dev] [PATCH v7 1/2] cryptodev: support enqueue and dequeue callback functions

2020-12-22 Thread Abhinandan Gujjar
This patch adds APIs to add/remove callback functions on crypto
enqueue/dequeue burst. The callback function will be called for
each burst of crypto ops received/sent on a given crypto device
queue pair.

Signed-off-by: Abhinandan Gujjar 
Acked-by: Konstantin Ananyev 
---
 config/rte_config.h |   1 +
 doc/guides/prog_guide/cryptodev_lib.rst |  44 +++
 doc/guides/rel_notes/release_21_02.rst  |   9 +
 lib/librte_cryptodev/meson.build|   2 +-
 lib/librte_cryptodev/rte_cryptodev.c| 398 +++-
 lib/librte_cryptodev/rte_cryptodev.h| 246 ++-
 lib/librte_cryptodev/version.map|   7 +
 7 files changed, 702 insertions(+), 5 deletions(-)

diff --git a/config/rte_config.h b/config/rte_config.h
index a0b5160ff..87f9786d7 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -62,6 +62,7 @@
 /* cryptodev defines */
 #define RTE_CRYPTO_MAX_DEVS 64
 #define RTE_CRYPTODEV_NAME_LEN 64
+#define RTE_CRYPTO_CALLBACKS 1
 
 /* compressdev defines */
 #define RTE_COMPRESS_MAX_DEVS 64
diff --git a/doc/guides/prog_guide/cryptodev_lib.rst 
b/doc/guides/prog_guide/cryptodev_lib.rst
index 473b014a1..9b1cf8d49 100644
--- a/doc/guides/prog_guide/cryptodev_lib.rst
+++ b/doc/guides/prog_guide/cryptodev_lib.rst
@@ -338,6 +338,50 @@ start of private data information. The offset is counted 
from the start of the
 rte_crypto_op including other crypto information such as the IVs (since there 
can
 be an IV also for authentication).
 
+User callback APIs
+~~~~~~~~~~~~~~~~~~
+The add APIs configures a user callback function to be called for each burst 
of crypto
+ops received/sent on a given crypto device queue pair. The return value is a 
pointer
+that can be used later to remove the callback using remove API. Application is 
expected
+to register a callback function of type ``rte_cryptodev_callback_fn``. 
Multiple callback
+functions can be added for a given queue pair. API does not restrict on 
maximum number of
+callbacks.
+
+Callbacks registered by application would not survive 
``rte_cryptodev_configure`` as it
+reinitializes the callback list. It is user responsibility to remove all 
installed
+callbacks before calling ``rte_cryptodev_configure`` to avoid possible memory 
leakage.
+
+So, the application is expected to add user callback after 
``rte_cryptodev_configure``.
+The callbacks can also be added at the runtime. These callbacks get executed 
when
+``rte_cryptodev_enqueue_burst``/``rte_cryptodev_dequeue_burst`` is called.
+
+.. code-block:: c
+
+   struct rte_cryptodev_cb *
+   rte_cryptodev_add_enq_callback(uint8_t dev_id, uint16_t qp_id,
+  rte_cryptodev_callback_fn cb_fn,
+  void *cb_arg);
+
+   struct rte_cryptodev_cb *
+   rte_cryptodev_add_deq_callback(uint8_t dev_id, uint16_t qp_id,
+  rte_cryptodev_callback_fn cb_fn,
+  void *cb_arg);
+
+   uint16_t (* rte_cryptodev_callback_fn)(uint16_t dev_id, uint16_t qp_id,
+  struct rte_crypto_op **ops,
+  uint16_t nb_ops, void 
*user_param);
+
+The remove API removes a callback function added by
+``rte_cryptodev_add_enq_callback``/``rte_cryptodev_add_deq_callback``.
+
+.. code-block:: c
+
+   int rte_cryptodev_remove_enq_callback(uint8_t dev_id, uint16_t qp_id,
+ struct rte_cryptodev_cb *cb);
+
+   int rte_cryptodev_remove_deq_callback(uint8_t dev_id, uint16_t qp_id,
+ struct rte_cryptodev_cb *cb);
+
 
 Enqueue / Dequeue Burst APIs
 
diff --git a/doc/guides/rel_notes/release_21_02.rst 
b/doc/guides/rel_notes/release_21_02.rst
index 638f98168..8c7866401 100644
--- a/doc/guides/rel_notes/release_21_02.rst
+++ b/doc/guides/rel_notes/release_21_02.rst
@@ -55,6 +55,13 @@ New Features
  Also, make sure to start the actual text at the margin.
  ===
 
+* **Added enqueue & dequeue callback APIs for cryptodev library.**
+
+  Cryptodev library is added with enqueue & dequeue callback APIs to
+  enable applications to add/remove user callbacks which gets called
+  for every enqueue/dequeue operation.
+
+
 
 Removed Items
 -
@@ -84,6 +91,8 @@ API Changes
Also, make sure to start the actual text at the margin.
===
 
+* cryptodev: The structure ``rte_cryptodev`` has been updated with pointers
+  for adding enqueue and dequeue callbacks.
 
 ABI Changes
 ---
diff --git a/lib/librte_cryptodev/meson.build b/lib/librte_cryptodev/meson.build
index c4c6b3b6a..8c5493f4c 100644
--- a/lib/librte_cryptodev/meson.build
+++ b/lib/librte_cryptodev/meson.build
@@ -9,4 +9,4 @@ headers

[dpdk-dev] [PATCH v7 2/2] test: add testcase for crypto enqueue and dequeue callback

2020-12-22 Thread Abhinandan Gujjar
This patch adds test cases for testing functionality of
enqueue and dequeue callback mechanism.

Signed-off-by: Abhinandan Gujjar 
Acked-by: Konstantin Ananyev 
---
 app/test/test_cryptodev.c | 244 +-
 1 file changed, 242 insertions(+), 2 deletions(-)

diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c
index 8189053c1..ae2245609 100644
--- a/app/test/test_cryptodev.c
+++ b/app/test/test_cryptodev.c
@@ -10727,6 +10727,246 @@ test_null_burst_operation(void)
return TEST_SUCCESS;
 }
 
+static uint16_t
+test_enq_callback(uint16_t dev_id, uint16_t qp_id, struct rte_crypto_op **ops,
+ uint16_t nb_ops, void *user_param)
+{
+   RTE_SET_USED(dev_id);
+   RTE_SET_USED(qp_id);
+   RTE_SET_USED(ops);
+   RTE_SET_USED(user_param);
+
+   printf("crypto enqueue callback called\n");
+   return nb_ops;
+}
+
+static uint16_t
+test_deq_callback(uint16_t dev_id, uint16_t qp_id, struct rte_crypto_op **ops,
+ uint16_t nb_ops, void *user_param)
+{
+   RTE_SET_USED(dev_id);
+   RTE_SET_USED(qp_id);
+   RTE_SET_USED(ops);
+   RTE_SET_USED(user_param);
+
+   printf("crypto dequeue callback called\n");
+   return nb_ops;
+}
+
+/*
+ * Thread using enqueue/dequeue callback with RCU.
+ */
+static int
+test_enqdeq_callback_thread(void *arg)
+{
+   RTE_SET_USED(arg);
+   /* DP thread calls rte_cryptodev_enqueue_burst()/
+* rte_cryptodev_dequeue_burst() and invokes callback.
+*/
+   test_null_burst_operation();
+   return 0;
+}
+
+static int
+test_enq_callback_setup(void)
+{
+   struct crypto_testsuite_params *ts_params = &testsuite_params;
+   struct rte_cryptodev_info dev_info;
+   struct rte_cryptodev_qp_conf qp_conf = {
+   .nb_descriptors = MAX_NUM_OPS_INFLIGHT
+   };
+
+   struct rte_cryptodev_cb *cb;
+   uint16_t qp_id = 0;
+
+   /* Stop the device in case it's started so it can be configured */
+   rte_cryptodev_stop(ts_params->valid_devs[0]);
+
+   rte_cryptodev_info_get(ts_params->valid_devs[0], &dev_info);
+
+   TEST_ASSERT_SUCCESS(rte_cryptodev_configure(ts_params->valid_devs[0],
+   &ts_params->conf),
+   "Failed to configure cryptodev %u",
+   ts_params->valid_devs[0]);
+
+   qp_conf.nb_descriptors = MAX_NUM_OPS_INFLIGHT;
+   qp_conf.mp_session = ts_params->session_mpool;
+   qp_conf.mp_session_private = ts_params->session_priv_mpool;
+
+   TEST_ASSERT_SUCCESS(rte_cryptodev_queue_pair_setup(
+   ts_params->valid_devs[0], qp_id, &qp_conf,
+   rte_cryptodev_socket_id(ts_params->valid_devs[0])),
+   "Failed test for "
+   "rte_cryptodev_queue_pair_setup: num_inflights "
+   "%u on qp %u on cryptodev %u",
+   qp_conf.nb_descriptors, qp_id,
+   ts_params->valid_devs[0]);
+
+   /* Test with invalid crypto device */
+   cb = rte_cryptodev_add_enq_callback(RTE_CRYPTO_MAX_DEVS,
+   qp_id, test_enq_callback, NULL);
+   TEST_ASSERT_NULL(cb, "Add callback on qp %u on "
+   "cryptodev %u did not fail",
+   qp_id, RTE_CRYPTO_MAX_DEVS);
+
+   /* Test with invalid queue pair */
+   cb = rte_cryptodev_add_enq_callback(ts_params->valid_devs[0],
+   dev_info.max_nb_queue_pairs + 1,
+   test_enq_callback, NULL);
+   TEST_ASSERT_NULL(cb, "Add callback on qp %u on "
+   "cryptodev %u did not fail",
+   dev_info.max_nb_queue_pairs + 1,
+   ts_params->valid_devs[0]);
+
+   /* Test with NULL callback */
+   cb = rte_cryptodev_add_enq_callback(ts_params->valid_devs[0],
+   qp_id, NULL, NULL);
+   TEST_ASSERT_NULL(cb, "Add callback on qp %u on "
+   "cryptodev %u did not fail",
+   qp_id, ts_params->valid_devs[0]);
+
+   /* Test with valid configuration */
+   cb = rte_cryptodev_add_enq_callback(ts_params->valid_devs[0],
+   qp_id, test_enq_callback, NULL);
+   TEST_ASSERT_NOT_NULL(cb, "Failed test to add callback on "
+   "qp %u on cryptodev %u",
+   qp_id, ts_params->valid_devs[0]);
+
+   rte_cryptodev_start(ts_params->valid_devs[0]);
+
+   /* Launch a thread */
+   rte_eal_remote_launch(test_enqdeq_callback_thread, NULL,
+   rte_get_next_lcore(-1, 1, 0));
+
+   /* Wait until reader exited. */
+   rte_eal_mp_wait_lcore();
+
+   /* Test with invalid crypto device */
+   TEST_ASSERT_FAIL(rte_cryptodev_remove_enq_callback(
+   RTE_CRYPTO_MAX_DEVS, qp_id, cb),
+   "Expected call to fail as crypto device is 

Re: [dpdk-dev] [PATCH 40/40] net/virtio: move Vhost-vDPA data to its backend

2020-12-22 Thread Maxime Coquelin



On 12/20/20 10:14 PM, Maxime Coquelin wrote:
> As done earlier for Vhost-user and Vhost-kernel, this
> patch moves the Vhost-vDPA specific data to its backend
> file.
> 
> Signed-off-by: Maxime Coquelin 
> ---
>  drivers/net/virtio/virtio_user/vhost_vdpa.c   | 77 ++-
>  .../net/virtio/virtio_user/virtio_user_dev.h  |  3 -
>  2 files changed, 58 insertions(+), 22 deletions(-)
> 

...

> @@ -269,16 +298,23 @@ vhost_vdpa_set_status(struct virtio_user_dev *dev, 
> uint8_t status)
>  static int
>  vhost_vdpa_setup(struct virtio_user_dev *dev)
>  {
> + struct vhost_vdpa_data *data;
>   uint32_t did = (uint32_t)-1;
>  
> - dev->vhostfd = open(dev->path, O_RDWR);
> - if (dev->vhostfd < 0) {
> + data = malloc(sizeof(*data));
> + if (!data) {
> + PMD_DRV_LOG(ERR, "(%s) Failed to allocate backend data", 
> dev->path);
> + return -1;
> + }
> +
> + data->vhostfd = open(dev->path, O_RDWR);
> + if (data->vhostfd < 0) {
>   PMD_DRV_LOG(ERR, "Failed to open %s: %s\n",
>   dev->path, strerror(errno));
>   return -1;
>   }
>  
> - if (ioctl(dev->vhostfd, VHOST_VDPA_GET_DEVICE_ID, &did) < 0 ||
> + if (ioctl(data->vhostfd, VHOST_VDPA_GET_DEVICE_ID, &did) < 0 ||
>   did != VIRTIO_ID_NETWORK) {
>   PMD_DRV_LOG(ERR, "Invalid vdpa device ID: %u\n", did);
>   return -1;
> @@ -288,9 +324,12 @@ vhost_vdpa_setup(struct virtio_user_dev *dev)
>  }
>  
>  static int
> -vhost_vdpa_destroy(struct virtio_user_dev *dev __rte_unused)
> +vhost_vdpa_destroy(struct virtio_user_dev *dev )
>  {
> - return;
> + struct vhost_vdpa_data *data = dev->backend_data;
> +
> + close(data->vhostfd);
> +

Note to self: free(data); here.

>   return 0;
>  }
>  
> diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h 
> b/drivers/net/virtio/virtio_user/virtio_user_dev.h
> index 5a2c9d38dd..2e0d6504f6 100644
> --- a/drivers/net/virtio/virtio_user/virtio_user_dev.h
> +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h
> @@ -28,9 +28,6 @@ struct virtio_user_dev {
>   enum virtio_user_backend_type backend_type;
>   boolis_server;  /* server or client mode */
>  
> - /* for vhost_vdpa backend */
> - int vhostfd;
> -
>   /* for both vhost_user and vhost_kernel */
>   int callfds[VIRTIO_MAX_VIRTQUEUES];
>   int kickfds[VIRTIO_MAX_VIRTQUEUES];
> 



[dpdk-dev] [dpdk-announce] DPDK Summit APAC - CFP is NOW OPEN!

2020-12-22 Thread Jill Lovato
DPDK Community,

We are pleased to announce that the Call for Proposals (CFP) and
Registration for the upcoming DPDK Summit APAC
, which takes place
virtually March 22-23, are now open.

The DPDK Summit APAC is a community event focused on software developers
who contribute to or use DPDK. The event will include presentations on the
latest developments in DPDK, as well as in-depth discussions on the topics
that are of most interest to the DPDK open source community.

We will accommodate presentations in all languages, however, we request
slides be in English. You may present in the language of your choice.

The CFP will be open for submissions through JANUARY 8, 2021:
https://events.linuxfoundation.org/dpdk-summit-apac/program/cfp/#dates-to-remember


Please let us know if there are any questions, and we hope to "see" you
soon.

-- 
*Jill Lovato*
Senior PR Manager
The Linux Foundation
jlov...@linuxfoundation.org
Phone: +1.503.703.8268


[dpdk-dev] net/ena: traffic lock

2020-12-22 Thread RajeshKumar Kalidass
Dpdk: 19.11
Driver: ena

During longevity testing (after 24+ hrs) at 10Gbps, one of the tx-queues is
getting into an unrecoverable state (it's not able to find enough
tx-descriptors, nor is it freeing them up).

So for every tx-burst, eth_ena_xmit_pkts() neither finds free tx-descriptor nor 
able to free txd (ena_com_tx_comp_req_id_get() is always returning 
ENA_COM_TRY_AGAIN).

We see eth_ena_xmit_pkts() has been refactored in latest LTS version, is there 
any related issue got fixed ? Can you help


(gdb) p *(struct ena_ring *) rte_eth_devices[2].data->tx_queues[5]
$14 = {
  next_to_use = 4979,
  next_to_clean = 3958,
  type = ENA_RING_TYPE_TX,
  tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_DEV,
  {
empty_tx_reqs = 0x11e406b00,
empty_rx_reqs = 0x11e406b00
  },
  {
tx_buffer_info = 0x11d2dfc80,
rx_buffer_info = 0x11d2dfc80
  },
 rx_refill_buffer = 0x0,
  ring_size = 1024,
  ena_com_io_cq = 0x11e40e640,
  ena_com_io_sq = 0x11e4168c0,
  ena_bufs = {{
  len = 0,
  req_id = 0
} },
  mb_pool = 0x0,
  port_id = 2,
  id = 5,
  tx_max_header_size = 96 '`',
 configured = 1,
  push_buf_intermediate_buf = 0x11e406a00 "",
  adapter = 0x11e40e040,
  offloads = 2,
  sgl_size = 17,
  {
rx_stats = {
  cnt = 4979,
  bytes = 417580,
  refill_partial = 35426,
  bad_csum = 0,
  mbuf_alloc_fail = 0,
  bad_desc_num = 38603,
---Type  to continue, or q  to quit---
  bad_req_id = 3178
},
tx_stats = {
  cnt = 4979,
  bytes = 417580,
  prepare_ctx_err = 35426, <-- Errors
  linearize = 0,
  linearize_failed = 0,
  tx_poll = 38603,
  doorbells = 3178,
  bad_req_id = 0,
  available_desc = 2
}
  },
  numa_socket_id = 0
}

Thanks,
-Rajesh

This message may contain confidential and privileged information. If it has 
been sent to you in error, please reply to advise the sender of the error and 
then immediately delete it. If you are not the intended recipient, do not read, 
copy, disclose or otherwise use this message. The sender disclaims any 
liability for such unauthorized use. NOTE that all incoming emails sent to 
Gigamon email accounts will be archived and may be scanned by us and/or by 
external service providers to detect and prevent threats to our systems, 
investigate illegal or inappropriate behavior, and/or eliminate unsolicited 
promotional emails ("spam").


Re: [dpdk-dev] [PATCH 0/9] Introduce vfio-user library

2020-12-22 Thread Thanos Makatos
> Hello,
> 
> On Fri, Dec 18, 2020 at 8:54 AM Chenbo Xia  wrote:
> > *librte_vfio_user* library is an implementation of VFIO-over-socket[1]
> (also
> > known as vfio-user) which is a protocol that allows a device to be 
> > virtualized
> > in a separate process outside of QEMU.
> 
> What is the status of the specification on QEMU side?
> Integrating an implementation in DPDK is premature until we have an
> agreed specification.

We're in the process of reviewing the specification, the latest version (v7) is 
here: https://www.mail-archive.com/qemu-devel@nongnu.org/msg763207.html. We 
haven't had any reviews yet for that revision, IMO we're getting close.

FYI John Johnson is implementing the relevant changes in multiprocess QEMU. 
John Levon, Swapnil, and I are implementing the server part in libvfio-user 
(formerly known as MUSER). We also have a mailing list now: 
https://lists.nongnu.org/mailman/listinfo/libvfio-user-devel. We've been 
working on integrating the two parts.

Finally, Changpeng is implementing an NVMe controller in SPDK. We're trying to 
make it work with multiprocess QEMU and libvfio-user, we're very close.




Re: [dpdk-dev] DPDK ENA PMD spurious ierrors

2020-12-22 Thread Chauskin, Igor
Hi Roger,

 

Thanks for reporting this. Your suggestion seems like a valid workaround, we’ll 
work on preparing it.

 

Regards,

Igor

 

From: Roger Melton (rmelton)  
Sent: Thursday, December 17, 2020 01:57
To: dev@dpdk.org; m...@semihalf.com; m...@semihalf.com; Tzalik, Guy 
; Chauskin, Igor 
Subject: [EXTERNAL] DPDK ENA PMD spurious ierrors

 


CAUTION: This email originated from outside of the organization. Do not click 
links or open attachments unless you can confirm the sender and know the 
content is safe.

 

We are seeing issues with the DPDK 18.11 ENA PMD incrementing rx_errors stat on 
good packets (checksum later validated by software). We’ve tried several 
versions from DPDK 18.11 stable, including 18.11.9 and 18.11.10.  Looking 
through ENA PMD commits, I see there have been a number of rx stats 
improvements.  Some but not all have been back ported into DPDK 18.11 stable, 
some of those presumably because they depend on updates to the base HW/HAL 
layer.  For example,  in the latest ENA PMD driver, PKT_RX_L4_CKSUM_BAD can 
only be set if the base layer has set l4_csum_checked in the RX context, a 
feature that is not available in the DPDK 18.11 ENA base driver.

 

Is there a way to avoid incorrectly updating ierrors in DPDK 18.11 that does 
not require upgrading the base HW/HAL?  For example, if an application does not 
enable IPV4, UDP or TCP RX checksum offloads, would l3_csum_err or l4_csum_err 
ever be valid?  If not, then would it be valid to pass ena_rx_mbuf_prepare() a 
pointer to the adapter and check for l3/l4 checksum errors only if any of 
DEV_RX_OFFLOAD_*CKSUM are set in 
adapter->rte_eth_dev_data->dev_conf.rxmode.offloads?  From DPDK 18.11.10, if RX 
checksum offloads are not enabled, skip the highlighted code:

 

static inline void ena_rx_mbuf_prepare(struct rte_mbuf *mbuf,

   struct 
ena_com_rx_ctx *ena_rx_ctx)

{

uint64_t ol_flags = 0;

uint32_t packet_type = 0;

 

if (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP)

packet_type |= RTE_PTYPE_L4_TCP;

else if (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)

packet_type |= RTE_PTYPE_L4_UDP;

 

if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4)

packet_type |= RTE_PTYPE_L3_IPV4;

else if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV6)

packet_type |= RTE_PTYPE_L3_IPV6;

 

if (unlikely(ena_rx_ctx->l4_csum_err))

ol_flags |= PKT_RX_L4_CKSUM_BAD;

if (unlikely(ena_rx_ctx->l3_csum_err))

ol_flags |= PKT_RX_IP_CKSUM_BAD;

 

mbuf->ol_flags = ol_flags;

mbuf->packet_type = packet_type;

}

 

While reviewing the code, I also noticed that at the top of the tree, ierrors 
are incremented in the transmit path:

 

static int ena_check_and_linearize_mbuf(struct ena_ring *tx_ring,


struct rte_mbuf *mbuf)

{

struct ena_com_dev *ena_dev;

int num_segments, header_len, rc;

 

--- snip ---

rc = rte_pktmbuf_linearize(mbuf);

if (unlikely(rc)) {

PMD_DRV_LOG(WARNING, "Mbuf linearize failed\n");


rte_atomic64_inc(&tx_ring->adapter->drv_stats->ierrors);

++tx_ring->tx_stats.linearize_failed;

return rc;

}

 

return rc;

}

 

This was introduced by 7830e905b7 net/ena: expose extended stats.

 

Shouldn’t oerrors be incremented in this case?

 

Thanks in advance for your help.

 

Regards,

Roger Melton



Re: [dpdk-dev] DPDK ENA PMD spurious ierrors

2020-12-22 Thread Chauskin, Igor
Hi Roger,

 

We plan to address both issues.

 

Thanks,

Igor

 

From: Roger Melton (rmelton)  
Sent: Friday, December 18, 2020 20:37
To: Chauskin, Igor ; dev@dpdk.org; m...@semihalf.com; 
m...@semihalf.com; Tzalik, Guy 
Subject: RE: [EXTERNAL] DPDK ENA PMD spurious ierrors

 


CAUTION: This email originated from outside of the organization. Do not click 
links or open attachments unless you can confirm the sender and know the 
content is safe.

 

Hi Igor,

 

Thanks for the reply.  By work on preparing it do you mean that you plan to:

 

1.  check rxmode.offloads in ena_rx_mbuf_prepare() before setting 
PKT_RX_[L3,L4]_CKSUM_BAD bits in ol_flags, and 
2.  increment oerrors instead of ierrors in the TX path?

 

FWIW, Since we do not enable hardware checksum offloads and since our 
application validates L3/L4 ingress checksums, our workaround in DPDK 18.11.10 
is to eliminate incrementing ierrors.  There’s no point in wasting cycles.

 

Regards,

Roger

 

 

 

From: "Chauskin, Igor" mailto:igo...@amazon.com> >
Date: Friday, December 18, 2020 at 12:29 PM
To: "Roger Melton (rmelton)" mailto:rmel...@cisco.com> >, 
"dev@dpdk.org  " mailto:dev@dpdk.org> >, 
"m...@semihalf.com  " mailto:m...@semihalf.com> >, "m...@semihalf.com  " 
mailto:m...@semihalf.com> >, "Tzalik, Guy" 
mailto:gtza...@amazon.com> >
Subject: RE: DPDK ENA PMD spurious ierrors

 

Hi Roger,

 

Thanks for reporting this. Your suggestion seems like a valid workaround, we’ll 
work on preparing it.

 

Regards,

Igor

 

From: Roger Melton (rmelton) mailto:rmel...@cisco.com> > 
Sent: Thursday, December 17, 2020 01:57
To: dev@dpdk.org  ; m...@semihalf.com 
 ; m...@semihalf.com  ; 
Tzalik, Guy mailto:gtza...@amazon.com> >; Chauskin, Igor 
mailto:igo...@amazon.com> >
Subject: [EXTERNAL] DPDK ENA PMD spurious ierrors

 


CAUTION: This email originated from outside of the organization. Do not click 
links or open attachments unless you can confirm the sender and know the 
content is safe.

 

We are seeing issues with the DPDK 18.11 ENA PMD incrementing rx_errors stat on 
good packets (checksum later validated by software). We’ve tried several 
versions from DPDK 18.11 stable, including 18.11.9 and 18.11.10.  Looking 
through ENA PMD commits, I see there have been a number of rx stats 
improvements.  Some but not all have been back ported into DPDK 18.11 stable, 
some of those presumably because they depend on updates to the base HW/HAL 
layer.  For example,  in the latest ENA PMD driver, PKT_RX_L4_CKSUM_BAD can 
only be set if the base layer has set l4_csum_checked in the RX context, a 
feature that is not available in the DPDK 18.11 ENA base driver.

 

Is there a way to avoid incorrectly updating ierrors in DPDK 18.11 that does 
not require upgrading the base HW/HAL?  For example, if an application does not 
enable IPV4, UDP or TCP RX checksum offloads, would l3_csum_err or l4_csum_err 
ever be valid?  If not, then would it be valid to pass ena_rx_mbuf_prepare() a 
pointer to the adapter and check for l3/l4 checksum errors only if any of 
DEV_RX_OFFLOAD_*CKSUM are set in 
adapter->rte_eth_dev_data->dev_conf.rxmode.offloads?  From DPDK 18.11.10, if RX 
checksum offloads are not enabled, skip the highlighted code:

 

static inline void ena_rx_mbuf_prepare(struct rte_mbuf *mbuf,

   struct 
ena_com_rx_ctx *ena_rx_ctx)

{

uint64_t ol_flags = 0;

uint32_t packet_type = 0;

 

if (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP)

packet_type |= RTE_PTYPE_L4_TCP;

else if (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)

packet_type |= RTE_PTYPE_L4_UDP;

 

if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4)

packet_type |= RTE_PTYPE_L3_IPV4;

else if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV6)

packet_type |= RTE_PTYPE_L3_IPV6;

 

if (unlikely(ena_rx_ctx->l4_csum_err))

ol_flags |= PKT_RX_L4_CKSUM_BAD;

if (unlikely(ena_rx_ctx->l3_csum_err))

ol_flags |= PKT_RX_IP_CKSUM_BAD;

 

mbuf->ol_flags = ol_flags;

mbuf->packet_type = packet_type;

}

 

While reviewing the code, I also noticed that at the top of the tree, ierrors 
are incremented in the transmit path:

 

static int ena_check_and_linearize_mbuf(struct ena_ring *tx_ring,


struct rte_mbuf *mbuf)

{

struct ena_com_dev *ena_dev;

int num_segments, header_len, rc;

 

--- snip ---

rc = rt

Re: [dpdk-dev] net/ena: traffic lock

2020-12-22 Thread Chauskin, Igor
Hi Rajesh,

 

For the tx queue getting into unrecoverable state - can you please share the
details of your usecase? (instance types, traffic type, number of queues in
use, etc.). Also, please share your instances ids, if possible.

 

Regarding prepare_ctx_err counter - under certain circumstances this counter
indeed can be incorrectly incremented and we're working on a fix to this.
However, this shouldn't have effect beyond statistics (unless your
application explicitly relies on this counter in its business logic). 

 

Thanks,

Igor

 

From: RajeshKumar Kalidass  
Sent: Tuesday, December 22, 2020 10:03
To: m...@semihalf.com; Chauskin, Igor ; Tzalik, Guy
; a...@semihalf.com; dev@dpdk.org
Cc: Tanmay Kishore ; Rakesh Jagota

Subject: [EXTERNAL] net/ena: traffic lock 

 


CAUTION: This email originated from outside of the organization. Do not
click links or open attachments unless you can confirm the sender and know
the content is safe.

 

Dpdk: 19.11

Driver: ena

 

During longevity(after 24+ hrs) testing at 10Gbps, one of tx-queue is
getting into unrecoverable state ( its not able to find enough tx-descriptor
nor its freeing up).

 

So for every tx-burst, eth_ena_xmit_pkts() neither finds free tx-descriptor
nor able to free txd (ena_com_tx_comp_req_id_get() is always returning
ENA_COM_TRY_AGAIN).

 

We see eth_ena_xmit_pkts() has been refactored in latest LTS version, is
there any related issue got fixed ? Can you help

 

 

(gdb) p *(struct ena_ring *) rte_eth_devices[2].data->tx_queues[5]

$14 = {

  next_to_use = 4979,

  next_to_clean = 3958,

  type = ENA_RING_TYPE_TX,

  tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_DEV,

  {

empty_tx_reqs = 0x11e406b00,

empty_rx_reqs = 0x11e406b00

  },

  {

tx_buffer_info = 0x11d2dfc80,

rx_buffer_info = 0x11d2dfc80

  },

 rx_refill_buffer = 0x0,

  ring_size = 1024,

  ena_com_io_cq = 0x11e40e640,

  ena_com_io_sq = 0x11e4168c0,

  ena_bufs = {{

  len = 0,

  req_id = 0

} },

  mb_pool = 0x0,

  port_id = 2,

  id = 5,

  tx_max_header_size = 96 '`',

 configured = 1,

  push_buf_intermediate_buf = 0x11e406a00 "",

  adapter = 0x11e40e040,

  offloads = 2,

  sgl_size = 17,

  {

rx_stats = {

  cnt = 4979,

  bytes = 417580,

  refill_partial = 35426,

  bad_csum = 0,

  mbuf_alloc_fail = 0,

  bad_desc_num = 38603,

---Type  to continue, or q  to quit---

  bad_req_id = 3178

},

tx_stats = {

  cnt = 4979,

  bytes = 417580,

  prepare_ctx_err = 35426, <-- Errors

  linearize = 0,

  linearize_failed = 0,

  tx_poll = 38603,

  doorbells = 3178,

  bad_req_id = 0,

  available_desc = 2

}

  },

  numa_socket_id = 0

}

 

Thanks,

-Rajesh

 

This message may contain confidential and privileged information. If it has
been sent to you in error, please reply to advise the sender of the error
and then immediately delete it. If you are not the intended recipient, do
not read, copy, disclose or otherwise use this message. The sender disclaims
any liability for such unauthorized use. NOTE that all incoming emails sent
to Gigamon email accounts will be archived and may be scanned by us and/or
by external service providers to detect and prevent threats to our systems,
investigate illegal or inappropriate behavior, and/or eliminate unsolicited
promotional emails ("spam"). 



Re: [dpdk-dev] [PATCH v4] eal: add generic thread-local-storage functions

2020-12-22 Thread Dmitry Kozlyuk
> [...]
> +int
> +rte_thread_tls_set_value(rte_tls_key key, const void *value)
> +{
> + int err;
> +
> + if (!key) {
> + RTE_LOG(DEBUG, EAL, "invalid tls key passed to function.\n");
> + return -1;
> + }
> + err = pthread_setspecific(key->thread_index, value);
> + if (err) {
> + RTE_LOG(DEBUG, EAL, "pthread_setspecific failed: %s\n",
> + strerror(err));
> + free(key);

Why free(key) here? Probably a typo.

> [...]
> +__rte_experimental
> +int
> +rte_thread_tls_create_key(rte_tls_key *key, void (*destructor)(void *));
> +
> +/**
> + * Function to delete a TLS data key visible to all threads in the process
> + * rte_tls_key is the opaque pointer allocated by rte_thread_tls_create_key.
> + *
> + * @param key
> + *   The rte_tls_key will cantain the allocated key

cantain -> contain


> diff --git a/lib/librte_eal/windows/meson.build 
> b/lib/librte_eal/windows/meson.build
> index 3b2faf29eb..1f1398dfe9 100644
> --- a/lib/librte_eal/windows/meson.build
> +++ b/lib/librte_eal/windows/meson.build
> @@ -21,4 +21,10 @@ sources += files(
>   'getopt.c',
>  )
>  
> +if (dpdk_conf.has('use_external_pthread'))

Please describe the new option in meson_options.txt.
Maybe drop "external" from the name, what do you think?



Re: [dpdk-dev] [PATCH v1] app/testpmd: fix dynamic config error for max-pkt-len

2020-12-22 Thread Li, Xiaoyun
Hi
Comments inline

> -Original Message-
> From: dev  On Behalf Of Steve Yang
> Sent: Tuesday, December 22, 2020 16:14
> To: dev@dpdk.org
> Cc: Lu, Wenzhuo ; Xing, Beilei ;
> Iremonger, Bernard ; Yang, SteveX
> 
> Subject: [dpdk-dev] [PATCH v1] app/testpmd: fix dynamic config error for max-
> pkt-len
> 
> When 'max-pkt-len' value caused the 'rx_offloads' flag change, the all 
> offloads
> of rx queues ('rx_conf[qid].offloads') weren't synchronized, that will cause 
> the
> offloads check failed with 'rx_queue_offload_capa'
> within 'rte_eth_rx_queue_setup'.
> 
> Apply rx offloads configuration once it changed when 'max-pkt-len'
> command parsed.

Grammar and tense inconsistence...
You can phrase like the following:

Configuring 'max-pkt-len' would change 'rx_offloads' in dev_conf while 
rx_conf.offloads of each queue still kept the old value.
It would cause the failure of the offloads check in 'rte_eth_rx_queue_setup'.

This patch applied rx offloads configuration for each queue once it changed.

> 
> Fixes: 384161e00627 ("app/testpmd: adjust on the fly VLAN configuration")
> 
> Signed-off-by: Steve Yang 
> ---
>  app/test-pmd/cmdline.c | 18 +-
>  1 file changed, 17 insertions(+), 1 deletion(-)
> 
> diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index
> 2ccbaa039e..d72a40d7de 100644
> --- a/app/test-pmd/cmdline.c
> +++ b/app/test-pmd/cmdline.c
> @@ -1902,7 +1902,23 @@ cmd_config_max_pkt_len_parsed(void
> *parsed_result,
>   rx_offloads |=
> DEV_RX_OFFLOAD_JUMBO_FRAME;
>   else
>   rx_offloads &=
> ~DEV_RX_OFFLOAD_JUMBO_FRAME;
> - port->dev_conf.rxmode.offloads = rx_offloads;

I understand what you're doing here. But I think there's a better place to do 
this.
This config cmd will call init_port_config() later. And rxtx_port_config() will 
be called in it.
I think you should do this in rxtx_port_config().
Check if rx_conf  is equal to dev_conf, and if it's not consistent, apply 
dev_conf.

Although if you insist on your way doing this, there're some issues too. See 
below.

> +
> + if (rx_offloads != port->dev_conf.rxmode.offloads) {
> + uint16_t k;
> + int ret;
> +
> + port->dev_conf.rxmode.offloads = rx_offloads;
> + /* Apply Rx offloads configuration */
> + ret = eth_dev_info_get_print_err(pid,
> + &port->dev_info);
> + if (ret != 0)
> + rte_exit(EXIT_FAILURE,
> + "rte_eth_dev_info_get() failed\n");

rte_exit if for the main process of the application not for cmdline.
Because rte_exit will just terminate the application and return to the shell. 
You wouldn't want that.
You only needs to 'return;' or maybe printf a error message and return.

> +
> + for (k = 0;

Why are you using 'k'? There's no problem of this just looks a bit weird. 
There's no 'i' used in this function so why not just use 'i'.

> +  k < port->dev_info.nb_rx_queues; k++)
> + port->rx_conf[k].offloads =
> rx_offloads;
> + }
>   } else {
>   printf("Unknown parameter\n");
>   return;
> --
> 2.17.1



Re: [dpdk-dev] [PATCH v2] net/iavf: fix negative GTP-U flow rules create successfully

2020-12-22 Thread Guo, Jia
Hi, murphy

> -Original Message-
> From: Murphy Yang 
> Sent: Friday, December 18, 2020 2:19 PM
> To: dev@dpdk.org
> Cc: Yang, Qiming ; Yang, SteveX
> ; Guo, Jia ; Wu, Jingjing
> ; Xing, Beilei ; Yang,
> MurphyX 
> Subject: [PATCH v2] net/iavf: fix negative GTP-U flow rules create
> successfully
> 
> Currently, when use 'flow' command to create a negative GTP-U rule, it will
> be created successfully.
> 
> The list shows the impacted outer and inner 'ipv4' GTP-U patterns with 'ipv4'
> or 'gtpu' type:
>  - iavf_pattern_eth_ipv4_gtpu_ipv4_udp
>  - iavf_pattern_eth_ipv4_gtpu_eh_ipv4_udp
>  - iavf_pattern_eth_ipv4_gtpu_ipv4_tcp
>  - iavf_pattern_eth_ipv4_gtpu_eh_ipv4_tcp
>  - more impacted patterns with 'gtpu' type:
>> iavf_pattern_eth_ipv4_gtpu_ipv4
>> iavf_pattern_eth_ipv4_gtpu_eh_ipv4
> 
> Same as the outer and inner 'ipv6' GTP-U patterns.
> 
> So, this commit adds the invalid RSS combinations in 'invalid_rss_comb'
> array to make result correct.
> 
> The list of added invalid RSS combinations:
>  - ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP
>  - ETH_RSS_IPV6 | ETH_RSS_NONFRAG_IPV6_TCP
>  - ETH_RSS_GTPU | IAVF_RSS_TYPE_INNER_IPV4
>  - ETH_RSS_GTPU | IAVF_RSS_TYPE_INNER_IPV4_UDP
>  - ETH_RSS_GTPU | IAVF_RSS_TYPE_INNER_IPV4_TCP
>  - ETH_RSS_GTPU | IAVF_RSS_TYPE_INNER_IPV6
>  - ETH_RSS_GTPU | IAVF_RSS_TYPE_INNER_IPV6_UDP
>  - ETH_RSS_GTPU | IAVF_RSS_TYPE_INNER_IPV6_TCP
> 

Seem that you add some specific negative flow case for the "ipv4(outer/inner) + 
gtpu" and "ipv4(outer/inner) + ipv4-udp(tcp)"  in the currently invalid 
checking mode, so you commit log could be more clear and clean for that, 
thanks.  

> Fixes: 91f27b2e39ab ("net/iavf: refactor RSS")
> 
> Signed-off-by: Murphy Yang 
> ---
> v2:
> - add invalid RSS combinations
> 
>  drivers/net/iavf/iavf_hash.c | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/drivers/net/iavf/iavf_hash.c b/drivers/net/iavf/iavf_hash.c index
> c4c73e6644..8393d8535b 100644
> --- a/drivers/net/iavf/iavf_hash.c
> +++ b/drivers/net/iavf/iavf_hash.c
> @@ -806,7 +806,15 @@ static void iavf_refine_proto_hdrs(struct
> virtchnl_proto_hdrs *proto_hdrs,
> 
>  static uint64_t invalid_rss_comb[] = {
>   ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_UDP,
> + ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
>   ETH_RSS_IPV6 | ETH_RSS_NONFRAG_IPV6_UDP,
> + ETH_RSS_IPV6 | ETH_RSS_NONFRAG_IPV6_TCP,
> + ETH_RSS_GTPU | IAVF_RSS_TYPE_INNER_IPV4,
> + ETH_RSS_GTPU | IAVF_RSS_TYPE_INNER_IPV4_UDP,
> + ETH_RSS_GTPU | IAVF_RSS_TYPE_INNER_IPV4_TCP,
> + ETH_RSS_GTPU | IAVF_RSS_TYPE_INNER_IPV6,
> + ETH_RSS_GTPU | IAVF_RSS_TYPE_INNER_IPV6_UDP,
> + ETH_RSS_GTPU | IAVF_RSS_TYPE_INNER_IPV6_TCP,

Shouldn't it be "ETH_RSS_GTPU | ETH_RSS_IPV4" if the outer IP case is the same?
In any case, you should consider both if you check one.

>   RTE_ETH_RSS_L3_PRE32 | RTE_ETH_RSS_L3_PRE40 |
>   RTE_ETH_RSS_L3_PRE48 | RTE_ETH_RSS_L3_PRE56 |
>   RTE_ETH_RSS_L3_PRE96
> --
> 2.17.1



Re: [dpdk-dev] [PATCH v6 0/4] fix issue with partial DMA unmap

2020-12-22 Thread Nithin Dabilpuram
Ping.

On Fri, Dec 18, 2020 at 12:36:00AM +0530, Nithin Dabilpuram wrote:
> Partial DMA unmap is not supported by VFIO type1 IOMMU
> in Linux. Though the return value is zero, the returned
> DMA unmap size is not same as expected size.
> So add test case and fix to both heap triggered DMA
> mapping and user triggered DMA mapping/unmapping.
> 
> Refer vfio_dma_do_unmap() in drivers/vfio/vfio_iommu_type1.c
> Snippet of comment is below.
> 
> /*
>  * vfio-iommu-type1 (v1) - User mappings were coalesced together to
>  * avoid tracking individual mappings.  This means that the 
> granularity
>  * of the original mapping was lost and the user was allowed to 
> attempt
>  * to unmap any range.  Depending on the contiguousness of physical
>  * memory and page sizes supported by the IOMMU, arbitrary unmaps may
>  * or may not have worked.  We only guaranteed unmap granularity
>  * matching the original mapping; even though it was untracked here,
>  * the original mappings are reflected in IOMMU mappings.  This
>  * resulted in a couple unusual behaviors.  First, if a range is not
>  * able to be unmapped, ex. a set of 4k pages that was mapped as a
>  * 2M hugepage into the IOMMU, the unmap ioctl returns success but 
> with
>  * a zero sized unmap.  Also, if an unmap request overlaps the first
>  * address of a hugepage, the IOMMU will unmap the entire hugepage.
>  * This also returns success and the returned unmap size reflects the
>  * actual size unmapped.
> 
>  * We attempt to maintain compatibility with this "v1" interface, but 
>  
>  * we take control out of the hands of the IOMMU.  Therefore, an 
> unmap 
>  * request offset from the beginning of the original mapping will 
>  
>  * return success with zero sized unmap.  And an unmap request 
> covering
>  * the first iova of mapping will unmap the entire range. 
>  
> 
> This behavior can be verified by using first patch and add return check for
> dma_unmap.size != len in vfio_type1_dma_mem_map()
> 
> v6:
> - Fixed issue with x86-32 build introduced by v5.
> 
> v5:
> - Changed vfio test in test_vfio.c to use system pages allocated from
>   heap instead of mmap() so that it comes in range of initially configured
>   window for POWER9 System.
> - Added acked-by from David for 1/4, 2/4.
> 
> v4:
> - Fixed issue with patch 4/4 on x86 builds.
> 
> v3:
> - Fixed external memory test case(4/4) to use system page size
>   instead of 4K.
> - Fixed check-git-log.sh issue and rebased.
> - Added acked-by from anatoly.bura...@intel.com to first 3 patches.
> 
> v2: 
> - Reverted earlier commit that enables merging contiguous mappings for
>   IOVA as PA. (see 1/3)
> - Updated documentation about kernel dma mapping limits and vfio
>   module parameter.
> - Moved vfio test to test_vfio.c and handled comments from
>   Anatoly.
> 
> Nithin Dabilpuram (4):
>   vfio: revert changes for map contiguous areas in one go
>   vfio: fix DMA mapping granularity for type1 IOVA as VA
>   test: add test case to validate VFIO DMA map/unmap
>   test: change external memory test to use system page sz
> 
>  app/test/meson.build   |   1 +
>  app/test/test_external_mem.c   |   3 +-
>  app/test/test_vfio.c   | 107 
> +
>  doc/guides/linux_gsg/linux_drivers.rst |  10 +++
>  lib/librte_eal/linux/eal_vfio.c|  93 +++-
>  lib/librte_eal/linux/eal_vfio.h|   1 +
>  6 files changed, 157 insertions(+), 58 deletions(-)
>  create mode 100644 app/test/test_vfio.c
> 
> -- 
> 2.8.4
> 


Re: [dpdk-dev] [PATCH 0/8] Introduce emudev library and iavf emudev driver

2020-12-22 Thread Xia, Chenbo
Hi Maxime,

> -Original Message-
> From: Maxime Coquelin 
> Sent: Tuesday, December 22, 2020 4:49 PM
> To: Xia, Chenbo ; Thomas Monjalon ;
> David Marchand 
> Cc: dev ; Stephen Hemminger ; Liang,
> Cunming ; Lu, Xiuchun ; Li,
> Miao ; Wu, Jingjing 
> Subject: Re: [dpdk-dev] [PATCH 0/8] Introduce emudev library and iavf emudev
> driver
> 
> Hi Chenbo,
> 
> Thanks for the detailed reply.
> 
> On 12/22/20 4:09 AM, Xia, Chenbo wrote:
> > Hi Maxime,
> >
> >> -Original Message-
> >> From: Maxime Coquelin 
> >> Sent: Monday, December 21, 2020 8:02 PM
> >> To: Xia, Chenbo ; Thomas Monjalon
> ;
> >> David Marchand 
> >> Cc: dev ; Stephen Hemminger ;
> Liang,
> >> Cunming ; Lu, Xiuchun ; Li,
> >> Miao ; Wu, Jingjing 
> >> Subject: Re: [dpdk-dev] [PATCH 0/8] Introduce emudev library and iavf
> emudev
> >> driver
> >>
> >>
> >>
> >> On 12/21/20 10:52 AM, Maxime Coquelin wrote:
> >>> Hi Chenbo,
> >>>
> >>> On 12/19/20 7:11 AM, Xia, Chenbo wrote:
>  Hi David,
> 
> > -Original Message-
> > From: David Marchand 
> > Sent: Friday, December 18, 2020 5:54 PM
> > To: Xia, Chenbo 
> > Cc: dev ; Thomas Monjalon ; Stephen
> > Hemminger ; Liang, Cunming
> > ; Lu, Xiuchun ; Li, Miao
> > ; Wu, Jingjing 
> > Subject: Re: [PATCH 0/8] Introduce emudev library and iavf emudev driver
> >
> > On Fri, Dec 18, 2020 at 9:02 AM Chenbo Xia  wrote:
> >>
> >> This series introduces a new device abstraction called emudev for
> > emulated
> >> devices. A new library (librte_emudev) is implemented. The first emudev
> >> driver is also introduced, which emulates Intel Adaptive Virtual
> > Function
> >> (iavf) as a software network device.
> >>
> >> This series has a dependency on librte_vfio_user patch series:
> >> http://patchwork.dpdk.org/cover/85389/
> >>
> >> Background & Motivation
> >> ---
> >> The disaggregated/multi-process QEMU is using VFIO-over-socket/vfio-
> user
> >> as the main transport mechanism to disaggregate IO services from QEMU.
> >> Therefore, librte_vfio_user is introduced in DPDK to accommodate
> >> emulated devices for high performance I/O. Although vfio-user library
> >> provides possibility of emulating devices in DPDK, DPDK does not have
> >> a device abstraction for emulated devices. A good device abstraction
> > will
> >> be useful for applications or high performance data path driver. With
> >> this consideration, emudev library is designed and implemented. It also
> >> make it possbile to keep modular design on emulated devices by
> > implementing
> >> data path related logic in a standalone driver (e.g., an ethdev driver)
> >> and keeps the unrelated logic in the emudev driver.
> >
> > Since you mention performance, how does it compare to vhost-user/virtio?
> 
>  I think it depends on the device specification (i.e., how complex its
> data
> >> path
>  handling is). A first try on iavf spec shows better performance than
> virtio
>  in our simple tests.
> >>>
> >>> That's interesting! How big is the performance difference? And how do
> >>> we explain it?
> >>>
> >>> If there are improvements that could be done in the Virtio
> >>> specification, it would be great to know and work on their
> >>> implementations. It worries me a bit that every one is coming with
> >>> his new device emulation every release, making things like live-
> >>> migration difficult to achieve in the future.
> >>
> >> I did a quick review of the IAVF emudev driver to understand what other
> >> factors than ring layout could explain a performance gain.
> >>
> >> My understanding is that part of the performance gain may come from
> >> following things that are supported/implemented in Vhost-user backend
> >> and not in IAVF driver:
> >> 1. Memory hotplug. It seems the datapath is not safe against memory
> >> hotplug in the VM, which causes the memory tables to be updated
> >> asynchronously from the datapath. In order to support it in Vhost-user
> >> library, we had to introduce locks to ensure the datapath isn't
> >> accessing the shared memory while it is being remapped.
> >
> > I think now it uses the similar way that vhost-user does.
> >
> > First, in the vfio-user patch series, we introduce a callback lock_dp to
> lock
> > the data path when messages like DMA MAP/UNMAP come. It will lock datapath
> > in our data path driver.
> >
> > Note that the data path handling is in our data path driver:
> > http://patchwork.dpdk.org/cover/85500/
> >
> > For modular design, iavf_emu driver emulates the device but the iavf back-
> end
> > driver does data path handling.
> 
> My analysis was actually based on the data path driver series.
> 
> My point was that iavfbe_recv_pkts() and iavfbe_xmit_pkts() are not safe
> against asynchronous changes like memory table updates.
> 
> As far as I can see, the access_lock aren't taken by these fun

[dpdk-dev] [PATCH] net/iavf: fix queue pairs configuration

2020-12-22 Thread Zhang,Alvin
From: Alvin Zhang 

Check if there are enough queue pairs currently allocated, and if not,
request PF to allocate them.

Fixes: e436cd43835b ("net/iavf: negotiate large VF and request more queues")
Cc: sta...@dpdk.org

Signed-off-by: Alvin Zhang 
---
 drivers/net/iavf/iavf_ethdev.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c
index 7e3c26a..f015121 100644
--- a/drivers/net/iavf/iavf_ethdev.c
+++ b/drivers/net/iavf/iavf_ethdev.c
@@ -372,8 +372,10 @@ struct rte_iavf_xstats_name_off {
} else {
/* Check if large VF is already enabled. If so, disable and
 * release redundant queue resource.
+* Or check if enough queue pairs. If not, request them from PF.
 */
-   if (vf->lv_enabled) {
+   if (vf->lv_enabled ||
+   num_queue_pairs > vf->vsi_res->num_queue_pairs) {
ret = iavf_queues_req_reset(dev, num_queue_pairs);
if (ret)
return ret;
-- 
1.8.3.1



[dpdk-dev] [PATCH] net/ice: check Rx queue number on RSS init

2020-12-22 Thread dapengx . yu
From: YU DAPENG 

When RSS is initialized, the Rx queue number is used as the denominator to
set default values into the RSS lookup table. If it is zero, a
division-by-zero error will occur. So add a value check to avoid the error.

Fixes: 50370662b727 ("net/ice: support device and queue ops")
Cc: sta...@dpdk.org

Signed-off-by: YU DAPENG 
---
 drivers/net/ice/ice_ethdev.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
index 9a5d6a559..bbb8c1460 100644
--- a/drivers/net/ice/ice_ethdev.c
+++ b/drivers/net/ice/ice_ethdev.c
@@ -3182,6 +3182,12 @@ static int ice_init_rss(struct ice_pf *pf)
vsi->rss_key_size = ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE;
vsi->rss_lut_size = pf->hash_lut_size;
 
+   if (nb_q == 0) {
+   PMD_DRV_LOG(WARNING,
+   "RSS is not supported as rx queues number is zero\n");
+   return 0;
+   }
+
if (is_safe_mode) {
PMD_DRV_LOG(WARNING, "RSS is not supported in safe mode\n");
return 0;
-- 
2.27.0



Re: [dpdk-dev] [PATCH 21.02 v2 1/2] net/virtio: fix missing backend features negotiation

2020-12-22 Thread Xia, Chenbo
Hi Maxime,

> -Original Message-
> From: Maxime Coquelin 
> Sent: Tuesday, December 22, 2020 6:56 PM
> To: Xia, Chenbo ; dev@dpdk.org; amore...@redhat.com;
> jasow...@redhat.com; david.march...@redhat.com
> Cc: sta...@dpdk.org
> Subject: Re: [PATCH 21.02 v2 1/2] net/virtio: fix missing backend features
> negotiation
> 
> 
> 
> On 12/21/20 7:23 AM, Xia, Chenbo wrote:
> > Hi Maxime,
> >
> >> -Original Message-
> >> From: Maxime Coquelin 
> >> Sent: Wednesday, November 25, 2020 11:21 PM
> >> To: dev@dpdk.org; Xia, Chenbo ; amore...@redhat.com;
> >> jasow...@redhat.com; david.march...@redhat.com
> >> Cc: Maxime Coquelin ; sta...@dpdk.org
> >> Subject: [PATCH 21.02 v2 1/2] net/virtio: fix missing backend features
> >> negotiation
> >>
> >> This patch adds missing backend features negotiation for
> >> in Vhost-vDPA. Without it, IOTLB messages v2 could be sent
> >> by Virtio-user PMD while not supported by the backend.
> >>
> >> Fixes: 6b901437056e ("net/virtio: introduce vhost-vDPA backend")
> >> Cc: sta...@dpdk.org
> >>
> >> Signed-off-by: Maxime Coquelin 
> >> ---
> >>  drivers/net/virtio/virtio_user/vhost.h   |  4 
> >>  drivers/net/virtio/virtio_user/vhost_vdpa.c  | 14 ++
> >>  drivers/net/virtio/virtio_user/virtio_user_dev.c | 14 ++
> >>  3 files changed, 28 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/drivers/net/virtio/virtio_user/vhost.h
> >> b/drivers/net/virtio/virtio_user/vhost.h
> >> index 210a3704e7..c1dcc50b58 100644
> >> --- a/drivers/net/virtio/virtio_user/vhost.h
> >> +++ b/drivers/net/virtio/virtio_user/vhost.h
> >> @@ -86,6 +86,10 @@ enum vhost_user_request {
> >>VHOST_USER_MAX
> >>  };
> >>
> >> +#ifndef VHOST_BACKEND_F_IOTLB_MSG_V2
> >> +#define VHOST_BACKEND_F_IOTLB_MSG_V2 1
> >> +#endif
> >> +
> >>  extern const char * const vhost_msg_strings[VHOST_USER_MAX];
> >>
> >>  struct vhost_memory_region {
> >> diff --git a/drivers/net/virtio/virtio_user/vhost_vdpa.c
> >> b/drivers/net/virtio/virtio_user/vhost_vdpa.c
> >> index c7b9349fc8..b6c81d6f17 100644
> >> --- a/drivers/net/virtio/virtio_user/vhost_vdpa.c
> >> +++ b/drivers/net/virtio/virtio_user/vhost_vdpa.c
> >> @@ -35,6 +35,8 @@
> >>  #define VHOST_VDPA_SET_STATUS _IOW(VHOST_VIRTIO, 0x72, __u8)
> >>  #define VHOST_VDPA_SET_VRING_ENABLE   _IOW(VHOST_VIRTIO, 0x75, \
> >> struct vhost_vring_state)
> >> +#define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
> >> +#define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64)
> >>
> >>  static uint64_t vhost_req_user_to_vdpa[] = {
> >>[VHOST_USER_SET_OWNER] = VHOST_SET_OWNER,
> >> @@ -51,6 +53,8 @@ static uint64_t vhost_req_user_to_vdpa[] = {
> >>[VHOST_USER_SET_STATUS] = VHOST_VDPA_SET_STATUS,
> >>[VHOST_USER_GET_STATUS] = VHOST_VDPA_GET_STATUS,
> >>[VHOST_USER_SET_VRING_ENABLE] = VHOST_VDPA_SET_VRING_ENABLE,
> >> +  [VHOST_USER_GET_PROTOCOL_FEATURES] = VHOST_GET_BACKEND_FEATURES,
> >> +  [VHOST_USER_SET_PROTOCOL_FEATURES] = VHOST_SET_BACKEND_FEATURES,
> >>  };
> >>
> >>  /* no alignment requirement */
> >> @@ -86,6 +90,11 @@ vhost_vdpa_dma_map(struct virtio_user_dev *dev, void
> *addr,
> >>  {
> >>struct vhost_msg msg = {};
> >>
> >> +  if (!(dev->protocol_features & (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)))
> >> {
> >> +  PMD_DRV_LOG(ERR, "IOTLB_MSG_V2 not supported by the backend.");
> >> +  return -1;
> >> +  }
> >> +
> >>msg.type = VHOST_IOTLB_MSG_V2;
> >>msg.iotlb.type = VHOST_IOTLB_UPDATE;
> >>msg.iotlb.iova = iova;
> >> @@ -108,6 +117,11 @@ vhost_vdpa_dma_unmap(struct virtio_user_dev *dev,
> >> __rte_unused void *addr,
> >>  {
> >>struct vhost_msg msg = {};
> >>
> >> +  if (!(dev->protocol_features & (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)))
> >> {
> >> +  PMD_DRV_LOG(ERR, "IOTLB_MSG_V2 not supported by the backend.");
> >> +  return -1;
> >> +  }
> >> +
> >>msg.type = VHOST_IOTLB_MSG_V2;
> >>msg.iotlb.type = VHOST_IOTLB_INVALIDATE;
> >>msg.iotlb.iova = iova;
> >> diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c
> >> b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> >> index 053f0267ca..96bc6b232d 100644
> >> --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
> >> +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> >> @@ -439,11 +439,13 @@ virtio_user_dev_setup(struct virtio_user_dev *dev)
> >> 1ULL << VIRTIO_F_RING_PACKED   |   \
> >> 1ULL << VHOST_USER_F_PROTOCOL_FEATURES)
> >>
> >> -#define VIRTIO_USER_SUPPORTED_PROTOCOL_FEATURES   \
> >> +#define VHOST_USER_SUPPORTED_PROTOCOL_FEATURES\
> >>(1ULL << VHOST_USER_PROTOCOL_F_MQ | \
> >> 1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK |  \
> >> 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
> >>
> >> +#define VHOST_VDPA_SUPPORTED_PROTOCOL_FEATURES\
> >> +  (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)
> >>  int
> >>  virtio_user_dev_init(struct virtio_us

Re: [dpdk-dev] [PATCH] app/testpmd: increase array for fetching supported FEC caps

2020-12-22 Thread Li, Xiaoyun
Hi

> -Original Message-
> From: dev  On Behalf Of Rahul Lakkireddy
> Sent: Monday, December 21, 2020 06:47
> To: dev@dpdk.org
> Cc: kaara.sat...@chelsio.com
> Subject: [dpdk-dev] [PATCH] app/testpmd: increase array for fetching supported
> FEC caps
> 
> From: Karra Satwik 
> 
> Request the driver for number of entries in the FEC caps array and then
> dynamically allocate the array.
> 
> Signed-off-by: Karra Satwik 
> Signed-off-by: Rahul Lakkireddy 
> ---
>  app/test-pmd/cmdline.c | 29 -
>  1 file changed, 20 insertions(+), 9 deletions(-)
> 
> diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index
> 2ccbaa039..d7a90d55e 100644
> --- a/app/test-pmd/cmdline.c
> +++ b/app/test-pmd/cmdline.c
> @@ -16263,29 +16263,40 @@ cmd_show_fec_capability_parsed(void
> *parsed_result,
>   __rte_unused struct cmdline *cl,
>   __rte_unused void *data)
>  {
> -#define FEC_CAP_NUM 2
>   struct cmd_show_fec_capability_result *res = parsed_result;
> - struct rte_eth_fec_capa speed_fec_capa[FEC_CAP_NUM];
> - unsigned int num = FEC_CAP_NUM;
> - unsigned int ret_num;
> - int ret;
> + struct rte_eth_fec_capa *speed_fec_capa;
> + int num, ret;
> 
>   if (!rte_eth_dev_is_valid_port(res->cmd_pid)) {
>   printf("Invalid port id %u\n", res->cmd_pid);
>   return;
>   }
> 
> - ret = rte_eth_fec_get_capability(res->cmd_pid, speed_fec_capa, num);
> + ret = rte_eth_fec_get_capability(res->cmd_pid, NULL, 0);
>   if (ret == -ENOTSUP) {
>   printf("Function not implemented\n");
>   return;
>   } else if (ret < 0) {
> - printf("Get FEC capability failed\n");
> + printf("Get FEC capability failed: %d\n", ret);
> + return;
> + }
> +
> + num = ret;
> + speed_fec_capa = calloc(num, sizeof(*speed_fec_capa));
> + if (!speed_fec_capa) {

if (speed_fec_capa == NULL) {

You can check the DPDK coding style 1.9.1. 
http://doc.dpdk.org/guides/contributing/coding_style.html
NULL is the preferred null pointer constant. Use NULL instead of (type *)0 or 
(type *)NULL, except where the compiler does not know the destination type.

Except for this minor issue, Acked-by: Xiaoyun Li 

> + printf("Failed to alloc FEC capability buffer\n");
>   return;
>   }
> 
> - ret_num = (unsigned int)ret;
> - show_fec_capability(ret_num, speed_fec_capa);
> + ret = rte_eth_fec_get_capability(res->cmd_pid, speed_fec_capa, num);
> + if (ret < 0) {
> + printf("Error getting FEC capability: %d\n", ret);
> + goto out;
> + }
> +
> + show_fec_capability(num, speed_fec_capa);
> +out:
> + free(speed_fec_capa);
>  }
> 
>  cmdline_parse_token_string_t cmd_show_fec_capability_show =
> --
> 2.24.0



Re: [dpdk-dev] [PATCH] net/iavf: fix queue pairs configuration

2020-12-22 Thread Xu, Ting
Hi, Alvin,

> -Original Message-
> From: Zhang,Alvin 
> Sent: Wednesday, December 23, 2020 1:30 PM
> To: Xing, Beilei ; Xu, Ting 
> Cc: dev@dpdk.org; Zhang, AlvinX ; sta...@dpdk.org
> Subject: [PATCH] net/iavf: fix queue pairs configuration
> 
> From: Alvin Zhang 
> 
> Check if there are enough queue pairs currently allocated, and if not, request
> PF to allocate them.
> 
> Fixes: e436cd43835b ("net/iavf: negotiate large VF and request more queues")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Alvin Zhang 
> ---
>  drivers/net/iavf/iavf_ethdev.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c
> index 7e3c26a..f015121 100644
> --- a/drivers/net/iavf/iavf_ethdev.c
> +++ b/drivers/net/iavf/iavf_ethdev.c
> @@ -372,8 +372,10 @@ struct rte_iavf_xstats_name_off {
>   } else {
>   /* Check if large VF is already enabled. If so, disable and
>* release redundant queue resource.
> +  * Or check if enough queue pairs. If not, request them from
> PF.
>*/
> - if (vf->lv_enabled) {
> + if (vf->lv_enabled ||
> + num_queue_pairs > vf->vsi_res->num_queue_pairs) {
>   ret = iavf_queues_req_reset(dev, num_queue_pairs);
>   if (ret)
>   return ret;
> --
> 1.8.3.1

Would it be better to change ret = iavf_queues_req_reset(dev, num_queue_pairs);
to ret = iavf_queues_req_reset(dev, IAVF_MAX_NUM_QUEUES_DFLT); based on the
original code?
Since the PF provides 16 queue pairs to the VF by default, if large VF does not
need to be enabled, it may be better to reset the queue pairs number to 16. That
way we don't need to compare the queue pairs number and request queues each time
when it is no more than 16; if more, large VF handles it.
The original code here is not good.
Thanks.


Re: [dpdk-dev] [dpdk-stable] [PATCH] app/testpmd: fix start index for showing FEC array

2020-12-22 Thread Li, Xiaoyun
Acked-by: Xiaoyun Li 

> -Original Message-
> From: stable  On Behalf Of Rahul Lakkireddy
> Sent: Monday, December 21, 2020 06:47
> To: dev@dpdk.org
> Cc: kaara.sat...@chelsio.com; sta...@dpdk.org
> Subject: [dpdk-stable] [PATCH] app/testpmd: fix start index for showing FEC
> array
> 
> From: Karra Satwik 
> 
> Start from index 0 when going through the FEC array. This will allow "off" to 
> get
> printed for RTE_ETH_FEC_NOFEC mode.
> 
> Fixes: b19da32e3151 ("app/testpmd: add FEC command")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Karra Satwik 
> Signed-off-by: Rahul Lakkireddy 
> ---
>  app/test-pmd/config.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c index
> 3f6c8642b..a6a5baa4e 100644
> --- a/app/test-pmd/config.c
> +++ b/app/test-pmd/config.c
> @@ -3701,7 +3701,7 @@ show_fec_capability(unsigned int num, struct
> rte_eth_fec_capa *speed_fec_capa)
>   printf("%s : ",
>   rte_eth_link_speed_to_str(speed_fec_capa[i].speed));
> 
> - for (j = RTE_ETH_FEC_AUTO; j < RTE_DIM(fec_mode_name);
> j++) {
> + for (j = 0; j < RTE_DIM(fec_mode_name); j++) {
>   if (RTE_ETH_FEC_MODE_TO_CAPA(j) &
>   speed_fec_capa[i].capa)
>   printf("%s ", fec_mode_name[j].name);
> --
> 2.24.0



Re: [dpdk-dev] [PATCH] net/ice: check Rx queue number on RSS init

2020-12-22 Thread Chen, BoX C
Hi, Dapeng

Regards,
Chen Bo

> -Original Message-
> From: dev  On Behalf Of dapengx...@intel.com
> Sent: December 23, 2020 13:30
> To: Yang, Qiming ; Zhang, Qi Z
> 
> Cc: dev@dpdk.org; Yu, DapengX ; sta...@dpdk.org
> Subject: [dpdk-dev] [PATCH] net/ice: check Rx queue number on RSS init
> 
> From: YU DAPENG 
> 
> When RSS is initialized, rx queues number is used as denominator to set
> default value into the RSS lookup table. If it is zero, there will be error of
> being divided by 0. So add value check to avoid the error.
> 
> Fixes: 50370662b727 ("net/ice: support device and queue ops")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: YU DAPENG 
> ---
>  drivers/net/ice/ice_ethdev.c | 6 ++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
> index 9a5d6a559..bbb8c1460 100644
> --- a/drivers/net/ice/ice_ethdev.c
> +++ b/drivers/net/ice/ice_ethdev.c
> @@ -3182,6 +3182,12 @@ static int ice_init_rss(struct ice_pf *pf)
>   vsi->rss_key_size =
> ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE;
>   vsi->rss_lut_size = pf->hash_lut_size;
> 
> + if (nb_q == 0) {
> + PMD_DRV_LOG(WARNING,
> + "RSS is not supported as rx queues number is
> zero\n");
> + return 0;
> + }
> +

Returning directly here will introduce subsequent exceptions; testpmd will exit.

testpmd> port start all
Configuring Port 0 (socket 1)
ice_init_rss(): RSS is not supported as rx queues number is zero

Port 0: 68:20:20:06:01:00
Configuring Port 1 (socket 1)
ice_init_rss(): RSS is not supported as rx queues number is zero

Port 1: 68:20:20:06:01:01
Checking link statuses...
Done
testpmd>
Port 1: link state change event

testpmd> show config rxtx
  io packet forwarding packets/burst=32
  nb forwarding cores=1 - nb forwarding ports=2
  port 0: RX queue number: 0 Tx queue number: 1
Rx offloads=0x0 Tx offloads=0x1
Invalid RX queue_id=0
RX queue: 0
  RX desc=0 - RX free threshold=32
  RX threshold registers: pthresh=8 hthresh=8  wthresh=0
  RX Offloads=0x0
TX queue: 0
  TX desc=1024 - TX free threshold=32
  TX threshold registers: pthresh=32 hthresh=0  wthresh=0
  TX offloads=0x1 - TX RS bit threshold=32
  port 1: RX queue number: 0 Tx queue number: 1
Rx offloads=0x0 Tx offloads=0x1
Invalid RX queue_id=0
RX queue: 0
  RX desc=0 - RX free threshold=32
  RX threshold registers: pthresh=8 hthresh=8  wthresh=0
  RX Offloads=0x0
TX queue: 0
  TX desc=1024 - TX free threshold=32
  TX threshold registers: pthresh=32 hthresh=0  wthresh=0
  TX offloads=0x1 - TX RS bit threshold=32
testpmd>
testpmd> start
EAL: Error - exiting with code: 1
  Cause: Either rxq or txq are 0, cannot use io fwd mode

>   if (is_safe_mode) {
>   PMD_DRV_LOG(WARNING, "RSS is not supported in safe
> mode\n");
>   return 0;
> --
> 2.27.0



[dpdk-dev] [PATCH v3] net/iavf: fix invalid RSS combinations rule can be created

2020-12-22 Thread Murphy Yang
Currently, when using the 'flow' command to create a rule that combines
several RSS types, the rule is created successfully even if the RSS type
combination is invalid.

Here list some invalid RSS combinations:
 - ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP
 - ETH_RSS_IPV6 | ETH_RSS_NONFRAG_IPV6_TCP
 - ETH_RSS_GTPU | IAVF_RSS_TYPE_INNER_IPV4
 - ETH_RSS_GTPU | IAVF_RSS_TYPE_INNER_IPV4_UDP
 - ETH_RSS_GTPU | IAVF_RSS_TYPE_INNER_IPV4_TCP
 - ETH_RSS_GTPU | IAVF_RSS_TYPE_INNER_IPV6
 - ETH_RSS_GTPU | IAVF_RSS_TYPE_INNER_IPV6_UDP
 - ETH_RSS_GTPU | IAVF_RSS_TYPE_INNER_IPV6_TCP

So, this patch adds these combinations to the 'invalid_rss_comb'
array for validity checking; if the combination check fails,
the rule creation will fail.

Fixes: 91f27b2e39ab ("net/iavf: refactor RSS")

Signed-off-by: Murphy Yang 
---
v3:
- update the comments.
v2:
- add invalid RSS combinations.

 drivers/net/iavf/iavf_hash.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/iavf/iavf_hash.c b/drivers/net/iavf/iavf_hash.c
index c4c73e6644..8393d8535b 100644
--- a/drivers/net/iavf/iavf_hash.c
+++ b/drivers/net/iavf/iavf_hash.c
@@ -806,7 +806,15 @@ static void iavf_refine_proto_hdrs(struct 
virtchnl_proto_hdrs *proto_hdrs,
 
 static uint64_t invalid_rss_comb[] = {
ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_UDP,
+   ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
ETH_RSS_IPV6 | ETH_RSS_NONFRAG_IPV6_UDP,
+   ETH_RSS_IPV6 | ETH_RSS_NONFRAG_IPV6_TCP,
+   ETH_RSS_GTPU | IAVF_RSS_TYPE_INNER_IPV4,
+   ETH_RSS_GTPU | IAVF_RSS_TYPE_INNER_IPV4_UDP,
+   ETH_RSS_GTPU | IAVF_RSS_TYPE_INNER_IPV4_TCP,
+   ETH_RSS_GTPU | IAVF_RSS_TYPE_INNER_IPV6,
+   ETH_RSS_GTPU | IAVF_RSS_TYPE_INNER_IPV6_UDP,
+   ETH_RSS_GTPU | IAVF_RSS_TYPE_INNER_IPV6_TCP,
RTE_ETH_RSS_L3_PRE32 | RTE_ETH_RSS_L3_PRE40 |
RTE_ETH_RSS_L3_PRE48 | RTE_ETH_RSS_L3_PRE56 |
RTE_ETH_RSS_L3_PRE96
-- 
2.17.1



Re: [dpdk-dev] [PATCH] net/iavf: fix queue pairs configuration

2020-12-22 Thread Zhang, AlvinX
Hi Xuting,

Thanks for your reply.
It's very efficient for CVL. But the FVL PF provides 4 queue pairs to each
VF by default;
if every VF requests 16 queue pairs, there may not be enough queue pairs for
all VFs.

BR,
Alvin

> -Original Message-
> From: Xu, Ting 
> Sent: Wednesday, December 23, 2020 2:06 PM
> To: Zhang, AlvinX ; Xing, Beilei 
> 
> Cc: dev@dpdk.org; Zhang, AlvinX ; sta...@dpdk.org
> Subject: RE: [PATCH] net/iavf: fix queue pairs configuration
> 
> Hi, Alvin,
> 
> > -Original Message-
> > From: Zhang,Alvin 
> > Sent: Wednesday, December 23, 2020 1:30 PM
> > To: Xing, Beilei ; Xu, Ting 
> > Cc: dev@dpdk.org; Zhang, AlvinX ;
> > sta...@dpdk.org
> > Subject: [PATCH] net/iavf: fix queue pairs configuration
> >
> > From: Alvin Zhang 
> >
> > Check if there are enough queue pairs currently allocated, and if not,
> > request PF to allocate them.
> >
> > Fixes: e436cd43835b ("net/iavf: negotiate large VF and request more
> > queues")
> > Cc: sta...@dpdk.org
> >
> > Signed-off-by: Alvin Zhang 
> > ---
> >  drivers/net/iavf/iavf_ethdev.c | 4 +++-
> >  1 file changed, 3 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/net/iavf/iavf_ethdev.c
> > b/drivers/net/iavf/iavf_ethdev.c index 7e3c26a..f015121 100644
> > --- a/drivers/net/iavf/iavf_ethdev.c
> > +++ b/drivers/net/iavf/iavf_ethdev.c
> > @@ -372,8 +372,10 @@ struct rte_iavf_xstats_name_off {
> > } else {
> > /* Check if large VF is already enabled. If so, disable and
> >  * release redundant queue resource.
> > +* Or check if enough queue pairs. If not, request them from
> > PF.
> >  */
> > -   if (vf->lv_enabled) {
> > +   if (vf->lv_enabled ||
> > +   num_queue_pairs > vf->vsi_res->num_queue_pairs) {
> > ret = iavf_queues_req_reset(dev, num_queue_pairs);
> > if (ret)
> > return ret;
> > --
> > 1.8.3.1
> 
> Will it be better to change ret = iavf_queues_req_reset(dev, num_queue_pairs);
> to ret = iavf_queues_req_reset(dev, IAVF_MAX_NUM_QUEUES_DFLT); based on
> the original codes?
> Since the PF provides 16 queue pairs to the VF by default, if large VF does
> not need to be enabled, it may be better to reset the queue pairs number to 16.
> That way we don't need to compare the queue pairs number and request queues
> each time when it is no more than 16. If more, it is handled as a large VF.
> The original codes here are not good.
> Thanks.


Re: [dpdk-dev] [PATCH] net/ice: check Rx queue number on RSS init

2020-12-22 Thread Yu, DapengX
Hi Chen Bo,
Without this patch, I have reproduced your unexpected test result on
XXV710 (with the i40e PMD driver).
That means the unexpected situation below is a new issue, different from
the original one that this patch tries to resolve.
The unexpected behavior is not caused by this patch.

The unexpected situation can be reproduced in this way, at the end, testpmd 
exit with error, and the terminal cannot echo keyboard input.
testpmd> port stop all
testpmd> port config all rxq 0
testpmd> port config all txq 0
testpmd> show config rxtx
testpmd> port start all
testpmd> show config rxtx
testpmd> start
EAL: Error - exiting with code: 1
  Cause: Either rxq or txq are 0, cannot use io fwd mode

-Original Message-
From: Chen, BoX C 
Sent: Wednesday, December 23, 2020 2:23 PM
To: Yu, DapengX ; Yang, Qiming ; 
Zhang, Qi Z 
Cc: dev@dpdk.org; Yu, DapengX ; sta...@dpdk.org
Subject: RE: [dpdk-dev] [PATCH] net/ice: check Rx queue number on RSS init

Hi, Dapeng

Regards,
Chen Bo

> -Original Message-
> From: dev  On Behalf Of dapengx...@intel.com
> Sent: December 23, 2020 13:30
> To: Yang, Qiming ; Zhang, Qi Z 
> 
> Cc: dev@dpdk.org; Yu, DapengX ; sta...@dpdk.org
> Subject: [dpdk-dev] [PATCH] net/ice: check Rx queue number on RSS init
> 
> From: YU DAPENG 
> 
> When RSS is initialized, the Rx queue number is used as the denominator
> to set default values into the RSS lookup table. If it is zero, a
> divide-by-zero error will occur. So add a value check to avoid the error.
> 
> Fixes: 50370662b727 ("net/ice: support device and queue ops")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: YU DAPENG 
> ---
>  drivers/net/ice/ice_ethdev.c | 6 ++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/drivers/net/ice/ice_ethdev.c 
> b/drivers/net/ice/ice_ethdev.c index 9a5d6a559..bbb8c1460 100644
> --- a/drivers/net/ice/ice_ethdev.c
> +++ b/drivers/net/ice/ice_ethdev.c
> @@ -3182,6 +3182,12 @@ static int ice_init_rss(struct ice_pf *pf)  
> vsi->rss_key_size = ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE;
>  vsi->rss_lut_size = pf->hash_lut_size;
> 
> +if (nb_q == 0) {
> +PMD_DRV_LOG(WARNING,
> +"RSS is not supported as rx queues number is
> zero\n");
> +return 0;
> +}
> +

Returning directly here will introduce subsequent exceptions; testpmd will exit.

testpmd> port start all
Configuring Port 0 (socket 1)
ice_init_rss(): RSS is not supported as rx queues number is zero

Port 0: 68:20:20:06:01:00
Configuring Port 1 (socket 1)
ice_init_rss(): RSS is not supported as rx queues number is zero

Port 1: 68:20:20:06:01:01
Checking link statuses...
Done
testpmd>
Port 1: link state change event

testpmd> show config rxtx
  io packet forwarding packets/burst=32
  nb forwarding cores=1 - nb forwarding ports=2
  port 0: RX queue number: 0 Tx queue number: 1
Rx offloads=0x0 Tx offloads=0x1
Invalid RX queue_id=0
RX queue: 0
  RX desc=0 - RX free threshold=32
  RX threshold registers: pthresh=8 hthresh=8  wthresh=0
  RX Offloads=0x0
TX queue: 0
  TX desc=1024 - TX free threshold=32
  TX threshold registers: pthresh=32 hthresh=0  wthresh=0
  TX offloads=0x1 - TX RS bit threshold=32
  port 1: RX queue number: 0 Tx queue number: 1
Rx offloads=0x0 Tx offloads=0x1
Invalid RX queue_id=0
RX queue: 0
  RX desc=0 - RX free threshold=32
  RX threshold registers: pthresh=8 hthresh=8  wthresh=0
  RX Offloads=0x0
TX queue: 0
  TX desc=1024 - TX free threshold=32
  TX threshold registers: pthresh=32 hthresh=0  wthresh=0
  TX offloads=0x1 - TX RS bit threshold=32
testpmd>
testpmd> start
EAL: Error - exiting with code: 1
  Cause: Either rxq or txq are 0, cannot use io fwd mode

>  if (is_safe_mode) {
>  PMD_DRV_LOG(WARNING, "RSS is not supported in safe mode\n");  return 
> 0;
> --
> 2.27.0