[RFC v3] app/dma-perf: introduce dma-perf application

2022-10-18 Thread Cheng Jiang
There are many high-performance DMA devices supported in DPDK now, and
these DMA devices can also be integrated into other modules of DPDK as
accelerators, such as Vhost. Before integrating DMA into applications,
developers need to know the performance of these DMA devices in various
scenarios and the performance of CPUs in the same scenario, such as
different buffer lengths. Only then can we know the target performance of
an application accelerated by these DMA devices. This patch introduces a
high-performance benchmarking tool which automatically compares the
performance of CPU and DMA in different scenarios, driven by a pre-set
config file. Memory copy performance tests are supported for now.
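
For reference, the per-scenario metrics follow the formulas in calc_result()
below. A minimal sketch (not part of the patch) of how a CPU copy baseline
could be timed and turned into those metrics; apart from the rte_* calls and
the formulas, all names here are illustrative:

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <inttypes.h>
#include <rte_cycles.h>

/* Time repeat_times * nr_buf CPU copies and derive the same metrics the
 * tool reports: average cycles per copy, bandwidth in Gbps and copies/s. */
static void
cpu_copy_baseline(void **srcs, void **dsts, uint32_t nr_buf,
		uint32_t buf_size, uint32_t repeat_times)
{
	uint64_t start = rte_rdtsc();
	uint32_t i, r;

	for (r = 0; r < repeat_times; r++)
		for (i = 0; i < nr_buf; i++)
			memcpy(dsts[i], srcs[i], buf_size);

	uint64_t cycles = rte_rdtsc() - start;
	uint64_t ave_cycle = cycles / ((uint64_t)repeat_times * nr_buf);
	double time_sec = (double)cycles / rte_get_timer_hz();
	/* Same formulas as calc_result() in benchmark.c */
	double bandwidth = buf_size * 8 * rte_get_timer_hz() /
			(ave_cycle * 1000 * 1000 * 1000.0);
	uint64_t ops = (uint64_t)((double)nr_buf * repeat_times / time_sec);

	printf("avg cycles: %" PRIu64 ", bandwidth: %.3lf Gbps, ops/s: %" PRIu64 "\n",
			ave_cycle, bandwidth, ops);
}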

Signed-off-by: Cheng Jiang 
Signed-off-by: Jiayu Hu 
Signed-off-by: Yuan Wang 
Acked-by: Morten Brørup 
---
v3: improved nr_buf calculation and added support for getting subprocess exit 
status
v2: fixed some CI issues.

 app/meson.build   |   1 +
 app/test-dma-perf/benchmark.c | 538 ++
 app/test-dma-perf/benchmark.h |  12 +
 app/test-dma-perf/config.ini  |  61 
 app/test-dma-perf/main.c  | 417 ++
 app/test-dma-perf/main.h  |  55 
 app/test-dma-perf/meson.build |  16 +
 7 files changed, 1100 insertions(+)
 create mode 100644 app/test-dma-perf/benchmark.c
 create mode 100644 app/test-dma-perf/benchmark.h
 create mode 100644 app/test-dma-perf/config.ini
 create mode 100644 app/test-dma-perf/main.c
 create mode 100644 app/test-dma-perf/main.h
 create mode 100644 app/test-dma-perf/meson.build

diff --git a/app/meson.build b/app/meson.build
index 93d8c15032..3826a10a27 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -18,6 +18,7 @@ apps = [
 'test-pmd',
 'test-regex',
 'test-sad',
+'test-dma-perf',
 ]

 default_cflags = machine_args + ['-DALLOW_EXPERIMENTAL_API']
diff --git a/app/test-dma-perf/benchmark.c b/app/test-dma-perf/benchmark.c
new file mode 100644
index 00..94ece876ad
--- /dev/null
+++ b/app/test-dma-perf/benchmark.c
@@ -0,0 +1,538 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include 
+
+#include 
+#include 
+#include 
+#include 
+
+#include "eal_private.h"
+
+#include "main.h"
+#include "benchmark.h"
+
+
+#define MAX_DMA_CPL_NB 255
+
+#define CSV_LINE_DMA_FMT "Scenario %u,%u,%u,%u,%u,%u,%" PRIu64 ",%.3lf,%" 
PRIu64 "\n"
+#define CSV_LINE_CPU_FMT "Scenario %u,%u,NA,%u,%u,%u,%" PRIu64 ",%.3lf,%" 
PRIu64 "\n"
+
+struct lcore_params {
+   uint16_t dev_id;
+   uint32_t nr_buf;
+   uint16_t kick_batch;
+   uint32_t buf_size;
+   uint32_t repeat_times;
+   uint16_t mpool_iter_step;
+   struct rte_mbuf **srcs;
+   struct rte_mbuf **dsts;
+   uint8_t scenario_id;
+};
+
+struct buf_info {
+   struct rte_mbuf **array;
+   uint32_t nr_buf;
+   uint32_t buf_size;
+};
+
+static struct rte_mempool *src_pool;
+static struct rte_mempool *dst_pool;
+
+#define PRINT_ERR(...) print_err(__func__, __LINE__, __VA_ARGS__)
+
+static inline int
+__rte_format_printf(3, 4)
+print_err(const char *func, int lineno, const char *format, ...)
+{
+   va_list ap;
+   int ret;
+
+   ret = fprintf(stderr, "In %s:%d - ", func, lineno);
+   va_start(ap, format);
+   ret += vfprintf(stderr, format, ap);
+   va_end(ap);
+
+   return ret;
+}
+
+static inline void
+calc_result(struct lcore_params *p, uint64_t cp_cycle_sum, double time_sec,
+   uint32_t repeat_times, uint32_t *memory, uint64_t 
*ave_cycle,
+   float *bandwidth, uint64_t *ops)
+{
+   *memory = (p->buf_size * p->nr_buf * 2) / (1024 * 1024);
+   *ave_cycle = cp_cycle_sum / (p->repeat_times * p->nr_buf);
+   *bandwidth = p->buf_size * 8 * rte_get_timer_hz() / (*ave_cycle * 1000 
* 1000 * 1000.0);
+   *ops = (double)p->nr_buf * repeat_times / time_sec;
+}
+
+static void
+output_result(uint8_t scenario_id, uint32_t lcore_id, uint16_t dev_id, 
uint64_t ave_cycle,
+   uint32_t buf_size, uint32_t nr_buf, uint32_t memory,
+   float bandwidth, uint64_t ops, bool is_dma)
+{
+   if (is_dma)
+   printf("lcore %u, DMA %u:\n"
+   "average cycles: %" PRIu64 ","
+   " buffer size: %u, nr_buf: %u,"
+   " memory: %uMB, frequency: %" PRIu64 ".\n",
+   lcore_id,
+   dev_id,
+   ave_cycle,
+   buf_size,
+   nr_buf,
+   memory,
+   rte_get_timer_hz()

[RFC v4] app/dma-perf: introduce dma-perf application

2022-10-24 Thread Cheng Jiang
There are many high-performance DMA devices supported in DPDK now, and
these DMA devices can also be integrated into other modules of DPDK as
accelerators, such as Vhost. Before integrating DMA into applications,
developers need to know the performance of these DMA devices in various
scenarios and the performance of CPUs in the same scenario, such as
different buffer lengths. Only then can we know the target performance of
an application accelerated by these DMA devices. This patch introduces a
high-performance benchmarking tool which automatically compares the
performance of CPU and DMA in different scenarios, driven by a pre-set
config file. Memory copy performance tests are supported for now.

Signed-off-by: Cheng Jiang 
Signed-off-by: Jiayu Hu 
Signed-off-by: Yuan Wang 
Acked-by: Morten Brørup 
---
v4: fixed some building issues.
v3: improved nr_buf calculation and added support for getting subprocess exit 
status.
v2: fixed some CI issues.

 app/meson.build   |   1 +
 app/test-dma-perf/benchmark.c | 537 ++
 app/test-dma-perf/benchmark.h |  12 +
 app/test-dma-perf/config.ini  |  61 
 app/test-dma-perf/main.c  | 416 ++
 app/test-dma-perf/main.h  |  55 
 app/test-dma-perf/meson.build |  16 +
 7 files changed, 1098 insertions(+)
 create mode 100644 app/test-dma-perf/benchmark.c
 create mode 100644 app/test-dma-perf/benchmark.h
 create mode 100644 app/test-dma-perf/config.ini
 create mode 100644 app/test-dma-perf/main.c
 create mode 100644 app/test-dma-perf/main.h
 create mode 100644 app/test-dma-perf/meson.build

diff --git a/app/meson.build b/app/meson.build
index 93d8c15032..3826a10a27 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -18,6 +18,7 @@ apps = [
 'test-pmd',
 'test-regex',
 'test-sad',
+'test-dma-perf',
 ]

 default_cflags = machine_args + ['-DALLOW_EXPERIMENTAL_API']
diff --git a/app/test-dma-perf/benchmark.c b/app/test-dma-perf/benchmark.c
new file mode 100644
index 00..00641345bc
--- /dev/null
+++ b/app/test-dma-perf/benchmark.c
@@ -0,0 +1,537 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include 
+
+#include 
+#include 
+#include 
+#include 
+
+#include "eal_private.h"
+
+#include "main.h"
+#include "benchmark.h"
+
+
+#define MAX_DMA_CPL_NB 255
+
+#define CSV_LINE_DMA_FMT "Scenario %u,%u,%u,%u,%u,%u,%" PRIu64 ",%.3lf,%" 
PRIu64 "\n"
+#define CSV_LINE_CPU_FMT "Scenario %u,%u,NA,%u,%u,%u,%" PRIu64 ",%.3lf,%" 
PRIu64 "\n"
+
+struct lcore_params {
+   uint16_t dev_id;
+   uint32_t nr_buf;
+   uint16_t kick_batch;
+   uint32_t buf_size;
+   uint32_t repeat_times;
+   uint16_t mpool_iter_step;
+   struct rte_mbuf **srcs;
+   struct rte_mbuf **dsts;
+   uint8_t scenario_id;
+};
+
+struct buf_info {
+   struct rte_mbuf **array;
+   uint32_t nr_buf;
+   uint32_t buf_size;
+};
+
+static struct rte_mempool *src_pool;
+static struct rte_mempool *dst_pool;
+
+#define PRINT_ERR(...) print_err(__func__, __LINE__, __VA_ARGS__)
+
+static inline int
+__rte_format_printf(3, 4)
+print_err(const char *func, int lineno, const char *format, ...)
+{
+   va_list ap;
+   int ret;
+
+   ret = fprintf(stderr, "In %s:%d - ", func, lineno);
+   va_start(ap, format);
+   ret += vfprintf(stderr, format, ap);
+   va_end(ap);
+
+   return ret;
+}
+
+static inline void
+calc_result(struct lcore_params *p, uint64_t cp_cycle_sum, double time_sec,
+   uint32_t repeat_times, uint32_t *memory, uint64_t 
*ave_cycle,
+   float *bandwidth, uint64_t *ops)
+{
+   *memory = (p->buf_size * p->nr_buf * 2) / (1024 * 1024);
+   *ave_cycle = cp_cycle_sum / (p->repeat_times * p->nr_buf);
+   *bandwidth = p->buf_size * 8 * rte_get_timer_hz() / (*ave_cycle * 1000 
* 1000 * 1000.0);
+   *ops = (double)p->nr_buf * repeat_times / time_sec;
+}
+
+static void
+output_result(uint8_t scenario_id, uint32_t lcore_id, uint16_t dev_id, 
uint64_t ave_cycle,
+   uint32_t buf_size, uint32_t nr_buf, uint32_t memory,
+   float bandwidth, uint64_t ops, bool is_dma)
+{
+   if (is_dma)
+   printf("lcore %u, DMA %u:\n"
+   "average cycles: %" PRIu64 ","
+   " buffer size: %u, nr_buf: %u,"
+   " memory: %uMB, frequency: %" PRIu64 ".\n",
+   lcore_id,
+   dev_id,
+   ave_cycle,
+   buf_size,
+   nr_buf,
+   memory,
+

[RFC v5] app/dma-perf: introduce dma-perf application

2022-10-24 Thread Cheng Jiang
There are many high-performance DMA devices supported in DPDK now, and
these DMA devices can also be integrated into other modules of DPDK as
accelerators, such as Vhost. Before integrating DMA into applications,
developers need to know the performance of these DMA devices in various
scenarios and the performance of CPUs in the same scenario, such as
different buffer lengths. Only then can we know the target performance of
an application accelerated by these DMA devices. This patch introduces a
high-performance benchmarking tool which automatically compares the
performance of CPU and DMA in different scenarios, driven by a pre-set
config file. Memory copy performance tests are supported for now.

Signed-off-by: Cheng Jiang 
Signed-off-by: Jiayu Hu 
Signed-off-by: Yuan Wang 
Acked-by: Morten Brørup 
---
v5: fixed some typos.
v4: fixed some building issues.
v3: improved nr_buf calculation and added support for getting subprocess exit 
status.
v2: fixed some CI issues.

 app/meson.build   |   1 +
 app/test-dma-perf/benchmark.c | 537 ++
 app/test-dma-perf/benchmark.h |  12 +
 app/test-dma-perf/config.ini  |  61 
 app/test-dma-perf/main.c  | 416 ++
 app/test-dma-perf/main.h  |  55 
 app/test-dma-perf/meson.build |  16 +
 7 files changed, 1098 insertions(+)
 create mode 100644 app/test-dma-perf/benchmark.c
 create mode 100644 app/test-dma-perf/benchmark.h
 create mode 100644 app/test-dma-perf/config.ini
 create mode 100644 app/test-dma-perf/main.c
 create mode 100644 app/test-dma-perf/main.h
 create mode 100644 app/test-dma-perf/meson.build

diff --git a/app/meson.build b/app/meson.build
index 93d8c15032..3826a10a27 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -18,6 +18,7 @@ apps = [
 'test-pmd',
 'test-regex',
 'test-sad',
+'test-dma-perf',
 ]

 default_cflags = machine_args + ['-DALLOW_EXPERIMENTAL_API']
diff --git a/app/test-dma-perf/benchmark.c b/app/test-dma-perf/benchmark.c
new file mode 100644
index 00..00641345bc
--- /dev/null
+++ b/app/test-dma-perf/benchmark.c
@@ -0,0 +1,537 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include 
+
+#include 
+#include 
+#include 
+#include 
+
+#include "eal_private.h"
+
+#include "main.h"
+#include "benchmark.h"
+
+
+#define MAX_DMA_CPL_NB 255
+
+#define CSV_LINE_DMA_FMT "Scenario %u,%u,%u,%u,%u,%u,%" PRIu64 ",%.3lf,%" 
PRIu64 "\n"
+#define CSV_LINE_CPU_FMT "Scenario %u,%u,NA,%u,%u,%u,%" PRIu64 ",%.3lf,%" 
PRIu64 "\n"
+
+struct lcore_params {
+   uint16_t dev_id;
+   uint32_t nr_buf;
+   uint16_t kick_batch;
+   uint32_t buf_size;
+   uint32_t repeat_times;
+   uint16_t mpool_iter_step;
+   struct rte_mbuf **srcs;
+   struct rte_mbuf **dsts;
+   uint8_t scenario_id;
+};
+
+struct buf_info {
+   struct rte_mbuf **array;
+   uint32_t nr_buf;
+   uint32_t buf_size;
+};
+
+static struct rte_mempool *src_pool;
+static struct rte_mempool *dst_pool;
+
+#define PRINT_ERR(...) print_err(__func__, __LINE__, __VA_ARGS__)
+
+static inline int
+__rte_format_printf(3, 4)
+print_err(const char *func, int lineno, const char *format, ...)
+{
+   va_list ap;
+   int ret;
+
+   ret = fprintf(stderr, "In %s:%d - ", func, lineno);
+   va_start(ap, format);
+   ret += vfprintf(stderr, format, ap);
+   va_end(ap);
+
+   return ret;
+}
+
+static inline void
+calc_result(struct lcore_params *p, uint64_t cp_cycle_sum, double time_sec,
+   uint32_t repeat_times, uint32_t *memory, uint64_t 
*ave_cycle,
+   float *bandwidth, uint64_t *ops)
+{
+   *memory = (p->buf_size * p->nr_buf * 2) / (1024 * 1024);
+   *ave_cycle = cp_cycle_sum / (p->repeat_times * p->nr_buf);
+   *bandwidth = p->buf_size * 8 * rte_get_timer_hz() / (*ave_cycle * 1000 
* 1000 * 1000.0);
+   *ops = (double)p->nr_buf * repeat_times / time_sec;
+}
+
+static void
+output_result(uint8_t scenario_id, uint32_t lcore_id, uint16_t dev_id, 
uint64_t ave_cycle,
+   uint32_t buf_size, uint32_t nr_buf, uint32_t memory,
+   float bandwidth, uint64_t ops, bool is_dma)
+{
+   if (is_dma)
+   printf("lcore %u, DMA %u:\n"
+   "average cycles: %" PRIu64 ","
+   " buffer size: %u, nr_buf: %u,"
+   " memory: %uMB, frequency: %" PRIu64 ".\n",
+   lcore_id,
+   dev_id,
+   ave_cycle,
+   buf_size,
+   nr_buf,
+ 

[PATCH v2 0/3] Async vhost packed ring optimization

2023-01-12 Thread Cheng Jiang
To improve the performance of the async vhost packed ring, this series
removes the unnecessary data copy in the async vhost packed ring and adds
a batch data path to both the enqueue and dequeue paths.

v2: fixed net header settings.

Cheng Jiang (3):
  vhost: remove redundant copy for packed shadow used ring
  vhost: add batch enqueue in async vhost packed ring
  vhost: add batch dequeue in async vhost packed ring

 lib/vhost/virtio_net.c | 399 +
 1 file changed, 361 insertions(+), 38 deletions(-)

--
2.35.1



[PATCH v2 1/3] vhost: remove redundant copy for packed shadow used ring

2023-01-12 Thread Cheng Jiang
In the packed ring enqueue data path of the current asynchronous
Vhost design, used ring information is first written to the sync
shadow used ring and then, for historical reasons, copied to the async
shadow used ring. This copy is completely unnecessary. This patch
removes the redundant copy and updates the async shadow used ring
directly.
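
In plain terms, the change can be condensed as below (a sketch, not the full
patch; struct vhost_async comes from lib/vhost/vhost.h and the helper name
here is illustrative, see vhost_async_shadow_enqueue_packed() in the hunk
that follows):

/* Before: entries were staged in the sync shadow ring and later copied
 * into the async one via store_dma_desc_info_packed(). After: they are
 * written straight into the async shadow used ring, wrapping at vq->size. */
static inline void
async_shadow_enqueue_direct(struct vhost_async *async, uint16_t ring_size,
		const uint32_t *len, const uint16_t *id,
		const uint16_t *count, uint16_t num_buffers)
{
	uint16_t i;

	for (i = 0; i < num_buffers; i++) {
		async->buffers_packed[async->buffer_idx_packed].id = id[i];
		async->buffers_packed[async->buffer_idx_packed].len = len[i];
		async->buffers_packed[async->buffer_idx_packed].count = count[i];
		if (++async->buffer_idx_packed >= ring_size)
			async->buffer_idx_packed -= ring_size;
	}
}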

Signed-off-by: Cheng Jiang 
---
 lib/vhost/virtio_net.c | 66 --
 1 file changed, 31 insertions(+), 35 deletions(-)

diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 9abf752f30..7c3ec128a0 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -572,6 +572,26 @@ vhost_shadow_enqueue_packed(struct vhost_virtqueue *vq,
}
 }
 
+static __rte_always_inline void
+vhost_async_shadow_enqueue_packed(struct vhost_virtqueue *vq,
+  uint32_t *len,
+  uint16_t *id,
+  uint16_t *count,
+  uint16_t num_buffers)
+{
+   uint16_t i;
+   struct vhost_async *async = vq->async;
+
+   for (i = 0; i < num_buffers; i++) {
+   async->buffers_packed[async->buffer_idx_packed].id  = id[i];
+   async->buffers_packed[async->buffer_idx_packed].len = len[i];
+   async->buffers_packed[async->buffer_idx_packed].count = 
count[i];
+   async->buffer_idx_packed++;
+   if (async->buffer_idx_packed >= vq->size)
+   async->buffer_idx_packed -= vq->size;
+   }
+}
+
 static __rte_always_inline void
 vhost_shadow_enqueue_single_packed(struct virtio_net *dev,
   struct vhost_virtqueue *vq,
@@ -1647,23 +1667,6 @@ store_dma_desc_info_split(struct vring_used_elem 
*s_ring, struct vring_used_elem
}
 }
 
-static __rte_always_inline void
-store_dma_desc_info_packed(struct vring_used_elem_packed *s_ring,
-   struct vring_used_elem_packed *d_ring,
-   uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t 
count)
-{
-   size_t elem_size = sizeof(struct vring_used_elem_packed);
-
-   if (d_idx + count <= ring_size) {
-   rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size);
-   } else {
-   uint16_t size = ring_size - d_idx;
-
-   rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size);
-   rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * 
elem_size);
-   }
-}
-
 static __rte_noinline uint32_t
 virtio_dev_rx_async_submit_split(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t 
vchan_id)
@@ -1822,7 +1825,8 @@ vhost_enqueue_async_packed(struct virtio_net *dev,
if (unlikely(mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, *nr_buffers, 
true) < 0))
return -1;
 
-   vhost_shadow_enqueue_packed(vq, buffer_len, buffer_buf_id, 
buffer_desc_count, *nr_buffers);
+   vhost_async_shadow_enqueue_packed(vq, buffer_len, buffer_buf_id,
+   buffer_desc_count, *nr_buffers);
 
return 0;
 }
@@ -1852,6 +1856,7 @@ dma_error_handler_packed(struct vhost_virtqueue *vq, 
uint16_t slot_idx,
 {
uint16_t descs_err = 0;
uint16_t buffers_err = 0;
+   struct vhost_async *async = vq->async;
struct async_inflight_info *pkts_info = vq->async->pkts_info;
 
*pkt_idx -= nr_err;
@@ -1869,7 +1874,10 @@ dma_error_handler_packed(struct vhost_virtqueue *vq, 
uint16_t slot_idx,
vq->avail_wrap_counter ^= 1;
}
 
-   vq->shadow_used_idx -= buffers_err;
+   if (async->buffer_idx_packed >= buffers_err)
+   async->buffer_idx_packed -= buffers_err;
+   else
+   async->buffer_idx_packed = async->buffer_idx_packed + vq->size 
- buffers_err;
 }
 
 static __rte_noinline uint32_t
@@ -1921,23 +1929,11 @@ virtio_dev_rx_async_submit_packed(struct virtio_net 
*dev, struct vhost_virtqueue
dma_error_handler_packed(vq, slot_idx, pkt_err, &pkt_idx);
}
 
-   if (likely(vq->shadow_used_idx)) {
-   /* keep used descriptors. */
-   store_dma_desc_info_packed(vq->shadow_used_packed, 
async->buffers_packed,
-   vq->size, 0, async->buffer_idx_packed,
-   vq->shadow_used_idx);
-
-   async->buffer_idx_packed += vq->shadow_used_idx;
-   if (async->buffer_idx_packed >= vq->size)
-   async->buffer_idx_packed -= vq->size;
-
-   async->pkts_idx += pkt_idx;
-   if (async->pkts_idx >= vq->size)
-   async->pkts_idx -= vq->size;

[PATCH v2 3/3] vhost: add batch dequeue in async vhost packed ring

2023-01-12 Thread Cheng Jiang
Add a batch dequeue function to the asynchronous vhost packed ring to
improve performance. Chained mbufs are not supported; they will be
handled by the single dequeue function.
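
For context, a batch is only taken when a set of conditions holds; a
condensed sketch of those checks follows (the helper name is illustrative,
PACKED_BATCH_SIZE/PACKED_BATCH_MASK come from vhost.h, and the per-descriptor
availability and single-dequeue flag checks are omitted here, see
vhost_async_tx_batch_packed_check() in the diff):

#include <stdbool.h>
#include <stdint.h>
#include <rte_dmadev.h>

static inline bool
async_tx_batch_is_eligible(uint16_t avail_idx, uint16_t vq_size,
		int16_t dma_id, uint16_t vchan_id)
{
	/* The batch must start on a batch-aligned index... */
	if (avail_idx & PACKED_BATCH_MASK)
		return false;
	/* ...must not wrap around the ring... */
	if (avail_idx + PACKED_BATCH_SIZE > vq_size)
		return false;
	/* ...and the DMA vchan must have room for the whole burst. */
	if (rte_dma_burst_capacity(dma_id, vchan_id) < PACKED_BATCH_SIZE)
		return false;
	return true;
}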

Signed-off-by: Cheng Jiang 
Signed-off-by: Yuan Wang 
---
 lib/vhost/virtio_net.c | 170 -
 1 file changed, 167 insertions(+), 3 deletions(-)

diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index aea33ef127..8caf05319e 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -450,6 +450,23 @@ vhost_async_shadow_enqueue_packed_batch(struct 
vhost_virtqueue *vq,
}
 }
 
+static __rte_always_inline void
+vhost_async_shadow_dequeue_packed_batch(struct vhost_virtqueue *vq, uint16_t 
*ids)
+{
+   uint16_t i;
+   struct vhost_async *async = vq->async;
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   async->buffers_packed[async->buffer_idx_packed].id  = ids[i];
+   async->buffers_packed[async->buffer_idx_packed].len = 0;
+   async->buffers_packed[async->buffer_idx_packed].count = 1;
+
+   async->buffer_idx_packed++;
+   if (async->buffer_idx_packed >= vq->size)
+   async->buffer_idx_packed -= vq->size;
+   }
+}
+
 static __rte_always_inline void
 vhost_shadow_dequeue_batch_packed_inorder(struct vhost_virtqueue *vq,
  uint16_t id)
@@ -3199,6 +3216,80 @@ vhost_reserve_avail_batch_packed(struct virtio_net *dev,
return -1;
 }
 
+static __rte_always_inline int
+vhost_async_tx_batch_packed_check(struct virtio_net *dev,
+struct vhost_virtqueue *vq,
+struct rte_mbuf **pkts,
+uint16_t avail_idx,
+uintptr_t *desc_addrs,
+uint64_t *lens,
+uint16_t *ids,
+int16_t dma_id,
+uint16_t vchan_id)
+{
+   bool wrap = vq->avail_wrap_counter;
+   struct vring_packed_desc *descs = vq->desc_packed;
+   uint64_t buf_lens[PACKED_BATCH_SIZE];
+   uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+   uint16_t flags, i;
+
+   if (unlikely(avail_idx & PACKED_BATCH_MASK))
+   return -1;
+   if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))
+   return -1;
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   flags = descs[avail_idx + i].flags;
+   if (unlikely((wrap != !!(flags & VRING_DESC_F_AVAIL)) ||
+(wrap == !!(flags & VRING_DESC_F_USED))  ||
+(flags & PACKED_DESC_SINGLE_DEQUEUE_FLAG)))
+   return -1;
+   }
+
+   rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+   lens[i] = descs[avail_idx + i].len;
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   desc_addrs[i] = descs[avail_idx + i].addr;
+   }
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   if (unlikely(!desc_addrs[i]))
+   return -1;
+   if (unlikely((lens[i] != descs[avail_idx + i].len)))
+   return -1;
+   }
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   if (virtio_dev_pktmbuf_prep(dev, pkts[i], lens[i]))
+   goto err;
+   }
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+   buf_lens[i] = pkts[i]->buf_len - pkts[i]->data_off;
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   if (unlikely(buf_lens[i] < (lens[i] - buf_offset)))
+   goto err;
+   }
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   pkts[i]->pkt_len = lens[i] - buf_offset;
+   pkts[i]->data_len = pkts[i]->pkt_len;
+   ids[i] = descs[avail_idx + i].id;
+   }
+
+   if (rte_dma_burst_capacity(dma_id, vchan_id) < PACKED_BATCH_SIZE)
+   return -1;
+
+   return 0;
+
+err:
+   return -1;
+}
+
 static __rte_always_inline int
 virtio_dev_tx_batch_packed(struct virtio_net *dev,
   struct vhost_virtqueue *vq,
@@ -3775,16 +3866,74 @@ virtio_dev_tx_async_single_packed(struct virtio_net 
*dev,
return err;
 }
 
+static __rte_always_inline int
+virtio_dev_tx_async_packed_batch(struct virtio_net *dev,
+  struct vhost_virtqueue *vq,
+  struct rte_mbuf **pkts, uint16_t slot_idx,
+  uint16_t dma_id, uint16_t vchan_id)
+{
+   uint16_t avail_idx = vq->last_avail_idx;
+   uint32_t buf_offset = sizeof(stru

[PATCH v2 2/3] vhost: add batch enqueue in async vhost packed ring

2023-01-12 Thread Cheng Jiang
Add a batch enqueue function to the asynchronous vhost packed ring to
improve performance. Chained mbufs are not supported; they will be
handled by the single enqueue function.

Signed-off-by: Cheng Jiang 
---
 lib/vhost/virtio_net.c | 163 +
 1 file changed, 163 insertions(+)

diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 7c3ec128a0..aea33ef127 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -432,6 +432,24 @@ vhost_flush_enqueue_batch_packed(struct virtio_net *dev,
vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE);
 }
 
+static __rte_always_inline void
+vhost_async_shadow_enqueue_packed_batch(struct vhost_virtqueue *vq,
+uint64_t *lens,
+uint16_t *ids)
+{
+   uint16_t i;
+   struct vhost_async *async = vq->async;
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   async->buffers_packed[async->buffer_idx_packed].id  = ids[i];
+   async->buffers_packed[async->buffer_idx_packed].len = lens[i];
+   async->buffers_packed[async->buffer_idx_packed].count = 1;
+   async->buffer_idx_packed++;
+   if (async->buffer_idx_packed >= vq->size)
+   async->buffer_idx_packed -= vq->size;
+   }
+}
+
 static __rte_always_inline void
 vhost_shadow_dequeue_batch_packed_inorder(struct vhost_virtqueue *vq,
  uint16_t id)
@@ -1451,6 +1469,58 @@ virtio_dev_rx_sync_batch_check(struct virtio_net *dev,
return 0;
 }
 
+static __rte_always_inline int
+virtio_dev_rx_async_batch_check(struct vhost_virtqueue *vq,
+  struct rte_mbuf **pkts,
+  uint64_t *desc_addrs,
+  uint64_t *lens,
+  int16_t dma_id,
+  uint16_t vchan_id)
+{
+   bool wrap_counter = vq->avail_wrap_counter;
+   struct vring_packed_desc *descs = vq->desc_packed;
+   uint16_t avail_idx = vq->last_avail_idx;
+   uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+   uint16_t i;
+
+   if (unlikely(avail_idx & PACKED_BATCH_MASK))
+   return -1;
+
+   if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))
+   return -1;
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   if (unlikely(pkts[i]->next != NULL))
+   return -1;
+   if (unlikely(!desc_is_avail(&descs[avail_idx + i],
+   wrap_counter)))
+   return -1;
+   }
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+   lens[i] = descs[avail_idx + i].len;
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   if (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset)))
+   return -1;
+   }
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+   desc_addrs[i] =  descs[avail_idx + i].addr;
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   if (unlikely(!desc_addrs[i]))
+   return -1;
+   if (unlikely(lens[i] != descs[avail_idx + i].len))
+   return -1;
+   }
+
+   if (rte_dma_burst_capacity(dma_id, vchan_id) < PACKED_BATCH_SIZE)
+   return -1;
+
+   return 0;
+}
+
 static __rte_always_inline void
 virtio_dev_rx_batch_packed_copy(struct virtio_net *dev,
   struct vhost_virtqueue *vq,
@@ -1850,6 +1920,84 @@ virtio_dev_rx_async_packed(struct virtio_net *dev, 
struct vhost_virtqueue *vq,
return 0;
 }
 
+static __rte_always_inline void
+virtio_dev_rx_async_packed_batch_enqueue(struct virtio_net *dev,
+  struct vhost_virtqueue *vq,
+  struct rte_mbuf **pkts,
+  uint64_t *desc_addrs,
+  uint64_t *lens)
+{
+   uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+   struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_BATCH_SIZE];
+   struct vring_packed_desc *descs = vq->desc_packed;
+   struct vhost_async *async = vq->async;
+   uint16_t avail_idx = vq->last_avail_idx;
+   uint32_t mbuf_offset = 0;
+   uint16_t ids[PACKED_BATCH_SIZE];
+   uint64_t mapped_len[PACKED_BATCH_SIZE];
+   void *host_iova[PACKED_BATCH_SIZE];
+   uintptr_t desc;
+   uint16_t i;
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   rte_prefetch0((void *)(uintptr_t)desc_addrs[i]);
+   desc = vhost_iova_to_vva(dev, vq, desc_addrs[i], &lens[i], 
VHOST_ACCESS_RW);
+   hdrs[i] = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc;
+   lens[i] = pkts[i]->pkt_len

[PATCH v2] app/dma-perf: introduce dma-perf application

2023-01-16 Thread Cheng Jiang
There are many high-performance DMA devices supported in DPDK now, and
these DMA devices can also be integrated into other modules of DPDK as
accelerators, such as Vhost. Before integrating DMA into applications,
developers need to know the performance of these DMA devices in various
scenarios and the performance of CPUs in the same scenario, such as
different buffer lengths. Only then can we know the target performance of
an application accelerated by these DMA devices. This patch introduces a
high-performance benchmarking tool which automatically compares the
performance of CPU and DMA in different scenarios, driven by a pre-set
config file. Memory copy performance tests are supported for now.

Signed-off-by: Cheng Jiang 
Signed-off-by: Jiayu Hu 
Signed-off-by: Yuan Wang 
Acked-by: Morten Brørup 
---
v2: fixed some CI issues.

 app/meson.build   |   1 +
 app/test-dma-perf/benchmark.c | 539 ++
 app/test-dma-perf/benchmark.h |  12 +
 app/test-dma-perf/config.ini  |  61 
 app/test-dma-perf/main.c  | 434 +++
 app/test-dma-perf/main.h  |  53 
 app/test-dma-perf/meson.build |  22 ++
 7 files changed, 1122 insertions(+)
 create mode 100644 app/test-dma-perf/benchmark.c
 create mode 100644 app/test-dma-perf/benchmark.h
 create mode 100644 app/test-dma-perf/config.ini
 create mode 100644 app/test-dma-perf/main.c
 create mode 100644 app/test-dma-perf/main.h
 create mode 100644 app/test-dma-perf/meson.build

diff --git a/app/meson.build b/app/meson.build
index e32ea4bd5c..a060ad2725 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -28,6 +28,7 @@ apps = [
 'test-regex',
 'test-sad',
 'test-security-perf',
+'test-dma-perf',
 ]

 default_cflags = machine_args + ['-DALLOW_EXPERIMENTAL_API']
diff --git a/app/test-dma-perf/benchmark.c b/app/test-dma-perf/benchmark.c
new file mode 100644
index 00..1cb5b0b291
--- /dev/null
+++ b/app/test-dma-perf/benchmark.c
@@ -0,0 +1,539 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "main.h"
+#include "benchmark.h"
+
+
+#define MAX_DMA_CPL_NB 255
+
+#define CSV_LINE_DMA_FMT "Scenario %u,%u,%u,%u,%u,%u,%" PRIu64 ",%.3lf,%" 
PRIu64 "\n"
+#define CSV_LINE_CPU_FMT "Scenario %u,%u,NA,%u,%u,%u,%" PRIu64 ",%.3lf,%" 
PRIu64 "\n"
+
+struct lcore_params {
+   uint16_t dev_id;
+   uint32_t nr_buf;
+   uint16_t kick_batch;
+   uint32_t buf_size;
+   uint32_t repeat_times;
+   uint16_t mpool_iter_step;
+   struct rte_mbuf **srcs;
+   struct rte_mbuf **dsts;
+   uint8_t scenario_id;
+};
+
+struct buf_info {
+   struct rte_mbuf **array;
+   uint32_t nr_buf;
+   uint32_t buf_size;
+};
+
+static struct rte_mempool *src_pool;
+static struct rte_mempool *dst_pool;
+
+uint16_t dmadev_ids[MAX_WORKER_NB];
+uint32_t nb_dmadevs;
+
+#define PRINT_ERR(...) print_err(__func__, __LINE__, __VA_ARGS__)
+
+static inline int
+__rte_format_printf(3, 4)
+print_err(const char *func, int lineno, const char *format, ...)
+{
+   va_list ap;
+   int ret;
+
+   ret = fprintf(stderr, "In %s:%d - ", func, lineno);
+   va_start(ap, format);
+   ret += vfprintf(stderr, format, ap);
+   va_end(ap);
+
+   return ret;
+}
+
+static inline void
+calc_result(struct lcore_params *p, uint64_t cp_cycle_sum, double time_sec,
+   uint32_t repeat_times, uint32_t *memory, uint64_t 
*ave_cycle,
+   float *bandwidth, uint64_t *ops)
+{
+   *memory = (p->buf_size * p->nr_buf * 2) / (1024 * 1024);
+   *ave_cycle = cp_cycle_sum / (p->repeat_times * p->nr_buf);
+   *bandwidth = p->buf_size * 8 * rte_get_timer_hz() / (*ave_cycle * 1000 
* 1000 * 1000.0);
+   *ops = (double)p->nr_buf * repeat_times / time_sec;
+}
+
+static void
+output_result(uint8_t scenario_id, uint32_t lcore_id, uint16_t dev_id, 
uint64_t ave_cycle,
+   uint32_t buf_size, uint32_t nr_buf, uint32_t memory,
+   float bandwidth, uint64_t ops, bool is_dma)
+{
+   if (is_dma)
+   printf("lcore %u, DMA %u:\n"
+   "average cycles: %" PRIu64 ","
+   " buffer size: %u, nr_buf: %u,"
+   " memory: %uMB, frequency: %" PRIu64 ".\n",
+   lcore_id,
+   dev_id,
+   ave_cycle,
+   buf_size,
+   nr_buf,
+   memory,
+   rte_get_timer_hz());
+  

[PATCH v3] app/dma-perf: introduce dma-perf application

2023-01-17 Thread Cheng Jiang
There are many high-performance DMA devices supported in DPDK now, and
these DMA devices can also be integrated into other modules of DPDK as
accelerators, such as Vhost. Before integrating DMA into applications,
developers need to know the performance of these DMA devices in various
scenarios and the performance of CPUs in the same scenario, such as
different buffer lengths. Only then can we know the target performance of
an application accelerated by these DMA devices. This patch introduces a
high-performance benchmarking tool which automatically compares the
performance of CPU and DMA in different scenarios, driven by a pre-set
config file. Memory copy performance tests are supported for now.

Signed-off-by: Cheng Jiang 
Signed-off-by: Jiayu Hu 
Signed-off-by: Yuan Wang 
Acked-by: Morten Brørup 
---
v3:
fixed compile issues for LoongArch.
fixed compile issues for Intel.
fixed coding style issues.

v2:
fixed some CI issues.

 app/meson.build   |   1 +
 app/test-dma-perf/benchmark.c | 541 ++
 app/test-dma-perf/benchmark.h |  12 +
 app/test-dma-perf/config.ini  |  61 
 app/test-dma-perf/main.c  | 434 +++
 app/test-dma-perf/main.h  |  57 
 app/test-dma-perf/meson.build |  20 ++
 7 files changed, 1126 insertions(+)
 create mode 100644 app/test-dma-perf/benchmark.c
 create mode 100644 app/test-dma-perf/benchmark.h
 create mode 100644 app/test-dma-perf/config.ini
 create mode 100644 app/test-dma-perf/main.c
 create mode 100644 app/test-dma-perf/main.h
 create mode 100644 app/test-dma-perf/meson.build

diff --git a/app/meson.build b/app/meson.build
index e32ea4bd5c..a060ad2725 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -28,6 +28,7 @@ apps = [
 'test-regex',
 'test-sad',
 'test-security-perf',
+'test-dma-perf',
 ]

 default_cflags = machine_args + ['-DALLOW_EXPERIMENTAL_API']
diff --git a/app/test-dma-perf/benchmark.c b/app/test-dma-perf/benchmark.c
new file mode 100644
index 00..7ec3f95643
--- /dev/null
+++ b/app/test-dma-perf/benchmark.c
@@ -0,0 +1,541 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "main.h"
+#include "benchmark.h"
+
+
+#define MAX_DMA_CPL_NB 255
+
+#define CSV_LINE_DMA_FMT "Scenario %u,%u,%u,%u,%u,%u,%" PRIu64 ",%.3lf,%" 
PRIu64 "\n"
+#define CSV_LINE_CPU_FMT "Scenario %u,%u,NA,%u,%u,%u,%" PRIu64 ",%.3lf,%" 
PRIu64 "\n"
+
+struct lcore_params {
+   uint16_t dev_id;
+   uint32_t nr_buf;
+   uint16_t kick_batch;
+   uint32_t buf_size;
+   uint32_t repeat_times;
+   uint16_t mpool_iter_step;
+   struct rte_mbuf **srcs;
+   struct rte_mbuf **dsts;
+   uint8_t scenario_id;
+};
+
+struct buf_info {
+   struct rte_mbuf **array;
+   uint32_t nr_buf;
+   uint32_t buf_size;
+};
+
+static struct rte_mempool *src_pool;
+static struct rte_mempool *dst_pool;
+
+uint16_t dmadev_ids[MAX_WORKER_NB];
+uint32_t nb_dmadevs;
+
+#define PRINT_ERR(...) print_err(__func__, __LINE__, __VA_ARGS__)
+
+static inline int
+__rte_format_printf(3, 4)
+print_err(const char *func, int lineno, const char *format, ...)
+{
+   va_list ap;
+   int ret;
+
+   ret = fprintf(stderr, "In %s:%d - ", func, lineno);
+   va_start(ap, format);
+   ret += vfprintf(stderr, format, ap);
+   va_end(ap);
+
+   return ret;
+}
+
+static inline void
+calc_result(struct lcore_params *p, uint64_t cp_cycle_sum, double time_sec,
+   uint32_t repeat_times, uint32_t *memory, uint64_t 
*ave_cycle,
+   float *bandwidth, uint64_t *ops)
+{
+   *memory = (p->buf_size * p->nr_buf * 2) / (1024 * 1024);
+   *ave_cycle = cp_cycle_sum / (p->repeat_times * p->nr_buf);
+   *bandwidth = p->buf_size * 8 * rte_get_timer_hz() / (*ave_cycle * 1000 
* 1000 * 1000.0);
+   *ops = (double)p->nr_buf * repeat_times / time_sec;
+}
+
+static void
+output_result(uint8_t scenario_id, uint32_t lcore_id, uint16_t dev_id, 
uint64_t ave_cycle,
+   uint32_t buf_size, uint32_t nr_buf, uint32_t memory,
+   float bandwidth, uint64_t ops, bool is_dma)
+{
+   if (is_dma)
+   printf("lcore %u, DMA %u:\n"
+   "average cycles: %" PRIu64 ","
+   " buffer size: %u, nr_buf: %u,"
+   " memory: %uMB, frequency: %" PRIu64 ".\n",
+   lcore_id,
+   dev_id,
+   ave_cycle,
+   buf_size,
+ 

[dpdk-dev] [PATCH] examples/vhost: fix potential overflow in args process

2021-04-18 Thread Cheng Jiang
Add an argument length check to fix a potential overflow issue.

Coverity issue: 363741
Fixes: 965b06f0358 ("examples/vhost: enhance getopt_long usage")
Cc: sta...@dpdk.org

Signed-off-by: Cheng Jiang 
---
 examples/vhost/main.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 119ba7e01..5df36ad3c 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -669,6 +669,11 @@ us_vhost_parse_args(int argc, char **argv)
break;
 
case OPT_DMA_TYPE_NUM:
+   if (strlen(optarg) >= MAX_LONG_OPT_SZ) {
+   RTE_LOG(INFO, VHOST_CONFIG, "Wrong DMA type\n");
+   us_vhost_usage(prgname);
+   return -1;
+   }
strcpy(dma_type, optarg);
break;
 
-- 
2.29.2



[dpdk-dev] [PATCH v8 0/4] add support for packed ring in async vhost

2021-04-19 Thread Cheng Jiang
For now the async vhost data path only supports the split ring
structure. In order to make async vhost compatible with the virtio 1.1
spec, this patch set cleans up the async split ring code and enables
packed ring in the async vhost data path. The batch data path is also
enabled for the async vhost packed ring.

v8:
 * fix some variable names for consistency
 * clean codes
v7:
 * fix compile issues
 * add argument *dev in vhost_free_async_mem() for ring type decision
v6:
 * fix some typos in commit log
 * improve index usage
 * remove shadow_ring_store()
 * add store_dma_desc_info_split() and store_dma_desc_info_packed()
 * remove some checks in vhost_free_async_mem()
 * change index calculation since the size isn't necessarily a power of 2
 * move error handling in a dedicated function
 * clean codes
v5:
 * clean some codes for packed ring datapath
 * fix an index error in shadow_ring_store()
v4:
  * change the patch structure
  * clean code for async split ring
  * reuse some code from split ring
  * change the error handler for DMA-copy packet
  * add check for malloc
  * remove useless code
  * add doc update
v3:
  * fix error handler for DMA-copy packet
v2:
  * fix wrong buffer index in rte_vhost_poll_enqueue_completed()
  * add async_buffers_packed memory free in vhost_free_async_mem()

Cheng Jiang (4):
  vhost: abstract and reorganize async split ring code
  vhost: add support for packed ring in async vhost
  vhost: add batch datapath for async vhost packed ring
  doc: add release note for vhost async packed ring

 doc/guides/rel_notes/release_21_05.rst |   4 +
 lib/librte_vhost/rte_vhost_async.h |   1 +
 lib/librte_vhost/vhost.c   |  79 +++-
 lib/librte_vhost/vhost.h   |  15 +-
 lib/librte_vhost/virtio_net.c  | 598 +
 5 files changed, 587 insertions(+), 110 deletions(-)

--
2.29.2



[dpdk-dev] [PATCH v8 2/4] vhost: add support for packed ring in async vhost

2021-04-19 Thread Cheng Jiang
For now the async vhost data path only supports the split ring. This
patch enables packed ring in the async vhost data path to make async
vhost compatible with the virtio 1.1 spec.

Signed-off-by: Cheng Jiang 
---
 lib/librte_vhost/rte_vhost_async.h |   1 +
 lib/librte_vhost/vhost.c   |  79 --
 lib/librte_vhost/vhost.h   |  15 +-
 lib/librte_vhost/virtio_net.c  | 441 +++--
 4 files changed, 488 insertions(+), 48 deletions(-)

diff --git a/lib/librte_vhost/rte_vhost_async.h 
b/lib/librte_vhost/rte_vhost_async.h
index c855ff875..6faa31f5a 100644
--- a/lib/librte_vhost/rte_vhost_async.h
+++ b/lib/librte_vhost/rte_vhost_async.h
@@ -89,6 +89,7 @@ struct rte_vhost_async_channel_ops {
 struct async_inflight_info {
struct rte_mbuf *mbuf;
uint16_t descs; /* num of descs inflight */
+   uint16_t nr_buffers; /* num of buffers inflight for packed ring */
 };
 
 /**
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index a70fe01d8..2e3f9eb09 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -340,17 +340,17 @@ cleanup_device(struct virtio_net *dev, int destroy)
 static void
 vhost_free_async_mem(struct vhost_virtqueue *vq)
 {
-   if (vq->async_pkts_info)
-   rte_free(vq->async_pkts_info);
-   if (vq->async_descs_split)
-   rte_free(vq->async_descs_split);
-   if (vq->it_pool)
-   rte_free(vq->it_pool);
-   if (vq->vec_pool)
-   rte_free(vq->vec_pool);
+   rte_free(vq->async_pkts_info);
 
-   vq->async_pkts_info = NULL;
+   rte_free(vq->async_buffers_packed);
+   vq->async_buffers_packed = NULL;
+   rte_free(vq->async_descs_split);
vq->async_descs_split = NULL;
+
+   rte_free(vq->it_pool);
+   rte_free(vq->vec_pool);
+
+   vq->async_pkts_info = NULL;
vq->it_pool = NULL;
vq->vec_pool = NULL;
 }
@@ -360,10 +360,10 @@ free_vq(struct virtio_net *dev, struct vhost_virtqueue 
*vq)
 {
if (vq_is_packed(dev))
rte_free(vq->shadow_used_packed);
-   else {
+   else
rte_free(vq->shadow_used_split);
-   vhost_free_async_mem(vq);
-   }
+
+   vhost_free_async_mem(vq);
rte_free(vq->batch_copy_elems);
if (vq->iotlb_pool)
rte_mempool_free(vq->iotlb_pool);
@@ -1626,10 +1626,9 @@ int rte_vhost_async_channel_register(int vid, uint16_t 
queue_id,
if (unlikely(vq == NULL || !dev->async_copy))
return -1;
 
-   /* packed queue is not supported */
-   if (unlikely(vq_is_packed(dev) || !f.async_inorder)) {
+   if (unlikely(!f.async_inorder)) {
VHOST_LOG_CONFIG(ERR,
-   "async copy is not supported on packed queue or 
non-inorder mode "
+   "async copy is not supported on non-inorder mode "
"(vid %d, qid: %d)\n", vid, queue_id);
return -1;
}
@@ -1661,24 +1660,60 @@ int rte_vhost_async_channel_register(int vid, uint16_t 
queue_id,
vq->async_pkts_info = rte_malloc_socket(NULL,
vq->size * sizeof(struct async_inflight_info),
RTE_CACHE_LINE_SIZE, node);
+   if (!vq->async_pkts_info) {
+   vhost_free_async_mem(vq);
+   VHOST_LOG_CONFIG(ERR,
+   "async register failed: cannot allocate memory for 
async_pkts_info "
+   "(vid %d, qid: %d)\n", vid, queue_id);
+   goto reg_out;
+   }
+
vq->it_pool = rte_malloc_socket(NULL,
VHOST_MAX_ASYNC_IT * sizeof(struct rte_vhost_iov_iter),
RTE_CACHE_LINE_SIZE, node);
+   if (!vq->it_pool) {
+   vhost_free_async_mem(vq);
+   VHOST_LOG_CONFIG(ERR,
+   "async register failed: cannot allocate memory for 
it_pool "
+   "(vid %d, qid: %d)\n", vid, queue_id);
+   goto reg_out;
+   }
+
vq->vec_pool = rte_malloc_socket(NULL,
VHOST_MAX_ASYNC_VEC * sizeof(struct iovec),
RTE_CACHE_LINE_SIZE, node);
-   vq->async_descs_split = rte_malloc_socket(NULL,
-   vq->size * sizeof(struct vring_used_elem),
-   RTE_CACHE_LINE_SIZE, node);
-   if (!vq->async_descs_split || !vq->async_pkts_info ||
-   !vq->it_pool || !vq->vec_pool) {
+   if (!vq->vec_pool) {
vhost_free_async_mem(vq);
VHOST_LOG_CONFIG(ERR,
-   "async register failed: cannot allocate memory 
for vq data "
-   "(vid %d, qid: %d)\n", vid, queue_id);
+   

[dpdk-dev] [PATCH v8 3/4] vhost: add batch datapath for async vhost packed ring

2021-04-19 Thread Cheng Jiang
Add a batch data path for the async vhost packed ring to improve the
performance of small packet processing.

Signed-off-by: Cheng Jiang 
---
 lib/librte_vhost/virtio_net.c | 41 +++
 1 file changed, 37 insertions(+), 4 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index ce88ad3c0..0ad289e6e 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -1724,6 +1724,29 @@ vhost_update_used_packed(struct vhost_virtqueue *vq,
vq->desc_packed[head_idx].flags = head_flags;
 }
 
+static __rte_always_inline int
+virtio_dev_rx_async_batch_packed(struct virtio_net *dev,
+  struct vhost_virtqueue *vq,
+  struct rte_mbuf **pkts,
+  struct rte_mbuf **comp_pkts, uint32_t *pkt_done)
+{
+   uint16_t i;
+   uint32_t cpy_threshold = vq->async_threshold;
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   if (unlikely(pkts[i]->pkt_len >= cpy_threshold))
+   return -1;
+   }
+   if (!virtio_dev_rx_batch_packed(dev, vq, pkts)) {
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+   comp_pkts[(*pkt_done)++] = pkts[i];
+
+   return 0;
+   }
+
+   return -1;
+}
+
 static __rte_always_inline int
 vhost_enqueue_async_single_packed(struct virtio_net *dev,
struct vhost_virtqueue *vq,
@@ -1874,6 +1897,7 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
struct rte_mbuf **comp_pkts, uint32_t *comp_count)
 {
uint32_t pkt_idx = 0, pkt_burst_idx = 0;
+   uint32_t remained = count;
uint16_t async_descs_idx = 0;
uint16_t num_buffers;
uint16_t num_desc;
@@ -1891,9 +1915,17 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
uint32_t num_async_pkts = 0, num_done_pkts = 0;
struct vring_packed_desc async_descs[vq->size];
 
-   rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
+   do {
+   rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
+   if (remained >= PACKED_BATCH_SIZE) {
+   if (!virtio_dev_rx_async_batch_packed(dev, vq,
+   &pkts[pkt_idx], comp_pkts, &num_done_pkts)) {
+   pkt_idx += PACKED_BATCH_SIZE;
+   remained -= PACKED_BATCH_SIZE;
+   continue;
+   }
+   }
 
-   for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
if (unlikely(virtio_dev_rx_async_single_packed(dev, vq, 
pkts[pkt_idx],
&num_desc, &num_buffers,
&async_descs[async_descs_idx],
@@ -1936,6 +1968,8 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
comp_pkts[num_done_pkts++] = pkts[pkt_idx];
}
 
+   pkt_idx++;
+   remained--;
vq_inc_last_avail_packed(vq, num_desc);
 
/*
@@ -1960,13 +1994,12 @@ virtio_dev_rx_async_submit_packed(struct virtio_net 
*dev,
 */
pkt_err = pkt_burst_idx - n_pkts;
pkt_burst_idx = 0;
-   pkt_idx++;
break;
}
 
pkt_burst_idx = 0;
}
-   }
+   } while (pkt_idx < count);
 
if (pkt_burst_idx) {
n_pkts = vq->async_ops.transfer_data(dev->vid, queue_id, tdes, 
0, pkt_burst_idx);
-- 
2.29.2



[dpdk-dev] [PATCH v8 1/4] vhost: abstract and reorganize async split ring code

2021-04-19 Thread Cheng Jiang
This patch moves some of the async vhost split ring code into inline
functions to improve readability. It also changes the iterators from
pointer arithmetic to index-based access to make the code more concise.

Signed-off-by: Cheng Jiang 
Reviewed-by: Maxime Coquelin 
---
 lib/librte_vhost/virtio_net.c | 132 +-
 1 file changed, 66 insertions(+), 66 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index ff3987860..438bdafd1 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -1458,6 +1458,22 @@ virtio_dev_rx_async_get_info_idx(uint16_t pkts_idx,
(vq_size - n_inflight + pkts_idx) & (vq_size - 1);
 }

+static __rte_always_inline void
+store_dma_desc_info_split(struct vring_used_elem *s_ring, struct 
vring_used_elem *d_ring,
+   uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t 
count)
+{
+   uint16_t elem_size = sizeof(struct vring_used_elem);
+
+   if (d_idx + count <= ring_size) {
+   rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size);
+   } else {
+   uint16_t size = ring_size - d_idx;
+
+   rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size);
+   rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * 
elem_size);
+   }
+}
+
 static __rte_noinline uint32_t
 virtio_dev_rx_async_submit_split(struct virtio_net *dev,
struct vhost_virtqueue *vq, uint16_t queue_id,
@@ -1474,10 +1490,9 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
struct rte_vhost_async_desc tdes[MAX_PKT_BURST];
struct iovec *src_iovec = vec_pool;
struct iovec *dst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1);
-   struct rte_vhost_iov_iter *src_it = it_pool;
-   struct rte_vhost_iov_iter *dst_it = it_pool + 1;
uint16_t slot_idx = 0;
uint16_t segs_await = 0;
+   uint16_t iovec_idx = 0, it_idx = 0;
struct async_inflight_info *pkts_info = vq->async_pkts_info;
uint32_t n_pkts = 0, pkt_err = 0;
uint32_t num_async_pkts = 0, num_done_pkts = 0;
@@ -1511,29 +1526,30 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
dev->vid, vq->last_avail_idx,
vq->last_avail_idx + num_buffers);

-   if (async_mbuf_to_desc(dev, vq, pkts[pkt_idx],
-   buf_vec, nr_vec, num_buffers,
-   src_iovec, dst_iovec, src_it, dst_it) < 0) {
+   if (async_mbuf_to_desc(dev, vq, pkts[pkt_idx], buf_vec, nr_vec, 
num_buffers,
+   &src_iovec[iovec_idx], &dst_iovec[iovec_idx],
+   &it_pool[it_idx], &it_pool[it_idx + 1]) < 0) {
vq->shadow_used_idx -= num_buffers;
break;
}

slot_idx = (vq->async_pkts_idx + num_async_pkts) &
(vq->size - 1);
-   if (src_it->count) {
+   if (it_pool[it_idx].count) {
uint16_t from, to;

-   async_fill_desc(&tdes[pkt_burst_idx++], src_it, dst_it);
+   async_fill_desc(&tdes[pkt_burst_idx++],
+   &it_pool[it_idx], &it_pool[it_idx + 1]);
pkts_info[slot_idx].descs = num_buffers;
pkts_info[slot_idx].mbuf = pkts[pkt_idx];
async_pkts_log[num_async_pkts].pkt_idx = pkt_idx;
async_pkts_log[num_async_pkts++].last_avail_idx =
vq->last_avail_idx;
-   src_iovec += src_it->nr_segs;
-   dst_iovec += dst_it->nr_segs;
-   src_it += 2;
-   dst_it += 2;
-   segs_await += src_it->nr_segs;
+
+   iovec_idx += it_pool[it_idx].nr_segs;
+   it_idx += 2;
+
+   segs_await += it_pool[it_idx].nr_segs;

/**
 * recover shadow used ring and keep DMA-occupied
@@ -1541,23 +1557,10 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
 */
from = vq->shadow_used_idx - num_buffers;
to = vq->async_desc_idx & (vq->size - 1);
-   if (num_buffers + to <= vq->size) {
-   rte_memcpy(&vq->async_descs_split[to],
-   &vq->shadow_used_split[from],
-   num_buffers *
-   sizeof(struct vring_used_elem));
-   } else {
-   int size = vq->size - t

[dpdk-dev] [PATCH v8 4/4] doc: add release note for vhost async packed ring

2021-04-19 Thread Cheng Jiang
Add release note for the support of vhost async packed ring.

Signed-off-by: Cheng Jiang 
---
 doc/guides/rel_notes/release_21_05.rst | 4 
 1 file changed, 4 insertions(+)

diff --git a/doc/guides/rel_notes/release_21_05.rst 
b/doc/guides/rel_notes/release_21_05.rst
index 374d6d98e..eb5200669 100644
--- a/doc/guides/rel_notes/release_21_05.rst
+++ b/doc/guides/rel_notes/release_21_05.rst
@@ -131,6 +131,10 @@ New Features
   * Added command to display Rx queue used descriptor count.
 ``show port (port_id) rxq (queue_id) desc used count``
 
+* **Added support for vhost async packed ring data path.**
+
+  Added packed ring support for async vhost.
+
 
 Removed Items
 -
-- 
2.29.2



[dpdk-dev] [PATCH v2] examples/vhost: fix potential overflow in args process

2021-04-26 Thread Cheng Jiang
Change the way arguments are passed to fix a potential overflow in
argument processing.

Coverity issue: 363741
Fixes: 965b06f0358 ("examples/vhost: enhance getopt_long usage")

Signed-off-by: Cheng Jiang 
---
v2:
 * Change the way passing args
 * Change git log

 examples/vhost/main.c | 11 ---
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 119ba7e01..3e2e9a45c 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -55,9 +55,6 @@

 #define INVALID_PORT_ID 0xFF

-/* Maximum long option length for option parsing. */
-#define MAX_LONG_OPT_SZ 64
-
 /* mask of enabled ports */
 static uint32_t enabled_port_mask = 0;

@@ -97,7 +94,7 @@ static int builtin_net_driver;

 static int async_vhost_driver;

-static char dma_type[MAX_LONG_OPT_SZ];
+static char *dma_type;

 /* Specify timeout (in useconds) between retries on RX. */
 static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
@@ -201,7 +198,7 @@ struct vhost_bufftable *vhost_txbuff[RTE_MAX_LCORE * 
MAX_VHOST_DEVICE];
 static inline int
 open_dma(const char *value)
 {
-   if (strncmp(dma_type, "ioat", 4) == 0)
+   if (dma_type != NULL && strncmp(dma_type, "ioat", 4) == 0)
return open_ioat(value);

return -1;
@@ -669,7 +666,7 @@ us_vhost_parse_args(int argc, char **argv)
break;

case OPT_DMA_TYPE_NUM:
-   strcpy(dma_type, optarg);
+   dma_type = optarg;
break;

case OPT_DMAS_NUM:
@@ -1472,7 +1469,7 @@ new_device(int vid)
struct rte_vhost_async_features f;
struct rte_vhost_async_channel_ops channel_ops;

-   if (strncmp(dma_type, "ioat", 4) == 0) {
+   if (dma_type != NULL && strncmp(dma_type, "ioat", 4) == 0) {
channel_ops.transfer_data = ioat_transfer_data_cb;
channel_ops.check_completed_copies =
ioat_check_completed_copies_cb;
--
2.29.2



[dpdk-dev] [PATCH v9 0/4] add support for packed ring in async vhost

2021-04-27 Thread Cheng Jiang
For now the async vhost data path only supports the split ring
structure. In order to make async vhost compatible with the virtio 1.1
spec, this patch set cleans up the async split ring code and enables
packed ring in the async vhost data path. The batch data path is also
enabled for the async vhost packed ring.

v9:
 * improve some variable initialization
 * fix some variable names for consistency
 * rebase on the latest code
v8:
 * fix some variable names for consistency
 * clean codes
v7:
 * fix compile issues
 * add argument *dev in vhost_free_async_mem() for ring type decision
v6:
 * fix some typos in commit log
 * improve index usage
 * remove shadow_ring_store()
 * add store_dma_desc_info_split() and store_dma_desc_info_packed()
 * remove some checks in vhost_free_async_mem()
 * change index calculation since the size isn't necessarily a power of 2
 * move error handling in a dedicated function
 * clean codes
v5:
 * clean some codes for packed ring datapath
 * fix an index error in shadow_ring_store()
v4:
  * change the patch structure
  * clean code for async split ring
  * reuse some code from split ring
  * change the error handler for DMA-copy packet
  * add check for malloc
  * remove useless code
  * add doc update
v3:
  * fix error handler for DMA-copy packet
v2:
  * fix wrong buffer index in rte_vhost_poll_enqueue_completed()
  * add async_buffers_packed memory free in vhost_free_async_mem()

Cheng Jiang (4):
  vhost: abstract and reorganize async split ring code
  vhost: add support for packed ring in async vhost
  vhost: add batch datapath for async vhost packed ring
  doc: add release note for vhost async packed ring

 doc/guides/rel_notes/release_21_05.rst |   4 +
 lib/vhost/rte_vhost_async.h|   1 +
 lib/vhost/vhost.c  |  79 +++-
 lib/vhost/vhost.h  |  15 +-
 lib/vhost/virtio_net.c | 598 +
 5 files changed, 587 insertions(+), 110 deletions(-)

--
2.29.2



[dpdk-dev] [PATCH v9 1/4] vhost: abstract and reorganize async split ring code

2021-04-27 Thread Cheng Jiang
This patch moves some of the async vhost split ring code into inline
functions to improve readability. It also changes the iterators from
pointer arithmetic to index-based access to make the code more concise.

Signed-off-by: Cheng Jiang 
Reviewed-by: Maxime Coquelin 
Reviewed-by: Jiayu Hu 
---
 lib/vhost/virtio_net.c | 132 -
 1 file changed, 66 insertions(+), 66 deletions(-)

diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index ff39878609..438bdafd14 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -1458,6 +1458,22 @@ virtio_dev_rx_async_get_info_idx(uint16_t pkts_idx,
(vq_size - n_inflight + pkts_idx) & (vq_size - 1);
 }
 
+static __rte_always_inline void
+store_dma_desc_info_split(struct vring_used_elem *s_ring, struct 
vring_used_elem *d_ring,
+   uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t 
count)
+{
+   uint16_t elem_size = sizeof(struct vring_used_elem);
+
+   if (d_idx + count <= ring_size) {
+   rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size);
+   } else {
+   uint16_t size = ring_size - d_idx;
+
+   rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size);
+   rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * 
elem_size);
+   }
+}
+
 static __rte_noinline uint32_t
 virtio_dev_rx_async_submit_split(struct virtio_net *dev,
struct vhost_virtqueue *vq, uint16_t queue_id,
@@ -1474,10 +1490,9 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
struct rte_vhost_async_desc tdes[MAX_PKT_BURST];
struct iovec *src_iovec = vec_pool;
struct iovec *dst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1);
-   struct rte_vhost_iov_iter *src_it = it_pool;
-   struct rte_vhost_iov_iter *dst_it = it_pool + 1;
uint16_t slot_idx = 0;
uint16_t segs_await = 0;
+   uint16_t iovec_idx = 0, it_idx = 0;
struct async_inflight_info *pkts_info = vq->async_pkts_info;
uint32_t n_pkts = 0, pkt_err = 0;
uint32_t num_async_pkts = 0, num_done_pkts = 0;
@@ -1511,29 +1526,30 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
dev->vid, vq->last_avail_idx,
vq->last_avail_idx + num_buffers);
 
-   if (async_mbuf_to_desc(dev, vq, pkts[pkt_idx],
-   buf_vec, nr_vec, num_buffers,
-   src_iovec, dst_iovec, src_it, dst_it) < 0) {
+   if (async_mbuf_to_desc(dev, vq, pkts[pkt_idx], buf_vec, nr_vec, 
num_buffers,
+   &src_iovec[iovec_idx], &dst_iovec[iovec_idx],
+   &it_pool[it_idx], &it_pool[it_idx + 1]) < 0) {
vq->shadow_used_idx -= num_buffers;
break;
}
 
slot_idx = (vq->async_pkts_idx + num_async_pkts) &
(vq->size - 1);
-   if (src_it->count) {
+   if (it_pool[it_idx].count) {
uint16_t from, to;
 
-   async_fill_desc(&tdes[pkt_burst_idx++], src_it, dst_it);
+   async_fill_desc(&tdes[pkt_burst_idx++],
+   &it_pool[it_idx], &it_pool[it_idx + 1]);
pkts_info[slot_idx].descs = num_buffers;
pkts_info[slot_idx].mbuf = pkts[pkt_idx];
async_pkts_log[num_async_pkts].pkt_idx = pkt_idx;
async_pkts_log[num_async_pkts++].last_avail_idx =
vq->last_avail_idx;
-   src_iovec += src_it->nr_segs;
-   dst_iovec += dst_it->nr_segs;
-   src_it += 2;
-   dst_it += 2;
-   segs_await += src_it->nr_segs;
+
+   iovec_idx += it_pool[it_idx].nr_segs;
+   it_idx += 2;
+
+   segs_await += it_pool[it_idx].nr_segs;
 
/**
 * recover shadow used ring and keep DMA-occupied
@@ -1541,23 +1557,10 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
 */
from = vq->shadow_used_idx - num_buffers;
to = vq->async_desc_idx & (vq->size - 1);
-   if (num_buffers + to <= vq->size) {
-   rte_memcpy(&vq->async_descs_split[to],
-   &vq->shadow_used_split[from],
-   num_buffers *
-   sizeof(struct vring_used_elem));
-   } else {
-   

[dpdk-dev] [PATCH v9 2/4] vhost: add support for packed ring in async vhost

2021-04-27 Thread Cheng Jiang
For now the async vhost data path only supports the split ring. This
patch enables packed ring in the async vhost data path to make async
vhost compatible with the virtio 1.1 spec.

Signed-off-by: Cheng Jiang 
---
 lib/vhost/rte_vhost_async.h |   1 +
 lib/vhost/vhost.c   |  79 +--
 lib/vhost/vhost.h   |  15 +-
 lib/vhost/virtio_net.c  | 442 ++--
 4 files changed, 489 insertions(+), 48 deletions(-)

diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
index c855ff875e..6faa31f5ad 100644
--- a/lib/vhost/rte_vhost_async.h
+++ b/lib/vhost/rte_vhost_async.h
@@ -89,6 +89,7 @@ struct rte_vhost_async_channel_ops {
 struct async_inflight_info {
struct rte_mbuf *mbuf;
uint16_t descs; /* num of descs inflight */
+   uint16_t nr_buffers; /* num of buffers inflight for packed ring */
 };
 
 /**
diff --git a/lib/vhost/vhost.c b/lib/vhost/vhost.c
index a70fe01d8f..2e3f9eb095 100644
--- a/lib/vhost/vhost.c
+++ b/lib/vhost/vhost.c
@@ -340,17 +340,17 @@ cleanup_device(struct virtio_net *dev, int destroy)
 static void
 vhost_free_async_mem(struct vhost_virtqueue *vq)
 {
-   if (vq->async_pkts_info)
-   rte_free(vq->async_pkts_info);
-   if (vq->async_descs_split)
-   rte_free(vq->async_descs_split);
-   if (vq->it_pool)
-   rte_free(vq->it_pool);
-   if (vq->vec_pool)
-   rte_free(vq->vec_pool);
+   rte_free(vq->async_pkts_info);
 
-   vq->async_pkts_info = NULL;
+   rte_free(vq->async_buffers_packed);
+   vq->async_buffers_packed = NULL;
+   rte_free(vq->async_descs_split);
vq->async_descs_split = NULL;
+
+   rte_free(vq->it_pool);
+   rte_free(vq->vec_pool);
+
+   vq->async_pkts_info = NULL;
vq->it_pool = NULL;
vq->vec_pool = NULL;
 }
@@ -360,10 +360,10 @@ free_vq(struct virtio_net *dev, struct vhost_virtqueue 
*vq)
 {
if (vq_is_packed(dev))
rte_free(vq->shadow_used_packed);
-   else {
+   else
rte_free(vq->shadow_used_split);
-   vhost_free_async_mem(vq);
-   }
+
+   vhost_free_async_mem(vq);
rte_free(vq->batch_copy_elems);
if (vq->iotlb_pool)
rte_mempool_free(vq->iotlb_pool);
@@ -1626,10 +1626,9 @@ int rte_vhost_async_channel_register(int vid, uint16_t 
queue_id,
if (unlikely(vq == NULL || !dev->async_copy))
return -1;
 
-   /* packed queue is not supported */
-   if (unlikely(vq_is_packed(dev) || !f.async_inorder)) {
+   if (unlikely(!f.async_inorder)) {
VHOST_LOG_CONFIG(ERR,
-   "async copy is not supported on packed queue or 
non-inorder mode "
+   "async copy is not supported on non-inorder mode "
"(vid %d, qid: %d)\n", vid, queue_id);
return -1;
}
@@ -1661,24 +1660,60 @@ int rte_vhost_async_channel_register(int vid, uint16_t 
queue_id,
vq->async_pkts_info = rte_malloc_socket(NULL,
vq->size * sizeof(struct async_inflight_info),
RTE_CACHE_LINE_SIZE, node);
+   if (!vq->async_pkts_info) {
+   vhost_free_async_mem(vq);
+   VHOST_LOG_CONFIG(ERR,
+   "async register failed: cannot allocate memory for 
async_pkts_info "
+   "(vid %d, qid: %d)\n", vid, queue_id);
+   goto reg_out;
+   }
+
vq->it_pool = rte_malloc_socket(NULL,
VHOST_MAX_ASYNC_IT * sizeof(struct rte_vhost_iov_iter),
RTE_CACHE_LINE_SIZE, node);
+   if (!vq->it_pool) {
+   vhost_free_async_mem(vq);
+   VHOST_LOG_CONFIG(ERR,
+   "async register failed: cannot allocate memory for 
it_pool "
+   "(vid %d, qid: %d)\n", vid, queue_id);
+   goto reg_out;
+   }
+
vq->vec_pool = rte_malloc_socket(NULL,
VHOST_MAX_ASYNC_VEC * sizeof(struct iovec),
RTE_CACHE_LINE_SIZE, node);
-   vq->async_descs_split = rte_malloc_socket(NULL,
-   vq->size * sizeof(struct vring_used_elem),
-   RTE_CACHE_LINE_SIZE, node);
-   if (!vq->async_descs_split || !vq->async_pkts_info ||
-   !vq->it_pool || !vq->vec_pool) {
+   if (!vq->vec_pool) {
vhost_free_async_mem(vq);
VHOST_LOG_CONFIG(ERR,
-   "async register failed: cannot allocate memory 
for vq data "
-   "(vid %d, qid: %d)\n", vid, queue_id);
+   "async register failed: cannot allocate memory for 
vec_p

[dpdk-dev] [PATCH v9 3/4] vhost: add batch datapath for async vhost packed ring

2021-04-27 Thread Cheng Jiang
Add batch datapath for async vhost packed ring to improve the
performance of small packet processing.
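
As a quick orientation (a condensed sketch, not the patch itself): a
batch of PACKED_BATCH_SIZE packets only takes the CPU batch path when
every packet in it is below the async copy threshold; otherwise the
code falls back to the per-packet async path. The helper below restates
the check done by virtio_dev_rx_async_batch_packed() in the diff, with
struct rte_mbuf stubbed and an illustrative batch size.

#include <stdbool.h>
#include <stdint.h>

#define PACKED_BATCH_SIZE 4		/* illustrative value */

struct mbuf { uint32_t pkt_len; };	/* stand-in for struct rte_mbuf */

/* Return true when the whole batch is small enough to be copied
 * synchronously by the CPU instead of being offloaded to DMA. */
static bool
batch_is_sync_copy(struct mbuf *const pkts[], uint32_t cpy_threshold)
{
	uint16_t i;

	for (i = 0; i < PACKED_BATCH_SIZE; i++)
		if (pkts[i]->pkt_len >= cpy_threshold)
			return false;
	return true;
}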

Signed-off-by: Cheng Jiang 
---
 lib/vhost/virtio_net.c | 42 +-
 1 file changed, 37 insertions(+), 5 deletions(-)

diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 5d540e5599..f60f97ec72 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -1724,6 +1724,29 @@ vhost_update_used_packed(struct vhost_virtqueue *vq,
vq->desc_packed[head_idx].flags = head_flags;
 }
 
+static __rte_always_inline int
+virtio_dev_rx_async_batch_packed(struct virtio_net *dev,
+  struct vhost_virtqueue *vq,
+  struct rte_mbuf **pkts,
+  struct rte_mbuf **comp_pkts, uint32_t *pkt_done)
+{
+   uint16_t i;
+   uint32_t cpy_threshold = vq->async_threshold;
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   if (unlikely(pkts[i]->pkt_len >= cpy_threshold))
+   return -1;
+   }
+   if (!virtio_dev_rx_batch_packed(dev, vq, pkts)) {
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+   comp_pkts[(*pkt_done)++] = pkts[i];
+
+   return 0;
+   }
+
+   return -1;
+}
+
 static __rte_always_inline int
 vhost_enqueue_async_single_packed(struct virtio_net *dev,
struct vhost_virtqueue *vq,
@@ -1872,6 +1895,7 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
struct rte_mbuf **comp_pkts, uint32_t *comp_count)
 {
uint32_t pkt_idx = 0, pkt_burst_idx = 0;
+   uint32_t remained = count;
uint16_t async_descs_idx = 0;
uint16_t num_buffers;
uint16_t num_descs;
@@ -1889,12 +1913,19 @@ virtio_dev_rx_async_submit_packed(struct virtio_net 
*dev,
uint32_t num_async_pkts = 0, num_done_pkts = 0;
struct vring_packed_desc async_descs[vq->size];
 
-   rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
+   do {
+   rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
+   if (remained >= PACKED_BATCH_SIZE) {
+   if (!virtio_dev_rx_async_batch_packed(dev, vq,
+   &pkts[pkt_idx], comp_pkts, &num_done_pkts)) {
+   pkt_idx += PACKED_BATCH_SIZE;
+   remained -= PACKED_BATCH_SIZE;
+   continue;
+   }
+   }
 
-   for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
num_buffers = 0;
num_descs = 0;
-
if (unlikely(virtio_dev_rx_async_single_packed(dev, vq, 
pkts[pkt_idx],
&num_descs, &num_buffers,
&async_descs[async_descs_idx],
@@ -1937,6 +1968,8 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
comp_pkts[num_done_pkts++] = pkts[pkt_idx];
}
 
+   pkt_idx++;
+   remained--;
vq_inc_last_avail_packed(vq, num_descs);
 
/*
@@ -1961,13 +1994,12 @@ virtio_dev_rx_async_submit_packed(struct virtio_net 
*dev,
 */
pkt_err = pkt_burst_idx - n_pkts;
pkt_burst_idx = 0;
-   pkt_idx++;
break;
}
 
pkt_burst_idx = 0;
}
-   }
+   } while (pkt_idx < count);
 
if (pkt_burst_idx) {
n_pkts = vq->async_ops.transfer_data(dev->vid, queue_id, tdes, 
0, pkt_burst_idx);
-- 
2.29.2



[dpdk-dev] [PATCH v9 4/4] doc: add release note for vhost async packed ring

2021-04-27 Thread Cheng Jiang
Add release note for the support of vhost async packed ring.

Signed-off-by: Cheng Jiang 
---
 doc/guides/rel_notes/release_21_05.rst | 4 
 1 file changed, 4 insertions(+)

diff --git a/doc/guides/rel_notes/release_21_05.rst 
b/doc/guides/rel_notes/release_21_05.rst
index b3224dc332..aec020d558 100644
--- a/doc/guides/rel_notes/release_21_05.rst
+++ b/doc/guides/rel_notes/release_21_05.rst
@@ -271,6 +271,10 @@ New Features
   * Added support for crypto adapter forward mode in octeontx2 event and crypto
 device driver.
 
+* **Added support for vhost async packed ring data path.**
+
+  Added packed ring support for async vhost.
+
 
 Removed Items
 -
-- 
2.29.2



[PATCH] add support for async vhost packed ring dequeue

2022-06-13 Thread Cheng Jiang
This patch implements packed ring dequeue data path for asynchronous
vhost.
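
For context (a simplified sketch, not the patch itself): completions
are polled by scanning the per-slot completion flags from the first
in-flight slot, wrapping with modulo because the ring size is not
guaranteed to be a power of two. The helper below restates that scan
from async_poll_dequeue_completed() in the diff, with names simplified
and the vhost structures left out.

#include <stdbool.h>
#include <stdint.h>

static uint16_t
count_completed(bool *pkts_cmpl_flag, uint16_t ring_size,
		uint16_t start_idx, uint16_t max_count)
{
	uint16_t from = start_idx;
	uint16_t nr_cpl_pkts = 0;

	while (pkts_cmpl_flag[from] && max_count--) {
		pkts_cmpl_flag[from] = false;	/* claim the completed slot */
		from = (from + 1) % ring_size;	/* works for any ring size */
		nr_cpl_pkts++;
	}
	return nr_cpl_pkts;
}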

Signed-off-by: Cheng Jiang 
Reviewed-by: Maxime Coquelin 
---
 lib/vhost/virtio_net.c | 218 -
 1 file changed, 192 insertions(+), 26 deletions(-)

diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 68a26eb17d..0d05f7cac3 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -3240,7 +3240,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 }

 static __rte_always_inline uint16_t
-async_poll_dequeue_completed_split(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
+async_poll_dequeue_completed(struct virtio_net *dev, struct vhost_virtqueue 
*vq,
struct rte_mbuf **pkts, uint16_t count, int16_t dma_id,
uint16_t vchan_id, bool legacy_ol_flags)
 {
@@ -3255,7 +3255,7 @@ async_poll_dequeue_completed_split(struct virtio_net 
*dev, struct vhost_virtqueu
from = start_idx;
while (vq->async->pkts_cmpl_flag[from] && count--) {
vq->async->pkts_cmpl_flag[from] = false;
-   from = (from + 1) & (vq->size - 1);
+   from = (from + 1) % vq->size;
nr_cpl_pkts++;
}

@@ -3263,7 +3263,7 @@ async_poll_dequeue_completed_split(struct virtio_net 
*dev, struct vhost_virtqueu
return 0;

for (i = 0; i < nr_cpl_pkts; i++) {
-   from = (start_idx + i) & (vq->size - 1);
+   from = (start_idx + i) % vq->size;
pkts[i] = pkts_info[from].mbuf;

if (virtio_net_with_host_offload(dev))
@@ -3272,10 +3272,14 @@ async_poll_dequeue_completed_split(struct virtio_net 
*dev, struct vhost_virtqueu
}

/* write back completed descs to used ring and update used idx */
-   write_back_completed_descs_split(vq, nr_cpl_pkts);
-   __atomic_add_fetch(&vq->used->idx, nr_cpl_pkts, __ATOMIC_RELEASE);
-   vhost_vring_call_split(dev, vq);
-
+   if (vq_is_packed(dev)) {
+   write_back_completed_descs_packed(vq, nr_cpl_pkts);
+   vhost_vring_call_packed(dev, vq);
+   } else {
+   write_back_completed_descs_split(vq, nr_cpl_pkts);
+   __atomic_add_fetch(&vq->used->idx, nr_cpl_pkts, 
__ATOMIC_RELEASE);
+   vhost_vring_call_split(dev, vq);
+   }
vq->async->pkts_inflight_n -= nr_cpl_pkts;

return nr_cpl_pkts;
@@ -3412,8 +3416,8 @@ virtio_dev_tx_async_split(struct virtio_net *dev, struct 
vhost_virtqueue *vq,

 out:
/* DMA device may serve other queues, unconditionally check completed. 
*/
-   nr_done_pkts = async_poll_dequeue_completed_split(dev, vq, pkts, 
pkts_size,
- dma_id, vchan_id, 
legacy_ol_flags);
+   nr_done_pkts = async_poll_dequeue_completed(dev, vq, pkts, pkts_size,
+   dma_id, vchan_id, 
legacy_ol_flags);

return nr_done_pkts;
 }
@@ -3440,6 +3444,171 @@ virtio_dev_tx_async_split_compliant(struct virtio_net 
*dev,
pkts, count, dma_id, vchan_id, false);
 }

+static __rte_always_inline void
+vhost_async_shadow_dequeue_single_packed(struct vhost_virtqueue *vq, uint16_t 
buf_id)
+{
+   struct vhost_async *async = vq->async;
+   uint16_t idx = async->buffer_idx_packed;
+
+   async->buffers_packed[idx].id = buf_id;
+   async->buffers_packed[idx].len = 0;
+   async->buffers_packed[idx].count = 1;
+
+   async->buffer_idx_packed++;
+   if (async->buffer_idx_packed >= vq->size)
+   async->buffer_idx_packed -= vq->size;
+
+}
+
+static __rte_always_inline int
+virtio_dev_tx_async_single_packed(struct virtio_net *dev,
+   struct vhost_virtqueue *vq,
+   struct rte_mempool *mbuf_pool,
+   struct rte_mbuf *pkts,
+   uint16_t slot_idx,
+   bool legacy_ol_flags)
+{
+   int err;
+   uint16_t buf_id, desc_count = 0;
+   uint16_t nr_vec = 0;
+   uint32_t buf_len;
+   struct buf_vector buf_vec[BUF_VECTOR_MAX];
+   static bool allocerr_warned;
+
+   if (unlikely(fill_vec_buf_packed(dev, vq, vq->last_avail_idx, 
&desc_count,
+buf_vec, &nr_vec, &buf_id, &buf_len,
+VHOST_ACCESS_RO) < 0))
+   return -1;
+
+   if (unlikely(virtio_dev_pktmbuf_prep(dev, pkts, buf_len))) {
+   if (!allocerr_warned) {
+   VHOST_LOG_DATA(ERR, "(%s) Failed mbuf alloc of size %d 
from %s.\n",
+   dev->ifname, buf_len, mbuf_pool->name);
+
+   allocerr_warned = true;
+   }
+   return -1;
+ 

[PATCH] doc: add release notes for async vhost dequeue data-path

2022-06-26 Thread Cheng Jiang
Add release notes for asynchronous vhost dequeue data-path. Emphasize
that split virtqueue and packed virtqueue are both supported in
asynchronous vhost dequeue data-path.

Signed-off-by: Cheng Jiang 
---
 doc/guides/rel_notes/release_22_07.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/guides/rel_notes/release_22_07.rst 
b/doc/guides/rel_notes/release_22_07.rst
index e743031f61..2594ddcec1 100644
--- a/doc/guides/rel_notes/release_22_07.rst
+++ b/doc/guides/rel_notes/release_22_07.rst
@@ -102,7 +102,8 @@ New Features
 * **Added vhost async dequeue API to receive packets from guest.**
 
   Added vhost async dequeue API which can leverage DMA devices to
-  accelerate receiving packets from guest.
+  accelerate receiving packets from guest. Split virtqueue and packed
+  virtqueue are both supported.
 
 * **Added vhost API to get the device type of a vDPA device.**
 
-- 
2.35.1



[PATCH v2] doc: add release notes for async vhost dequeue data-path

2022-06-26 Thread Cheng Jiang
Add release notes for asynchronous vhost dequeue data-path. Emphasize
that split virtqueue and packed virtqueue are both supported in
asynchronous vhost dequeue data-path.

Signed-off-by: Cheng Jiang 
---
v2: fixed a full stop missing in the commit message.

 doc/guides/rel_notes/release_22_07.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/guides/rel_notes/release_22_07.rst 
b/doc/guides/rel_notes/release_22_07.rst
index e743031f61..2594ddcec1 100644
--- a/doc/guides/rel_notes/release_22_07.rst
+++ b/doc/guides/rel_notes/release_22_07.rst
@@ -102,7 +102,8 @@ New Features
 * **Added vhost async dequeue API to receive packets from guest.**

   Added vhost async dequeue API which can leverage DMA devices to
-  accelerate receiving packets from guest.
+  accelerate receiving packets from guest. Split virtqueue and packed
+  virtqueue are both supported.

 * **Added vhost API to get the device type of a vDPA device.**

--
2.35.1



[PATCH v3] doc: add release notes for async vhost dequeue data-path

2022-06-27 Thread Cheng Jiang
Add release notes for asynchronous vhost dequeue data-path. Emphasize
that split virtqueue and packed virtqueue are both supported in
asynchronous vhost dequeue data-path.

Signed-off-by: Cheng Jiang 
---
v3: code rebased.
v2: fixed a full stop missing in the commit message.

 doc/guides/rel_notes/release_22_07.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/guides/rel_notes/release_22_07.rst 
b/doc/guides/rel_notes/release_22_07.rst
index 6365800313..e43ab15260 100644
--- a/doc/guides/rel_notes/release_22_07.rst
+++ b/doc/guides/rel_notes/release_22_07.rst
@@ -107,7 +107,8 @@ New Features
 * **Added vhost async dequeue API to receive packets from guest.**

   Added vhost async dequeue API which can leverage DMA devices to
-  accelerate receiving packets from guest.
+  accelerate receiving packets from guest. Split virtqueue and packed
+  virtqueue are both supported.

 * **Added thread-safe version of in-flight packet clear API in vhost library.**

--
2.35.1



[dpdk-dev] [PATCH v3] vhost: add support for packed ring in async vhost

2021-03-31 Thread Cheng Jiang
For now, the async vhost data path only supports the split ring
structure. In order to make async vhost compatible with the virtio 1.1
spec, this patch enables packed ring in the async vhost data path.

Signed-off-by: Cheng Jiang 
---
v3:
  * fix error handler for DMA-copy packet
  * remove variables that are no longer needed
v2:
  * fix wrong buffer index in rte_vhost_poll_enqueue_completed()
  * add async_buffers_packed memory free in vhost_free_async_mem()

 lib/librte_vhost/rte_vhost_async.h |   1 +
 lib/librte_vhost/vhost.c   |  24 +-
 lib/librte_vhost/vhost.h   |   7 +-
 lib/librte_vhost/virtio_net.c  | 463 +++--
 4 files changed, 457 insertions(+), 38 deletions(-)

diff --git a/lib/librte_vhost/rte_vhost_async.h 
b/lib/librte_vhost/rte_vhost_async.h
index c855ff875..6faa31f5a 100644
--- a/lib/librte_vhost/rte_vhost_async.h
+++ b/lib/librte_vhost/rte_vhost_async.h
@@ -89,6 +89,7 @@ struct rte_vhost_async_channel_ops {
 struct async_inflight_info {
struct rte_mbuf *mbuf;
uint16_t descs; /* num of descs inflight */
+   uint16_t nr_buffers; /* num of buffers inflight for packed ring */
 };

 /**
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 52ab93d1e..51b44d6f2 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -330,15 +330,20 @@ vhost_free_async_mem(struct vhost_virtqueue *vq)
 {
if (vq->async_pkts_info)
rte_free(vq->async_pkts_info);
-   if (vq->async_descs_split)
+   if (vq->async_buffers_packed) {
+   rte_free(vq->async_buffers_packed);
+   vq->async_buffers_packed = NULL;
+   } else {
rte_free(vq->async_descs_split);
+   vq->async_descs_split = NULL;
+   }
+
if (vq->it_pool)
rte_free(vq->it_pool);
if (vq->vec_pool)
rte_free(vq->vec_pool);

vq->async_pkts_info = NULL;
-   vq->async_descs_split = NULL;
vq->it_pool = NULL;
vq->vec_pool = NULL;
 }
@@ -1603,9 +1608,9 @@ int rte_vhost_async_channel_register(int vid, uint16_t 
queue_id,
return -1;

/* packed queue is not supported */
-   if (unlikely(vq_is_packed(dev) || !f.async_inorder)) {
+   if (unlikely(!f.async_inorder)) {
VHOST_LOG_CONFIG(ERR,
-   "async copy is not supported on packed queue or 
non-inorder mode "
+   "async copy is not supported on non-inorder mode "
"(vid %d, qid: %d)\n", vid, queue_id);
return -1;
}
@@ -1643,10 +1648,17 @@ int rte_vhost_async_channel_register(int vid, uint16_t 
queue_id,
vq->vec_pool = rte_malloc_socket(NULL,
VHOST_MAX_ASYNC_VEC * sizeof(struct iovec),
RTE_CACHE_LINE_SIZE, node);
-   vq->async_descs_split = rte_malloc_socket(NULL,
+   if (vq_is_packed(dev)) {
+   vq->async_buffers_packed = rte_malloc_socket(NULL,
+   vq->size * sizeof(struct vring_used_elem_packed),
+   RTE_CACHE_LINE_SIZE, node);
+   } else {
+   vq->async_descs_split = rte_malloc_socket(NULL,
vq->size * sizeof(struct vring_used_elem),
RTE_CACHE_LINE_SIZE, node);
-   if (!vq->async_descs_split || !vq->async_pkts_info ||
+   }
+
+   if (!vq->async_pkts_info ||
!vq->it_pool || !vq->vec_pool) {
vhost_free_async_mem(vq);
VHOST_LOG_CONFIG(ERR,
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 658f6fc28..d6324fbf8 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -206,9 +206,14 @@ struct vhost_virtqueue {
uint16_tasync_pkts_idx;
uint16_tasync_pkts_inflight_n;
uint16_tasync_last_pkts_n;
-   struct vring_used_elem  *async_descs_split;
+   union {
+   struct vring_used_elem  *async_descs_split;
+   struct vring_used_elem_packed *async_buffers_packed;
+   };
uint16_t async_desc_idx;
+   uint16_t async_packed_buffer_idx;
uint16_t last_async_desc_idx;
+   uint16_t last_async_buffer_idx;

/* vq async features */
boolasync_inorder;
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 583bf379c..fa2dfde02 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -363,8 +363,7 @@ vhost_shadow_dequeue_single_packed_inorder(struct 
vhost_virtqueue *vq,
 }

 static __rte_always_inline void
-vhost_shadow_enqueue_single_packed(struct virtio_net *dev,
-  struct vhost_virtqueue *vq,
+vhost_shadow_enqueue_packed(struct vhost_virtqueue *vq,
 

[dpdk-dev] [PATCH v4 0/4] add support for packed ring in async vhost

2021-04-10 Thread Cheng Jiang
For now, the async vhost data path only supports the split ring
structure. In order to make async vhost compatible with the virtio 1.1
spec, this patch set cleans up the async split ring code and enables
packed ring in the async vhost data path. Batch datapath is also
enabled in packed ring.

v4:
  * change the patch structure
  * clean code for async split ring
  * reuse some code from split ring
  * change the error handler for DMA-copy packet
  * add check for malloc
  * remove useless code
  * add doc update
v3:
  * fix error handler for DMA-copy packet
v2:
  * fix wrong buffer index in rte_vhost_poll_enqueue_completed()
  * add async_buffers_packed memory free in vhost_free_async_mem()

Cheng Jiang (4):
  vhost: abstract and reorganize async split ring code
  vhost: add support for packed ring in async vhost
  vhost: add batch datapath for async vhost packed ring
  doc: add release note for vhost async packed ring

 doc/guides/rel_notes/release_21_05.rst |   4 +
 lib/librte_vhost/rte_vhost_async.h |   1 +
 lib/librte_vhost/vhost.c   |  27 +-
 lib/librte_vhost/vhost.h   |   7 +-
 lib/librte_vhost/virtio_net.c  | 603 ++---
 5 files changed, 560 insertions(+), 82 deletions(-)

--
2.29.2



[dpdk-dev] [PATCH v4 1/4] vhost: abstract and reorganize async split ring code

2021-04-10 Thread Cheng Jiang
In order to improve code efficiency and readability when async packed
ring support is enabled, this patch abstracts some functions such as
shadow_ring_store() and write_back_completed_descs_split(), and
improves the efficiency of some pointer offset calculations.
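
The shadow_ring_store() helper mentioned above wraps a shadow-ring copy
around the end of the destination ring. Below is a standalone sketch of
the same idea, for illustration only: names are invented and plain
memcpy stands in for rte_memcpy. Note that the sketch checks the
destination index against the ring size; the hunk below still checks
the source index, which a later revision of this series corrects.

#include <stdint.h>
#include <string.h>

static void
ring_copy(void *d_ring, const void *s_ring, uint16_t ring_size,
		uint16_t s_idx, uint16_t d_idx, uint16_t count,
		uint16_t elem_size)
{
	uint8_t *dst = d_ring;
	const uint8_t *src = s_ring;

	if (d_idx + count <= ring_size) {
		/* contiguous copy, no wrap needed */
		memcpy(dst + d_idx * elem_size, src + s_idx * elem_size,
				count * elem_size);
	} else {
		uint16_t first = ring_size - d_idx;

		/* copy up to the end of the destination ring ... */
		memcpy(dst + d_idx * elem_size, src + s_idx * elem_size,
				first * elem_size);
		/* ... then wrap around to its start for the rest */
		memcpy(dst, src + (s_idx + first) * elem_size,
				(count - first) * elem_size);
	}
}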

Signed-off-by: Cheng Jiang 
---
 lib/librte_vhost/virtio_net.c | 146 +++---
 1 file changed, 84 insertions(+), 62 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index ff3987860..69553e7c3 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -1458,6 +1458,29 @@ virtio_dev_rx_async_get_info_idx(uint16_t pkts_idx,
(vq_size - n_inflight + pkts_idx) & (vq_size - 1);
 }
 
+static __rte_always_inline void
+shadow_ring_store(struct vhost_virtqueue *vq,  void *shadow_ring, void *d_ring,
+   uint16_t s_idx, uint16_t d_idx,
+   uint16_t count, uint16_t elem_size)
+{
+   if (s_idx + count <= vq->size) {
+   rte_memcpy((void *)((uintptr_t)d_ring + d_idx * elem_size),
+   (void *)((uintptr_t)shadow_ring + s_idx * elem_size),
+   count * elem_size);
+   } else {
+   uint16_t size = vq->size - d_idx;
+
+   rte_memcpy((void *)((uintptr_t)d_ring + d_idx * elem_size),
+   (void *)((uintptr_t)shadow_ring + s_idx * elem_size),
+   size * elem_size);
+
+   rte_memcpy((void *)((uintptr_t)d_ring),
+   (void *)((uintptr_t)shadow_ring +
+   (s_idx + size) * elem_size),
+   (count - size) * elem_size);
+   }
+}
+
 static __rte_noinline uint32_t
 virtio_dev_rx_async_submit_split(struct virtio_net *dev,
struct vhost_virtqueue *vq, uint16_t queue_id,
@@ -1478,6 +1501,7 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
struct rte_vhost_iov_iter *dst_it = it_pool + 1;
uint16_t slot_idx = 0;
uint16_t segs_await = 0;
+   uint16_t iovec_idx = 0, it_idx = 0;
struct async_inflight_info *pkts_info = vq->async_pkts_info;
uint32_t n_pkts = 0, pkt_err = 0;
uint32_t num_async_pkts = 0, num_done_pkts = 0;
@@ -1513,27 +1537,32 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
 
if (async_mbuf_to_desc(dev, vq, pkts[pkt_idx],
buf_vec, nr_vec, num_buffers,
-   src_iovec, dst_iovec, src_it, dst_it) < 0) {
+   &src_iovec[iovec_idx],
+   &dst_iovec[iovec_idx],
+   &src_it[it_idx],
+   &dst_it[it_idx]) < 0) {
vq->shadow_used_idx -= num_buffers;
break;
}
 
slot_idx = (vq->async_pkts_idx + num_async_pkts) &
(vq->size - 1);
-   if (src_it->count) {
+   if (src_it[it_idx].count) {
uint16_t from, to;
 
-   async_fill_desc(&tdes[pkt_burst_idx++], src_it, dst_it);
+   async_fill_desc(&tdes[pkt_burst_idx++],
+   &src_it[it_idx],
+   &dst_it[it_idx]);
pkts_info[slot_idx].descs = num_buffers;
pkts_info[slot_idx].mbuf = pkts[pkt_idx];
async_pkts_log[num_async_pkts].pkt_idx = pkt_idx;
async_pkts_log[num_async_pkts++].last_avail_idx =
vq->last_avail_idx;
-   src_iovec += src_it->nr_segs;
-   dst_iovec += dst_it->nr_segs;
-   src_it += 2;
-   dst_it += 2;
-   segs_await += src_it->nr_segs;
+
+   iovec_idx += src_it[it_idx].nr_segs;
+   it_idx += 2;
+
+   segs_await += src_it[it_idx].nr_segs;
 
/**
 * recover shadow used ring and keep DMA-occupied
@@ -1541,23 +1570,12 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
 */
from = vq->shadow_used_idx - num_buffers;
to = vq->async_desc_idx & (vq->size - 1);
-   if (num_buffers + to <= vq->size) {
-   rte_memcpy(&vq->async_descs_split[to],
-   &vq->shadow_used_split[from],
-   num_buffers *
-   sizeof(struct vring_used_elem));
-   } else {
-   int size = vq->size - to;
-
-   

[dpdk-dev] [PATCH v4 2/4] vhost: add support for packed ring in async vhost

2021-04-10 Thread Cheng Jiang
For now, the async vhost data path only supports the split ring
structure. In order to make async vhost compatible with the virtio 1.1
spec, this patch enables packed ring in the async vhost data path.

Signed-off-by: Cheng Jiang 
---
 lib/librte_vhost/rte_vhost_async.h |   1 +
 lib/librte_vhost/vhost.c   |  27 +-
 lib/librte_vhost/vhost.h   |   7 +-
 lib/librte_vhost/virtio_net.c  | 428 -
 4 files changed, 441 insertions(+), 22 deletions(-)

diff --git a/lib/librte_vhost/rte_vhost_async.h 
b/lib/librte_vhost/rte_vhost_async.h
index c855ff875..6faa31f5a 100644
--- a/lib/librte_vhost/rte_vhost_async.h
+++ b/lib/librte_vhost/rte_vhost_async.h
@@ -89,6 +89,7 @@ struct rte_vhost_async_channel_ops {
 struct async_inflight_info {
struct rte_mbuf *mbuf;
uint16_t descs; /* num of descs inflight */
+   uint16_t nr_buffers; /* num of buffers inflight for packed ring */
 };
 
 /**
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index a70fe01d8..8c9935c0f 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -342,15 +342,21 @@ vhost_free_async_mem(struct vhost_virtqueue *vq)
 {
if (vq->async_pkts_info)
rte_free(vq->async_pkts_info);
-   if (vq->async_descs_split)
+   if (vq->async_buffers_packed) {
+   rte_free(vq->async_buffers_packed);
+   vq->async_buffers_packed = NULL;
+   }
+   if (vq->async_descs_split) {
rte_free(vq->async_descs_split);
+   vq->async_descs_split = NULL;
+   }
+
if (vq->it_pool)
rte_free(vq->it_pool);
if (vq->vec_pool)
rte_free(vq->vec_pool);
 
vq->async_pkts_info = NULL;
-   vq->async_descs_split = NULL;
vq->it_pool = NULL;
vq->vec_pool = NULL;
 }
@@ -1627,9 +1633,9 @@ int rte_vhost_async_channel_register(int vid, uint16_t 
queue_id,
return -1;
 
/* packed queue is not supported */
-   if (unlikely(vq_is_packed(dev) || !f.async_inorder)) {
+   if (unlikely(!f.async_inorder)) {
VHOST_LOG_CONFIG(ERR,
-   "async copy is not supported on packed queue or 
non-inorder mode "
+   "async copy is not supported on non-inorder mode "
"(vid %d, qid: %d)\n", vid, queue_id);
return -1;
}
@@ -1667,11 +1673,18 @@ int rte_vhost_async_channel_register(int vid, uint16_t 
queue_id,
vq->vec_pool = rte_malloc_socket(NULL,
VHOST_MAX_ASYNC_VEC * sizeof(struct iovec),
RTE_CACHE_LINE_SIZE, node);
-   vq->async_descs_split = rte_malloc_socket(NULL,
+   if (vq_is_packed(dev)) {
+   vq->async_buffers_packed = rte_malloc_socket(NULL,
+   vq->size * sizeof(struct vring_used_elem_packed),
+   RTE_CACHE_LINE_SIZE, node);
+   } else {
+   vq->async_descs_split = rte_malloc_socket(NULL,
vq->size * sizeof(struct vring_used_elem),
RTE_CACHE_LINE_SIZE, node);
-   if (!vq->async_descs_split || !vq->async_pkts_info ||
-   !vq->it_pool || !vq->vec_pool) {
+   }
+
+   if (!vq->async_buffers_packed || !vq->async_descs_split ||
+   !vq->async_pkts_info || !vq->it_pool || !vq->vec_pool) {
vhost_free_async_mem(vq);
VHOST_LOG_CONFIG(ERR,
"async register failed: cannot allocate memory 
for vq data "
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index f628714c2..fe131ae8f 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -201,9 +201,14 @@ struct vhost_virtqueue {
uint16_tasync_pkts_idx;
uint16_tasync_pkts_inflight_n;
uint16_tasync_last_pkts_n;
-   struct vring_used_elem  *async_descs_split;
+   union {
+   struct vring_used_elem  *async_descs_split;
+   struct vring_used_elem_packed *async_buffers_packed;
+   };
uint16_t async_desc_idx;
+   uint16_t async_packed_buffer_idx;
uint16_t last_async_desc_idx;
+   uint16_t last_async_buffer_idx;
 
/* vq async features */
boolasync_inorder;
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 69553e7c3..2b8b873ca 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -363,8 +363,7 @@ vhost_shadow_dequeue_single_packed_inorder(struct 
vhost_virtqueue *vq,
 }
 
 static __rte_always_inline void
-vhost_shadow_enqueue_single_packed(struct virtio_net *dev,
-  struct vhost_virtqueue *vq,
+vhost_shadow_enqueue_packed(struct vhost_virtqueue *vq,

[dpdk-dev] [PATCH v4 3/4] vhost: add batch datapath for async vhost packed ring

2021-04-10 Thread Cheng Jiang
Add batch datapath for async vhost packed ring to improve the
performance of small packet processing.

Signed-off-by: Cheng Jiang 
---
 lib/librte_vhost/virtio_net.c | 43 +++
 1 file changed, 38 insertions(+), 5 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 2b8b873ca..c98fe6dbb 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -1721,6 +1721,29 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
return pkt_idx;
 }
 
+static __rte_always_inline int
+virtio_dev_rx_async_batch_packed(struct virtio_net *dev,
+  struct vhost_virtqueue *vq,
+  struct rte_mbuf **pkts,
+  struct rte_mbuf **comp_pkts, uint32_t *pkt_done)
+{
+   uint16_t i;
+   uint32_t cpy_threshold = vq->async_threshold;
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   if (unlikely(pkts[i]->pkt_len >= cpy_threshold))
+   return -1;
+   }
+   if (!virtio_dev_rx_batch_packed(dev, vq, pkts)) {
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+   comp_pkts[(*pkt_done)++] = pkts[i];
+
+   return 0;
+   }
+
+   return -1;
+}
+
 static __rte_always_inline int
 vhost_enqueue_async_single_packed(struct virtio_net *dev,
struct vhost_virtqueue *vq,
@@ -1844,6 +1867,7 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
struct rte_mbuf **comp_pkts, uint32_t *comp_count)
 {
uint32_t pkt_idx = 0, pkt_burst_idx = 0;
+   uint32_t remained = count;
uint16_t async_descs_idx = 0;
uint16_t num_buffers;
uint16_t num_desc;
@@ -1863,9 +1887,17 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
uint32_t num_async_pkts = 0, num_done_pkts = 0;
struct vring_packed_desc async_descs[vq->size];
 
-   rte_prefetch0(&vq->desc_packed[vq->last_avail_idx & (vq->size - 1)]);
-
-   for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
+   do {
+   rte_prefetch0(&vq->desc_packed[vq->last_avail_idx &
+   (vq->size - 1)]);
+   if (remained >= PACKED_BATCH_SIZE) {
+   if (!virtio_dev_rx_async_batch_packed(dev, vq,
+   &pkts[pkt_idx], comp_pkts, &num_done_pkts)) {
+   pkt_idx += PACKED_BATCH_SIZE;
+   remained -= PACKED_BATCH_SIZE;
+   continue;
+   }
+   }
if (unlikely(virtio_dev_rx_async_single_packed(dev, vq,
pkts[pkt_idx],
&num_desc, &num_buffers,
@@ -1912,6 +1944,8 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
} else
comp_pkts[num_done_pkts++] = pkts[pkt_idx];
 
+   pkt_idx++;
+   remained--;
vq_inc_last_avail_packed(vq, num_desc);
 
/*
@@ -1937,13 +1971,12 @@ virtio_dev_rx_async_submit_packed(struct virtio_net 
*dev,
 */
pkt_err = pkt_burst_idx - n_pkts;
pkt_burst_idx = 0;
-   pkt_idx++;
break;
}
 
pkt_burst_idx = 0;
}
-   }
+   } while (pkt_idx < count);
 
if (pkt_burst_idx) {
n_pkts = vq->async_ops.transfer_data(dev->vid,
-- 
2.29.2



[dpdk-dev] [PATCH v4 4/4] doc: add release note for vhost async packed ring

2021-04-10 Thread Cheng Jiang
Add release note for the support of vhost async packed ring.

Signed-off-by: Cheng Jiang 
---
 doc/guides/rel_notes/release_21_05.rst | 4 
 1 file changed, 4 insertions(+)

diff --git a/doc/guides/rel_notes/release_21_05.rst 
b/doc/guides/rel_notes/release_21_05.rst
index 374d6d98e..eb5200669 100644
--- a/doc/guides/rel_notes/release_21_05.rst
+++ b/doc/guides/rel_notes/release_21_05.rst
@@ -131,6 +131,10 @@ New Features
   * Added command to display Rx queue used descriptor count.
 ``show port (port_id) rxq (queue_id) desc used count``
 
+* **Added support for vhost async packed ring data path.**
+
+  Added packed ring support for async vhost.
+
 
 Removed Items
 -
-- 
2.29.2



[dpdk-dev] [PATCH v5 1/4] vhost: abstract and reorganize async split ring code

2021-04-12 Thread Cheng Jiang
In order to improve code efficiency and readability when async packed
ring support is enabled, this patch abstracts some functions such as
shadow_ring_store() and write_back_completed_descs_split(), and
improves the efficiency of some pointer offset calculations.

Signed-off-by: Cheng Jiang 
---
 lib/librte_vhost/virtio_net.c | 146 +++---
 1 file changed, 84 insertions(+), 62 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index ff3987860..c43ab0093 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -1458,6 +1458,29 @@ virtio_dev_rx_async_get_info_idx(uint16_t pkts_idx,
(vq_size - n_inflight + pkts_idx) & (vq_size - 1);
 }
 
+static __rte_always_inline void
+shadow_ring_store(struct vhost_virtqueue *vq,  void *shadow_ring, void *d_ring,
+   uint16_t s_idx, uint16_t d_idx,
+   uint16_t count, uint16_t elem_size)
+{
+   if (d_idx + count <= vq->size) {
+   rte_memcpy((void *)((uintptr_t)d_ring + d_idx * elem_size),
+   (void *)((uintptr_t)shadow_ring + s_idx * elem_size),
+   count * elem_size);
+   } else {
+   uint16_t size = vq->size - d_idx;
+
+   rte_memcpy((void *)((uintptr_t)d_ring + d_idx * elem_size),
+   (void *)((uintptr_t)shadow_ring + s_idx * elem_size),
+   size * elem_size);
+
+   rte_memcpy((void *)((uintptr_t)d_ring),
+   (void *)((uintptr_t)shadow_ring +
+   (s_idx + size) * elem_size),
+   (count - size) * elem_size);
+   }
+}
+
 static __rte_noinline uint32_t
 virtio_dev_rx_async_submit_split(struct virtio_net *dev,
struct vhost_virtqueue *vq, uint16_t queue_id,
@@ -1478,6 +1501,7 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
struct rte_vhost_iov_iter *dst_it = it_pool + 1;
uint16_t slot_idx = 0;
uint16_t segs_await = 0;
+   uint16_t iovec_idx = 0, it_idx = 0;
struct async_inflight_info *pkts_info = vq->async_pkts_info;
uint32_t n_pkts = 0, pkt_err = 0;
uint32_t num_async_pkts = 0, num_done_pkts = 0;
@@ -1513,27 +1537,32 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
 
if (async_mbuf_to_desc(dev, vq, pkts[pkt_idx],
buf_vec, nr_vec, num_buffers,
-   src_iovec, dst_iovec, src_it, dst_it) < 0) {
+   &src_iovec[iovec_idx],
+   &dst_iovec[iovec_idx],
+   &src_it[it_idx],
+   &dst_it[it_idx]) < 0) {
vq->shadow_used_idx -= num_buffers;
break;
}
 
slot_idx = (vq->async_pkts_idx + num_async_pkts) &
(vq->size - 1);
-   if (src_it->count) {
+   if (src_it[it_idx].count) {
uint16_t from, to;
 
-   async_fill_desc(&tdes[pkt_burst_idx++], src_it, dst_it);
+   async_fill_desc(&tdes[pkt_burst_idx++],
+   &src_it[it_idx],
+   &dst_it[it_idx]);
pkts_info[slot_idx].descs = num_buffers;
pkts_info[slot_idx].mbuf = pkts[pkt_idx];
async_pkts_log[num_async_pkts].pkt_idx = pkt_idx;
async_pkts_log[num_async_pkts++].last_avail_idx =
vq->last_avail_idx;
-   src_iovec += src_it->nr_segs;
-   dst_iovec += dst_it->nr_segs;
-   src_it += 2;
-   dst_it += 2;
-   segs_await += src_it->nr_segs;
+
+   iovec_idx += src_it[it_idx].nr_segs;
+   it_idx += 2;
+
+   segs_await += src_it[it_idx].nr_segs;
 
/**
 * recover shadow used ring and keep DMA-occupied
@@ -1541,23 +1570,12 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
 */
from = vq->shadow_used_idx - num_buffers;
to = vq->async_desc_idx & (vq->size - 1);
-   if (num_buffers + to <= vq->size) {
-   rte_memcpy(&vq->async_descs_split[to],
-   &vq->shadow_used_split[from],
-   num_buffers *
-   sizeof(struct vring_used_elem));
-   } else {
-   int size = vq->size - to;
-
-   

[dpdk-dev] [PATCH v5 2/4] vhost: add support for packed ring in async vhost

2021-04-12 Thread Cheng Jiang
For now, the async vhost data path only supports the split ring
structure. In order to make async vhost compatible with the virtio 1.1
spec, this patch enables packed ring in the async vhost data path.

Signed-off-by: Cheng Jiang 
---
 lib/librte_vhost/rte_vhost_async.h |   1 +
 lib/librte_vhost/vhost.c   |  27 +-
 lib/librte_vhost/vhost.h   |   7 +-
 lib/librte_vhost/virtio_net.c  | 438 +++--
 4 files changed, 448 insertions(+), 25 deletions(-)

diff --git a/lib/librte_vhost/rte_vhost_async.h 
b/lib/librte_vhost/rte_vhost_async.h
index c855ff875..6faa31f5a 100644
--- a/lib/librte_vhost/rte_vhost_async.h
+++ b/lib/librte_vhost/rte_vhost_async.h
@@ -89,6 +89,7 @@ struct rte_vhost_async_channel_ops {
 struct async_inflight_info {
struct rte_mbuf *mbuf;
uint16_t descs; /* num of descs inflight */
+   uint16_t nr_buffers; /* num of buffers inflight for packed ring */
 };
 
 /**
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index a70fe01d8..8c9935c0f 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -342,15 +342,21 @@ vhost_free_async_mem(struct vhost_virtqueue *vq)
 {
if (vq->async_pkts_info)
rte_free(vq->async_pkts_info);
-   if (vq->async_descs_split)
+   if (vq->async_buffers_packed) {
+   rte_free(vq->async_buffers_packed);
+   vq->async_buffers_packed = NULL;
+   }
+   if (vq->async_descs_split) {
rte_free(vq->async_descs_split);
+   vq->async_descs_split = NULL;
+   }
+
if (vq->it_pool)
rte_free(vq->it_pool);
if (vq->vec_pool)
rte_free(vq->vec_pool);
 
vq->async_pkts_info = NULL;
-   vq->async_descs_split = NULL;
vq->it_pool = NULL;
vq->vec_pool = NULL;
 }
@@ -1627,9 +1633,9 @@ int rte_vhost_async_channel_register(int vid, uint16_t 
queue_id,
return -1;
 
/* packed queue is not supported */
-   if (unlikely(vq_is_packed(dev) || !f.async_inorder)) {
+   if (unlikely(!f.async_inorder)) {
VHOST_LOG_CONFIG(ERR,
-   "async copy is not supported on packed queue or 
non-inorder mode "
+   "async copy is not supported on non-inorder mode "
"(vid %d, qid: %d)\n", vid, queue_id);
return -1;
}
@@ -1667,11 +1673,18 @@ int rte_vhost_async_channel_register(int vid, uint16_t 
queue_id,
vq->vec_pool = rte_malloc_socket(NULL,
VHOST_MAX_ASYNC_VEC * sizeof(struct iovec),
RTE_CACHE_LINE_SIZE, node);
-   vq->async_descs_split = rte_malloc_socket(NULL,
+   if (vq_is_packed(dev)) {
+   vq->async_buffers_packed = rte_malloc_socket(NULL,
+   vq->size * sizeof(struct vring_used_elem_packed),
+   RTE_CACHE_LINE_SIZE, node);
+   } else {
+   vq->async_descs_split = rte_malloc_socket(NULL,
vq->size * sizeof(struct vring_used_elem),
RTE_CACHE_LINE_SIZE, node);
-   if (!vq->async_descs_split || !vq->async_pkts_info ||
-   !vq->it_pool || !vq->vec_pool) {
+   }
+
+   if (!vq->async_buffers_packed || !vq->async_descs_split ||
+   !vq->async_pkts_info || !vq->it_pool || !vq->vec_pool) {
vhost_free_async_mem(vq);
VHOST_LOG_CONFIG(ERR,
"async register failed: cannot allocate memory 
for vq data "
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index f628714c2..fe131ae8f 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -201,9 +201,14 @@ struct vhost_virtqueue {
uint16_tasync_pkts_idx;
uint16_tasync_pkts_inflight_n;
uint16_tasync_last_pkts_n;
-   struct vring_used_elem  *async_descs_split;
+   union {
+   struct vring_used_elem  *async_descs_split;
+   struct vring_used_elem_packed *async_buffers_packed;
+   };
uint16_t async_desc_idx;
+   uint16_t async_packed_buffer_idx;
uint16_t last_async_desc_idx;
+   uint16_t last_async_buffer_idx;
 
/* vq async features */
boolasync_inorder;
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index c43ab0093..410be9678 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -363,14 +363,14 @@ vhost_shadow_dequeue_single_packed_inorder(struct 
vhost_virtqueue *vq,
 }
 
 static __rte_always_inline void
-vhost_shadow_enqueue_single_packed(struct virtio_net *dev,
-  struct vhost_virtqueue *vq,
-  uint32_t len[],
- 

[dpdk-dev] [PATCH v5 3/4] vhost: add batch datapath for async vhost packed ring

2021-04-12 Thread Cheng Jiang
Add batch datapath for async vhost packed ring to improve the
performance of small packet processing.

Signed-off-by: Cheng Jiang 
---
 lib/librte_vhost/virtio_net.c | 43 +++
 1 file changed, 38 insertions(+), 5 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 410be9678..854f7afd6 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -1723,6 +1723,29 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
return pkt_idx;
 }
 
+static __rte_always_inline int
+virtio_dev_rx_async_batch_packed(struct virtio_net *dev,
+  struct vhost_virtqueue *vq,
+  struct rte_mbuf **pkts,
+  struct rte_mbuf **comp_pkts, uint32_t *pkt_done)
+{
+   uint16_t i;
+   uint32_t cpy_threshold = vq->async_threshold;
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   if (unlikely(pkts[i]->pkt_len >= cpy_threshold))
+   return -1;
+   }
+   if (!virtio_dev_rx_batch_packed(dev, vq, pkts)) {
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+   comp_pkts[(*pkt_done)++] = pkts[i];
+
+   return 0;
+   }
+
+   return -1;
+}
+
 static __rte_always_inline int
 vhost_enqueue_async_single_packed(struct virtio_net *dev,
struct vhost_virtqueue *vq,
@@ -1846,6 +1869,7 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
struct rte_mbuf **comp_pkts, uint32_t *comp_count)
 {
uint32_t pkt_idx = 0, pkt_burst_idx = 0;
+   uint32_t remained = count;
uint16_t async_descs_idx = 0;
uint16_t num_buffers;
uint16_t num_desc;
@@ -1865,9 +1889,17 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
uint32_t num_async_pkts = 0, num_done_pkts = 0;
struct vring_packed_desc async_descs[vq->size];
 
-   rte_prefetch0(&vq->desc_packed[vq->last_avail_idx & (vq->size - 1)]);
-
-   for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
+   do {
+   rte_prefetch0(&vq->desc_packed[vq->last_avail_idx &
+   (vq->size - 1)]);
+   if (remained >= PACKED_BATCH_SIZE) {
+   if (!virtio_dev_rx_async_batch_packed(dev, vq,
+   &pkts[pkt_idx], comp_pkts, &num_done_pkts)) {
+   pkt_idx += PACKED_BATCH_SIZE;
+   remained -= PACKED_BATCH_SIZE;
+   continue;
+   }
+   }
if (unlikely(virtio_dev_rx_async_single_packed(dev, vq,
pkts[pkt_idx],
&num_desc, &num_buffers,
@@ -1915,6 +1947,8 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
} else
comp_pkts[num_done_pkts++] = pkts[pkt_idx];
 
+   pkt_idx++;
+   remained--;
vq_inc_last_avail_packed(vq, num_desc);
 
/*
@@ -1940,13 +1974,12 @@ virtio_dev_rx_async_submit_packed(struct virtio_net 
*dev,
 */
pkt_err = pkt_burst_idx - n_pkts;
pkt_burst_idx = 0;
-   pkt_idx++;
break;
}
 
pkt_burst_idx = 0;
}
-   }
+   } while (pkt_idx < count);
 
if (pkt_burst_idx) {
n_pkts = vq->async_ops.transfer_data(dev->vid,
-- 
2.29.2



[dpdk-dev] [PATCH v5 4/4] doc: add release note for vhost async packed ring

2021-04-12 Thread Cheng Jiang
Add release note for the support of vhost async packed ring.

Signed-off-by: Cheng Jiang 
---
 doc/guides/rel_notes/release_21_05.rst | 4 
 1 file changed, 4 insertions(+)

diff --git a/doc/guides/rel_notes/release_21_05.rst 
b/doc/guides/rel_notes/release_21_05.rst
index 374d6d98e..eb5200669 100644
--- a/doc/guides/rel_notes/release_21_05.rst
+++ b/doc/guides/rel_notes/release_21_05.rst
@@ -131,6 +131,10 @@ New Features
   * Added command to display Rx queue used descriptor count.
 ``show port (port_id) rxq (queue_id) desc used count``
 
+* **Added support for vhost async packed ring data path.**
+
+  Added packed ring support for async vhost.
+
 
 Removed Items
 -
-- 
2.29.2



[dpdk-dev] [PATCH v5 0/4] add support for packed ring in async vhost

2021-04-12 Thread Cheng Jiang
For now, the async vhost data path only supports the split ring
structure. In order to make async vhost compatible with the virtio 1.1
spec, this patch set cleans up the async split ring code and enables
packed ring in the async vhost data path. Batch datapath is also
enabled in async vhost packed ring.

v5:
 * clean some codes for packed ring datapath
 * fix an index error in shadow_ring_store()
v4:
  * change the patch structure
  * clean code for async split ring
  * reuse some code from split ring
  * change the error handler for DMA-copy packet
  * add check for malloc
  * remove useless code
  * add doc update
v3:
  * fix error handler for DMA-copy packet
v2:
  * fix wrong buffer index in rte_vhost_poll_enqueue_completed()
  * add async_buffers_packed memory free in vhost_free_async_mem()

Cheng Jiang (4):
  vhost: abstract and reorganize async split ring code
  vhost: add support for packed ring in async vhost
  vhost: add batch datapath for async vhost packed ring
  doc: add release note for vhost async packed ring

 doc/guides/rel_notes/release_21_05.rst |   4 +
 lib/librte_vhost/rte_vhost_async.h |   1 +
 lib/librte_vhost/vhost.c   |  27 +-
 lib/librte_vhost/vhost.h   |   7 +-
 lib/librte_vhost/virtio_net.c  | 613 +
 5 files changed, 567 insertions(+), 85 deletions(-)

--
2.29.2



[dpdk-dev] [PATCH v6 3/4] vhost: add batch datapath for async vhost packed ring

2021-04-13 Thread Cheng Jiang
Add batch datapath for async vhost packed ring to improve the
performance of small packet processing.

Signed-off-by: Cheng Jiang 
---
 lib/librte_vhost/virtio_net.c | 41 +++
 1 file changed, 37 insertions(+), 4 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index e2b35a319..42439a86d 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -1725,6 +1725,29 @@ vhost_update_used_packed(struct vhost_virtqueue *vq,
vq->desc_packed[head_idx].flags = head_flags;
 }
 
+static __rte_always_inline int
+virtio_dev_rx_async_batch_packed(struct virtio_net *dev,
+  struct vhost_virtqueue *vq,
+  struct rte_mbuf **pkts,
+  struct rte_mbuf **comp_pkts, uint32_t *pkt_done)
+{
+   uint16_t i;
+   uint32_t cpy_threshold = vq->async_threshold;
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   if (unlikely(pkts[i]->pkt_len >= cpy_threshold))
+   return -1;
+   }
+   if (!virtio_dev_rx_batch_packed(dev, vq, pkts)) {
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+   comp_pkts[(*pkt_done)++] = pkts[i];
+
+   return 0;
+   }
+
+   return -1;
+}
+
 static __rte_always_inline int
 vhost_enqueue_async_single_packed(struct virtio_net *dev,
struct vhost_virtqueue *vq,
@@ -1875,6 +1898,7 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
struct rte_mbuf **comp_pkts, uint32_t *comp_count)
 {
uint32_t pkt_idx = 0, pkt_burst_idx = 0;
+   uint32_t remained = count;
uint16_t async_descs_idx = 0;
uint16_t num_buffers;
uint16_t num_desc;
@@ -1892,9 +1916,17 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
uint32_t num_async_pkts = 0, num_done_pkts = 0;
struct vring_packed_desc async_descs[vq->size];
 
-   rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
+   do {
+   rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
+   if (remained >= PACKED_BATCH_SIZE) {
+   if (!virtio_dev_rx_async_batch_packed(dev, vq,
+   &pkts[pkt_idx], comp_pkts, &num_done_pkts)) {
+   pkt_idx += PACKED_BATCH_SIZE;
+   remained -= PACKED_BATCH_SIZE;
+   continue;
+   }
+   }
 
-   for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
if (unlikely(virtio_dev_rx_async_single_packed(dev, vq, 
pkts[pkt_idx],
&num_desc, &num_buffers,
&async_descs[async_descs_idx],
@@ -1937,6 +1969,8 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
comp_pkts[num_done_pkts++] = pkts[pkt_idx];
}
 
+   pkt_idx++;
+   remained--;
vq_inc_last_avail_packed(vq, num_desc);
 
/*
@@ -1961,13 +1995,12 @@ virtio_dev_rx_async_submit_packed(struct virtio_net 
*dev,
 */
pkt_err = pkt_burst_idx - n_pkts;
pkt_burst_idx = 0;
-   pkt_idx++;
break;
}
 
pkt_burst_idx = 0;
}
-   }
+   } while (pkt_idx < count);
 
if (pkt_burst_idx) {
n_pkts = vq->async_ops.transfer_data(dev->vid, queue_id, tdes, 
0, pkt_burst_idx);
-- 
2.29.2



[dpdk-dev] [PATCH v6 1/4] vhost: abstract and reorganize async split ring code

2021-04-13 Thread Cheng Jiang
This patch moves some of the async vhost split ring code into inline
functions to improve readability. It also changes the iterators from
pointer arithmetic to index-based access to make the code more
concise.
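
The iterator change can be seen in miniature below: instead of two
pointers into it_pool that are bumped by two after every packet, a
single index selects the current (src, dst) pair. This is only an
illustrative, self-contained rendering of that pattern; the iterator
type is stubbed and the per-packet work is omitted.

#include <stdint.h>

struct iov_iter { uint32_t nr_segs; };	/* stand-in for rte_vhost_iov_iter */

static uint32_t
walk_iter_pairs(struct iov_iter *it_pool, uint16_t nr_pkts)
{
	uint16_t it_idx = 0;	/* index of the current src iterator */
	uint32_t iovec_idx = 0;	/* running offset into the shared iovec pool */
	uint16_t pkt;

	for (pkt = 0; pkt < nr_pkts; pkt++) {
		struct iov_iter *src_it = &it_pool[it_idx];
		struct iov_iter *dst_it = &it_pool[it_idx + 1];

		(void)dst_it;	/* filled per packet in the real code */
		iovec_idx += src_it->nr_segs;	/* consume iovec slots */
		it_idx += 2;	/* advance to the next (src, dst) pair */
	}
	return iovec_idx;
}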

Signed-off-by: Cheng Jiang 
---
 lib/librte_vhost/virtio_net.c | 132 +-
 1 file changed, 66 insertions(+), 66 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index ff3987860..438bdafd1 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -1458,6 +1458,22 @@ virtio_dev_rx_async_get_info_idx(uint16_t pkts_idx,
(vq_size - n_inflight + pkts_idx) & (vq_size - 1);
 }
 
+static __rte_always_inline void
+store_dma_desc_info_split(struct vring_used_elem *s_ring, struct 
vring_used_elem *d_ring,
+   uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t 
count)
+{
+   uint16_t elem_size = sizeof(struct vring_used_elem);
+
+   if (d_idx + count <= ring_size) {
+   rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size);
+   } else {
+   uint16_t size = ring_size - d_idx;
+
+   rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size);
+   rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * 
elem_size);
+   }
+}
+
 static __rte_noinline uint32_t
 virtio_dev_rx_async_submit_split(struct virtio_net *dev,
struct vhost_virtqueue *vq, uint16_t queue_id,
@@ -1474,10 +1490,9 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
struct rte_vhost_async_desc tdes[MAX_PKT_BURST];
struct iovec *src_iovec = vec_pool;
struct iovec *dst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1);
-   struct rte_vhost_iov_iter *src_it = it_pool;
-   struct rte_vhost_iov_iter *dst_it = it_pool + 1;
uint16_t slot_idx = 0;
uint16_t segs_await = 0;
+   uint16_t iovec_idx = 0, it_idx = 0;
struct async_inflight_info *pkts_info = vq->async_pkts_info;
uint32_t n_pkts = 0, pkt_err = 0;
uint32_t num_async_pkts = 0, num_done_pkts = 0;
@@ -1511,29 +1526,30 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
dev->vid, vq->last_avail_idx,
vq->last_avail_idx + num_buffers);
 
-   if (async_mbuf_to_desc(dev, vq, pkts[pkt_idx],
-   buf_vec, nr_vec, num_buffers,
-   src_iovec, dst_iovec, src_it, dst_it) < 0) {
+   if (async_mbuf_to_desc(dev, vq, pkts[pkt_idx], buf_vec, nr_vec, 
num_buffers,
+   &src_iovec[iovec_idx], &dst_iovec[iovec_idx],
+   &it_pool[it_idx], &it_pool[it_idx + 1]) < 0) {
vq->shadow_used_idx -= num_buffers;
break;
}
 
slot_idx = (vq->async_pkts_idx + num_async_pkts) &
(vq->size - 1);
-   if (src_it->count) {
+   if (it_pool[it_idx].count) {
uint16_t from, to;
 
-   async_fill_desc(&tdes[pkt_burst_idx++], src_it, dst_it);
+   async_fill_desc(&tdes[pkt_burst_idx++],
+   &it_pool[it_idx], &it_pool[it_idx + 1]);
pkts_info[slot_idx].descs = num_buffers;
pkts_info[slot_idx].mbuf = pkts[pkt_idx];
async_pkts_log[num_async_pkts].pkt_idx = pkt_idx;
async_pkts_log[num_async_pkts++].last_avail_idx =
vq->last_avail_idx;
-   src_iovec += src_it->nr_segs;
-   dst_iovec += dst_it->nr_segs;
-   src_it += 2;
-   dst_it += 2;
-   segs_await += src_it->nr_segs;
+
+   iovec_idx += it_pool[it_idx].nr_segs;
+   it_idx += 2;
+
+   segs_await += it_pool[it_idx].nr_segs;
 
/**
 * recover shadow used ring and keep DMA-occupied
@@ -1541,23 +1557,10 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
 */
from = vq->shadow_used_idx - num_buffers;
to = vq->async_desc_idx & (vq->size - 1);
-   if (num_buffers + to <= vq->size) {
-   rte_memcpy(&vq->async_descs_split[to],
-   &vq->shadow_used_split[from],
-   num_buffers *
-   sizeof(struct vring_used_elem));
-   } else {
-   int size = vq->size - to;
-
-  

[dpdk-dev] [PATCH v6 2/4] vhost: add support for packed ring in async vhost

2021-04-13 Thread Cheng Jiang
For now, the async vhost data path only supports split ring. This
patch enables packed ring in the async vhost data path to make async
vhost compatible with the virtio 1.1 spec.

Signed-off-by: Cheng Jiang 
---
 lib/librte_vhost/rte_vhost_async.h |   1 +
 lib/librte_vhost/vhost.c   |  37 ++-
 lib/librte_vhost/vhost.h   |  15 +-
 lib/librte_vhost/virtio_net.c  | 432 +++--
 4 files changed, 449 insertions(+), 36 deletions(-)

diff --git a/lib/librte_vhost/rte_vhost_async.h 
b/lib/librte_vhost/rte_vhost_async.h
index c855ff875..6faa31f5a 100644
--- a/lib/librte_vhost/rte_vhost_async.h
+++ b/lib/librte_vhost/rte_vhost_async.h
@@ -89,6 +89,7 @@ struct rte_vhost_async_channel_ops {
 struct async_inflight_info {
struct rte_mbuf *mbuf;
uint16_t descs; /* num of descs inflight */
+   uint16_t nr_buffers; /* num of buffers inflight for packed ring */
 };
 
 /**
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index a70fe01d8..467d1d5a2 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -340,17 +340,18 @@ cleanup_device(struct virtio_net *dev, int destroy)
 static void
 vhost_free_async_mem(struct vhost_virtqueue *vq)
 {
-   if (vq->async_pkts_info)
-   rte_free(vq->async_pkts_info);
-   if (vq->async_descs_split)
-   rte_free(vq->async_descs_split);
-   if (vq->it_pool)
-   rte_free(vq->it_pool);
-   if (vq->vec_pool)
-   rte_free(vq->vec_pool);
+   rte_free(vq->async_pkts_info);
 
-   vq->async_pkts_info = NULL;
+   rte_free(vq->async_buffers_packed);
+   rte_free(vq->async_descs_split);
+
+   rte_free(vq->it_pool);
+   rte_free(vq->vec_pool);
+
+   vq->async_buffers_packed = NULL;
vq->async_descs_split = NULL;
+
+   vq->async_pkts_info = NULL;
vq->it_pool = NULL;
vq->vec_pool = NULL;
 }
@@ -1626,10 +1627,9 @@ int rte_vhost_async_channel_register(int vid, uint16_t 
queue_id,
if (unlikely(vq == NULL || !dev->async_copy))
return -1;
 
-   /* packed queue is not supported */
-   if (unlikely(vq_is_packed(dev) || !f.async_inorder)) {
+   if (unlikely(!f.async_inorder)) {
VHOST_LOG_CONFIG(ERR,
-   "async copy is not supported on packed queue or 
non-inorder mode "
+   "async copy is not supported on non-inorder mode "
"(vid %d, qid: %d)\n", vid, queue_id);
return -1;
}
@@ -1667,11 +1667,18 @@ int rte_vhost_async_channel_register(int vid, uint16_t 
queue_id,
vq->vec_pool = rte_malloc_socket(NULL,
VHOST_MAX_ASYNC_VEC * sizeof(struct iovec),
RTE_CACHE_LINE_SIZE, node);
-   vq->async_descs_split = rte_malloc_socket(NULL,
+   if (vq_is_packed(dev)) {
+   vq->async_buffers_packed = rte_malloc_socket(NULL,
+   vq->size * sizeof(struct vring_used_elem_packed),
+   RTE_CACHE_LINE_SIZE, node);
+   } else {
+   vq->async_descs_split = rte_malloc_socket(NULL,
vq->size * sizeof(struct vring_used_elem),
RTE_CACHE_LINE_SIZE, node);
-   if (!vq->async_descs_split || !vq->async_pkts_info ||
-   !vq->it_pool || !vq->vec_pool) {
+   }
+
+   if (!vq->async_buffers_packed || !vq->async_descs_split ||
+   !vq->async_pkts_info || !vq->it_pool || !vq->vec_pool) {
vhost_free_async_mem(vq);
VHOST_LOG_CONFIG(ERR,
"async register failed: cannot allocate memory 
for vq data "
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index f628714c2..673335217 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -201,9 +201,18 @@ struct vhost_virtqueue {
uint16_tasync_pkts_idx;
uint16_tasync_pkts_inflight_n;
uint16_tasync_last_pkts_n;
-   struct vring_used_elem  *async_descs_split;
-   uint16_t async_desc_idx;
-   uint16_t last_async_desc_idx;
+   union {
+   struct vring_used_elem  *async_descs_split;
+   struct vring_used_elem_packed *async_buffers_packed;
+   };
+   union {
+   uint16_t async_desc_idx;
+   uint16_t async_packed_buffer_idx;
+   };
+   union {
+   uint16_t last_async_desc_idx;
+   uint16_t last_async_buffer_idx;
+   };
 
/* vq async features */
boolasync_inorder;
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 438bdafd1..e2b35a319 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -363,14 +363,14 @@ 

[dpdk-dev] [PATCH v6 0/4] add support for packed ring in async vhost

2021-04-13 Thread Cheng Jiang
For now, the async vhost data path only supports the split ring structure.
In order to make async vhost compatible with the virtio 1.1 spec, this
patch set cleans up the async split ring code and enables packed ring in
the async vhost data path. The batch data path is also enabled in the
async vhost packed ring.

v6:
 * fix some typos in commit log
 * improve index usage
 * remove shadow_ring_store()
 * add store_dma_desc_info_split() store_dma_desc_info_packed()
 * remove some checks in vhost_free_async_mem()
 * change index calculation since the size isn't necessarily a power of 2
 * move error handling in a dedicated function
 * clean codes
v5:
 * clean some codes for packed ring datapath
 * fix an index error in shadow_ring_store()
v4:
  * change the patch structure
  * clean code for async split ring
  * reuse some code from split ring
  * change the error handler for DMA-copy packet
  * add check for malloc
  * remove useless code
  * add doc update
v3:
  * fix error handler for DMA-copy packet
v2:
  * fix wrong buffer index in rte_vhost_poll_enqueue_completed()
  * add async_buffers_packed memory free in vhost_free_async_mem()

Cheng Jiang (4):
  vhost: abstract and reorganize async split ring code
  vhost: add support for packed ring in async vhost
  vhost: add batch datapath for async vhost packed ring
  doc: add release note for vhost async packed ring

 doc/guides/rel_notes/release_21_05.rst |   4 +
 lib/librte_vhost/rte_vhost_async.h |   1 +
 lib/librte_vhost/vhost.c   |  37 +-
 lib/librte_vhost/vhost.h   |  15 +-
 lib/librte_vhost/virtio_net.c  | 593 +
 5 files changed, 550 insertions(+), 100 deletions(-)

--
2.29.2



[dpdk-dev] [PATCH v6 4/4] doc: add release note for vhost async packed ring

2021-04-13 Thread Cheng Jiang
Add release note for the support of vhost async packed ring.

Signed-off-by: Cheng Jiang 
---
 doc/guides/rel_notes/release_21_05.rst | 4 
 1 file changed, 4 insertions(+)

diff --git a/doc/guides/rel_notes/release_21_05.rst 
b/doc/guides/rel_notes/release_21_05.rst
index 374d6d98e..eb5200669 100644
--- a/doc/guides/rel_notes/release_21_05.rst
+++ b/doc/guides/rel_notes/release_21_05.rst
@@ -131,6 +131,10 @@ New Features
   * Added command to display Rx queue used descriptor count.
 ``show port (port_id) rxq (queue_id) desc used count``
 
+* **Added support for vhost async packed ring data path.**
+
+  Added packed ring support for async vhost.
+
 
 Removed Items
 -
-- 
2.29.2



[dpdk-dev] [PATCH v7 0/4] add support for packed ring in async vhost

2021-04-13 Thread Cheng Jiang
For now, the async vhost data path only supports the split ring structure.
In order to make async vhost compatible with the virtio 1.1 spec, this
patch set cleans up the async split ring code and enables packed ring in
the async vhost data path. The batch data path is also enabled in the
async vhost packed ring.

v7:
 * fix compile issues
 * add argument *dev in vhost_free_async_mem() for ring type decision
v6:
 * fix some typos in commit log
 * improve index usage
 * remove shadow_ring_store()
 * add store_dma_desc_info_split() store_dma_desc_info_packed()
 * remove some checks in vhost_free_async_mem()
 * change index calculation since the size isn't necessarily a power of 2
 * move error handling in a dedicated function
 * clean codes
v5:
 * clean some codes for packed ring datapath
 * fix an index error in shadow_ring_store()
v4:
  * change the patch structure
  * clean code for async split ring
  * reuse some code from split ring
  * change the error handler for DMA-copy packet
  * add check for malloc
  * remove useless code
  * add doc update
v3:
  * fix error handler for DMA-copy packet
v2:
  * fix wrong buffer index in rte_vhost_poll_enqueue_completed()
  * add async_buffers_packed memory free in vhost_free_async_mem()

Cheng Jiang (4):
  vhost: abstract and reorganize async split ring code
  vhost: add support for packed ring in async vhost
  vhost: add batch datapath for async vhost packed ring
  doc: add release note for vhost async packed ring

 doc/guides/rel_notes/release_21_05.rst |   4 +
 lib/librte_vhost/rte_vhost_async.h |   1 +
 lib/librte_vhost/vhost.c   |  49 +-
 lib/librte_vhost/vhost.h   |  15 +-
 lib/librte_vhost/virtio_net.c  | 593 +
 5 files changed, 557 insertions(+), 105 deletions(-)

--
2.29.2



[dpdk-dev] [PATCH v7 1/4] vhost: abstract and reorganize async split ring code

2021-04-13 Thread Cheng Jiang
This patch moves some of the async vhost split ring code into inline
functions to improve readability. It also changes the iterators from
pointer style to index style to make the code more concise.

Signed-off-by: Cheng Jiang 
---
 lib/librte_vhost/virtio_net.c | 132 +-
 1 file changed, 66 insertions(+), 66 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index ff3987860..438bdafd1 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -1458,6 +1458,22 @@ virtio_dev_rx_async_get_info_idx(uint16_t pkts_idx,
(vq_size - n_inflight + pkts_idx) & (vq_size - 1);
 }
 
+static __rte_always_inline void
+store_dma_desc_info_split(struct vring_used_elem *s_ring, struct 
vring_used_elem *d_ring,
+   uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t 
count)
+{
+   uint16_t elem_size = sizeof(struct vring_used_elem);
+
+   if (d_idx + count <= ring_size) {
+   rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size);
+   } else {
+   uint16_t size = ring_size - d_idx;
+
+   rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size);
+   rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * 
elem_size);
+   }
+}
+
 static __rte_noinline uint32_t
 virtio_dev_rx_async_submit_split(struct virtio_net *dev,
struct vhost_virtqueue *vq, uint16_t queue_id,
@@ -1474,10 +1490,9 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
struct rte_vhost_async_desc tdes[MAX_PKT_BURST];
struct iovec *src_iovec = vec_pool;
struct iovec *dst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1);
-   struct rte_vhost_iov_iter *src_it = it_pool;
-   struct rte_vhost_iov_iter *dst_it = it_pool + 1;
uint16_t slot_idx = 0;
uint16_t segs_await = 0;
+   uint16_t iovec_idx = 0, it_idx = 0;
struct async_inflight_info *pkts_info = vq->async_pkts_info;
uint32_t n_pkts = 0, pkt_err = 0;
uint32_t num_async_pkts = 0, num_done_pkts = 0;
@@ -1511,29 +1526,30 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
dev->vid, vq->last_avail_idx,
vq->last_avail_idx + num_buffers);
 
-   if (async_mbuf_to_desc(dev, vq, pkts[pkt_idx],
-   buf_vec, nr_vec, num_buffers,
-   src_iovec, dst_iovec, src_it, dst_it) < 0) {
+   if (async_mbuf_to_desc(dev, vq, pkts[pkt_idx], buf_vec, nr_vec, 
num_buffers,
+   &src_iovec[iovec_idx], &dst_iovec[iovec_idx],
+   &it_pool[it_idx], &it_pool[it_idx + 1]) < 0) {
vq->shadow_used_idx -= num_buffers;
break;
}
 
slot_idx = (vq->async_pkts_idx + num_async_pkts) &
(vq->size - 1);
-   if (src_it->count) {
+   if (it_pool[it_idx].count) {
uint16_t from, to;
 
-   async_fill_desc(&tdes[pkt_burst_idx++], src_it, dst_it);
+   async_fill_desc(&tdes[pkt_burst_idx++],
+   &it_pool[it_idx], &it_pool[it_idx + 1]);
pkts_info[slot_idx].descs = num_buffers;
pkts_info[slot_idx].mbuf = pkts[pkt_idx];
async_pkts_log[num_async_pkts].pkt_idx = pkt_idx;
async_pkts_log[num_async_pkts++].last_avail_idx =
vq->last_avail_idx;
-   src_iovec += src_it->nr_segs;
-   dst_iovec += dst_it->nr_segs;
-   src_it += 2;
-   dst_it += 2;
-   segs_await += src_it->nr_segs;
+
+   iovec_idx += it_pool[it_idx].nr_segs;
+   it_idx += 2;
+
+   segs_await += it_pool[it_idx].nr_segs;
 
/**
 * recover shadow used ring and keep DMA-occupied
@@ -1541,23 +1557,10 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
 */
from = vq->shadow_used_idx - num_buffers;
to = vq->async_desc_idx & (vq->size - 1);
-   if (num_buffers + to <= vq->size) {
-   rte_memcpy(&vq->async_descs_split[to],
-   &vq->shadow_used_split[from],
-   num_buffers *
-   sizeof(struct vring_used_elem));
-   } else {
-   int size = vq->size - to;
-
-  

[dpdk-dev] [PATCH v7 2/4] vhost: add support for packed ring in async vhost

2021-04-13 Thread Cheng Jiang
For now, the async vhost data path only supports split ring. This patch
enables packed ring in the async vhost data path to make async vhost
compatible with the virtio 1.1 spec.

Signed-off-by: Cheng Jiang 
---
 lib/librte_vhost/rte_vhost_async.h |   1 +
 lib/librte_vhost/vhost.c   |  49 ++--
 lib/librte_vhost/vhost.h   |  15 +-
 lib/librte_vhost/virtio_net.c  | 432 +++--
 4 files changed, 456 insertions(+), 41 deletions(-)

diff --git a/lib/librte_vhost/rte_vhost_async.h 
b/lib/librte_vhost/rte_vhost_async.h
index c855ff875..6faa31f5a 100644
--- a/lib/librte_vhost/rte_vhost_async.h
+++ b/lib/librte_vhost/rte_vhost_async.h
@@ -89,6 +89,7 @@ struct rte_vhost_async_channel_ops {
 struct async_inflight_info {
struct rte_mbuf *mbuf;
uint16_t descs; /* num of descs inflight */
+   uint16_t nr_buffers; /* num of buffers inflight for packed ring */
 };
 
 /**
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index a70fe01d8..f509186c6 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -338,19 +338,22 @@ cleanup_device(struct virtio_net *dev, int destroy)
 }
 
 static void
-vhost_free_async_mem(struct vhost_virtqueue *vq)
+vhost_free_async_mem(struct virtio_net *dev, struct vhost_virtqueue *vq)
 {
-   if (vq->async_pkts_info)
-   rte_free(vq->async_pkts_info);
-   if (vq->async_descs_split)
+   rte_free(vq->async_pkts_info);
+
+   if (vq_is_packed(dev)) {
+   rte_free(vq->async_buffers_packed);
+   vq->async_buffers_packed = NULL;
+   } else {
rte_free(vq->async_descs_split);
-   if (vq->it_pool)
-   rte_free(vq->it_pool);
-   if (vq->vec_pool)
-   rte_free(vq->vec_pool);
+   vq->async_descs_split = NULL;
+   }
+
+   rte_free(vq->it_pool);
+   rte_free(vq->vec_pool);
 
vq->async_pkts_info = NULL;
-   vq->async_descs_split = NULL;
vq->it_pool = NULL;
vq->vec_pool = NULL;
 }
@@ -360,10 +363,10 @@ free_vq(struct virtio_net *dev, struct vhost_virtqueue 
*vq)
 {
if (vq_is_packed(dev))
rte_free(vq->shadow_used_packed);
-   else {
+   else
rte_free(vq->shadow_used_split);
-   vhost_free_async_mem(vq);
-   }
+
+   vhost_free_async_mem(dev, vq);
rte_free(vq->batch_copy_elems);
if (vq->iotlb_pool)
rte_mempool_free(vq->iotlb_pool);
@@ -1626,10 +1629,9 @@ int rte_vhost_async_channel_register(int vid, uint16_t 
queue_id,
if (unlikely(vq == NULL || !dev->async_copy))
return -1;
 
-   /* packed queue is not supported */
-   if (unlikely(vq_is_packed(dev) || !f.async_inorder)) {
+   if (unlikely(!f.async_inorder)) {
VHOST_LOG_CONFIG(ERR,
-   "async copy is not supported on packed queue or 
non-inorder mode "
+   "async copy is not supported on non-inorder mode "
"(vid %d, qid: %d)\n", vid, queue_id);
return -1;
}
@@ -1667,12 +1669,19 @@ int rte_vhost_async_channel_register(int vid, uint16_t 
queue_id,
vq->vec_pool = rte_malloc_socket(NULL,
VHOST_MAX_ASYNC_VEC * sizeof(struct iovec),
RTE_CACHE_LINE_SIZE, node);
-   vq->async_descs_split = rte_malloc_socket(NULL,
+   if (vq_is_packed(dev)) {
+   vq->async_buffers_packed = rte_malloc_socket(NULL,
+   vq->size * sizeof(struct vring_used_elem_packed),
+   RTE_CACHE_LINE_SIZE, node);
+   } else {
+   vq->async_descs_split = rte_malloc_socket(NULL,
vq->size * sizeof(struct vring_used_elem),
RTE_CACHE_LINE_SIZE, node);
-   if (!vq->async_descs_split || !vq->async_pkts_info ||
-   !vq->it_pool || !vq->vec_pool) {
-   vhost_free_async_mem(vq);
+   }
+
+   if (!vq->async_buffers_packed || !vq->async_descs_split ||
+   !vq->async_pkts_info || !vq->it_pool || !vq->vec_pool) {
+   vhost_free_async_mem(dev, vq);
VHOST_LOG_CONFIG(ERR,
"async register failed: cannot allocate memory 
for vq data "
"(vid %d, qid: %d)\n", vid, queue_id);
@@ -1728,7 +1737,7 @@ int rte_vhost_async_channel_unregister(int vid, uint16_t 
queue_id)
goto out;
}
 
-   vhost_free_async_mem(vq);
+   vhost_free_async_mem(dev, vq);
 
vq->async_ops.transfer_data = NULL;
vq->async_ops.check_completed_copies = NULL;
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index f628714c2..67333521

[dpdk-dev] [PATCH v7 3/4] vhost: add batch datapath for async vhost packed ring

2021-04-13 Thread Cheng Jiang
Add batch datapath for async vhost packed ring to improve the
performance of small packet processing.

Signed-off-by: Cheng Jiang 
---
 lib/librte_vhost/virtio_net.c | 41 +++
 1 file changed, 37 insertions(+), 4 deletions(-)

diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 54e11e3a5..7ba186585 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -1725,6 +1725,29 @@ vhost_update_used_packed(struct vhost_virtqueue *vq,
vq->desc_packed[head_idx].flags = head_flags;
 }
 
+static __rte_always_inline int
+virtio_dev_rx_async_batch_packed(struct virtio_net *dev,
+  struct vhost_virtqueue *vq,
+  struct rte_mbuf **pkts,
+  struct rte_mbuf **comp_pkts, uint32_t *pkt_done)
+{
+   uint16_t i;
+   uint32_t cpy_threshold = vq->async_threshold;
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   if (unlikely(pkts[i]->pkt_len >= cpy_threshold))
+   return -1;
+   }
+   if (!virtio_dev_rx_batch_packed(dev, vq, pkts)) {
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+   comp_pkts[(*pkt_done)++] = pkts[i];
+
+   return 0;
+   }
+
+   return -1;
+}
+
 static __rte_always_inline int
 vhost_enqueue_async_single_packed(struct virtio_net *dev,
struct vhost_virtqueue *vq,
@@ -1875,6 +1898,7 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
struct rte_mbuf **comp_pkts, uint32_t *comp_count)
 {
uint32_t pkt_idx = 0, pkt_burst_idx = 0;
+   uint32_t remained = count;
uint16_t async_descs_idx = 0;
uint16_t num_buffers;
uint16_t num_desc;
@@ -1892,9 +1916,17 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
uint32_t num_async_pkts = 0, num_done_pkts = 0;
struct vring_packed_desc async_descs[vq->size];
 
-   rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
+   do {
+   rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
+   if (remained >= PACKED_BATCH_SIZE) {
+   if (!virtio_dev_rx_async_batch_packed(dev, vq,
+   &pkts[pkt_idx], comp_pkts, &num_done_pkts)) {
+   pkt_idx += PACKED_BATCH_SIZE;
+   remained -= PACKED_BATCH_SIZE;
+   continue;
+   }
+   }
 
-   for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
if (unlikely(virtio_dev_rx_async_single_packed(dev, vq, 
pkts[pkt_idx],
&num_desc, &num_buffers,
&async_descs[async_descs_idx],
@@ -1937,6 +1969,8 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
comp_pkts[num_done_pkts++] = pkts[pkt_idx];
}
 
+   pkt_idx++;
+   remained--;
vq_inc_last_avail_packed(vq, num_desc);
 
/*
@@ -1961,13 +1995,12 @@ virtio_dev_rx_async_submit_packed(struct virtio_net 
*dev,
 */
pkt_err = pkt_burst_idx - n_pkts;
pkt_burst_idx = 0;
-   pkt_idx++;
break;
}
 
pkt_burst_idx = 0;
}
-   }
+   } while (pkt_idx < count);
 
if (pkt_burst_idx) {
n_pkts = vq->async_ops.transfer_data(dev->vid, queue_id, tdes, 
0, pkt_burst_idx);
-- 
2.29.2



[dpdk-dev] [PATCH v7 4/4] doc: add release note for vhost async packed ring

2021-04-13 Thread Cheng Jiang
Add release note for the support of vhost async packed ring.

Signed-off-by: Cheng Jiang 
---
 doc/guides/rel_notes/release_21_05.rst | 4 
 1 file changed, 4 insertions(+)

diff --git a/doc/guides/rel_notes/release_21_05.rst 
b/doc/guides/rel_notes/release_21_05.rst
index 374d6d98e..eb5200669 100644
--- a/doc/guides/rel_notes/release_21_05.rst
+++ b/doc/guides/rel_notes/release_21_05.rst
@@ -131,6 +131,10 @@ New Features
   * Added command to display Rx queue used descriptor count.
 ``show port (port_id) rxq (queue_id) desc used count``
 
+* **Added support for vhost async packed ring data path.**
+
+  Added packed ring support for async vhost.
+
 
 Removed Items
 -
-- 
2.29.2



[RFC v3] add support for async vhost packed ring dequeue

2022-05-29 Thread Cheng Jiang
This RFC patch implements packed ring dequeue data path for asynchronous
vhost.

Signed-off-by: Cheng Jiang 
---
 lib/vhost/virtio_net.c | 217 -
 1 file changed, 191 insertions(+), 26 deletions(-)

diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 68a26eb17d..9c8964c876 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -3240,7 +3240,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 }
 
 static __rte_always_inline uint16_t
-async_poll_dequeue_completed_split(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
+async_poll_dequeue_completed(struct virtio_net *dev, struct vhost_virtqueue 
*vq,
struct rte_mbuf **pkts, uint16_t count, int16_t dma_id,
uint16_t vchan_id, bool legacy_ol_flags)
 {
@@ -3255,7 +3255,7 @@ async_poll_dequeue_completed_split(struct virtio_net 
*dev, struct vhost_virtqueu
from = start_idx;
while (vq->async->pkts_cmpl_flag[from] && count--) {
vq->async->pkts_cmpl_flag[from] = false;
-   from = (from + 1) & (vq->size - 1);
+   from = (from + 1) % vq->size;
nr_cpl_pkts++;
}
 
@@ -3263,7 +3263,7 @@ async_poll_dequeue_completed_split(struct virtio_net 
*dev, struct vhost_virtqueu
return 0;
 
for (i = 0; i < nr_cpl_pkts; i++) {
-   from = (start_idx + i) & (vq->size - 1);
+   from = (start_idx + i) % vq->size;
pkts[i] = pkts_info[from].mbuf;
 
if (virtio_net_with_host_offload(dev))
@@ -3272,10 +3272,14 @@ async_poll_dequeue_completed_split(struct virtio_net 
*dev, struct vhost_virtqueu
}
 
/* write back completed descs to used ring and update used idx */
-   write_back_completed_descs_split(vq, nr_cpl_pkts);
-   __atomic_add_fetch(&vq->used->idx, nr_cpl_pkts, __ATOMIC_RELEASE);
-   vhost_vring_call_split(dev, vq);
-
+   if (vq_is_packed(dev)) {
+   write_back_completed_descs_packed(vq, nr_cpl_pkts);
+   vhost_vring_call_packed(dev, vq);
+   } else {
+   write_back_completed_descs_split(vq, nr_cpl_pkts);
+   __atomic_add_fetch(&vq->used->idx, nr_cpl_pkts, 
__ATOMIC_RELEASE);
+   vhost_vring_call_split(dev, vq);
+   }
vq->async->pkts_inflight_n -= nr_cpl_pkts;
 
return nr_cpl_pkts;
@@ -3412,8 +3416,8 @@ virtio_dev_tx_async_split(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
 
 out:
/* DMA device may serve other queues, unconditionally check completed. 
*/
-   nr_done_pkts = async_poll_dequeue_completed_split(dev, vq, pkts, 
pkts_size,
- dma_id, vchan_id, 
legacy_ol_flags);
+   nr_done_pkts = async_poll_dequeue_completed(dev, vq, pkts, pkts_size,
+   dma_id, vchan_id, 
legacy_ol_flags);
 
return nr_done_pkts;
 }
@@ -3440,6 +3444,170 @@ virtio_dev_tx_async_split_compliant(struct virtio_net 
*dev,
pkts, count, dma_id, vchan_id, false);
 }
 
+static __rte_always_inline void
+vhost_async_shadow_dequeue_single_packed(struct vhost_virtqueue *vq, uint16_t 
buf_id)
+{
+   struct vhost_async *async = vq->async;
+   uint16_t idx = async->buffer_idx_packed;
+
+   async->buffers_packed[idx].id = buf_id;
+   async->buffers_packed[idx].len = 0;
+   async->buffers_packed[idx].count = 1;
+
+   async->buffer_idx_packed++;
+   if (async->buffer_idx_packed >= vq->size)
+   async->buffer_idx_packed -= vq->size;
+
+}
+
+static __rte_always_inline int
+virtio_dev_tx_async_single_packed(struct virtio_net *dev,
+   struct vhost_virtqueue *vq,
+   struct rte_mempool *mbuf_pool,
+   struct rte_mbuf *pkts,
+   uint16_t slot_idx,
+   bool legacy_ol_flags)
+{
+   int err;
+   uint16_t buf_id, desc_count = 0;
+   uint16_t nr_vec = 0;
+   uint32_t buf_len;
+   struct buf_vector buf_vec[BUF_VECTOR_MAX];
+   static bool allocerr_warned;
+
+   if (unlikely(fill_vec_buf_packed(dev, vq, vq->last_avail_idx, 
&desc_count,
+buf_vec, &nr_vec, &buf_id, &buf_len,
+VHOST_ACCESS_RO) < 0))
+   return -1;
+
+   if (unlikely(virtio_dev_pktmbuf_prep(dev, pkts, buf_len))) {
+   if (!allocerr_warned) {
+   VHOST_LOG_DATA(ERR, "Failed mbuf alloc of size %d from 
%s on %s.\n",
+   buf_len, mbuf_pool->name, dev->ifname);
+   allocerr_warned = true;
+   }
+   return -1;
+   }
+
+   

[dpdk-dev] [RFC v2] vhost: add support async dequeue for packed ring

2021-09-14 Thread Cheng Jiang
This patch implements asynchronous dequeue data path for packed ring.

Signed-off-by: Cheng Jiang 
---
It's based on these 2 patches:
1. vhost: remove copy threshold for async vhost
http://patches.dpdk.org/project/dpdk/patch/1629463466-450012-1-git-send-email-jiayu...@intel.com/
2. vhost: support async dequeue for split ring
http://patches.dpdk.org/project/dpdk/patch/20210906204837.112466-2-wenwux...@intel.com/

v2:
 * fixed some issues

 lib/vhost/virtio_net.c | 325 ++---
 1 file changed, 302 insertions(+), 23 deletions(-)

diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index e0159b53e3..9a842ce8f4 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -1654,7 +1654,7 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
 }
 
 static __rte_always_inline void
-vhost_update_used_packed(struct vhost_virtqueue *vq,
+vhost_enqueue_update_used_packed(struct vhost_virtqueue *vq,
struct vring_used_elem_packed *shadow_ring,
uint16_t count)
 {
@@ -1970,22 +1970,66 @@ write_back_completed_descs_split(struct vhost_virtqueue 
*vq, uint16_t n_descs)
} while (nr_left > 0);
 }
 
+static __rte_always_inline void
+vhost_dequeue_update_used_packed(struct vhost_virtqueue *vq,
+   struct vring_used_elem_packed *shadow_ring,
+   uint16_t count)
+{
+   uint16_t i;
+   uint16_t flags;
+   uint16_t head_idx = vq->last_used_idx;
+   uint16_t head_flags = 0;
+
+   for (i = 0; i < count; i++)
+   vq->desc_packed[vq->last_used_idx + i].id = shadow_ring[i].id;
+
+   /* The ordering for storing desc flags needs to be enforced. */
+   rte_atomic_thread_fence(__ATOMIC_RELEASE);
+
+   for (i = 0; i < count; i++) {
+   flags = vq->desc_packed[vq->last_used_idx].flags;
+   if (vq->used_wrap_counter) {
+   flags |= VRING_DESC_F_USED;
+   flags |= VRING_DESC_F_AVAIL;
+   } else {
+   flags &= ~VRING_DESC_F_USED;
+   flags &= ~VRING_DESC_F_AVAIL;
+   }
+
+   if (i > 0)
+   vq->desc_packed[vq->last_used_idx].flags = flags;
+   else
+   head_flags = flags;
+
+   vq_inc_last_used_packed(vq, 1);
+   }
+
+   vq->desc_packed[head_idx].flags = head_flags;
+}
+
 static __rte_always_inline void
 write_back_completed_descs_packed(struct vhost_virtqueue *vq,
-   uint16_t n_buffers)
+   uint16_t n_buffers, bool is_txq)
 {
uint16_t nr_left = n_buffers;
uint16_t from, to;
+   void (*update_used_packed)(struct vhost_virtqueue *vq,
+   struct vring_used_elem_packed *shadow_ring, 
uint16_t count);
+
+   if (is_txq)
+   update_used_packed = vhost_enqueue_update_used_packed;
+   else
+   update_used_packed = vhost_dequeue_update_used_packed;
 
do {
from = vq->last_async_buffer_idx_packed;
to = (from + nr_left) % vq->size;
if (to > from) {
-   vhost_update_used_packed(vq, vq->async_buffers_packed + 
from, to - from);
+   update_used_packed(vq, vq->async_buffers_packed + from, 
to - from);
vq->last_async_buffer_idx_packed += nr_left;
nr_left = 0;
} else {
-   vhost_update_used_packed(vq, vq->async_buffers_packed + 
from,
+   update_used_packed(vq, vq->async_buffers_packed + from,
vq->size - from);
vq->last_async_buffer_idx_packed = 0;
nr_left -= vq->size - from;
@@ -2049,7 +2093,7 @@ vhost_poll_enqueue_completed(struct virtio_net *dev, 
uint16_t queue_id,
 
if (likely(vq->enabled && vq->access_ok)) {
if (vq_is_packed(dev)) {
-   write_back_completed_descs_packed(vq, n_buffers);
+   write_back_completed_descs_packed(vq, n_buffers, 1);
 
vhost_vring_call_packed(dev, vq);
} else {
@@ -3328,7 +3372,7 @@ async_desc_to_mbuf(struct virtio_net *dev,
 }
 
 static __rte_always_inline uint16_t
-async_poll_dequeue_completed_split(struct virtio_net *dev,
+async_poll_dequeue_completed(struct virtio_net *dev,
struct vhost_virtqueue *vq, uint16_t queue_id,
struct rte_mbuf **pkts, uint16_t count, bool legacy_ol_flags)
 {
@@ -3336,7 +3380,7 @@ async_poll_dequeue_completed_split(struct virtio_net *dev,
uint16_t start_idx, pkt_idx, from;
struct async_inflight_info *pkts_info;
 
-   pkt_idx = vq->as

[dpdk-dev] [PATCH] net/virtio: fix refill order in packed ring datapath

2021-07-08 Thread Cheng Jiang
The front-end should refill the descriptor with the mbuf indicated by
the buff_id rather than by the index of the used descriptor, because the
back-end may return buffers out of order when async copy mode is enabled.

When initializing the rxq, refill the descriptors in order, as the
buff_id is not available at that time.

Fixes: a76290c8f1cf ("net/virtio: implement Rx path for packed queues")
Cc: sta...@dpdk.org

Signed-off-by: Cheng Jiang 
Signed-off-by: Marvin Liu 
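
For illustration, a minimal standalone model of the ordering issue (the
toy structures and names are hypothetical, not the driver's own types):
when the back-end completes buffers out of order, the refill bookkeeping
has to be keyed by the id carried in the descriptor rather than by the
ring position being refilled.

#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 4

struct toy_desc { uint16_t id; };         /* packed descriptor keeps its id across reuse */
struct toy_slot { const char *cookie; };  /* per-id bookkeeping, like vq_descx[] */

int main(void)
{
        struct toy_desc ring[RING_SIZE] = { {0}, {1}, {2}, {3} };
        struct toy_slot descx[RING_SIZE] = { {NULL}, {NULL}, {NULL}, {NULL} };
        const char *new_mbuf = "fresh mbuf";
        uint16_t avail_idx = 0;

        /* Back-end completed buffer id 2 first, so ring slot 0 now carries id 2. */
        ring[avail_idx].id = 2;

        /* Correct refill: bookkeeping slot chosen by the descriptor's id,
         * not by the ring index avail_idx being refilled. */
        uint16_t did = ring[avail_idx].id;
        descx[did].cookie = new_mbuf;

        printf("refilled bookkeeping slot %u (not slot %u)\n",
               (unsigned)did, (unsigned)avail_idx);
        return 0;
}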

diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 3ac847317f..d35875d9ce 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -328,13 +328,35 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, 
struct rte_mbuf **cookie,
return 0;
 }

+static inline void
+virtqueue_refill_single_packed(struct virtqueue *vq,
+  struct vring_packed_desc *dp,
+  struct rte_mbuf *cookie)
+{
+   uint16_t flags = vq->vq_packed.cached_flags;
+   struct virtio_hw *hw = vq->hw;
+
+   dp->addr = cookie->buf_iova +
+   RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
+   dp->len = cookie->buf_len -
+   RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
+
+   virtqueue_store_flags_packed(dp, flags,
+hw->weak_barriers);
+
+   if (++vq->vq_avail_idx >= vq->vq_nentries) {
+   vq->vq_avail_idx -= vq->vq_nentries;
+   vq->vq_packed.cached_flags ^=
+   VRING_PACKED_DESC_F_AVAIL_USED;
+   flags = vq->vq_packed.cached_flags;
+   }
+}
+
 static inline int
-virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
+virtqueue_enqueue_recv_refill_packed_init(struct virtqueue *vq,
 struct rte_mbuf **cookie, uint16_t num)
 {
struct vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
-   uint16_t flags = vq->vq_packed.cached_flags;
-   struct virtio_hw *hw = vq->hw;
struct vq_desc_extra *dxp;
uint16_t idx;
int i;
@@ -350,24 +372,34 @@ virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
dxp->cookie = (void *)cookie[i];
dxp->ndescs = 1;

-   start_dp[idx].addr = cookie[i]->buf_iova +
-   RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
-   start_dp[idx].len = cookie[i]->buf_len -
-   RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
+   virtqueue_refill_single_packed(vq, &start_dp[idx], cookie[i]);
+   }
+   vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
+   return 0;
+}

-   vq->vq_desc_head_idx = dxp->next;
-   if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
-   vq->vq_desc_tail_idx = vq->vq_desc_head_idx;
+static inline int
+virtqueue_enqueue_recv_refill_packed(struct virtqueue *vq,
+struct rte_mbuf **cookie, uint16_t num)
+{
+   struct vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
+   struct vq_desc_extra *dxp;
+   uint16_t idx, did;
+   int i;

-   virtqueue_store_flags_packed(&start_dp[idx], flags,
-hw->weak_barriers);
+   if (unlikely(vq->vq_free_cnt == 0))
+   return -ENOSPC;
+   if (unlikely(vq->vq_free_cnt < num))
+   return -EMSGSIZE;

-   if (++vq->vq_avail_idx >= vq->vq_nentries) {
-   vq->vq_avail_idx -= vq->vq_nentries;
-   vq->vq_packed.cached_flags ^=
-   VRING_PACKED_DESC_F_AVAIL_USED;
-   flags = vq->vq_packed.cached_flags;
-   }
+   for (i = 0; i < num; i++) {
+   idx = vq->vq_avail_idx;
+   did = start_dp[idx].id;
+   dxp = &vq->vq_descx[did];
+   dxp->cookie = (void *)cookie[i];
+   dxp->ndescs = 1;
+
+   virtqueue_refill_single_packed(vq, &start_dp[idx], cookie[i]);
}
vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
return 0;
@@ -742,7 +774,7 @@ virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, 
uint16_t queue_idx)

/* Enqueue allocated buffers */
if (virtio_with_packed_queue(vq->hw))
-   error = virtqueue_enqueue_recv_refill_packed(vq,
+   error = 
virtqueue_enqueue_recv_refill_packed_init(vq,
&m, 1);
else
error = virtqueue_enqueue_recv_refill(vq,
--
2.17.1



[dpdk-dev] [PATCH] vhost: fix async packed ring batch datapath

2021-07-08 Thread Cheng Jiang
In the sync path we assume that if the avail descriptors fetched in a
batch do not wrap the ring, then the used descriptors written back for
that batch do not wrap either. This assumption is wrong in the async
path, because there are inflight descriptors still being processed by
the DMA device.

This patch refactors the batch copy code and adds a used ring buffer
wrap check as a batch copy condition to fix this issue.

Fixes: 873e8dad6f49 ("vhost: support packed ring in async datapath")
Cc: sta...@dpdk.org

Signed-off-by: Cheng Jiang 
---
 lib/vhost/virtio_net.c | 163 -
 1 file changed, 128 insertions(+), 35 deletions(-)
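
A minimal sketch of the extra condition this patch adds (standalone toy
code with hypothetical names, not the vhost implementation): in the async
path the used index can lag behind the avail index because of in-flight
DMA copies, so a batch is only taken when neither the avail slots nor the
used slots that will be written back wrap around the ring.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PACKED_BATCH_SIZE 4

static bool
async_batch_ok(uint16_t avail_idx, uint16_t used_idx, uint16_t ring_size)
{
        /* Avail descriptors fetched for this batch must not wrap the ring. */
        if (avail_idx + PACKED_BATCH_SIZE > ring_size)
                return false;
        /* Unlike the sync path, the used slots must be checked too, because
         * used_idx is no longer guaranteed to equal avail_idx while copies
         * are still in flight on the DMA device. */
        if (used_idx + PACKED_BATCH_SIZE > ring_size)
                return false;
        return true;
}

int main(void)
{
        /* avail already wrapped back to 0, used still near the end: no batch. */
        printf("batch allowed: %d\n", async_batch_ok(0, 254, 256));
        return 0;
}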

diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 6bd00b746b..f4a2c88d8b 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -221,11 +221,6 @@ vhost_flush_enqueue_batch_packed(struct virtio_net *dev,
uint16_t last_used_idx;
struct vring_packed_desc *desc_base;
 
-   if (vq->shadow_used_idx) {
-   do_data_copy_enqueue(dev, vq);
-   vhost_flush_enqueue_shadow_packed(dev, vq);
-   }
-
last_used_idx = vq->last_used_idx;
desc_base = &vq->desc_packed[last_used_idx];
 
@@ -1258,18 +1253,16 @@ virtio_dev_rx_split(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
 }
 
 static __rte_always_inline int
-virtio_dev_rx_batch_packed(struct virtio_net *dev,
+virtio_dev_rx_sync_batch_check(struct virtio_net *dev,
   struct vhost_virtqueue *vq,
-  struct rte_mbuf **pkts)
+  struct rte_mbuf **pkts,
+  uint64_t *desc_addrs,
+  uint64_t *lens)
 {
bool wrap_counter = vq->avail_wrap_counter;
struct vring_packed_desc *descs = vq->desc_packed;
uint16_t avail_idx = vq->last_avail_idx;
-   uint64_t desc_addrs[PACKED_BATCH_SIZE];
-   struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_BATCH_SIZE];
uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
-   uint64_t lens[PACKED_BATCH_SIZE];
-   uint16_t ids[PACKED_BATCH_SIZE];
uint16_t i;
 
if (unlikely(avail_idx & PACKED_BATCH_MASK))
@@ -1307,6 +1300,84 @@ virtio_dev_rx_batch_packed(struct virtio_net *dev,
return -1;
}
 
+   return 0;
+}
+
+static __rte_always_inline int
+virtio_dev_rx_async_batch_check(struct virtio_net *dev,
+  struct vhost_virtqueue *vq,
+  struct rte_mbuf **pkts,
+  uint64_t *desc_addrs,
+  uint64_t *lens)
+{
+   bool wrap_counter = vq->avail_wrap_counter;
+   struct vring_packed_desc *descs = vq->desc_packed;
+   uint16_t avail_idx = vq->last_avail_idx;
+   uint16_t used_idx = vq->last_used_idx;
+   uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+   uint32_t cpy_threshold = vq->async_threshold;
+   uint16_t i;
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   if (unlikely(pkts[i]->data_len >= cpy_threshold))
+   return -1;
+   }
+
+   if (unlikely(avail_idx & PACKED_BATCH_MASK))
+   return -1;
+
+   if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))
+   return -1;
+
+   if (unlikely((used_idx + PACKED_BATCH_SIZE) > vq->size))
+   return -1;
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   if (unlikely(pkts[i]->next != NULL))
+   return -1;
+   if (unlikely(!desc_is_avail(&descs[avail_idx + i],
+   wrap_counter)))
+   return -1;
+   }
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+   lens[i] = descs[avail_idx + i].len;
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   if (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset)))
+   return -1;
+   }
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
+   desc_addrs[i] = vhost_iova_to_vva(dev, vq,
+ descs[avail_idx + i].addr,
+ &lens[i],
+ VHOST_ACCESS_RW);
+
+   vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
+   if (unlikely(!desc_addrs[i]))
+   return -1;
+   if (unlikely(lens[i] != descs[avail_idx + i].len))
+   return -1;
+   }
+
+   return 0;
+}
+
+static __rte_always_inline void
+virtio_dev_rx_batch_packed_copy(struct virtio_net *dev,
+  struct vhost_virtqueue *vq,
+  struct rte_mbuf **pkts,

[dpdk-dev] [PATCH] vhost: fix index overflow issue in async vhost

2021-07-08 Thread Cheng Jiang
We introduced some new indexes in async vhost. If these indexes are not
managed carefully, they will eventually overflow and lead to errors.
This patch checks these indexes and keeps them within a valid range.

Fixes: 873e8dad6f49 ("vhost: support packed ring in async datapath")
Cc: sta...@dpdk.org

Signed-off-by: Cheng Jiang 
---
 lib/vhost/virtio_net.c | 22 +++---
 1 file changed, 15 insertions(+), 7 deletions(-)
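
A standalone sketch of the index handling this patch moves to (toy code,
not the vhost implementation): once the ring size is not required to be a
power of two, the usual "idx & (size - 1)" masking is invalid, so indexes
are advanced and then wrapped back with a bounded subtraction.

#include <stdint.h>
#include <stdio.h>

/* Advance a ring index by count (count <= ring_size) for any ring size. */
static uint16_t
ring_advance(uint16_t idx, uint16_t count, uint16_t ring_size)
{
        idx += count;
        if (idx >= ring_size)
                idx -= ring_size;
        return idx;
}

int main(void)
{
        const uint16_t ring_size = 384; /* not a power of two */
        uint16_t idx = 0;

        for (int i = 0; i < 1000; i++)
                idx = ring_advance(idx, 7, ring_size);

        /* The index never leaves [0, ring_size), so it cannot silently overflow. */
        printf("idx = %u (< %u)\n", (unsigned)idx, (unsigned)ring_size);
        return 0;
}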

diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index f4a2c88d8b..61cb5a126c 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -1614,6 +1614,7 @@ store_dma_desc_info_packed(struct vring_used_elem_packed 
*s_ring,
 
if (d_idx + count <= ring_size) {
rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size);
+
} else {
uint16_t size = ring_size - d_idx;
 
@@ -2036,7 +2037,7 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
 
slot_idx = (vq->async_pkts_idx + num_async_pkts) % vq->size;
if (it_pool[it_idx].count) {
-   uint16_t from, to;
+   uint16_t from;
 
async_descs_idx += num_descs;
async_fill_desc(&tdes[pkt_burst_idx++],
@@ -2055,11 +2056,13 @@ virtio_dev_rx_async_submit_packed(struct virtio_net 
*dev,
 * descriptors.
 */
from = vq->shadow_used_idx - num_buffers;
-   to = vq->async_buffer_idx_packed % vq->size;
store_dma_desc_info_packed(vq->shadow_used_packed,
-   vq->async_buffers_packed, vq->size, 
from, to, num_buffers);
+   vq->async_buffers_packed, vq->size, 
from,
+   vq->async_buffer_idx_packed, 
num_buffers);
 
vq->async_buffer_idx_packed += num_buffers;
+   if (vq->async_buffer_idx_packed >= vq->size)
+   vq->async_buffer_idx_packed -= vq->size;
vq->shadow_used_idx -= num_buffers;
} else {
comp_pkts[num_done_pkts++] = pkts[pkt_idx];
@@ -2112,6 +2115,8 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
dma_error_handler_packed(vq, async_descs, async_descs_idx, 
slot_idx, pkt_err,
&pkt_idx, &num_async_pkts, 
&num_done_pkts);
vq->async_pkts_idx += num_async_pkts;
+   if (vq->async_pkts_idx >= vq->size)
+   vq->async_pkts_idx -= vq->size;
*comp_count = num_done_pkts;
 
if (likely(vq->shadow_used_idx)) {
@@ -2160,7 +2165,7 @@ write_back_completed_descs_packed(struct vhost_virtqueue 
*vq,
uint16_t from, to;
 
do {
-   from = vq->last_async_buffer_idx_packed % vq->size;
+   from = vq->last_async_buffer_idx_packed;
to = (from + nr_left) % vq->size;
if (to > from) {
vhost_update_used_packed(vq, vq->async_buffers_packed + 
from, to - from);
@@ -2169,7 +2174,7 @@ write_back_completed_descs_packed(struct vhost_virtqueue 
*vq,
} else {
vhost_update_used_packed(vq, vq->async_buffers_packed + 
from,
vq->size - from);
-   vq->last_async_buffer_idx_packed += vq->size - from;
+   vq->last_async_buffer_idx_packed = 0;
nr_left -= vq->size - from;
}
} while (nr_left > 0);
@@ -2252,10 +2257,13 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
vhost_vring_call_split(dev, vq);
}
} else {
-   if (vq_is_packed(dev))
+   if (vq_is_packed(dev)) {
vq->last_async_buffer_idx_packed += n_buffers;
-   else
+   if (vq->last_async_buffer_idx_packed >= vq->size)
+   vq->last_async_buffer_idx_packed -= vq->size;
+   } else {
vq->last_async_desc_idx_split += n_descs;
+   }
}
 
 done:
-- 
2.29.2



[dpdk-dev] [PATCH v3 0/5] vhost: handle memory hotplug for async vhost

2021-07-14 Thread Cheng Jiang
When the guest memory is hotplugged, the vhost application which
enables DMA acceleration must stop DMA transfers before the vhost
re-maps the guest memory.

This patch set provides an unsafe API to drain the in-flight packets
submitted to the DMA engine in the vhost async data path, and notifies
the vhost application to stop DMA transfers. It also enables this in the
vhost example.

v3:
 * added a patch to fix async ops return type
 * fixed async ops fail handler
 * updated the doc
v2:
 * changed the patch structure

Cheng Jiang (4):
  vhost: fix async vhost ops return type
  vhost: add unsafe API to drain pkts in async vhost
  examples/vhost: handle memory hotplug for async vhost
  doc: update doc for try drain API in vhost lib

Jiayu Hu (1):
  vhost: handle memory hotplug for async vhost

 doc/guides/prog_guide/vhost_lib.rst|   5 +
 doc/guides/rel_notes/release_21_08.rst |   5 +
 examples/vhost/ioat.c  |   4 +-
 examples/vhost/ioat.h  |   4 +-
 examples/vhost/main.c  |  48 +++-
 examples/vhost/main.h  |   1 +
 lib/vhost/rte_vhost_async.h|  28 -
 lib/vhost/version.map  |   3 +
 lib/vhost/vhost_user.c |   9 ++
 lib/vhost/virtio_net.c | 146 -
 10 files changed, 215 insertions(+), 38 deletions(-)

--
2.29.2



[dpdk-dev] [PATCH v3 1/5] vhost: fix async vhost ops return type

2021-07-14 Thread Cheng Jiang
The async vhost ops callbacks should return -1 when something goes wrong
in the callback, so the return type should be changed to int32_t. The
related issue in the vhost example is also fixed in this patch.

Signed-off-by: Cheng Jiang 
---
 examples/vhost/ioat.c   |  4 +--
 examples/vhost/ioat.h   |  4 +--
 lib/vhost/rte_vhost_async.h |  4 +--
 lib/vhost/virtio_net.c  | 58 -
 4 files changed, 56 insertions(+), 14 deletions(-)
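
A small standalone sketch of the caller-side handling this patch
introduces (hypothetical callback type and function names, not the vhost
code itself): a negative return from the application's transfer_data
callback is logged and treated as zero packets enqueued, instead of being
folded into the in-flight counter as a large unsigned value.

#include <stdint.h>
#include <stdio.h>

typedef int32_t (*transfer_data_t)(uint16_t count);

static uint32_t
submit_burst(transfer_data_t transfer_data, uint16_t burst)
{
        int32_t n_enq = transfer_data(burst);

        if (n_enq < 0) {
                fprintf(stderr, "transfer_data failed, treating as 0 packets\n");
                return 0;
        }
        return (uint32_t)n_enq;
}

static int32_t
failing_cb(uint16_t count)
{
        (void)count;
        return -1;      /* the callback signals an error */
}

int main(void)
{
        printf("enqueued %u packets\n", submit_burst(failing_cb, 32));
        return 0;
}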

diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
index 2a2c2d7202..457f8171f0 100644
--- a/examples/vhost/ioat.c
+++ b/examples/vhost/ioat.c
@@ -122,7 +122,7 @@ open_ioat(const char *value)
return ret;
 }
 
-uint32_t
+int32_t
 ioat_transfer_data_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_desc *descs,
struct rte_vhost_async_status *opaque_data, uint16_t count)
@@ -168,7 +168,7 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
return i_desc;
 }
 
-uint32_t
+int32_t
 ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_status *opaque_data,
uint16_t max_packets)
diff --git a/examples/vhost/ioat.h b/examples/vhost/ioat.h
index 1aa28ed6a3..b57b5645b0 100644
--- a/examples/vhost/ioat.h
+++ b/examples/vhost/ioat.h
@@ -27,12 +27,12 @@ struct dma_for_vhost {
 #ifdef RTE_RAW_IOAT
 int open_ioat(const char *value);
 
-uint32_t
+int32_t
 ioat_transfer_data_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_desc *descs,
struct rte_vhost_async_status *opaque_data, uint16_t count);
 
-uint32_t
+int32_t
 ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_status *opaque_data,
uint16_t max_packets);
diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
index 6faa31f5ad..bc81cd0caa 100644
--- a/lib/vhost/rte_vhost_async.h
+++ b/lib/vhost/rte_vhost_async.h
@@ -61,7 +61,7 @@ struct rte_vhost_async_channel_ops {
 * @return
 *  number of descs processed
 */
-   uint32_t (*transfer_data)(int vid, uint16_t queue_id,
+   int32_t (*transfer_data)(int vid, uint16_t queue_id,
struct rte_vhost_async_desc *descs,
struct rte_vhost_async_status *opaque_data,
uint16_t count);
@@ -78,7 +78,7 @@ struct rte_vhost_async_channel_ops {
 * @return
 *  number of async descs completed
 */
-   uint32_t (*check_completed_copies)(int vid, uint16_t queue_id,
+   int32_t (*check_completed_copies)(int vid, uint16_t queue_id,
struct rte_vhost_async_status *opaque_data,
uint16_t max_packets);
 };
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index b93482587c..8156796a46 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -1528,6 +1528,7 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
struct async_inflight_info *pkts_info = vq->async_pkts_info;
uint32_t n_pkts = 0, pkt_err = 0;
uint32_t num_async_pkts = 0, num_done_pkts = 0;
+   int32_t n_enq;
struct {
uint16_t pkt_idx;
uint16_t last_avail_idx;
@@ -1608,8 +1609,16 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
if (unlikely(pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD ||
((VHOST_MAX_ASYNC_VEC >> 1) - segs_await <
BUF_VECTOR_MAX))) {
-   n_pkts = vq->async_ops.transfer_data(dev->vid,
+   n_enq = vq->async_ops.transfer_data(dev->vid,
queue_id, tdes, 0, pkt_burst_idx);
+   if (n_enq >= 0) {
+   n_pkts = n_enq;
+   } else {
+   VHOST_LOG_DATA(ERR, "(%d) %s: wrong opaque data 
for queue id %d.\n",
+   dev->vid, __func__, queue_id);
+   n_pkts = 0;
+   }
+
iovec_idx = 0;
it_idx = 0;
 
@@ -1632,8 +1641,15 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
}
 
if (pkt_burst_idx) {
-   n_pkts = vq->async_ops.transfer_data(dev->vid,
-   queue_id, tdes, 0, pkt_burst_idx);
+   n_enq = vq->async_ops.transfer_data(dev->vid, queue_id, tdes, 
0, pkt_burst_idx);
+   if (n_enq >= 0) {
+   n_pkts = n_enq;
+   } else {
+   VHOST_LOG_DATA(ERR, "(%d) %s: wrong opaque data for 
queue id %d.\n",
+   dev->vid, __func__, queue_id);
+   n_pkts = 0;
+   }
+
vq->async_pkts_inflight_n += n_pkts;

[dpdk-dev] [PATCH v3 2/5] vhost: add unsafe API to drain pkts in async vhost

2021-07-14 Thread Cheng Jiang
Applications need to stop DMA transfers and finish all in-flight packets
in the VM memory hot-plug case when async vhost is used. This patch
provides an unsafe API to drain the in-flight packets that have been
submitted to the DMA engine in the vhost async data path.

Signed-off-by: Cheng Jiang 
---
 lib/vhost/rte_vhost_async.h | 24 ++
 lib/vhost/version.map   |  3 ++
 lib/vhost/virtio_net.c  | 90 +++--
 3 files changed, 94 insertions(+), 23 deletions(-)
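
A usage sketch modelled on the vhost example changes later in this series
(the MAX_INFLIGHT bound and the surrounding function are illustrative, not
part of the patch): before the guest memory is re-mapped, the application
drains the packets still owned by the DMA engine and frees them. The API
is thread-unsafe, so the caller must make sure no other thread is
operating on the same virtqueue.

#include <stdint.h>

#include <rte_mbuf.h>
#include <rte_vhost_async.h>

#define MAX_INFLIGHT 1024       /* upper bound tracked by the application */

static void
drain_queue_before_remap(int vid, uint16_t queue_id, uint16_t inflight)
{
        struct rte_mbuf *m_cpl[MAX_INFLIGHT];
        uint16_t n_pkt, i;

        /* Poll completion status up to 2 times and take back in-flight mbufs. */
        n_pkt = rte_vhost_try_drain_queue_thread_unsafe(vid, queue_id,
                        m_cpl, inflight, 2);

        for (i = 0; i < n_pkt; i++)
                rte_pktmbuf_free(m_cpl[i]);
}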

diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
index bc81cd0caa..fd622631b2 100644
--- a/lib/vhost/rte_vhost_async.h
+++ b/lib/vhost/rte_vhost_async.h
@@ -193,4 +193,28 @@ __rte_experimental
 uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
struct rte_mbuf **pkts, uint16_t count);
 
+/**
+ * This function checks async completion status and empties all packets
+ * for a specific vhost device queue. Packets which are inflight will
+ * be returned in an array.
+ *
+ * @note This function does not perform any locking
+ *
+ * @param vid
+ *  id of vhost device to enqueue data
+ * @param queue_id
+ *  queue id to enqueue data
+ * @param pkts
+ *  blank array to get return packet pointer
+ * @param count
+ *  size of the packet array
+ * @param times
+ *  max number of poll attempts
+ * @return
+ *  num of packets returned
+ */
+__rte_experimental
+uint16_t rte_vhost_try_drain_queue_thread_unsafe(int vid, uint16_t queue_id,
+   struct rte_mbuf **pkts, uint16_t count, uint16_t times);
+
 #endif /* _RTE_VHOST_ASYNC_H_ */
diff --git a/lib/vhost/version.map b/lib/vhost/version.map
index 9103a23cd4..b8fc8770dd 100644
--- a/lib/vhost/version.map
+++ b/lib/vhost/version.map
@@ -79,4 +79,7 @@ EXPERIMENTAL {
 
# added in 21.05
rte_vhost_get_negotiated_protocol_features;
+
+   # added in 21.08
+   rte_vhost_try_drain_queue_thread_unsafe;
 };
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 8156796a46..9f541679b9 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -2115,10 +2115,10 @@ write_back_completed_descs_packed(struct 
vhost_virtqueue *vq,
} while (nr_left > 0);
 }
 
-uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
+static __rte_always_inline uint16_t
+vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id,
struct rte_mbuf **pkts, uint16_t count)
 {
-   struct virtio_net *dev = get_device(vid);
struct vhost_virtqueue *vq;
uint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0, n_buffers = 0;
uint16_t start_idx, pkts_idx, vq_size;
@@ -2126,26 +2126,8 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
uint16_t from, i;
int32_t n_poll;
 
-   if (!dev)
-   return 0;
-
-   VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
-   if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
-   VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
-   dev->vid, __func__, queue_id);
-   return 0;
-   }
-
vq = dev->virtqueue[queue_id];
 
-   if (unlikely(!vq->async_registered)) {
-   VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id 
%d.\n",
-   dev->vid, __func__, queue_id);
-   return 0;
-   }
-
-   rte_spinlock_lock(&vq->access_lock);
-
pkts_idx = vq->async_pkts_idx % vq->size;
pkts_info = vq->async_pkts_info;
vq_size = vq->size;
@@ -2153,7 +2135,7 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
vq_size, vq->async_pkts_inflight_n);
 
if (count > vq->async_last_pkts_n) {
-   n_poll = vq->async_ops.check_completed_copies(vid,
+   n_poll = vq->async_ops.check_completed_copies(dev->vid,
queue_id, 0, count - vq->async_last_pkts_n);
if (n_poll >= 0) {
n_pkts_cpl = n_poll;
@@ -2168,7 +2150,7 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
n_pkts_put = RTE_MIN(count, n_pkts_cpl);
if (unlikely(n_pkts_put == 0)) {
vq->async_last_pkts_n = n_pkts_cpl;
-   goto done;
+   return 0;
}
 
if (vq_is_packed(dev)) {
@@ -2207,12 +2189,74 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
vq->last_async_desc_idx_split += n_descs;
}
 
-done:
+   return n_pkts_put;
+}
+
+uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
+   struct rte_mbuf **pkts, uint16_t count)
+{
+   struct virtio_net *dev = get_device(vid);
+   struct vhost_virtqueue *vq;
+   uint16_t n_pkts_put = 0;
+
+  

[dpdk-dev] [PATCH v3 3/5] vhost: handle memory hotplug for async vhost

2021-07-14 Thread Cheng Jiang
From: Jiayu Hu 

When the guest memory is hotplugged, the vhost application which
enables DMA acceleration must stop DMA transfers before the vhost
re-maps the guest memory.

This patch notifies the vhost application to stop DMA transfers.

Signed-off-by: Jiayu Hu 
---
 lib/vhost/vhost_user.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 031c578e54..39e8432d1c 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -1275,6 +1275,15 @@ vhost_user_set_mem_table(struct virtio_net **pdev, 
struct VhostUserMsg *msg,
vdpa_dev->ops->dev_close(dev->vid);
dev->flags &= ~VIRTIO_DEV_VDPA_CONFIGURED;
}
+
+   /* notify the backend application to stop DMA transfers */
+   if (dev->async_copy && dev->notify_ops->vring_state_changed) {
+   for (i = 0; i < dev->nr_vring; i++) {
+   dev->notify_ops->vring_state_changed(dev->vid,
+   i, 0);
+   }
+   }
+
free_mem_region(dev);
rte_free(dev->mem);
dev->mem = NULL;
-- 
2.29.2



[dpdk-dev] [PATCH v3 4/5] examples/vhost: handle memory hotplug for async vhost

2021-07-14 Thread Cheng Jiang
When the guest memory is hotplugged, the vhost application which
enables DMA acceleration must stop DMA transfers before the vhost
re-maps the guest memory.

To accomplish that, we need to make these changes in the vhost sample:
1. add an inflight packet count.
2. add a vring_state_changed() callback.
3. add an inflight packet drain process in destroy_device() and
vring_state_changed().

Signed-off-by: Cheng Jiang 
---
 examples/vhost/main.c | 48 +--
 examples/vhost/main.h |  1 +
 2 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index d2179eadb9..9014c999be 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -851,8 +851,11 @@ complete_async_pkts(struct vhost_dev *vdev)
 
complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
-   if (complete_count)
+   if (complete_count) {
free_pkts(p_cpl, complete_count);
+   __atomic_sub_fetch(&vdev->pkts_inflight, complete_count, 
__ATOMIC_SEQ_CST);
+   }
+
 }
 
 static __rte_always_inline void
@@ -895,6 +898,7 @@ drain_vhost(struct vhost_dev *vdev)
complete_async_pkts(vdev);
ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
+   __atomic_add_fetch(&vdev->pkts_inflight, ret - cpu_cpl_nr, 
__ATOMIC_SEQ_CST);
 
if (cpu_cpl_nr)
free_pkts(m_cpu_cpl, cpu_cpl_nr);
@@ -1226,6 +1230,9 @@ drain_eth_rx(struct vhost_dev *vdev)
enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
VIRTIO_RXQ, pkts, rx_count,
m_cpu_cpl, &cpu_cpl_nr);
+   __atomic_add_fetch(&vdev->pkts_inflight, enqueue_count - 
cpu_cpl_nr,
+   __ATOMIC_SEQ_CST);
+
if (cpu_cpl_nr)
free_pkts(m_cpu_cpl, cpu_cpl_nr);
 
@@ -1397,8 +1404,15 @@ destroy_device(int vid)
"(%d) device has been removed from data core\n",
vdev->vid);
 
-   if (async_vhost_driver)
+   if (async_vhost_driver) {
+   uint16_t n_pkt = 0;
+   struct rte_mbuf *m_cpl[vdev->pkts_inflight];
+   n_pkt = rte_vhost_try_drain_queue_thread_unsafe(vid, 
VIRTIO_RXQ, m_cpl,
+   vdev->pkts_inflight, 2);
+
+   free_pkts(m_cpl, n_pkt);
rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
+   }
 
rte_free(vdev);
 }
@@ -1487,6 +1501,35 @@ new_device(int vid)
return 0;
 }
 
+static int
+vring_state_changed(int vid, uint16_t queue_id, int enable)
+{
+   struct vhost_dev *vdev = NULL;
+
+   TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
+   if (vdev->vid == vid)
+   break;
+   }
+   if (!vdev)
+   return -1;
+
+   if (queue_id != VIRTIO_RXQ)
+   return 0;
+
+   if (async_vhost_driver) {
+   if (!enable) {
+   uint16_t n_pkt;
+   struct rte_mbuf *m_cpl[vdev->pkts_inflight];
+
+   n_pkt = rte_vhost_try_drain_queue_thread_unsafe(vid, 
queue_id,
+   m_cpl, 
vdev->pkts_inflight, 2);
+   free_pkts(m_cpl, n_pkt);
+   }
+   }
+
+   return 0;
+}
+
 /*
  * These callback allow devices to be added to the data core when configuration
  * has been fully complete.
@@ -1495,6 +1538,7 @@ static const struct vhost_device_ops 
virtio_net_device_ops =
 {
.new_device =  new_device,
.destroy_device = destroy_device,
+   .vring_state_changed = vring_state_changed,
 };
 
 /*
diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index 0ccdce4b4a..e7b1ac60a6 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -51,6 +51,7 @@ struct vhost_dev {
uint64_t features;
size_t hdr_len;
uint16_t nr_vrings;
+   uint16_t pkts_inflight;
struct rte_vhost_memory *mem;
struct device_statistics stats;
TAILQ_ENTRY(vhost_dev) global_vdev_entry;
-- 
2.29.2



[dpdk-dev] [PATCH v3 5/5] doc: update doc for try drain API in vhost lib

2021-07-14 Thread Cheng Jiang
Update the programmer's guide and release notes for the try-drain API in
the vhost library.

Signed-off-by: Cheng Jiang 
---
 doc/guides/prog_guide/vhost_lib.rst| 5 +
 doc/guides/rel_notes/release_21_08.rst | 5 +
 2 files changed, 10 insertions(+)

diff --git a/doc/guides/prog_guide/vhost_lib.rst 
b/doc/guides/prog_guide/vhost_lib.rst
index d18fb98910..85aabc4a75 100644
--- a/doc/guides/prog_guide/vhost_lib.rst
+++ b/doc/guides/prog_guide/vhost_lib.rst
@@ -281,6 +281,11 @@ The following is an overview of some key Vhost API 
functions:
   Poll enqueue completion status from async data path. Completed packets
   are returned to applications through ``pkts``.
 
+* ``rte_vhost_try_drain_queue_thread_unsafe(vid, queue_id, **pkts, count, 
times)``
+
+  Try to drain in-flight packets which are submitted to DMA engine in vhost 
async data
+  path. Completed packets are returned to applications through ``pkts``.
+
 Vhost-user Implementations
 --
 
diff --git a/doc/guides/rel_notes/release_21_08.rst 
b/doc/guides/rel_notes/release_21_08.rst
index a6ecfdf3ce..d1e5df2003 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -55,6 +55,11 @@ New Features
  Also, make sure to start the actual text at the margin.
  ===
 
+* **Added try drain API in vhost library.**
+
+  Added an API which can try to drain the inflight packets submitted to DMA
+  engine in vhost async data path.
+
 
 Removed Items
 -
-- 
2.29.2



[dpdk-dev] [PATCH v2] vhost: fix index overflow for packed ring in async vhost

2021-07-15 Thread Cheng Jiang
We introduced some new indexes in the packed ring of async vhost. They
will eventually overflow and lead to errors if the ring size is not a
power of 2. This patch checks these indexes and keeps them within a
valid range.

Fixes: 873e8dad6f49 ("vhost: support packed ring in async datapath")
Cc: sta...@dpdk.org

Signed-off-by: Cheng Jiang 
---
 lib/vhost/virtio_net.c | 21 ++---
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index f4a2c88d8b..bfb2bf8fc4 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -2036,7 +2036,7 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
 
slot_idx = (vq->async_pkts_idx + num_async_pkts) % vq->size;
if (it_pool[it_idx].count) {
-   uint16_t from, to;
+   uint16_t from;
 
async_descs_idx += num_descs;
async_fill_desc(&tdes[pkt_burst_idx++],
@@ -2055,11 +2055,13 @@ virtio_dev_rx_async_submit_packed(struct virtio_net 
*dev,
 * descriptors.
 */
from = vq->shadow_used_idx - num_buffers;
-   to = vq->async_buffer_idx_packed % vq->size;
store_dma_desc_info_packed(vq->shadow_used_packed,
-   vq->async_buffers_packed, vq->size, 
from, to, num_buffers);
+   vq->async_buffers_packed, vq->size, 
from,
+   vq->async_buffer_idx_packed, 
num_buffers);
 
vq->async_buffer_idx_packed += num_buffers;
+   if (vq->async_buffer_idx_packed >= vq->size)
+   vq->async_buffer_idx_packed -= vq->size;
vq->shadow_used_idx -= num_buffers;
} else {
comp_pkts[num_done_pkts++] = pkts[pkt_idx];
@@ -2112,6 +2114,8 @@ virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
dma_error_handler_packed(vq, async_descs, async_descs_idx, 
slot_idx, pkt_err,
&pkt_idx, &num_async_pkts, 
&num_done_pkts);
vq->async_pkts_idx += num_async_pkts;
+   if (vq->async_pkts_idx >= vq->size)
+   vq->async_pkts_idx -= vq->size;
*comp_count = num_done_pkts;
 
if (likely(vq->shadow_used_idx)) {
@@ -2160,7 +2164,7 @@ write_back_completed_descs_packed(struct vhost_virtqueue 
*vq,
uint16_t from, to;
 
do {
-   from = vq->last_async_buffer_idx_packed % vq->size;
+   from = vq->last_async_buffer_idx_packed;
to = (from + nr_left) % vq->size;
if (to > from) {
vhost_update_used_packed(vq, vq->async_buffers_packed + 
from, to - from);
@@ -2169,7 +2173,7 @@ write_back_completed_descs_packed(struct vhost_virtqueue 
*vq,
} else {
vhost_update_used_packed(vq, vq->async_buffers_packed + 
from,
vq->size - from);
-   vq->last_async_buffer_idx_packed += vq->size - from;
+   vq->last_async_buffer_idx_packed = 0;
nr_left -= vq->size - from;
}
} while (nr_left > 0);
@@ -2252,10 +2256,13 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
vhost_vring_call_split(dev, vq);
}
} else {
-   if (vq_is_packed(dev))
+   if (vq_is_packed(dev)) {
vq->last_async_buffer_idx_packed += n_buffers;
-   else
+   if (vq->last_async_buffer_idx_packed >= vq->size)
+   vq->last_async_buffer_idx_packed -= vq->size;
+   } else {
vq->last_async_desc_idx_split += n_descs;
+   }
}
 
 done:
-- 
2.29.2



[dpdk-dev] [PATCH v4 0/5] vhost: handle memory hotplug for async vhost

2021-07-15 Thread Cheng Jiang
When the guest memory is hotplugged, the vhost application which
enables DMA acceleration must stop DMA transfers before the vhost
re-maps the guest memory.

This patch set provides an unsafe API to drain the in-flight packets
submitted to the DMA engine in the vhost async data path, and notifies
the vhost application to stop DMA transfers. It also enables this in the
vhost example.

v4:
 * rebased on the latest code
v3:
 * added a patch to fix async ops return type
 * fixed async ops fail handler
 * updated the doc
v2:
 * changed the patch structure

Cheng Jiang (4):
  vhost: fix async vhost ops return type
  vhost: add unsafe API to drain pkts in async vhost
  examples/vhost: handle memory hotplug for async vhost
  doc: update doc for try drain API in vhost lib

Jiayu Hu (1):
  vhost: handle memory hotplug for async vhost

 doc/guides/prog_guide/vhost_lib.rst|   5 +
 doc/guides/rel_notes/release_21_08.rst |   5 +
 examples/vhost/ioat.c  |   4 +-
 examples/vhost/ioat.h  |   4 +-
 examples/vhost/main.c  |  48 +++-
 examples/vhost/main.h  |   1 +
 lib/vhost/rte_vhost_async.h|  28 -
 lib/vhost/version.map  |   3 +
 lib/vhost/vhost_user.c |   9 ++
 lib/vhost/virtio_net.c | 146 -
 10 files changed, 215 insertions(+), 38 deletions(-)

--
2.29.2



[dpdk-dev] [PATCH v4 1/5] vhost: fix async vhost ops return type

2021-07-15 Thread Cheng Jiang
The async vhost ops callbacks should return -1 when something goes wrong
in the callback, so the return type should be changed to int32_t. The
related issue in the vhost example is also fixed in this patch.

Signed-off-by: Cheng Jiang 
---
 examples/vhost/ioat.c   |  4 +--
 examples/vhost/ioat.h   |  4 +--
 lib/vhost/rte_vhost_async.h |  4 +--
 lib/vhost/virtio_net.c  | 58 -
 4 files changed, 56 insertions(+), 14 deletions(-)

diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
index 2a2c2d7202..457f8171f0 100644
--- a/examples/vhost/ioat.c
+++ b/examples/vhost/ioat.c
@@ -122,7 +122,7 @@ open_ioat(const char *value)
return ret;
 }
 
-uint32_t
+int32_t
 ioat_transfer_data_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_desc *descs,
struct rte_vhost_async_status *opaque_data, uint16_t count)
@@ -168,7 +168,7 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
return i_desc;
 }
 
-uint32_t
+int32_t
 ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_status *opaque_data,
uint16_t max_packets)
diff --git a/examples/vhost/ioat.h b/examples/vhost/ioat.h
index 1aa28ed6a3..b57b5645b0 100644
--- a/examples/vhost/ioat.h
+++ b/examples/vhost/ioat.h
@@ -27,12 +27,12 @@ struct dma_for_vhost {
 #ifdef RTE_RAW_IOAT
 int open_ioat(const char *value);
 
-uint32_t
+int32_t
 ioat_transfer_data_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_desc *descs,
struct rte_vhost_async_status *opaque_data, uint16_t count);
 
-uint32_t
+int32_t
 ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_status *opaque_data,
uint16_t max_packets);
diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
index 6faa31f5ad..bc81cd0caa 100644
--- a/lib/vhost/rte_vhost_async.h
+++ b/lib/vhost/rte_vhost_async.h
@@ -61,7 +61,7 @@ struct rte_vhost_async_channel_ops {
 * @return
 *  number of descs processed
 */
-   uint32_t (*transfer_data)(int vid, uint16_t queue_id,
+   int32_t (*transfer_data)(int vid, uint16_t queue_id,
struct rte_vhost_async_desc *descs,
struct rte_vhost_async_status *opaque_data,
uint16_t count);
@@ -78,7 +78,7 @@ struct rte_vhost_async_channel_ops {
 * @return
 *  number of async descs completed
 */
-   uint32_t (*check_completed_copies)(int vid, uint16_t queue_id,
+   int32_t (*check_completed_copies)(int vid, uint16_t queue_id,
struct rte_vhost_async_status *opaque_data,
uint16_t max_packets);
 };
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index b93482587c..8156796a46 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -1528,6 +1528,7 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
struct async_inflight_info *pkts_info = vq->async_pkts_info;
uint32_t n_pkts = 0, pkt_err = 0;
uint32_t num_async_pkts = 0, num_done_pkts = 0;
+   int32_t n_enq;
struct {
uint16_t pkt_idx;
uint16_t last_avail_idx;
@@ -1608,8 +1609,16 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
if (unlikely(pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD ||
((VHOST_MAX_ASYNC_VEC >> 1) - segs_await <
BUF_VECTOR_MAX))) {
-   n_pkts = vq->async_ops.transfer_data(dev->vid,
+   n_enq = vq->async_ops.transfer_data(dev->vid,
queue_id, tdes, 0, pkt_burst_idx);
+   if (n_enq >= 0) {
+   n_pkts = n_enq;
+   } else {
+   VHOST_LOG_DATA(ERR, "(%d) %s: wrong opaque data 
for queue id %d.\n",
+   dev->vid, __func__, queue_id);
+   n_pkts = 0;
+   }
+
iovec_idx = 0;
it_idx = 0;
 
@@ -1632,8 +1641,15 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
}
 
if (pkt_burst_idx) {
-   n_pkts = vq->async_ops.transfer_data(dev->vid,
-   queue_id, tdes, 0, pkt_burst_idx);
+   n_enq = vq->async_ops.transfer_data(dev->vid, queue_id, tdes, 
0, pkt_burst_idx);
+   if (n_enq >= 0) {
+   n_pkts = n_enq;
+   } else {
+   VHOST_LOG_DATA(ERR, "(%d) %s: wrong opaque data for 
queue id %d.\n",
+   dev->vid, __func__, queue_id);
+   n_pkts = 0;
+   }
+
vq->async_pkts_inflight_n += n_pkts;

[dpdk-dev] [PATCH v4 2/5] vhost: add unsafe API to drain pkts in async vhost

2021-07-15 Thread Cheng Jiang
Applications need to stop DMA transfers and finish all the in-flight
packets in the VM memory hot-plug case when async vhost is used. This
patch provides an unsafe API to drain in-flight packets which are
submitted to the DMA engine in the vhost async data path.
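
A possible caller-side sketch for the new API, assuming the application
tracks its own in-flight count and guarantees that no other thread touches
this virtqueue while it runs; drain_rx_queue() and the burst handling are
illustrative and not part of the patch:

#include <rte_mbuf.h>
#include <rte_vhost_async.h>

/* Sketch: pull every in-flight packet off an RX queue before the guest
 * memory is re-mapped, polling the DMA engine at most twice. */
static void
drain_rx_queue(int vid, uint16_t queue_id, uint16_t inflight)
{
        struct rte_mbuf *pkts[inflight];
        uint16_t n;

        if (inflight == 0)
                return;

        n = rte_vhost_try_drain_queue_thread_unsafe(vid, queue_id,
                        pkts, inflight, 2);
        rte_pktmbuf_free_bulk(pkts, n);
}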

Signed-off-by: Cheng Jiang 
---
 lib/vhost/rte_vhost_async.h | 24 ++
 lib/vhost/version.map   |  3 ++
 lib/vhost/virtio_net.c  | 90 +++--
 3 files changed, 94 insertions(+), 23 deletions(-)

diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
index bc81cd0caa..fd622631b2 100644
--- a/lib/vhost/rte_vhost_async.h
+++ b/lib/vhost/rte_vhost_async.h
@@ -193,4 +193,28 @@ __rte_experimental
 uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
struct rte_mbuf **pkts, uint16_t count);
 
+/**
+ * This function checks async completion status and empties all packets
+ * for a specific vhost device queue. Packets which are inflight will
+ * be returned in an array.
+ *
+ * @note This function does not perform any locking
+ *
+ * @param vid
+ *  id of vhost device to enqueue data
+ * @param queue_id
+ *  queue id to enqueue data
+ * @param pkts
+ *  blank array to get return packet pointer
+ * @param count
+ *  size of the packet array
+ * @param times
+ *  max number of poll attempts
+ * @return
+ *  num of packets returned
+ */
+__rte_experimental
+uint16_t rte_vhost_try_drain_queue_thread_unsafe(int vid, uint16_t queue_id,
+   struct rte_mbuf **pkts, uint16_t count, uint16_t times);
+
 #endif /* _RTE_VHOST_ASYNC_H_ */
diff --git a/lib/vhost/version.map b/lib/vhost/version.map
index 9103a23cd4..b8fc8770dd 100644
--- a/lib/vhost/version.map
+++ b/lib/vhost/version.map
@@ -79,4 +79,7 @@ EXPERIMENTAL {
 
# added in 21.05
rte_vhost_get_negotiated_protocol_features;
+
+   # added in 21.08
+   rte_vhost_try_drain_queue_thread_unsafe;
 };
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 8156796a46..9f541679b9 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -2115,10 +2115,10 @@ write_back_completed_descs_packed(struct 
vhost_virtqueue *vq,
} while (nr_left > 0);
 }
 
-uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
+static __rte_always_inline uint16_t
+vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id,
struct rte_mbuf **pkts, uint16_t count)
 {
-   struct virtio_net *dev = get_device(vid);
struct vhost_virtqueue *vq;
uint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0, n_buffers = 0;
uint16_t start_idx, pkts_idx, vq_size;
@@ -2126,26 +2126,8 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
uint16_t from, i;
int32_t n_poll;
 
-   if (!dev)
-   return 0;
-
-   VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
-   if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
-   VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
-   dev->vid, __func__, queue_id);
-   return 0;
-   }
-
vq = dev->virtqueue[queue_id];
 
-   if (unlikely(!vq->async_registered)) {
-   VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id 
%d.\n",
-   dev->vid, __func__, queue_id);
-   return 0;
-   }
-
-   rte_spinlock_lock(&vq->access_lock);
-
pkts_idx = vq->async_pkts_idx % vq->size;
pkts_info = vq->async_pkts_info;
vq_size = vq->size;
@@ -2153,7 +2135,7 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
vq_size, vq->async_pkts_inflight_n);
 
if (count > vq->async_last_pkts_n) {
-   n_poll = vq->async_ops.check_completed_copies(vid,
+   n_poll = vq->async_ops.check_completed_copies(dev->vid,
queue_id, 0, count - vq->async_last_pkts_n);
if (n_poll >= 0) {
n_pkts_cpl = n_poll;
@@ -2168,7 +2150,7 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
n_pkts_put = RTE_MIN(count, n_pkts_cpl);
if (unlikely(n_pkts_put == 0)) {
vq->async_last_pkts_n = n_pkts_cpl;
-   goto done;
+   return 0;
}
 
if (vq_is_packed(dev)) {
@@ -2207,12 +2189,74 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
vq->last_async_desc_idx_split += n_descs;
}
 
-done:
+   return n_pkts_put;
+}
+
+uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
+   struct rte_mbuf **pkts, uint16_t count)
+{
+   struct virtio_net *dev = get_device(vid);
+   struct vhost_virtqueue *vq;
+   uint16_t n_pkts_put = 0;
+
+  

[dpdk-dev] [PATCH v4 3/5] vhost: handle memory hotplug for async vhost

2021-07-15 Thread Cheng Jiang
From: Jiayu Hu 

When the guest memory is hotplugged, the vhost application which
enables DMA acceleration must stop DMA transfers before the vhost
re-maps the guest memory.

This patch is to notify the vhost application of stopping DMA
transfers.

Signed-off-by: Jiayu Hu 
---
 lib/vhost/vhost_user.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 031c578e54..39e8432d1c 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -1275,6 +1275,15 @@ vhost_user_set_mem_table(struct virtio_net **pdev, 
struct VhostUserMsg *msg,
vdpa_dev->ops->dev_close(dev->vid);
dev->flags &= ~VIRTIO_DEV_VDPA_CONFIGURED;
}
+
+   /* notify the backend application to stop DMA transfers */
+   if (dev->async_copy && dev->notify_ops->vring_state_changed) {
+   for (i = 0; i < dev->nr_vring; i++) {
+   dev->notify_ops->vring_state_changed(dev->vid,
+   i, 0);
+   }
+   }
+
free_mem_region(dev);
rte_free(dev->mem);
dev->mem = NULL;
-- 
2.29.2



[dpdk-dev] [PATCH v4 4/5] examples/vhost: handle memory hotplug for async vhost

2021-07-15 Thread Cheng Jiang
When the guest memory is hotplugged, the vhost application which
enables DMA acceleration must stop DMA transfers before the vhost
re-maps the guest memory.

To accomplish that, we need to do these changes in the vhost sample:
1. add inflight pkt count.
2. add vring_state_changed() callback.
3. add inflight pkt drain process in destroy_device() and
vring_state_changed().
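
The in-flight accounting added here reduces to one atomic counter per device
that grows by the number of packets handed to the DMA engine and shrinks by
the number reported completed; a minimal sketch of that pattern, with
illustrative struct and helper names:

#include <stdint.h>

struct example_dev {
        uint16_t pkts_inflight; /* packets submitted to DMA, not yet completed */
};

/* after rte_vhost_submit_enqueue_burst(): count only the packets that
 * actually went to the DMA engine (enqueued minus CPU-completed ones) */
static inline void
inflight_add(struct example_dev *dev, uint16_t enqueued, uint16_t cpu_done)
{
        __atomic_add_fetch(&dev->pkts_inflight, enqueued - cpu_done,
                        __ATOMIC_SEQ_CST);
}

/* after rte_vhost_poll_enqueue_completed(): drop the completed packets */
static inline void
inflight_sub(struct example_dev *dev, uint16_t completed)
{
        __atomic_sub_fetch(&dev->pkts_inflight, completed, __ATOMIC_SEQ_CST);
}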

Signed-off-by: Cheng Jiang 
---
 examples/vhost/main.c | 48 +--
 examples/vhost/main.h |  1 +
 2 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index d2179eadb9..9014c999be 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -851,8 +851,11 @@ complete_async_pkts(struct vhost_dev *vdev)
 
complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
-   if (complete_count)
+   if (complete_count) {
free_pkts(p_cpl, complete_count);
+   __atomic_sub_fetch(&vdev->pkts_inflight, complete_count, 
__ATOMIC_SEQ_CST);
+   }
+
 }
 
 static __rte_always_inline void
@@ -895,6 +898,7 @@ drain_vhost(struct vhost_dev *vdev)
complete_async_pkts(vdev);
ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
+   __atomic_add_fetch(&vdev->pkts_inflight, ret - cpu_cpl_nr, 
__ATOMIC_SEQ_CST);
 
if (cpu_cpl_nr)
free_pkts(m_cpu_cpl, cpu_cpl_nr);
@@ -1226,6 +1230,9 @@ drain_eth_rx(struct vhost_dev *vdev)
enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
VIRTIO_RXQ, pkts, rx_count,
m_cpu_cpl, &cpu_cpl_nr);
+   __atomic_add_fetch(&vdev->pkts_inflight, enqueue_count - 
cpu_cpl_nr,
+   __ATOMIC_SEQ_CST);
+
if (cpu_cpl_nr)
free_pkts(m_cpu_cpl, cpu_cpl_nr);
 
@@ -1397,8 +1404,15 @@ destroy_device(int vid)
"(%d) device has been removed from data core\n",
vdev->vid);
 
-   if (async_vhost_driver)
+   if (async_vhost_driver) {
+   uint16_t n_pkt = 0;
+   struct rte_mbuf *m_cpl[vdev->pkts_inflight];
+   n_pkt = rte_vhost_try_drain_queue_thread_unsafe(vid, 
VIRTIO_RXQ, m_cpl,
+   vdev->pkts_inflight, 2);
+
+   free_pkts(m_cpl, n_pkt);
rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
+   }
 
rte_free(vdev);
 }
@@ -1487,6 +1501,35 @@ new_device(int vid)
return 0;
 }
 
+static int
+vring_state_changed(int vid, uint16_t queue_id, int enable)
+{
+   struct vhost_dev *vdev = NULL;
+
+   TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
+   if (vdev->vid == vid)
+   break;
+   }
+   if (!vdev)
+   return -1;
+
+   if (queue_id != VIRTIO_RXQ)
+   return 0;
+
+   if (async_vhost_driver) {
+   if (!enable) {
+   uint16_t n_pkt;
+   struct rte_mbuf *m_cpl[vdev->pkts_inflight];
+
+   n_pkt = rte_vhost_try_drain_queue_thread_unsafe(vid, 
queue_id,
+   m_cpl, 
vdev->pkts_inflight, 2);
+   free_pkts(m_cpl, n_pkt);
+   }
+   }
+
+   return 0;
+}
+
 /*
  * These callback allow devices to be added to the data core when configuration
  * has been fully complete.
@@ -1495,6 +1538,7 @@ static const struct vhost_device_ops 
virtio_net_device_ops =
 {
.new_device =  new_device,
.destroy_device = destroy_device,
+   .vring_state_changed = vring_state_changed,
 };
 
 /*
diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index 0ccdce4b4a..e7b1ac60a6 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -51,6 +51,7 @@ struct vhost_dev {
uint64_t features;
size_t hdr_len;
uint16_t nr_vrings;
+   uint16_t pkts_inflight;
struct rte_vhost_memory *mem;
struct device_statistics stats;
TAILQ_ENTRY(vhost_dev) global_vdev_entry;
-- 
2.29.2



[dpdk-dev] [PATCH v4 5/5] doc: update doc for try drain API in vhost lib

2021-07-15 Thread Cheng Jiang
Update the program guide and release notes for the try drain API in the
vhost lib.

Signed-off-by: Cheng Jiang 
---
 doc/guides/prog_guide/vhost_lib.rst| 5 +
 doc/guides/rel_notes/release_21_08.rst | 5 +
 2 files changed, 10 insertions(+)

diff --git a/doc/guides/prog_guide/vhost_lib.rst 
b/doc/guides/prog_guide/vhost_lib.rst
index d18fb98910..85aabc4a75 100644
--- a/doc/guides/prog_guide/vhost_lib.rst
+++ b/doc/guides/prog_guide/vhost_lib.rst
@@ -281,6 +281,11 @@ The following is an overview of some key Vhost API 
functions:
   Poll enqueue completion status from async data path. Completed packets
   are returned to applications through ``pkts``.
 
+* ``rte_vhost_try_drain_queue_thread_unsafe(vid, queue_id, **pkts, count, 
times)``
+
+  Try to drain in-flight packets which are submitted to DMA engine in vhost 
async data
+  path. Completed packets are returned to applications through ``pkts``.
+
 Vhost-user Implementations
 --
 
diff --git a/doc/guides/rel_notes/release_21_08.rst 
b/doc/guides/rel_notes/release_21_08.rst
index 6a902ef9ac..c38e358cf9 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -117,6 +117,11 @@ New Features
   The experimental PMD power management API now supports managing
   multiple Ethernet Rx queues per lcore.
 
+* **Added try drain API in vhost library.**
+
+  Added an API which can try to drain the inflight packets submitted to DMA
+  engine in vhost async data path.
+
 
 Removed Items
 -
-- 
2.29.2



[dpdk-dev] [PATCH v5 0/5] vhost: handle memory hotplug for async vhost

2021-07-16 Thread Cheng Jiang
When the guest memory is hotplugged, the vhost application which
enables DMA acceleration must stop DMA transfers before the vhost
re-maps the guest memory.

This patch set provides an unsafe API to drain in-flight packets
which are submitted to the DMA engine in the vhost async data path,
and notifies the vhost application to stop DMA transfers. It also
enables this in the vhost example.

v5:
 * added fixes in 'vhost: fix async vhost ops return type'
 * improved git log, variable names and logs
v4:
 * rebased on the latest code
v3:
 * added a patch to fix async ops return type
 * fixed async ops fail handler
 * updated the doc
v2:
 * changed the patch structure

Cheng Jiang (4):
  vhost: fix async vhost ops return type
  vhost: add unsafe API to drain pkts in async vhost
  examples/vhost: handle memory hotplug for async vhost
  doc: update doc for try drain API in vhost lib

Jiayu Hu (1):
  vhost: handle memory hotplug for async vhost

 doc/guides/prog_guide/vhost_lib.rst|   5 +
 doc/guides/rel_notes/release_21_08.rst |   5 +
 examples/vhost/ioat.c  |   4 +-
 examples/vhost/ioat.h  |   4 +-
 examples/vhost/main.c  |  48 +++-
 examples/vhost/main.h  |   1 +
 lib/vhost/rte_vhost_async.h|  32 +-
 lib/vhost/version.map  |   3 +
 lib/vhost/vhost_user.c |   9 ++
 lib/vhost/virtio_net.c | 149 -
 10 files changed, 220 insertions(+), 40 deletions(-)

--
2.29.2



[dpdk-dev] [PATCH v5 1/5] vhost: fix async vhost ops return type

2021-07-16 Thread Cheng Jiang
The async vhost callback ops should return a negative value when
something goes wrong in the callback, so the return type should be
changed to int32_t. The issue in the vhost example is also fixed.

Fixes: cd6760da1076 ("vhost: introduce async enqueue for split ring")
Fixes: 819a71685826 ("vhost: fix async callback return type")
Fixes: 6b3c81db8bb7 ("vhost: simplify async copy completion")
Fixes: abec60e7115d ("examples/vhost: support vhost async data path")
Fixes: 873e8dad6f49 ("vhost: support packed ring in async datapath")
Cc: sta...@dpdk.org

Signed-off-by: Cheng Jiang 
---
 examples/vhost/ioat.c   |  4 +--
 examples/vhost/ioat.h   |  4 +--
 lib/vhost/rte_vhost_async.h |  8 ++---
 lib/vhost/virtio_net.c  | 61 -
 4 files changed, 61 insertions(+), 16 deletions(-)

diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
index 2a2c2d7202..457f8171f0 100644
--- a/examples/vhost/ioat.c
+++ b/examples/vhost/ioat.c
@@ -122,7 +122,7 @@ open_ioat(const char *value)
return ret;
 }
 
-uint32_t
+int32_t
 ioat_transfer_data_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_desc *descs,
struct rte_vhost_async_status *opaque_data, uint16_t count)
@@ -168,7 +168,7 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
return i_desc;
 }
 
-uint32_t
+int32_t
 ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_status *opaque_data,
uint16_t max_packets)
diff --git a/examples/vhost/ioat.h b/examples/vhost/ioat.h
index 1aa28ed6a3..b57b5645b0 100644
--- a/examples/vhost/ioat.h
+++ b/examples/vhost/ioat.h
@@ -27,12 +27,12 @@ struct dma_for_vhost {
 #ifdef RTE_RAW_IOAT
 int open_ioat(const char *value);
 
-uint32_t
+int32_t
 ioat_transfer_data_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_desc *descs,
struct rte_vhost_async_status *opaque_data, uint16_t count);
 
-uint32_t
+int32_t
 ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_status *opaque_data,
uint16_t max_packets);
diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
index 6faa31f5ad..e964d83837 100644
--- a/lib/vhost/rte_vhost_async.h
+++ b/lib/vhost/rte_vhost_async.h
@@ -59,9 +59,9 @@ struct rte_vhost_async_channel_ops {
 * @param count
 *  number of elements in the "descs" array
 * @return
-*  number of descs processed
+*  number of descs processed, negative value means error
 */
-   uint32_t (*transfer_data)(int vid, uint16_t queue_id,
+   int32_t (*transfer_data)(int vid, uint16_t queue_id,
struct rte_vhost_async_desc *descs,
struct rte_vhost_async_status *opaque_data,
uint16_t count);
@@ -76,9 +76,9 @@ struct rte_vhost_async_channel_ops {
 * @param max_packets
 *  max number of packets could be completed
 * @return
-*  number of async descs completed
+*  number of async descs completed, negative value means error
 */
-   uint32_t (*check_completed_copies)(int vid, uint16_t queue_id,
+   int32_t (*check_completed_copies)(int vid, uint16_t queue_id,
struct rte_vhost_async_status *opaque_data,
uint16_t max_packets);
 };
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index b93482587c..16ae4d9e19 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -1528,6 +1528,7 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
struct async_inflight_info *pkts_info = vq->async_pkts_info;
uint32_t n_pkts = 0, pkt_err = 0;
uint32_t num_async_pkts = 0, num_done_pkts = 0;
+   int32_t n_xfer;
struct {
uint16_t pkt_idx;
uint16_t last_avail_idx;
@@ -1608,8 +1609,17 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
if (unlikely(pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD ||
((VHOST_MAX_ASYNC_VEC >> 1) - segs_await <
BUF_VECTOR_MAX))) {
-   n_pkts = vq->async_ops.transfer_data(dev->vid,
+   n_xfer = vq->async_ops.transfer_data(dev->vid,
queue_id, tdes, 0, pkt_burst_idx);
+   if (n_xfer >= 0) {
+   n_pkts = n_xfer;
+   } else {
+   VHOST_LOG_DATA(ERR,
+   "(%d) %s: failed to transfer data for 
queue id %d.\n",
+   dev->vid, __func__, queue_id);
+   n_pkts = 0;
+   }
+
iovec_idx = 0;
it_idx = 0;
 

[dpdk-dev] [PATCH v5 2/5] vhost: add unsafe API to drain pkts in async vhost

2021-07-16 Thread Cheng Jiang
Applications need to stop DMA transfers and finish all the in-flight
packets in the VM memory hot-plug case when async vhost is used. This
patch provides an unsafe API to drain in-flight packets which are
submitted to the DMA engine in the vhost async data path.

Signed-off-by: Cheng Jiang 
---
 lib/vhost/rte_vhost_async.h | 24 ++
 lib/vhost/version.map   |  3 ++
 lib/vhost/virtio_net.c  | 90 +++--
 3 files changed, 94 insertions(+), 23 deletions(-)

diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
index e964d83837..c3de79d1e1 100644
--- a/lib/vhost/rte_vhost_async.h
+++ b/lib/vhost/rte_vhost_async.h
@@ -193,4 +193,28 @@ __rte_experimental
 uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
struct rte_mbuf **pkts, uint16_t count);
 
+/**
+ * This function checks async completion status and empties all packets
+ * for a specific vhost device queue. Packets which are inflight will
+ * be returned in an array.
+ *
+ * @note This function does not perform any locking
+ *
+ * @param vid
+ *  id of vhost device to enqueue data
+ * @param queue_id
+ *  queue id to enqueue data
+ * @param pkts
+ *  blank array to get return packet pointer
+ * @param count
+ *  size of the packet array
+ * @param times
+ *  max number of poll attempts
+ * @return
+ *  num of packets returned
+ */
+__rte_experimental
+uint16_t rte_vhost_try_drain_queue_thread_unsafe(int vid, uint16_t queue_id,
+   struct rte_mbuf **pkts, uint16_t count, uint16_t times);
+
 #endif /* _RTE_VHOST_ASYNC_H_ */
diff --git a/lib/vhost/version.map b/lib/vhost/version.map
index 9103a23cd4..b8fc8770dd 100644
--- a/lib/vhost/version.map
+++ b/lib/vhost/version.map
@@ -79,4 +79,7 @@ EXPERIMENTAL {
 
# added in 21.05
rte_vhost_get_negotiated_protocol_features;
+
+   # added in 21.08
+   rte_vhost_try_drain_queue_thread_unsafe;
 };
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 16ae4d9e19..c3a1493e0d 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -2117,10 +2117,10 @@ write_back_completed_descs_packed(struct 
vhost_virtqueue *vq,
} while (nr_left > 0);
 }
 
-uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
+static __rte_always_inline uint16_t
+vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id,
struct rte_mbuf **pkts, uint16_t count)
 {
-   struct virtio_net *dev = get_device(vid);
struct vhost_virtqueue *vq;
uint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0, n_buffers = 0;
uint16_t start_idx, pkts_idx, vq_size;
@@ -2128,26 +2128,8 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
uint16_t from, i;
int32_t n_cpl;
 
-   if (!dev)
-   return 0;
-
-   VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
-   if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
-   VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
-   dev->vid, __func__, queue_id);
-   return 0;
-   }
-
vq = dev->virtqueue[queue_id];
 
-   if (unlikely(!vq->async_registered)) {
-   VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id 
%d.\n",
-   dev->vid, __func__, queue_id);
-   return 0;
-   }
-
-   rte_spinlock_lock(&vq->access_lock);
-
pkts_idx = vq->async_pkts_idx % vq->size;
pkts_info = vq->async_pkts_info;
vq_size = vq->size;
@@ -2155,7 +2137,7 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
vq_size, vq->async_pkts_inflight_n);
 
if (count > vq->async_last_pkts_n) {
-   n_cpl = vq->async_ops.check_completed_copies(vid,
+   n_cpl = vq->async_ops.check_completed_copies(dev->vid,
queue_id, 0, count - vq->async_last_pkts_n);
if (n_cpl >= 0) {
n_pkts_cpl = n_cpl;
@@ -2171,7 +2153,7 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
n_pkts_put = RTE_MIN(count, n_pkts_cpl);
if (unlikely(n_pkts_put == 0)) {
vq->async_last_pkts_n = n_pkts_cpl;
-   goto done;
+   return 0;
}
 
if (vq_is_packed(dev)) {
@@ -2210,12 +2192,74 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
vq->last_async_desc_idx_split += n_descs;
}
 
-done:
+   return n_pkts_put;
+}
+
+uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
+   struct rte_mbuf **pkts, uint16_t count)
+{
+   struct virtio_net *dev = get_device(vid);
+   struct vhost_virtqueue *vq;
+   uint16_t n_pkts_put = 0;
+
+   if (!dev)
+ 

[dpdk-dev] [PATCH v5 3/5] vhost: handle memory hotplug for async vhost

2021-07-16 Thread Cheng Jiang
From: Jiayu Hu 

When the guest memory is hotplugged, the vhost application which
enables DMA acceleration must stop DMA transfers before the vhost
re-maps the guest memory.

This patch is to notify the vhost application of stopping DMA
transfers.

Signed-off-by: Jiayu Hu 
---
 lib/vhost/vhost_user.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 031c578e54..39e8432d1c 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -1275,6 +1275,15 @@ vhost_user_set_mem_table(struct virtio_net **pdev, 
struct VhostUserMsg *msg,
vdpa_dev->ops->dev_close(dev->vid);
dev->flags &= ~VIRTIO_DEV_VDPA_CONFIGURED;
}
+
+   /* notify the backend application to stop DMA transfers */
+   if (dev->async_copy && dev->notify_ops->vring_state_changed) {
+   for (i = 0; i < dev->nr_vring; i++) {
+   dev->notify_ops->vring_state_changed(dev->vid,
+   i, 0);
+   }
+   }
+
free_mem_region(dev);
rte_free(dev->mem);
dev->mem = NULL;
-- 
2.29.2



[dpdk-dev] [PATCH v5 4/5] examples/vhost: handle memory hotplug for async vhost

2021-07-16 Thread Cheng Jiang
When the guest memory is hotplugged, the vhost application which
enables DMA acceleration must stop DMA transfers before the vhost
re-maps the guest memory.

To accomplish that, we need to do these changes in the vhost sample:
1. add inflight pkt count.
2. add vring_state_changed() callback.
3. add inflight pkt drain process in destroy_device() and
vring_state_changed().

Signed-off-by: Cheng Jiang 
---
 examples/vhost/main.c | 48 +--
 examples/vhost/main.h |  1 +
 2 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index d2179eadb9..9014c999be 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -851,8 +851,11 @@ complete_async_pkts(struct vhost_dev *vdev)
 
complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
-   if (complete_count)
+   if (complete_count) {
free_pkts(p_cpl, complete_count);
+   __atomic_sub_fetch(&vdev->pkts_inflight, complete_count, 
__ATOMIC_SEQ_CST);
+   }
+
 }
 
 static __rte_always_inline void
@@ -895,6 +898,7 @@ drain_vhost(struct vhost_dev *vdev)
complete_async_pkts(vdev);
ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
+   __atomic_add_fetch(&vdev->pkts_inflight, ret - cpu_cpl_nr, 
__ATOMIC_SEQ_CST);
 
if (cpu_cpl_nr)
free_pkts(m_cpu_cpl, cpu_cpl_nr);
@@ -1226,6 +1230,9 @@ drain_eth_rx(struct vhost_dev *vdev)
enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
VIRTIO_RXQ, pkts, rx_count,
m_cpu_cpl, &cpu_cpl_nr);
+   __atomic_add_fetch(&vdev->pkts_inflight, enqueue_count - 
cpu_cpl_nr,
+   __ATOMIC_SEQ_CST);
+
if (cpu_cpl_nr)
free_pkts(m_cpu_cpl, cpu_cpl_nr);
 
@@ -1397,8 +1404,15 @@ destroy_device(int vid)
"(%d) device has been removed from data core\n",
vdev->vid);
 
-   if (async_vhost_driver)
+   if (async_vhost_driver) {
+   uint16_t n_pkt = 0;
+   struct rte_mbuf *m_cpl[vdev->pkts_inflight];
+   n_pkt = rte_vhost_try_drain_queue_thread_unsafe(vid, 
VIRTIO_RXQ, m_cpl,
+   vdev->pkts_inflight, 2);
+
+   free_pkts(m_cpl, n_pkt);
rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
+   }
 
rte_free(vdev);
 }
@@ -1487,6 +1501,35 @@ new_device(int vid)
return 0;
 }
 
+static int
+vring_state_changed(int vid, uint16_t queue_id, int enable)
+{
+   struct vhost_dev *vdev = NULL;
+
+   TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
+   if (vdev->vid == vid)
+   break;
+   }
+   if (!vdev)
+   return -1;
+
+   if (queue_id != VIRTIO_RXQ)
+   return 0;
+
+   if (async_vhost_driver) {
+   if (!enable) {
+   uint16_t n_pkt;
+   struct rte_mbuf *m_cpl[vdev->pkts_inflight];
+
+   n_pkt = rte_vhost_try_drain_queue_thread_unsafe(vid, 
queue_id,
+   m_cpl, 
vdev->pkts_inflight, 2);
+   free_pkts(m_cpl, n_pkt);
+   }
+   }
+
+   return 0;
+}
+
 /*
  * These callback allow devices to be added to the data core when configuration
  * has been fully complete.
@@ -1495,6 +1538,7 @@ static const struct vhost_device_ops 
virtio_net_device_ops =
 {
.new_device =  new_device,
.destroy_device = destroy_device,
+   .vring_state_changed = vring_state_changed,
 };
 
 /*
diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index 0ccdce4b4a..e7b1ac60a6 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -51,6 +51,7 @@ struct vhost_dev {
uint64_t features;
size_t hdr_len;
uint16_t nr_vrings;
+   uint16_t pkts_inflight;
struct rte_vhost_memory *mem;
struct device_statistics stats;
TAILQ_ENTRY(vhost_dev) global_vdev_entry;
-- 
2.29.2



[dpdk-dev] [PATCH v5 5/5] doc: update doc for try drain API in vhost lib

2021-07-16 Thread Cheng Jiang
Update the program guide and release notes for the try drain API in the
vhost lib.

Signed-off-by: Cheng Jiang 
---
 doc/guides/prog_guide/vhost_lib.rst| 5 +
 doc/guides/rel_notes/release_21_08.rst | 5 +
 2 files changed, 10 insertions(+)

diff --git a/doc/guides/prog_guide/vhost_lib.rst 
b/doc/guides/prog_guide/vhost_lib.rst
index d18fb98910..85aabc4a75 100644
--- a/doc/guides/prog_guide/vhost_lib.rst
+++ b/doc/guides/prog_guide/vhost_lib.rst
@@ -281,6 +281,11 @@ The following is an overview of some key Vhost API 
functions:
   Poll enqueue completion status from async data path. Completed packets
   are returned to applications through ``pkts``.
 
+* ``rte_vhost_try_drain_queue_thread_unsafe(vid, queue_id, **pkts, count, 
times)``
+
+  Try to drain in-flight packets which are submitted to DMA engine in vhost 
async data
+  path. Completed packets are returned to applications through ``pkts``.
+
 Vhost-user Implementations
 --
 
diff --git a/doc/guides/rel_notes/release_21_08.rst 
b/doc/guides/rel_notes/release_21_08.rst
index 6a902ef9ac..c38e358cf9 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -117,6 +117,11 @@ New Features
   The experimental PMD power management API now supports managing
   multiple Ethernet Rx queues per lcore.
 
+* **Added try drain API in vhost library.**
+
+  Added an API which can try to drain the inflight packets submitted to DMA
+  engine in vhost async data path.
+
 
 Removed Items
 -
-- 
2.29.2



[dpdk-dev] [PATCH v6 0/5] vhost: handle memory hotplug for async vhost

2021-07-19 Thread Cheng Jiang
When the guest memory is hotplugged, the vhost application which
enables DMA acceleration must stop DMA transfers before the vhost
re-maps the guest memory.

This patch set provides an unsafe API to drain inflight packets
which are submitted to the DMA engine in the vhost async data path,
and notifies the vhost application to stop DMA transfers. It also
enables this in the vhost example.

v6:
 * removed unnecessary args for the new API
 * improved variable names and function names
 * added enable notification in set_mem_table
 * fixed vhost example queue clear process
v5:
 * added fixes in 'vhost: fix async vhost ops return type'
 * improved git log, variable names and logs
v4:
 * rebased on the latest code
v3:
 * added a patch to fix async ops return type
 * fixed async ops fail handler
 * updated the doc
v2:
 * changed the patch structure

Cheng Jiang (4):
  vhost: fix async vhost ops return type
  vhost: add unsafe API to clear packets in async vhost
  examples/vhost: handle memory hotplug for async vhost
  doc: update doc for inflight packets clear API in vhost lib

Jiayu Hu (1):
  vhost: handle memory hotplug for async vhost

 doc/guides/prog_guide/vhost_lib.rst|   5 +
 doc/guides/rel_notes/release_21_08.rst |   5 +
 examples/vhost/ioat.c  |   4 +-
 examples/vhost/ioat.h  |   4 +-
 examples/vhost/main.c  |  55 -
 examples/vhost/main.h  |   1 +
 lib/vhost/rte_vhost_async.h|  30 -
 lib/vhost/version.map  |   3 +
 lib/vhost/vhost_user.c |  16 +++
 lib/vhost/virtio_net.c | 152 -
 10 files changed, 234 insertions(+), 41 deletions(-)

--
2.29.2



[dpdk-dev] [PATCH v6 1/5] vhost: fix async vhost ops return type

2021-07-19 Thread Cheng Jiang
The async vhost callback ops should return a negative value when
something goes wrong in the callback, so the return type should be
changed to int32_t. The issue in the vhost example is also fixed.

Fixes: cd6760da1076 ("vhost: introduce async enqueue for split ring")
Fixes: 819a71685826 ("vhost: fix async callback return type")
Fixes: 6b3c81db8bb7 ("vhost: simplify async copy completion")
Fixes: abec60e7115d ("examples/vhost: support vhost async data path")
Fixes: 873e8dad6f49 ("vhost: support packed ring in async datapath")
Cc: sta...@dpdk.org

Signed-off-by: Cheng Jiang 
---
 examples/vhost/ioat.c   |  4 +--
 examples/vhost/ioat.h   |  4 +--
 lib/vhost/rte_vhost_async.h |  8 ++---
 lib/vhost/virtio_net.c  | 61 -
 4 files changed, 61 insertions(+), 16 deletions(-)

diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
index 2a2c2d7202..457f8171f0 100644
--- a/examples/vhost/ioat.c
+++ b/examples/vhost/ioat.c
@@ -122,7 +122,7 @@ open_ioat(const char *value)
return ret;
 }
 
-uint32_t
+int32_t
 ioat_transfer_data_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_desc *descs,
struct rte_vhost_async_status *opaque_data, uint16_t count)
@@ -168,7 +168,7 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
return i_desc;
 }
 
-uint32_t
+int32_t
 ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_status *opaque_data,
uint16_t max_packets)
diff --git a/examples/vhost/ioat.h b/examples/vhost/ioat.h
index 1aa28ed6a3..b57b5645b0 100644
--- a/examples/vhost/ioat.h
+++ b/examples/vhost/ioat.h
@@ -27,12 +27,12 @@ struct dma_for_vhost {
 #ifdef RTE_RAW_IOAT
 int open_ioat(const char *value);
 
-uint32_t
+int32_t
 ioat_transfer_data_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_desc *descs,
struct rte_vhost_async_status *opaque_data, uint16_t count);
 
-uint32_t
+int32_t
 ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_status *opaque_data,
uint16_t max_packets);
diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
index 6faa31f5ad..e964d83837 100644
--- a/lib/vhost/rte_vhost_async.h
+++ b/lib/vhost/rte_vhost_async.h
@@ -59,9 +59,9 @@ struct rte_vhost_async_channel_ops {
 * @param count
 *  number of elements in the "descs" array
 * @return
-*  number of descs processed
+*  number of descs processed, negative value means error
 */
-   uint32_t (*transfer_data)(int vid, uint16_t queue_id,
+   int32_t (*transfer_data)(int vid, uint16_t queue_id,
struct rte_vhost_async_desc *descs,
struct rte_vhost_async_status *opaque_data,
uint16_t count);
@@ -76,9 +76,9 @@ struct rte_vhost_async_channel_ops {
 * @param max_packets
 *  max number of packets could be completed
 * @return
-*  number of async descs completed
+*  number of async descs completed, negative value means error
 */
-   uint32_t (*check_completed_copies)(int vid, uint16_t queue_id,
+   int32_t (*check_completed_copies)(int vid, uint16_t queue_id,
struct rte_vhost_async_status *opaque_data,
uint16_t max_packets);
 };
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index b93482587c..16ae4d9e19 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -1528,6 +1528,7 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
struct async_inflight_info *pkts_info = vq->async_pkts_info;
uint32_t n_pkts = 0, pkt_err = 0;
uint32_t num_async_pkts = 0, num_done_pkts = 0;
+   int32_t n_xfer;
struct {
uint16_t pkt_idx;
uint16_t last_avail_idx;
@@ -1608,8 +1609,17 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
if (unlikely(pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD ||
((VHOST_MAX_ASYNC_VEC >> 1) - segs_await <
BUF_VECTOR_MAX))) {
-   n_pkts = vq->async_ops.transfer_data(dev->vid,
+   n_xfer = vq->async_ops.transfer_data(dev->vid,
queue_id, tdes, 0, pkt_burst_idx);
+   if (n_xfer >= 0) {
+   n_pkts = n_xfer;
+   } else {
+   VHOST_LOG_DATA(ERR,
+   "(%d) %s: failed to transfer data for 
queue id %d.\n",
+   dev->vid, __func__, queue_id);
+   n_pkts = 0;
+   }
+
iovec_idx = 0;
it_idx = 0;
 

[dpdk-dev] [PATCH v6 2/5] vhost: add unsafe API to clear packets in async vhost

2021-07-19 Thread Cheng Jiang
Applications need to stop DMA transfers and finish all the inflight
packets in the VM memory hot-plug case when async vhost is used. This
patch provides an unsafe API to clear inflight packets which are
submitted to the DMA engine in the vhost async data path.
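
Compared with the earlier "try drain" proposal, this version drops the
poll-attempt argument, so the caller keeps looping until its own in-flight
counter drops to zero. A hedged usage sketch follows; the counter and helper
names are illustrative and not part of the patch:

#include <rte_mbuf.h>
#include <rte_vhost_async.h>

/* Sketch: clear all in-flight packets on one RX queue. The caller owns
 * the virtqueue locking and its own in-flight accounting. */
static void
clear_rx_queue(int vid, uint16_t queue_id, uint16_t *pkts_inflight)
{
        while (*pkts_inflight) {
                struct rte_mbuf *pkts[*pkts_inflight];
                uint16_t n;

                n = rte_vhost_clear_queue_thread_unsafe(vid, queue_id,
                                pkts, *pkts_inflight);
                rte_pktmbuf_free_bulk(pkts, n);
                __atomic_sub_fetch(pkts_inflight, n, __ATOMIC_SEQ_CST);
        }
}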

Signed-off-by: Cheng Jiang 
---
 lib/vhost/rte_vhost_async.h | 22 +
 lib/vhost/version.map   |  3 ++
 lib/vhost/virtio_net.c  | 93 +++--
 3 files changed, 94 insertions(+), 24 deletions(-)

diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
index e964d83837..9961e4970e 100644
--- a/lib/vhost/rte_vhost_async.h
+++ b/lib/vhost/rte_vhost_async.h
@@ -193,4 +193,26 @@ __rte_experimental
 uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
struct rte_mbuf **pkts, uint16_t count);
 
+/**
+ * This function checks async completion status and clears packets for
+ * a specific vhost device queue. Packets which are inflight will be
+ * returned in an array.
+ *
+ * @note This function does not perform any locking
+ *
+ * @param vid
+ *  ID of vhost device to clear data
+ * @param queue_id
+ *  Queue id to clear data
+ * @param pkts
+ *  Blank array to get return packet pointer
+ * @param count
+ *  Size of the packet array
+ * @return
+ *  Number of packets returned
+ */
+__rte_experimental
+uint16_t rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id,
+   struct rte_mbuf **pkts, uint16_t count);
+
 #endif /* _RTE_VHOST_ASYNC_H_ */
diff --git a/lib/vhost/version.map b/lib/vhost/version.map
index 9103a23cd4..8dcf9e802a 100644
--- a/lib/vhost/version.map
+++ b/lib/vhost/version.map
@@ -79,4 +79,7 @@ EXPERIMENTAL {
 
# added in 21.05
rte_vhost_get_negotiated_protocol_features;
+
+   # added in 21.08
+   rte_vhost_clear_queue_thread_unsafe;
 };
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 16ae4d9e19..29f91f9ad4 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -2117,10 +2117,10 @@ write_back_completed_descs_packed(struct 
vhost_virtqueue *vq,
} while (nr_left > 0);
 }
 
-uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
+static __rte_always_inline uint16_t
+vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id,
struct rte_mbuf **pkts, uint16_t count)
 {
-   struct virtio_net *dev = get_device(vid);
struct vhost_virtqueue *vq;
uint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0, n_buffers = 0;
uint16_t start_idx, pkts_idx, vq_size;
@@ -2128,26 +2128,8 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
uint16_t from, i;
int32_t n_cpl;
 
-   if (!dev)
-   return 0;
-
-   VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
-   if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
-   VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
-   dev->vid, __func__, queue_id);
-   return 0;
-   }
-
vq = dev->virtqueue[queue_id];
 
-   if (unlikely(!vq->async_registered)) {
-   VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id 
%d.\n",
-   dev->vid, __func__, queue_id);
-   return 0;
-   }
-
-   rte_spinlock_lock(&vq->access_lock);
-
pkts_idx = vq->async_pkts_idx % vq->size;
pkts_info = vq->async_pkts_info;
vq_size = vq->size;
@@ -2155,7 +2137,7 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
vq_size, vq->async_pkts_inflight_n);
 
if (count > vq->async_last_pkts_n) {
-   n_cpl = vq->async_ops.check_completed_copies(vid,
+   n_cpl = vq->async_ops.check_completed_copies(dev->vid,
queue_id, 0, count - vq->async_last_pkts_n);
if (n_cpl >= 0) {
n_pkts_cpl = n_cpl;
@@ -2171,7 +2153,7 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
n_pkts_put = RTE_MIN(count, n_pkts_cpl);
if (unlikely(n_pkts_put == 0)) {
vq->async_last_pkts_n = n_pkts_cpl;
-   goto done;
+   return 0;
}
 
if (vq_is_packed(dev)) {
@@ -2210,10 +2192,73 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
vq->last_async_desc_idx_split += n_descs;
}
 
-done:
+   return n_pkts_put;
+}
+
+uint16_t
+rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
+   struct rte_mbuf **pkts, uint16_t count)
+{
+   struct virtio_net *dev = get_device(vid);
+   struct vhost_virtqueue *vq;
+   uint16_t n_pkts_cpl = 0;
+
+   if (!dev)
+   return 0;
+
+   VHOST_LOG_DATA(DEBUG, "

[dpdk-dev] [PATCH v6 3/5] vhost: handle memory hotplug for async vhost

2021-07-19 Thread Cheng Jiang
From: Jiayu Hu 

When the guest memory is hotplugged, the vhost application which
enables DMA acceleration must stop DMA transfers before the vhost
re-maps the guest memory.

This patch is to notify the vhost application of stopping DMA
transfers.
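
Relative to the earlier revision, this version also re-enables the rings once
the new memory table is mapped, so the application sees a disable/enable
bracket around the re-map. A condensed sketch of that ordering, using
stand-in types rather than the internal vhost structures:

#include <stdint.h>
#include <stdbool.h>

/* Minimal stand-ins for the internal vhost structures, illustration only. */
struct example_notify_ops {
        int (*vring_state_changed)(int vid, uint16_t queue_id, int enable);
};

struct example_virtio_net {
        int vid;
        uint32_t nr_vring;
        bool async_copy;
        const struct example_notify_ops *notify_ops;
};

/* Sketch of the disable/enable bracket applied around the guest memory
 * re-map when async copy is in use. */
static void
example_set_mem_table(struct example_virtio_net *dev)
{
        uint32_t i;
        bool async_notify = false;

        if (dev->async_copy && dev->notify_ops->vring_state_changed) {
                for (i = 0; i < dev->nr_vring; i++)
                        dev->notify_ops->vring_state_changed(dev->vid, i, 0);
                async_notify = true;
        }

        /* ... free the old regions and mmap the new guest memory here ... */

        if (async_notify) {
                for (i = 0; i < dev->nr_vring; i++)
                        dev->notify_ops->vring_state_changed(dev->vid, i, 1);
        }
}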

Signed-off-by: Jiayu Hu 
---
 lib/vhost/vhost_user.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 031c578e54..8106cc1c30 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -1248,6 +1248,7 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct 
VhostUserMsg *msg,
int numa_node = SOCKET_ID_ANY;
uint64_t mmap_offset;
uint32_t i;
+   bool async_notify = false;
 
if (validate_msg_fds(msg, memory->nregions) != 0)
return RTE_VHOST_MSG_RESULT_ERR;
@@ -1275,6 +1276,16 @@ vhost_user_set_mem_table(struct virtio_net **pdev, 
struct VhostUserMsg *msg,
vdpa_dev->ops->dev_close(dev->vid);
dev->flags &= ~VIRTIO_DEV_VDPA_CONFIGURED;
}
+
+   /* notify the vhost application to stop DMA transfers */
+   if (dev->async_copy && dev->notify_ops->vring_state_changed) {
+   for (i = 0; i < dev->nr_vring; i++) {
+   dev->notify_ops->vring_state_changed(dev->vid,
+   i, 0);
+   }
+   async_notify = true;
+   }
+
free_mem_region(dev);
rte_free(dev->mem);
dev->mem = NULL;
@@ -1371,6 +1382,11 @@ vhost_user_set_mem_table(struct virtio_net **pdev, 
struct VhostUserMsg *msg,
 
dump_guest_pages(dev);
 
+   if (async_notify) {
+   for (i = 0; i < dev->nr_vring; i++)
+   dev->notify_ops->vring_state_changed(dev->vid, i, 1);
+   }
+
return RTE_VHOST_MSG_RESULT_OK;
 
 free_mem_table:
-- 
2.29.2



[dpdk-dev] [PATCH v6 4/5] examples/vhost: handle memory hotplug for async vhost

2021-07-19 Thread Cheng Jiang
When the guest memory is hotplugged, the vhost application which
enables DMA acceleration must stop DMA transfers before the vhost
re-maps the guest memory.

To accomplish that, we need to do these changes in the vhost sample:
1. add inflight packets count.
2. add vring_state_changed() callback.
3. add inflight packets clear process in destroy_device() and
vring_state_changed().

Signed-off-by: Cheng Jiang 
---
 examples/vhost/main.c | 55 +--
 examples/vhost/main.h |  1 +
 2 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index d2179eadb9..cfd2bc157c 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -851,8 +851,11 @@ complete_async_pkts(struct vhost_dev *vdev)
 
complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
-   if (complete_count)
+   if (complete_count) {
free_pkts(p_cpl, complete_count);
+   __atomic_sub_fetch(&vdev->pkts_inflight, complete_count, 
__ATOMIC_SEQ_CST);
+   }
+
 }
 
 static __rte_always_inline void
@@ -895,6 +898,7 @@ drain_vhost(struct vhost_dev *vdev)
complete_async_pkts(vdev);
ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
+   __atomic_add_fetch(&vdev->pkts_inflight, ret - cpu_cpl_nr, 
__ATOMIC_SEQ_CST);
 
if (cpu_cpl_nr)
free_pkts(m_cpu_cpl, cpu_cpl_nr);
@@ -1226,6 +1230,9 @@ drain_eth_rx(struct vhost_dev *vdev)
enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
VIRTIO_RXQ, pkts, rx_count,
m_cpu_cpl, &cpu_cpl_nr);
+   __atomic_add_fetch(&vdev->pkts_inflight, enqueue_count - 
cpu_cpl_nr,
+   __ATOMIC_SEQ_CST);
+
if (cpu_cpl_nr)
free_pkts(m_cpu_cpl, cpu_cpl_nr);
 
@@ -1397,8 +1404,19 @@ destroy_device(int vid)
"(%d) device has been removed from data core\n",
vdev->vid);
 
-   if (async_vhost_driver)
+   if (async_vhost_driver) {
+   uint16_t n_pkt = 0;
+   struct rte_mbuf *m_cpl[vdev->pkts_inflight];
+
+   while (vdev->pkts_inflight) {
+   n_pkt = rte_vhost_clear_queue_thread_unsafe(vid, 
VIRTIO_RXQ,
+   m_cpl, vdev->pkts_inflight);
+   free_pkts(m_cpl, n_pkt);
+   __atomic_sub_fetch(&vdev->pkts_inflight, n_pkt, 
__ATOMIC_SEQ_CST);
+   }
+
rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
+   }
 
rte_free(vdev);
 }
@@ -1487,6 +1505,38 @@ new_device(int vid)
return 0;
 }
 
+static int
+vring_state_changed(int vid, uint16_t queue_id, int enable)
+{
+   struct vhost_dev *vdev = NULL;
+
+   TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
+   if (vdev->vid == vid)
+   break;
+   }
+   if (!vdev)
+   return -1;
+
+   if (queue_id != VIRTIO_RXQ)
+   return 0;
+
+   if (async_vhost_driver) {
+   if (!enable) {
+   uint16_t n_pkt = 0;
+   struct rte_mbuf *m_cpl[vdev->pkts_inflight];
+
+   while (vdev->pkts_inflight) {
+   n_pkt = 
rte_vhost_clear_queue_thread_unsafe(vid, queue_id,
+   m_cpl, 
vdev->pkts_inflight);
+   free_pkts(m_cpl, n_pkt);
+   __atomic_sub_fetch(&vdev->pkts_inflight, n_pkt, 
__ATOMIC_SEQ_CST);
+   }
+   }
+   }
+
+   return 0;
+}
+
 /*
  * These callback allow devices to be added to the data core when configuration
  * has been fully complete.
@@ -1495,6 +1545,7 @@ static const struct vhost_device_ops 
virtio_net_device_ops =
 {
.new_device =  new_device,
.destroy_device = destroy_device,
+   .vring_state_changed = vring_state_changed,
 };
 
 /*
diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index 0ccdce4b4a..e7b1ac60a6 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -51,6 +51,7 @@ struct vhost_dev {
uint64_t features;
size_t hdr_len;
uint16_t nr_vrings;
+   uint16_t pkts_inflight;
struct rte_vhost_memory *mem;
struct device_statistics stats;
TAILQ_ENTRY(vhost_dev) global_vdev_entry;
-- 
2.29.2



[dpdk-dev] [PATCH v6 5/5] doc: update doc for inflight packets clear API in vhost lib

2021-07-19 Thread Cheng Jiang
Update the program guide and release notes for the inflight packets clear
API in the vhost lib.

Signed-off-by: Cheng Jiang 
---
 doc/guides/prog_guide/vhost_lib.rst| 5 +
 doc/guides/rel_notes/release_21_08.rst | 5 +
 2 files changed, 10 insertions(+)

diff --git a/doc/guides/prog_guide/vhost_lib.rst 
b/doc/guides/prog_guide/vhost_lib.rst
index d18fb98910..3cdfdc0725 100644
--- a/doc/guides/prog_guide/vhost_lib.rst
+++ b/doc/guides/prog_guide/vhost_lib.rst
@@ -281,6 +281,11 @@ The following is an overview of some key Vhost API 
functions:
   Poll enqueue completion status from async data path. Completed packets
   are returned to applications through ``pkts``.
 
+* ``rte_vhost_clear_queue_thread_unsafe(vid, queue_id, **pkts, count)``
+
+  Clear inflight packets which are submitted to DMA engine in vhost async data
+  path. Completed packets are returned to applications through ``pkts``.
+
 Vhost-user Implementations
 --
 
diff --git a/doc/guides/rel_notes/release_21_08.rst 
b/doc/guides/rel_notes/release_21_08.rst
index 6a902ef9ac..482d16ba13 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -117,6 +117,11 @@ New Features
   The experimental PMD power management API now supports managing
   multiple Ethernet Rx queues per lcore.
 
+* **Added inflight packets clear API in vhost library.**
+
+  Added an API which can clear the inflight packets submitted to DMA
+  engine in vhost async data path.
+
 
 Removed Items
 -
-- 
2.29.2



[dpdk-dev] [PATCH v7 0/5] vhost: handle memory hotplug for async vhost

2021-07-21 Thread Cheng Jiang
When the guest memory is hotplugged, the vhost application which
enables DMA acceleration must stop DMA transfers before the vhost
re-maps the guest memory.

This patch set provides an unsafe API to drain inflight packets
which are submitted to the DMA engine in the vhost async data path,
and notifies the vhost application to stop DMA transfers. It also
enables this in the vhost example.

v7:
 * rebased on the latest code
 * improved commit log
v6:
 * removed unnecessary args for the new API
 * improved variable names and function names
 * added enable notification in set_mem_table
 * fixed vhost example queue clear process
v5:
 * added fixes in 'vhost: fix async vhost ops return type'
 * improved git log, variable names and logs
v4:
 * rebased on the latest code
v3:
 * added a patch to fix async ops return type
 * fixed async ops fail handler
 * updated the doc
v2:
 * changed the patch structure

Cheng Jiang (4):
  vhost: fix async vhost ops return type
  vhost: add unsafe API to clear packets in async vhost
  examples/vhost: handle memory hotplug for async vhost
  doc: update doc for queue clear API in vhost lib

Jiayu Hu (1):
  vhost: handle memory hotplug for async vhost

 doc/guides/prog_guide/vhost_lib.rst|   5 +
 doc/guides/rel_notes/release_21_08.rst |   5 +
 examples/vhost/ioat.c  |   4 +-
 examples/vhost/ioat.h  |   4 +-
 examples/vhost/main.c  |  55 -
 examples/vhost/main.h  |   1 +
 lib/vhost/rte_vhost_async.h|  30 -
 lib/vhost/version.map  |   1 +
 lib/vhost/vhost_user.c |  16 +++
 lib/vhost/virtio_net.c | 152 -
 10 files changed, 232 insertions(+), 41 deletions(-)

--
2.29.2



[dpdk-dev] [PATCH v7 1/5] vhost: fix async vhost ops return type

2021-07-21 Thread Cheng Jiang
The async vhost callback ops should return a negative value when
something goes wrong in the callback, so the return type should be
changed to int32_t. The issue in the vhost example is also fixed.

Fixes: cd6760da1076 ("vhost: introduce async enqueue for split ring")
Fixes: 819a71685826 ("vhost: fix async callback return type")
Fixes: 6b3c81db8bb7 ("vhost: simplify async copy completion")
Fixes: abec60e7115d ("examples/vhost: support vhost async data path")
Fixes: 873e8dad6f49 ("vhost: support packed ring in async datapath")
Cc: sta...@dpdk.org

Signed-off-by: Cheng Jiang 
Reviewed-by: Maxime Coquelin 
---
 examples/vhost/ioat.c   |  4 +--
 examples/vhost/ioat.h   |  4 +--
 lib/vhost/rte_vhost_async.h |  8 ++---
 lib/vhost/virtio_net.c  | 61 -
 4 files changed, 61 insertions(+), 16 deletions(-)

diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
index 2a2c2d7202..457f8171f0 100644
--- a/examples/vhost/ioat.c
+++ b/examples/vhost/ioat.c
@@ -122,7 +122,7 @@ open_ioat(const char *value)
return ret;
 }
 
-uint32_t
+int32_t
 ioat_transfer_data_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_desc *descs,
struct rte_vhost_async_status *opaque_data, uint16_t count)
@@ -168,7 +168,7 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
return i_desc;
 }
 
-uint32_t
+int32_t
 ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_status *opaque_data,
uint16_t max_packets)
diff --git a/examples/vhost/ioat.h b/examples/vhost/ioat.h
index 1aa28ed6a3..b57b5645b0 100644
--- a/examples/vhost/ioat.h
+++ b/examples/vhost/ioat.h
@@ -27,12 +27,12 @@ struct dma_for_vhost {
 #ifdef RTE_RAW_IOAT
 int open_ioat(const char *value);
 
-uint32_t
+int32_t
 ioat_transfer_data_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_desc *descs,
struct rte_vhost_async_status *opaque_data, uint16_t count);
 
-uint32_t
+int32_t
 ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_status *opaque_data,
uint16_t max_packets);
diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
index 69ec66bba5..02d012ae23 100644
--- a/lib/vhost/rte_vhost_async.h
+++ b/lib/vhost/rte_vhost_async.h
@@ -59,9 +59,9 @@ struct rte_vhost_async_channel_ops {
 * @param count
 *  number of elements in the "descs" array
 * @return
-*  number of descs processed
+*  number of descs processed, negative value means error
 */
-   uint32_t (*transfer_data)(int vid, uint16_t queue_id,
+   int32_t (*transfer_data)(int vid, uint16_t queue_id,
struct rte_vhost_async_desc *descs,
struct rte_vhost_async_status *opaque_data,
uint16_t count);
@@ -76,9 +76,9 @@ struct rte_vhost_async_channel_ops {
 * @param max_packets
 *  max number of packets could be completed
 * @return
-*  number of async descs completed
+*  number of async descs completed, negative value means error
 */
-   uint32_t (*check_completed_copies)(int vid, uint16_t queue_id,
+   int32_t (*check_completed_copies)(int vid, uint16_t queue_id,
struct rte_vhost_async_status *opaque_data,
uint16_t max_packets);
 };
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 6e5d82c1a8..3ab5229f76 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -1644,6 +1644,7 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
struct async_inflight_info *pkts_info = vq->async_pkts_info;
uint32_t n_pkts = 0, pkt_err = 0;
uint32_t num_async_pkts = 0, num_done_pkts = 0;
+   int32_t n_xfer;
struct {
uint16_t pkt_idx;
uint16_t last_avail_idx;
@@ -1724,8 +1725,17 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
if (unlikely(pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD ||
((VHOST_MAX_ASYNC_VEC >> 1) - segs_await <
BUF_VECTOR_MAX))) {
-   n_pkts = vq->async_ops.transfer_data(dev->vid,
+   n_xfer = vq->async_ops.transfer_data(dev->vid,
queue_id, tdes, 0, pkt_burst_idx);
+   if (n_xfer >= 0) {
+   n_pkts = n_xfer;
+   } else {
+   VHOST_LOG_DATA(ERR,
+   "(%d) %s: failed to transfer data for 
queue id %d.\n",
+   dev->vid, __func__, queue_id);
+   n_pkts = 0;
+   }
+
iovec_idx = 0;
 

[dpdk-dev] [PATCH v7 2/5] vhost: add unsafe API to clear packets in async vhost

2021-07-21 Thread Cheng Jiang
Applications need to stop DMA transfers and finish all the inflight
packets in the VM memory hot-plug case when async vhost is used. This
patch provides an unsafe API to clear inflight packets which are
submitted to the DMA engine in the vhost async data path.

Signed-off-by: Cheng Jiang 
Reviewed-by: Maxime Coquelin 
---
 lib/vhost/rte_vhost_async.h | 22 +
 lib/vhost/version.map   |  1 +
 lib/vhost/virtio_net.c  | 93 +++--
 3 files changed, 92 insertions(+), 24 deletions(-)

diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
index 02d012ae23..b25ff446f7 100644
--- a/lib/vhost/rte_vhost_async.h
+++ b/lib/vhost/rte_vhost_async.h
@@ -246,4 +246,26 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
 __rte_experimental
 int rte_vhost_async_get_inflight(int vid, uint16_t queue_id);
 
+/**
+ * This function checks async completion status and clears packets for
+ * a specific vhost device queue. Packets which are inflight will be
+ * returned in an array.
+ *
+ * @note This function does not perform any locking
+ *
+ * @param vid
+ *  ID of vhost device to clear data
+ * @param queue_id
+ *  Queue id to clear data
+ * @param pkts
+ *  Blank array to get return packet pointer
+ * @param count
+ *  Size of the packet array
+ * @return
+ *  Number of packets returned
+ */
+__rte_experimental
+uint16_t rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id,
+   struct rte_mbuf **pkts, uint16_t count);
+
 #endif /* _RTE_VHOST_ASYNC_H_ */
diff --git a/lib/vhost/version.map b/lib/vhost/version.map
index e0c89646e8..e2504ba657 100644
--- a/lib/vhost/version.map
+++ b/lib/vhost/version.map
@@ -84,4 +84,5 @@ EXPERIMENTAL {
rte_vhost_async_get_inflight;
rte_vhost_async_channel_register_thread_unsafe;
rte_vhost_async_channel_unregister_thread_unsafe;
+   rte_vhost_clear_queue_thread_unsafe;
 };
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 3ab5229f76..8549afbbe1 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -2214,10 +2214,10 @@ write_back_completed_descs_packed(struct 
vhost_virtqueue *vq,
} while (nr_left > 0);
 }
 
-uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
+static __rte_always_inline uint16_t
+vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id,
struct rte_mbuf **pkts, uint16_t count)
 {
-   struct virtio_net *dev = get_device(vid);
struct vhost_virtqueue *vq;
uint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0, n_buffers = 0;
uint16_t start_idx, pkts_idx, vq_size;
@@ -2225,26 +2225,8 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
uint16_t from, i;
int32_t n_cpl;
 
-   if (!dev)
-   return 0;
-
-   VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
-   if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
-   VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
-   dev->vid, __func__, queue_id);
-   return 0;
-   }
-
vq = dev->virtqueue[queue_id];
 
-   if (unlikely(!vq->async_registered)) {
-   VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
-   dev->vid, __func__, queue_id);
-   return 0;
-   }
-
-   rte_spinlock_lock(&vq->access_lock);
-
pkts_idx = vq->async_pkts_idx % vq->size;
pkts_info = vq->async_pkts_info;
vq_size = vq->size;
@@ -2252,7 +2234,7 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
vq_size, vq->async_pkts_inflight_n);
 
if (count > vq->async_last_pkts_n) {
-   n_cpl = vq->async_ops.check_completed_copies(vid,
+   n_cpl = vq->async_ops.check_completed_copies(dev->vid,
queue_id, 0, count - vq->async_last_pkts_n);
if (n_cpl >= 0) {
n_pkts_cpl = n_cpl;
@@ -2268,7 +2250,7 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
n_pkts_put = RTE_MIN(count, n_pkts_cpl);
if (unlikely(n_pkts_put == 0)) {
vq->async_last_pkts_n = n_pkts_cpl;
-   goto done;
+   return 0;
}
 
if (vq_is_packed(dev)) {
@@ -2310,10 +2292,73 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
}
}
 
-done:
+   return n_pkts_put;
+}
+
+uint16_t
+rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
+   struct rte_mbuf **pkts, uint16_t count)
+{
+   struct virtio_net *dev = get_device(vid);
+   struct vhost_virtqueue *vq;
+   uint16_t n_pkts_cpl = 0;
+
+   if (!dev)
+   return 0;
+
+   VHOST_LOG_DAT
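
The archived message is cut off here. The remainder of this hunk is
sketched below from the visible context; this is a reconstruction, not the
verbatim patch. The existing public API keeps the device/queue validity
checks and the access_lock around the shared helper, while the new
thread-unsafe variant performs the same checks but calls the helper
without taking the lock.

uint16_t
rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
		struct rte_mbuf **pkts, uint16_t count)
{
	struct virtio_net *dev = get_device(vid);
	struct vhost_virtqueue *vq;
	uint16_t n_pkts_cpl = 0;

	if (!dev)
		return 0;

	/* virtqueue index and async-registration checks as before ... */
	vq = dev->virtqueue[queue_id];

	rte_spinlock_lock(&vq->access_lock);
	n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, pkts, count);
	rte_spinlock_unlock(&vq->access_lock);

	return n_pkts_cpl;
}

uint16_t
rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id,
		struct rte_mbuf **pkts, uint16_t count)
{
	struct virtio_net *dev = get_device(vid);

	if (!dev)
		return 0;

	/* same validity checks, but no lock is taken */
	return vhost_poll_enqueue_completed(dev, queue_id, pkts, count);
}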

[dpdk-dev] [PATCH v7 3/5] vhost: handle memory hotplug for async vhost

2021-07-21 Thread Cheng Jiang
From: Jiayu Hu 

When the guest memory is hotplugged, the vhost application which
enables DMA acceleration must stop DMA transfers before the vhost
re-maps the guest memory.

This patch is to notify the vhost application of stopping DMA
transfers.

Signed-off-by: Jiayu Hu 
Reviewed-by: Maxime Coquelin 
---
 lib/vhost/vhost_user.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 31300e194f..433f412fa8 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -1248,6 +1248,7 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct 
VhostUserMsg *msg,
int numa_node = SOCKET_ID_ANY;
uint64_t mmap_offset;
uint32_t i;
+   bool async_notify = false;
 
if (validate_msg_fds(msg, memory->nregions) != 0)
return RTE_VHOST_MSG_RESULT_ERR;
@@ -1275,6 +1276,16 @@ vhost_user_set_mem_table(struct virtio_net **pdev, 
struct VhostUserMsg *msg,
vdpa_dev->ops->dev_close(dev->vid);
dev->flags &= ~VIRTIO_DEV_VDPA_CONFIGURED;
}
+
+   /* notify the vhost application to stop DMA transfers */
+   if (dev->async_copy && dev->notify_ops->vring_state_changed) {
+   for (i = 0; i < dev->nr_vring; i++) {
+   dev->notify_ops->vring_state_changed(dev->vid,
+   i, 0);
+   }
+   async_notify = true;
+   }
+
free_mem_region(dev);
rte_free(dev->mem);
dev->mem = NULL;
@@ -1371,6 +1382,11 @@ vhost_user_set_mem_table(struct virtio_net **pdev, 
struct VhostUserMsg *msg,
 
dump_guest_pages(dev);
 
+   if (async_notify) {
+   for (i = 0; i < dev->nr_vring; i++)
+   dev->notify_ops->vring_state_changed(dev->vid, i, 1);
+   }
+
return RTE_VHOST_MSG_RESULT_OK;
 
 free_mem_table:
-- 
2.29.2



[dpdk-dev] [PATCH v7 4/5] examples/vhost: handle memory hotplug for async vhost

2021-07-21 Thread Cheng Jiang
When the guest memory is hotplugged, the vhost application which
enables DMA acceleration must stop DMA transfers before the vhost
re-maps the guest memory.

To accomplish that, we need to make these changes in the vhost sample:
1. add an inflight packet count.
2. add a vring_state_changed() callback.
3. add an inflight packet clearing process in destroy_device() and
vring_state_changed().

Signed-off-by: Cheng Jiang 
Reviewed-by: Maxime Coquelin 
---
 examples/vhost/main.c | 55 +--
 examples/vhost/main.h |  1 +
 2 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 9cd855a696..bc3d71c898 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -851,8 +851,11 @@ complete_async_pkts(struct vhost_dev *vdev)
 
complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
-   if (complete_count)
+   if (complete_count) {
free_pkts(p_cpl, complete_count);
+   __atomic_sub_fetch(&vdev->pkts_inflight, complete_count, 
__ATOMIC_SEQ_CST);
+   }
+
 }
 
 static __rte_always_inline void
@@ -895,6 +898,7 @@ drain_vhost(struct vhost_dev *vdev)
complete_async_pkts(vdev);
ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
+   __atomic_add_fetch(&vdev->pkts_inflight, ret - cpu_cpl_nr, 
__ATOMIC_SEQ_CST);
 
if (cpu_cpl_nr)
free_pkts(m_cpu_cpl, cpu_cpl_nr);
@@ -1226,6 +1230,9 @@ drain_eth_rx(struct vhost_dev *vdev)
enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
VIRTIO_RXQ, pkts, rx_count,
m_cpu_cpl, &cpu_cpl_nr);
+   __atomic_add_fetch(&vdev->pkts_inflight, enqueue_count - 
cpu_cpl_nr,
+   __ATOMIC_SEQ_CST);
+
if (cpu_cpl_nr)
free_pkts(m_cpu_cpl, cpu_cpl_nr);
 
@@ -1397,8 +1404,19 @@ destroy_device(int vid)
"(%d) device has been removed from data core\n",
vdev->vid);
 
-   if (async_vhost_driver)
+   if (async_vhost_driver) {
+   uint16_t n_pkt = 0;
+   struct rte_mbuf *m_cpl[vdev->pkts_inflight];
+
+   while (vdev->pkts_inflight) {
+   n_pkt = rte_vhost_clear_queue_thread_unsafe(vid, 
VIRTIO_RXQ,
+   m_cpl, vdev->pkts_inflight);
+   free_pkts(m_cpl, n_pkt);
+   __atomic_sub_fetch(&vdev->pkts_inflight, n_pkt, 
__ATOMIC_SEQ_CST);
+   }
+
rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
+   }
 
rte_free(vdev);
 }
@@ -1487,6 +1505,38 @@ new_device(int vid)
return 0;
 }
 
+static int
+vring_state_changed(int vid, uint16_t queue_id, int enable)
+{
+   struct vhost_dev *vdev = NULL;
+
+   TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
+   if (vdev->vid == vid)
+   break;
+   }
+   if (!vdev)
+   return -1;
+
+   if (queue_id != VIRTIO_RXQ)
+   return 0;
+
+   if (async_vhost_driver) {
+   if (!enable) {
+   uint16_t n_pkt = 0;
+   struct rte_mbuf *m_cpl[vdev->pkts_inflight];
+
+   while (vdev->pkts_inflight) {
+   n_pkt = 
rte_vhost_clear_queue_thread_unsafe(vid, queue_id,
+   m_cpl, 
vdev->pkts_inflight);
+   free_pkts(m_cpl, n_pkt);
+   __atomic_sub_fetch(&vdev->pkts_inflight, n_pkt, 
__ATOMIC_SEQ_CST);
+   }
+   }
+   }
+
+   return 0;
+}
+
 /*
  * These callback allow devices to be added to the data core when configuration
  * has been fully complete.
@@ -1495,6 +1545,7 @@ static const struct vhost_device_ops 
virtio_net_device_ops =
 {
.new_device =  new_device,
.destroy_device = destroy_device,
+   .vring_state_changed = vring_state_changed,
 };
 
 /*
diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index 0ccdce4b4a..e7b1ac60a6 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -51,6 +51,7 @@ struct vhost_dev {
uint64_t features;
size_t hdr_len;
uint16_t nr_vrings;
+   uint16_t pkts_inflight;
struct rte_vhost_memory *mem;
struct device_statistics stats;
TAILQ_ENTRY(vhost_dev) global_vdev_entry;
-- 
2.29.2
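
Taken together, the three changes keep one invariant: pkts_inflight counts
exactly the packets handed to the async channel and not yet completed. A
condensed sketch of that accounting follows; it is not part of the patch,
and free_pkts(), MAX_PKT_BURST and struct vhost_dev are the sample's own
definitions from main.c/main.h.

static void
submit_and_poll(struct vhost_dev *vdev, struct rte_mbuf **pkts,
		uint16_t nr_pkts)
{
	struct rte_mbuf *m_cpu_cpl[MAX_PKT_BURST];
	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
	uint32_t cpu_cpl_nr = 0;
	uint16_t enq, cpl;

	/* submit: packets not completed synchronously become inflight */
	enq = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
			pkts, nr_pkts, m_cpu_cpl, &cpu_cpl_nr);
	__atomic_add_fetch(&vdev->pkts_inflight, enq - cpu_cpl_nr,
			__ATOMIC_SEQ_CST);
	if (cpu_cpl_nr)
		free_pkts(m_cpu_cpl, cpu_cpl_nr);

	/* poll: completed packets leave the inflight set */
	cpl = rte_vhost_poll_enqueue_completed(vdev->vid, VIRTIO_RXQ,
			p_cpl, MAX_PKT_BURST);
	if (cpl) {
		free_pkts(p_cpl, cpl);
		__atomic_sub_fetch(&vdev->pkts_inflight, cpl, __ATOMIC_SEQ_CST);
	}

	/* destroy_device() and vring disable then loop
	 * rte_vhost_clear_queue_thread_unsafe() until pkts_inflight is 0.
	 */
}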



[dpdk-dev] [PATCH v7 5/5] doc: update doc for queue clear API in vhost lib

2021-07-21 Thread Cheng Jiang
Update the program guide and release notes for virtqueue inflight
packets clear API in vhost lib.

Signed-off-by: Cheng Jiang 
Reviewed-by: Maxime Coquelin 
---
 doc/guides/prog_guide/vhost_lib.rst| 5 +
 doc/guides/rel_notes/release_21_08.rst | 5 +
 2 files changed, 10 insertions(+)

diff --git a/doc/guides/prog_guide/vhost_lib.rst 
b/doc/guides/prog_guide/vhost_lib.rst
index 70ce4974df..8874033165 100644
--- a/doc/guides/prog_guide/vhost_lib.rst
+++ b/doc/guides/prog_guide/vhost_lib.rst
@@ -305,6 +305,11 @@ The following is an overview of some key Vhost API 
functions:
   This function returns the amount of in-flight packets for the vhost
   queue using async acceleration.
 
+* ``rte_vhost_clear_queue_thread_unsafe(vid, queue_id, **pkts, count)``
+
+  Clear inflight packets which are submitted to DMA engine in vhost async data
+  path. Completed packets are returned to applications through ``pkts``.
+
 Vhost-user Implementations
 --
 
diff --git a/doc/guides/rel_notes/release_21_08.rst 
b/doc/guides/rel_notes/release_21_08.rst
index 543e93ff1d..d9c4cc5df0 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -155,6 +155,11 @@ New Features
   The experimental PMD power management API now supports managing
   multiple Ethernet Rx queues per lcore.
 
+* **Added inflight packets clear API in vhost library.**
+
+  Added an API which can clear the inflight packets submitted to DMA
+  engine in vhost async data path.
+
 
 Removed Items
 -
-- 
2.29.2



[dpdk-dev] [PATCH v8 0/4] vhost: handle memory hotplug for async vhost

2021-07-23 Thread Cheng Jiang
When the guest memory is hotplugged, the vhost application which
enables DMA acceleration must stop DMA transfers before the vhost
re-maps the guest memory.

This patch set provides an unsafe API to drain inflight packets which
are submitted to the DMA engine in the vhost async data path, and
notifies the vhost application of stopping DMA transfers. It also
enables this in the vhost example.

v8:
 * updated doc in the code patch
 * fix a compile error in cross-compilation
v7:
 * rebased on the latest codes
 * improved commit log
v6:
 * removed unnecessary args for the new API
 * improved variable names and function names
 * added enable notification in set_mem_table
 * fixed vhost example queue clear process
v5:
 * added fixes in 'vhost: fix async vhost ops return type'
 * improved git log, variable names and logs
v4:
 * rebased on the latest codes
v3:
 * added a patch to fix async ops return type
 * fixed async ops fail handler
 * updated the doc
v2:
 * changed the patch structure

Cheng Jiang (3):
  vhost: fix async vhost ops return type
  vhost: add unsafe API to clear packets in async vhost
  examples/vhost: handle memory hotplug for async vhost

Jiayu Hu (1):
  vhost: handle memory hotplug for async vhost

 doc/guides/prog_guide/vhost_lib.rst|   5 +
 doc/guides/rel_notes/release_21_08.rst |   5 +
 examples/vhost/ioat.c  |   4 +-
 examples/vhost/ioat.h  |   8 +-
 examples/vhost/main.c  |  55 -
 examples/vhost/main.h  |   1 +
 lib/vhost/rte_vhost_async.h|  30 -
 lib/vhost/version.map  |   1 +
 lib/vhost/vhost_user.c |  16 +++
 lib/vhost/virtio_net.c | 152 -
 10 files changed, 234 insertions(+), 43 deletions(-)

--
2.29.2



[dpdk-dev] [PATCH v8 1/4] vhost: fix async vhost ops return type

2021-07-23 Thread Cheng Jiang
The async vhost callback ops should return a negative value when
something goes wrong in the callback, so the return type should be
changed to int32_t. The issue in the vhost example is also fixed.

Fixes: cd6760da1076 ("vhost: introduce async enqueue for split ring")
Fixes: 819a71685826 ("vhost: fix async callback return type")
Fixes: 6b3c81db8bb7 ("vhost: simplify async copy completion")
Fixes: abec60e7115d ("examples/vhost: support vhost async data path")
Fixes: 6e9a9d2a02ae ("examples/vhost: fix ioat dependency")
Fixes: 873e8dad6f49 ("vhost: support packed ring in async datapath")
Cc: sta...@dpdk.org

Signed-off-by: Cheng Jiang 
Reviewed-by: Maxime Coquelin 
---
 examples/vhost/ioat.c   |  4 +--
 examples/vhost/ioat.h   |  8 ++---
 lib/vhost/rte_vhost_async.h |  8 ++---
 lib/vhost/virtio_net.c  | 61 -
 4 files changed, 63 insertions(+), 18 deletions(-)

diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
index 2a2c2d7202..457f8171f0 100644
--- a/examples/vhost/ioat.c
+++ b/examples/vhost/ioat.c
@@ -122,7 +122,7 @@ open_ioat(const char *value)
return ret;
 }
 
-uint32_t
+int32_t
 ioat_transfer_data_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_desc *descs,
struct rte_vhost_async_status *opaque_data, uint16_t count)
@@ -168,7 +168,7 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
return i_desc;
 }
 
-uint32_t
+int32_t
 ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_status *opaque_data,
uint16_t max_packets)
diff --git a/examples/vhost/ioat.h b/examples/vhost/ioat.h
index 1aa28ed6a3..62e163c585 100644
--- a/examples/vhost/ioat.h
+++ b/examples/vhost/ioat.h
@@ -27,12 +27,12 @@ struct dma_for_vhost {
 #ifdef RTE_RAW_IOAT
 int open_ioat(const char *value);
 
-uint32_t
+int32_t
 ioat_transfer_data_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_desc *descs,
struct rte_vhost_async_status *opaque_data, uint16_t count);
 
-uint32_t
+int32_t
 ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_status *opaque_data,
uint16_t max_packets);
@@ -42,7 +42,7 @@ static int open_ioat(const char *value __rte_unused)
return -1;
 }
 
-static uint32_t
+static int32_t
 ioat_transfer_data_cb(int vid __rte_unused, uint16_t queue_id __rte_unused,
struct rte_vhost_async_desc *descs __rte_unused,
struct rte_vhost_async_status *opaque_data __rte_unused,
@@ -51,7 +51,7 @@ ioat_transfer_data_cb(int vid __rte_unused, uint16_t queue_id 
__rte_unused,
return -1;
 }
 
-static uint32_t
+static int32_t
 ioat_check_completed_copies_cb(int vid __rte_unused,
uint16_t queue_id __rte_unused,
struct rte_vhost_async_status *opaque_data __rte_unused,
diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
index 69ec66bba5..02d012ae23 100644
--- a/lib/vhost/rte_vhost_async.h
+++ b/lib/vhost/rte_vhost_async.h
@@ -59,9 +59,9 @@ struct rte_vhost_async_channel_ops {
 * @param count
 *  number of elements in the "descs" array
 * @return
-*  number of descs processed
+*  number of descs processed, negative value means error
 */
-   uint32_t (*transfer_data)(int vid, uint16_t queue_id,
+   int32_t (*transfer_data)(int vid, uint16_t queue_id,
struct rte_vhost_async_desc *descs,
struct rte_vhost_async_status *opaque_data,
uint16_t count);
@@ -76,9 +76,9 @@ struct rte_vhost_async_channel_ops {
 * @param max_packets
 *  max number of packets could be completed
 * @return
-*  number of async descs completed
+*  number of async descs completed, negative value means error
 */
-   uint32_t (*check_completed_copies)(int vid, uint16_t queue_id,
+   int32_t (*check_completed_copies)(int vid, uint16_t queue_id,
struct rte_vhost_async_status *opaque_data,
uint16_t max_packets);
 };
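
As an illustration of the new contract, a minimal transfer_data
implementation is sketched below. It is not taken from the patch:
my_dma_enqueue() is a hypothetical helper standing in for whatever
submission path the backend uses. The point is the int32_t return type and
the negative value on failure, which the vhost library can now detect
instead of misreading it as a huge unsigned count.

static int32_t
my_transfer_data(int vid, uint16_t queue_id,
		struct rte_vhost_async_desc *descs,
		struct rte_vhost_async_status *opaque_data, uint16_t count)
{
	uint16_t i;

	RTE_SET_USED(opaque_data);

	for (i = 0; i < count; i++) {
		/* my_dma_enqueue() is hypothetical, illustration only */
		if (my_dma_enqueue(vid, queue_id, &descs[i]) < 0)
			return -1;	/* hard error: report it, do not wrap around */
	}

	return i;	/* number of descriptors actually submitted */
}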
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 6e5d82c1a8..3ab5229f76 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -1644,6 +1644,7 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
struct async_inflight_info *pkts_info = vq->async_pkts_info;
uint32_t n_pkts = 0, pkt_err = 0;
uint32_t num_async_pkts = 0, num_done_pkts = 0;
+   int32_t n_xfer;
struct {
uint16_t pkt_idx;
uint16_t last_avail_idx;
@@ -1724,8 +1725,17 @@ virtio_dev_rx_async_submit_split(struct virtio_net *dev,
if (unlikely(pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD ||
((VHOST_MAX_ASYNC_VEC >> 1) - segs_await

[dpdk-dev] [PATCH v8 2/4] vhost: add unsafe API to clear packets in async vhost

2021-07-23 Thread Cheng Jiang
Applications need to stop DMA transfers and finish all the inflight
packets in the VM memory hot-plug case when async vhost is used. This
patch provides an unsafe API to clear inflight packets which are
submitted to the DMA engine in the vhost async data path. The program
guide and release notes are updated for the virtqueue inflight packets
clear API in the vhost lib.

Signed-off-by: Cheng Jiang 
Reviewed-by: Maxime Coquelin 
---
 doc/guides/prog_guide/vhost_lib.rst|  5 ++
 doc/guides/rel_notes/release_21_08.rst |  5 ++
 lib/vhost/rte_vhost_async.h| 22 ++
 lib/vhost/version.map  |  1 +
 lib/vhost/virtio_net.c | 93 +++---
 5 files changed, 102 insertions(+), 24 deletions(-)

diff --git a/doc/guides/prog_guide/vhost_lib.rst 
b/doc/guides/prog_guide/vhost_lib.rst
index 70ce4974df..8874033165 100644
--- a/doc/guides/prog_guide/vhost_lib.rst
+++ b/doc/guides/prog_guide/vhost_lib.rst
@@ -305,6 +305,11 @@ The following is an overview of some key Vhost API 
functions:
   This function returns the amount of in-flight packets for the vhost
   queue using async acceleration.
 
+* ``rte_vhost_clear_queue_thread_unsafe(vid, queue_id, **pkts, count)``
+
+  Clear inflight packets which are submitted to DMA engine in vhost async data
+  path. Completed packets are returned to applications through ``pkts``.
+
 Vhost-user Implementations
 --
 
diff --git a/doc/guides/rel_notes/release_21_08.rst 
b/doc/guides/rel_notes/release_21_08.rst
index 543e93ff1d..d9c4cc5df0 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -155,6 +155,11 @@ New Features
   The experimental PMD power management API now supports managing
   multiple Ethernet Rx queues per lcore.
 
+* **Added inflight packets clear API in vhost library.**
+
+  Added an API which can clear the inflight packets submitted to DMA
+  engine in vhost async data path.
+
 
 Removed Items
 -
diff --git a/lib/vhost/rte_vhost_async.h b/lib/vhost/rte_vhost_async.h
index 02d012ae23..b25ff446f7 100644
--- a/lib/vhost/rte_vhost_async.h
+++ b/lib/vhost/rte_vhost_async.h
@@ -246,4 +246,26 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
 __rte_experimental
 int rte_vhost_async_get_inflight(int vid, uint16_t queue_id);
 
+/**
+ * This function checks async completion status and clears packets for
+ * a specific vhost device queue. Packets which are inflight will be
+ * returned in an array.
+ *
+ * @note This function does not perform any locking
+ *
+ * @param vid
+ *  ID of vhost device to clear data
+ * @param queue_id
+ *  Queue id to clear data
+ * @param pkts
+ *  Blank array to get return packet pointer
+ * @param count
+ *  Size of the packet array
+ * @return
+ *  Number of packets returned
+ */
+__rte_experimental
+uint16_t rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id,
+   struct rte_mbuf **pkts, uint16_t count);
+
 #endif /* _RTE_VHOST_ASYNC_H_ */
diff --git a/lib/vhost/version.map b/lib/vhost/version.map
index e0c89646e8..e2504ba657 100644
--- a/lib/vhost/version.map
+++ b/lib/vhost/version.map
@@ -84,4 +84,5 @@ EXPERIMENTAL {
rte_vhost_async_get_inflight;
rte_vhost_async_channel_register_thread_unsafe;
rte_vhost_async_channel_unregister_thread_unsafe;
+   rte_vhost_clear_queue_thread_unsafe;
 };
diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 3ab5229f76..8549afbbe1 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -2214,10 +2214,10 @@ write_back_completed_descs_packed(struct 
vhost_virtqueue *vq,
} while (nr_left > 0);
 }
 
-uint16_t rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
+static __rte_always_inline uint16_t
+vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id,
struct rte_mbuf **pkts, uint16_t count)
 {
-   struct virtio_net *dev = get_device(vid);
struct vhost_virtqueue *vq;
uint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0, n_buffers = 0;
uint16_t start_idx, pkts_idx, vq_size;
@@ -2225,26 +2225,8 @@ uint16_t rte_vhost_poll_enqueue_completed(int vid, 
uint16_t queue_id,
uint16_t from, i;
int32_t n_cpl;
 
-   if (!dev)
-   return 0;
-
-   VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
-   if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
-   VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
-   dev->vid, __func__, queue_id);
-   return 0;
-   }
-
vq = dev->virtqueue[queue_id];
 
-   if (unlikely(!vq->async_registered)) {
-   VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
-   dev->vid, __func__, queue_id);
-   return 0;
-   }
-
-   rte_spinlock_lock(&vq

[dpdk-dev] [PATCH v8 3/4] vhost: handle memory hotplug for async vhost

2021-07-23 Thread Cheng Jiang
From: Jiayu Hu 

When the guest memory is hotplugged, the vhost application which
enables DMA acceleration must stop DMA transfers before the vhost
re-maps the guest memory.

This patch is to notify the vhost application of stopping DMA
transfers.

Signed-off-by: Jiayu Hu 
Reviewed-by: Maxime Coquelin 
---
 lib/vhost/vhost_user.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 31300e194f..433f412fa8 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -1248,6 +1248,7 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct 
VhostUserMsg *msg,
int numa_node = SOCKET_ID_ANY;
uint64_t mmap_offset;
uint32_t i;
+   bool async_notify = false;
 
if (validate_msg_fds(msg, memory->nregions) != 0)
return RTE_VHOST_MSG_RESULT_ERR;
@@ -1275,6 +1276,16 @@ vhost_user_set_mem_table(struct virtio_net **pdev, 
struct VhostUserMsg *msg,
vdpa_dev->ops->dev_close(dev->vid);
dev->flags &= ~VIRTIO_DEV_VDPA_CONFIGURED;
}
+
+   /* notify the vhost application to stop DMA transfers */
+   if (dev->async_copy && dev->notify_ops->vring_state_changed) {
+   for (i = 0; i < dev->nr_vring; i++) {
+   dev->notify_ops->vring_state_changed(dev->vid,
+   i, 0);
+   }
+   async_notify = true;
+   }
+
free_mem_region(dev);
rte_free(dev->mem);
dev->mem = NULL;
@@ -1371,6 +1382,11 @@ vhost_user_set_mem_table(struct virtio_net **pdev, 
struct VhostUserMsg *msg,
 
dump_guest_pages(dev);
 
+   if (async_notify) {
+   for (i = 0; i < dev->nr_vring; i++)
+   dev->notify_ops->vring_state_changed(dev->vid, i, 1);
+   }
+
return RTE_VHOST_MSG_RESULT_OK;
 
 free_mem_table:
-- 
2.29.2



[dpdk-dev] [PATCH v8 4/4] examples/vhost: handle memory hotplug for async vhost

2021-07-23 Thread Cheng Jiang
When the guest memory is hotplugged, the vhost application which
enables DMA acceleration must stop DMA transfers before the vhost
re-maps the guest memory.

To accomplish that, we need to make these changes in the vhost sample:
1. add an inflight packet count.
2. add a vring_state_changed() callback.
3. add an inflight packet clearing process in destroy_device() and
vring_state_changed().

Signed-off-by: Cheng Jiang 
Reviewed-by: Maxime Coquelin 
---
 examples/vhost/main.c | 55 +--
 examples/vhost/main.h |  1 +
 2 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 9cd855a696..bc3d71c898 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -851,8 +851,11 @@ complete_async_pkts(struct vhost_dev *vdev)
 
complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
-   if (complete_count)
+   if (complete_count) {
free_pkts(p_cpl, complete_count);
+   __atomic_sub_fetch(&vdev->pkts_inflight, complete_count, 
__ATOMIC_SEQ_CST);
+   }
+
 }
 
 static __rte_always_inline void
@@ -895,6 +898,7 @@ drain_vhost(struct vhost_dev *vdev)
complete_async_pkts(vdev);
ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
+   __atomic_add_fetch(&vdev->pkts_inflight, ret - cpu_cpl_nr, 
__ATOMIC_SEQ_CST);
 
if (cpu_cpl_nr)
free_pkts(m_cpu_cpl, cpu_cpl_nr);
@@ -1226,6 +1230,9 @@ drain_eth_rx(struct vhost_dev *vdev)
enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
VIRTIO_RXQ, pkts, rx_count,
m_cpu_cpl, &cpu_cpl_nr);
+   __atomic_add_fetch(&vdev->pkts_inflight, enqueue_count - 
cpu_cpl_nr,
+   __ATOMIC_SEQ_CST);
+
if (cpu_cpl_nr)
free_pkts(m_cpu_cpl, cpu_cpl_nr);
 
@@ -1397,8 +1404,19 @@ destroy_device(int vid)
"(%d) device has been removed from data core\n",
vdev->vid);
 
-   if (async_vhost_driver)
+   if (async_vhost_driver) {
+   uint16_t n_pkt = 0;
+   struct rte_mbuf *m_cpl[vdev->pkts_inflight];
+
+   while (vdev->pkts_inflight) {
+   n_pkt = rte_vhost_clear_queue_thread_unsafe(vid, 
VIRTIO_RXQ,
+   m_cpl, vdev->pkts_inflight);
+   free_pkts(m_cpl, n_pkt);
+   __atomic_sub_fetch(&vdev->pkts_inflight, n_pkt, 
__ATOMIC_SEQ_CST);
+   }
+
rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
+   }
 
rte_free(vdev);
 }
@@ -1487,6 +1505,38 @@ new_device(int vid)
return 0;
 }
 
+static int
+vring_state_changed(int vid, uint16_t queue_id, int enable)
+{
+   struct vhost_dev *vdev = NULL;
+
+   TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
+   if (vdev->vid == vid)
+   break;
+   }
+   if (!vdev)
+   return -1;
+
+   if (queue_id != VIRTIO_RXQ)
+   return 0;
+
+   if (async_vhost_driver) {
+   if (!enable) {
+   uint16_t n_pkt = 0;
+   struct rte_mbuf *m_cpl[vdev->pkts_inflight];
+
+   while (vdev->pkts_inflight) {
+   n_pkt = 
rte_vhost_clear_queue_thread_unsafe(vid, queue_id,
+   m_cpl, 
vdev->pkts_inflight);
+   free_pkts(m_cpl, n_pkt);
+   __atomic_sub_fetch(&vdev->pkts_inflight, n_pkt, 
__ATOMIC_SEQ_CST);
+   }
+   }
+   }
+
+   return 0;
+}
+
 /*
  * These callback allow devices to be added to the data core when configuration
  * has been fully complete.
@@ -1495,6 +1545,7 @@ static const struct vhost_device_ops 
virtio_net_device_ops =
 {
.new_device =  new_device,
.destroy_device = destroy_device,
+   .vring_state_changed = vring_state_changed,
 };
 
 /*
diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index 0ccdce4b4a..e7b1ac60a6 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -51,6 +51,7 @@ struct vhost_dev {
uint64_t features;
size_t hdr_len;
uint16_t nr_vrings;
+   uint16_t pkts_inflight;
struct rte_vhost_memory *mem;
struct device_statistics stats;
TAILQ_ENTRY(vhost_dev) global_vdev_entry;
-- 
2.29.2



[dpdk-dev] [PATCH] examples/vhost: add error propagation in ioat ops

2021-01-31 Thread Cheng Jiang
It makes more sense to add error propagation for rte_ioat_completed_ops.

Signed-off-by: Cheng Jiang 
---
 examples/vhost/ioat.c | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
index dbad28d43..60b73be93 100644
--- a/examples/vhost/ioat.c
+++ b/examples/vhost/ioat.c
@@ -22,7 +22,6 @@ struct packet_tracker {
 
 struct packet_tracker cb_tracker[MAX_VHOST_DEVICE];
 
-
 int
 open_ioat(const char *value)
 {
@@ -129,7 +128,7 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_status *opaque_data, uint16_t count)
 {
uint32_t i_desc;
-   int dev_id = dma_bind[vid].dmas[queue_id * 2 + VIRTIO_RXQ].dev_id;
+   uint16_t dev_id = dma_bind[vid].dmas[queue_id * 2 + VIRTIO_RXQ].dev_id;
struct rte_vhost_iov_iter *src = NULL;
struct rte_vhost_iov_iter *dst = NULL;
unsigned long i_seg;
@@ -182,10 +181,17 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
unsigned short mask = MAX_ENQUEUED_SIZE - 1;
unsigned short i;
 
-   int dev_id = dma_bind[vid].dmas[queue_id * 2
+   uint16_t dev_id = dma_bind[vid].dmas[queue_id * 2
+ VIRTIO_RXQ].dev_id;
n_seg = rte_ioat_completed_ops(dev_id, 255, dump, dump);
-   if (n_seg <= 0)
+   if (n_seg < 0) {
+   RTE_LOG(ERR,
+   VHOST_DATA,
+   "fail to poll completed buf on IOAT device %u",
+   dev_id);
+   return 0;
+   }
+   if (n_seg == 0)
return 0;
 
cb_tracker[dev_id].ioat_space += n_seg;
-- 
2.29.2



[dpdk-dev] [PATCH] examples/vhost: remove async inflight packet counter

2021-01-31 Thread Cheng Jiang
Remove the async inflight packet counter since there is no need to
keep tracking it. Increase MAX_ENQUEUED_SIZE to prevent the packet
segment number tracking ring from being exhausted.

Fixes: 63dabdeda690 ("examples/vhost: refactor vhost data path")

Signed-off-by: Cheng Jiang 
---
 examples/vhost/ioat.h |  2 +-
 examples/vhost/main.c | 10 +-
 examples/vhost/main.h |  1 -
 3 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/examples/vhost/ioat.h b/examples/vhost/ioat.h
index 0a1dbb811..1aa28ed6a 100644
--- a/examples/vhost/ioat.h
+++ b/examples/vhost/ioat.h
@@ -11,7 +11,7 @@
 
 #define MAX_VHOST_DEVICE 1024
 #define IOAT_RING_SIZE 4096
-#define MAX_ENQUEUED_SIZE 512
+#define MAX_ENQUEUED_SIZE 4096
 
 struct dma_info {
struct rte_pci_addr addr;
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 233a2dc6e..99e8d9c69 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -831,11 +831,8 @@ complete_async_pkts(struct vhost_dev *vdev)
 
complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
-   if (complete_count) {
-   __atomic_sub_fetch(&vdev->nr_async_pkts, complete_count,
-   __ATOMIC_SEQ_CST);
+   if (complete_count)
free_pkts(p_cpl, complete_count);
-   }
 }
 
 static __rte_always_inline void
@@ -878,8 +875,6 @@ drain_vhost(struct vhost_dev *vdev)
complete_async_pkts(vdev);
ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
-   __atomic_add_fetch(&vdev->nr_async_pkts, ret - cpu_cpl_nr,
-   __ATOMIC_SEQ_CST);
 
if (cpu_cpl_nr)
free_pkts(m_cpu_cpl, cpu_cpl_nr);
@@ -1210,9 +1205,6 @@ drain_eth_rx(struct vhost_dev *vdev)
enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
VIRTIO_RXQ, pkts, rx_count,
m_cpu_cpl, &cpu_cpl_nr);
-   __atomic_add_fetch(&vdev->nr_async_pkts,
-   enqueue_count - cpu_cpl_nr,
-   __ATOMIC_SEQ_CST);
if (cpu_cpl_nr)
free_pkts(m_cpu_cpl, cpu_cpl_nr);
 
diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index 2d6c05fd7..0ccdce4b4 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -51,7 +51,6 @@ struct vhost_dev {
uint64_t features;
size_t hdr_len;
uint16_t nr_vrings;
-   uint64_t nr_async_pkts;
struct rte_vhost_memory *mem;
struct device_statistics stats;
TAILQ_ENTRY(vhost_dev) global_vdev_entry;
-- 
2.29.2



[dpdk-dev] [PATCH v2] examples/vhost: remove async inflight packet counter

2021-01-31 Thread Cheng Jiang
Remove async inflight packet counter since there is no need to keep
tracking it. Increase MAX_ENQUEUED_SIZE to prevent packet segment number
tracking ring from being exhausted.

Fixes: 63dabdeda690 ("examples/vhost: refactor vhost data path")

Signed-off-by: Cheng Jiang 
---
v2: fixed a typo

 examples/vhost/ioat.h |  2 +-
 examples/vhost/main.c | 10 +-
 examples/vhost/main.h |  1 -
 3 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/examples/vhost/ioat.h b/examples/vhost/ioat.h
index 0a1dbb811..1aa28ed6a 100644
--- a/examples/vhost/ioat.h
+++ b/examples/vhost/ioat.h
@@ -11,7 +11,7 @@

 #define MAX_VHOST_DEVICE 1024
 #define IOAT_RING_SIZE 4096
-#define MAX_ENQUEUED_SIZE 512
+#define MAX_ENQUEUED_SIZE 4096

 struct dma_info {
struct rte_pci_addr addr;
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 233a2dc6e..99e8d9c69 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -831,11 +831,8 @@ complete_async_pkts(struct vhost_dev *vdev)

complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
-   if (complete_count) {
-   __atomic_sub_fetch(&vdev->nr_async_pkts, complete_count,
-   __ATOMIC_SEQ_CST);
+   if (complete_count)
free_pkts(p_cpl, complete_count);
-   }
 }

 static __rte_always_inline void
@@ -878,8 +875,6 @@ drain_vhost(struct vhost_dev *vdev)
complete_async_pkts(vdev);
ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
-   __atomic_add_fetch(&vdev->nr_async_pkts, ret - cpu_cpl_nr,
-   __ATOMIC_SEQ_CST);

if (cpu_cpl_nr)
free_pkts(m_cpu_cpl, cpu_cpl_nr);
@@ -1210,9 +1205,6 @@ drain_eth_rx(struct vhost_dev *vdev)
enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
VIRTIO_RXQ, pkts, rx_count,
m_cpu_cpl, &cpu_cpl_nr);
-   __atomic_add_fetch(&vdev->nr_async_pkts,
-   enqueue_count - cpu_cpl_nr,
-   __ATOMIC_SEQ_CST);
if (cpu_cpl_nr)
free_pkts(m_cpu_cpl, cpu_cpl_nr);

diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index 2d6c05fd7..0ccdce4b4 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -51,7 +51,6 @@ struct vhost_dev {
uint64_t features;
size_t hdr_len;
uint16_t nr_vrings;
-   uint64_t nr_async_pkts;
struct rte_vhost_memory *mem;
struct device_statistics stats;
TAILQ_ENTRY(vhost_dev) global_vdev_entry;
--
2.29.2



[dpdk-dev] [PATCH v3] examples/vhost: remove async inflight packet counter

2021-02-01 Thread Cheng Jiang
Remove async inflight packet counter since there is no need to keep
tracking it. Increase MAX_ENQUEUED_SIZE to prevent packet segment number
tracking ring from being exhausted.

Fixes: a68ba8e0a6b6 ("examples/vhost: refactor vhost data path")

Signed-off-by: Cheng Jiang 
---
v3: fixed fixes commit id in git log

v2: fixed a typo

 examples/vhost/ioat.h |  2 +-
 examples/vhost/main.c | 10 +-
 examples/vhost/main.h |  1 -
 3 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/examples/vhost/ioat.h b/examples/vhost/ioat.h
index 0a1dbb811..1aa28ed6a 100644
--- a/examples/vhost/ioat.h
+++ b/examples/vhost/ioat.h
@@ -11,7 +11,7 @@

 #define MAX_VHOST_DEVICE 1024
 #define IOAT_RING_SIZE 4096
-#define MAX_ENQUEUED_SIZE 512
+#define MAX_ENQUEUED_SIZE 4096

 struct dma_info {
struct rte_pci_addr addr;
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index e74fc8750..ca73e7086 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -831,11 +831,8 @@ complete_async_pkts(struct vhost_dev *vdev)

complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
-   if (complete_count) {
-   __atomic_sub_fetch(&vdev->nr_async_pkts, complete_count,
-   __ATOMIC_SEQ_CST);
+   if (complete_count)
free_pkts(p_cpl, complete_count);
-   }
 }

 static __rte_always_inline void
@@ -878,8 +875,6 @@ drain_vhost(struct vhost_dev *vdev)
complete_async_pkts(vdev);
ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
-   __atomic_add_fetch(&vdev->nr_async_pkts, ret - cpu_cpl_nr,
-   __ATOMIC_SEQ_CST);

if (cpu_cpl_nr)
free_pkts(m_cpu_cpl, cpu_cpl_nr);
@@ -1210,9 +1205,6 @@ drain_eth_rx(struct vhost_dev *vdev)
enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
VIRTIO_RXQ, pkts, rx_count,
m_cpu_cpl, &cpu_cpl_nr);
-   __atomic_add_fetch(&vdev->nr_async_pkts,
-   enqueue_count - cpu_cpl_nr,
-   __ATOMIC_SEQ_CST);
if (cpu_cpl_nr)
free_pkts(m_cpu_cpl, cpu_cpl_nr);

diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index 2d6c05fd7..0ccdce4b4 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -51,7 +51,6 @@ struct vhost_dev {
uint64_t features;
size_t hdr_len;
uint16_t nr_vrings;
-   uint64_t nr_async_pkts;
struct rte_vhost_memory *mem;
struct device_statistics stats;
TAILQ_ENTRY(vhost_dev) global_vdev_entry;
--
2.29.2



[dpdk-dev] [PATCH] examples/vhost: fix potentially overflowing expression

2021-02-01 Thread Cheng Jiang
Change the type of buff_idx from uint64_t to uint32_t to fix a
Coverity issue.

Fixes: a68ba8e0a6b6 ("examples/vhost: refactor vhost data path")
Coverity issue: 366264

Signed-off-by: Cheng Jiang 
---
 examples/vhost/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 233a2dc6e..e74fc8750 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -864,7 +864,7 @@ static __rte_always_inline void
 drain_vhost(struct vhost_dev *vdev)
 {
uint16_t ret;
-   uint64_t buff_idx = rte_lcore_id() * MAX_VHOST_DEVICE + vdev->vid;
+   uint32_t buff_idx = rte_lcore_id() * MAX_VHOST_DEVICE + vdev->vid;
uint16_t nr_xmit = vhost_txbuff[buff_idx]->len;
struct rte_mbuf **m = vhost_txbuff[buff_idx]->m_table;
 
-- 
2.29.2



[dpdk-dev] [PATCH v2] examples/vhost: fix potentially overflowing expression

2021-02-01 Thread Cheng Jiang
Change the type of buff_idx from uint64_t to uint32_t to fix a
Coverity issue.

Coverity issue: 366264
Fixes: a68ba8e0a6b6 ("examples/vhost: refactor vhost data path")

Signed-off-by: Cheng Jiang 
---
v2: fixed the format of git log

 examples/vhost/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 233a2dc6e..e74fc8750 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -864,7 +864,7 @@ static __rte_always_inline void
 drain_vhost(struct vhost_dev *vdev)
 {
uint16_t ret;
-   uint64_t buff_idx = rte_lcore_id() * MAX_VHOST_DEVICE + vdev->vid;
+   uint32_t buff_idx = rte_lcore_id() * MAX_VHOST_DEVICE + vdev->vid;
uint16_t nr_xmit = vhost_txbuff[buff_idx]->len;
struct rte_mbuf **m = vhost_txbuff[buff_idx]->m_table;

--
2.29.2



[dpdk-dev] [PATCH v2] examples/vhost: add error propagation in ioat ops

2021-02-03 Thread Cheng Jiang
It makes more sense to add error propagation for rte_ioat_completed_ops.
Also change the type of dev_id from int to uint16_t.

Signed-off-by: Cheng Jiang 
---
v2: mentioned dev_id type changes in git log

 examples/vhost/ioat.c | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
index dbad28d43..60b73be93 100644
--- a/examples/vhost/ioat.c
+++ b/examples/vhost/ioat.c
@@ -22,7 +22,6 @@ struct packet_tracker {

 struct packet_tracker cb_tracker[MAX_VHOST_DEVICE];

-
 int
 open_ioat(const char *value)
 {
@@ -129,7 +128,7 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
struct rte_vhost_async_status *opaque_data, uint16_t count)
 {
uint32_t i_desc;
-   int dev_id = dma_bind[vid].dmas[queue_id * 2 + VIRTIO_RXQ].dev_id;
+   uint16_t dev_id = dma_bind[vid].dmas[queue_id * 2 + VIRTIO_RXQ].dev_id;
struct rte_vhost_iov_iter *src = NULL;
struct rte_vhost_iov_iter *dst = NULL;
unsigned long i_seg;
@@ -182,10 +181,17 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
unsigned short mask = MAX_ENQUEUED_SIZE - 1;
unsigned short i;

-   int dev_id = dma_bind[vid].dmas[queue_id * 2
+   uint16_t dev_id = dma_bind[vid].dmas[queue_id * 2
+ VIRTIO_RXQ].dev_id;
n_seg = rte_ioat_completed_ops(dev_id, 255, dump, dump);
-   if (n_seg <= 0)
+   if (n_seg < 0) {
+   RTE_LOG(ERR,
+   VHOST_DATA,
+   "fail to poll completed buf on IOAT device %u",
+   dev_id);
+   return 0;
+   }
+   if (n_seg == 0)
return 0;

cb_tracker[dev_id].ioat_space += n_seg;
--
2.29.2



[PATCH] vhost: fix slot index in async split virtqueue Tx

2023-02-22 Thread Cheng Jiang
The slot_idx in the asynchronous Vhost split virtqueue dequeue data path
is supposed to be decreased by 1 when desc_to_mbuf() fails.

Fixes: 84d5204310d7 ("vhost: support async dequeue for split ring")
Cc: sta...@dpdk.org

Signed-off-by: Cheng Jiang 
---
 lib/vhost/virtio_net.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/vhost/virtio_net.c b/lib/vhost/virtio_net.c
index 8caf05319e..374007a61a 100644
--- a/lib/vhost/virtio_net.c
+++ b/lib/vhost/virtio_net.c
@@ -3720,6 +3720,7 @@ virtio_dev_tx_async_split(struct virtio_net *dev, struct 
vhost_virtqueue *vq,
allocerr_warned = true;
}
dropped = true;
+   slot_idx--;
break;
}

--
2.35.1



[PATCH v2] app/dma-perf: introduce dma-perf application

2023-05-16 Thread Cheng Jiang
There are many high-performance DMA devices supported in DPDK now, and
these DMA devices can also be integrated into other modules of DPDK as
accelerators, such as Vhost. Before integrating DMA into applications,
developers need to know the performance of these DMA devices in various
scenarios and the performance of CPUs in the same scenario, such as
different buffer lengths. Only in this way can we know the target
performance of the application accelerated by using them. This patch
introduces a high-performance testing tool, which supports comparing the
performance of CPU and DMA in different scenarios automatically with a
pre-set config file. Memory Copy performance test are supported for now.

Signed-off-by: Cheng Jiang 
Signed-off-by: Jiayu Hu 
Signed-off-by: Yuan Wang 
Acked-by: Morten Brørup 
---
v2:
  added lcore/dmadev designation;
  added error case process;
  removed worker_threads parameter from config.ini;
  improved the logs;
  improved config file;

 app/meson.build   |   1 +
 app/test-dma-perf/benchmark.c | 471 
 app/test-dma-perf/config.ini  |  59 
 app/test-dma-perf/main.c  | 567 ++
 app/test-dma-perf/main.h  |  69 +
 app/test-dma-perf/meson.build |  17 +
 6 files changed, 1184 insertions(+)
 create mode 100644 app/test-dma-perf/benchmark.c
 create mode 100644 app/test-dma-perf/config.ini
 create mode 100644 app/test-dma-perf/main.c
 create mode 100644 app/test-dma-perf/main.h
 create mode 100644 app/test-dma-perf/meson.build

diff --git a/app/meson.build b/app/meson.build
index e32ea4bd5c..514cb2f7b2 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -19,6 +19,7 @@ apps = [
 'test-cmdline',
 'test-compress-perf',
 'test-crypto-perf',
+'test-dma-perf',
 'test-eventdev',
 'test-fib',
 'test-flow-perf',
diff --git a/app/test-dma-perf/benchmark.c b/app/test-dma-perf/benchmark.c
new file mode 100644
index 00..4e99ab9736
--- /dev/null
+++ b/app/test-dma-perf/benchmark.c
@@ -0,0 +1,471 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Intel Corporation
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "main.h"
+
+#define MAX_DMA_CPL_NB 255
+
+#define TEST_WAIT_U_SECOND 1
+
+#define CSV_LINE_DMA_FMT "Scenario %u,%u,%s,%u,%u,%u,%" PRIu64 ",%.3lf,%.3lf\n"
+#define CSV_LINE_CPU_FMT "Scenario %u,%u,NA,%u,%u,%u,%" PRIu64 ",%.3lf,%.3lf\n"
+
+struct worker_info {
+   bool ready_flag;
+   bool start_flag;
+   bool stop_flag;
+   uint32_t total_cpl;
+   uint32_t test_cpl;
+};
+
+struct lcore_params {
+   uint8_t scenario_id;
+   unsigned int lcore_id;
+   char *dma_name;
+   uint16_t worker_id;
+   uint16_t dev_id;
+   uint32_t nr_buf;
+   uint16_t kick_batch;
+   uint32_t buf_size;
+   uint16_t test_secs;
+   struct rte_mbuf **srcs;
+   struct rte_mbuf **dsts;
+   struct worker_info worker_info;
+};
+
+static struct rte_mempool *src_pool;
+static struct rte_mempool *dst_pool;
+
+static volatile struct lcore_params *worker_params[MAX_WORKER_NB];
+
+#define PRINT_ERR(...) print_err(__func__, __LINE__, __VA_ARGS__)
+
+static inline int
+__rte_format_printf(3, 4)
+print_err(const char *func, int lineno, const char *format, ...)
+{
+   va_list ap;
+   int ret;
+
+   ret = fprintf(stderr, "In %s:%d - ", func, lineno);
+   va_start(ap, format);
+   ret += vfprintf(stderr, format, ap);
+   va_end(ap);
+
+   return ret;
+}
+
+static inline void
+calc_result(uint32_t buf_size, uint32_t nr_buf, uint16_t nb_workers, uint16_t 
test_secs,
+   uint32_t total_cnt, uint32_t *memory, uint32_t 
*ave_cycle,
+   float *bandwidth, float *mops)
+{
+   *memory = (buf_size * (nr_buf / nb_workers) * 2) / (1024 * 1024);
+   *ave_cycle = test_secs * rte_get_timer_hz() / total_cnt;
+   *bandwidth = (buf_size * 8 * (rte_get_timer_hz() / (float)*ave_cycle)) 
/ 10;
+   *mops = (float)rte_get_timer_hz() / *ave_cycle / 100;
+}
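/*
 * Not part of the patch: a worked example of the metrics computed above,
 * assuming the line-wrapped divisors are 1e9 and 1e6 so the results match
 * the Gbps/MOps labels printed by output_result(). With buf_size = 1024 B,
 * nr_buf = 8192, nb_workers = 2, test_secs = 2, a 2 GHz timer and
 * total_cnt = 8,000,000 completed copies for this worker:
 *   memory    = 1024 * (8192 / 2) * 2 / 2^20      = 8 MB
 *   ave_cycle = 2 * 2e9 / 8e6                     = 500 cycles/op
 *   bandwidth = 1024 * 8 * (2e9 / 500) / 1e9      = 32.768 Gbps
 *   mops      = (2e9 / 500) / 1e6                 = 4 MOps
 */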
+
+static void
+output_result(uint8_t scenario_id, uint32_t lcore_id, char *dma_name, uint64_t 
ave_cycle,
+   uint32_t buf_size, uint32_t nr_buf, uint32_t memory,
+   float bandwidth, float mops, bool is_dma)
+{
+   if (is_dma)
+   printf("lcore %u, DMA %s:\n", lcore_id, dma_name);
+   else
+   printf("lcore %u\n", lcore_id);
+
+   printf("average cycles/op: %" PRIu64 ", buffer size: %u, nr_buf: %u, memory: %uMB, frequency: %" PRIu64 ".\n",
+   ave_cycle, buf_size, nr_buf, memory, rte_get_timer_hz());
+ 
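
The message is truncated before the worker loop. The flags in struct
worker_info suggest a simple start/stop handshake between the main lcore
and each worker; the sketch below is a reconstruction under that
assumption, not the patch text, and do_copies() stands in for the real
worker function.

static int
do_copies(void *arg)
{
	volatile struct lcore_params *p = arg;
	volatile struct worker_info *info = &p->worker_info;

	info->stop_flag = false;
	info->ready_flag = true;	/* tell the main lcore we are set up */

	while (!info->start_flag)	/* wait for the global start signal */
		;

	while (!info->stop_flag) {
		/* submit and poll one burst of copies here ... */
		info->total_cpl++;	/* count completed operations */
	}

	return 0;
}

/* The main lcore launches one worker per configured lcore, waits until
 * every ready_flag is set, raises start_flag, sleeps test_secs seconds,
 * raises stop_flag and then reads total_cpl to feed calc_result().
 */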

[PATCH v3] app/dma-perf: introduce dma-perf application

2023-05-17 Thread Cheng Jiang
There are many high-performance DMA devices supported in DPDK now, and
these DMA devices can also be integrated into other modules of DPDK as
accelerators, such as Vhost. Before integrating DMA into applications,
developers need to know the performance of these DMA devices in various
scenarios and the performance of CPUs in the same scenario, such as
different buffer lengths. Only in this way can we know the target
performance of the application accelerated by using them. This patch
introduces a high-performance testing tool, which supports comparing the
performance of CPU and DMA in different scenarios automatically with a
pre-set config file. Memory Copy performance test are supported for now.

Signed-off-by: Cheng Jiang 
Signed-off-by: Jiayu Hu 
Signed-off-by: Yuan Wang 
Acked-by: Morten Brørup 
---
v3:
  fixed some typos;
v2:
  added lcore/dmadev designation;
  added error case process;
  removed worker_threads parameter from config.ini;
  improved the logs;
  improved config file;

 app/meson.build   |   1 +
 app/test-dma-perf/benchmark.c | 471 
 app/test-dma-perf/config.ini  |  59 
 app/test-dma-perf/main.c  | 567 ++
 app/test-dma-perf/main.h  |  69 +
 app/test-dma-perf/meson.build |  17 +
 6 files changed, 1184 insertions(+)
 create mode 100644 app/test-dma-perf/benchmark.c
 create mode 100644 app/test-dma-perf/config.ini
 create mode 100644 app/test-dma-perf/main.c
 create mode 100644 app/test-dma-perf/main.h
 create mode 100644 app/test-dma-perf/meson.build

diff --git a/app/meson.build b/app/meson.build
index e32ea4bd5c..514cb2f7b2 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -19,6 +19,7 @@ apps = [
 'test-cmdline',
 'test-compress-perf',
 'test-crypto-perf',
+'test-dma-perf',
 'test-eventdev',
 'test-fib',
 'test-flow-perf',
diff --git a/app/test-dma-perf/benchmark.c b/app/test-dma-perf/benchmark.c
new file mode 100644
index 00..4e99ab9736
--- /dev/null
+++ b/app/test-dma-perf/benchmark.c
@@ -0,0 +1,471 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Intel Corporation
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "main.h"
+
+#define MAX_DMA_CPL_NB 255
+
+#define TEST_WAIT_U_SECOND 1
+
+#define CSV_LINE_DMA_FMT "Scenario %u,%u,%s,%u,%u,%u,%" PRIu64 ",%.3lf,%.3lf\n"
+#define CSV_LINE_CPU_FMT "Scenario %u,%u,NA,%u,%u,%u,%" PRIu64 ",%.3lf,%.3lf\n"
+
+struct worker_info {
+   bool ready_flag;
+   bool start_flag;
+   bool stop_flag;
+   uint32_t total_cpl;
+   uint32_t test_cpl;
+};
+
+struct lcore_params {
+   uint8_t scenario_id;
+   unsigned int lcore_id;
+   char *dma_name;
+   uint16_t worker_id;
+   uint16_t dev_id;
+   uint32_t nr_buf;
+   uint16_t kick_batch;
+   uint32_t buf_size;
+   uint16_t test_secs;
+   struct rte_mbuf **srcs;
+   struct rte_mbuf **dsts;
+   struct worker_info worker_info;
+};
+
+static struct rte_mempool *src_pool;
+static struct rte_mempool *dst_pool;
+
+static volatile struct lcore_params *worker_params[MAX_WORKER_NB];
+
+#define PRINT_ERR(...) print_err(__func__, __LINE__, __VA_ARGS__)
+
+static inline int
+__rte_format_printf(3, 4)
+print_err(const char *func, int lineno, const char *format, ...)
+{
+   va_list ap;
+   int ret;
+
+   ret = fprintf(stderr, "In %s:%d - ", func, lineno);
+   va_start(ap, format);
+   ret += vfprintf(stderr, format, ap);
+   va_end(ap);
+
+   return ret;
+}
+
+static inline void
+calc_result(uint32_t buf_size, uint32_t nr_buf, uint16_t nb_workers, uint16_t 
test_secs,
+   uint32_t total_cnt, uint32_t *memory, uint32_t 
*ave_cycle,
+   float *bandwidth, float *mops)
+{
+   *memory = (buf_size * (nr_buf / nb_workers) * 2) / (1024 * 1024);
+   *ave_cycle = test_secs * rte_get_timer_hz() / total_cnt;
+   *bandwidth = (buf_size * 8 * (rte_get_timer_hz() / (float)*ave_cycle)) 
/ 10;
+   *mops = (float)rte_get_timer_hz() / *ave_cycle / 100;
+}
+
+static void
+output_result(uint8_t scenario_id, uint32_t lcore_id, char *dma_name, uint64_t 
ave_cycle,
+   uint32_t buf_size, uint32_t nr_buf, uint32_t memory,
+   float bandwidth, float mops, bool is_dma)
+{
+   if (is_dma)
+   printf("lcore %u, DMA %s:\n", lcore_id, dma_name);
+   else
+   printf("lcore %u\n", lcore_id);
+
+   printf("average cycles/op: %" PRIu64 ", buffer size: %u, nr_buf: %u, memory: %uMB, frequency: %" PRIu64 ".\n",
+   ave_cycle, buf_size, nr_buf, memory, rte_get_timer_hz());

[PATCH] app/dma-perf: introduce dma-perf application

2023-04-20 Thread Cheng Jiang
There are many high-performance DMA devices supported in DPDK now, and
these DMA devices can also be integrated into other modules of DPDK as
accelerators, such as Vhost. Before integrating DMA into applications,
developers need to know the performance of these DMA devices in various
scenarios and the performance of CPUs in the same scenario, such as
different buffer lengths. Only in this way can we know the target
performance of the application accelerated by using them. This patch
introduces a high-performance testing tool, which supports comparing the
performance of CPU and DMA in different scenarios automatically with a
pre-set config file. Memory Copy performance test are supported for now.

Signed-off-by: Cheng Jiang 
Signed-off-by: Jiayu Hu 
Signed-off-by: Yuan Wang 
Acked-by: Morten Brørup 
---
 app/meson.build   |   1 +
 app/test-dma-perf/benchmark.c | 467 ++
 app/test-dma-perf/config.ini  |  56 
 app/test-dma-perf/main.c  | 445 
 app/test-dma-perf/main.h  |  56 
 app/test-dma-perf/meson.build |  17 ++
 6 files changed, 1042 insertions(+)
 create mode 100644 app/test-dma-perf/benchmark.c
 create mode 100644 app/test-dma-perf/config.ini
 create mode 100644 app/test-dma-perf/main.c
 create mode 100644 app/test-dma-perf/main.h
 create mode 100644 app/test-dma-perf/meson.build

diff --git a/app/meson.build b/app/meson.build
index e32ea4bd5c..514cb2f7b2 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -19,6 +19,7 @@ apps = [
 'test-cmdline',
 'test-compress-perf',
 'test-crypto-perf',
+'test-dma-perf',
 'test-eventdev',
 'test-fib',
 'test-flow-perf',
diff --git a/app/test-dma-perf/benchmark.c b/app/test-dma-perf/benchmark.c
new file mode 100644
index 00..36e3413bdc
--- /dev/null
+++ b/app/test-dma-perf/benchmark.c
@@ -0,0 +1,467 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Intel Corporation
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "main.h"
+
+#define MAX_DMA_CPL_NB 255
+
+#define TEST_WAIT_U_SECOND 1
+
+#define CSV_LINE_DMA_FMT "Scenario %u,%u,%u,%u,%u,%u,%" PRIu64 ",%.3lf,%.3lf\n"
+#define CSV_LINE_CPU_FMT "Scenario %u,%u,NA,%u,%u,%u,%" PRIu64 ",%.3lf,%.3lf\n"
+
+struct worker_info {
+   bool ready_flag;
+   bool start_flag;
+   bool stop_flag;
+   uint32_t total_cpl;
+   uint32_t test_cpl;
+};
+
+struct lcore_params {
+   uint8_t scenario_id;
+   unsigned int lcore_id;
+   uint16_t worker_id;
+   uint16_t dev_id;
+   uint32_t nr_buf;
+   uint16_t kick_batch;
+   uint32_t buf_size;
+   uint16_t test_secs;
+   struct rte_mbuf **srcs;
+   struct rte_mbuf **dsts;
+   struct worker_info worker_info;
+};
+
+static struct rte_mempool *src_pool;
+static struct rte_mempool *dst_pool;
+
+static volatile struct lcore_params *worker_params[MAX_WORKER_NB];
+
+uint16_t dmadev_ids[MAX_WORKER_NB];
+uint32_t nb_dmadevs;
+
+
+#define PRINT_ERR(...) print_err(__func__, __LINE__, __VA_ARGS__)
+
+static inline int
+__rte_format_printf(3, 4)
+print_err(const char *func, int lineno, const char *format, ...)
+{
+   va_list ap;
+   int ret;
+
+   ret = fprintf(stderr, "In %s:%d - ", func, lineno);
+   va_start(ap, format);
+   ret += vfprintf(stderr, format, ap);
+   va_end(ap);
+
+   return ret;
+}
+
+static inline void
+calc_result(uint32_t buf_size, uint32_t nr_buf, uint16_t nb_workers, uint16_t 
test_secs,
+   uint32_t total_cnt, uint32_t *memory, uint32_t 
*ave_cycle,
+   float *bandwidth, float *mops)
+{
+   *memory = (buf_size * (nr_buf / nb_workers) * 2) / (1024 * 1024);
+   *ave_cycle = test_secs * rte_get_timer_hz() / total_cnt;
+   *bandwidth = (buf_size * 8 * (rte_get_timer_hz() / (float)*ave_cycle)) 
/ 10;
+   *mops = (float)rte_get_timer_hz() / *ave_cycle / 100;
+}
+
+static void
+output_result(uint8_t scenario_id, uint32_t lcore_id, uint16_t dev_id, 
uint64_t ave_cycle,
+   uint32_t buf_size, uint32_t nr_buf, uint32_t memory,
+   float bandwidth, float mops, bool is_dma)
+{
+   if (is_dma)
+   printf("lcore %u, DMA %u:\n", lcore_id, dev_id);
+   else
+   printf("lcore %u\n", lcore_id);
+
+   printf("average cycles/op: %" PRIu64 ", buffer size: %u, nr_buf: %u, memory: %uMB, frequency: %" PRIu64 ".\n",
+   ave_cycle, buf_size, nr_buf, memory, rte_get_timer_hz());
+   printf("Average bandwidth: %.3lfGbps, MOps: %.3lf\n", bandwidth, mops);
+
+   if (is_dma)
+   snprintf(ou

[PATCH 0/2] vhost: add port mirroring function in the vhost lib

2023-04-20 Thread Cheng Jiang
Similar to the port mirroring function on a switch or router, this
patch set implements such a function in the vhost library. When data
is sent to a front-end, it is also sent to that front-end's mirror
front-end. When data is received from a front-end, it is also sent to
the mirror front-end.

Cheng Jiang (2):
  vhost: add ingress API for port mirroring datapath
  vhost: add egress API for port mirroring datapath

 lib/vhost/rte_vhost_async.h |   17 +
 lib/vhost/version.map   |3 +
 lib/vhost/virtio_net.c  | 1266 +++
 3 files changed, 1286 insertions(+)

--
2.35.1
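
The cover letter itself shows no code. Purely to illustrate the intended
data flow, a sketch with hypothetical names follows; the real prototypes
live in the two patches, which use the async data path, so
mirror_enqueue_burst() here is illustrative only.

static uint16_t
mirror_enqueue_burst(int vid, int mirror_vid, uint16_t queue_id,
		struct rte_mbuf **pkts, uint16_t count)
{
	uint16_t n;

	/* deliver the burst to the primary front-end first */
	n = rte_vhost_enqueue_burst(vid, queue_id, pkts, count);

	/* replay the delivered packets to the mirror front-end */
	if (n > 0)
		rte_vhost_enqueue_burst(mirror_vid, queue_id, pkts, n);

	return n;
}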


